period.c

References to this file elsewhere.
1 #ifndef MS_SUA
2 # include <stdio.h>
3 #endif
4 #include <fcntl.h>
5 
6 #define STANDARD_ERROR 2
7 
8 #define STANDARD_OUTPUT 1
9 
10 #include "mpi.h"
11 #include "rsl_lite.h"
12 
13 static int yp_curs, ym_curs, xp_curs, xm_curs ;
14 
15 RSL_LITE_INIT_PERIOD ( 
16                 int * Fcomm0,
17                 int * shw0,
18                 int * n3dR0, int *n2dR0, int * typesizeR0 , 
19                 int * n3dI0, int *n2dI0, int * typesizeI0 , 
20                 int * n3dD0, int *n2dD0, int * typesizeD0 , 
21                 int * n3dL0, int *n2dL0, int * typesizeL0 , 
22                 int * me0, int * np0 , int * np_x0 , int * np_y0 ,
23                 int * ips0 , int * ipe0 , int * jps0 , int * jpe0 , int * kps0 , int * kpe0 )
24 {
25   int n3dR, n2dR, typesizeR ;
26   int n3dI, n2dI, typesizeI ;
27   int n3dD, n2dD, typesizeD ;
28   int n3dL, n2dL, typesizeL ;
29   int shw ;
30   int me, np, np_x, np_y ;
31   int ips , ipe , jps , jpe , kps , kpe ;
32   int yp, ym, xp, xm ;
33   int nbytes ;
34   int coords[2] ;
35   MPI_Comm comm, *comm0, dummy_comm ;
36 
37   comm0 = &dummy_comm ;
38   *comm0 = MPI_Comm_f2c( *Fcomm0 ) ;
39 
40   shw = *shw0 ;
41   n3dR = *n3dR0 ; n2dR = *n2dR0 ; typesizeR = *typesizeR0 ;
42   n3dI = *n3dI0 ; n2dI = *n2dI0 ; typesizeI = *typesizeI0 ;
43   n3dD = *n3dD0 ; n2dD = *n2dD0 ; typesizeD = *typesizeD0 ;
44   n3dL = *n3dL0 ; n2dL = *n2dL0 ; typesizeL = *typesizeL0 ;
45   me = *me0 ; np = *np0 ; np_x = *np_x0 ; np_y = *np_y0 ;
46   ips = *ips0-1 ; ipe = *ipe0-1 ; jps = *jps0-1 ; jpe = *jpe0-1 ; kps = *kps0-1 ; kpe = *kpe0-1 ;
47 
48 /*
49  This assumes that the topoology associated with the communicator is periodic
50  the period routines should be called with "local_communicator_periodic", which
51  is set up in module_dm.F for RSL_LITE.  Registry generated code automatically
52  does this (gen_comms.c for RSL_LITE).
53 */
54   if ( np_y > 1 ) {
55     nbytes = typesizeR*(ipe-ips+1+2*shw)*(shw+1)*(n3dR*(kpe-kps+1)+n2dR) +
56              typesizeI*(ipe-ips+1+2*shw)*(shw+1)*(n3dI*(kpe-kps+1)+n2dI) +
57              typesizeD*(ipe-ips+1+2*shw)*(shw+1)*(n3dD*(kpe-kps+1)+n2dD) +
58              typesizeL*(ipe-ips+1+2*shw)*(shw+1)*(n3dL*(kpe-kps+1)+n2dL) ;
59     MPI_Comm_rank( *comm0, &me ) ;
60     MPI_Cart_coords( *comm0, me, 2, coords ) ;
61     MPI_Cart_shift( *comm0, 0, 1, &ym, &yp ) ;
62     if ( yp != MPI_PROC_NULL && coords[0] == np_y - 1 ) {  /* process on top of mesh */
63        buffer_for_proc ( yp , nbytes, RSL_RECVBUF ) ;
64        buffer_for_proc ( yp , nbytes, RSL_SENDBUF ) ;
65     }
66     if ( ym != MPI_PROC_NULL && coords[0] == 0 ) {         /* process on bottom of mesh */
67        buffer_for_proc ( ym , nbytes, RSL_RECVBUF ) ;
68        buffer_for_proc ( ym , nbytes, RSL_SENDBUF ) ;
69     }
70   }
71   if ( np_x > 1 ) {
72     nbytes = typesizeR*(jpe-jps+1+2*shw)*(shw+1)*(n3dR*(kpe-kps+1)+n2dR) +
73              typesizeI*(jpe-jps+1+2*shw)*(shw+1)*(n3dI*(kpe-kps+1)+n2dI) +
74              typesizeD*(jpe-jps+1+2*shw)*(shw+1)*(n3dD*(kpe-kps+1)+n2dD) +
75              typesizeL*(jpe-jps+1+2*shw)*(shw+1)*(n3dL*(kpe-kps+1)+n2dL) ;
76     MPI_Comm_rank( *comm0, &me ) ;
77     MPI_Cart_coords( *comm0, me, 2, coords ) ;
78     MPI_Cart_shift( *comm0, 1, 1, &xm, &xp ) ;
79     if ( xm != MPI_PROC_NULL && coords[1] == np_x - 1 ) { /* process on right hand side of mesh */
80        buffer_for_proc ( xp , nbytes, RSL_RECVBUF ) ;
81        buffer_for_proc ( xp , nbytes, RSL_SENDBUF ) ;
82     }
83     if ( xp != MPI_PROC_NULL && coords[1] == 0 ) {        /* process on left hand side of mesh */
84        buffer_for_proc ( xm,  nbytes, RSL_RECVBUF ) ;
85        buffer_for_proc ( xm , nbytes, RSL_SENDBUF ) ;
86     }
87   }
88   yp_curs = 0 ; ym_curs = 0 ; xp_curs = 0 ; xm_curs = 0 ;
89 }
90 
91 
92 RSL_LITE_PACK_PERIOD ( int* Fcomm0, char * buf , int * shw0 , int * typesize0 , int * xy0 , int * pu0 , int * imemord , int * stag0 ,
93            int *me0, int * np0 , int * np_x0 , int * np_y0 , 
94            int * ids0 , int * ide0 , int * jds0 , int * jde0 , int * kds0 , int * kde0 ,
95            int * ims0 , int * ime0 , int * jms0 , int * jme0 , int * kms0 , int * kme0 ,
96            int * ips0 , int * ipe0 , int * jps0 , int * jpe0 , int * kps0 , int * kpe0 )
97 {
98   int me, np, np_x, np_y ;
99   int shw , typesize ;
100   int ids , ide , jds , jde , kds , kde ;
101   int ims , ime , jms , jme , kms , kme ;
102   int ips , ipe , jps , jpe , kps , kpe ;
103   int stag ;  /* 0 not stag, 1 stag */
104   int xy ;   /* y = 0 , x = 1 */
105   int pu ;   /* pack = 0 , unpack = 1 */
106   register int i, j, k, t ;
107 #ifdef crayx1
108   register int i2,i3,i4,i_offset;
109 #endif
110   char *p ;
111   int the_buf ;
112   int yp, ym, xp, xm ;
113   int nbytes, ierr ;
114   register int *pi, *qi ;
115   int coords[2] ;
116   int js, je, ks, ke, is, ie, wcount ;
117   MPI_Comm comm, *comm0, dummy_comm ;
118 
119   comm0 = &dummy_comm ;
120   *comm0 = MPI_Comm_f2c( *Fcomm0 ) ;
121 
122   me = *me0 ; np = *np0 ; np_x = *np_x0 ; np_y = *np_y0 ;
123   stag = *stag0 ;
124   shw = *shw0 ; typesize = *typesize0 ;
125   ids = *ids0-1 ; ide = *ide0-1 ; jds = *jds0-1 ; jde = *jde0-1 ; kds = *kds0-1 ; kde = *kde0-1 ;
126   ims = *ims0-1 ; ime = *ime0-1 ; jms = *jms0-1 ; jme = *jme0-1 ; kms = *kms0-1 ; kme = *kme0-1 ;
127   ips = *ips0-1 ; ipe = *ipe0-1 ; jps = *jps0-1 ; jpe = *jpe0-1 ; kps = *kps0-1 ; kpe = *kpe0-1 ;
128   xy = *xy0 ;
129   pu = *pu0 ;
130 
131 #define RANGE(S1,E1,S2,E2,S3,E3,S4,E4) (((E1)-(S1)+1)*((E2)-(S2)+1)*((E3)-(S3)+1)*((E4)-(S4)+1))
132 #if 0
133 #define IMAX(A) (((A)>ids)?(A):ids)
134 #define IMIN(A) (((A)<ide)?(A):ide)
135 #define JMAX(A) (((A)>jds)?(A):jds)
136 #define JMIN(A) (((A)<jde)?(A):jde)
137 #else
138 /* allow the extent in other dimension to go into boundary region (e.g. < ids or > ide) since
139    this will handle corner points for doubly periodic updates (he wrote hopefully) */
140 #define IMAX(A) (A)
141 #define IMIN(A) (A)
142 #define JMAX(A) (A)
143 #define JMIN(A) (A)
144 #endif
145 
146   the_buf = ( pu == 0 ) ? RSL_SENDBUF : RSL_RECVBUF ;
147 
148   if ( np_x > 1 && xy == 1 ) {   /* exchange period in x dim */
149     MPI_Comm_rank( *comm0, &me ) ;
150     MPI_Cart_coords( *comm0, me, 2, coords ) ;
151     MPI_Cart_shift( *comm0, 1, 1, &xm, &xp ) ;
152     if ( coords[1] == np_x - 1 ) {                /* process on right hand edge of domain */
153       p = buffer_for_proc( xp , 0 , the_buf ) ;
154       if ( pu == 0 ) {
155         js = JMAX(jps-shw) ; je = JMIN(jpe+shw) ;
156         ks = kps           ; ke = kpe ;
157         is = ipe-shw       ; ie = ipe-1         ;
158         nbytes = buffer_size_for_proc( xp , the_buf ) ;
159         if ( xp_curs + RANGE( JMAX(jps-shw), JMIN(jpe+shw), kps, kpe, ipe-shw, ipe-1, 1, typesize ) > nbytes ) {
160 #ifndef MS_SUA
161 	  fprintf(stderr,"memory overwrite in rsl_lite_pack_period_x, right hand X to %d, %d > %d\n",xp,
162 	      xp_curs + RANGE( JMAX(jps-shw), JMIN(jpe+shw), kps, kpe, ipe-shw, ipe-1, 1, typesize ), nbytes ) ;
163 #endif
164 	  MPI_Abort(MPI_COMM_WORLD, 98) ;
165         }
166         if ( typesize == 8 ) {
167           F_PACK_LINT ( buf, p+xp_curs, imemord, &js, &je, &ks, &ke, &is, &ie,
168                                         &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ;
169           xp_curs += wcount*typesize ;
170         } else
171 	if ( typesize == 4 ) {
172           F_PACK_INT ( buf, p+xp_curs, imemord, &js, &je, &ks, &ke, &is, &ie,
173                                        &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ;
174           xp_curs += wcount*typesize ;
175 	}
176 	else {
177 #ifndef MS_SUA
178           fprintf(stderr,"internal error: %s %d\n",__FILE__,__LINE__) ;
179 #endif
180 	}
181       } else {
182         js = JMAX(jps-shw) ; je = JMIN(jpe+shw) ;
183         ks = kps           ; ke = kpe ;
184         is = ipe           ; ie = ipe+shw-1+stag ;
185         if ( typesize == 8 ) {
186           F_UNPACK_LINT ( p+xp_curs, buf, imemord, &js, &je, &ks, &ke, &is, &ie,
187                                           &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ;
188           xp_curs += wcount*typesize ;
189         } else
190 	if ( typesize == 4 ) {
191           F_UNPACK_INT ( p+xp_curs, buf, imemord, &js, &je, &ks, &ke, &is, &ie,
192                                          &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ;
193           xp_curs += wcount*typesize ;
194 	}
195 	else {
196 #ifndef MS_SUA
197           fprintf(stderr,"internal error: %s %d\n",__FILE__,__LINE__) ;
198 #endif
199         }
200       }
201     }
202     if ( coords[1] == 0 ) {         /* process on left hand edge of domain */
203       p = buffer_for_proc( xm , 0 , the_buf ) ;
204       if ( pu == 0 ) {
205         js = JMAX(jps-shw) ; je = JMIN(jpe+shw) ;
206         ks = kps           ; ke = kpe ;
207         is = ips           ; ie = ips+shw-1+stag ;
208         nbytes = buffer_size_for_proc( xm , the_buf ) ;
209         if ( xm_curs + RANGE( JMAX(jps-shw), JMIN(jpe+shw), kps, kpe, ips, ips+shw-1+stag, 1, typesize ) > nbytes ) {
210 #ifndef MS_SUA
211 	  fprintf(stderr,"memory overwrite in rsl_lite_pack_period_x,  left hand X to %d , %d > %d\n",xm,
212 	      xm_curs + RANGE( JMAX(jps-shw), JMIN(jpe+shw), kps, kpe, ips, ips+shw-1+stag, 1, typesize ), nbytes ) ;
213 #endif
214 	  MPI_Abort(MPI_COMM_WORLD, 98) ;
215         }
216         if ( typesize == 8 ) {
217           F_PACK_LINT ( buf, p+xm_curs, imemord, &js, &je, &ks, &ke, &is, &ie,
218                                         &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ;
219           xm_curs += wcount*typesize ;
220         } else
221 	if ( typesize == 4 ) {
222           F_PACK_INT ( buf, p+xm_curs, imemord, &js, &je, &ks, &ke, &is, &ie,
223                                        &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ;
224           xm_curs += wcount*typesize ;
225 	}
226 	else {
227 #ifndef MS_SUA
228           fprintf(stderr,"internal error: %s %d\n",__FILE__,__LINE__) ;
229 #endif
230         }
231       } else {
232         js = JMAX(jps-shw) ; je = JMIN(jpe+shw) ;
233         ks = kps           ; ke = kpe ;
234         is = ips-shw       ; ie = ips-1           ;
235         if ( typesize == 8 ) {
236           F_UNPACK_LINT ( p+xm_curs, buf, imemord, &js, &je, &ks, &ke, &is, &ie,
237                                           &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ;
238           xm_curs += wcount*typesize ;
239         } else
240 	if ( typesize == 4 ) {
241           F_UNPACK_INT ( p+xm_curs, buf, imemord, &js, &je, &ks, &ke, &is, &ie,
242                                          &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ;
243           xm_curs += wcount*typesize ;
244 	}
245 	else {
246 #ifndef MS_SUA
247           fprintf(stderr,"internal error: %s %d\n",__FILE__,__LINE__) ;
248 #endif
249         }
250       }
251     }
252   }
253   if ( np_y > 1 && xy == 0 ) {    /* exchange period in Y dim */
254     MPI_Comm_rank( *comm0, &me ) ;
255     MPI_Cart_coords( *comm0, me, 2, coords ) ;
256     MPI_Cart_shift( *comm0, 0, 1, &ym, &yp ) ;
257     if ( coords[0] == np_y - 1 ) {                /* process on top edge of domain */
258       p = buffer_for_proc( yp , 0 , the_buf ) ;
259       if ( pu == 0 ) {
260         is = IMAX(ips-shw) ; ie = IMIN(ipe+shw) ;
261         ks = kps           ; ke = kpe ;
262         js = jpe-shw       ; je = jpe-1         ;
263         nbytes = buffer_size_for_proc( yp , the_buf ) ;
264         if ( yp_curs + RANGE( IMAX(ips-shw), IMIN(ipe+shw), kps, kpe, jpe-shw, jpe-1, 1, typesize ) > nbytes ) {
265 #ifndef MS_SUA
266 	  fprintf(stderr,"memory overwrite in rsl_lite_pack_period_y, right hand Y to %d, %d > %d\n",yp,
267 	      yp_curs + RANGE( IMAX(ips-shw), IMIN(ipe+shw), kps, kpe, jpe-shw, jpe-1, 1, typesize ), nbytes ) ;
268 #endif
269 	  MPI_Abort(MPI_COMM_WORLD, 98) ;
270         }
271         if ( typesize == 8 ) {
272           F_PACK_LINT ( buf, p+yp_curs, imemord, &js, &je, &ks, &ke, &is, &ie,
273                                         &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ;
274           yp_curs += wcount*typesize ;
275         } else
276 	if ( typesize == 4 ) {
277           F_PACK_INT ( buf, p+yp_curs, imemord, &js, &je, &ks, &ke, &is, &ie,
278                                        &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ;
279           yp_curs += wcount*typesize ;
280 	}
281 	else {
282 #ifndef MS_SUA
283           fprintf(stderr,"internal error: %s %d\n",__FILE__,__LINE__) ;
284 #endif
285 	}
286       } else {
287         is = IMAX(ips-shw) ; ie = IMIN(ipe+shw) ;
288         ks = kps           ; ke = kpe ;
289         js = jpe           ; je = jpe+shw-1+stag ;
290         if ( typesize == 8 ) {
291           F_UNPACK_LINT ( p+yp_curs, buf, imemord, &js, &je, &ks, &ke, &is, &ie,
292                                           &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ;
293           yp_curs += wcount*typesize ;
294         } else
295 	if ( typesize == 4 ) {
296           F_UNPACK_INT ( p+yp_curs, buf, imemord, &js, &je, &ks, &ke, &is, &ie,
297                                          &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ;
298           yp_curs += wcount*typesize ;
299 	}
300 	else {
301 #ifndef MS_SUA
302           fprintf(stderr,"internal error: %s %d\n",__FILE__,__LINE__) ;
303 #endif
304         }
305       }
306     }
307     if ( coords[0] == 0 ) {         /* process on bottom edge of domain */
308       p = buffer_for_proc( ym , 0 , the_buf ) ;
309       if ( pu == 0 ) {
310         is = IMAX(ips-shw) ; ie = IMIN(ipe+shw) ;
311         ks = kps           ; ke = kpe ;
312         js = jps           ; je = jps+shw-1+stag ;
313         nbytes = buffer_size_for_proc( ym , the_buf ) ;
314         if ( ym_curs + RANGE( IMAX(ips-shw), IMIN(ipe+shw), kps, kpe, jps, jps+shw-1+stag, 1, typesize ) > nbytes ) {
315 #ifndef MS_SUA
316 	  fprintf(stderr,"memory overwrite in rsl_lite_pack_period_y,  left hand Y to %d , %d > %d\n",xm,
317 	      ym_curs + RANGE( IMAX(ips-shw), IMIN(ipe+shw), kps, kpe, jps, jps+shw-1+stag, 1, typesize ), nbytes ) ;
318 #endif
319 	  MPI_Abort(MPI_COMM_WORLD, 98) ;
320         }
321         if ( typesize == 8 ) {
322           F_PACK_LINT ( buf, p+ym_curs, imemord, &js, &je, &ks, &ke, &is, &ie,
323                                         &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ;
324           ym_curs += wcount*typesize ;
325         } else
326 	if ( typesize == 4 ) {
327           F_PACK_INT ( buf, p+ym_curs, imemord, &js, &je, &ks, &ke, &is, &ie,
328                                        &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ;
329           ym_curs += wcount*typesize ;
330 	}
331 	else {
332 #ifndef MS_SUA
333           fprintf(stderr,"internal error: %s %d\n",__FILE__,__LINE__) ;
334 #endif
335         }
336       } else {
337         is = IMAX(ips-shw) ; ie = IMIN(ipe+shw) ;
338         ks = kps           ; ke = kpe ;
339         js = jps-shw       ; je = jps-1           ;
340         if ( typesize == 8 ) {
341           F_UNPACK_LINT ( p+ym_curs, buf, imemord, &js, &je, &ks, &ke, &is, &ie,
342                                           &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ;
343           ym_curs += wcount*typesize ;
344         } else
345 	if ( typesize == 4 ) {
346           F_UNPACK_INT ( p+ym_curs, buf, imemord, &js, &je, &ks, &ke, &is, &ie,
347                                          &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ;
348           ym_curs += wcount*typesize ;
349 	}
350 	else {
351 #ifndef MS_SUA
352           fprintf(stderr,"internal error: %s %d\n",__FILE__,__LINE__) ;
353 #endif
354         }
355       }
356     }
357   }
358 }
359 
360 static MPI_Request yp_recv, ym_recv, yp_send, ym_send ;
361 static MPI_Request xp_recv, xm_recv, xp_send, xm_send ;
362 
363 RSL_LITE_EXCH_PERIOD_X ( int * Fcomm0, int *me0, int * np0 , int * np_x0 , int * np_y0 )
364 {
365   int me, np, np_x, np_y ;
366   int yp, ym, xp, xm, nbytes ;
367   MPI_Status stat ;
368   MPI_Comm comm, *comm0, dummy_comm ;
369   int coords[2] ;
370 
371   comm0 = &dummy_comm ;
372   *comm0 = MPI_Comm_f2c( *Fcomm0 ) ;
373 #if 1
374   comm = *comm0 ; me = *me0 ; np = *np0 ; np_x = *np_x0 ; np_y = *np_y0 ;
375 
376   if ( np_x > 1 ) {
377     MPI_Comm_rank( *comm0, &me ) ;
378     MPI_Cart_coords( *comm0, me, 2, coords ) ;
379     MPI_Cart_shift( *comm0, 1, 1, &xm, &xp ) ;
380     if ( coords[1] == np_x - 1 ) {   /* proc on right hand side of domain */
381       nbytes = buffer_size_for_proc( xp, RSL_RECVBUF ) ;
382       MPI_Irecv ( buffer_for_proc( xp , xp_curs, RSL_RECVBUF ), nbytes, MPI_CHAR, xp, me, comm, &xp_recv ) ;
383     }
384     if ( coords[1] == 0 ) {          /* proc on left hand side of domain */
385       nbytes = buffer_size_for_proc( xm, RSL_RECVBUF ) ;
386       MPI_Irecv ( buffer_for_proc( xm, xm_curs, RSL_RECVBUF ), nbytes, MPI_CHAR, xm, me, comm, &xm_recv ) ;
387     }
388     if ( coords[1] == np_x - 1 ) {   /* proc on right hand side of domain */
389       MPI_Isend ( buffer_for_proc( xp , 0,       RSL_SENDBUF ), xp_curs, MPI_CHAR, xp, xp, comm, &xp_send ) ;
390     }
391     if ( coords[1] == 0 ) {          /* proc on left hand side of domain */
392       MPI_Isend ( buffer_for_proc( xm, 0,       RSL_SENDBUF ), xm_curs, MPI_CHAR, xm, xm, comm, &xm_send ) ;
393     }
394     if ( coords[1] == np_x - 1 ) MPI_Wait( &xp_recv, &stat ) ; 
395     if ( coords[1] == 0        ) MPI_Wait( &xm_recv, &stat ) ; 
396     if ( coords[1] == np_x - 1 ) MPI_Wait( &xp_send, &stat ) ; 
397     if ( coords[1] == 0        ) MPI_Wait( &xm_send, &stat ) ;
398   }
399 #else 
400 # ifndef MS_SUA
401 fprintf(stderr,"RSL_LITE_EXCH_PERIOD_X disabled\n") ;
402 # endif
403 #endif
404   yp_curs = 0 ; ym_curs = 0 ; xp_curs = 0 ; xm_curs = 0 ;
405 }
406 
407 RSL_LITE_EXCH_PERIOD_Y ( int * Fcomm0, int *me0, int * np0 , int * np_x0 , int * np_y0 )
408 {
409   int me, np, np_x, np_y ;
410   int yp, ym, xp, xm, nbytes ;
411   MPI_Status stat ;
412   MPI_Comm comm, *comm0, dummy_comm ;
413   int coords[2] ;
414 
415   comm0 = &dummy_comm ;
416   *comm0 = MPI_Comm_f2c( *Fcomm0 ) ;
417 #if 1
418   comm = *comm0 ; me = *me0 ; np = *np0 ; np_x = *np_x0 ; np_y = *np_y0 ;
419 
420   if ( np_y > 1 ) {
421     MPI_Comm_rank( *comm0, &me ) ;
422     MPI_Cart_coords( *comm0, me, 2, coords ) ;
423     MPI_Cart_shift( *comm0, 0, 1, &ym, &yp ) ;
424     if ( coords[0] == np_y - 1 ) {   /* proc on top of domain */
425       nbytes = buffer_size_for_proc( yp, RSL_RECVBUF ) ;
426       MPI_Irecv ( buffer_for_proc( yp , yp_curs, RSL_RECVBUF ), nbytes, MPI_CHAR, yp, me, comm, &yp_recv ) ;
427     }
428     if ( coords[0] == 0 ) {          /* proc on bottom of domain */
429       nbytes = buffer_size_for_proc( ym, RSL_RECVBUF ) ;
430       MPI_Irecv ( buffer_for_proc( ym, ym_curs, RSL_RECVBUF ), nbytes, MPI_CHAR, ym, me, comm, &ym_recv ) ;
431     }
432     if ( coords[0] == np_y - 1 ) {   /* proc on top of domain */
433       MPI_Isend ( buffer_for_proc( yp , 0,       RSL_SENDBUF ), yp_curs, MPI_CHAR, yp, yp, comm, &yp_send ) ;
434     }
435     if ( coords[0] == 0 ) {          /* proc on bottom of domain */
436       MPI_Isend ( buffer_for_proc( ym, 0,       RSL_SENDBUF ), ym_curs, MPI_CHAR, ym, ym, comm, &ym_send ) ;
437     }
438     if ( coords[0] == np_y - 1 ) MPI_Wait( &yp_recv, &stat ) ;
439     if ( coords[0] == 0        ) MPI_Wait( &ym_recv, &stat ) ;
440     if ( coords[0] == np_y - 1 ) MPI_Wait( &yp_send, &stat ) ;
441     if ( coords[0] == 0        ) MPI_Wait( &ym_send, &stat ) ;
442   }
443 #else
444 # ifndef MS_SUA
445 fprintf(stderr,"RSL_LITE_EXCH_PERIOD_Y disabled\n") ;
446 # endif
447 #endif
448   yp_curs = 0 ; ym_curs = 0 ; xp_curs = 0 ; xm_curs = 0 ;
449 }
450