f_xpose.f
References to this file elsewhere.
1
2
3 subroutine trans_z2x ( np, comm, dir, r_wordsize, i_wordsize, memorder, &
4 a, &
5 sd1, ed1, sd2, ed2, sd3, ed3, &
6 sp1, ep1, sp2, ep2, sp3, ep3, &
7 sm1, em1, sm2, em2, sm3, em3, &
8 ax, &
9 sp1x, ep1x, sp2x, ep2x, sp3x, ep3x, &
10 sm1x, em1x, sm2x, em2x, sm3x, em3x )
11 USE duplicate_of_driver_constants
12 implicit none
13 include 'mpif.h'
14 integer, intent(in) :: sd1, ed1, sd2, ed2, sd3, ed3, &
15 sp1, ep1, sp2, ep2, sp3, ep3, &
16 sm1, em1, sm2, em2, sm3, em3, &
17 sp1x, ep1x, sp2x, ep2x, sp3x, ep3x, &
18 sm1x, em1x, sm2x, em2x, sm3x, em3x
19 integer, intent(in) :: np, comm, r_wordsize, i_wordsize
20 integer, intent(in) :: dir ! 1 is a->ax, otherwise ax->a
21 integer, intent(in) :: memorder
22
23 !local
24 integer :: ids, ide, jds, jde, kds, kde, &
25 ips, ipe, jps, jpe, kps, kpe, &
26 ims, ime, jms, jme, kms, kme, &
27 ipsx, ipex, jpsx, jpex, kpsx, kpex, &
28 imsx, imex, jmsx, jmex, kmsx, kmex
29
30 integer, dimension((ep1-sp1+1)*(ep2-sp2+1)*(ep3-sp3+1)*max(1,(r_wordsize/i_wordsize))) :: a
31 integer, dimension((ep1x-sp1x+1)*(ep2x-ep2x+1)*(ep3x-sp3x+1)*max(1,(r_wordsize/i_wordsize))) :: ax
32 integer, dimension(0:(ep1-sp1+1)*(ep2-sp2+1)*(ep3-sp3+1)*max(1,(r_wordsize/i_wordsize))) :: zbuf
33 integer, dimension(0:(ep1x-sp1x+1)*(ep2x-sp2x+1)*(ep3x-sp3x+1)*max(1,(r_wordsize/i_wordsize))) :: xbuf
34
35 integer pencil(4), allpencils(4,np)
36 integer sendcnts(np), sdispls(np), recvcnts(np), rdispls(np)
37 integer allsendcnts(np+2,np), is(np), ie(np), ks(np),ke(np)
38 integer sendcurs(np), recvcurs(np)
39 integer i,j,k,p,sc,sp,rp,yp,zp,curs,zbufsz,cells,nkcells,ivectype,ierr
40
41 SELECT CASE ( memorder )
42 CASE ( DATA_ORDER_XYZ )
43 ids = sd1 ; ide = ed1 ; jds = sd2 ; jde = ed2 ; kds = sd3 ; kde = ed3
44 ips = sp1 ; ipe = ep1 ; jps = sp2 ; jpe = ep2 ; kps = sp3 ; kpe = ep3
45 ims = sm1 ; ime = em1 ; jms = sm2 ; jme = em2 ; kms = sm3 ; kme = em3
46 ipsx = sp1x ; ipex = ep1x ; jpsx = sp2x ; jpex = ep2x ; kpsx = sp3x ; kpex = ep3x
47 imsx = sm1x ; imex = em1x ; jmsx = sm2x ; jmex = em2x ; kmsx = sm3x ; kmex = em3x
48 CASE ( DATA_ORDER_YXZ )
49 ids = sd2 ; ide = ed2 ; jds = sd1 ; jde = ed1 ; kds = sd3 ; kde = ed3
50 ips = sp2 ; ipe = ep2 ; jps = sp1 ; jpe = ep1 ; kps = sp3 ; kpe = ep3
51 ims = sm2 ; ime = em2 ; jms = sm1 ; jme = em1 ; kms = sm3 ; kme = em3
52 ipsx = sp2x ; ipex = ep2x ; jpsx = sp1x ; jpex = ep1x ; kpsx = sp3x ; kpex = ep3x
53 imsx = sm2x ; imex = em2x ; jmsx = sm1x ; jmex = em1x ; kmsx = sm3x ; kmex = em3x
54 CASE ( DATA_ORDER_XZY )
55 ids = sd1 ; ide = ed1 ; jds = sd3 ; jde = ed3 ; kds = sd2 ; kde = ed2
56 ips = sp1 ; ipe = ep1 ; jps = sp3 ; jpe = ep3 ; kps = sp2 ; kpe = ep2
57 ims = sm1 ; ime = em1 ; jms = sm3 ; jme = em3 ; kms = sm2 ; kme = em2
58 ipsx = sp1x ; ipex = ep1x ; jpsx = sp3x ; jpex = ep3x ; kpsx = sp2x ; kpex = ep2x
59 imsx = sm1x ; imex = em1x ; jmsx = sm3x ; jmex = em3x ; kmsx = sm2x ; kmex = em2x
60 CASE ( DATA_ORDER_YZX )
61 ids = sd3 ; ide = ed3 ; jds = sd1 ; jde = ed1 ; kds = sd2 ; kde = ed2
62 ips = sp3 ; ipe = ep3 ; jps = sp1 ; jpe = ep1 ; kps = sp2 ; kpe = ep2
63 ims = sm3 ; ime = em3 ; jms = sm1 ; jme = em1 ; kms = sm2 ; kme = em2
64 ipsx = sp3x ; ipex = ep3x ; jpsx = sp1x ; jpex = ep1x ; kpsx = sp2x ; kpex = ep2x
65 imsx = sm3x ; imex = em3x ; jmsx = sm1x ; jmex = em1x ; kmsx = sm2x ; kmex = em2x
66 CASE ( DATA_ORDER_ZXY )
67 ids = sd2 ; ide = ed2 ; jds = sd3 ; jde = ed3 ; kds = sd1 ; kde = ed1
68 ips = sp2 ; ipe = ep2 ; jps = sp3 ; jpe = ep3 ; kps = sp1 ; kpe = ep1
69 ims = sm2 ; ime = em2 ; jms = sm3 ; jme = em3 ; kms = sm1 ; kme = em1
70 ipsx = sp2x ; ipex = ep2x ; jpsx = sp3x ; jpex = ep3x ; kpsx = sp1x ; kpex = ep1x
71 imsx = sm2x ; imex = em2x ; jmsx = sm3x ; jmex = em3x ; kmsx = sm1x ; kmex = em1x
72 CASE ( DATA_ORDER_ZYX )
73 ids = sd3 ; ide = ed3 ; jds = sd2 ; jde = ed2 ; kds = sd1 ; kde = ed1
74 ips = sp3 ; ipe = ep3 ; jps = sp2 ; jpe = ep2 ; kps = sp1 ; kpe = ep1
75 ims = sm3 ; ime = em3 ; jms = sm2 ; jme = em2 ; kms = sm1 ; kme = em1
76 ipsx = sp3x ; ipex = ep3x ; jpsx = sp2x ; jpex = ep2x ; kpsx = sp1x ; kpex = ep1x
77 imsx = sm3x ; imex = em3x ; jmsx = sm2x ; jmex = em2x ; kmsx = sm1x ; kmex = em1x
78 END SELECT
79
80 sendcnts = 0 ; recvcnts = 0
81
82 xbuf = 0
83 zbuf = 0
84
85 ! work out send/recv sizes to each processor in X dimension
86 pencil(1) = ips
87 pencil(2) = ipe
88 pencil(3) = kpsx
89 pencil(4) = kpex
90 call mpi_allgather( pencil, 4, MPI_INTEGER, allpencils, 4, MPI_INTEGER, comm, ierr )
91 do p = 1, np
92 is(p) = allpencils(1,p)
93 ie(p) = allpencils(2,p)
94 ks(p) = allpencils(3,p)
95 ke(p) = allpencils(4,p)
96 enddo
97 ! pack send buffer
98 sendcurs = 0
99 sdispls = 0
100 sc = 0
101 do p = 1, np
102 if ( r_wordsize .eq. i_wordsize ) then
103 if ( dir .eq. 1 ) then
104 call f_pack_int ( a, zbuf(sc), memorder, &
105 & jps, jpe, ks(p), ke(p), ips, ipe, &
106 & jms, jme, kms, kme, ims, ime, sendcurs(p) )
107 else
108 call f_pack_int ( ax, xbuf(sc), memorder, &
109 & jpsx, jpex, kpsx, kpex, is(p), ie(p), &
110 & jmsx, jmex, kmsx, kmex, imsx, imex, sendcurs(p) )
111 endif
112 else if ( r_wordsize .eq. 8 ) THEN
113 if ( dir .eq. 1 ) then
114 call f_pack_lint ( a, zbuf(sc), memorder, &
115 & jps, jpe, ks(p), ke(p), ips, ipe, &
116 & jms, jme, kms, kme, ims, ime, sendcurs(p) )
117 else
118 call f_pack_lint ( ax, xbuf(sc), memorder, &
119 & jpsx, jpex, kpsx, kpex, is(p), ie(p), &
120 & jmsx, jmex, kmsx, kmex, imsx, imex, sendcurs(p) )
121 endif
122 sendcurs(p) = sendcurs(p) * max(1,(r_wordsize/i_wordsize))
123 else
124 write(0,*)'RSL_LITE internal error: type size mismatch ',"f_xpose.b",122
125 call mpi_abort(ierr)
126 endif
127 sc = sc + sendcurs(p)
128 sendcnts(p) = sendcurs(p)
129 if ( p .GT. 1 ) sdispls(p) = sdispls(p-1) + sendcnts(p-1)
130 enddo
131 ! work out receive counts and displs
132 rdispls = 0
133 recvcnts = 0
134 do p = 1, np
135 if ( dir .eq. 1 ) then
136 recvcnts(p) = (ie(p)-is(p)+1)*(kpex-kpsx+1)*(jpex-jpsx+1) * max(1,(r_wordsize/i_wordsize))
137 else
138 recvcnts(p) = (ke(p)-ks(p)+1)*(ipe-ips+1)*(jpe-jps+1) * max(1,(r_wordsize/i_wordsize))
139 endif
140 if ( p .GT. 1 ) rdispls(p) = rdispls(p-1) + recvcnts(p-1)
141 enddo
142 ! do the transpose
143 if ( dir .eq. 1 ) then
144 call mpi_alltoallv(zbuf, sendcnts, sdispls, MPI_INTEGER, &
145 xbuf, recvcnts, rdispls, MPI_INTEGER, comm, ierr )
146 else
147 call mpi_alltoallv(xbuf, sendcnts, sdispls, MPI_INTEGER, &
148 zbuf, recvcnts, rdispls, MPI_INTEGER, comm, ierr )
149 endif
150 ! unpack
151 do p = 1, np
152 if ( r_wordsize .eq. i_wordsize ) then
153 if ( dir .eq. 1 ) then
154 call f_unpack_int ( xbuf(rdispls(p)), ax, memorder, &
155 & jpsx, jpex, kpsx, kpex, is(p), ie(p), &
156 & jmsx, jmex, kmsx, kmex, imsx, imex, curs )
157 else
158 call f_unpack_int ( zbuf(rdispls(p)), a, memorder, &
159 & jps, jpe, ks(p), ke(p), ips, ipe, &
160 & jms, jme, kms, kme, ims, ime, curs )
161 endif
162 else if ( r_wordsize .eq. 8 ) THEN
163 if ( dir .eq. 1 ) then
164 call f_unpack_lint ( xbuf(rdispls(p)), ax, memorder, &
165 & jpsx, jpex, kpsx, kpex, is(p), ie(p), &
166 & jmsx, jmex, kmsx, kmex, imsx, imex, curs )
167 else
168 call f_unpack_lint ( zbuf(rdispls(p)), a, memorder, &
169 & jps, jpe, ks(p), ke(p), ips, ipe, &
170 & jms, jme, kms, kme, ims, ime, curs )
171 endif
172 else
173 write(0,*)'RSL_LITE internal error: type size mismatch ',"f_xpose.b",171
174 call mpi_abort(ierr)
175 endif
176 enddo
177 return
178 end subroutine trans_z2x
179
180 subroutine trans_x2y ( np, comm, dir, r_wordsize, i_wordsize, memorder, &
181 ax, &
182 sd1, ed1, sd2, ed2, sd3, ed3, &
183 sp1x, ep1x, sp2x, ep2x, sp3x, ep3x, &
184 sm1x, em1x, sm2x, em2x, sm3x, em3x, &
185 ay, &
186 sp1y, ep1y, sp2y, ep2y, sp3y, ep3y, &
187 sm1y, em1y, sm2y, em2y, sm3y, em3y )
188 USE duplicate_of_driver_constants
189 implicit none
190 include 'mpif.h'
191 integer, intent(in) :: memorder
192 integer, intent(in) :: sd1, ed1, sd2, ed2, sd3, ed3, &
193 sp1x, ep1x, sp2x, ep2x, sp3x, ep3x, &
194 sm1x, em1x, sm2x, em2x, sm3x, em3x, &
195 sp1y, ep1y, sp2y, ep2y, sp3y, ep3y, &
196 sm1y, em1y, sm2y, em2y, sm3y, em3y
197
198 integer, intent(in) :: np, comm, r_wordsize, i_wordsize
199 integer, intent(in) :: dir ! 1 is a->ax, otherwise ax->a
200
201 integer, dimension((ep1x-sp1x+1)*(ep2x-ep2x+1)*(ep3x-sp3x+1)*max(1,(r_wordsize/i_wordsize))) :: ax
202 integer, dimension((ep1y-sp1y+1)*(ep2y-sp2y+1)*(ep3y-sp3y+1)*max(1,(r_wordsize/i_wordsize))) :: ay
203 integer, dimension(0:(ep1x-sp1x+1)*(ep2x-sp2x+1)*(ep3x-sp3x+1)*max(1,(r_wordsize/i_wordsize))) :: xbuf
204 integer, dimension(0:(ep1y-sp1y+1)*(ep2y-sp2y+1)*(ep3y-sp3y+1)*max(1,(r_wordsize/i_wordsize))) :: ybuf
205
206 !local
207 integer ids, ide, jds, jde, kds, kde, &
208 ipsx, ipex, jpsx, jpex, kpsx, kpex, &
209 imsx, imex, jmsx, jmex, kmsx, kmex, &
210 ipsy, ipey, jpsy, jpey, kpsy, kpey, &
211 imsy, imey, jmsy, jmey, kmsy, kmey
212 integer pencil(4), allpencils(4,np)
213 integer sendcnts(np), sdispls(np), recvcnts(np), rdispls(np)
214 integer allsendcnts(np+2,np), is(np), ie(np), js(np), je(np)
215 integer sendcurs(np), recvcurs(np)
216 integer i,j,k,p,sc,sp,rp,yp,zp,curs,xbufsz,cells,nkcells,ivectype,ierr
217
218 SELECT CASE ( memorder )
219 CASE ( DATA_ORDER_XYZ )
220 ids = sd1 ; ide = ed1 ; jds = sd2 ; jde = ed2 ; kds = sd3 ; kde = ed3
221 ipsx = sp1x ; ipex = ep1x ; jpsx = sp2x ; jpex = ep2x ; kpsx = sp3x ; kpex = ep3x
222 imsx = sm1x ; imex = em1x ; jmsx = sm2x ; jmex = em2x ; kmsx = sm3x ; kmex = em3x
223 ipsy = sp1y ; ipey = ep1y ; jpsy = sp2y ; jpey = ep2y ; kpsy = sp3y ; kpey = ep3y
224 imsy = sm1y ; imey = em1y ; jmsy = sm2y ; jmey = em2y ; kmsy = sm3y ; kmey = em3y
225 CASE ( DATA_ORDER_YXZ )
226 ids = sd2 ; ide = ed2 ; jds = sd1 ; jde = ed1 ; kds = sd3 ; kde = ed3
227 ipsx = sp2x ; ipex = ep2x ; jpsx = sp1x ; jpex = ep1x ; kpsx = sp3x ; kpex = ep3x
228 imsx = sm2x ; imex = em2x ; jmsx = sm1x ; jmex = em1x ; kmsx = sm3x ; kmex = em3x
229 ipsy = sp2y ; ipey = ep2y ; jpsy = sp1y ; jpey = ep1y ; kpsy = sp3y ; kpey = ep3y
230 imsy = sm2y ; imey = em2y ; jmsy = sm1y ; jmey = em1y ; kmsy = sm3y ; kmey = em3y
231 CASE ( DATA_ORDER_XZY )
232 ids = sd1 ; ide = ed1 ; jds = sd3 ; jde = ed3 ; kds = sd2 ; kde = ed2
233 ipsx = sp1x ; ipex = ep1x ; jpsx = sp3x ; jpex = ep3x ; kpsx = sp2x ; kpex = ep2x
234 imsx = sm1x ; imex = em1x ; jmsx = sm3x ; jmex = em3x ; kmsx = sm2x ; kmex = em2x
235 ipsy = sp1y ; ipey = ep1y ; jpsy = sp3y ; jpey = ep3y ; kpsy = sp2y ; kpey = ep2y
236 imsy = sm1y ; imey = em1y ; jmsy = sm3y ; jmey = em3y ; kmsy = sm2y ; kmey = em2y
237 CASE ( DATA_ORDER_YZX )
238 ids = sd3 ; ide = ed3 ; jds = sd1 ; jde = ed1 ; kds = sd2 ; kde = ed2
239 ipsx = sp3x ; ipex = ep3x ; jpsx = sp1x ; jpex = ep1x ; kpsx = sp2x ; kpex = ep2x
240 imsx = sm3x ; imex = em3x ; jmsx = sm1x ; jmex = em1x ; kmsx = sm2x ; kmex = em2x
241 ipsy = sp3y ; ipey = ep3y ; jpsy = sp1y ; jpey = ep1y ; kpsy = sp2y ; kpey = ep2y
242 imsy = sm3y ; imey = em3y ; jmsy = sm1y ; jmey = em1y ; kmsy = sm2y ; kmey = em2y
243 CASE ( DATA_ORDER_ZXY )
244 ids = sd2 ; ide = ed2 ; jds = sd3 ; jde = ed3 ; kds = sd1 ; kde = ed1
245 ipsx = sp2x ; ipex = ep2x ; jpsx = sp3x ; jpex = ep3x ; kpsx = sp1x ; kpex = ep1x
246 imsx = sm2x ; imex = em2x ; jmsx = sm3x ; jmex = em3x ; kmsx = sm1x ; kmex = em1x
247 ipsy = sp2y ; ipey = ep2y ; jpsy = sp3y ; jpey = ep3y ; kpsy = sp1y ; kpey = ep1y
248 imsy = sm2y ; imey = em2y ; jmsy = sm3y ; jmey = em3y ; kmsy = sm1y ; kmey = em1y
249 CASE ( DATA_ORDER_ZYX )
250 ids = sd3 ; ide = ed3 ; jds = sd2 ; jde = ed2 ; kds = sd1 ; kde = ed1
251 ipsx = sp3x ; ipex = ep3x ; jpsx = sp2x ; jpex = ep2x ; kpsx = sp1x ; kpex = ep1x
252 imsx = sm3x ; imex = em3x ; jmsx = sm2x ; jmex = em2x ; kmsx = sm1x ; kmex = em1x
253 ipsy = sp3y ; ipey = ep3y ; jpsy = sp2y ; jpey = ep2y ; kpsy = sp1y ; kpey = ep1y
254 imsy = sm3y ; imey = em3y ; jmsy = sm2y ; jmey = em2y ; kmsy = sm1y ; kmey = em1y
255 END SELECT
256
257 sendcnts = 0 ; recvcnts = 0
258
259 xbuf = 0
260 ybuf = 0
261
262 ! work out send/recv sizes to each processor in X dimension
263 pencil(1) = jpsx
264 pencil(2) = jpex
265 pencil(3) = ipsy
266 pencil(4) = ipey
267
268 call mpi_allgather( pencil, 4, MPI_INTEGER, allpencils, 4, MPI_INTEGER, comm, ierr )
269 do p = 1, np
270 js(p) = allpencils(1,p)
271 je(p) = allpencils(2,p)
272 is(p) = allpencils(3,p)
273 ie(p) = allpencils(4,p)
274 enddo
275
276
277 ! pack send buffer
278 sendcurs = 0
279 sdispls = 0
280 sc = 0
281 do p = 1, np
282 if ( r_wordsize .eq. i_wordsize ) then
283 if ( dir .eq. 1 ) then
284 call f_pack_int ( ax, xbuf(sc), memorder, &
285 & jpsx, jpex, kpsx, kpex, is(p), ie(p), &
286 & jmsx, jmex, kmsx, kmex, imsx, imex, sendcurs(p) )
287 else
288 call f_pack_int ( ay, ybuf(sc), memorder, &
289 & js(p), je(p), kpsy, kpey, ipsy, ipey, &
290 & jmsy, jmey, kmsy, kmey, imsy, imey, sendcurs(p) )
291 endif
292 else if ( r_wordsize .eq. 8 ) THEN
293 if ( dir .eq. 1 ) then
294 call f_pack_lint ( ax, xbuf(sc), memorder, &
295 & jpsx, jpex, kpsx, kpex, is(p), ie(p), &
296 & jmsx, jmex, kmsx, kmex, imsx, imex, sendcurs(p) )
297 else
298 call f_pack_lint ( ay, ybuf(sc), memorder, &
299 & js(p), je(p), kpsy, kpey, ipsy, ipey, &
300 & jmsy, jmey, kmsy, kmey, imsy, imey, sendcurs(p) )
301 endif
302 sendcurs(p) = sendcurs(p) * max(1,(r_wordsize/i_wordsize))
303 else
304 write(0,*)'RSL_LITE internal error: type size mismatch ',"f_xpose.b",302
305 call mpi_abort(ierr)
306 endif
307 sc = sc + sendcurs(p)
308 sendcnts(p) = sendcurs(p)
309 if ( p .GT. 1 ) sdispls(p) = sdispls(p-1) + sendcnts(p-1)
310 enddo
311
312 ! work out receive counts and displs
313 rdispls = 0
314 recvcnts = 0
315 do p = 1, np
316 if ( dir .eq. 1 ) then
317 recvcnts(p) = (je(p)-js(p)+1)*(kpey-kpsy+1)*(ipey-ipsy+1) * max(1,(r_wordsize/i_wordsize))
318 else
319 recvcnts(p) = (ie(p)-is(p)+1)*(kpex-kpsx+1)*(jpex-jpsx+1) * max(1,(r_wordsize/i_wordsize))
320 endif
321 if ( p .GT. 1 ) rdispls(p) = rdispls(p-1) + recvcnts(p-1)
322 enddo
323
324 ! do the transpose
325 if ( dir .eq. 1 ) then
326 call mpi_alltoallv(xbuf, sendcnts, sdispls, MPI_INTEGER, &
327 ybuf, recvcnts, rdispls, MPI_INTEGER, comm, ierr )
328 else
329 call mpi_alltoallv(ybuf, sendcnts, sdispls, MPI_INTEGER, &
330 xbuf, recvcnts, rdispls, MPI_INTEGER, comm, ierr )
331 endif
332 ! unpack
333 do p = 1, np
334 if ( r_wordsize .eq. i_wordsize ) then
335 if ( dir .eq. 1 ) then
336 call f_unpack_int ( ybuf(rdispls(p)), ay, memorder, &
337 & js(p), je(p), kpsy, kpey, ipsy, ipey, &
338 & jmsy, jmey, kmsy, kmey, imsy, imey, curs )
339 else
340 call f_unpack_int ( xbuf(rdispls(p)), ax, memorder, &
341 & jpsx, jpex, kpsx, kpex, is(p), ie(p), &
342 & jmsx, jmex, kmsx, kmex, imsx, imex, curs )
343 endif
344 else if ( r_wordsize .eq. 8 ) THEN
345 if ( dir .eq. 1 ) then
346 call f_unpack_lint ( ybuf(rdispls(p)), ay, memorder, &
347 & js(p), je(p), kpsy, kpey, ipsy, ipey, &
348 & jmsy, jmey, kmsy, kmey, imsy, imey, curs )
349 else
350 call f_unpack_lint ( xbuf(rdispls(p)), ax, memorder, &
351 & jpsx, jpex, kpsx, kpex, is(p), ie(p), &
352 & jmsx, jmex, kmsx, kmex, imsx, imex, curs )
353 endif
354 else
355 write(0,*)'RSL_LITE internal error: type size mismatch ',"f_xpose.b",353
356 call mpi_abort(ierr)
357 endif
358 enddo
359 return
360 end subroutine trans_x2y
361