da_y_facade_to_global.inc
subroutine da_y_facade_to_global( re_slice, template, re_glob_slice )

   !---------------------------------------------------------------------------
   ! Purpose: Collect a local y_facade_type object into a global y_facade_type
   !          object while preserving serial-code storage order. This is
   !          used to perform bitwise-exact floating-point summations in
   !          serial-code order during bitwise-exact testing of
   !          distributed-memory parallel configurations.
   !
   ! Method:  Indices stowed away during Read_Obs() are used to restore serial
   !          storage order. Memory for global objects is allocated here.
   !          Global objects are minimally allocated to save memory.
   !          Memory must be deallocated by the caller via a call to
   !          da_y_facade_free().
   !          Memory for local object re_slice is deallocated here. Do not
   !          use re_slice after this call.
   !          The "template" argument is needed because some tasks may not
   !          have any local obs.
   !
   ! PERFORMANCE NOTE: This implementation is NOT very efficient. Speed it
   !                   up if testing is too slow.
   !---------------------------------------------------------------------------
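
   ! Illustrative caller sketch (comments only): a minimal usage pattern,
   ! assuming the caller already holds a local y_facade_type populated during
   ! Read_Obs() and a matching residual_template_type. The names re_local and
   ! re_global below are placeholders, not identifiers from this file:
   !
   !    call da_y_facade_to_global( re_local, template, re_global )
   !    ! re_local must not be used after this point; summations may now be
   !    ! performed over re_global%obs(:) in serial storage order
   !    call da_y_facade_free( re_global )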

   implicit none

   ! task-local objects (really intent(in ))
   type (y_facade_type),          intent(inout) :: re_slice      ! residual vector
   type (residual_template_type), intent(in)    :: template      ! allocation info
   ! task-global objects (really intent( out))
   type (y_facade_type),          intent(inout) :: re_glob_slice ! residual vector

   ! Local declarations
#ifdef DM_PARALLEL
   integer              :: n, k, serial_n, serial_numvals
   integer              :: proc
   integer              :: num_obs_send
   integer              :: buf_idx
   integer              :: num_obs_all
   integer              :: num_recv_all
   integer              :: obs_global_index(re_slice%num_obs)
   integer              :: num_values(re_slice%num_obs)
   integer              :: num_send_buf_lcl
   integer, allocatable :: num_obs_send_all(:)
   integer, allocatable :: obs_global_index_all(:)
   integer, allocatable :: obs_global_index_inv(:)
   integer, allocatable :: obs_start_all(:)    ! start index of each obs
   integer, pointer     :: num_values_all(:)
   integer, allocatable :: num_send_buf_all(:)
   integer, allocatable :: recv_counts(:)
   integer, allocatable :: recv_displs(:)
   real,    allocatable :: re_vals_lcl(:)
   real,    allocatable :: re_vals_all(:)
   integer              :: i

   ! TODO: later, upgrade from "allgather" to "gather-broadcast"

   ! collect information about all observations
   num_obs_send = 0
   obs_global_index = -1
   do n=1, re_slice%num_obs
      if ( re_slice%obs(n)%proc_domain ) then
         num_obs_send = num_obs_send + 1
         obs_global_index(num_obs_send) = re_slice%obs(n)%obs_global_index
      end if
   end do
   do n=1, num_obs_send
      if ( obs_global_index(n) < 0 ) then
         call da_error(__FILE__,__LINE__, &
            (/'ASSERTION ERROR: bad local value of obs_global_index'/))
      end if
   end do
   ! exchange num_obs_send and obs_global_index
   allocate( num_obs_send_all(0:num_procs-1), &
             num_send_buf_all(0:num_procs-1), &
             recv_counts(0:num_procs-1), &
             recv_displs(0:num_procs-1) )
   ! gather num_obs_send
   call mpi_allgather( num_obs_send, 1, mpi_integer, num_obs_send_all, 1, &
                       mpi_integer, comm, ierr )
   num_obs_all = sum( num_obs_send_all )
   if ( num_obs_all /= re_slice%num_obs_glo ) then
      call da_error (__FILE__,__LINE__, &
         (/'ASSERTION ERROR: inconsistent count of sound obs'/))
   end if
   ! set up to gather obs_global_index
   recv_counts = num_obs_send_all
   recv_displs(0) = 0
   do proc=1, num_procs-1
      recv_displs(proc) = recv_displs(proc-1) + recv_counts(proc-1)
   end do
   allocate( num_values_all(num_obs_all), &
             obs_global_index_all(num_obs_all), &
             obs_global_index_inv(num_obs_all), &
             obs_start_all(num_obs_all) )
   ! gather obs_global_index
   call mpi_allgatherv( obs_global_index, num_obs_send, mpi_integer, &
                        obs_global_index_all, recv_counts, recv_displs, &
                        mpi_integer, comm, ierr )
   ! invert obs_global_index_all
   ! initialize to "invalid" value
   obs_global_index_inv = -1
   do n=1, num_obs_all
      if ( ( obs_global_index_all(n) < 1 ) .OR. &
           ( obs_global_index_all(n) > size(obs_global_index_inv) ) ) then
         call da_error (__FILE__,__LINE__, &
            (/'ASSERTION ERROR: obs_global_index_all(n) out of range'/))
      end if
      if ( obs_global_index_inv(obs_global_index_all(n)) /= -1 ) then
         call da_error (__FILE__,__LINE__, &
            (/'ASSERTION ERROR: obs owned by more than one task'/))
      end if
      obs_global_index_inv(obs_global_index_all(n)) = n
   end do
   do n=1, num_obs_all
      if ( obs_global_index_inv(obs_global_index_all(n)) == -1 ) then
         call da_error (__FILE__,__LINE__, &
            (/'ASSERTION ERROR: obs not owned by any task'/))
      end if
   end do

   ! Create re_glob_slice and populate with residual_generic_type objects
   ! allocated to match template.
   call da_y_facade_create( re_glob_slice, num_obs_all, num_obs_all, &
                            template=template )

   ! NOTE: This i loop should be inside the n loops.
   ! Ideally, the residual_generic class should know how to pack/unpack
   ! (serialize/deserialize) itself for arbitrary I/O or communications (MPI or
   ! otherwise) that clients may choose to implement. Below is a possible set
   ! of new methods for residual_generic_type:
   !  residual_generic_pack_counts( res_generic, (out)rcount, (out)icount )
   !  [client allocates ibuf(icount) and rbuf(rcount)]
   !  residual_generic_pack( res_generic, (inout)rstart, (inout)rbuf, &
   !                         (inout)istart, (inout)ibuf )
   !  [client MPI communications: ibuf->ibufg rbuf->rbufg]
   !  residual_generic_unpack_counts( ibufg, (out)rstarts, (out)rcounts )
   !  residual_generic_unpack( (out)res_generic, rstarts(n), rcounts(n), rbufg )
   ! TODO:
   !  1) Design serialization for residual_generic_type.
   !  2) Implement new methods.
   !  3) Refactor below...
   !  4) Optimize performance.
   ! At the moment this refactoring is relatively low-priority since
   ! da_y_facade_to_global() is already well-encapsulated and performance is not
   ! (yet) a concern for testing. A client-side sketch of the proposed methods
   ! follows below.
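   !
   ! Hypothetical client-side sketch of the proposed (not yet implemented)
   ! methods, comments only; the routine names come from the list above, and
   ! the buffer names (rbuf, ibuf, rbufg, ibufg, rstarts, rcounts) are
   ! placeholders, not existing identifiers in this file:
   !
   !    call residual_generic_pack_counts( re_slice%obs(n), rcount, icount )
   !    allocate( rbuf(rcount), ibuf(icount) )
   !    rstart = 1 ; istart = 1
   !    call residual_generic_pack( re_slice%obs(n), rstart, rbuf, istart, ibuf )
   !    [gather ibuf into ibufg and rbuf into rbufg across tasks]
   !    call residual_generic_unpack_counts( ibufg, rstarts, rcounts )
   !    call residual_generic_unpack( re_glob_slice%obs(n), rstarts(n), &
   !                                  rcounts(n), rbufg )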
   ! Loop over vector and scalar elements

   do i=template%lbnd, template%ubnd
      num_obs_send = 0
      num_values = 0
      num_send_buf_lcl = 0
      ! collect information to allocate buffers
      do n=1, re_slice%num_obs
         if ( re_slice%obs(n)%proc_domain ) then
            num_obs_send = num_obs_send + 1
            ! track number of scalars/levels per obs element
            num_values(num_obs_send) = size(re_slice%obs(n)%values(i)%ptr)
            ! track total buffer size
            num_send_buf_lcl = num_send_buf_lcl + num_values(num_obs_send)
         end if
      end do
      ! gather num_send_buf_lcl
      call mpi_allgather( num_send_buf_lcl, 1, mpi_integer, &
                          num_send_buf_all, 1, &
                          mpi_integer, comm, ierr )
      ! gather num_values
      recv_counts = num_obs_send_all
      recv_displs(0) = 0
      do proc=1, num_procs-1
         recv_displs(proc) = recv_displs(proc-1) + recv_counts(proc-1)
      end do
      ! num_values
      call mpi_allgatherv( num_values, num_obs_send, mpi_integer, &
                           num_values_all, recv_counts, recv_displs, &
                           mpi_integer, comm, ierr )
      ! set up to gather local arrays
      ! compute start index of each obs in gathered buffers
      obs_start_all(1) = 1
      do n=2, num_obs_all
         obs_start_all(n) = obs_start_all(n-1) + num_values_all(n-1)
      end do
      ! finish setting up to gather local arrays
      recv_counts = num_send_buf_all
      recv_displs(0) = 0
      do proc=1, num_procs-1
         recv_displs(proc) = recv_displs(proc-1) + recv_counts(proc-1)
      end do
      num_recv_all = sum( recv_counts )
      ! allocate and initialize send buffer
      allocate( re_vals_lcl(num_send_buf_lcl) )
      buf_idx = 0
      do n=1, re_slice%num_obs
         if ( re_slice%obs(n)%proc_domain ) then
            do k=1, size(re_slice%obs(n)%values(i)%ptr)
               buf_idx = buf_idx + 1
               re_vals_lcl(buf_idx) = re_slice%obs(n)%values(i)%ptr(k)
            end do
         end if
      end do
      if ( buf_idx /= num_send_buf_lcl ) then
         call da_error (__FILE__,__LINE__, &
            (/'ASSERTION ERROR: buf_idx /= num_send_buf_lcl'/))
      end if
      ! allocate receive buffer
      allocate( re_vals_all(num_recv_all) )
      ! finally, actually gather the values
      call mpi_allgatherv( re_vals_lcl, num_send_buf_lcl, true_mpi_real, &
                           re_vals_all, recv_counts, recv_displs, &
                           true_mpi_real, comm, ierr )
      ! initialize re_glob_slice
      ! NOTE: The i loop should be inside this n loop, see comment above...
      ! TODO: Refactor...
      do n=1, re_glob_slice%num_obs
         serial_n = obs_global_index_inv(n)
         serial_numvals = num_values_all(serial_n)
         buf_idx = obs_start_all(serial_n)
         ! note that we only collected obs someone owns
         re_glob_slice%obs(n)%proc_domain = .true.
         re_glob_slice%obs(n)%obs_global_index = obs_global_index_all(serial_n)
         call da_res_generic_alloc_and_set( re_glob_slice%obs(n), i, &
            re_vals_all(buf_idx:(buf_idx+serial_numvals-1)) )
      end do
      ! deallocate communication buffers, etc.
      deallocate( re_vals_all )
      deallocate( re_vals_lcl )

   end do  ! end of i loop

   call da_y_facade_free( re_slice )

   deallocate( num_values_all, obs_global_index_all, &
               obs_global_index_inv, obs_start_all )
   deallocate( num_obs_send_all, num_send_buf_all, &
               recv_counts, recv_displs )

#else
   call da_error (__FILE__,__LINE__, &
      (/'may only be called for a distributed memory parallel run'/))
#endif

end subroutine da_y_facade_to_global