da_y_facade_to_global.inc

References to this file elsewhere.
1 subroutine da_y_facade_to_global( re_slice, template, re_glob_slice )
2 
3    !---------------------------------------------------------------------------
4    ! Purpose:  Collect a local y_facade_type object into a global y_facade_type 
5    !           object while preserving serial-code storage order.  This is 
6    !           used to perform bitwise-exact floating-point summations in 
7    !           serial-code-order during bitwise-exact testing of 
8    !           distributed-memory parallel configurations.  
9    !
10    ! Method:   Indices stowed away during Read_Obs() are used to restore serial 
11    !           storage order.  Memory for global objects is allocated here.  
12    !           Global objects are minimally allocated to save memory.  
13    !           Memory must be deallocated by the caller via a call to 
14    !           da_y_facade_free().  
15    !           Memory for local object re_slice is deallocated here.  Do not 
16    !           use re_slice after this call.  
17    !           The "template" argument is needed because some tasks may not 
18    !           have any local obs.  
19    !
20    ! PERFORMANCE NOTE:   This implementation is NOT very efficient.  Speed it 
21    !                     up if testing is too slow.  
22    !---------------------------------------------------------------------------
23 
24    implicit none
25 
26    ! task-local objects  (really intent(in   ))
27    type (y_facade_type), intent(inout) :: re_slice      ! residual vector
28    type (residual_template_type), intent(in) :: template  ! allocation info
29    ! task-global objects (really intent(  out))
30    type (y_facade_type), intent(inout) :: re_glob_slice ! residual vector
31 
32    ! Local declarations
33 #ifdef DM_PARALLEL
34    integer                      :: n, k, serial_n, serial_numvals
35    integer                      :: proc
36    integer                      :: num_obs_send
37    integer                      :: buf_idx
38    integer                      :: num_obs_all
39    integer                      :: num_recv_all
40    integer                      :: obs_global_index(re_slice%num_obs)
41    integer                      :: num_values(re_slice%num_obs)
42    integer                      :: num_send_buf_lcl
43    integer,allocatable          :: num_obs_send_all(:)
44    integer,allocatable          :: obs_global_index_all(:)
45    integer,allocatable          :: obs_global_index_inv(:)
46    integer,allocatable          :: obs_start_all(:)  ! start index of each obs
47    integer,pointer              :: num_values_all(:)
48    integer,allocatable          :: num_send_buf_all(:)
49    integer,allocatable          :: recv_counts(:)
50    integer,allocatable          :: recv_displs(:)
51    real,allocatable             :: re_vals_lcl(:)
52    real,allocatable             :: re_vals_all(:)
53    integer                      :: i
54 
55    ! TOdo:  later, upgrade from "allgather" to "gather-broadcast"
56 
57    ! collect information about all observations
58    num_obs_send = 0
59    obs_global_index = -1
60    do n=1, re_slice%num_obs
61       if ( re_slice%obs(n)%proc_domain ) then
62          num_obs_send = num_obs_send + 1
63          obs_global_index(num_obs_send) = re_slice%obs(n)%obs_global_index
64       end if
65    end do
66    do n=1, num_obs_send
67       if ( obs_global_index(n) < 0 ) then
68          call da_error(__FILE__,__LINE__, &
69             (/'ASSERTION ERROR:  bad local value of obs_global_index'/))
70       end if
71    end do
72    ! exchange num_obs_send and obs_global_index
73    allocate( num_obs_send_all(0:num_procs-1),     &
74               num_send_buf_all(0:num_procs-1),     &
75               recv_counts(0:num_procs-1),          &
76               recv_displs(0:num_procs-1) )
77    ! gather num_obs_send
78    call mpi_allgather( num_obs_send, 1, mpi_integer, num_obs_send_all, 1, &
79                         mpi_integer, comm, ierr )
80    num_obs_all = sum( num_obs_send_all )
81    if ( num_obs_all /= re_slice%num_obs_glo ) then
82       call da_error (__FILE__,__LINE__, &
83          (/'ASSERTION ERROR:  inconsistent count of sound obs'/))
84    end if
85    ! set up to gather obs_global_index
86    recv_counts = num_obs_send_all
87    recv_displs(0) = 0
88    do proc=1, num_procs-1
89       recv_displs(proc) = recv_displs(proc-1) + recv_counts(proc-1)
90    end do
91    allocate( num_values_all(num_obs_all),       &
92               obs_global_index_all(num_obs_all), &
93               obs_global_index_inv(num_obs_all), &
94               obs_start_all(num_obs_all) )
95    ! gather obs_global_index
96    call mpi_allgatherv( obs_global_index, num_obs_send, mpi_integer,    &
97                          obs_global_index_all, recv_counts, recv_displs, &
98                          mpi_integer, comm, ierr )
99    ! invert obs_global_index_all
100    ! initialize to "invalid" value
101    obs_global_index_inv = -1
102    do n=1, num_obs_all
103       if ( ( obs_global_index_all(n) <  1 ) .OR. &
104            ( obs_global_index_all(n) > size(obs_global_index_inv) ) ) then
105          call da_error (__FILE__,__LINE__, &
106             (/'ASSERTION ERROR:  obs_global_index_all(n) out of range'/))
107       end if
108       if ( obs_global_index_inv(obs_global_index_all(n)) /= -1 ) then
109          call da_error (__FILE__,__LINE__, &
110             (/'ASSERTION ERROR:  obs owned by more than one task'/))
111       end if
112       obs_global_index_inv(obs_global_index_all(n)) = n
113    end do
114    do n=1, num_obs_all
115       if ( obs_global_index_inv(obs_global_index_all(n)) == -1 ) then
116          call da_error (__FILE__,__LINE__, &
117             (/'ASSERTION ERROR:  obs not owned by any task'/))
118       end if
119    end do
120 
121    ! Create re_glob_slice and populate with residual_generic_type objects 
122    ! allocated to match template.  
123    call da_y_facade_create( re_glob_slice, num_obs_all, num_obs_all, &
124                           template=template )
125 
126    ! NOTE:  This i loop should be inside the n loops.  
127    ! Ideally, residual_generic class should know how to pack/unpack 
128    ! (serialize/deserialize) itself for arbitrary I/O or communications (MPI or 
129    ! otherwise) that clients may choose to implement.  Below are a possible set 
130    ! of new methods for residual_generic_type:  
131    !  residual_generic_pack_counts( res_generic, (out)rcount, (out)icount )
132    !  [client allocates ibuf(icount) and rbuf(rcount)]
133    !  residual_generic_pack( res_generic, (inout)rstart, (inout)rbuf, &
134    !                         (inout)istart, (inout)ibuf )
135    !  [client MPI communications:   ibuf->ibufg  rbuf->rbufg]
136    !  residual_generic_unpack_counts( ibufg, (out)rstarts, (out)rcounts )
137    !  residual_generic_unpack( (out)res_generic, rstarts(n), rcounts(n), rbufg )
138    ! TOdo:  
139    !  1) Design serialization for residual_generic_type.  
140    !  2) Implement new methods.  
141    !  3) Refactor below...  
142    !  4) Optimize performance.  
143    ! At the moment this refactoring is relatively low-priority since 
144    ! da_y_facade_to_global() is already well-encapsulated and peformance is not 
145    ! (yet) a concern for testing.  
146    ! Loop over vector and scalar elements
147    
148    do i=template%lbnd,template%ubnd
149       num_obs_send = 0
150       num_values = 0
151       num_send_buf_lcl = 0
152       ! collect information to allocate buffers
153       do n=1, re_slice%num_obs
154          if ( re_slice%obs(n)%proc_domain ) then
155             num_obs_send = num_obs_send + 1
156             ! track number of scalars/levels per obs element
157             num_values(num_obs_send) = size(re_slice%obs(n)%values(i)%ptr)
158             ! track total buffer size
159             num_send_buf_lcl = num_send_buf_lcl + num_values(num_obs_send)
160          end if
161       end do
162       ! gather num_send_buf_lcl
163       call mpi_allgather( num_send_buf_lcl, 1, mpi_integer, &
164                           num_send_buf_all, 1,              &
165                           mpi_integer, comm, ierr )
166       ! gather num_values
167       recv_counts = num_obs_send_all
168       recv_displs(0) = 0
169       do proc=1, num_procs-1
170          recv_displs(proc) = recv_displs(proc-1) + recv_counts(proc-1)
171       end do
172       ! num_values
173       call mpi_allgatherv( num_values, num_obs_send, mpi_integer,    &
174                            num_values_all, recv_counts, recv_displs, &
175                            mpi_integer, comm, ierr )
176       ! set up to gather local arrays
177       ! compute start index of each obs in gathered buffers
178       obs_start_all(1) = 1
179       do n=2, num_obs_all
180          obs_start_all(n) = obs_start_all(n-1) + num_values_all(n-1)
181       end do
182       ! finish setting up to gather local arrays
183       recv_counts = num_send_buf_all
184       recv_displs(0) = 0
185       do proc=1, num_procs-1
186          recv_displs(proc) = recv_displs(proc-1) + recv_counts(proc-1)
187       end do
188       num_recv_all = sum( recv_counts )
189       ! allocate and initialize send buffer
190       allocate( re_vals_lcl(num_send_buf_lcl) )
191       buf_idx = 0
192       do n=1, re_slice%num_obs
193          if ( re_slice%obs(n)%proc_domain ) then
194             do k=1, size(re_slice%obs(n)%values(i)%ptr)
195                buf_idx = buf_idx + 1
196                re_vals_lcl(buf_idx) = re_slice%obs(n)%values(i)%ptr(k)
197             end do
198          end if
199       end do
200       if ( buf_idx /= num_send_buf_lcl ) then
201          call da_error (__FILE__,__LINE__, &
202             (/'ASSERTION ERROR:  buf_idx /= num_send_buf_lcl'/))
203       end if
204       ! allocate receive buffer
205       allocate( re_vals_all(num_recv_all) )
206       ! finally, actually gather the values
207       call mpi_allgatherv( re_vals_lcl, num_send_buf_lcl, true_mpi_real,   &
208                            re_vals_all, recv_counts, recv_displs, &
209                            true_mpi_real, comm, ierr )
210       ! initialize re_glob_slice
211       ! NOTE:  The i loop should be inside this n loop, see comment above...  
212       ! TOdo:  Refactor...  
213       do n=1, re_glob_slice%num_obs
214          serial_n = obs_global_index_inv(n)
215          serial_numvals = num_values_all(serial_n)
216          buf_idx = obs_start_all(serial_n)
217          ! note that we only collected obs someone owns
218          re_glob_slice%obs(n)%proc_domain = .true.
219          re_glob_slice%obs(n)%obs_global_index = obs_global_index_all(serial_n)
220          call da_res_generic_alloc_and_set( re_glob_slice%obs(n), i, &
221             re_vals_all(buf_idx:(buf_idx+serial_numvals-1)) )
222       end do
223       ! deallocate communication buffers, etc.
224       deallocate( re_vals_all )
225       deallocate( re_vals_lcl ) 
226 
227    end do  ! end of i loop
228 
229    call da_y_facade_free( re_slice )
230 
231    deallocate( num_values_all, obs_global_index_all, &
232                 obs_global_index_inv, obs_start_all )
233    deallocate( num_obs_send_all, num_send_buf_all, &
234                 recv_counts, recv_displs )
235 
236 #else
237    call da_error (__FILE__,__LINE__, &
238       (/'may only be called for a distributed memory parallel run'/))
239 #endif
240 
241 end subroutine da_y_facade_to_global
242 
243