da_trace_report.inc
References to this file elsewhere.
1 subroutine da_trace_report
2
3 !--------------------------------------------------------------------
4 ! Purpose: Produce a trace report
5 !--------------------------------------------------------------------
6
7 implicit none
8
9 integer :: i ! loop counter
10 integer :: j ! loop counter
11 integer :: CountRate
12 integer :: MasterNoRoutines
13 integer :: temp
14 integer :: MinElapsedPos
15 integer :: MaxElapsedPos
16 integer :: MinCPUPos
17 integer :: MaxCPUPos
18 integer :: itemp1(MaxNoRoutines)
19 integer :: MasterMaxHeap(0:num_procs-1,MaxNoRoutines)
20 integer :: MasterNoCalls(0:num_procs-1,MaxNoRoutines)
21 integer :: OverallNoCalls(MaxNoRoutines)
22 integer, allocatable :: Index(:)
23
24 real :: TempCpuTime
25 real :: TotalElapsedTime !
26 real :: TotalCPUTime(1) !
27 real :: SpeedUp ! speed up factor
28 real :: PercentCPUTime ! percentage in CPU time
29 real :: PercentElapsedTime ! percentage in elapsed time
30 real :: rtemp1(MaxNoRoutines)
31 real :: MasterElapsedTime(0:num_procs-1,MaxNoRoutines)
32 real :: MasterElapsedTimeLocal(0:num_procs-1,MaxNoRoutines)
33 real :: MasterCPUTime(0:num_procs-1,MaxNoRoutines)
34 real :: MasterCPUTimeLocal(0:num_procs-1,MaxNoRoutines)
35 real :: OverallElapsedTime(MaxNoRoutines)
36 real :: OverallCPUTime(MaxNoRoutines)
37
38 character (len=TraceNameLen) :: MasterTimerNames(MaxNoRoutines)
39
40 if (.not. use_html) then
41 write (unit=trace_unit, fmt='(A)') &
42 "Report only available if use_html is true"
43 return
44 end if
45
46 call system_clock (COUNT=temp)
47
48 TotalElapsedTime=temp-BaseElapsedTime ! on PE 0
49
50 call cpu_time(TempCpuTime)
51
52 TotalCPUTime(1) = TempCpuTime - BaseCPUTime
53
54 call system_clock(&
55 COUNT_RATE=CountRate)
56
57 ! Ensure the lists from each PE match. use the routine list from the
58 ! traced PE as the master copy
59
60 MasterTimerNames(:)=TimerNames(:)
61
62 if (myproc == trace_pe) then
63 MasterNoRoutines=NoRoutines
64 else
65 MasterNoRoutines=0
66 end if
67
68 #ifdef DM_PARALLEL
69 call da_proc_sum_int(MasterNoRoutines)
70 ! only PE 0 knows the result
71
72 call mpi_bcast(MasterTimerNames(1:MaxNoRoutines), &
73 TraceNameLen*MaxNoRoutines, &
74 MPI_character,trace_pe, comm,ierr)
75 #endif
76
77 MasterElapsedTime(:,:)=0.0
78 MasterCPUTime(:,:)=0.0
79 MasterElapsedTimeLocal(:,:)=0.0
80 MasterCPUTimeLocal(:,:)=0.0
81 MasterNoCalls(:,:)=0
82 MasterMaxHeap(:,:)=0
83
84 do i=1,MaxNoRoutines
85 do j=1,NoRoutines
86 if (TimerNames(j) == MasterTimerNames(i)) then
87 MasterElapsedTime(myproc,i)=ElapsedTime(j)
88 MasterCPUTime(myproc,i)=CPUTime(j)
89 MasterElapsedTimeLocal(myproc,i)=ElapsedTimeLocal(j)
90 MasterCPUTimeLocal(myproc,i)=CPUTimeLocal(j)
91 MasterNoCalls(myproc,i)=NoCalls(j)
92 MasterMaxHeap(myproc,i)=MaxHeap(j)
93 cycle
94 end if
95 end do
96 end do
97
98 #ifdef DM_PARALLEL
99 do i=0,num_procs-1
100 call da_proc_sum_real(MasterElapsedTime(i,:))
101 call da_proc_sum_real(MasterCPUTime(i,:))
102 call da_proc_sum_real(MasterElapsedTimeLocal(i,:))
103 call da_proc_sum_real(MasterCPUTimeLocal(i,:))
104 call da_proc_sum_ints(MasterNoCalls(i,:))
105 call da_proc_sum_ints(MasterMaxHeap(i,:))
106 end do
107 #endif
108
109 if (rootproc) then
110
111 do j=1,MasterNoRoutines
112 rtemp1(j)=sum(MasterElapsedTimeLocal(:,j))
113 end do
114 !==========================================================================
115 ! Sort subroutines into time order based on local Elapsed Time.
116 ! All PEs should have the same sort order after the sum.
117 !==========================================================================
118
119 allocate (Index(MasterNoRoutines))
120
121 call da_trace_real_sort(rtemp1,MasterNoRoutines,index)
122
123 do j=1,MasterNoRoutines
124 OverallElapsedTime(j)=sum(MasterElapsedTimeLocal(:,j))
125 OverallCPUTime(j)=sum(MasterCPUTimeLocal(:,j))
126 OverallNoCalls(j)=sum(MasterNoCalls(:,j))
127 end do
128
129 write(unit=trace_unit, fmt='(A,I4,A)') &
130 "</pre><hr><H1>For PE",trace_pe,"</H1>"
131
132 ! Output timing information
133
134 write(unit=trace_unit, &
135 fmt='("<a name=local><h2>Local Timing Summary</h2></a>")')
136
137 if (num_procs == 1) then
138 ! needs changing to work MPP
139 write (unit=trace_unit, &
140 fmt='("(Tracing itself took ",F8.1,"s CPU Time, and ",F8.1,a)') &
141 (CPUTimeLocalStart-CPUTimeStart(1)-TotalCPUTime(1)), &
142 (ElapsedTimeLocalStart-ElapsedTimeStart(1)-TotalElapsedTime) / &
143 real(CountRate), &
144 "s elapsed time during the run. This is not included in the times below.)<p>"
145 else
146 write (unit=trace_unit,fmt='(A)') &
147 "No estimate can be made of time in trace itself.<p>"
148 end if
149
150 write(unit=trace_unit, &
151 fmt='("<TABLE BORDER>")')
152 write(unit=trace_unit, &
153 fmt='("<TR><TH>Routine Name<TH>Calls<TH COLSPAN=4>Elapsed Time (seconds)<TH COLSPAN=4>")')
154 write(unit=trace_unit, &
155 fmt='("CPU Time (seconds)<TH>Speed up")')
156 write(unit=trace_unit, &
157 fmt='("<TR><TH></TH><TH>per PE</TH><TH>Average per PE<TH>%<TH>Minimum<TH>Maximum <TH>Total<TH>%<TH>Minimum<TH>Maximum")')
158 write(unit=trace_unit, &
159 fmt='("<TH>",I4," PE")') num_procs
160
161 do i=MasterNoRoutines,1,-1
162 Pointer=index(i)
163
164 if (TotalCPUTime(1) > 0.0) then
165 PercentCPUTime=100.0 * OverallCPUTime(Pointer)/TotalCPUTime(1)
166 else
167 PercentCPUTime=100.0
168 end if
169
170 if (TotalElapsedTime > 0.0) then
171 PercentElapsedTime=100.0*OverallElapsedTime(Pointer)/(real(num_procs) * TotalElapsedTime)
172 else
173 PercentElapsedTime=100.0
174 end if
175
176 if (OverallElapsedTime(Pointer) > 0.0) then
177 SpeedUp = OverallCPUTime(Pointer) / (OverallElapsedTime(Pointer)/real(CountRate*num_procs))
178 else
179 SpeedUp = 0.0
180 end if
181
182 ! This horrible solution as MinLOC does not take a DIM argument, so sum
183 ! is needed to convert the array to an integer
184
185 MinElapsedPos = sum(MinLOC(MasterElapsedTimeLocal(:,Pointer)))-1
186 MaxElapsedPos = sum(MAXLOC(MasterElapsedTimeLocal(:,Pointer)))-1
187 MinCPUPos = sum(MinLOC(MasterCPUTimeLocal(:,Pointer)))-1
188 MaxCPUPos = sum(MAXLOC(MasterCPUTimeLocal(:,Pointer)))-1
189
190 !----------------------------------------------------------------------
191 ! Write out results. Note the abnormally long format line is needed as
192 ! the NAG compiler complains if a quoted line is split.
193 !----------------------------------------------------------------------
194
195 write (unit=trace_unit, fmt='(7A)') &
196 "<TR><TD><a href=", &
197 trim(Documentation_url), &
198 "/", &
199 trim(MasterTimerNames(Pointer)), & ! Subroutine name
200 ".html>", &
201 trim(MasterTimerNames(Pointer)), & ! Subroutine name
202 "</a>"
203 write (unit=trace_unit, &
204 fmt='("<TD>",F10.1,2("<TD>",F10.2,"<TD>",F10.1,2("<TD>",F10.1," on",I3)),"<TD>",F5.2)') &
205 real(OverallNoCalls(Pointer))/real(num_procs), & ! Average number of calls per PE
206 OverallElapsedTime(Pointer)/(num_procs*real(CountRate)), & ! Average Elapsed Time
207 PercentElapsedTime, & ! Percent Elapsed Time
208 MasterElapsedTimeLocal(MinElapsedPos,Pointer)/real(CountRate), & ! Min average Elapsed Time
209 MinElapsedPos, & ! Which PE
210 MasterElapsedTimeLocal(MaxElapsedPos,Pointer)/real(CountRate), & ! Max average Elapsed Time
211 MaxElapsedPos, & ! Which PE
212 OverallCPUTime(Pointer), & ! CPU time
213 PercentCPUTime, & ! Percent CPU time
214 MasterCPUTimeLocal(MinCPUPos,Pointer), & ! Min average CPU Time
215 MinCPUPos, & ! Which PE
216 MasterCPUTimeLocal(MaxCPUPos,Pointer), & ! Max average CPU Time
217 MaxCPUPos, & ! Which PE
218 SpeedUp ! Speedup
219 if (trace_csv) then
220 write(unit=trace_csv_unit, &
221 fmt='(2A,F10.1,2(",",F10.2,",",F10.1,2(",",F10.1,",",I3)),",",F5.2)') &
222 '"local",', &
223 '"'//trim(MasterTimerNames(Pointer))//'",', &
224 real(OverallNoCalls(Pointer))/real(num_procs), & ! Average number of calls per PE
225 OverallElapsedTime(Pointer)/(num_procs*real(CountRate)), & ! Average Elapsed Time
226 PercentElapsedTime, & ! Percent Elapsed Time
227 MasterElapsedTimeLocal(MinElapsedPos,Pointer)/real(CountRate), & ! Min average Elapsed Time
228 MinElapsedPos, & ! Which PE
229 MasterElapsedTimeLocal(MaxElapsedPos,Pointer)/real(CountRate), & ! Max average Elapsed Time
230 MaxElapsedPos, & ! Which PE
231 OverallCPUTime(Pointer), & ! CPU time
232 PercentCPUTime, & ! Percent CPU time
233 MasterCPUTimeLocal(MinCPUPos,Pointer), & ! Min average CPU Time
234 MinCPUPos, & ! Which PE
235 MasterCPUTimeLocal(MaxCPUPos,Pointer), & ! Max average CPU Time
236 MaxCPUPos, & ! Which PE
237 SpeedUp ! Speedup
238 end if
239 end do
240
241 write (unit=trace_unit, &
242 fmt='(A,I4,A,F8.1,A,F8.1,A)') &
243 "<TR><TD><B>Total</B>",MasterNoRoutines, "<TD></TD><TD><B>", &
244 TotalElapsedTime/real(CountRate), &
245 "</B><TD></TD><TD></TD><TD></TD><TD><B>", &
246 TotalCPUTime(1),"</B><TD></TD><TD></TD><TD></TD>"
247 if (TotalElapsedTime > 0.0) then
248 write (unit=trace_unit, fmt='("<TD><B>",F8.1,"</B>")') &
249 (TotalCPUTime(1))/(TotalElapsedTime/real(CountRate))
250 end if
251 write(unit=trace_unit, &
252 fmt='("</TABLE><p><p>")')
253
254 if (trace_csv) then
255 write(unit=trace_csv_unit,fmt=*) " "
256 end if
257
258 !===================================================================================
259 ! Sort subroutines into time order based on overall Elapsed Time.
260 ! All PEs should have the same sort order after the sum.
261 !===================================================================================
262
263 do j=1,MasterNoRoutines
264 rtemp1(j)=sum(MasterElapsedTime(:,j))
265 end do
266
267 call da_trace_real_sort(rtemp1,MasterNoRoutines,index)
268
269 do j=1,MasterNoRoutines
270 OverallElapsedTime(j)=sum(MasterElapsedTime(:,j))
271 OverallCPUTime(j)=sum(MasterCPUTime(:,j))
272 end do
273
274 ! Output timing information
275
276 write(unit=trace_unit, &
277 fmt='("</pre><hr><a name=overall><h2>Overall Timing Summary</h2></a>")')
278
279 write(unit=trace_unit, &
280 fmt='("<TABLE BORDER>")')
281 write(unit=trace_unit, &
282 fmt='("<TR><TH>Routine Name<TH>Calls<TH COLSPAN=4>Elapsed Time (seconds)<TH COLSPAN=4>")')
283 write(unit=trace_unit, &
284 fmt='("CPU Time (seconds)<TH>Speed up")')
285 write(unit=trace_unit, &
286 fmt='("<TR><TH></TH><TH>per PE</TH><TH>Average per PE<TH>%<TH>Minimum<TH>Maximum<TH>Total<TH>%<TH>Minimum<TH>Maximum")')
287 write(unit=trace_unit, &
288 fmt='("<TH>",I4," PE")') num_procs
289
290 do i=MasterNoRoutines,1,-1
291 Pointer=index(i)
292
293 if (TotalCPUTime(1) > 0.0) then
294 PercentCPUTime=100.0 * OverallCPUTime(Pointer)/TotalCPUTime(1)
295 else
296 PercentCPUTime=100.0
297 end if
298
299 if (TotalElapsedTime > 0.0) then
300 PercentElapsedTime=100.0 * OverallElapsedTime(Pointer)/(real(num_procs) * TotalElapsedTime)
301 else
302 PercentElapsedTime=100.0
303 end if
304
305 if (OverallElapsedTime(Pointer) > 0.0) then
306 SpeedUp = OverallCPUTime(Pointer) / (OverallElapsedTime(Pointer)/real(num_procs*CountRate))
307 else
308 SpeedUp = 0.0
309 end if
310
311 ! This horrible solution as MinLOC does not take a DIM argument, so
312 ! sum is needed to convert the array to an integer
313
314 MinElapsedPos = sum(MinLOC(MasterElapsedTime(:,Pointer)))-1
315 MaxElapsedPos = sum(MAXLOC(MasterElapsedTime(:,Pointer)))-1
316 MinCPUPos = sum(MinLOC(MasterCPUTime(:,Pointer)))-1
317 MaxCPUPos = sum(MaxLOC(MasterCPUTime(:,Pointer)))-1
318
319 !----------------------------------------------------------------------
320 ! Write out results. Note the abnormally long format line is needed as
321 ! the NAG compiler complains if a quoted line is split.
322 !----------------------------------------------------------------------
323
324 write (unit=trace_unit, fmt='(7A)') &
325 "<TR><TD><a href=", &
326 trim(Documentation_url), &
327 "/", &
328 trim(MasterTimerNames(Pointer)), & ! Subroutine name
329 ".html>", &
330 trim(MasterTimerNames(Pointer)), & ! Subroutine name
331 "</a>"
332 write (unit=trace_unit, &
333 fmt='("<TD>",F10.1,2("<TD>",F10.2,"<TD>",F10.1,2("<TD>",F10.1," on",I3)),"<TD>",F5.2)') &
334 real(OverallNoCalls(Pointer))/real(num_procs), & ! Number of calls per PE
335 OverallElapsedTime(Pointer)/(real(num_procs*CountRate)), & ! Average Elapsed Time
336 PercentElapsedTime, & ! Percent Elapsed Time
337 MasterElapsedTime(MinElapsedPos,Pointer)/real(CountRate), & ! Min average Elapsed Time
338 MinElapsedPos, & ! Which PE
339 MasterElapsedTime(MaxElapsedPos,Pointer)/real(CountRate), & ! Max average Elapsed Time
340 MaxElapsedPos, & ! Which PE
341 OverallCPUTime(Pointer), & ! CPU time
342 PercentCPUTime, & ! Percent CPU time
343 MasterCPUTime(MinCPUPos,Pointer), & ! Min average CPU Time
344 MinCPUPos, & ! Which PE
345 MasterCPUTime(MaxCPUPos,Pointer), & ! Max average CPU Time
346 MaxCPUPos, & ! Which PE
347 SpeedUp ! SpeedUp
348 if (trace_csv) then
349 write (unit=trace_csv_unit, &
350 fmt='(2A,F10.1,2(",",F10.2,",",F10.1,2(",",F10.1,",",I3)),",",F5.2)') &
351 '"overall",', &
352 '"'//trim(MasterTimerNames(Pointer))//'",', &
353 real(OverallNoCalls(Pointer))/real(num_procs), & ! Number of calls per PE
354 OverallElapsedTime(Pointer)/(real(num_procs*CountRate)), & ! Average Elapsed Time
355 PercentElapsedTime, & ! Percent Elapsed Time
356 MasterElapsedTime(MinElapsedPos,Pointer)/real(CountRate), & ! Min average Elapsed Time
357 MinElapsedPos, & ! Which PE
358 MasterElapsedTime(MaxElapsedPos,Pointer)/real(CountRate), & ! Max average Elapsed Time
359 MaxElapsedPos, & ! Which PE
360 OverallCPUTime(Pointer), & ! CPU time
361 PercentCPUTime, & ! Percent CPU time
362 MasterCPUTime(MinCPUPos,Pointer), & ! Min average CPU Time
363 MinCPUPos, & ! Which PE
364 MasterCPUTime(MaxCPUPos,Pointer), & ! Max average CPU Time
365 MaxCPUPos, & ! Which PE
366 SpeedUp ! SpeedUp
367 end if
368 end do
369
370 write (unit=trace_unit, &
371 fmt='(A,I4,A,F8.1,A,F8.1,A)') &
372 "<TR><TD><B>Total</B>",MasterNoRoutines, "</TD><TD><TD><B>", &
373 TotalElapsedTime/real(CountRate), &
374 "</B><TD></TD><TD></TD><TD></TD><TD><B>",TotalCPUTime(1), &
375 "</B><TD></TD><TD></TD><TD></TD>"
376 if (TotalElapsedTime > 0.0) then
377 write (unit=trace_unit, fmt='("<TD><B>",F8.1,"</B>")') &
378 TotalCPUTime(1)/(TotalElapsedTime/real(CountRate))
379 end if
380
381 write(unit=trace_unit, &
382 fmt='("</TABLE>")')
383
384 if (trace_csv) then
385 write(unit=trace_csv_unit,fmt=*) " "
386 end if
387
388 end if ! rootproc
389
390 !============================================================================
391 ! Sort subroutines into memory use order by max memory on one PE
392 !============================================================================
393
394 if (trace_memory) then
395
396 #ifdef DM_PARALLEL
397 do j=1,MaxNoRoutines
398 call da_proc_sum_ints(MasterMaxHeap(:,j))
399 end do
400 #endif
401
402 if (rootproc) then
403 do j=1,MasterNoRoutines
404 itemp1(j)=MAXVAL(MasterMaxHeap(:,j))
405 end do
406
407 call da_trace_int_sort(itemp1,MasterNoRoutines,index)
408
409 write (unit=trace_unit,fmt='("<hr><a name=memory><h2>Maximum memory usage for routines</h2></a>")')
410 write (unit=trace_unit,fmt='("<TABLE BORDER>")')
411 write (unit=trace_unit,fmt='("<TR><TH>Routine<TH>Max in any PE (kbytes)")')
412 write (unit=trace_unit,fmt='("<TH>Overall (kbytes)<TH>Average per PE (kbytes)")')
413
414 do i=MasterNoRoutines,1,-1
415 Pointer=index(i)
416 write (unit=trace_unit, &
417 fmt='("<TR><TD><a href=",A,"/",A,".html>",A,"</a><TD>",I15,"<TD>",I15,"<TD>",I15)') &
418 trim(Documentation_url),trim(TimerNames(Pointer)),trim(TimerNames(Pointer)), &
419 MAXVAL(MasterMaxHeap(:,Pointer)),sum(MasterMaxHeap(:,Pointer)), &
420 sum(MasterMaxHeap(:,Pointer))/num_procs
421 if (trace_csv) then
422 write (unit=trace_csv_unit, &
423 fmt='(2A,I15,",",I15,",",I15)') &
424 '"memory",', &
425 '"'//trim(TimerNames(Pointer))//'",', &
426 MAXVAL(MasterMaxHeap(:,Pointer)),sum(MasterMaxHeap(:,Pointer)), &
427 sum(MasterMaxHeap(:,Pointer))/num_procs
428 end if
429 end do
430 write (unit=trace_unit,fmt='("</table></body></html>")')
431
432 if (trace_csv) then
433 write(unit=trace_csv_unit,fmt=*)
434 end if
435 end if
436 end if
437
438 if (trace_write .AND. trace_unit /= 0) then
439 close(trace_unit)
440 end if
441
442 if (trace_csv .and. rootproc) then
443 close(trace_csv_unit)
444 end if
445
446 if (myproc == 0) then
447 deallocate(index)
448 end if
449
450 end subroutine da_trace_report
451
452