<HTML> <BODY BGCOLOR=#ccccdd LINK=#0000aa VLINK=#0000ff ALINK=#ff0000 ><BASE TARGET="bottom_target"><PRE>
<A NAME='DA_TRACE_REPORT'><A href='../../html_code/tracing/da_trace_report.inc.html#DA_TRACE_REPORT' TARGET='top_target'><IMG SRC="../../gif/bar_red.gif" border=0></A>

subroutine da_trace_report 2,11

   !--------------------------------------------------------------------
   ! Purpose: Produce a trace report
   !--------------------------------------------------------------------

   implicit none

   integer :: i                        ! loop counter
   integer :: j                        ! loop counter
   integer :: CountRate
   integer :: MasterNoRoutines
   integer :: temp
   integer :: MinElapsedPos
   integer :: MaxElapsedPos
   integer :: MinCPUPos
   integer :: MaxCPUPos
   integer :: itemp1(MaxNoRoutines)
   integer :: MasterMaxHeap(0:num_procs-1,MaxNoRoutines)
   integer :: MasterNoCalls(0:num_procs-1,MaxNoRoutines)
   integer :: OverallNoCalls(MaxNoRoutines)
   integer, allocatable :: Index(:)

   real    :: TempCpuTime
   real    :: TotalElapsedTime             !
   real    :: TotalCPUTime(1)              !
   real    :: SpeedUp                      ! speed up factor
   real    :: PercentCPUTime               ! percentage in CPU time
   real    :: PercentElapsedTime           ! percentage in elapsed time
   real    :: rtemp1(MaxNoRoutines)
   real    :: MasterElapsedTime(0:num_procs-1,MaxNoRoutines)
   real    :: MasterElapsedTimeLocal(0:num_procs-1,MaxNoRoutines)
   real    :: MasterCPUTime(0:num_procs-1,MaxNoRoutines)
   real    :: MasterCPUTimeLocal(0:num_procs-1,MaxNoRoutines)
   real    :: OverallElapsedTime(MaxNoRoutines)
   real    :: OverallCPUTime(MaxNoRoutines)

   character (len=TraceNameLen) :: MasterTimerNames(MaxNoRoutines)

   if (.not. use_html) then
      write (unit=trace_unit, fmt='(A)') &amp;
         "Report only available if use_html is true"
      return
   end if

   call system_clock (COUNT=temp)

   TotalElapsedTime=temp-BaseElapsedTime ! on PE 0

   call cpu_time(TempCpuTime)

   TotalCPUTime(1) = TempCpuTime - BaseCPUTime

   call system_clock(&amp;
      COUNT_RATE=CountRate)

   ! Ensure the lists from each PE match. use the routine list from the 
   ! traced PE as the master copy

   MasterTimerNames(:)=TimerNames(:)

   if (myproc == trace_pe) then
      MasterNoRoutines=NoRoutines
   else
      MasterNoRoutines=0
   end if

#ifdef DM_PARALLEL
   call da_proc_sum_int(MasterNoRoutines)
   ! only PE 0 knows the result

   call mpi_bcast(MasterTimerNames(1:MaxNoRoutines), &amp;
                  TraceNameLen*MaxNoRoutines, &amp;
                  MPI_character,trace_pe, comm,ierr)
#endif

   MasterElapsedTime(:,:)=0.0
   MasterCPUTime(:,:)=0.0
   MasterElapsedTimeLocal(:,:)=0.0
   MasterCPUTimeLocal(:,:)=0.0
   MasterNoCalls(:,:)=0
   MasterMaxHeap(:,:)=0

   do i=1,MaxNoRoutines
      do j=1,NoRoutines
         if (TimerNames(j) == MasterTimerNames(i)) then
            MasterElapsedTime(myproc,i)=ElapsedTime(j)
            MasterCPUTime(myproc,i)=CPUTime(j)
            MasterElapsedTimeLocal(myproc,i)=ElapsedTimeLocal(j)
            MasterCPUTimeLocal(myproc,i)=CPUTimeLocal(j)
            MasterNoCalls(myproc,i)=NoCalls(j)
            MasterMaxHeap(myproc,i)=MaxHeap(j)
            cycle
         end if
      end do
   end do

#ifdef DM_PARALLEL
   do i=0,num_procs-1
      call da_proc_sum_real(MasterElapsedTime(i,:))
      call da_proc_sum_real(MasterCPUTime(i,:))
      call da_proc_sum_real(MasterElapsedTimeLocal(i,:))
      call da_proc_sum_real(MasterCPUTimeLocal(i,:))
      call da_proc_sum_ints(MasterNoCalls(i,:))
      call da_proc_sum_ints(MasterMaxHeap(i,:))
   end do
#endif

   if (rootproc) then

      do j=1,MasterNoRoutines
         rtemp1(j)=sum(MasterElapsedTimeLocal(:,j))
      end do
      !==========================================================================
      ! Sort subroutines into time order based on local Elapsed Time.
      ! All PEs should have the same sort order after the sum.
      !==========================================================================

      allocate (Index(MasterNoRoutines))

      call da_trace_real_sort(rtemp1,MasterNoRoutines,index)

      do j=1,MasterNoRoutines
         OverallElapsedTime(j)=sum(MasterElapsedTimeLocal(:,j))
         OverallCPUTime(j)=sum(MasterCPUTimeLocal(:,j))
         OverallNoCalls(j)=sum(MasterNoCalls(:,j))
      end do

      write(unit=trace_unit, fmt='(A,I4,A)') &amp;
         "&lt;/pre&gt;&lt;hr&gt;&lt;H1&gt;For PE",trace_pe,"&lt;/H1&gt;"

      ! Output timing information

      write(unit=trace_unit, &amp;
         fmt='("&lt;a name=local&gt;&lt;h2&gt;Local Timing Summary&lt;/h2&gt;&lt;/a&gt;")')

      if (num_procs == 1) then
         ! needs changing to work MPP
         write (unit=trace_unit, &amp;
            fmt='("(Tracing itself took ",F8.1,"s CPU Time, and ",F8.1,a)') &amp;
            (CPUTimeLocalStart-CPUTimeStart(1)-TotalCPUTime(1)), &amp;
            (ElapsedTimeLocalStart-ElapsedTimeStart(1)-TotalElapsedTime) / &amp;
            real(CountRate), &amp;
            "s elapsed time during the run. This is not included in the times below.)&lt;p&gt;"
      else
         write (unit=trace_unit,fmt='(A)') &amp;
            "No estimate can be made of time in trace itself.&lt;p&gt;"
      end if

      write(unit=trace_unit, &amp;
         fmt='("&lt;TABLE BORDER&gt;")')
      write(unit=trace_unit, &amp;
         fmt='("&lt;TR&gt;&lt;TH&gt;Routine Name&lt;TH&gt;Calls&lt;TH COLSPAN=4&gt;Elapsed Time (seconds)&lt;TH COLSPAN=4&gt;")')
      write(unit=trace_unit, &amp;
         fmt='("CPU Time (seconds)&lt;TH&gt;Speed up")')
      write(unit=trace_unit, &amp;
         fmt='("&lt;TR&gt;&lt;TH&gt;&lt;/TH&gt;&lt;TH&gt;per PE&lt;/TH&gt;&lt;TH&gt;Average per PE&lt;TH&gt;%&lt;TH&gt;Minimum&lt;TH&gt;Maximum &lt;TH&gt;Total&lt;TH&gt;%&lt;TH&gt;Minimum&lt;TH&gt;Maximum")')
      write(unit=trace_unit, &amp;
         fmt='("&lt;TH&gt;",I4," PE")') num_procs

      do i=MasterNoRoutines,1,-1
         Pointer=index(i)    

         if (TotalCPUTime(1) &gt; 0.0) then
            PercentCPUTime=100.0 * OverallCPUTime(Pointer)/TotalCPUTime(1)
         else
           PercentCPUTime=100.0
         end if

         if (TotalElapsedTime &gt; 0.0) then
            PercentElapsedTime=100.0*OverallElapsedTime(Pointer)/(real(num_procs) * TotalElapsedTime)
         else
            PercentElapsedTime=100.0
         end if

         if (OverallElapsedTime(Pointer) &gt; 0.0) then
            SpeedUp = OverallCPUTime(Pointer) / (OverallElapsedTime(Pointer)/real(CountRate*num_procs))
         else
            SpeedUp = 0.0
         end if

         ! This horrible solution as MinLOC does not take a DIM argument, so sum
         ! is needed to convert the array to an integer

         MinElapsedPos = sum(MinLOC(MasterElapsedTimeLocal(:,Pointer)))-1
         MaxElapsedPos = sum(MAXLOC(MasterElapsedTimeLocal(:,Pointer)))-1
         MinCPUPos     = sum(MinLOC(MasterCPUTimeLocal(:,Pointer)))-1
         MaxCPUPos     = sum(MAXLOC(MasterCPUTimeLocal(:,Pointer)))-1

         !----------------------------------------------------------------------
         ! Write out results. Note the abnormally long format line is needed as
         ! the NAG compiler complains if a quoted line is split.
         !----------------------------------------------------------------------

         write (unit=trace_unit, fmt='(7A)') &amp;
            "&lt;TR&gt;&lt;TD&gt;&lt;a href=", &amp;
            trim(Documentation_url), &amp;
            "/", &amp;
            trim(MasterTimerNames(Pointer)), &amp; ! Subroutine name
            ".html&gt;", &amp;
            trim(MasterTimerNames(Pointer)), &amp; ! Subroutine name
            "&lt;/a&gt;"
         write (unit=trace_unit, &amp;
            fmt='("&lt;TD&gt;",F10.1,2("&lt;TD&gt;",F10.2,"&lt;TD&gt;",F10.1,2("&lt;TD&gt;",F10.1," on",I3)),"&lt;TD&gt;",F5.2)') &amp;
            real(OverallNoCalls(Pointer))/real(num_procs),                       &amp; ! Average number of calls per PE
            OverallElapsedTime(Pointer)/(num_procs*real(CountRate)),             &amp; ! Average Elapsed Time
            PercentElapsedTime,                                              &amp; ! Percent Elapsed Time
            MasterElapsedTimeLocal(MinElapsedPos,Pointer)/real(CountRate),   &amp; ! Min average Elapsed Time
            MinElapsedPos,                                                   &amp; ! Which PE
            MasterElapsedTimeLocal(MaxElapsedPos,Pointer)/real(CountRate),   &amp; ! Max average Elapsed Time
            MaxElapsedPos,                                                   &amp; ! Which PE
            OverallCPUTime(Pointer),                                         &amp; ! CPU time
            PercentCPUTime,                                                  &amp; ! Percent CPU time
            MasterCPUTimeLocal(MinCPUPos,Pointer),                           &amp; ! Min average CPU Time
            MinCPUPos,                                                       &amp; ! Which PE
            MasterCPUTimeLocal(MaxCPUPos,Pointer),                           &amp; ! Max average CPU Time
            MaxCPUPos,                                                       &amp; ! Which PE
            SpeedUp                                                            ! Speedup
         if (trace_csv) then
            write(unit=trace_csv_unit,  &amp;
               fmt='(2A,F10.1,2(",",F10.2,",",F10.1,2(",",F10.1,",",I3)),",",F5.2)') &amp;
               '"local",', &amp;
               '"'//trim(MasterTimerNames(Pointer))//'",', &amp;
               real(OverallNoCalls(Pointer))/real(num_procs),                       &amp; ! Average number of calls per PE
               OverallElapsedTime(Pointer)/(num_procs*real(CountRate)),             &amp; ! Average Elapsed Time
               PercentElapsedTime,                                              &amp; ! Percent Elapsed Time
               MasterElapsedTimeLocal(MinElapsedPos,Pointer)/real(CountRate),   &amp; ! Min average Elapsed Time
               MinElapsedPos,                                                   &amp; ! Which PE
               MasterElapsedTimeLocal(MaxElapsedPos,Pointer)/real(CountRate),   &amp; ! Max average Elapsed Time
               MaxElapsedPos,                                                   &amp; ! Which PE
               OverallCPUTime(Pointer),                                         &amp; ! CPU time
               PercentCPUTime,                                                  &amp; ! Percent CPU time
               MasterCPUTimeLocal(MinCPUPos,Pointer),                           &amp; ! Min average CPU Time
               MinCPUPos,                                                       &amp; ! Which PE
               MasterCPUTimeLocal(MaxCPUPos,Pointer),                           &amp; ! Max average CPU Time
               MaxCPUPos,                                                       &amp; ! Which PE
               SpeedUp                                                            ! Speedup
         end if
      end do

      write (unit=trace_unit, &amp;
         fmt='(A,I4,A,F8.1,A,F8.1,A)') &amp;
         "&lt;TR&gt;&lt;TD&gt;&lt;B&gt;Total&lt;/B&gt;",MasterNoRoutines, "&lt;TD&gt;&lt;/TD&gt;&lt;TD&gt;&lt;B&gt;", &amp;
         TotalElapsedTime/real(CountRate), &amp;
         "&lt;/B&gt;&lt;TD&gt;&lt;/TD&gt;&lt;TD&gt;&lt;/TD&gt;&lt;TD&gt;&lt;/TD&gt;&lt;TD&gt;&lt;B&gt;", &amp;
         TotalCPUTime(1),"&lt;/B&gt;&lt;TD&gt;&lt;/TD&gt;&lt;TD&gt;&lt;/TD&gt;&lt;TD&gt;&lt;/TD&gt;"
      if (TotalElapsedTime &gt; 0.0) then
         write (unit=trace_unit, fmt='("&lt;TD&gt;&lt;B&gt;",F8.1,"&lt;/B&gt;")') &amp;
            (TotalCPUTime(1))/(TotalElapsedTime/real(CountRate))
      end if
      write(unit=trace_unit, &amp;
         fmt='("&lt;/TABLE&gt;&lt;p&gt;&lt;p&gt;")')

      if (trace_csv) then
         write(unit=trace_csv_unit,fmt=*) " "
      end if

      !===================================================================================
      ! Sort subroutines into time order based on overall Elapsed Time.
      ! All PEs should have the same sort order after the sum. 
      !===================================================================================

      do j=1,MasterNoRoutines
         rtemp1(j)=sum(MasterElapsedTime(:,j))
      end do

      call da_trace_real_sort(rtemp1,MasterNoRoutines,index)

      do j=1,MasterNoRoutines
         OverallElapsedTime(j)=sum(MasterElapsedTime(:,j))
         OverallCPUTime(j)=sum(MasterCPUTime(:,j))
      end do

      ! Output timing information

      write(unit=trace_unit, &amp;
         fmt='("&lt;/pre&gt;&lt;hr&gt;&lt;a name=overall&gt;&lt;h2&gt;Overall Timing Summary&lt;/h2&gt;&lt;/a&gt;")')

      write(unit=trace_unit, &amp;
         fmt='("&lt;TABLE BORDER&gt;")')
      write(unit=trace_unit, &amp;
         fmt='("&lt;TR&gt;&lt;TH&gt;Routine Name&lt;TH&gt;Calls&lt;TH COLSPAN=4&gt;Elapsed Time (seconds)&lt;TH COLSPAN=4&gt;")')
      write(unit=trace_unit, &amp;
         fmt='("CPU Time (seconds)&lt;TH&gt;Speed up")')
      write(unit=trace_unit, &amp;
         fmt='("&lt;TR&gt;&lt;TH&gt;&lt;/TH&gt;&lt;TH&gt;per PE&lt;/TH&gt;&lt;TH&gt;Average per PE&lt;TH&gt;%&lt;TH&gt;Minimum&lt;TH&gt;Maximum&lt;TH&gt;Total&lt;TH&gt;%&lt;TH&gt;Minimum&lt;TH&gt;Maximum")')
      write(unit=trace_unit, &amp;
         fmt='("&lt;TH&gt;",I4," PE")') num_procs

      do i=MasterNoRoutines,1,-1
         Pointer=index(i)    

         if (TotalCPUTime(1) &gt; 0.0) then
            PercentCPUTime=100.0 * OverallCPUTime(Pointer)/TotalCPUTime(1)
         else
            PercentCPUTime=100.0
         end if

         if (TotalElapsedTime &gt; 0.0) then
            PercentElapsedTime=100.0 * OverallElapsedTime(Pointer)/(real(num_procs) * TotalElapsedTime)
         else
            PercentElapsedTime=100.0
         end if

         if (OverallElapsedTime(Pointer) &gt; 0.0) then
            SpeedUp = OverallCPUTime(Pointer) / (OverallElapsedTime(Pointer)/real(num_procs*CountRate))
         else
            SpeedUp = 0.0
         end if

         ! This horrible solution as MinLOC does not take a DIM argument, so 
         ! sum is needed to convert the array to an integer

         MinElapsedPos = sum(MinLOC(MasterElapsedTime(:,Pointer)))-1
         MaxElapsedPos = sum(MAXLOC(MasterElapsedTime(:,Pointer)))-1
         MinCPUPos     = sum(MinLOC(MasterCPUTime(:,Pointer)))-1
         MaxCPUPos     = sum(MaxLOC(MasterCPUTime(:,Pointer)))-1

         !----------------------------------------------------------------------
         ! Write out results. Note the abnormally long format line is needed as
         ! the NAG compiler complains if a quoted line is split.
         !----------------------------------------------------------------------

         write (unit=trace_unit, fmt='(7A)') &amp;
            "&lt;TR&gt;&lt;TD&gt;&lt;a href=", &amp;
            trim(Documentation_url), &amp;
            "/", &amp;
            trim(MasterTimerNames(Pointer)), &amp;    ! Subroutine name
            ".html&gt;", &amp;
            trim(MasterTimerNames(Pointer)), &amp;    ! Subroutine name
            "&lt;/a&gt;"
         write (unit=trace_unit, &amp;
            fmt='("&lt;TD&gt;",F10.1,2("&lt;TD&gt;",F10.2,"&lt;TD&gt;",F10.1,2("&lt;TD&gt;",F10.1," on",I3)),"&lt;TD&gt;",F5.2)') &amp;
            real(OverallNoCalls(Pointer))/real(num_procs),                  &amp; ! Number of calls per PE
            OverallElapsedTime(Pointer)/(real(num_procs*CountRate)),        &amp; ! Average Elapsed Time
            PercentElapsedTime,                                         &amp; ! Percent Elapsed Time
            MasterElapsedTime(MinElapsedPos,Pointer)/real(CountRate),   &amp; ! Min average Elapsed Time
            MinElapsedPos,                                              &amp; ! Which PE
            MasterElapsedTime(MaxElapsedPos,Pointer)/real(CountRate),   &amp; ! Max average Elapsed Time
            MaxElapsedPos,                                              &amp; ! Which PE
            OverallCPUTime(Pointer),                                    &amp; ! CPU time
            PercentCPUTime,                                             &amp; ! Percent CPU time
            MasterCPUTime(MinCPUPos,Pointer),                           &amp; ! Min average CPU Time
            MinCPUPos,                                                  &amp; ! Which PE
            MasterCPUTime(MaxCPUPos,Pointer),                           &amp; ! Max average CPU Time
            MaxCPUPos,                                                  &amp; ! Which PE
            SpeedUp                                                       ! SpeedUp
         if (trace_csv) then
            write (unit=trace_csv_unit, &amp;
               fmt='(2A,F10.1,2(",",F10.2,",",F10.1,2(",",F10.1,",",I3)),",",F5.2)') &amp;
               '"overall",', &amp;
               '"'//trim(MasterTimerNames(Pointer))//'",', &amp;
               real(OverallNoCalls(Pointer))/real(num_procs),                  &amp; ! Number of calls per PE
               OverallElapsedTime(Pointer)/(real(num_procs*CountRate)),        &amp; ! Average Elapsed Time
               PercentElapsedTime,                                         &amp; ! Percent Elapsed Time
               MasterElapsedTime(MinElapsedPos,Pointer)/real(CountRate),   &amp; ! Min average Elapsed Time
               MinElapsedPos,                                              &amp; ! Which PE
               MasterElapsedTime(MaxElapsedPos,Pointer)/real(CountRate),   &amp; ! Max average Elapsed Time
               MaxElapsedPos,                                              &amp; ! Which PE
               OverallCPUTime(Pointer),                                    &amp; ! CPU time
               PercentCPUTime,                                             &amp; ! Percent CPU time
               MasterCPUTime(MinCPUPos,Pointer),                           &amp; ! Min average CPU Time
               MinCPUPos,                                                  &amp; ! Which PE
               MasterCPUTime(MaxCPUPos,Pointer),                           &amp; ! Max average CPU Time
               MaxCPUPos,                                                  &amp; ! Which PE
               SpeedUp                                                       ! SpeedUp
         end if
      end do

      write (unit=trace_unit, &amp;
         fmt='(A,I4,A,F8.1,A,F8.1,A)') &amp;
         "&lt;TR&gt;&lt;TD&gt;&lt;B&gt;Total&lt;/B&gt;",MasterNoRoutines, "&lt;/TD&gt;&lt;TD&gt;&lt;TD&gt;&lt;B&gt;", &amp;
         TotalElapsedTime/real(CountRate), &amp;
         "&lt;/B&gt;&lt;TD&gt;&lt;/TD&gt;&lt;TD&gt;&lt;/TD&gt;&lt;TD&gt;&lt;/TD&gt;&lt;TD&gt;&lt;B&gt;",TotalCPUTime(1), &amp;
         "&lt;/B&gt;&lt;TD&gt;&lt;/TD&gt;&lt;TD&gt;&lt;/TD&gt;&lt;TD&gt;&lt;/TD&gt;"
      if (TotalElapsedTime &gt; 0.0) then
         write (unit=trace_unit, fmt='("&lt;TD&gt;&lt;B&gt;",F8.1,"&lt;/B&gt;")') &amp;
            TotalCPUTime(1)/(TotalElapsedTime/real(CountRate))
      end if

      write(unit=trace_unit, &amp;
         fmt='("&lt;/TABLE&gt;")')

      if (trace_csv) then
         write(unit=trace_csv_unit,fmt=*) " "
      end if

   end if ! rootproc

   !============================================================================
   ! Sort subroutines into memory use order by max memory on one PE
   !============================================================================

   if (trace_memory) then

#ifdef DM_PARALLEL
      do j=1,MaxNoRoutines
         call da_proc_sum_ints(MasterMaxHeap(:,j))
      end do
#endif

      if (rootproc) then
         do j=1,MasterNoRoutines
            itemp1(j)=MAXVAL(MasterMaxHeap(:,j))
         end do

         call da_trace_int_sort(itemp1,MasterNoRoutines,index)

         write (unit=trace_unit,fmt='("&lt;hr&gt;&lt;a name=memory&gt;&lt;h2&gt;Maximum memory usage for routines&lt;/h2&gt;&lt;/a&gt;")')
         write (unit=trace_unit,fmt='("&lt;TABLE BORDER&gt;")')
         write (unit=trace_unit,fmt='("&lt;TR&gt;&lt;TH&gt;Routine&lt;TH&gt;Max in any PE (kbytes)")')
         write (unit=trace_unit,fmt='("&lt;TH&gt;Overall (kbytes)&lt;TH&gt;Average per PE (kbytes)")')

         do i=MasterNoRoutines,1,-1
            Pointer=index(i)
            write (unit=trace_unit, &amp;
               fmt='("&lt;TR&gt;&lt;TD&gt;&lt;a href=",A,"/",A,".html&gt;",A,"&lt;/a&gt;&lt;TD&gt;",I15,"&lt;TD&gt;",I15,"&lt;TD&gt;",I15)') &amp;
               trim(Documentation_url),trim(TimerNames(Pointer)),trim(TimerNames(Pointer)), &amp;
               MAXVAL(MasterMaxHeap(:,Pointer)),sum(MasterMaxHeap(:,Pointer)), &amp;
               sum(MasterMaxHeap(:,Pointer))/num_procs
            if (trace_csv) then
               write (unit=trace_csv_unit, &amp;
                  fmt='(2A,I15,",",I15,",",I15)') &amp;
                  '"memory",', &amp;
                  '"'//trim(TimerNames(Pointer))//'",', &amp;
                  MAXVAL(MasterMaxHeap(:,Pointer)),sum(MasterMaxHeap(:,Pointer)), &amp;
                  sum(MasterMaxHeap(:,Pointer))/num_procs
            end if        
         end do
         write (unit=trace_unit,fmt='("&lt;/table&gt;&lt;/body&gt;&lt;/html&gt;")')

         if (trace_csv) then
            write(unit=trace_csv_unit,fmt=*)
         end if
      end if
   end if

   if (trace_write .AND. trace_unit /= 0) then
      close(trace_unit)
   end if
  
   if (trace_csv .and. rootproc) then
      close(trace_csv_unit)
   end if

   if (myproc == 0) then
      deallocate(index)
   end if

end subroutine da_trace_report