!WRF:MEDIATION_LAYER:SOLVER
!2009-12-28 Hongli Wang - finish the Kessler warm rain scheme
SUBROUTINE solve_em_tl ( grid , config_flags , &
! Actual arguments generated from Registry
#include "em_dummy_args.inc"
!
                    )

! Driver layer modules
   USE module_domain
   USE module_configure
   USE module_driver_constants
   USE module_machine
   USE module_tiles
   USE module_dm
! Mediation layer modules
! Model layer modules
   USE module_model_constants
   USE module_small_step_em
   USE module_em
   USE module_big_step_utilities_em
   USE module_bc
   USE module_bc_em
   USE module_solvedebug_em
   USE module_physics_addtendc
   USE module_diffusion_em
! Registry generated module
   USE module_state_description
   USE module_radiation_driver
   USE module_surface_driver
   USE module_cumulus_driver
   USE module_cu_du , only : DUCU_D
   USE module_microphysics_driver
   USE module_pbl_driver
   USE module_mp_nconvp !Xiaoyan Zhang 11/2006
   USE MODULE_MP_KESSLER_DB   
#ifdef WRF_CHEM
   USE module_input_chem_data
   USE module_chem_utilities
#endif

   USE g_module_small_step_em
   USE g_module_em
   USE g_module_big_step_utilities_em
   USE g_module_bc
   USE g_module_bc_em
   USE g_module_diffusion_em

   USE module_trace, only : trace_entry, trace_exit

   IMPLICIT NONE

#ifdef DM_PARALLEL
   INCLUDE 'mpif.h'
#endif

   !  Input data.

   TYPE(domain) , TARGET          :: grid

   !  Definitions of dummy arguments to this routine (generated from Registry).
#include <em_dummy_decl.inc>

   !  Structure that contains run-time configuration (namelist) data for domain
   TYPE (grid_config_rec_type) , INTENT(IN)          :: config_flags

   ! Local data

   INTEGER                         :: k_start , k_end, its, ite, jts, jte
   INTEGER                         :: ids , ide , jds , jde , kds , kde , &
                                      ims , ime , jms , jme , kms , kme , &
                                      ips , ipe , jps , jpe , kps , kpe
   INTEGER                         :: ij , iteration,iteration1
   INTEGER                         :: im , num_3d_m , ic , num_3d_c
   INTEGER                         :: loop
   INTEGER                         :: ijds, ijde
   INTEGER                         :: itmpstep
   INTEGER                         :: sz

! storage for tendencies and decoupled state (generated from Registry)
#include <em_i1_decl.inc>

   INTEGER :: rc 
   INTEGER :: number_of_small_timesteps, rk_step
   INTEGER :: klevel,ijm,ijp,i,j,k,size1,size2    ! for prints/plots only
   INTEGER :: idum1, idum2, dynamics_option

   INTEGER :: rk_order, iwmax, jwmax, kwmax
   REAL :: dt_rk, dts_rk, dtm, wmat
   LOGICAL :: leapfrog
   INTEGER :: l,kte,kk

!--Local variables
   real, DIMENSION(grid%sm31:grid%em31,grid%sm32:grid%em32,grid%sm33:grid%em33)           :: g_rqvcuten
   real, DIMENSION(grid%sm31:grid%em31,grid%sm32:grid%em32,grid%sm33:grid%em33)           :: g_rthcuten

   real, DIMENSION(max(grid%ed31,grid%ed33),grid%sd32:grid%ed32,grid%spec_bdy_width,4)    :: g_rqc_btm
   real, DIMENSION(max(grid%ed31,grid%ed33),grid%sd32:grid%ed32,grid%spec_bdy_width,4)    :: g_rqr_btm
   real, DIMENSION(max(grid%ed31,grid%ed33),grid%sd32:grid%ed32,grid%spec_bdy_width,4)    :: g_rqi_btm
   real, DIMENSION(max(grid%ed31,grid%ed33),grid%sd32:grid%ed32,grid%spec_bdy_width,4)    :: g_rqs_btm
   real, DIMENSION(max(grid%ed31,grid%ed33),grid%sd32:grid%ed32,grid%spec_bdy_width,4)    :: g_rqg_btm

! These are used if -DDEREF_KLUDGE is compiled
!  see http://www.mmm.ucar.edu/wrf/WG2/topics/deref_kludge.htm
   INTEGER     :: sm31  , em31  , sm32  , em32  , sm33  , em33
   INTEGER     :: sm31x , em31x , sm32x , em32x , sm33x , em33x
   INTEGER     :: sm31y , em31y , sm32y , em32y , sm33y , em33y

   LOGICAL      :: solverDisabled   ! return without doing if enabled
   
! Define benchmarking timers if -DBENCH is compiled
#include <bench_solve_em_def.h>

!----------------------
! Executable statements
!----------------------

! Trick problematic compilers into not performing copy-in/copy-out by adding
! indices to array arguments in the CALL statements in this routine.
! It has the effect of passing only the first element of the array, rather 
! than the entire array.  See:  
! http://www.mmm.ucar.edu/wrf/WG2/topics/deref_kludge.htm
#include "deref_kludge.h"

! Limit the number of arguments if compiled with -DLIMIT_ARGS by copying 
! scalar (non-array) arguments out of the grid data structure into locally
! defined copies (defined in em_dummy_decl.inc, above, as they are if they
! are arguments). An equivalent include of em_scalar_derefs.inc appears
! at the end of the routine to copy back any chnaged non-array values.
! The definition of COPY_IN or COPY_OUT before the include defines the
! direction of the copy.  Em_scalar_derefs.inc is generated from Registry
#define COPY_IN
#include <em_scalar_derefs.inc>

! Needed by some comm layers, e.g. RSL. If needed, nmm_data_calls.inc is
! generated from the registry.  The definition of REGISTER_I1 allows
! I1 data to be communicated in this routine if necessary.
#ifdef DM_PARALLEL
#    define REGISTER_I1
#      include <em_data_calls.inc>
#endif

!<DESCRIPTION>
!<pre>
! solve_em_tl is the main driver for advancing a grid a single timestep.
! It is a mediation-layer routine -> DM and SM calls are made where 
! needed for parallel processing.  
!
! solve_em_tl can integrate the equations using 3 time-integration methods
!      
!    - 3rd order Runge-Kutta time integration (recommended)
!      
!    - 2nd order Runge-Kutta time integration
!      
!    - Leapfrog time integration
!      (note: the leapfrog scheme is not correctly implemented
!      for most of the physics)
!
! The main sections of solve_em_tl are
!     
! (1) Runge-Kutta (RK) loop
!     
! (2) Non-timesplit physics (i.e., tendencies computed for updating
!     model state variables during the first RK sub-step (loop)
!     
! (3) Small (acoustic, sound) timestep loop - within the RK sub-steps
!     
! (4) Scalar advance for moist and chem scalar variables (and TKE)
!     within the RK sub-steps.
!     
! (5) time-split physics (after the RK step), currently this includes
!     only microphyics
!
! A more detailed description of these sections follows.
!</pre>
!</DESCRIPTION>

! Initialize timers if compiled with -DBENCH
#include <bench_solve_em_init.h>

   call trace_entry("solve_em_tl")
   ! 090921 bp; 
   solverDisabled = .false.
   call nl_get_disable_solver( 1, solverDisabled )
   if(solverDisabled) then
      return
   endif

!  set leapfrog or runge-kutta solver (2nd or 3rd order)

   dynamics_option = config_flags%rk_ord

!  Obtain dimension information stored in the grid data structure.
  CALL get_ijk_from_grid (  grid ,                   &
                            ids, ide, jds, jde, kds, kde,    &
                            ims, ime, jms, jme, kms, kme,    &
                            ips, ipe, jps, jpe, kps, kpe    )

  k_start         = kps
  k_end           = kpe

  ijds = min(ids, jds)
  ijde = max(ide, jde)

  num_3d_m        = num_moist
  num_3d_c        = num_chem

!  Compute these starting and stopping locations for each tile and number of tiles.
!  See: http://www.mmm.ucar.edu/wrf/WG2/topics/settiles
  CALL set_tiles ( grid , ids , ide , jds , jde , ips , ipe , jps , jpe )

  itimestep = itimestep + 1

#ifdef DM_PARALLEL
    if(grid%trace_use) call trace_entry("solve_tl_halo_comm")
#   include "HALO_EM_INIT_1.inc"
#   include "HALO_EM_INIT_2.inc"
#   include "HALO_EM_INIT_3.inc"
#   include "HALO_EM_INIT_4.inc"
#   include "HALO_EM_INIT_5.inc"
#   include "HALO_EM_TL_INIT_1.inc"
#   include "HALO_EM_TL_INIT_2.inc"
#   include "HALO_EM_TL_INIT_3.inc"
#   include "HALO_EM_TL_INIT_5.inc"
    if(grid%trace_use) call trace_exit("solve_tl_halo_comm")
#endif

!**********************************************************************
!
!  LET US BEGIN.......
!
!<DESCRIPTION>
!<pre>
! (1) RK integration loop is named the "Runge_Kutta_loop:"
!
!   Predictor-corrector type time integration.
!   Advection terms are evaluated at time t for the predictor step,
!   and advection is re-evaluated with the latest predicted value for
!   each succeeding time corrector step
!
!   2nd order Runge Kutta (rk_order = 2):
!   Step 1 is taken to the midpoint predictor, step 2 is the full step.
!
!   3rd order Runge Kutta (rk_order = 3):
!   Step 1 is taken to from t to dt/3, step 2 is from t to dt/2,
!   and step 3 is from t to dt.
!
!   non-timesplit physics are evaluated during first RK step and
!   these physics tendencies are stored for use in each RK pass.
!</pre>
!</DESCRIPTION>
!**********************************************************************

#ifdef WRF_CHEM
!
!    prepare chem aerosols for advection before communication
!    so far only for RADM2/SORGAM choice
!

   kte=min(k_end,kde-1)
!
! change units for advection to mixing ratio
!
      if(imicrogram == 1)then

   if ( p_so4aj >= PARAM_FIRST_SCALAR ) then
   !$OMP PARALLEL DO   &
   !$OMP PRIVATE ( ij ,its,ite,jts,jte,kte)
   aerosol_decouple_loop : DO ij = 1 , grid%num_tiles
       its = max(grid%i_start(ij),ids)
       ite = min(grid%i_end(ij),ide-1)
       jts = max(grid%j_start(ij),jds)
       jte = min(grid%j_end(ij),jde-1)
      do l=p_so4aj,num_chem
      do j=jts,jte
      do k=k_start,kte+1 
      kk=min(k,kde-1)
      do i=its,ite
        chem_1(i,k,j,l)=chem_1(i,kk,j,l)*alt(i,kk,j)
        chem_2(i,k,j,l)=chem_2(i,kk,j,l)*alt(i,kk,j)
      enddo
      enddo
      enddo
      enddo
    enddo aerosol_decouple_loop
    endif
    imicrogram=0
    endif
# ifdef DM_PARALLEL
   if ( num_chem >= PARAM_FIRST_SCALAR ) then
    if(grid%trace_use) call trace_entry("solve_tl_halo_comm")
!-----------------------------------------------------------------------
! see above
!--------------------------------------------------------------
     CALL wrf_debug ( 200 , ' call HALO_RK_CHEM' )
     IF      ( h_mom_adv_order <= 4 ) THEN
#      include "HALO_EM_CHEM_E_3.inc"
     ELSE IF ( h_mom_adv_order <= 6 ) THEN
#      include "HALO_EM_CHEM_E_5.inc"
     ELSE
       WRITE(wrf_err_message,*)'solve_em_tl: invalid h_mom_adv_order = ',h_mom_adv_order
       CALL wrf_error_fatal(TRIM(wrf_err_message))
     ENDIF
    if(grid%trace_use) call trace_exit("solve_tl_halo_comm")
   endif
# endif
!--------------------------------------------------------------
#endif

 rk_order = config_flags%rk_ord
 leapfrog = .false.
 dts = dt/float(time_step_sound)

 IF(rk_ord == 1) leapfrog = .true.

 Runge_Kutta_loop:  DO rk_step = 1, rk_order

   !  Set the step size and number of small timesteps for
   !  each part of the timestep

   dtm = dt
   IF ( rk_order == 1 ) THEN   ! Leapfrog

       IF (step_number /= 1) THEN
         number_of_small_timesteps = 2*time_step_sound
         dt_rk = dt
         dtm = 2*dt
       ELSE
         number_of_small_timesteps = time_step_sound
         dt_rk = dt/2.
         dtm = dt
       END IF

       dts_rk = dts

   ELSE IF ( rk_order == 2 ) THEN   ! 2nd order Runge-Kutta timestep

       IF ( rk_step == 1) THEN
             dt_rk  = 0.5*dt
             dts_rk = dts
             number_of_small_timesteps = time_step_sound/2
       ELSE
             dt_rk = dt
             dts_rk = dts
             number_of_small_timesteps = time_step_sound
       ENDIF

   ELSE IF ( rk_order == 3 ) THEN ! third order Runge-Kutta

       IF ( rk_step == 1) THEN
            dt_rk = dt/3.
            dts_rk = dt_rk
            number_of_small_timesteps = 1
       ELSE IF (rk_step == 2) THEN
            dt_rk  = 0.5*dt
            dts_rk = dts
            number_of_small_timesteps = time_step_sound/2
       ELSE
            dt_rk = dt
            dts_rk = dts
            number_of_small_timesteps = time_step_sound
       ENDIF

   ELSE

      write(wrf_err_message,*)' unknown solver, error exit for dynamics_option = ',dynamics_option
      CALL wrf_error_fatal( wrf_err_message )

   END IF

!
!  Time level t is in the *_2 variable in the first part 
!  of the step, and in the *_1 variable after the predictor.
!  the latest predicted values are stored in the *_2 variables.
!
   CALL wrf_debug ( 200 , ' call rk_step_prep ' )

BENCH_START(step_prep_tim)
   !$OMP PARALLEL DO   &
   !$OMP PRIVATE ( ij )

   DO ij = 1 , grid%num_tiles


      call g_rk_step_prep( config_flags,                     &
                           u_2,g_u_2,v_2,g_v_2,w_2,g_w_2,    &
                           ph_2,g_ph_2,mu_2,g_mu_2,          &
                           moist_2,g_moist_2,                &
                           ru,g_ru,rv,g_rv,rw,g_rw,ww,g_ww,  &
                           php,g_php,alt,g_alt,muu,g_muu,    &
                           muv,g_muv,mub,mut,g_mut,phb,      &
                           al,g_al,alb,cqu,g_cqu,cqv,g_cqv,  &
                           cqw,g_cqw,msfu,msfv,msft,         &
                           dnw,rdx,rdy,num_3d_m,             &
                           ids,ide,jds,jde,kde,              &
                           ims,ime,jms,jme,kms,kme,          &
                           grid%i_start(ij),grid%i_end(ij),  &
                           grid%j_start(ij),grid%j_end(ij),  &
                           k_start,k_end )

   END DO

   !$OMP END PARALLEL DO
BENCH_END(step_prep_tim)

#ifdef DM_PARALLEL
    if(grid%trace_use) call trace_entry("solve_tl_halo_comm")
!-----------------------------------------------------------------------
!  Stencils for patch communications  (WCS, 29 June 2001)
!  Note:  the small size of this halo exchange reflects the 
!         fact that we are carrying the uncoupled variables 
!         as state variables in the mass coordinate model, as
!         opposed to the coupled variables as in the height
!         coordinate model.
!
!                           * * * * *
!         *        * * *    * * * * *
!       * + *      * + *    * * + * * 
!         *        * * *    * * * * *
!                           * * * * *
!
!  3D variables - note staggering!  ru(X), rv(Y), ww(Z), php(Z)
!
!j ru     x
!j rv     x
!j ww     x
!j php    x
!j alt    x
!j ph_2   x
!j phb    x
!
!  the following are 2D (xy) variables
!
!j muu    x
!j muv    x
!j mut    x
!--------------------------------------------------------------
#    include "HALO_EM_A.inc"
#    include "HALO_EM_TL_A.inc"
     if(grid%trace_use) call trace_exit("solve_tl_halo_comm")
#endif

! set boundary conditions on variables 
! from big_step_prep for use in big_step_proc

#ifdef DM_PARALLEL
     if(grid%trace_use) call trace_entry("solve_tl_halo_comm")
#  include "PERIOD_BDY_EM_A.inc"
     if(grid%trace_use) call trace_exit("solve_tl_halo_comm")
#endif

!   CALL set_tiles ( grid , ids , ide , jds , jde , ips-1 , ipe+1 , jps-1 , jpe+1 )

BENCH_START(set_phys_bc_tim)
   if(dyn_opt == DYN_EM) then
   !$OMP PARALLEL DO   &
   !$OMP PRIVATE ( ij )

   DO ij = 1 , grid%num_tiles

       CALL wrf_debug ( 200 , ' call rk_phys_bc_dry_1' )

        CALL rk_phys_bc_dry_1( config_flags, ru, rv, rw, ww,      & 
                               muu, muv, mut, php, alt, p,        &
                               ids, ide, jds, jde, kds, kde,      &
                               ims, ime, jms, jme, kms, kme,      &
                               ips, ipe, jps, jpe, kps, kpe,      &
                               grid%i_start(ij), grid%i_end(ij),  &
                               grid%j_start(ij), grid%j_end(ij),  &
                               k_start, k_end                )

       CALL set_physical_bc3d( ph_2, 'w', config_flags,            &
                                 ids, ide, jds, jde, kds, kde, &
                                 ims, ime, jms, jme, kms, kme, &
                                 ips, ipe, jps, jpe, kps, kpe, &
                               grid%i_start(ij), grid%i_end(ij),        &
                               grid%j_start(ij), grid%j_end(ij),        &
                               k_start, k_end                )

   END DO
   !$OMP END PARALLEL DO
   endif
BENCH_END(set_phys_bc_tim)

    rk_step_is_one : IF (rk_step == 1) THEN ! only need to initialize diffusion tendencies

 ! initialize all tendencies to zero in order to update physics
 ! tendencies first (separate from dry dynamics).
 
BENCH_START(init_zero_tend_tim)
     !$OMP PARALLEL DO   &
     !$OMP PRIVATE ( ij )

     DO ij = 1 , grid%num_tiles

        CALL wrf_debug ( 200 , ' call init_zero_tendency' )

        call g_init_zero_tendency ( ru_tendf,g_ru_tendf,rv_tendf,g_rv_tendf, &
                                    rw_tendf,g_rw_tendf,ph_tendf,g_ph_tendf, &
                                    t_tendf,g_t_tendf,&
                                    moist_tend,g_moist_tend,num_3d_m,  &
                                    ims, ime, jms, jme, kms, kme,     &
                                    grid%i_start(ij), grid%i_end(ij), &
                                    grid%j_start(ij), grid%j_end(ij), &
                                    k_start, k_end                   )
     END DO
   !$OMP END PARALLEL DO
BENCH_END(init_zero_tend_tim)

#ifdef DM_PARALLEL
     if(dyn_opt == DYN_EM) then
       if(grid%trace_use) call trace_entry("solve_tl_halo_comm")
#     include "HALO_EM_PHYS_A.inc"
#     include "HALO_EM_PHYS_TL_A.inc"
       if(grid%trace_use) call trace_exit("solve_tl_halo_comm")
     endif
#endif

!<DESCRIPTION>
!<pre>
!(2) The non-timesplit physics begins with a call to "phy_prep"
!    (which computes some diagnostic variables such as temperature,
!    pressure, u and v at p points, etc).  This is followed by
!    calls to the physics drivers:
!
!              radiation,
!              surface,
!              pbl,
!              cumulus,
!              3D TKE and mixing.
!<pre>
!</DESCRIPTION>


BENCH_START(phy_prep_tim)
!     if(dyn_opt == DYN_EM) then
      !$OMP PARALLEL DO   &
      !$OMP PRIVATE ( ij )
      DO ij = 1 , grid%num_tiles

         CALL wrf_debug ( 200 , ' call phy_prep' )

         call g_phy_prep( p,g_p,pb,ph_2,g_ph_2,phb,t_2,g_t_2, &
                          mu_3d,rho,th_phy,g_th_phy,p_phy,g_p_phy, &
                          pi_phy,g_pi_phy,u_phy,v_phy,p8w,&
                          g_p8w,t_phy,g_t_phy,t8w,g_t8w, &
                          z,g_z,z_at_w,g_z_at_w,dz8w,fnm,fnp, &
                          rthraten,rthblten,rublten,rvblten,rqvblten,rqcblten,&
                          rqiblten,rthcuten,rqvcuten,rqccuten, &
                          rqrcuten,rqicuten,rqscuten,rthften,rqvften, &
                          ide,jde,kde,ims,ime,jms,jme,kms,kme, &
                          grid%i_start(ij),grid%i_end(ij), &
                          grid%j_start(ij),grid%j_end(ij),k_start,k_end )
      ENDDO
      !$OMP END PARALLEL DO
!     endif

BENCH_END(phy_prep_tim)

!  physics to implement

!      CALL set_tiles ( grid , ids , ide-1 , jds , jde-1 ips , ipe , jps , jpe )

! Open MP loops are in physics drivers
! radiation

         CALL wrf_debug ( 200 , ' call radiation_driver' )
BENCH_START(rad_driver_tim)
         if(dyn_opt == DYN_EM) then
         CALL radiation_driver(itimestep,dt,AER_DRY,AER_WATER,       &
                    RTHRATENLW,RTHRATENSW,RTHRATEN,GLW,GSW,          &
                    SWDOWN,                                          &
                    XLAT,XLONG,ALBEDO,CLDFRA,EMISS,                  &
                    rho,moist_2,num_3d_m,                            &
                    p8w,p_phy,pb,pi_phy,dz8w,t_phy,t8w,              &
                    GMT,JULDAY,config_flags,RADT,STEPRA,ICLOUD,      &
                    taucldi,taucldc,warm_rain,                       &
                    XLAND,TSK,HTOP,HBOT,CUPPT,VEGFRA,SNOW,           &
                    julyr,                                           &
                    1   ,                                            &
                    TOTSWDN,TOTLWDN,RSWTOA,RLWTOA,CZMEAN,            &
                    CFRACL,CFRACM,CFRACH,                            &
                    ACFRST,NCFRST,ACFRCV,NCFRCV,                     &
                    ids,ide, jds,jde, kds,kde,                       &
                    ims,ime, jms,jme, kms,kme,                       &
                    grid%i_start, min(grid%i_end, ide-1),            &
                    grid%j_start, min(grid%j_end, jde-1),            &
                    k_start    , min(k_end,kde-1) , grid%num_tiles   )
         endif
BENCH_END(rad_driver_tim)



!********* Surface driver
! surface

BENCH_START(surf_driver_tim)
      if(dyn_opt == DYN_EM) then
      CALL wrf_debug ( 200 , ' call surface_driver' )
      CALL surface_driver(                                          &
     &           ACSNOM,ACSNOW,AKHS,AKMS,ALBEDO,BR,CANWAT,CAPG        &
     &          ,CHKLOWQ,CONFIG_FLAGS,DT,DX,DZ8W,DZS,EMISS,GLW        &
     &          ,GRDFLX,GSW,GZ1OZ0,HFX,HOL,HT,IFSNOW,ISFFLX           &
     &          ,ISLTYP,ITIMESTEP,IVGTYP,LOWLYR,MAVAIL,MOIST_2,MOL,RMOL&
     &          ,NUM_SOIL_LAYERS,NUM_3D_M,P8W,PBLH,PI_PHY,PSHLTR,PSIH &
     &          ,PSIM,P_PHY,Q10,Q2,QFX,QSFC,QSHLTR,QZ0,RAINBL         &
     &          ,RAINCV,RAINNCV,REGIME,RHO,SFCEVP,SFCEXC,SFCRUNOFF    &
     &          ,SMOIS,SMSTAV,SMSTOT,SNOALB,SNOW,SNOWC,SNOWH,STEPBL   &
     &          ,T2,TH10,TH2,THC,THZ0,TH_PHY,TMN,TSHLTR,TSK,TSLB      &
     &          ,T_PHY,U10,UDRUNOFF,UST,UZ0,U_FRAME,U_PHY,V10,VEGFRA  &
     &          ,VZ0,V_FRAME,V_PHY,WARM_RAIN,WSPD,XICE,XLAND,Z,ZNT,ZS &
     &          ,CT,TKE_MYJ                                           &
     &          ,ALBBCK,LH,SH2O,SHDMAX,SHDMIN,Z0                      &
     &          ,flqc,flhc,qsg,qvg,qcg,soilt1,tsnav                   & ! for RUC LSM
     &          ,SMFR3D,KEEPFR3DFLAG                                  &
     &          ,PSFC                                                 &
     &          ,ids,ide, jds,jde, kds,kde                            &
     &          ,ims,ime, jms,jme, kms,kme                            &
     &          ,grid%i_start, min(grid%i_end, ide-1)                 &
     &          ,grid%j_start, min(grid%j_end, jde-1)                 &
     &          ,k_start    , min(k_end,kde-1) , grid%num_tiles       )
      endif
BENCH_END(surf_driver_tim)

!*********
! pbl

BENCH_START(pbl_driver_tim)
      if(dyn_opt == DYN_EM) then
      CALL wrf_debug ( 200 , ' call pbl_driver' )
      CALL pbl_driver(itimestep,dt,u_frame,v_frame                    &
     &           ,RUBLTEN,RVBLTEN,RTHBLTEN                            &
     &           ,RQVBLTEN,RQCBLTEN,RQIBLTEN                          &
     &           ,TSK,XLAND,ZNT,HT                                    &
     &           ,UST,HOL,MOL,PBLH                                    &
     &           ,HFX,QFX,REGIME,GRDFLX                               &
     &           ,u_phy,v_phy,th_phy,rho,moist_2                      &
     &           ,p_phy,pi_phy,p8w,t_phy,dz8w,z                       &
     &           ,TKE_MYJ,EXCH_H,AKHS,AKMS                            &
     &           ,THZ0,QZ0,UZ0,VZ0,QSFC,LOWLYR                        &
     &           ,PSIM, PSIH, GZ1OZ0, WSPD, BR, CHKLOWQ               &
     &           ,config_flags,DX,num_3d_m                            &
     &           ,STEPBL,warm_rain                                    &
     &           ,KPBL,CT,LH,SNOW,XICE                                &
     &           ,ids,ide, jds,jde, kds,kde                           &
     &           ,ims,ime, jms,jme, kms,kme                           &
     &           ,grid%i_start, min(grid%i_end,ide-1)                 &
     &           ,grid%j_start, min(grid%j_end,jde-1)                 &
     &           ,k_start    , min(k_end,kde-1) , grid%num_tiles  )
      endif
BENCH_END(pbl_driver_tim)

! cumulus para.

BENCH_START(cu_driver_tim)
         if(dyn_opt == DYN_EM) then
         CALL wrf_debug ( 200 , ' call cumulus_driver' )
         CALL cumulus_driver(                                          &
                 ! Order dependent args: domain, mem, tile dims
                     ids,ide, jds,jde, kds,kde                         &
                   , ims,ime, jms,jme, kms,kme                         &
                   , grid%i_start, min(grid%i_end, ide-1)              &
                   , grid%j_start, min(grid%j_end, jde-1)              &
                   , k_start  , min(k_end,kde-1) , grid%num_tiles      &
                 ! Prognostic variables
                   , U=u_phy   ,V=v_phy   ,TH=th_phy  ,T=t_phy         &
                   , W=w_2     ,P=p_phy   ,PI=pi_phy  ,RHO=rho         &
                 ! Other arguments
                   , ITIMESTEP=itimestep ,DT=dt      ,DX=dx            &
                   , RAINC=rainc   ,RAINCV=raincv   ,NCA=nca           &
                   , HTOP=htop     ,HBOT=hbot       ,KPBL=kpbl         &
                   , DZ8W=dz8w     ,P8W=p8w                            &
                   , W0AVG=w0avg   ,STEPCU=stepcu                      &
                   , CLDEFI=cldefi ,LOWLYR=lowlyr ,XLAND=xland         &
                   , APR_GR=apr_gr ,APR_W=apr_w   ,APR_MC=apr_mc       &
                   , APR_ST=apr_st ,APR_AS=apr_as ,APR_CAPMA=apr_capma &
                   , APR_CAPME=apr_capme          ,APR_CAPMI=apr_capmi &
                   , MASS_FLUX=mass_flux          ,XF_ENS=xf_ens       &
                   , PR_ENS=pr_ens ,HT=ht                              &
                   , ENSDIM=ensdim ,MAXIENS=maxiens ,MAXENS=maxens     &
                   , MAXENS2=maxens2                ,MAXENS3=maxens3   &
                   , CU_ACT_FLAG=cu_act_flag   ,WARM_RAIN=warm_rain    &
                 ! Selection flag
                   , CU_PHYSICS=config_flags%cu_physics                &
                 ! Moisture tendency arguments
                   , RQVCUTEN=rqvcuten , RQCCUTEN=rqccuten             &
                   , RQRCUTEN=rqrcuten , RQVBLTEN=rqvblten             &
                   , RQVFTEN=rqvften                                   &
                 ! Other tendency arguments
                   , RTHRATEN=rthraten , RTHBLTEN=rthblten             &
                   , RTHCUTEN=rthcuten , RTHFTEN=rthften               &
                 ! Moisture tracer arguments
                   , QV_CURR=moist_2(ims,kms,jms,P_QV), F_QV=F_QV      &
                   , QC_CURR=moist_2(ims,kms,jms,P_QC), F_QC=F_QC      &
                   , QR_CURR=moist_2(ims,kms,jms,P_QR), F_QR=F_QR      &
                   , QI_CURR=moist_2(ims,kms,jms,P_QI), F_QI=F_QI      &
                   , QS_CURR=moist_2(ims,kms,jms,P_QS), F_QS=F_QS      &
                   , QG_CURR=moist_2(ims,kms,jms,P_QG), F_QG=F_QG      &
                                                                       )
         endif

         IF (cu_physics .eq. 5) then            !IF for CUDU
          print*,'call cudu'
            CALL wrf_debug(100,'in du_cps')
            g_rqvcuten(:,:,:) = 0.0
            g_rthcuten(:,:,:) = 0.0
            DO ij = 1, grid%num_tiles

               do j = grid%j_start(ij), min(grid%j_end(ij), jde-1)
               do k = k_start, min(k_end, kde-1)
               do i = grid%i_start(ij), min(grid%i_end(ij), ide-1)
                  rho(i,k,j) = 1./alt(i,k,j)*(1.+moist_2(i,k,j,P_QV))
                  u_phy(i,k,j) = 0.5*(u_2(i,k,j)+u_2(i+1,k,j))
                  v_phy(i,k,j) = 0.5*(v_2(i,k,j)+v_2(i,k,j+1))
                  rthcuten(i,k,j) = rthcuten(i,k,j)/mut(i,j)
                  rqvcuten(i,k,j) = rqvcuten(i,k,j)/mut(i,j)
                  g_rthcuten(i,k,j) = g_rthcuten(i,k,j)/mut(i,j)
                  g_rqvcuten(i,k,j) = g_rqvcuten(i,k,j)/mut(i,j)
               end do
               end do
               end do

               do j = grid%j_start(ij), min(grid%j_end(ij), jde-1)
               do k = k_start, kde-1
               do i = grid%i_start(ij), min(grid%i_end(ij), ide-1)
                  dz8w(i,k,j)= z_at_w(i,k+1,j)-z_at_w(i,k,j)
               end do
               end do
               end do

               do j = grid%j_start(ij), min(grid%j_end(ij), jde-1)
               do i = grid%i_start(ij), min(grid%i_end(ij), ide-1)
                  dz8w(i,kde,j) = 0.
               end do
               end do
          
               CALL DUCU_D( ids,ide,jds,jde,kds,kde &
               ,ims,ime,jms,jme,kms,kme &
               ,grid%i_start(ij),min(grid%i_end(ij), ide-1)      &
               ,grid%j_start(ij), min(grid%j_end(ij), jde-1)    &
               ,k_start,min(k_end, kde-1) &
               ,dt ,itimestep ,dx                    &
               ,rho,g_rho,raincv,g_raincv                       &
               ,nca,u_phy ,v_phy                        &
               ,th_phy,g_th_phy,t_phy,g_t_phy                   &
               ,w_2                                           &
               ,dz8w,g_dz8w,z,g_z,p_phy,g_p_phy                 &
               ,pi_phy,grid%W0AVG,XLV              &
               ,CP ,R_d ,R_v ,G ,EP_2             &
               ,SVP1 ,SVP2 ,SVP3 ,SVPT0    &
               ,stepcu                                   &
               ,cu_act_flag ,warm_rain    &
               ,HTOP,HBOT                           &
               ,moist_2(ims,kms,jms,P_QV),g_moist_2(ims,kms,jms,P_QV) &
              ! optionals
               ,rthcuten,g_rthcuten, rqvcuten,g_rqvcuten )
     
               do j = grid%j_start(ij), min(grid%j_end(ij), jde-1)
               do i = grid%i_start(ij), min(grid%i_end(ij), ide-1)
                  do k = k_start, min(k_end, kde-1)
                     g_t_tendf(I,K,J) = g_t_tendf(I,K,J) + rthcuten(I,K,J) * g_mut(I,J) + g_rthcuten(I,K,J) * mut(I,J)
                     t_tendf(I,K,J) = t_tendf(I,K,J) + rthcuten(I,K,J) * mut(I,J)
                     g_moist_tend(I,K,J,P_QV) = g_moist_tend(I,K,J,P_QV) + rqvcuten(I,K,J) * g_mut(I,J) + g_rqvcuten(I,K,J) * mut(I,J)
                     moist_tend(I,K,J,P_QV) = moist_tend(I,K,J,P_QV) + rqvcuten(I,K,J) * mut(I,J)
                  end do
                  g_rainc(I,J) = g_rainc(I,J) + g_raincv(I,J)
                  rainc(I,J) = rainc(I,J) + raincv(I,J)
               end do
               end do

            ENDDO
         ENDIF

BENCH_END(cu_driver_tim)

! calculate_phy_tend

BENCH_START(cal_phy_tend)
      if(dyn_opt == DYN_EM) then
      !$OMP PARALLEL DO   &
      !$OMP PRIVATE ( ij )

      DO ij = 1 , grid%num_tiles

          CALL wrf_debug ( 200 , ' call calculate_phy_tend' )
          CALL calculate_phy_tend (config_flags,mut,pi_phy,            &
                     RTHRATEN,                                         &
                     RUBLTEN,RVBLTEN,RTHBLTEN,                         &
                     RQVBLTEN,RQCBLTEN,RQIBLTEN,                       &
                     RTHCUTEN,RQVCUTEN,RQCCUTEN,RQRCUTEN,              &
                     RQICUTEN,RQSCUTEN,                                &
                     ids,ide, jds,jde, kds,kde,                        &
                     ims,ime, jms,jme, kms,kme,                        &
                     grid%i_start(ij), min(grid%i_end(ij),ide-1),      &
                     grid%j_start(ij), min(grid%j_end(ij),jde-1),      &
                     k_start    , min(k_end,kde-1)                     )

      ENDDO
      !$OMP END PARALLEL DO
      endif
BENCH_END(cal_phy_tend)

! tke diffusion

     IF(diff_opt .eq. 2 .OR. diff_opt .eq. 1) THEN

BENCH_START(comp_diff_metrics_tim)
       if(dyn_opt == DYN_EM) then
       !$OMP PARALLEL DO   &
       !$OMP PRIVATE ( ij )

       DO ij = 1 , grid%num_tiles

          CALL wrf_debug ( 200 , ' call compute_diff_metrics ' )
          CALL compute_diff_metrics ( config_flags, ph_2, phb, z, rdz, rdzw, &
                                      zx, zy, rdx, rdy,                      &
                                      ids, ide, jds, jde, kds, kde,          &
                                      ims, ime, jms, jme, kms, kme,          &
                                      grid%i_start(ij), grid%i_end(ij),      &
                                      grid%j_start(ij), grid%j_end(ij),      &
                                      k_start    , k_end                    )
       ENDDO
       !$OMP END PARALLEL DO
       endif
BENCH_END(comp_diff_metrics_tim)

#ifdef DM_PARALLEL
     if(dyn_opt == DYN_EM) then
       if(grid%trace_use) call trace_entry("solve_tl_halo_comm")
#  include "PERIOD_BDY_EM_A1.inc"
       if(grid%trace_use) call trace_exit("solve_tl_halo_comm")
     endif
#endif

BENCH_START(tke_diff_bc_tim)
       if(dyn_opt == DYN_EM) then
       DO ij = 1 , grid%num_tiles

          CALL wrf_debug ( 200 , ' call bc for diffusion_metrics ' )
          CALL set_physical_bc3d( rdzw , 'w', config_flags,           &
                                  ids, ide, jds, jde, kds, kde,       &
                                  ims, ime, jms, jme, kms, kme,       &
                                  ips, ipe, jps, jpe, kps, kpe,       &
                                  grid%i_start(ij), grid%i_end(ij),   &
                                  grid%j_start(ij), grid%j_end(ij),   &
                                  k_start    , k_end                 )
          CALL set_physical_bc3d( rdz , 'w', config_flags,           &
                                  ids, ide, jds, jde, kds, kde,       &
                                  ims, ime, jms, jme, kms, kme,       &
                                  ips, ipe, jps, jpe, kps, kpe,       &
                                  grid%i_start(ij), grid%i_end(ij),   &
                                  grid%j_start(ij), grid%j_end(ij),   &
                                  k_start    , k_end                 )
          CALL set_physical_bc3d( z , 'w', config_flags,           &
                                  ids, ide, jds, jde, kds, kde,       &
                                  ims, ime, jms, jme, kms, kme,       &
                                  ips, ipe, jps, jpe, kps, kpe,       &
                                  grid%i_start(ij), grid%i_end(ij),   &
                                  grid%j_start(ij), grid%j_end(ij),   &
                                  k_start    , k_end                 )
          CALL set_physical_bc3d( zx , 'w', config_flags,           &
                                  ids, ide, jds, jde, kds, kde,       &
                                  ims, ime, jms, jme, kms, kme,       &
                                  ips, ipe, jps, jpe, kps, kpe,       &
                                  grid%i_start(ij), grid%i_end(ij),   &
                                  grid%j_start(ij), grid%j_end(ij),   &
                                  k_start    , k_end                 )
          CALL set_physical_bc3d( zy , 'w', config_flags,           &
                                  ids, ide, jds, jde, kds, kde,       &
                                  ims, ime, jms, jme, kms, kme,       &
                                  ips, ipe, jps, jpe, kps, kpe,       &
                                  grid%i_start(ij), grid%i_end(ij),   &
                                  grid%j_start(ij), grid%j_end(ij),   &
                                  k_start    , k_end                 )

       ENDDO
       endif
BENCH_END(tke_diff_bc_tim)

#ifdef DM_PARALLEL
     if(dyn_opt == DYN_EM) then
       if(grid%trace_use) call trace_entry("solve_tl_halo_comm")
#     include "HALO_EM_TKE_C.inc"
#     include "HALO_EM_TKE_TL_C.inc"
       if(grid%trace_use) call trace_exit("solve_tl_halo_comm")
     endif
#endif

BENCH_START(deform_div_tim)
       if(dyn_opt == DYN_EM) then
       !$OMP PARALLEL DO   &
       !$OMP PRIVATE ( ij )

       DO ij = 1 , grid%num_tiles

          CALL wrf_debug ( 200 , ' call cal_deform_and_div' )
          CALL cal_deform_and_div ( config_flags,u_2,v_2,w_2,div,        &
                                    defor11,defor22,defor33,defor12,     &
                                    defor13,defor23,                     &
                                    u_base, v_base,msfu,msfv,msft,       &
                                    rdx, rdy, dn, dnw, rdz, rdzw,        &
                                    fnm,fnp,cf1,cf2,cf3,zx,zy,           &
                                    ids, ide, jds, jde, kds, kde,        &
                                    ims, ime, jms, jme, kms, kme,        &
                                    grid%i_start(ij), grid%i_end(ij),    &
                                    grid%j_start(ij), grid%j_end(ij),    &
                                    k_start    , k_end                  )
       ENDDO
       !$OMP END PARALLEL DO
       endif
BENCH_END(deform_div_tim)


#ifdef DM_PARALLEL
     if(dyn_opt == DYN_EM) then
       if(grid%trace_use) call trace_entry("solve_tl_halo_comm")
#     include "HALO_EM_TKE_D.inc"
       if(grid%trace_use) call trace_exit("solve_tl_halo_comm")
     endif
#endif


! calculate tke, kmh, and kmv

BENCH_START(calc_tke_tim)
       !$OMP PARALLEL DO   &
       !$OMP PRIVATE ( ij )

       DO ij = 1 , grid%num_tiles
          CALL wrf_debug ( 200 , ' call calculate_km_kh' )

          call g_calculate_km_kh( config_flags,dt,dampcoef,zdamp,damp_opt, &
                                  xkmh,xkmhd,g_xkmhd,xkmv,xkhh,xkhv,bn2,g_bn2,khdif, &
                                  defor11,defor22,defor33,defor12,defor13,defor23, &
                                  tke_2(ims,kms,jms),p8w,g_p8w,t8w,g_t8w, &
                                  th_phy,g_th_phy,t_phy,g_t_phy,p_phy,&
                                  g_p_phy,moist_2,g_moist_2, &
                                  dx,dy,rdz,rdzw,num_3d_m,cf1,cf2,cf3,kh_tke_upper_bound, &
                                  ids,ide,jds,jde,kde,ims,ime,jms,jme,kms,kme, &
                                  grid%i_start(ij),grid%i_end(ij), &
                                  grid%j_start(ij),grid%j_end(ij),k_start,k_end )
       ENDDO
       !$OMP END PARALLEL DO
BENCH_END(calc_tke_tim)

#ifdef DM_PARALLEL
       if(grid%trace_use) call trace_entry("solve_tl_halo_comm")
#     include "HALO_EM_TKE_E.inc"
#     include "HALO_EM_TKE_TL_E.inc"
       if(grid%trace_use) call trace_exit("solve_tl_halo_comm")
#endif

     ENDIF

#ifdef DM_PARALLEL
     if(dyn_opt == DYN_EM) then
       if(grid%trace_use) call trace_entry("solve_tl_halo_comm")
#      include "PERIOD_BDY_EM_PHY_BC.inc"
#      include "PERIOD_BDY_EM_CHEM.inc"
       if(grid%trace_use) call trace_exit("solve_tl_halo_comm")
     endif
#endif

BENCH_START(phy_bc_tim)
     if(dyn_opt == DYN_EM) then
     !$OMP PARALLEL DO   &
     !$OMP PRIVATE ( ij )

     DO ij = 1 , grid%num_tiles

          CALL wrf_debug ( 200 , ' call phy_bc' )
       CALL phy_bc (config_flags,div,defor11,defor22,defor33,            &
                            defor12,defor13,defor23,                     &
                            xkmh,xkmhd,xkmv,xkhh,xkhv,                   &
                            tke_2(ims,kms,jms),                          &
                            RUBLTEN, RVBLTEN,                            &
                            ids, ide, jds, jde, kds, kde,                &
                            ims, ime, jms, jme, kms, kme,                &
                            ips, ipe, jps, jpe, kps, kpe,                &
                            grid%i_start(ij), grid%i_end(ij),                      &
                            grid%j_start(ij), grid%j_end(ij),                      &
                            k_start    , k_end                           )
     ENDDO
     !$OMP END PARALLEL DO
     endif
BENCH_END(phy_bc_tim)

#ifdef DM_PARALLEL
     if(grid%trace_use) call trace_entry("solve_tl_halo_comm")
!-----------------------------------------------------------------------
!
! MPP for some physics tendency, km, kh, deformation, and divergence
!
!               *                     *
!             * + *      * + *        +
!               *                     *
!
! (for PBL)
! RUBLTEN                  x
! RVBLTEN                             x
!
! (for diff_opt >= 1)
! defor11                  x
! defor22                             x
! defor12       x
! defor13                  x
! defor23                             x
! div           x
! xkmv          x
! xkmh          x
! xkmhd         x
! xkhv          x
! xkhh          x
! tke           x
!
!-----------------------------------------------------------------------
      IF ( bl_pbl_physics .ge. 1 ) THEN
#      include "HALO_EM_PHYS_PBL.inc"
      ENDIF
      IF ( diff_opt .ge. 1 ) THEN
#      include "HALO_EM_PHYS_DIFFUSION.inc"
      ENDIF

      IF      ( h_mom_adv_order <= 4 ) THEN
#       include "HALO_EM_TKE_3.inc"
      ELSE IF ( h_mom_adv_order <= 6 ) THEN
#       include "HALO_EM_TKE_5.inc"
      ELSE
        WRITE(wrf_err_message,*)'solve_em_tl: invalid h_mom_adv_order = ',h_mom_adv_order
        CALL wrf_error_fatal(TRIM(wrf_err_message))
      ENDIF
     if(grid%trace_use) call trace_exit("solve_tl_halo_comm")
#endif

BENCH_START(update_phy_ten_tim)
      if(dyn_opt == DYN_EM) then
      !$OMP PARALLEL DO   &
      !$OMP PRIVATE ( ij )

      DO ij = 1 , grid%num_tiles

          CALL wrf_debug ( 200 , ' call update_phy_ten' )
        CALL update_phy_ten(t_tendf, ru_tendf, rv_tendf,moist_tend,        &
                          RTHRATEN,RTHBLTEN,RTHCUTEN,RUBLTEN,RVBLTEN,  &
                          RQVBLTEN,RQCBLTEN,RQIBLTEN,                  &
                          RQVCUTEN,RQCCUTEN,RQRCUTEN,RQICUTEN,RQSCUTEN,&
                          num_3d_m,config_flags,rk_step,              &
                          ids, ide, jds, jde, kds, kde,                &
                          ims, ime, jms, jme, kms, kme,                &
                          grid%i_start(ij), grid%i_end(ij),                      &
                          grid%j_start(ij), grid%j_end(ij),                      &
                          k_start, k_end                               )

      END DO
      !$OMP END PARALLEL DO
      endif
BENCH_END(update_phy_ten_tim)

     IF( diff_opt .eq. 2 .and. km_opt .eq. 2 ) THEN

BENCH_START(tke_rhs_tim)
       if(dyn_opt == DYN_EM) then
       !$OMP PARALLEL DO   &
       !$OMP PRIVATE ( ij )

       DO ij = 1 , grid%num_tiles

          CALL tke_rhs  ( tke_tend,BN2,                               &
                          config_flags,defor11,defor22,defor33,       &
                          defor12,defor13,defor23,u_2,v_2,w_2,div,    &
                          tke_2(ims,kms,jms),mut,                     &
                          th_phy,p_phy,p8w,t8w,z,fnm,fnp,             &
                          cf1,cf2,cf3,msft,xkmh,xkmv,xkhv,rdx,rdy,    &
                          dx,dy,dt,zx,zy,rdz,rdzw,dn,dnw,mix_cr_len,  &
                          ids, ide, jds, jde, kds, kde,               &
                          ims, ime, jms, jme, kms, kme,               &
                          grid%i_start(ij), grid%i_end(ij),           &
                          grid%j_start(ij), grid%j_end(ij),           &
                          k_start    , k_end                         )

       ENDDO
       !$OMP END PARALLEL DO
       endif
BENCH_END(tke_rhs_tim)

     ENDIF

! calculate vertical diffusion first and then horizontal
! (keep this order)

     IF(diff_opt .eq. 2) THEN

       IF (bl_pbl_physics .eq. 0) THEN

BENCH_START(vert_diff_tim)
         if(dyn_opt == DYN_EM) then
         !$OMP PARALLEL DO   &
         !$OMP PRIVATE ( ij )
         DO ij = 1 , grid%num_tiles

           CALL wrf_debug ( 200 , ' call vertical_diffusion_2 ' )
           CALL vertical_diffusion_2( ru_tendf, rv_tendf, rw_tendf,              &
                                      t_tendf, tke_tend,                         &
                                      moist_tend, num_3d_m,                      &
                                      chem_tend, num_3d_c,                       &
                                      u_2, v_2,                                  &
                                      t_2,u_base,v_base,t_base,qv_base,          &
                                      mut,tke_2,config_flags,                    &
                                      defor13,defor23,defor33,                   &
                                      div, moist_2, chem_2, xkmv, xkhv, km_opt,  &
                                      fnm, fnp, dn, dnw, rdz, rdzw,              &
                                      ids, ide, jds, jde, kds, kde,              &
                                      ims, ime, jms, jme, kms, kme,              &
                                      grid%i_start(ij), grid%i_end(ij),          &
                                      grid%j_start(ij), grid%j_end(ij),          &
                                      k_start    , k_end                        )

         ENDDO
         !$OMP END PARALLEL DO
         endif
BENCH_END(vert_diff_tim)

       ENDIF
!
BENCH_START(hor_diff_tim)
       if(dyn_opt == DYN_EM) then
       !$OMP PARALLEL DO   &
       !$OMP PRIVATE ( ij )
       DO ij = 1 , grid%num_tiles

          CALL wrf_debug ( 200 , ' call horizontal_diffusion_2' )
         CALL horizontal_diffusion_2( t_tendf, ru_tendf, rv_tendf, rw_tendf, &
                                      tke_tend,                              &
                                      moist_tend, num_3d_m,                  &
                                      chem_tend, num_3d_c,                   &
                                      t_2, th_phy,                           &
                                      mut, tke_2, config_flags,              &
                                      defor11, defor22, defor12,             &
                                      defor13, defor23, div,                 &
                                      moist_2, chem_2,                       &
                                      msfu, msfv, msft, xkmhd, xkhh, km_opt, &
                                      rdx, rdy, rdz, rdzw,                   &
                                      fnm, fnp, cf1, cf2, cf3,               &
                                      zx, zy, dn, dnw,                       &
                                      ids, ide, jds, jde, kds, kde,          &
                                      ims, ime, jms, jme, kms, kme,          &
                                      grid%i_start(ij), grid%i_end(ij),      &
                                      grid%j_start(ij), grid%j_end(ij),      &
                                      k_start    , k_end                    )
       ENDDO
       !$OMP END PARALLEL DO
       endif
BENCH_END(hor_diff_tim)

     ENDIF

     END IF rk_step_is_one


   !$OMP PARALLEL DO   &
   !$OMP PRIVATE ( ij )
   DO ij = 1 , grid%num_tiles
      CALL wrf_debug ( 200 , ' call rk_tendency' )


      call g_rk_tendency( config_flags,rk_step, &
                          ru_tend,g_ru_tend, &
                          rv_tend,g_rv_tend, &
                          rw_tend,g_rw_tend, &
                          ph_tend,g_ph_tend, &
                          t_tend,g_t_tend, &
                          ru_tendf,g_ru_tendf, &
                          rv_tendf,g_rv_tendf, &
                          rw_tendf,g_rw_tendf, &
                          t_tendf,g_t_tendf, &
                          mu_tend,g_mu_tend, &
                          u_save,g_u_save,&
                          v_save,g_v_save,w_save,g_w_save, &
                          ph_save,g_ph_save,t_save,g_t_save, &
                          ru,g_ru,rv,g_rv,rw,g_rw,ww,g_ww, &
                          u_2,g_u_2,v_2,g_v_2,w_2,&
                          g_w_2,t_2,g_t_2,ph_2,g_ph_2, &
                          u_1,g_u_1,v_1,g_v_1,w_1,g_w_1,t_1,g_t_1,ph_1,g_ph_1,phb, &
                          t_init,mu_2,g_mu_2,mut,g_mut,muu,g_muu,&
                          muv,g_muv,mub, &
                          al,g_al,alt,g_alt,p,g_p,pb,php,g_php, &
                          cqu,g_cqu,cqv,g_cqv,cqw,g_cqw, &
                          u_base,v_base,z_base,msfu,msfv,msft,f,e,sina,&
                          cosa,fnm,fnp,rdn,rdnw,dt,rdx,rdy, &
                          kvdif,xkmhd,g_xkmhd,cf1,cf2,cf3,cfn,cfn1, &
                          non_hydrostatic,leapfrog, &
                          ids,ide,jds,jde,kde,ims,&
                          ime,jms,jme,kms,kme, &
                          grid%i_start(ij),grid%i_end(ij), &
                          grid%j_start(ij),grid%j_end(ij),k_start,k_end )

!   IF (rk_step == 1 ) then
!
!      call g_surface_drag( ru_tendf, g_ru_tendf, rv_tendf, g_rv_tendf, &
!                           u_2, g_u_2, v_2, g_v_2, xland, muu, g_muu, muv, &
!                           g_muv, z, g_z, z_at_w, g_z_at_w, &
!                           ids, ide, jds, jde, kds, kde, &
!                           ims, ime, jms, jme, kms, kme, &
!                           grid%i_start(ij), grid%i_end(ij), &
!                           grid%j_start(ij), grid%j_end(ij), k_start, k_end )
!   
!#ifdef DM_PARALLEL
!     if(grid%trace_use) call trace_entry("solve_tl_halo_comm")
!#     include "HALO_EM_SD_TL.inc"
!     if(grid%trace_use) call trace_exit("solve_tl_halo_comm")
!#endif
!
!
!  ENDIF

   END DO
   !$OMP END PARALLEL DO
BENCH_END(rk_tend_tim)

BENCH_START(relax_bdy_dry_tim)
   !$OMP PARALLEL DO   &
   !$OMP PRIVATE ( ij )
   DO ij = 1 , grid%num_tiles

     IF( (config_flags%specified .or. config_flags%nested) .and. rk_step == 1 ) THEN 
         call g_relax_bdy_dry( config_flags, &
                               u_save,g_u_save,v_save,g_v_save,ph_save,g_ph_save, &
                               t_save,g_t_save,w_save,g_w_save, &
                               mu_tend,g_mu_tend,ru,g_ru,rv,g_rv,ph_2,g_ph_2, &
                               t_2,g_t_2,w_2,g_w_2,mu_2,g_mu_2,mut,g_mut, &
                               u_b,g_u_b,v_b,g_v_b,ph_b,g_ph_b,t_b,g_t_b,&
                               w_b,g_w_b,mu_b,g_mu_b,u_bt,g_u_bt,v_bt,g_v_bt, &
                               ph_bt,g_ph_bt,t_bt,g_t_bt,w_bt,g_w_bt,mu_bt,g_mu_bt, &
                               spec_bdy_width,spec_zone,&
                               relax_zone,dtbc,fcx,gcx, &
                               ijds,ijde, &
                               ids,ide,jds,jde,kds,kde, &
                               ims,ime,jms,jme,kms,kme, &
                               grid%i_start(ij),grid%i_end(ij), &
                               grid%j_start(ij),grid%j_end(ij),k_start,k_end )

     ENDIF

     call g_rk_addtend_dry( ru_tend,g_ru_tend,rv_tend,g_rv_tend,rw_tend,g_rw_tend,ph_tend,g_ph_tend, &
                            t_tend,g_t_tend,ru_tendf,g_ru_tendf,rv_tendf,g_rv_tendf,rw_tendf,g_rw_tendf, &
                            ph_tendf,g_ph_tendf,t_tendf,g_t_tendf,u_save,g_u_save,v_save,g_v_save, &
                            w_save,g_w_save,ph_save,g_ph_save,t_save,g_t_save,rk_step,h_diabatic, &
                            mut,g_mut,msft,msfu,msfv,ide,jde,ims,ime,jms,jme,kms,kme, &
                            grid%i_start(ij),grid%i_end(ij),grid%j_start(ij),grid%j_end(ij),k_start,k_end )

     IF( config_flags%specified .or. config_flags%nested ) THEN 
         call g_spec_bdy_dry( config_flags,ru_tend,g_ru_tend,rv_tend,g_rv_tend, &
                              ph_tend,g_ph_tend,t_tend,g_t_tend,rw_tend,g_rw_tend, &
                              mu_tend,g_mu_tend,u_bt,g_u_bt,v_bt,g_v_bt,ph_bt,g_ph_bt, &
                              t_bt,g_t_bt,w_bt,g_w_bt,mu_bt,g_mu_bt,spec_bdy_width,spec_zone,ijds,ijde, &
                              ids,ide,jds,jde,kds,kde, &
                              ims,ime,jms,jme,kms,kme, &
                              grid%i_start(ij),grid%i_end(ij), &
                              grid%j_start(ij),grid%j_end(ij), &
                              k_start,k_end )

     ENDIF

   END DO
   !$OMP END PARALLEL DO
BENCH_END(relax_bdy_dry_tim)


!<DESCRIPTION>
!<pre>
! (3) Small (acoustic,sound) steps.
!
!    Several acoustic steps are taken each RK pass.  A small step 
!    sequence begins with calculating perturbation variables 
!    and coupling them to the column dry-air-mass mu 
!    (call to small_step_prep).  This is followed by computing
!    coefficients for the vertically implicit part of the
!    small timestep (call to calc_coef_w).  
!
!    The small steps are taken
!    in the named loop "small_steps:".  In the small_steps loop, first 
!    the horizontal momentum (u and v) are advanced (call to advance_uv),
!    next mu and theta are advanced (call to advance_mu_t) followed by
!    advancing w and the geopotential (call to advance_w).  Diagnostic
!    values for pressure and inverse density are updated at the end of
!    each small_step.
!
!    The small-step section ends with the change of the perturbation variables
!    back to full variables (call to small_step_finish).
!</pre>
!</DESCRIPTION>

BENCH_START(small_step_prep_tim)
   !$OMP PARALLEL DO   &
   !$OMP PRIVATE ( ij )

   DO ij = 1 , grid%num_tiles

    ! Calculate coefficients for the vertically implicit acoustic/gravity wave
    ! integration.  We only need calculate these for the first pass through -
    ! the predictor step.  They are reused as is for the corrector step.
    ! For third-order RK, we need to recompute these after the first 
    ! predictor because we may have changed the small timestep -> dts.

      CALL wrf_debug ( 200 , ' call calc_coef_w' )

!Print*, '2222222222222222222+++++++++++++++++++++++++TL CODE TESTING'
      call g_small_step_prep( u_1,g_u_1,u_2,g_u_2,v_1,g_v_1,v_2,g_v_2,w_1,g_w_1,w_2,g_w_2, &
                              t_1,g_t_1,t_2,g_t_2,ph_1,g_ph_1,ph_2,&
                              g_ph_2,mub,mu_1,g_mu_1,mu_2,g_mu_2,muu,g_muu,muus,g_muus,muv,g_muv, &
                              muvs,g_muvs,mut,g_mut,muts,g_muts,mudf,g_mudf,u_save,&
                              g_u_save,v_save,g_v_save,w_save,g_w_save,t_save,g_t_save,ph_save,g_ph_save, &
                              mu_save,g_mu_save,ww,g_ww,ww1,g_ww1,c2a,g_c2a,pb,p,&
                              g_p,alt,g_alt,msfu,msfv,msft,rk_step,leapfrog, &
                              ide,jde,kde,ims,ime,jms,jme,kms,kme, &
                              grid%i_start(ij),grid%i_end(ij), &
                              grid%j_start(ij),grid%j_end(ij),k_start,k_end )

      call g_calc_p_rho( al,g_al,p,g_p,ph_2,g_ph_2,alt,g_alt,t_2,g_t_2, &
                         t_save,g_t_save,c2a,g_c2a,pm1,g_pm1,mu_2,g_mu_2,muts,g_muts,&
                         znu,t0,rdnw,dnw,smdiv,non_hydrostatic,0, &
                         ide,jde,kde,ims,ime,jms,jme,kms,kme, &
                         grid%i_start(ij),grid%i_end(ij),grid%j_start(ij),&
                         grid%j_end(ij),k_start,k_end )

      IF (non_hydrostatic) THEN
         call g_calc_coef_w( a,g_a,alpha,g_alpha,gamma,g_gamma,mut,g_mut, &
                             cqw,g_cqw,rdn,rdnw,c2a,g_c2a,dts,g,epssm, &
                             ide,jde,kde,ims,ime,jms,jme,kms,kme, &
                             grid%i_start(ij),grid%i_end(ij),grid%j_start(ij),grid%j_end(ij) )
      ENDIF
   ENDDO

   !$OMP END PARALLEL DO
BENCH_END(small_step_prep_tim)

#ifdef DM_PARALLEL
     if(grid%trace_use) call trace_entry("solve_tl_halo_comm")
!-----------------------------------------------------------------------
!  Stencils for patch communications  (WCS, 29 June 2001)
!  Note:  the small size of this halo exchange reflects the 
!         fact that we are carrying the uncoupled variables 
!         as state variables in the mass coordinate model, as
!         opposed to the coupled variables as in the height
!         coordinate model.
!
!                              * * * * *
!            *        * * *    * * * * *
!          * + *      * + *    * * + * * 
!            *        * * *    * * * * *
!                              * * * * *
!
!  3D variables - note staggering!  ph_2(Z), u_save(X), v_save(Y)
!
!j ph_2      x
!j al        x
!j p         x
!j t_1       x
!j t_save    x
!j u_save    x
!j v_save    x
!
!  the following are 2D (xy) variables
!
!j mu_1      x
!j mu_2      x
!j mudf      x
!--------------------------------------------------------------
#      include "HALO_EM_B.inc"
#      include "HALO_EM_TL_B.inc"
#      include "PERIOD_BDY_EM_B.inc"
     if(grid%trace_use) call trace_exit("solve_tl_halo_comm")
#endif

BENCH_START(set_phys_bc2_tim)
   if(dyn_opt == DYN_EM) then
   !$OMP PARALLEL DO   &
   !$OMP PRIVATE ( ij )

   DO ij = 1 , grid%num_tiles

         CALL set_physical_bc3d( ru_tend, 'u', config_flags,          &
                                 ids, ide, jds, jde, kds, kde, &
                                 ims, ime, jms, jme, kms, kme, &
                                 ips, ipe, jps, jpe, kps, kpe, &
                           grid%i_start(ij), grid%i_end(ij),                 &
                           grid%j_start(ij), grid%j_end(ij),                 &
                           k_start    , k_end                     )

         CALL set_physical_bc3d( rv_tend, 'v', config_flags,            &
                                 ids, ide, jds, jde, kds, kde, &
                                 ims, ime, jms, jme, kms, kme, &
                                 ips, ipe, jps, jpe, kps, kpe, &
                           grid%i_start(ij), grid%i_end(ij),                 &
                           grid%j_start(ij), grid%j_end(ij),                 &
                           k_start    , k_end                     )

         CALL set_physical_bc3d( ph_2, 'w', config_flags,          &
                                 ids, ide, jds, jde, kds, kde, &
                                 ims, ime, jms, jme, kms, kme, &
                                 ips, ipe, jps, jpe, kps, kpe, &
                           grid%i_start(ij), grid%i_end(ij),                 &
                           grid%j_start(ij), grid%j_end(ij),                 &
                           k_start    , k_end                     )

         CALL set_physical_bc3d( al, 'p', config_flags,            &
                                 ids, ide, jds, jde, kds, kde, &
                                 ims, ime, jms, jme, kms, kme, &
                                 ips, ipe, jps, jpe, kps, kpe, &
                           grid%i_start(ij), grid%i_end(ij),                 &
                           grid%j_start(ij), grid%j_end(ij),                 &
                           k_start    , k_end                     )

         CALL set_physical_bc3d( p, 'p', config_flags,             &
                                 ids, ide, jds, jde, kds, kde, &
                                 ims, ime, jms, jme, kms, kme, &
                                 ips, ipe, jps, jpe, kps, kpe, &
                           grid%i_start(ij), grid%i_end(ij),                 &
                           grid%j_start(ij), grid%j_end(ij),                 &
                           k_start    , k_end                     )

         CALL set_physical_bc3d( t_1, 'p', config_flags,             &
                                 ids, ide, jds, jde, kds, kde, &
                                 ims, ime, jms, jme, kms, kme, &
                                 ips, ipe, jps, jpe, kps, kpe, &
                           grid%i_start(ij), grid%i_end(ij),                 &
                           grid%j_start(ij), grid%j_end(ij),                 &
                           k_start    , k_end                     )

         CALL set_physical_bc3d( t_save, 't', config_flags,             &
                                 ids, ide, jds, jde, kds, kde, &
                                 ims, ime, jms, jme, kms, kme, &
                                 ips, ipe, jps, jpe, kps, kpe, &
                           grid%i_start(ij), grid%i_end(ij),                 &
                           grid%j_start(ij), grid%j_end(ij),                 &
                           k_start    , k_end                     )

         CALL set_physical_bc2d( mu_1, 't', config_flags,          &
                                 ids, ide, jds, jde,               &
                                 ims, ime, jms, jme,               &
                                 ips, ipe, jps, jpe,               &
                                 grid%i_start(ij), grid%i_end(ij), &
                                 grid%j_start(ij), grid%j_end(ij) )

         CALL set_physical_bc2d( mu_2, 't', config_flags,          &
                                 ids, ide, jds, jde,               &
                                 ims, ime, jms, jme,               &
                                 ips, ipe, jps, jpe,               &
                                 grid%i_start(ij), grid%i_end(ij), &
                                 grid%j_start(ij), grid%j_end(ij) )

         CALL set_physical_bc2d( mudf, 't', config_flags,          &
                                 ids, ide, jds, jde,               &
                                 ims, ime, jms, jme,               &
                                 ips, ipe, jps, jpe,               &
                                 grid%i_start(ij), grid%i_end(ij), &
                                 grid%j_start(ij), grid%j_end(ij) )

    END DO
    !$OMP END PARALLEL DO
    endif
BENCH_END(set_phys_bc2_tim)

!Print*, '333333333333333333333333333+++++++++++++++++++++++++TL CODE TESTING'
   small_steps : DO iteration = 1 , number_of_small_timesteps

   ! Boundary condition time (or communication time).  

#ifdef DM_PARALLEL
     if(grid%trace_use) call trace_entry("solve_tl_halo_comm")
#      include "PERIOD_BDY_EM_B.inc"
     if(grid%trace_use) call trace_exit("solve_tl_halo_comm")
#endif


      !$OMP PARALLEL DO   &
      !$OMP PRIVATE ( ij )

      DO ij = 1 , grid%num_tiles

BENCH_START(advance_uv_tim)
         call g_advance_uv( u_2,g_u_2,ru_tend,g_ru_tend,v_2,g_v_2,rv_tend,g_rv_tend, &
                            p,g_p,pb,ph_2,g_ph_2,php,g_php,alt,g_alt,al,g_al,&
                            mu_2,g_mu_2,muu,g_muu,cqu,g_cqu,muv,g_muv,cqv,g_cqv, &
                            mudf,g_mudf,rdx,rdy,dts,cf1,cf2,cf3,fnm,fnp,emdiv,rdnw,config_flags,&
                            spec_zone,non_hydrostatic, &
                            ids,ide,jds,jde,kde,ims,ime,jms,jme,kms,kme, &
                            grid%i_start(ij),grid%i_end(ij), &
                            grid%j_start(ij),grid%j_end(ij),k_start,k_end )
BENCH_END(advance_uv_tim)

BENCH_START(spec_bdy_uv_tim)
         IF( config_flags%specified .or. config_flags%nested ) THEN
             call g_spec_bdyupdate( u_2,g_u_2,ru_tend,g_ru_tend,dts_rk, &
                                    'u',spec_zone, &
                                    ids,ide,jds,jde,kde,ims,ime,jms,jme,kms,kme, &
                                    grid%i_start(ij),grid%i_end(ij), &
                                    grid%j_start(ij),grid%j_end(ij),k_start,k_end )
             call g_spec_bdyupdate( v_2,g_v_2,rv_tend,g_rv_tend,dts_rk, &
                                    'v',spec_zone, &
                                    ids,ide,jds,jde,kde,ims,ime,jms,jme,kms,kme, &
                                    grid%i_start(ij),grid%i_end(ij), &
                                    grid%j_start(ij),grid%j_end(ij),k_start,k_end )

         ENDIF
BENCH_END(spec_bdy_uv_tim)

      END DO
      !$OMP END PARALLEL DO

#ifdef DM_PARALLEL
     if(grid%trace_use) call trace_entry("solve_tl_halo_comm")
!
!  Stencils for patch communications  (WCS, 29 June 2001)
!
!         *                     *
!       * + *      * + *        +
!         *                     *
!
!  u_2               x
!  v_2                          x
!
#     include "HALO_EM_C.inc"
#     include "HALO_EM_TL_C.inc"
     if(grid%trace_use) call trace_exit("solve_tl_halo_comm")
#endif

      !$OMP PARALLEL DO   &
      !$OMP PRIVATE ( ij )

      DO ij = 1 , grid%num_tiles

        !  advance the mass in the column, theta, and calculate ww

BENCH_START(advance_mu_t_tim)
          call g_advance_mu_t( ww,g_ww,ww1,g_ww1,u_2,g_u_2,u_save,g_u_save,v_2,g_v_2,v_save,g_v_save, &
                               mu_2,g_mu_2,mut,g_mut,muave,&
                               g_muave,muts,g_muts,muu,g_muu,muv,g_muv,mudf,g_mudf, &
                               t_2,g_t_2,t_save,g_t_save,t_2save,g_t_2save,t_tend,g_t_tend,mu_tend,&
                               g_mu_tend,rdx,rdy,dts,epssm,dnw,fnm,fnp,rdnw,msfu,msfv,msft, &
                               config_flags, &
                               ids,ide,jds,jde,kde,ims,ime,jms,jme,kms,kme, &
                               grid%i_start(ij),grid%i_end(ij), &
                               grid%j_start(ij),grid%j_end(ij),k_start,k_end )
BENCH_END(advance_mu_t_tim)

BENCH_START(spec_bdy_t_tim)
         IF( config_flags%specified .or. config_flags%nested ) THEN
             call g_spec_bdyupdate( t_2,g_t_2,t_tend,g_t_tend,dts_rk, &
                                    't',spec_zone,ids, &
                                    ide,jds,jde,kde,ims,ime,jms,jme,kms,kme, &
                                    grid%i_start(ij),grid%i_end(ij), &
                                    grid%j_start(ij),grid%j_end(ij),k_start,k_end )
             call g_spec_bdyupdate( mu_2,g_mu_2,mu_tend,g_mu_tend,dts_rk, &
                                    'm',spec_zone, &
                                    ids,ide,jds,jde,1,ims,ime,jms,jme,1,1, &
                                    grid%i_start(ij),grid%i_end(ij), &
                                    grid%j_start(ij),grid%j_end(ij),1,1 )
             call g_spec_bdyupdate( muts,g_muts,mu_tend,g_mu_tend,dts_rk, &
                                    'm',spec_zone, &
                                    ids,ide,jds,jde,1,ims,ime,jms,jme,1,1, &
                                    grid%i_start(ij),grid%i_end(ij), &
                                    grid%j_start(ij),grid%j_end(ij),1,1 )
         ENDIF
BENCH_END(spec_bdy_t_tim)

         ! sumflux accumulates the time-averged mass flux
         ! (time averaged over the acoustic steps) for use
         ! in the scalar advection (flux divergence).  Using
         ! time averaged values gives us exact scalar conservation.

BENCH_START(sumflux_tim)
         call g_sumflux( u_2,g_u_2,v_2,g_v_2,ww,g_ww, &
                         u_save,g_u_save,v_save,g_v_save, &
                         ww1,g_ww1,muu,g_muu,muv,g_muv, &
                         ru_m,g_ru_m,rv_m,g_rv_m,ww_m,g_ww_m,msfu,msfv, &
                         iteration,number_of_small_timesteps, &
                         ide,jde,kde,ims,ime,jms,jme,kms,kme, &
                         grid%i_start(ij),grid%i_end(ij), &
                         grid%j_start(ij),grid%j_end(ij),k_start,k_end )
BENCH_END(sumflux_tim)

         ! small (acoustic) step for the vertical momentum,
         ! density and coupled potential temperature.


BENCH_START(advance_w_tim)
        IF ( non_hydrostatic ) THEN
           call g_advance_w( w_2,g_w_2,rw_tend,g_rw_tend,ww,g_ww, &
                             u_2,g_u_2,v_2,g_v_2,mu_2,g_mu_2, &
                             mut,g_mut,muave,g_muave,muts,g_muts,&
                             t_2save,g_t_2save,t_2,g_t_2,t_save,g_t_save, &
                             ph_2,g_ph_2,ph_save,g_ph_save,phb,ph_tend,g_ph_tend,ht, &
                             c2a,g_c2a,cqw,g_cqw,&
                             alt,g_alt,alb,a,g_a,alpha,g_alpha,gamma,g_gamma, &
                             rdx,rdy,dts,t0,epssm,fnm,fnp,rdnw,rdn, &
                             cf1,cf2,cf3,msft,config_flags, &
                             ids,ide,jds,jde,ims,ime,jms,jme,kms,kme, &
                             grid%i_start(ij),grid%i_end(ij), &
                             grid%j_start(ij),grid%j_end(ij),k_start,k_end )
        ENDIF
BENCH_END(advance_w_tim)

        IF( config_flags%specified .or. config_flags%nested ) THEN

BENCH_START(spec_bdynhyd_tim)
           IF (non_hydrostatic)  THEN
              call g_spec_bdyupdate_ph( ph_save,g_ph_save,ph_2,g_ph_2,ph_tend,g_ph_tend,mu_tend,g_mu_tend,muts,g_muts,dts_rk,'h',&
&spec_zone,ids,ide,jds,jde,kde,ims,ime,jms,jme,kms,kme,grid%i_start(ij),grid%i_end(ij),grid%j_start(ij),grid%j_end(ij),&
&k_start,k_end )
             IF( config_flags%specified ) THEN
                 call g_zero_grad_bdy( w_2,g_w_2,'w',spec_zone, &
                                       ids,ide,jds,jde,kde,ims,ime,jms,jme,kms,kme, &
                                       grid%i_start(ij),grid%i_end(ij), &
                                       grid%j_start(ij),grid%j_end(ij),k_start )
             ELSE
                call g_spec_bdyupdate( w_2,g_w_2,rw_tend,g_rw_tend,dts_rk, &
                                       'h',spec_zone, &
                                       ids,ide,jds,jde,kde,ims,ime,jms,jme,kms,kme,&
                                       grid%i_start(ij),grid%i_end(ij), &
                                       grid%j_start(ij),grid%j_end(ij),k_start,k_end )
             ENDIF
          ENDIF
BENCH_END(spec_bdynhyd_tim)
        ENDIF

BENCH_START(cald_p_rho_tim)
        call g_calc_p_rho( al,g_al,p,g_p,ph_2,g_ph_2,alt,g_alt, &
                           t_2,g_t_2,t_save,g_t_save, &
                           c2a,g_c2a,pm1,g_pm1,mu_2,g_mu_2,muts,g_muts,&
                           znu,t0,rdnw,dnw,smdiv,non_hydrostatic,iteration, &
                           ide,jde,kde,ims,ime,jms,jme,kms,kme, &
                           grid%i_start(ij),grid%i_end(ij), &
                           grid%j_start(ij),grid%j_end(ij),k_start,k_end )
BENCH_END(cald_p_rho_tim)

   ENDDO
   !$OMP END PARALLEL DO

#ifdef DM_PARALLEL
     if(grid%trace_use) call trace_entry("solve_tl_halo_comm")
!
!  Stencils for patch communications  (WCS, 29 June 2001)
!
!         *                     *
!       * + *      * + *        +
!         *                     *
!
!  ph_2   x
!  al     x
!  p      x
!
!  2D variables (x,y)
!
!  mu_2   x
!  muts   x
!  mudf   x

#      include "HALO_EM_C2.inc"
#      include "HALO_EM_TL_C2.inc"
#      include "PERIOD_BDY_EM_B3.inc"
     if(grid%trace_use) call trace_exit("solve_tl_halo_comm")
#endif

BENCH_START(phys_bc_tim)
      if(dyn_opt == DYN_EM) then
      !$OMP PARALLEL DO   &
      !$OMP PRIVATE ( ij )

      DO ij = 1 , grid%num_tiles

        ! boundary condition set for next small timestep

         CALL set_physical_bc3d( ph_2, 'w', config_flags,          &
                                 ids, ide, jds, jde, kds, kde,     &
                                 ims, ime, jms, jme, kms, kme,     &
                                 ips, ipe, jps, jpe, kps, kpe,     &
                                 grid%i_start(ij), grid%i_end(ij), &
                                 grid%j_start(ij), grid%j_end(ij), &
                                 k_start    , k_end               )

         CALL set_physical_bc3d( al, 'p', config_flags,            &
                                 ids, ide, jds, jde, kds, kde,     &
                                 ims, ime, jms, jme, kms, kme,     &
                                 ips, ipe, jps, jpe, kps, kpe,     &
                                 grid%i_start(ij), grid%i_end(ij), &
                                 grid%j_start(ij), grid%j_end(ij), &
                                 k_start    , k_end               )

         CALL set_physical_bc3d( p, 'p', config_flags,             &
                                 ids, ide, jds, jde, kds, kde,     &
                                 ims, ime, jms, jme, kms, kme,     &
                                 ips, ipe, jps, jpe, kps, kpe,     &
                                 grid%i_start(ij), grid%i_end(ij), &
                                 grid%j_start(ij), grid%j_end(ij), &
                                 k_start    , k_end               )

         CALL set_physical_bc2d( muts, 't', config_flags,          &
                                 ids, ide, jds, jde,               &
                                 ims, ime, jms, jme,               &
                                 ips, ipe, jps, jpe,               &
                                 grid%i_start(ij), grid%i_end(ij), &
                                 grid%j_start(ij), grid%j_end(ij) )

         CALL set_physical_bc2d( mu_2, 't', config_flags,          &
                                 ids, ide, jds, jde,               &
                                 ims, ime, jms, jme,               &
                                 ips, ipe, jps, jpe,               &
                                 grid%i_start(ij), grid%i_end(ij), &
                                 grid%j_start(ij), grid%j_end(ij) )

         CALL set_physical_bc2d( mudf, 't', config_flags,          &
                                 ids, ide, jds, jde,               &
                                 ims, ime, jms, jme,               &
                                 ips, ipe, jps, jpe,               &
                                 grid%i_start(ij), grid%i_end(ij), &
                                 grid%j_start(ij), grid%j_end(ij) )

      END DO
      !$OMP END PARALLEL DO
      endif
BENCH_END(phys_bc_tim)

   END DO small_steps
!Print*, '44444444444444444444444444444++++++++++++++++++++++++TL CODE TESTING'

   !$OMP PARALLEL DO   &
   !$OMP PRIVATE ( ij )

   DO ij = 1 , grid%num_tiles

      CALL wrf_debug ( 200 , ' call rk_small_finish' )

      ! change time-perturbation variables back to 
      ! full perturbation variables.
      ! first get updated mu at u and v points

BENCH_START(calc_mu_uv_tim)
      call g_calc_mu_uv_1( config_flags,muts,g_muts,muus,g_muus,muvs,g_muvs, &
                           ids,ide,jds,jde,ims,ime,jms,jme, &
                           grid%i_start(ij),grid%i_end(ij), &
                           grid%j_start(ij),grid%j_end(ij) )
BENCH_END(calc_mu_uv_tim)

BENCH_START(small_step_finish_tim)
      call g_small_step_finish( u_2,g_u_2,v_2,g_v_2,w_2,g_w_2,t_2,g_t_2,ph_2,g_ph_2, &
                                ww,mu_2,g_mu_2,mut,g_mut,muts,g_muts,muu,g_muu,&
                                muus,g_muus,muv,g_muv,muvs,g_muvs, &
                                u_save,g_u_save,v_save,g_v_save,w_save, &
                                g_w_save,t_save,g_t_save,ph_save,g_ph_save,mu_save,&
                                g_mu_save,msfu,msfv,msft, &
				number_of_small_timesteps,dts_rk, &
                                ide,jde,kds,kde,ims,ime,jms,jme,kms,kme, &
                                grid%i_start(ij),grid%i_end(ij), &
                                grid%j_start(ij),grid%j_end(ij) )


BENCH_END(small_step_finish_tim)

   END DO
   !$OMP END PARALLEL DO



#ifdef DM_PARALLEL
     if(grid%trace_use) call trace_entry("solve_tl_halo_comm")
!
!  Stencils for patch communications  (WCS, 29 June 2001)
!
!
! ru_m      x
! rv_m      x
!
!--------------------------------------------------------------

#  include "HALO_EM_D.inc"
#  include "HALO_EM_TL_D.inc"
     if(grid%trace_use) call trace_exit("solve_tl_halo_comm")
#endif

!<DESCRIPTION>
!<pre>
! (4) Still within the RK loop, the scalar variables are advanced.
!
!    For the moist and chem variables, each one is advanced
!    individually, using named loops "moist_variable_loop:"
!    and "chem_variable_loop:".  Each RK substep begins by
!    calculating the advective tendency, and, for the first RK step, 
!    3D mixing (calling rk_scalar_tend) followed by an update
!    of the scalar (calling rk_scalar_update).
!</pre>
!</DESCRIPTION>

  moist_scalar_advance: IF (num_3d_m >= PARAM_FIRST_SCALAR )  THEN

   moist_variable_loop: do im = PARAM_FIRST_SCALAR, num_3d_m

   !$OMP PARALLEL DO   &
   !$OMP PRIVATE ( ij )

   moist_tile_loop_1: DO ij = 1 , grid%num_tiles

       CALL wrf_debug ( 200 , ' call rk_scalar_tend' )

BENCH_START(rk_scalar_tend_tim)

       call g_rk_scalar_tend( im,im,config_flags,rk_step, &
                              ru_m,g_ru_m,rv_m,g_rv_m,ww_m,g_ww_m, &
                              mut,g_mut,alt,g_alt,moist_1(ims,kms,jms,im), &
                              g_moist_1(ims,kms,jms,im),moist_2(ims,kms,jms,im), &
                              g_moist_2(ims,kms,jms,im),moist_tend(ims,kms,jms,im),&
                              g_moist_tend(ims,kms,jms,im), &
                              advect_tend,g_advect_tend,qv_base, &
                               .true. ,fnm,fnp,msfu,msfv,msft,rdx,rdy,rdn,rdnw, &
                              kvdif,xkmhd,g_xkmhd,leapfrog, &
                              ids,ide,jds,jde,kde,ims,ime,jms,jme,kms,kme, &
                              grid%i_start(ij),grid%i_end(ij), &
                              grid%j_start(ij),grid%j_end(ij),k_start,k_end )

BENCH_END(rk_scalar_tend_tim)

BENCH_START(rlx_bdy_scalar_tim)
     IF( (config_flags%specified .or. config_flags%nested) .and. rk_step == 1 ) THEN 

       IF(im .eq. P_QV)THEN
          call g_spec_bdy_scalar( moist_tend(ims,kms,jms,im),g_moist_tend(ims,kms,jms,im), &
                                  rqv_bt,g_rqv_bt,spec_bdy_width,spec_zone, &
                                  ijds,ijde, &
                                  ids,ide,jds,jde,kds,kde,ims,ime,jms,jme,kms,kme, &
                                  grid%i_start(ij),grid%i_end(ij), &
                                  grid%j_start(ij),grid%j_end(ij),k_start,k_end )
       ENDIF

     ENDIF

!  ugly code for nested b.c for moist scalars other than qv

     IF( config_flags%nested .and. (rk_step == 1) ) THEN 

       IF (im .eq. P_QC) THEN
            g_rqc_btm(:,:,:,:) = 0.
            call g_spec_bdy_scalar( moist_tend(ims,kms,jms,im),g_moist_tend(ims,kms,jms,im), &
                                    rqc_bt,g_rqc_btm,spec_bdy_width,spec_zone, &
                                    ijds,ijde, &
                                    ids,ide,jds,jde,kds,kde,ims,ime,jms,jme,kms,kme, &
                                    grid%i_start(ij),grid%i_end(ij), &
                                    grid%j_start(ij),grid%j_end(ij),k_start,k_end )
       ELSE IF (im .eq. P_QR) THEN
            g_rqr_btm(:,:,:,:) = 0.
            call g_spec_bdy_scalar( moist_tend(ims,kms,jms,im),g_moist_tend(ims,kms,jms,im), &
                                    rqr_bt,g_rqr_btm,spec_bdy_width,spec_zone, &
                                    ijds,ijde, &
                                    ids,ide,jds,jde,kds,kde,ims,ime,jms,jme,kms,kme, &
                                    grid%i_start(ij),grid%i_end(ij), &
                                    grid%j_start(ij),grid%j_end(ij),k_start,k_end )
       ELSE IF (im .eq. P_QI) THEN
            g_rqi_btm(:,:,:,:) = 0.
            call g_spec_bdy_scalar( moist_tend(ims,kms,jms,im),g_moist_tend(ims,kms,jms,im), &
                                    rqi_bt,g_rqi_btm,spec_bdy_width,spec_zone, &
                                    ijds,ijde, &
                                    ids,ide,jds,jde,kds,kde,ims,ime,jms,jme,kms,kme, &
                                    grid%i_start(ij),grid%i_end(ij), &
                                    grid%j_start(ij),grid%j_end(ij),k_start,k_end )
       ELSE IF (im .eq. P_QS) THEN
            g_rqs_btm(:,:,:,:) = 0.
            call g_spec_bdy_scalar( moist_tend(ims,kms,jms,im),g_moist_tend(ims,kms,jms,im), &
                                    rqs_bt,g_rqs_btm,spec_bdy_width,spec_zone, &
                                    ijds,ijde, &
                                    ids,ide,jds,jde,kds,kde,ims,ime,jms,jme,kms,kme, &
                                    grid%i_start(ij),grid%i_end(ij), &
                                    grid%j_start(ij),grid%j_end(ij),k_start,k_end )

       ELSE IF (im .eq. P_QG) THEN
            g_rqg_btm(:,:,:,:) = 0.
            call g_spec_bdy_scalar( moist_tend(ims,kms,jms,im),g_moist_tend(ims,kms,jms,im), &
                                    rqg_bt,g_rqg_btm,spec_bdy_width,spec_zone, &
                                    ijds,ijde, &
                                    ids,ide,jds,jde,kds,kde,ims,ime,jms,jme,kms,kme, &
                                    grid%i_start(ij),grid%i_end(ij), &
                                    grid%j_start(ij),grid%j_end(ij),k_start,k_end )
       ENDIF

     ENDIF ! b.c test for moist nested boundary condition

BENCH_END(rlx_bdy_scalar_tim)

   ENDDO moist_tile_loop_1
   !$OMP END PARALLEL DO

   !$OMP PARALLEL DO   &
   !$OMP PRIVATE ( ij )

   moist_tile_loop_2: DO ij = 1 , grid%num_tiles

       CALL wrf_debug ( 200 , ' call rk_update_scalar' )

BENCH_START(update_scal_tim)
       call g_rk_update_scalar( im,im,moist_1(ims,kms,jms,im),g_moist_1(ims,kms,jms,im), &
                                moist_2(ims,kms,jms,im),g_moist_2(ims,kms,jms,im), &
                                moist_tend(ims,kms,jms,im),g_moist_tend(ims,kms,jms,im), &
                                advect_tend,g_advect_tend,msft,mu_1,g_mu_1,mu_2,g_mu_2,&
                                mub,rk_step,dt_rk,spec_zone,epsts,leapfrog,config_flags, &
                                ids,ide,jds,jde,kde,ims,ime,jms,jme,kms,kme, &
                                grid%i_start(ij),grid%i_end(ij), &
                                grid%j_start(ij),grid%j_end(ij),k_start,k_end )
BENCH_END(update_scal_tim)

BENCH_START(flow_depbdy_tim)
       if(dyn_opt == DYN_EM) then
       IF( config_flags%specified ) THEN
         IF(im .ne. P_QV)THEN
           CALL flow_dep_bdy  (  moist_2(ims,kms,jms,im),                     &
                               ru_m, rv_m, config_flags, &
                               spec_zone,                  &
                               ids,ide, jds,jde, kds,kde,  & ! domain dims
                               ims,ime, jms,jme, kms,kme,  & ! memory dims
                               ips,ipe, jps,jpe, kps,kpe,  & ! patch  dims
                               grid%i_start(ij), grid%i_end(ij),                      &
                               grid%j_start(ij), grid%j_end(ij),                      &
                               k_start, k_end                               )
         ENDIF
       ENDIF
       endif
BENCH_END(flow_depbdy_tim)

   ENDDO moist_tile_loop_2
   !$OMP END PARALLEL DO

   ENDDO moist_variable_loop

 ENDIF moist_scalar_advance

BENCH_START(tke_adv_tim)
 if(dyn_opt == DYN_EM) then
 TKE_advance: IF (km_opt .eq. 2) then

#ifdef DM_PARALLEL
     if(grid%trace_use) call trace_entry("solve_tl_halo_comm")
      IF      ( h_mom_adv_order <= 4 ) THEN
#       include "HALO_EM_TKE_ADVECT_3.inc"
      ELSE IF ( h_mom_adv_order <= 6 ) THEN
#       include "HALO_EM_TKE_ADVECT_5.inc"
      ELSE
        WRITE(wrf_err_message,*)'solve_em_tl: invalid h_mom_adv_order = ',h_mom_adv_order
        CALL wrf_error_fatal(TRIM(wrf_err_message))
      ENDIF
     if(grid%trace_use) call trace_exit("solve_tl_halo_comm")
#endif

   !$OMP PARALLEL DO   &
   !$OMP PRIVATE ( ij )

   tke_tile_loop_1: DO ij = 1 , grid%num_tiles

     CALL wrf_debug ( 200 , ' call rk_scalar_tend for tke' )
     CALL rk_scalar_tend ( 1, 1, config_flags,               &
                           rk_step, dt_rk,                   &
                           ru_m, rv_m, ww_m,                 &
                           mut, alt,                         &
                           tke_1(ims,kms,jms),               &
                           tke_2(ims,kms,jms),               &
                           tke_tend(ims,kms,jms),            &
                           advect_tend,RQVFTEN,              &
                           qv_base, .false., fnm, fnp,       &
                           msfu, msfv, msft,                 &
                           rdx, rdy, rdn, rdnw, khdif,       &
                           kvdif, xkmhd,                     &
                           leapfrog,                         &
                           ids, ide, jds, jde, kds, kde,     &
                           ims, ime, jms, jme, kms, kme,     &
                           grid%i_start(ij), grid%i_end(ij), &
                           grid%j_start(ij), grid%j_end(ij), &
                           k_start    , k_end               )

   ENDDO tke_tile_loop_1
   !$OMP END PARALLEL DO

   !$OMP PARALLEL DO   &
   !$OMP PRIVATE ( ij )

   tke_tile_loop_2: DO ij = 1 , grid%num_tiles

     CALL wrf_debug ( 200 , ' call rk_update_scalar' )
     CALL rk_update_scalar( 1, 1,                             &
                            tke_1(ims,kms,jms),               &
                            tke_2(ims,kms,jms),               &
                            tke_tend(ims,kms,jms),            &
                            advect_tend,msft,                 &
                            mu_1, mu_2, mub,                  &
                            rk_step, dt_rk, spec_zone,        &
                            epsts, leapfrog,config_flags,     &
                            ids, ide, jds, jde, kds, kde,     &
                            ims, ime, jms, jme, kms, kme,     &
                            grid%i_start(ij), grid%i_end(ij), &
                            grid%j_start(ij), grid%j_end(ij), &
                            k_start    , k_end               ) 

! bound the tke (greater than 0, less than tke_upper_bound)

     CALL bound_tke( tke_2(ims,kms,jms), tke_upper_bound, &
                     ids, ide, jds, jde, kds, kde,        &
                     ims, ime, jms, jme, kms, kme,        &
                     grid%i_start(ij), grid%i_end(ij),    &
                     grid%j_start(ij), grid%j_end(ij),    &
                     k_start    , k_end                  )

     IF( config_flags%specified .or. config_flags%nested ) THEN
         CALL flow_dep_bdy (  tke_2(ims,kms,jms),                     &
                              ru_m, rv_m, config_flags,               &
                              spec_zone,                              &
                              ids,ide, jds,jde, kds,kde,  & ! domain dims
                              ims,ime, jms,jme, kms,kme,  & ! memory dims
                              ips,ipe, jps,jpe, kps,kpe,  & ! patch  dims
                              grid%i_start(ij), grid%i_end(ij),       &
                              grid%j_start(ij), grid%j_end(ij),       &
                              k_start, k_end                               )
     ENDIF
   ENDDO tke_tile_loop_2
   !$OMP END PARALLEL DO

   END IF TKE_advance
   endif
BENCH_END(tke_adv_tim)

!  next the chemical species

  chem_scalar_advance: IF (num_3d_c >= PARAM_FIRST_SCALAR)  THEN

   chem_variable_loop: do ic = PARAM_FIRST_SCALAR, num_3d_c

   !$OMP PARALLEL DO   &
   !$OMP PRIVATE ( ij )

   chem_tile_loop_1: DO ij = 1 , grid%num_tiles

       CALL wrf_debug ( 200 , ' call rk_scalar_tend' )
       CALL rk_scalar_tend ( ic, ic, config_flags,                  &
                             rk_step, dt_rk,                   &
                             ru_m, rv_m, ww_m,                 &
                             mut, alt,                         &
                             chem_1(ims,kms,jms,ic),           &
                             chem_2(ims,kms,jms,ic),           &
                             chem_tend(ims,kms,jms,ic),        &
                             advect_tend,RQVFTEN,              &
                             qv_base, .false., fnm, fnp,       &
                             msfu, msfv, msft,                 &
                             rdx, rdy, rdn, rdnw,              &
                             khdif, kvdif, xkmhd,              &
                             leapfrog,                         &
                             ids, ide, jds, jde, kds, kde,     &
                             ims, ime, jms, jme, kms, kme,     &
                             grid%i_start(ij), grid%i_end(ij), &
                             grid%j_start(ij), grid%j_end(ij), &
                             k_start    , k_end               )

   ENDDO chem_tile_loop_1
   !$OMP END PARALLEL DO


   !$OMP PARALLEL DO   &
   !$OMP PRIVATE ( ij )

   chem_tile_loop_2: DO ij = 1 , grid%num_tiles

       CALL wrf_debug ( 200 , ' call rk_update_scalar' )
       CALL rk_update_scalar( ic, ic,                           &
                              chem_1(ims,kms,jms,ic),           &
                              chem_2(ims,kms,jms,ic),           &
                              chem_tend(ims,kms,jms,ic),        &
                              advect_tend, msft,                &
                              mu_1, mu_2, mub,                  &
                              rk_step, dt_rk, spec_zone,        &
                              epsts, leapfrog,config_flags,     &
                              ids, ide, jds, jde, kds, kde,     &
                              ims, ime, jms, jme, kms, kme,     &
                              grid%i_start(ij), grid%i_end(ij), &
                              grid%j_start(ij), grid%j_end(ij), &
                              k_start    , k_end               )


       IF( config_flags%specified ) THEN
! come back to this and figure out why two different routines are needed. JM 20041203
#ifndef WRF_CHEM
           CALL flow_dep_bdy  ( chem_2(ims,kms,jms,ic),     &
                                ru_m, rv_m, config_flags,   &
                                spec_zone,                  &
                                ids,ide, jds,jde, kds,kde,  & ! domain dims
                                ims,ime, jms,jme, kms,kme,  & ! memory dims
                                ips,ipe, jps,jpe, kps,kpe,  & ! patch  dims
                                grid%i_start(ij), grid%i_end(ij),  &
                                grid%j_start(ij), grid%j_end(ij),  &
                                k_start, k_end                    )
#else
           CALL flow_dep_bdy_chem( chem_2(ims,kms,jms,ic),z,&
                                ru_m, rv_m, config_flags,alt,   &
                                t_1,pb,p,t0,p1000mb,rcp,ph_2,phb,g, &
                                spec_zone,ic,               &
                                ids,ide, jds,jde, kds,kde,  & ! domain dims
                                ims,ime, jms,jme, kms,kme,  & ! memory dims
                                ips,ipe, jps,jpe, kps,kpe,  & ! patch  dims
                                grid%i_start(ij), grid%i_end(ij),  &
                                grid%j_start(ij), grid%j_end(ij),  &
                                k_start, k_end                    )
#endif
       ENDIF


   ENDDO chem_tile_loop_2
   !$OMP END PARALLEL DO

   ENDDO chem_variable_loop

 ENDIF chem_scalar_advance

 !  update the pressure and density at the new time level

   !$OMP PARALLEL DO   &
   !$OMP PRIVATE ( ij )
   DO ij = 1 , grid%num_tiles

BENCH_START(calc_p_rho_tim)
      call g_calc_p_rho_phi( moist_2,g_moist_2,num_3d_m,al,g_al,alb, &
                             mu_2,g_mu_2,muts,g_muts,ph_2,g_ph_2,p,g_p,pb,t_2,g_t_2, &
                             p0,t0,dnw,rdnw,rdn,non_hydrostatic, &
                             ide,jde,kde,ims,ime,jms,jme,kms,kme, &
                             grid%i_start(ij),grid%i_end(ij), &
                             grid%j_start(ij),grid%j_end(ij),k_start,k_end )
BENCH_END(calc_p_rho_tim)

BENCH_START(diag_w_tim)
     IF (.not. non_hydrostatic) THEN
        call g_diagnose_w( ph_tend,g_ph_tend,ph_2,g_ph_2,ph_1,g_ph_1,w_2,g_w_2, &
                           muts,g_muts,dt_rk,u_2,g_u_2,v_2,g_v_2,ht,cf1,cf2,cf3,&
                           rdx,rdy,msft, &
                           ide,jde,ims,ime,jms,jme,kms,kme, &
                           grid%i_start(ij),grid%i_end(ij), &
                           grid%j_start(ij),grid%j_end(ij),k_end )
     ENDIF
BENCH_END(diag_w_tim)

   ENDDO
   !$OMP END PARALLEL DO

!  Reset the boundary conditions if there is another corrector step.
!  (rk_step < rk_order), else we'll handle it at the end of everything
!  (after the split physics, before exiting the timestep).

   rk_step_1_check: IF ( rk_step < rk_order ) THEN

!-----------------------------------------------------------
!  Stencils for patch communications  (WCS, 29 June 2001)
!
!  here's where we need a wide comm stencil - these are the 
!  uncoupled variables so are used for high order calc in
!  advection and mixong routines.
!
!                              * * * * *
!            *        * * *    * * * * *
!          * + *      * + *    * * + * * 
!            *        * * *    * * * * *
!                              * * * * *
!
!
! u_2                              x
! v_2                              x
! w_2                              x
! t_2                              x
! ph_2                             x
! al         x
!
!  2D variable
! mu_2       x
!
!  4D variable
! moist_2               x
! chem_2                x

#ifdef DM_PARALLEL
     if(grid%trace_use) call trace_entry("solve_tl_halo_comm")
   IF      ( h_mom_adv_order <= 4 ) THEN
#    include "HALO_EM_D2_3.inc"
#    include "HALO_EM_TL_D2_3.inc"
   ELSE IF ( h_mom_adv_order <= 6 ) THEN
#    include "HALO_EM_D2_5.inc"
#    include "HALO_EM_TL_D2_5.inc"
   ELSE 
     WRITE(wrf_err_message,*)'solve_em_tl: invalid h_mom_adv_order = ',h_mom_adv_order
     CALL wrf_error_fatal(TRIM(wrf_err_message))
   ENDIF
#  include "PERIOD_BDY_EM_D.inc"
#  include "PERIOD_BDY_EM_MOIST2.inc"
#  include "PERIOD_BDY_EM_CHEM2.inc"
     if(grid%trace_use) call trace_exit("solve_tl_halo_comm")
#endif

BENCH_START(bc_end_tim)
   if(dyn_opt == DYN_EM) then
   !$OMP PARALLEL DO   &
   !$OMP PRIVATE ( ij )

    tile_bc_loop_1: DO ij = 1 , grid%num_tiles

      CALL wrf_debug ( 200 , ' call rk_phys_bc_dry_2' )

      CALL rk_phys_bc_dry_2( config_flags,                         &
                             u_2, v_2, w_2,                    &
                             t_2, ph_2, mu_2,                  &
                             ids, ide, jds, jde, kds, kde,     &
                             ims, ime, jms, jme, kms, kme,     &
                             ips, ipe, jps, jpe, kps, kpe,     &
                             grid%i_start(ij), grid%i_end(ij), &
                             grid%j_start(ij), grid%j_end(ij), &
                             k_start    , k_end               )

      IF (num_3d_m >= PARAM_FIRST_SCALAR) THEN

        moisture_loop_bdy_1 : DO im = PARAM_FIRST_SCALAR , num_3d_m
  
          CALL set_physical_bc3d( moist_2(ims,kms,jms,im), 'p', config_flags,   &
                                   ids, ide, jds, jde, kds, kde,             &
                                   ims, ime, jms, jme, kms, kme,             &
                                   ips, ipe, jps, jpe, kps, kpe,             &
                                   grid%i_start(ij), grid%i_end(ij),                   &
                                   grid%j_start(ij), grid%j_end(ij),                   &
                                   k_start    , k_end                       )
         END DO moisture_loop_bdy_1

      ENDIF

      IF (num_3d_c >= PARAM_FIRST_SCALAR) THEN

        chem_species_bdy_loop_1 : DO ic = PARAM_FIRST_SCALAR , num_3d_c

          CALL set_physical_bc3d( chem_2(ims,kms,jms,ic), 'p', config_flags,   &
                                  ids, ide, jds, jde, kds, kde,            &
                                  ims, ime, jms, jme, kms, kme,            &
                                  ips, ipe, jps, jpe, kps, kpe,            &
                                  grid%i_start(ij), grid%i_end(ij),                  &
                                  grid%j_start(ij), grid%j_end(ij),                  &
                                  k_start    , k_end-1                    )

        END DO chem_species_bdy_loop_1

      END IF

      IF (km_opt .eq. 2) THEN

        CALL set_physical_bc3d( tke_2(ims,kms,jms) , 'p', config_flags,  &
                                ids, ide, jds, jde, kds, kde,            &
                                ims, ime, jms, jme, kms, kme,            &
                                ips, ipe, jps, jpe, kps, kpe,            &
                                grid%i_start(ij), grid%i_end(ij),        &
                                grid%j_start(ij), grid%j_end(ij),        &
                                k_start    , k_end                      )
      END IF

    END DO tile_bc_loop_1
   !$OMP END PARALLEL DO
   endif
BENCH_END(bc_end_tim)


#ifdef DM_PARALLEL
     if(grid%trace_use) call trace_entry("solve_tl_halo_comm")

      IF      ( h_mom_adv_order <= 4 ) THEN
#       include "HALO_EM_TKE_3.inc"
      ELSE IF ( h_mom_adv_order <= 6 ) THEN
#       include "HALO_EM_TKE_5.inc"
      ELSE
        WRITE(wrf_err_message,*)'solve_em_tl: invalid h_mom_adv_order = ',h_mom_adv_order
        CALL wrf_error_fatal(TRIM(wrf_err_message))
      ENDIF

#if 0
   IF (km_opt .eq. 2) THEN
#      include  "HALO_EM_TKE_F.inc"
   ENDIF
     if(grid%trace_use) call trace_exit("solve_tl_halo_comm")
#endif

   if ( num_moist .ge. PARAM_FIRST_SCALAR ) then

!                           * * * * *
!         *        * * *    * * * * *
!       * + *      * + *    * * + * *
!         *        * * *    * * * * *
!                           * * * * *

! moist_2                       x

     IF      ( h_mom_adv_order <= 4 ) THEN
#      include "HALO_EM_MOIST_E_3.inc"
     ELSE IF ( h_mom_adv_order <= 6 ) THEN
#      include "HALO_EM_MOIST_E_5.inc"
     ELSE
       WRITE(wrf_err_message,*)'solve_em_tl: invalid h_mom_adv_order = ',h_mom_adv_order
       CALL wrf_error_fatal(TRIM(wrf_err_message))
     ENDIF
   endif
   if ( num_chem >= PARAM_FIRST_SCALAR ) then

!                           * * * * *
!         *        * * *    * * * * *
!       * + *      * + *    * * + * *
!         *        * * *    * * * * *
!                           * * * * *

! chem_2                        x

     IF      ( h_mom_adv_order <= 4 ) THEN
#      include "HALO_EM_CHEM_E_3.inc"
     ELSE IF ( h_mom_adv_order <= 6 ) THEN
#      include "HALO_EM_CHEM_E_5.inc"
     ELSE
       WRITE(wrf_err_message,*)'solve_em_tl: invalid h_mom_adv_order = ',h_mom_adv_order
       CALL wrf_error_fatal(TRIM(wrf_err_message))
     ENDIF
   endif
#endif

   ENDIF rk_step_1_check

!**********************************************************
!
!  end of RK predictor-corrector loop
!
!**********************************************************

 END DO Runge_Kutta_loop

!----------------

#if 0
   !$OMP PARALLEL DO   &
   !$OMP PRIVATE ( ij )


   DO ij = 1 , grid%num_tiles

BENCH_START(advance_ppt_tim)
      CALL wrf_debug ( 200 , ' call advance_ppt' )
      CALL advance_ppt(RTHCUTEN,RQVCUTEN,RQCCUTEN,RQRCUTEN, &
                     RQICUTEN,RQSCUTEN,RAINC,RAINCV,NCA,    &
                     CUPPT, config_flags,                   &
                     ids,ide, jds,jde, kds,kde,             &
                     ims,ime, jms,jme, kms,kme,             &
                     grid%i_start(ij), grid%i_end(ij),      &
                     grid%j_start(ij), grid%j_end(ij),      &
                     k_start    , k_end                    )
BENCH_END(advance_ppt_tim)

   ENDDO
   !$OMP END PARALLEL DO
#endif


!<DESCRIPTION>
!<pre>
! (5) time-split physics.
!
!     Microphysics are the only time  split physics in the WRF model 
!     at this time.  Split-physics begins with the calculation of
!     needed diagnostic quantities (pressure, temperature, etc.)
!     followed by a call to the microphysics driver, 
!     and finishes with a clean-up, storing off of a diabatic tendency
!     from the moist physics, and a re-calulation of the  diagnostic
!     quantities pressure and density.
!</pre>
!</DESCRIPTION>

ij = 1

!whl
  IF (config_flags%mp_physics >= 0)  then

   IF( config_flags%specified .or. config_flags%nested ) THEN
     sz = spec_zone
   ELSE
     sz = 0
   ENDIF


       CALL wrf_debug ( 200 , ' call moist_physics_prep' )

BENCH_START(moist_physics_prep_tim)
       CALL g_moist_physics_prep_em( t_2, g_t_2, t_1, g_t_1, t0, rho,    &
                                   g_rho, al, g_al, alb, p, g_p, p8w, g_p8w, p0, &
                                   pb, ph_2, g_ph_2, phb, pi_phy, g_pi_phy,  &
                                   p_phy, g_p_phy, z, g_z, z_at_w, g_z_at_w, &
                                   dz8w, g_dz8w, dtm, h_diabatic, g_h_diabatic, &
                                   config_flags, fnm, fnp, ids, ide, jds, jde,  &
                                   kds, kde, ims, ime, jms, jme, kms, kme, &
                                   max(grid%i_start(ij),ids+sz),          &
                                   min(grid%i_end(ij),ide-1-sz),          &
                                   max(grid%j_start(ij),jds+sz),          &
                                   min(grid%j_end(ij),jde-1-sz),k_start    , k_end               )
BENCH_END(moist_physics_prep_tim)
!whl
!     print*,'config_flags%mp_physics in tl ', config_flags%mp_physics
  if ( config_flags%mp_physics == 1) then
!   print*,'call kessler_d'
   call kessler_d(t_2, g_t_2, moist_2(ims,kms,jms,P_QV),  g_moist_2(ims,kms,jms,P_QV),&
&                             moist_2(ims,kms,jms,P_QC),  g_moist_2(ims,kms,jms,P_QC),&
&                             moist_2(ims,kms,jms,P_QR),  g_moist_2(ims,kms,jms,P_QR),&
&     rho,g_rho, p_phy, g_p_phy, pi_phy, g_pi_phy, dtm&
&    , z, xlv, cp, ep_2,svp1, svp2, svp3, svpt0, rhowater, dz8w, rainnc, &
&    g_rainnc, rainncv, g_rainncv, ids, ide, jds, jde, kds, kde, ims, ime, &
&    jms, jme, kms, kme, &
                                   max(grid%i_start(ij),ids+sz),          &
                                   min(grid%i_end(ij),ide-1-sz),          &
                                   max(grid%j_start(ij),jds+sz),          &
                                   min(grid%j_end(ij),jde-1-sz),k_start    , min(k_end,kde-1)         )
!    print*,'tl kessler_d ',sum(g_rainnc*g_rainnc)
  elseif(config_flags%mp_physics == 0)then
!    print*, 'call g_lscond'
    CALL g_lscond ( t_2,g_t_2,p_phy,g_p_phy,moist_2(ims,kms,jms,P_QV),    &
                    g_moist_2(ims,kms,jms,P_QV),rho,g_rho,                &
                    pi_phy,g_pi_phy,r_v,xlv,cp,ep_2,svp1,svp2,svp3,svpt0, &
                    dz8w,g_dz8w,rainnc,g_rainnc, &
                    rainncv,g_rainncv,ids,ide,jds,jde,kds,kde,ims,ime,jms,jme,kms,kme,&
                    max(grid%i_start(ij),ids+sz),          &
                    min(grid%i_end(ij),ide-1-sz),          &
                    max(grid%j_start(ij),jds+sz),          &
                    min(grid%j_end(ij),jde-1-sz),k_start    , min(k_end,kde-1)    )
!  print*,'tl ',sum(g_rainnc*g_rainnc)
  else
  print*, ' no rain process'
  endif

       CALL wrf_debug ( 200 , ' call moist_physics_finish' )
BENCH_START(moist_phys_end_tim)

       CALL  g_moist_physics_finish_em( t_2, g_t_2, t_1, g_t_1, t0, muts, &
                                        g_muts, h_diabatic, g_h_diabatic, dtm, config_flags, &
                                        ids, ide, jds, jde, kds, kde, ims, ime, jms, jme, kms, kme, &
                                        max(grid%i_start(ij),ids+sz),          &
                                        min(grid%i_end(ij),ide-1-sz),          &
                                        max(grid%j_start(ij),jds+sz),          &
                                        min(grid%j_end(ij),jde-1-sz),k_start    , k_end               )

       call g_calc_p_rho_phi( moist_2,g_moist_2,num_3d_m,al,g_al,alb,mu_2,   &
                           g_mu_2,muts,g_muts,ph_2,g_ph_2,p,g_p,pb,t_2,g_t_2,p0,t0,dnw,&
                           rdnw,rdn,non_hydrostatic,ide,jde,kde,ims,ime,jms,jme,kms,kme,&
                           max(grid%i_start(ij),ids+sz),          &
                           min(grid%i_end(ij),ide-1-sz),          &
                           max(grid%j_start(ij),jds+sz),          &
                           min(grid%j_end(ij),jde-1-sz),k_start    , k_end               )

      IF (.not. non_hydrostatic) THEN
      call g_diagnose_w( ph_tend,g_ph_tend,ph_2,g_ph_2,ph_1,g_ph_1,w_2,   &
                         g_w_2,muts,g_muts,dt_rk,u_2,g_u_2,v_2,g_v_2,ht,cf1,cf2,cf3,&
                         rdx,rdy,msft,ide,jde,ims,ime,jms,jme,kms,kme,&
                         max(grid%i_start(ij),ids+sz),          &
                         min(grid%i_end(ij),ide-1-sz),          &
                         max(grid%j_start(ij),jds+sz),          &
                         min(grid%j_end(ij),jde-1-sz), k_end               )
      ENDIF

BENCH_END(moist_phys_end_tim)
  ENDIF  !mp==0

   scalar_tile_loop_2: DO ij = 1 , grid%num_tiles

          CALL wrf_debug ( 200 , ' call scalar_tile_loop_2' )

     IF ( num_3d_c >= PARAM_FIRST_SCALAR ) then

!
!  tiled chemistry not here, it is called from solve_interface, and found in chem_driver
!

     END IF

   END DO scalar_tile_loop_2


   if(dyn_opt == DYN_EM) then
   IF (leapfrog ) THEN

    ! do time filter and switch for the dry variables

   !$OMP PARALLEL DO   &
   !$OMP PRIVATE ( ij )

    DO ij = 1 , grid%num_tiles

BENCH_START(time_filt_tim)
      call time_filter( u_1, u_2, u_save,                 &
                        v_1, v_2, v_save,                 &
                        w_1, w_2, w_save,                 &
                        t_1, t_2, t_save,                 &
                        ph_1, ph_2, ph_save,              &
                        mu_1, mu_2, mu_save,              &
                        epsts,                            &
                        ids, ide, jds, jde, kds, kde,     &
                        ims, ime, jms, jme, kms, kme,     &
                        grid%i_start(ij), grid%i_end(ij), &
                        grid%j_start(ij), grid%j_end(ij), &
                        k_start    , k_end               )
BENCH_END(time_filt_tim)

    ENDDO
   !$OMP END PARALLEL DO

    END IF
    endif

   !  We're finished except for boundary condition (and patch) update

   ! Boundary condition time (or communication time).  At this time, we have
   ! implemented periodic and symmetric physical boundary conditions.

   ! b.c. routine for data within patch.

   ! we need to do both time levels of 
   ! data because the time filter only works in the physical solution space.

   ! First, do patch communications for boundary conditions (periodicity)

!-----------------------------------------------------------
!  Stencils for patch communications  (WCS, 29 June 2001)
!
!  here's where we need a wide comm stencil - these are the 
!  uncoupled variables so are used for high order calc in
!  advection and mixong routines.
!
!                              * * * * *
!            *        * * *    * * * * *
!          * + *      * + *    * * + * * 
!            *        * * *    * * * * *
!                              * * * * *
!
!   u_1                            x
!   u_2                            x
!   v_1                            x
!   v_2                            x
!   w_1                            x
!   w_2                            x
!   t_1                            x
!   t_2                            x
!  ph_1                            x
!  ph_2                            x
!  tke_1                           x
!  tke_2                           x
!
!    2D variables
!  mu_1     x
!  mu_2     x
!
!    4D variables
!  moist_1                         x
!  moist_2                         x
!   chem_1                         x
!   chem_2                         x
!----------------------------------------------------------


#ifdef DM_PARALLEL
     if(grid%trace_use) call trace_entry("solve_tl_halo_comm")
   IF      ( h_mom_adv_order <= 4 ) THEN
#    include "HALO_EM_D3_3.inc"
#    include "HALO_EM_TL_D3_3.inc"
   ELSE IF ( h_mom_adv_order <= 6 ) THEN
#    include "HALO_EM_D3_5.inc"
#    include "HALO_EM_TL_D3_5.inc"
   ELSE 
     WRITE(wrf_err_message,*)'solve_em_tl: invalid h_mom_adv_order = ',h_mom_adv_order
     CALL wrf_error_fatal(TRIM(wrf_err_message))
   ENDIF
#  include "PERIOD_BDY_EM_D3.inc"
#  include "PERIOD_BDY_EM_MOIST.inc"
#  include "PERIOD_BDY_EM_CHEM.inc"
     if(grid%trace_use) call trace_exit("solve_tl_halo_comm")
#endif

!  now set physical b.c on a patch

BENCH_START(bc_2d_tim)
   if(dyn_opt == DYN_EM) then
   !$OMP PARALLEL DO   &
   !$OMP PRIVATE ( ij )

   tile_bc_loop_2: DO ij = 1 , grid%num_tiles

     CALL wrf_debug ( 200 , ' call set_phys_bc_dry_2' )

     CALL set_phys_bc_dry_2( config_flags,                           &
                             u_1, u_2, v_1, v_2, w_1, w_2,           &
                             t_1, t_2, ph_1, ph_2, mu_1, mu_2,       &
                             ids, ide, jds, jde, kds, kde,           &
                             ims, ime, jms, jme, kms, kme,           &
                             ips, ipe, jps, jpe, kps, kpe,           &
                             grid%i_start(ij), grid%i_end(ij),       &
                             grid%j_start(ij), grid%j_end(ij),       &
                             k_start    , k_end                     )

     CALL set_physical_bc3d( tke_1(ims,kms,jms), 'p', config_flags,   &
                             ids, ide, jds, jde, kds, kde,            &
                             ims, ime, jms, jme, kms, kme,            &
                             ips, ipe, jps, jpe, kps, kpe,            &
                             grid%i_start(ij), grid%i_end(ij),        &
                             grid%j_start(ij), grid%j_end(ij),        &
                             k_start    , k_end-1                    )
     CALL set_physical_bc3d( tke_2(ims,kms,jms) , 'p', config_flags,  &
                             ids, ide, jds, jde, kds, kde,            &
                             ims, ime, jms, jme, kms, kme,            &
                             ips, ipe, jps, jpe, kps, kpe,            &
                             grid%i_start(ij), grid%i_end(ij),        &
                             grid%j_start(ij), grid%j_end(ij),        &
                             k_start    , k_end                      )

     moisture_loop_bdy_2 : DO im = PARAM_FIRST_SCALAR , num_3d_m

       CALL set_physical_bc3d( moist_1(ims,kms,jms,im), 'p',           &
                               config_flags,                           &
                               ids, ide, jds, jde, kds, kde,           &
                               ims, ime, jms, jme, kms, kme,           &
                               ips, ipe, jps, jpe, kps, kpe,           &
                               grid%i_start(ij), grid%i_end(ij),       &
                               grid%j_start(ij), grid%j_end(ij),       &
                               k_start    , k_end                     )
       CALL set_physical_bc3d( moist_2(ims,kms,jms,im), 'p',           &
                               config_flags,                           &
                               ids, ide, jds, jde, kds, kde,           &
                               ims, ime, jms, jme, kms, kme,           &
                               ips, ipe, jps, jpe, kps, kpe,           &
                               grid%i_start(ij), grid%i_end(ij),       &
                               grid%j_start(ij), grid%j_end(ij),       &
                               k_start    , k_end                     )

     END DO moisture_loop_bdy_2

     chem_species_bdy_loop_2 : DO ic = PARAM_FIRST_SCALAR , num_3d_c

       CALL set_physical_bc3d( chem_1(ims,kms,jms,ic), 'p', config_flags,   &
                               ids, ide, jds, jde, kds, kde,            &
                               ims, ime, jms, jme, kms, kme,            &
                               ips, ipe, jps, jpe, kps, kpe,            &
                               grid%i_start(ij), grid%i_end(ij),                  &
                               grid%j_start(ij), grid%j_end(ij),                  &
                               k_start    , k_end                    )
       CALL set_physical_bc3d( chem_2(ims,kms,jms,ic) , 'p', config_flags,  &
                               ids, ide, jds, jde, kds, kde,            &
                               ims, ime, jms, jme, kms, kme,            &
                               ips, ipe, jps, jpe, kps, kpe,            &
                               grid%i_start(ij), grid%i_end(ij),                  &
                               grid%j_start(ij), grid%j_end(ij),                  &
                               k_start    , k_end                      )

     END DO chem_species_bdy_loop_2

   END DO tile_bc_loop_2
   !$OMP END PARALLEL DO
   endif
BENCH_END(bc_2d_tim)
!whl
! g_p=0.0
Goto 4005
   IF( config_flags%specified .or. config_flags%nested ) THEN 
     dtbc = dtbc + dt
   ENDIF

4005 continue

#ifdef DM_PARALLEL
     if(grid%trace_use) call trace_entry("solve_tl_halo_comm")
!-----------------------------------------------------------------------
! see above
!--------------------------------------------------------------
   CALL wrf_debug ( 200 , ' call HALO_RK_E' )
   IF      ( h_mom_adv_order <= 4 ) THEN
#    include "HALO_EM_E_3.inc"
#    include "HALO_EM_TL_E_3.inc"
   ELSE IF ( h_mom_adv_order <= 6 ) THEN
#    include "HALO_EM_E_5.inc"
#    include "HALO_EM_TL_E_5.inc"
   ELSE
     WRITE(wrf_err_message,*)'solve_em_tl: invalid h_mom_adv_order = ',h_mom_adv_order
     CALL wrf_error_fatal(TRIM(wrf_err_message))
   ENDIF
     if(grid%trace_use) call trace_exit("solve_tl_halo_comm")
#endif

#ifdef DM_PARALLEL
     if(grid%trace_use) call trace_entry("solve_tl_halo_comm")
   if ( num_moist >= PARAM_FIRST_SCALAR  ) then
!-----------------------------------------------------------------------
! see above
!--------------------------------------------------------------
     CALL wrf_debug ( 200 , ' call HALO_RK_MOIST' )
     IF      ( h_mom_adv_order <= 4 ) THEN
#      include "HALO_EM_MOIST_E_3.inc"
#      include "HALO_EM_MOIST_TL_E_3.inc"
     ELSE IF ( h_mom_adv_order <= 6 ) THEN
#      include "HALO_EM_MOIST_E_5.inc"
#      include "HALO_EM_MOIST_TL_E_5.inc"
     ELSE
       WRITE(wrf_err_message,*)'solve_em_tl: invalid h_mom_adv_order = ',h_mom_adv_order
       CALL wrf_error_fatal(TRIM(wrf_err_message))
     ENDIF
   endif
   if ( num_chem >= PARAM_FIRST_SCALAR ) then
!-----------------------------------------------------------------------
! see above
!--------------------------------------------------------------
     CALL wrf_debug ( 200 , ' call HALO_RK_CHEM' )
     IF      ( h_mom_adv_order <= 4 ) THEN
#      include "HALO_EM_CHEM_E_3.inc"
     ELSE IF ( h_mom_adv_order <= 6 ) THEN
#      include "HALO_EM_CHEM_E_5.inc"
     ELSE
       WRITE(wrf_err_message,*)'solve_em_tl: invalid h_mom_adv_order = ',h_mom_adv_order
       CALL wrf_error_fatal(TRIM(wrf_err_message))
     ENDIF
   endif
     if(grid%trace_use) call trace_exit("solve_tl_halo_comm")
#endif


   CALL wrf_debug ( 200 , ' call end of solve_em_tl' )

   call trace_exit("solve_em_tl")

!Print*, '5555555555555555555555555555555555++++++++++++++++++++++++TL CODE TESTING'
! Finish timers if compiled with -DBENCH.
#include <bench_solve_em_end.h>

! See comment before earlier #include of this file.
#define COPY_OUT
#include <em_scalar_derefs.inc>


   RETURN

END SUBROUTINE solve_em_tl

