//#undef STANDALONE
// call and definition in original code
//
//CALL rk_update_scalar( ic, ic,                           &
//                                  chem_old(ims,kms,jms,ic),         &  ! was chem_1
//                                  chem(ims,kms,jms,ic),             &
//                                  chem_tend(ims,kms,jms,ic),        &
//                                  advect_tend, grid%msftx, grid%msfty, &
//                                  grid%mu_1, grid%mu_2, grid%mub,      &
//                                  rk_step, dt_rk, grid%spec_zone,      &
//                                  config_flags,                     &
//                                  ids, ide, jds, jde, kds, kde,     &
//                                  ims, ime, jms, jme, kms, kme,     &
//                                  grid%i_start(ij), grid%i_end(ij), &
//                                  grid%j_start(ij), grid%j_end(ij), &
//                                  k_start    , k_end               )
//
//SUBROUTINE rk_update_scalar( scs, sce,                      &
//                             scalar_1, scalar_2, sc_tend,   &
//                             advect_tend, msftx, msfty,     &
//                             mu_old, mu_new, mu_base,       &
//                             rk_step, dt, spec_zone,        &
//                             config_flags,                  &
//                             ids, ide, jds, jde, kds, kde,  &
//                             ims, ime, jms, jme, kms, kme,  &
//                             its, ite, jts, jte, kts, kte  )
//

// update_scalar_gpu: GPU version of the rk_update_scalar routine quoted in
// the header comment of this file.
//
// For the horizontal point handled by the calling thread, and only when that
// point satisfies ids <= ig <= ide-1 and jds <= jg <= jde-1, the kernel
//   1. copies level kpe of the `scalar` texture into scalar_2 unchanged, and
//   2. for every level k in [kps, kpe):
//        tendency  = advect_tend * msfty   (only inside the spec_zone-trimmed
//                                           interior i_start..i_end, j_start..j_end)
//                  + sc_tend               (only inside i_start_spc..i_end_spc,
//                                           j_start_spc..j_end_spc)
//        scalar_1  = scalar texture        (only when rk_step == 1; always when
//                                           STANDALONE is defined)
//        scalar_2  = (muold * scalar_1 + dt * tendency) * r_munew
//      with muold = mu_old + mub and r_munew = 1 / (mu_new + mub).
//
// Parameters
//   scalar_1     read (and, on the first RK step, written) 3D field
//   scalar_2     3D field receiving the updated values
//   sc_tend      tendency added inside the "_spc" index range
//   advect_tend  tendency (scaled by msfty) added inside the interior range
//   rk_step      Runge-Kutta sub-step; 1 triggers the scalar_1 refresh
//   dt           time step multiplying the accumulated tendency
//   spec_zone    number of boundary rows/columns excluded from the
//                advective tendency
//   im           slab selector: every array index is offset by im * d3, where
//                d3 is the element count of one (ims:ime, kms:kme, jms:jme) block
//   ids..kde     domain index bounds (kds and kde are not used here)
//   ims..kme     memory (allocated) index bounds, used for the slab size
//   ips..kpe     patch index bounds handled by this launch
//
// Names not declared in this function -- ig, jg, ti, tj, P3, P3IJK, TexRef2D,
// TexRef3D, scalar, mu_old, mu_new, mub and msfty -- must be supplied by the
// surrounding translation unit (presumably macros and texture references;
// NOTE(review): confirm against the file that defines them).
//
// The domain guard encloses the whole column update for both settings of
// PINNING; its closing brace used to sit inside the #else branch, which left
// the braces unbalanced whenever PINNING was not defined.
__global__ void update_scalar_gpu (
                    float * scalar_1, float * scalar_2
                   ,float * sc_tend ,float * advect_tend
                   ,int rk_step, float dt, int spec_zone
                   ,int im
                   ,int ids, int ide,  int jds, int jde,  int kds, int kde
                   ,int ims, int ime,  int jms, int jme,  int kms, int kme
                   ,int ips, int ipe,  int jps, int jpe,  int kps, int kpe
                         )
{
 int k ;

 // Element count of one horizontal plane and of one full 3D slab; imx is the
 // offset of slab number `im`.
 const int d2  = (ime-ims+1)*(jme-jms+1);
 const int d3  = d2*(kme-kms+1);
 const int imx = im * d3 ;

 // Range in which sc_tend is applied: the patch, with the last domain
 // row/column dropped when the patch reaches the domain edge.
 const int i_start_spc = ips ;
 const int i_end_spc   = ( ipe == ide ) ? ipe - 1 : ipe ;
 const int j_start_spc = jps ;
 const int j_end_spc   = ( jpe == jde ) ? jpe - 1 : jpe ;

 // Range in which advect_tend is applied: the patch clipped to the domain
 // interior, spec_zone points in from each lateral boundary.
 const int i_start = max( ips,ids+spec_zone ) ;
 const int i_end   = min( ipe,ide-spec_zone-1 ) ;
 const int j_start = max( jps,jds+spec_zone ) ;
 const int j_end   = min( jpe,jde-spec_zone-1 ) ;

 if ( ig >= ids && ig <= ide-1 && jg >= jds && jg <= jde-1 ) {
   const float muold   = TexRef2D(mu_old,ti,tj) + TexRef2D(mub,ti,tj) ;
   // float literal keeps the reciprocal in single precision
   const float r_munew = 1.0f/(TexRef2D(mu_new,ti,tj)+TexRef2D(mub,ti,tj)) ;

   // pass through the topmost layer unmodified
   k = kpe ;
#ifdef PINNING
   scalar_2[P3IJK(ti,k,tj)+imx] = TexRef3D( scalar,ti,tj,k ) ;
#else
   scalar_2[P3(ti,k,tj)+imx] = TexRef3D( scalar,ti,tj,k ) ;
#endif

   for ( k = kps ; k < kpe ; k++ ) {
    float tendency = 0.0f ;

    // advective tendency: interior points only
    if ( jg >= j_start && jg <= j_end && ig >= i_start && ig <= i_end ) {
     tendency += advect_tend[P3(ti,k,tj)+imx] * TexRef2D(msfty,ti,tj) ;
    }
    // remaining tendency: everywhere inside the "_spc" range
    if ( jg >= j_start_spc && jg <= j_end_spc && ig >= i_start_spc && ig <= i_end_spc ) {
     tendency += sc_tend[P3(ti,k,tj)+imx] ;
    }

    // Refresh scalar_1 from the scalar texture on the first RK step
    // (unconditionally in STANDALONE builds).
#ifndef STANDALONE
    if ( rk_step == 1 ) {
#endif
#ifdef PINNING
      scalar_1[P3IJK(ti,k,tj)+imx] = TexRef3D( scalar,ti,tj,k ) ;
#else
      scalar_1[P3(ti,k,tj)+imx] = TexRef3D( scalar,ti,tj,k ) ;
#endif
#ifndef STANDALONE
    }
#endif

#ifdef PINNING
    scalar_2[P3IJK(ti,k,tj)+imx] = (muold*scalar_1[P3IJK(ti,k,tj)+imx] + dt * tendency)*r_munew ;
#else
    scalar_2[P3(ti,k,tj)+imx] = (muold*scalar_1[P3(ti,k,tj)+imx] + dt * tendency)*r_munew ;
#endif
   }
 }
}
