Index: external/io_pnetcdf/wrf_io.F90
===================================================================
--- external/io_pnetcdf/wrf_io.F90	(revision 3854)
+++ external/io_pnetcdf/wrf_io.F90	(working copy)
@@ -1179,6 +1179,7 @@
   integer                           :: stat
   character (7)                     :: Buffer
   integer                           :: VDimIDs(2)
+  integer                           :: info, ierr   ! added for Blue Gene (see NFMPI_CREATE below)
 
   if(WrfIOnotInitialized) then
     Status = WRF_IO_NOT_INITIALIZED 
@@ -1195,7 +1196,22 @@
   DH%TimeIndex = 0
   DH%Times     = ZeroDate
 !  stat = NFMPI_CREATE(Comm, FileName, NF_CLOBBER, MPI_INFO_NULL, DH%NCID)
-   stat = NFMPI_CREATE(Comm, FileName, IOR(NF_CLOBBER, NF_64BIT_OFFSET), MPI_INFO_NULL, DH%NCID)
+
+#ifndef BLUEGENE
+  stat = NFMPI_CREATE(Comm, FileName, IOR(NF_CLOBBER, NF_64BIT_OFFSET), MPI_INFO_NULL, DH%NCID)
+#else
+!!!!!!!!!!!!!!!
+! Blue Gene: pass an MPI-IO hint at file creation (suggested by Rob Latham).
+! The hint key is cb_buffer_size; a misspelled key is not recognized and has no effect.
+  call mpi_info_create( info, ierr )
+!  call mpi_info_set(info,'cb_buffer_size','4194304',ierr)   ! 4 MB alternative
+  call mpi_info_set(info,'cb_buffer_size','8388608',ierr)    ! 8 MB
+  stat = NFMPI_CREATE(Comm, FileName, IOR(NF_CLOBBER, NF_64BIT_OFFSET), info, DH%NCID)
+  call mpi_info_free( info, ierr)   ! release the info object after the create call
+!
+!!!!!!!!!!!!!!! 
+#endif
+
   call netcdf_err(stat,Status)
   if(Status /= WRF_NO_ERR) then
     write(msg,*) 'NetCDF error in ext_pnc_open_for_write_begin ',__FILE__,', line', __LINE__
Index: external/RSL_LITE/gen_comms.c
===================================================================
--- external/RSL_LITE/gen_comms.c	(revision 3854)
+++ external/RSL_LITE/gen_comms.c	(working copy)
@@ -170,6 +170,10 @@
   fprintf(fp,"  INTEGER ,                    INTENT(IN) :: ims, ime, jms, jme, kms, kme\n") ;
   fprintf(fp,"  INTEGER ,                    INTENT(IN) :: ips, ipe, jps, jpe, kps, kpe\n") ;
   fprintf(fp,"  INTEGER :: itrace\n") ;
+  fprintf(fp,"  INTEGER :: rsl_sendw_p, rsl_sendbeg_p, rsl_recvw_p, rsl_recvbeg_p\n") ;
+  fprintf(fp,"  INTEGER :: rsl_sendw_m, rsl_sendbeg_m, rsl_recvw_m, rsl_recvbeg_m\n") ;
+  fprintf(fp,"  LOGICAL, EXTERNAL :: rsl_comm_iter\n") ;
+  fprintf(fp,"  INTEGER :: idim1, idim2, idim3, idim4, idim5, idim6, idim7\n") ;
   }
 
 int print_body( FILE * fp, char * commname )
@@ -1605,6 +1609,7 @@
     sprintf( Shift.use, "" ) ;
     strcpy( Shift.comm_define, "SHW:" ) ;
     strcpy( Shift.name , fname ) ;
+    if ( sw_move ) {
     for ( p = Domain.fields ; p != NULL ; p = p->next ) {
       if (( p->node_kind & (FIELD | FOURD) ) && p->ndims >= 2 && ! p->boundary_array )
       {
@@ -1644,6 +1649,7 @@
       }
     }
     if ( strlen(Shift.comm_define) > 0 )Shift.comm_define[strlen(Shift.comm_define)-1] = '\0' ;
+    }
 
     gen_halos( dirname , NULL, &Shift ) ;
 
@@ -1651,6 +1657,7 @@
     if ((fp = fopen( fname , "w" )) == NULL ) return(1) ;
 
 /* now generate the shifts themselves */
+    if ( sw_move ) {
     for ( p = Domain.fields ; p != NULL ; p = p->next )
     {
 
@@ -1864,6 +1871,7 @@
 	}
       }
     }
+    } /* if sw_move */
     close_the_file(fp) ;
   }
 }
Index: phys/Makefile
===================================================================
--- phys/Makefile	(revision 3854)
+++ phys/Makefile	(working copy)
@@ -351,13 +351,7 @@
 module_force_scm.o: \
 		../dyn_em/module_init_utilities.o 
 
-get_sfire_id.inc:
-	        chmod +x commit_hash
-		./commit_hash > sfire_id.inc
-		cat sfire_id.inc
-
 module_fr_sfire_driver.o: \
-                get_sfire_id.inc \
 		../share/module_model_constants.o  \
 		module_fr_sfire_atm.o \
 		module_fr_sfire_phys.o \
Index: phys/commit_hash
===================================================================
--- phys/commit_hash	(revision 3854)
+++ phys/commit_hash	(working copy)
@@ -1,2 +0,0 @@
-#!/bin/bash
-echo "id='5ef422698db930faf5f742c098480859b7b24790'"
Index: phys/module_fr_sfire_driver.F
===================================================================
--- phys/module_fr_sfire_driver.F	(revision 3854)
+++ phys/module_fr_sfire_driver.F	(working copy)
@@ -1041,7 +1041,8 @@
 
 subroutine print_id
 character(len=128)::id,msg
-#include "sfire_id.inc"
+!#include "sfire_id.inc"
+id='5ef422698db930faf5f742c098480859b7b24790'
 msg='version '//id
 call message(msg)
 end subroutine print_id
Index: frame/module_alloc_space.F
===================================================================
--- frame/module_alloc_space.F	(revision 3854)
+++ frame/module_alloc_space.F	(working copy)
@@ -88,7 +88,7 @@
 
       WRITE(message,*)&
           'alloc_space_field: domain ',id,', ',num_bytes_allocated,' bytes allocated'
-      CALL  wrf_debug( 1, message )
+      CALL  wrf_debug( 0, message )
 
    END SUBROUTINE alloc_space_field_core
 END MODULE module_alloc_space
Index: frame/module_io.F
===================================================================
--- frame/module_io.F	(revision 3854)
+++ frame/module_io.F	(working copy)
@@ -3972,12 +3972,6 @@
 
         IF ( nproc .GT. 1 ) THEN
 
-if ( trim(VarName) .eq. 'T_BYS' ) write(0,*)__FILE__,'MemOrd(1:2) ',MemOrd(1:2)
-if ( trim(VarName) .eq. 'T_BYS' ) write(0,*)__FILE__,'bdy_mask(P_YSB) ',bdy_mask( P_YSB)
-if ( trim(VarName) .eq. 'T_BYS' ) write(0,*)__FILE__,'bdy_mask(P_YEB) ',bdy_mask( P_YEB)
-if ( trim(VarName) .eq. 'T_BYS' ) write(0,*)__FILE__,'ips - 1 ',ips-1
-if ( trim(VarName) .eq. 'T_BYS' ) write(0,*)__FILE__,'ipe-ips+1 ', ipe-ips+1
-
 #if 1
           IF ( (MemOrd(1:2) .EQ. 'ys' .AND. bdy_mask( P_YSB )) .OR.     &
                (MemOrd(1:2) .EQ. 'ye' .AND. bdy_mask( P_YEB ))       ) THEN
@@ -3998,15 +3992,9 @@
           ENDIF
 #endif
 
-if ( trim(VarName) .eq. 'T_BYS' ) write(0,*)__FILE__,'my_displ ', my_displ
-if ( trim(VarName) .eq. 'T_BYS' ) write(0,*)__FILE__,'my_count ', my_count
-
           CALL mpi_gather( my_displ, 1, MPI_INTEGER, displs, 1, MPI_INTEGER, collective_root, communicator, ierr )
           CALL mpi_gather( my_count, 1, MPI_INTEGER, counts, 1, MPI_INTEGER, collective_root, communicator, ierr )
 
-if ( trim(VarName) .eq. 'T_BYS' ) write(0,*)__FILE__,'displs ', displs(1:nproc)
-if ( trim(VarName) .eq. 'T_BYS' ) write(0,*)__FILE__,'counts ', counts(1:nproc)
-
           do j = jds,jde    ! bdy_width
           do k = kds,kde    ! levels
              lx   = ime-ims+1
Index: arch/configure_new.defaults
===================================================================
--- arch/configure_new.defaults	(revision 3854)
+++ arch/configure_new.defaults	(working copy)
@@ -200,7 +200,7 @@
 ESMF_LDFLAG     =       $(CPLUSPLUSLIB)
 FCOPTIM         =       -O3
 FCREDUCEDOPT	=       $(FCOPTIM)
-FCNOOPT		=       -O0
+FCNOOPT		=       -O0 -fno-inline -fno-ip
 FCDEBUG         =       # -g $(FCNOOPT) -traceback
 FORMAT_FIXED    =       -FI
 FORMAT_FREE     =       -FR
@@ -273,7 +273,7 @@
 ESMF_LDFLAG     =       $(CPLUSPLUSLIB)
 FCOPTIM         =       -O3
 FCREDUCEDOPT	=       $(FCOPTIM)
-FCNOOPT		=       -O0
+FCNOOPT		=       -O0 -fno-inline -fno-ip
 FCDEBUG         =       # -g $(FCNOOPT) -traceback
 FORMAT_FIXED    =       -FI
 FORMAT_FREE     =       -FR
@@ -350,7 +350,7 @@
 ESMF_LDFLAG     =       $(CPLUSPLUSLIB)
 FCOPTIM         =       -O3
 FCREDUCEDOPT	=       $(FCOPTIM)
-FCNOOPT		=       -O0
+FCNOOPT		=       -O0 -fno-inline -fno-ip
 FCDEBUG         =       # -g $(FCNOOPT) -traceback
 FORMAT_FIXED    =       -FI
 FORMAT_FREE     =       -FR
@@ -383,8 +383,8 @@
 LD              =       $(FC)
 RWORDSIZE       =       CONFIGURE_RWORDSIZE
 PROMOTION       =       -r$(RWORDSIZE) -i4
-ARCH_LOCAL      =       -DF2CSTYLE -DNO_RRTM_PHYSICS -DNONSTANDARD_SYSTEM_SUBR
-CFLAGS_LOCAL    =       -DF2CSTYLE
+ARCH_LOCAL      =       -DNO_RRTM_PHYSICS -DNONSTANDARD_SYSTEM_SUBR
+CFLAGS_LOCAL    =       
 LDFLAGS_LOCAL   =
 CPLUSPLUSLIB    =       
 ESMF_LDFLAG     =       $(CPLUSPLUSLIB)
@@ -392,12 +392,12 @@
 FCREDUCEDOPT	=       $(FCOPTIM)
 FCNOOPT         =       -O0
 FCDEBUG         =       # -g $(FCNOOPT)
-FORMAT_FIXED    =       -fixed-form
-FORMAT_FREE     =       -free-form
+FORMAT_FIXED    =       -fixedform
+FORMAT_FREE     =       -freeform
 FCSUFFIX        =
 BYTESWAPIO      =       -byteswapio
 FCBASEOPTS      =       -w -fno-second-underscore $(FCDEBUG) $(FORMAT_FREE) $(BYTESWAPIO)
-MODULE_SRCH_FLAG =     -fmod=$(WRF_SRC_ROOT_DIR)/main
+MODULE_SRCH_FLAG =     -module $(WRF_SRC_ROOT_DIR)/main
 TRADFLAG        =      -traditional
 CPP             =      /lib/cpp -C -P
 AR              =      ar
@@ -499,18 +499,20 @@
 PROMOTION       =        -i4
 ARCH_LOCAL      =       -DMACOS -DNONSTANDARD_SYSTEM_FUNC
 CFLAGS_LOCAL    =       -w -O3 -ip -DMACOS
-LDFLAGS_LOCAL   =       -ip
+# increase stack size; for OpenMP runs, also set the environment variable OMP_STACKSIZE to 4G or greater
+LDFLAGS_LOCAL   =       -ip -Wl,-stack_addr,0xF10000000 -Wl,-stack_size,0x64000000
 CPLUSPLUSLIB    =       
 ESMF_LDFLAG     =       $(CPLUSPLUSLIB)
 FCOPTIM         =       -O3
 FCREDUCEDOPT	=       $(FCOPTIM)
-FCNOOPT         =       -O0
+FCNOOPT         =       -O0 -fno-inline -fno-ip
 FCDEBUG         =       # -g $(FCNOOPT) -traceback
 FORMAT_FIXED    =       -FI
 FORMAT_FREE     =       -FR
 FCSUFFIX        =
 BYTESWAPIO      =       -convert big_endian
-FCBASEOPTS      =       -w -ftz -align all -fno-alias -fp-model precise $(FCDEBUG) $(FORMAT_FREE) $(BYTESWAPIO)
+# added -fno-common at suggestion of R. Dubtsov as workaround for failing to link program_name
+FCBASEOPTS      =       -w -ftz -align all -fno-alias -fp-model precise -fno-common $(FCDEBUG) $(FORMAT_FREE) $(BYTESWAPIO)
 MODULE_SRCH_FLAG =
 TRADFLAG        =      -traditional
 CPP             =      cpp -C -P -xassembler-with-cpp
@@ -537,18 +539,20 @@
 PROMOTION       =        -i4
 ARCH_LOCAL      =       -DMACOS -DNONSTANDARD_SYSTEM_FUNC
 CFLAGS_LOCAL    =       -w -O3 -DMACOS
-LDFLAGS_LOCAL   =       
+# increase stack size; for OpenMP runs, also set the environment variable OMP_STACKSIZE to 4G or greater
+LDFLAGS_LOCAL   =       -ip -Wl,-stack_addr,0xF10000000 -Wl,-stack_size,0x64000000
 CPLUSPLUSLIB    =       
 ESMF_LDFLAG     =       $(CPLUSPLUSLIB)
 FCOPTIM         =       -O3
 FCREDUCEDOPT	=       $(FCOPTIM)
-FCNOOPT         =       -O0
+FCNOOPT         =       -O0 -fno-inline -fno-ip
 FCDEBUG         =       # -g $(FCNOOPT) -traceback
 FORMAT_FIXED    =       -FI
 FORMAT_FREE     =       -FR
 FCSUFFIX        =
 BYTESWAPIO      =       -convert big_endian
-FCBASEOPTS      =       -w -ftz -align all -fno-alias -fp-model precise $(FCDEBUG) $(FORMAT_FREE) $(BYTESWAPIO)
+# added -fno-common at suggestion of R. Dubtsov as workaround for failing to link program_name
+FCBASEOPTS      =       -w -ftz -align all -fno-alias -fp-model precise -fno-common $(FCDEBUG) $(FORMAT_FREE) $(BYTESWAPIO)
 MODULE_SRCH_FLAG =
 TRADFLAG        =      -traditional
 CPP             =      cpp -C -P -xassembler-with-cpp
@@ -818,7 +822,7 @@
 CC_TOOLS        =      $(SCC)
 
 ###########################################################
-#ARCH    Linux ppc64 BG blxlf compiler with blxlc # dmpar
+#ARCH    Linux ppc64 BG /L blxlf compiler with blxlc # dmpar
 #
 DMPARALLEL      =       # 1
 OMPCPP		=	# not supported
@@ -836,7 +840,7 @@
 RWORDSIZE       =       CONFIGURE_RWORDSIZE
 PROMOTION       =        -qrealsize=$(RWORDSIZE) -qintsize=4
 # If system has even more processors, set VERY_LARGE_MAXPROC to that number
-ARCH_LOCAL      =       -DMOVE_NL_OUTSIDE_MODULE_CONFIGURE -DNONSTANDARD_SYSTEM_SUBR -DLANDREAD_STUB -DVERY_LARGE_MAXPROC=36768
+ARCH_LOCAL      =       -DMOVE_NL_OUTSIDE_MODULE_CONFIGURE -DNONSTANDARD_SYSTEM_SUBR -DLANDREAD_STUB -DVERY_LARGE_MAXPROC=36768 -DBLUEGENE
 CFLAGS_LOCAL    =       -DNOUNDERSCORE -DNCARIBM_NOC99 $(MPI_INC)  -DLANDREAD_STUB
 LIB_LOCAL       =       $(MPI_LIB)
 LDFLAGS_LOCAL   =       -Wl,--allow-multiple-definition -qstatic
@@ -862,7 +866,87 @@
 M4 		=       m4 -B 14000
 RANLIB 		=       ranlib
 CC_TOOLS        =       cc
-
 ###########################################################
+#ARCH    Linux ppc64 BG /P xlf compiler with xlc # smpar dmpar dm+sm
+#     developed on surveyor.alcf.anl.gov (thanks to ANL/ALCF)
+#
+DMPARALLEL      =       # 1
+OMPCPP		=	# -D_OPENMP
+OMP		=	# -qsmp=noauto
+# these have to be the same as DM_FC on surveyor or it fails with weird errors in time manager
+SFC             =       time mpixlf90_r
+SCC             =       time mpixlc_r
+DM_FC           =       time mpixlf90_r
+DM_CC           =       time mpixlc_r -DMPI2_SUPPORT
+FC              =        $(DM_FC)
+CC              =       $(DM_CC) -DFSEEKO64_OK
+LD              =       $(FC)
+RWORDSIZE       =       CONFIGURE_RWORDSIZE
+PROMOTION       =        -qrealsize=$(RWORDSIZE) -qintsize=4
+# If system has even more processors, set VERY_LARGE_MAXPROC to that number
+ARCH_LOCAL      =       -DMOVE_NL_OUTSIDE_MODULE_CONFIGURE -DNONSTANDARD_SYSTEM_SUBR -DLANDREAD_STUB -DVERY_LARGE_MAXPROC=36768 -DBLUEGENE
+CFLAGS_LOCAL    =       -DNOUNDERSCORE -DLANDREAD_STUB
+LIB_LOCAL       =
+LDFLAGS_LOCAL   =       -Wl,--allow-multiple-definition -qstatic
+CPLUSPLUSLIB    =       
+ESMF_LDFLAG     =       $(CPLUSPLUSLIB)
+FCOPTIM		=       -O3 -qnoipa -qarch=auto -qcache=auto -qtune=auto
+FCNOOPT		=       -qnoopt
+FCDEBUG         =       # $(FCNOOPT) -g -qfullpath
+FORMAT_FIXED    =       -qfixed
+FORMAT_FREE     =       -qfree=f90
+FCSUFFIX        =       -qsuffix=f=f90
+BYTESWAPIO      =       
+FCBASEOPTS      =       -w -qspill=20000 -qmaxmem=64000 $(FCDEBUG) $(FORMAT_FREE) $(BYTESWAPIO) #-qflttrap=zerodivide:invalid:enable -qsigtrap
+MODULE_SRCH_FLAG =     
+TRADFLAG        =       
+# this might be different on different systems but we want the xlf version of cpp, not Linux's
+# surveyor.alcf.anl.gov
+CPP             =       /opt/ibmcmp/xlf/bg/11.1/exe/cpp -C -P
+AR              =       ar
+ARFLAGS         =       ru
+M4 		=       m4 -B 14000
+RANLIB 		=       ranlib
+CC_TOOLS        =       cc
+###########################################################
+#ARCH    Linux ppc64 IBM Blade Server xlf compiler with xlc # dmpar
+#    provided by Luis C. Cana Cascallar for IBM JS21 blade server, May 2009
+#
+DMPARALLEL      =       # 1
+OMPCPP		=	# not supported
+OMP		=	# not supported
+SFC		=       xlf90_r -q64
+SCC		=       xlc_r -q64
+DM_FC		=	mpif90 -q64
+DM_CC		=       mpicc -q64 -DMPI2_SUPPORT  -DFSEEKO64_OK
+FC              =       CONFIGURE_FC
+CC              =       CONFIGURE_CC
+LD		=	$(FC)
+RWORDSIZE       =       CONFIGURE_RWORDSIZE
+PROMOTION       =        -qrealsize=$(RWORDSIZE) -qintsize=4
+# If system has even more processors, set VERY_LARGE_MAXPROC to that number
+ARCH_LOCAL      =       -DMOVE_NL_OUTSIDE_MODULE_CONFIGURE -DNONSTANDARD_SYSTEM_SUBR -DLANDREAD_STUB -DVERY_LARGE_MAXPROC=36768
+CFLAGS_LOCAL    =       -DNOUNDERSCORE -DLANDREAD_STUB
+LDFLAGS_LOCAL   =       
+CPLUSPLUSLIB    =       -lC
+ESMF_LDFLAG     =       $(CPLUSPLUSLIB)
+FCOPTIM		=       -O3 -qtune=ppc970 -qarch=ppc970 
+FCNOOPT		=       -qnoopt
+FCDEBUG         =       # -g $(FCNOOPT) -qfullpath
+FORMAT_FIXED    =       -qfixed
+FORMAT_FREE     =       -qfree=f90
+FCSUFFIX        =       -qsuffix=f=f90
+BYTESWAPIO      =       
+FCBASEOPTS      =       -w -qspill=20000 -qmaxmem=32767 $(FCDEBUG) $(FORMAT_FREE) $(BYTESWAPIO) #-qflttrap=zerodivide:invalid:enable -qsigtrap
+MODULE_SRCH_FLAG =     
+TRADFLAG        =       
+# this might be different on different systems but we want the xlf version of cpp, not Linux's
+CPP             =       /opt/ibmcmp/xlf/11.1/exe/cpp -C -P
+AR              =       ar
+ARFLAGS         =       ru
+M4 		=       m4 -B 14000
+RANLIB 		=       ranlib
+CC_TOOLS        =       xlc -q64
+###########################################################
 #ARCH  NULL
 
