#include <stdio.h>
#include <stdlib.h>
#if defined( DM_PARALLEL ) && ! defined( STUBMPI )
# include <mpi.h>
#endif

#ifndef CRAY
# ifdef NOUNDERSCORE
#      define COLLECT_ON_COMM  collect_on_comm
#      define COLLECT_ON_COMM0 collect_on_comm0
#      define DIST_ON_COMM  dist_on_comm
#      define DIST_ON_COMM0 dist_on_comm0
#      define INT_PACK_DATA  int_pack_data
#      define INT_GET_TI_HEADER_C  int_get_ti_header_c
#      define INT_GEN_TI_HEADER_C  int_gen_ti_header_c
# else
#   ifdef F2CSTYLE
#      define COLLECT_ON_COMM  collect_on_comm__
#      define COLLECT_ON_COMM0 collect_on_comm0__
#      define DIST_ON_COMM  dist_on_comm__
#      define DIST_ON_COMM0 dist_on_comm0__
#      define INT_PACK_DATA  int_pack_data__
#      define INT_GET_TI_HEADER_C  int_get_ti_header_c__
#      define INT_GEN_TI_HEADER_C  int_gen_ti_header_c__
#   else
#      define COLLECT_ON_COMM  collect_on_comm_
#      define COLLECT_ON_COMM0 collect_on_comm0_
#      define DIST_ON_COMM  dist_on_comm_
#      define DIST_ON_COMM0 dist_on_comm0_
#      define INT_PACK_DATA  int_pack_data_
#      define INT_GET_TI_HEADER_C  int_get_ti_header_c_
#      define INT_GEN_TI_HEADER_C  int_gen_ti_header_c_
#   endif
# endif
#endif

COLLECT_ON_COMM ( int * comm, int * typesize ,
                 void * inbuf, int *ninbuf , void * outbuf, int * noutbuf )
{
  col_on_comm ( comm, typesize ,
                inbuf, ninbuf , outbuf, noutbuf, 1 ) ;
}

/* collect on node 0*/
COLLECT_ON_COMM0 ( int * comm, int * typesize ,
                 void * inbuf, int *ninbuf , void * outbuf, int * noutbuf )
{
  col_on_comm ( comm, typesize ,
                inbuf, ninbuf , outbuf, noutbuf, 0 ) ;
}

col_on_comm ( int * Fcomm, int * typesize ,
              void * inbuf, int *ninbuf , void * outbuf, int * noutbuf, int sw )
{
#if defined( DM_PARALLEL ) && !defined(STUBMPI)
  int mytask, ntasks, p ;
  int *recvcounts ;
  int *displace ;
  int noutbuf_loc ;
  int root_task ;
  MPI_Comm *comm, dummy_comm ;

  comm = &dummy_comm ;
  *comm = MPI_Comm_f2c( *Fcomm ) ;
  MPI_Comm_size ( *comm, &ntasks ) ;
  MPI_Comm_rank ( *comm, &mytask ) ;
  recvcounts = (int *) malloc( ntasks * sizeof(int)) ;
  displace   = (int *) malloc( ntasks * sizeof(int)) ;
  root_task = ( sw == 0 ) ? 0 : ntasks-1 ;

  /* collect up recvcounts */
  MPI_Gather( ninbuf , 1 , MPI_INT , recvcounts , 1 , MPI_INT , root_task , *comm ) ;

  if ( mytask == root_task ) {

    /* figure out displacements */
    for ( p = 1 , displace[0] = 0 , noutbuf_loc = recvcounts[0] ; p < ntasks ; p++ ) {
      displace[p] = displace[p-1]+recvcounts[p-1] ;
      noutbuf_loc = noutbuf_loc + recvcounts[p] ;
    }

    if ( noutbuf_loc > * noutbuf )
    {
      fprintf(stderr,"FATAL ERROR: collect_on_comm: noutbuf_loc (%d) > noutbuf (%d)\n",
		      noutbuf_loc , * noutbuf ) ; 
      fprintf(stderr,"WILL NOT perform the collection operation\n") ;
      MPI_Abort(MPI_COMM_WORLD,1) ;
    }

    /* multiply everything by the size of the type */
    for ( p = 0 ; p < ntasks ; p++ ) {
      displace[p] *= *typesize ;
      recvcounts[p] *= *typesize ;
    }
  }

  MPI_Gatherv( inbuf  , *ninbuf * *typesize  , MPI_CHAR ,
               outbuf , recvcounts , displace, MPI_CHAR ,
               root_task , *comm ) ;

  free(recvcounts) ;
  free(displace) ;
#endif
  return(0) ;
}


DIST_ON_COMM ( int * comm, int * typesize ,
                 void * inbuf, int *ninbuf , void * outbuf, int * noutbuf )
{
  dst_on_comm ( comm, typesize ,
                inbuf, ninbuf , outbuf, noutbuf, 1 ) ;
}

DIST_ON_COMM0 ( int * comm, int * typesize ,
                 void * inbuf, int *ninbuf , void * outbuf, int * noutbuf )
{
  dst_on_comm ( comm, typesize ,
                inbuf, ninbuf , outbuf, noutbuf, 0 ) ;
}

dst_on_comm ( int * Fcomm, int * typesize ,
              void * inbuf, int *ninbuf , void * outbuf, int * noutbuf, int sw )
{
#if defined(DM_PARALLEL) && ! defined(STUBMPI)
  int mytask, ntasks, p ;
  int *sendcounts ;
  int *displace ;
  int noutbuf_loc ;
  int root_task ;
  MPI_Comm *comm, dummy_comm ;

  comm = &dummy_comm ;
  *comm = MPI_Comm_f2c( *Fcomm ) ;
  MPI_Comm_size ( *comm, &ntasks ) ;
  MPI_Comm_rank ( *comm, &mytask ) ;
  sendcounts = (int *) malloc( ntasks * sizeof(int)) ;
  displace   = (int *) malloc( ntasks * sizeof(int)) ;
  root_task = ( sw == 0 ) ? 0 : ntasks-1 ;

  /* collect up sendcounts */
  MPI_Gather( noutbuf , 1 , MPI_INT , sendcounts , 1 , MPI_INT , root_task , *comm ) ;

  if ( mytask == root_task ) {

    /* figure out displacements */
    for ( p = 1 , displace[0] = 0 , noutbuf_loc = sendcounts[0] ; p < ntasks ; p++ ) {
      displace[p] = displace[p-1]+sendcounts[p-1] ;
      noutbuf_loc = noutbuf_loc + sendcounts[p] ;
    }

    /* multiply everything by the size of the type */
    for ( p = 0 ; p < ntasks ; p++ ) {
      displace[p] *= *typesize ;
      sendcounts[p] *= *typesize ;
    }
  }

  MPI_Scatterv( inbuf   , sendcounts , displace, MPI_CHAR ,
                outbuf  , *noutbuf * *typesize  , MPI_CHAR ,
                root_task , *comm ) ;

  free(sendcounts) ;
  free(displace) ;
#endif
  return(0) ;
}

#ifndef MACOS
#  include <malloc.h>
#  include <sys/resource.h>
#endif

#if 0
  int getrusage(
          int who,
          struct rusage *r_usage);
#endif

#if 0
extern int outy ;
extern int maxstug, nouty, maxouty ;
#endif

#if 0
#include <unistd.h>
#include <sys/times.h>
/*  used internally for chasing memory leaks on ibm  */
rlim_ ()
{
#ifndef MACOS

   struct rusage r_usage ;
   struct mallinfo minf ;
   struct tms  tm ;
   long tick, tock ;

   tick = sysconf( _SC_CLK_TCK ) ;
   times( &tm ) ;
   tock = (tm.tms_utime + tm.tms_stime)*tick ;

   getrusage ( RUSAGE_SELF, &r_usage ) ;
   if ( tock != 0 ) {
     fprintf(stderr,"sm %ld d %ld s %ld maxrss %ld %d %d %ld\n",r_usage.ru_ixrss/tock,r_usage.ru_idrss/tock,r_usage.ru_isrss/tock, r
_usage.ru_maxrss,tick,tock,r_usage.ru_ixrss) ;
   }
   minf = mallinfo() ;
   fprintf(stderr,"a %ld usm %ld fsm %ld uord %ld ford %ld hblkhd %d\n",minf.arena,minf.usmblks,minf.fsmblks,minf.uordblks,minf.ford
blks,minf.hblkhd) ;
# if 0
   fprintf(stderr," outy %d  nouty %d  maxstug %d maxouty %d \n", outy, nouty, maxstug, maxouty ) ;
# endif
#endif
}
#endif


#ifdef RAW_MPI_IO_OUTPUT
# ifdef NOUNDERSCORE
#   define MPI_RAW_IO_OPEN  mpi_raw_io_open
#   define MPI_RAW_IO_WRITE mpi_raw_io_write
#   define MPI_RAW_IO_READ  mpi_raw_io_read
#   define MPI_RAW_IO_CLOSE mpi_raw_io_close
#   define RSL_INTERNAL_MILLICLOCK rsl_internal_milliclock
#   define RSL_INTERNAL_MICROCLOCK rsl_internal_microclock
# else
#   ifdef F2CSTYLE
#     define MPI_RAW_IO_OPEN mpi_raw_io_open__
#     define MPI_RAW_IO_WRITE mpi_raw_io_write__
#     define MPI_RAW_IO_READ  mpi_raw_io_read__
#     define MPI_RAW_IO_CLOSE mpi_raw_io_close__
#     define RSL_INTERNAL_MILLICLOCK rsl_internal_milliclock__
#     define RSL_INTERNAL_MICROCLOCK rsl_internal_microclock__
#   else
#     define MPI_RAW_IO_OPEN mpi_raw_io_open_
#     define MPI_RAW_IO_WRITE mpi_raw_io_write_
#     define MPI_RAW_IO_READ  mpi_raw_io_read_
#     define MPI_RAW_IO_CLOSE mpi_raw_io_close_
#     define RSL_INTERNAL_MILLICLOCK rsl_internal_milliclock_
#     define RSL_INTERNAL_MICROCLOCK rsl_internal_microclock_
#   endif
# endif

#include "mpi.h"

static FILE  *fp_repl ;
static int ks, ke ;
static MPI_Comm *comm, dummy_comm ;
static MPI_Datatype filetype2, filetype3 ;
static MPI_Datatype memtype2, memtype3 ;
static MPI_File fh, fh2d ;

int
MPI_RAW_IO_CLOSE ( )
{
  int result, rank  ;
  MPI_Comm_rank( *comm, &rank ) ;
  result = MPI_File_close(&fh) ;
  result = MPI_File_close(&fh2d) ;
  if ( rank == 0 ) fclose(fp_repl) ;
}

int
MPI_RAW_IO_WRITE ( char * buf, int *ndims0, int * nk )
{
  int ierr, rank ;
  int s, e ;
  MPI_Status status ;
  
  fprintf(stderr,"MPI_RAW_IO_WRITE called ndims %d nk %d\n",*ndims0,*nk) ;
  MPI_Comm_rank( *comm, &rank ) ;
  s = RSL_INTERNAL_MILLICLOCK () ;
  if ( *ndims0 == 3 ) {
    fprintf(stderr,"MPI_RAW_IO_WRITE: calling MPI_File_write_all(3) \n") ;
    ierr = MPI_File_write_all( fh, buf, 1, memtype3, &status ) ;
    fprintf(stderr,"MPI_RAW_IO_WRITE: back from MPI_File_write_all(3) ierr = %d\n",ierr) ;
  } else if ( *ndims0 == 2 ) {
    fprintf(stderr,"MPI_RAW_IO_WRITE: calling MPI_File_write_all(2) \n") ;
    ierr = MPI_File_write_all( fh2d, buf, 1, memtype2, &status ) ;
    fprintf(stderr,"MPI_RAW_IO_WRITE: back from MPI_File_write_all(2) ierr = %d\n",ierr) ;
  } else if ( *ndims0 == 1 ) {
    if ( rank == 0 ) {
      fprintf(stderr,"MPI_RAW_IO_WRITE: fwrite %d\n", *nk) ;
      fwrite( buf , 4, *nk, fp_repl ) ;
    }
  } else if ( *ndims0 == 0 ) {
    if ( rank == 0 ) {
      fprintf(stderr,"MPI_RAW_IO_WRITE: fwrite %d\n", 1) ;
      fwrite( buf , 4, 1, fp_repl ) ;
    }
  } else {
    fprintf(stderr,"MPI_RAW_IO_WRITE: do nothing return\n") ;
    return(1) ;
  } 
  fprintf(stderr,"MPI_RAW_IO_WRITE: normal return\n") ;
  e = RSL_INTERNAL_MILLICLOCK () ;
  if ( rank == 0 ) fprintf(stderr,"MPI_RAW_IO_WRITE ndims %d milliseconds %d\n",*ndims0,e-s) ;
  return(0) ;
}

int
MPI_RAW_IO_READ ( char * buf, int *ndims0, int *nk )
{
  int ierr, rank ;
  int s, e ;
  MPI_Status status ;
  MPI_Comm_rank( *comm, &rank ) ;
  s = RSL_INTERNAL_MILLICLOCK () ;
  if ( *ndims0 == 3 ) {
    ierr = MPI_File_read_all( fh, buf, 1, memtype3, &status ) ;
  } else if ( *ndims0 == 2 ) {
    ierr = MPI_File_read_all( fh2d, buf, 1, memtype2, &status ) ;
  } else if ( *ndims0 == 1 ) {
    if ( rank == 0 ) {
      fread( buf , 4, *nk, fp_repl ) ;
    }
    MPI_Bcast( buf, *nk,MPI_INT,0,*comm) ;
  } else if ( *ndims0 == 0 ) {
    if ( rank == 0 ) {
      fread( buf , 4, 1, fp_repl ) ;
    }
    MPI_Bcast( buf, 1,MPI_INT,0,*comm) ;
  } else {
    return(1) ;
  }
  e = RSL_INTERNAL_MILLICLOCK () ;
  if ( rank == 0 ) fprintf(stderr,"MPI_RAW_IO_READ ndims %d milliseconds %d\n",*ndims0,e-s) ;
  return(0) ;
}

int
MPI_RAW_IO_OPEN ( int * Fcomm, char * fname0 , int *fnamelen0 , int *rw0 ,  int *ws0 , /* 1 is read, otherwise write */
                  int * ids0, int * ide0, int * jds0, int * jde0, int * kds0, int * kde0 ,
                  int * ims0, int * ime0, int * jms0, int * jme0, int * kms0, int * kme0 ,
                  int * ips0, int * ipe0, int * jps0, int * jpe0, int * kps0, int * kpe0 )
{
  int ndims, i ;
  int ids,ide,jds,jde,kds,kde,
    ims,ime,jms,jme,kms,kme,
    ips,ipe,jps,jpe,kps,kpe ;
  MPI_Datatype filetype, memtype ;
  MPI_File fh_tmp ;
  int gsizes[3] ;
  int msizes[3] ;
  int lsizes[3] ;
  int start_indices[3] ;
  char fname[128], fname_repl[128], fname_tmp[128] ;
  int ierr, rank ;

  comm = &dummy_comm ;
  *comm = MPI_Comm_f2c( *Fcomm ) ;

  for ( i = 0 ; i < *fnamelen0 && i < 127 ; i++ ) {
    fname[i] = fname0[i] ;
  }

  fname[i] = '\0' ;

  MPI_Comm_rank( *comm, &rank ) ;
  ks = *kds0 ;
  ke = *kde0 ;
  if ( rank == 0 ) {
    sprintf(fname_repl,"%s_replicated",fname) ;
    if (( fp_repl = fopen( fname_repl, (*rw0==1)?"r":"w" )) == NULL ) {
      fprintf(stderr,"Failure opening %s for %s\n",fname_repl,(*rw0==1)?"reading":"writing") ; 
      perror("couldn't open _replicated file");
    }
  }
      
  ids = *ids0-1 ,ide=*ide0-1,jds=*jds0-1,jde=*jde0-1,kds=*kds0-1,kde=*kde0-1 ;
  ims = *ims0-1 ,ime=*ime0-1,jms=*jms0-1,jme=*jme0-1,kms=*kms0-1,kme=*kme0-1 ;
  ips = *ips0-1 ,ipe=*ipe0-1,jps=*jps0-1,jpe=*jpe0-1,kps=*kps0-1,kpe=*kpe0-1 ;
#if 1
  /*  MMC change Sept 21 2007: */
/*   for ( ndims = 2 ; ndims <= 3 ; ndims++ ) { */

  for ( ndims = 2 ; ndims <= 2 ; ndims++ ) {

    if ( ndims == 3 ) {
      gsizes[0] = ide-ids+1 ; gsizes[1] = kde-kds+1 ; gsizes[2] = jde-jds+1 ;
      msizes[0] = ime-ims+1 ; msizes[1] = kme-kms+1 ; msizes[2] = jme-jms+1 ;
      lsizes[0] = ipe-ips+1 ; lsizes[1] = kpe-kps+1 ; lsizes[2] = jpe-jps+1 ;
      start_indices[0] = ips ; start_indices[1] = kps ; start_indices[2] = jps ;
      sprintf(fname_tmp,"%s3d",fname) ;
    } else if ( ndims == 2 ) {
      gsizes[0] = ide-ids+1 ; gsizes[1] = jde-jds+1 ;
      msizes[0] = ime-ims+1 ; msizes[1] = jme-jms+1 ;
      lsizes[0] = ipe-ips+1 ; lsizes[1] = jpe-jps+1 ;
      start_indices[0] = ips ; start_indices[1] = jps ;
      sprintf(fname_tmp,"%s2d",fname) ;
    } else {
      return(1) ;
    }
    
    fprintf(stderr,"ndims,gsizes[0],gsizes[1],gsizes[2] %d %d %d %d\n",ndims,gsizes[0],gsizes[1],gsizes[2]) ;
    fprintf(stderr,"ndims,msizes[0],msizes[1],msizes[2] %d %d %d %d\n",ndims,msizes[0],msizes[1],msizes[2]) ;
    fprintf(stderr,"ndims,lsizes[0],lsizes[1],lsizes[2] %d %d %d %d\n",ndims,lsizes[0],lsizes[1],lsizes[2]) ;
    
    ierr = MPI_Type_create_subarray( ndims, gsizes, lsizes, start_indices, MPI_ORDER_FORTRAN, *ws0==8?MPI_DOUBLE:MPI_FLOAT, &filetype ) ;
    ierr = MPI_Type_commit( &filetype ) ;
    if ( *rw0 == 1 ) {
      ierr = MPI_File_open( *comm, fname_tmp, MPI_MODE_RDONLY, MPI_INFO_NULL, &fh_tmp ) ;
    } else {
    
      fprintf(stderr,"Opening %s for %d dims\n",fname_tmp,ndims) ;
      
      ierr = MPI_File_open( *comm, fname_tmp, MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh_tmp ) ;
      
      fprintf(stderr,"MPI_File_open returns ierr = %d\n", ierr) ;
    }
    
    ierr = MPI_File_set_view(fh_tmp,0,*ws0==8?MPI_DOUBLE:MPI_FLOAT,filetype,"native",MPI_INFO_NULL) ;
    
    fprintf(stderr,"MPI_File_set_view returns ierr = %d\n", ierr) ;
    
    if ( ndims == 3 ) {
      start_indices[0] = ips-ims ; start_indices[1] = kps-kms ; start_indices[2] = jps-jms ;
    } else if ( ndims == 2 ) {
      start_indices[0] = ips-ims ; start_indices[1] = jps-jms ;
    }
    fprintf(stderr,"ndims,start_indices[0],start_indices[1],start_indices[2] %d %d %d %d\n",ndims,start_indices[0],start_indices[1],start_indices[2]) ;
    
    ierr = MPI_Type_create_subarray( ndims, msizes, lsizes, start_indices, MPI_ORDER_FORTRAN, *ws0==8?MPI_DOUBLE:MPI_FLOAT, &memtype ) ;
    
    fprintf(stderr,"MPI_Type_create_subarray returns ierr = %d\n", ierr) ;
    
    ierr = MPI_Type_commit( &memtype ) ;
    
    fprintf(stderr,"MPI_Type_commit returns ierr = %d\n", ierr) ;
   
    if ( ndims == 3 ) {
      filetype3 = filetype ; memtype3 = memtype ; fh = fh_tmp ;
    } else if ( ndims == 2 ) {
      filetype2 = filetype ; memtype2 = memtype ; fh2d = fh_tmp ;
    }
  }
#endif
  return(0) ;
}

#endif

