// -*- C++ -*-
// ACL:license
// ----------------------------------------------------------------------
// This software and ancillary information (herein called "SOFTWARE")
// called POOMA (Parallel Object-Oriented Methods and Applications) is
// made available under the terms described here.  The SOFTWARE has been
// approved for release with associated LA-CC Number LA-CC-98-65.
// 
// Unless otherwise indicated, this SOFTWARE has been authored by an
// employee or employees of the University of California, operator of the
// Los Alamos National Laboratory under Contract No. W-7405-ENG-36 with
// the U.S. Department of Energy.  The U.S. Government has rights to use,
// reproduce, and distribute this SOFTWARE. The public may copy, distribute,
// prepare derivative works and publicly display this SOFTWARE without 
// charge, provided that this Notice and any statement of authorship are 
// reproduced on all copies.  Neither the Government nor the University 
// makes any warranty, express or implied, or assumes any liability or 
// responsibility for the use of this SOFTWARE.
// 
// If SOFTWARE is modified to produce derivative works, such modified
// SOFTWARE should be clearly marked, so as not to confuse it with the
// version available from LANL.
// 
// For more information about POOMA, send e-mail to pooma@acl.lanl.gov,
// or visit the POOMA web page at http://www.acl.lanl.gov/pooma/.
// ----------------------------------------------------------------------
// ACL:license

//-----------------------------------------------------------------------------
// Classes: 
//   UniformGridLayoutData<Dim> template definitions.
//   UniformGridLayout<Dim> template definitions.
//-----------------------------------------------------------------------------

#include "Threads/PoomaSmarts.h"
#include "Layout/UniformGridLayout.h"
#include "Utilities/PAssert.h"

#include <vector>


///////////////////////////////////////////////////////////////////////////////
// namespace POOMA {

//============================================================
// UniformGridLayoutData non-inline method definitions
//============================================================

//-----------------------------------------------------------------------------
//
// template <int Dim>
// template <class Partitioner>
// UniformGridLayoutData<Dim>::
// UniformGridLayoutData(const Domain_t &gdom,
//                       const Partitioner &gpar,
//                       const ContextMapper<Dim> &cmap)
//
// Originally, we provided a slew of constructors, mirroring those
// in UniformGridLayout. However, once we eliminated storage of a
// partitioner object, it became simpler to have the Layout construct
// the partitioner and then initialize its data object by passing 
// that partitioner to this constructor.
//
//-----------------------------------------------------------------------------

template <int Dim>
inline UniformGridLayoutData<Dim>::
UniformGridLayoutData() 
  : Observable<UniformGridLayoutData>(*this) 
{
  // Default construction: zero the per-dimension block sizes and block
  // strides.

  int d = 0;
  while (d < Dim)
    {
      blocksizes_m[d] = 0;
      blockstride_m[d] = 0;
      ++d;
    }
}

template <int Dim>
template <class Partitioner>
UniformGridLayoutData<Dim>::
UniformGridLayoutData(const Domain_t &gdom, 
		      const Partitioner &gpar,
		      const ContextMapper<Dim> & cmap )
  : LayoutBaseData<Dim>(false, false,
			GuardLayers_t(0), GuardLayers_t(0),
			gdom, gdom),
    Observable<UniformGridLayoutData>(*this)
{
  // The base class is set up with no guards and with both domain arguments
  // equal to gdom. Guard information is filled in below from the
  // partitioner.

  // Internal guards are recorded only if the partitioner reports them and
  // its maxSize() is greater than one.

  const bool useInternalGuards =
    gpar.hasInternalGuards() && gpar.maxSize() > 1;

  if (useInternalGuards)
    {
      hasInternalGuards_m = true;
      internalGuards_m = gpar.internalGuards();
    }

  // External guards are recorded and added on to the total domain.

  const bool useExternalGuards = gpar.hasExternalGuards();

  if (useExternalGuards)
    {
      hasExternalGuards_m = true;
      externalGuards_m = gpar.externalGuards();
      GuardLayers<Dim>::addGuardLayers(domain_m, externalGuards_m);
    }

  // With the guards settled, do the partitioning.
  // This initializes allDomain_m, firsti_m, etc.

  partition(gpar, cmap);
}

//-----------------------------------------------------------------------------
//
// template <int Dim>
// template <class Partitioner>
// void UniformGridLayoutData<Dim>::
// partition(const Partitioner &gpar, const ContextMapper<Dim> &cmap)
//
// Records the block counts, first indices, block sizes and block strides
// for the current domain, invokes the partitioner on the inner domain to
// fill the list of all nodes, and then sorts those nodes into the local
// and remote lists.  If internal guards are in use, the guard-cell fill
// list is cleared and recalculated.
//
// The node lists must be empty on entry, and the domains must not be.
//
//-----------------------------------------------------------------------------

template <int Dim>
template <class Partitioner>
void UniformGridLayoutData<Dim>::partition(const Partitioner &gpar,
					   const ContextMapper<Dim> &cmap)
{
  // In spite of being templated, this only works with uniform-grid
  // partitioners.
    
  CTAssert(Partitioner::uniform);

  // We must have something to partition, and the domain lists must be
  // empty.
  
  PAssert(domain_m.size() > 0);
  PAssert(innerdomain_m.size() > 0);
  PAssert(all_m.size() == 0);
  PAssert(local_m.size() == 0);
  PAssert(remote_m.size() == 0);

  // Save the first and block size info from the current domain.

  blocks_m = gpar.blocks();

  // Note, for the purposes of partitioning, we pretend like we're
  // only working with the inner domain. The total domain includes the
  // external guards, and those do not affect the partitioning.  

  blockstride_m[0] = 1;
  int blocks[Dim];
  for (int i = 0; i < Dim; ++i)
  {
    firsti_m[i] = innerdomain_m[i].first();
    firste_m[i] = domain_m[i].first();
    blocks[i] = gpar.blocks()[i].first();

    // The block count is used as a divisor just below.

    PAssert(blocks[i] > 0);

    allDomain_m[i] = Interval<1>(blocks[i]);
    blocksizes_m[i] = innerdomain_m[i].length() / blocks[i];

    // The stride in direction i is the product of the block counts in
    // all lower directions.

    if (i > 0)
      blockstride_m[i] = blockstride_m[i-1] * blocks[i-1];
  }

  // Invoke the partitioner.
  
  gpar.partition(innerdomain_m, all_m, cmap);

  // Fill the local and remote lists. A node is local if its context is
  // this context or -1; local nodes are numbered in the order in which
  // they are appended to the local list.

  typename List_t::const_iterator start = all_m.begin();
  typename List_t::const_iterator end   = all_m.end();
  
  for ( ; start != end; ++start)
    {
      if ( (*start)->context() == Pooma::context()
	   || (*start)->context() == -1 )
	{ 
	  (*start)->localID() = local_m.size();
	  local_m.push_back(*start);
	}
      else
	{
	  remote_m.push_back(*start);
	}
    }

  // Rebuild the cached guard-cell fill information.

  if (hasInternalGuards_m) 
    {
      gcFillList_m.clear();
      calcGCFillList();
    }
}
//-----------------------------------------------------------------------------
//
// template<int Dim>
// void UniformGridLayoutData<Dim>::initialize
//
// Used by an I/O or data management entity to initialize the layout based
// on detailed state information previously stored. Any nodes currently held
// are deleted, the supplied list of nodes is adopted as the complete node
// list and sorted into the local and remote lists, and the guard cell
// info, etc., is initialized from the arguments.
//
//-----------------------------------------------------------------------------

// Arguments:
//   idom      the domain of the layout, without external guards
//   nodes     the complete list of node pointers to adopt
//   ublocks   the number of blocks in each direction
//   hasIG/ig  whether internal guards are used, and their sizes
//   hasEG/eg  whether external guards are used, and their sizes
//             (external guards are ignored if idom is empty)

template<int Dim>
void UniformGridLayoutData<Dim>::initialize(const Domain_t& idom,
				 const List_t& nodes,
				 const Loc<Dim>& ublocks,
				 bool hasIG, bool hasEG,
				 const GuardLayers_t& ig,
				 const GuardLayers_t& eg)
{
  // Delete existing nodes and clear all the lists.

  if (!all_m.empty())
    {
      for (typename List_t::iterator node = all_m.begin();
	   node != all_m.end(); ++node)
	delete *node;
      all_m.clear();
      local_m.clear();
      remote_m.clear();
    }

  // Initially, our total and owned domains are the same.

  domain_m = idom;
  innerdomain_m = idom;

  // Examine the info about guard cells.  Change our domains if necessary,
  // and save guard cell info for later.

  hasInternalGuards_m = hasIG;
  if (hasInternalGuards_m)
    {
      internalGuards_m = ig;
    }

  hasExternalGuards_m = (hasEG && ! domain_m.empty());
  if (hasExternalGuards_m)
    {
      externalGuards_m = eg;
      GuardLayers<Dim>::addGuardLayers(domain_m, externalGuards_m);
    }

  // Save the first and block size info from the current domain.

  blocks_m = ublocks;

  // Note, for the purposes of partitioning, we pretend like we're
  // only working with the inner domain. The total domain includes the
  // external guards, and those do not affect the partitioning.
  
  blockstride_m[0] = 1;
  int blocks[Dim];
  for (int i = 0; i < Dim; ++i)
  {
    firsti_m[i] = innerdomain_m[i].first();
    firste_m[i] = domain_m[i].first();
    blocks[i] = ublocks[i].first();

    // The block count is used as a divisor just below.

    PAssert(blocks[i] > 0);

    allDomain_m[i] = Interval<1>(blocks[i]);
    blocksizes_m[i] = innerdomain_m[i].length() / blocks[i];

    // The stride in direction i is the product of the block counts in
    // all lower directions.

    if (i > 0)
      blockstride_m[i] = blockstride_m[i-1] * blocks[i-1];
  }

  // Assign the given list of nodes to the total list.

  all_m = nodes;

  // Iterate through the complete list of nodes provided and assign to the
  // appropriate subcategories. A node is local if its context is this
  // context or -1.
  //
  // NOTE(review): unlike partition(), the local ID of a local node is not
  // assigned here; presumably the supplied nodes already carry it --
  // confirm against the callers.

  typename List_t::iterator start = all_m.begin();
  typename List_t::iterator end   = all_m.end();
  
  for ( ; start != end; ++start)
    {
      if ( (*start)->context() == Pooma::context() ||
	   (*start)->context() == -1 )
	local_m.push_back(*start);
      else
	remote_m.push_back(*start);
    }

  // Always discard the old guard-cell fill list: its entries index the
  // node list that was just replaced, so they must not survive when the
  // new state has no internal guards. Rebuild it only if there are
  // internal guards to fill.

  gcFillList_m.clear();
  if (hasInternalGuards_m) 
    {
      calcGCFillList();
    }
}
//-----------------------------------------------------------------------------
//
// template <int Dim>
// void UniformGridLayoutData<Dim>::calcGCFillList()
//
// Calculates the cached information needed by MultiPatch Engine to
// fill the guard cells.
//
//-----------------------------------------------------------------------------

template <int Dim>
void UniformGridLayoutData<Dim>::calcGCFillList()
{
  // Appends to gcFillList_m one entry (domain, source patch, destination
  // patch) for every guard-cell copy between neighboring patches in which
  // this context is involved. Callers in this file clear the list first.

  // We want to create the list in such a manner that all
  // communication in a particular direction is done first, allowing
  // parallelism with the least amount of contention for
  // patches. Thus we have an outer loop over Dim, doing the upward
  // copies first, then the downward copies.

  const int numPatches = all_m.size();

  gcFillList_m.reserve(2*Dim*numPatches); // a bit extra

  for (int d = 0; d < Dim; ++d)
    {
      // First we "send" up in every direction, meaning that we fill
      // the "lower" internal guard cells for domains that have
      // them.

      if (internalGuards_m.lower(d) > 0)
        {
          // We use a DomainIterator to figure out if we're at edges
          // as we iterate through the patches.

          // NOTE!!! Implicit in this is that all of the domains are
          // stored in fortran storage order in the all_m array.  Of
          // course, this is also only valid for single context
          // stuff.  When we go to multiple contexts, this algorithm
          // will still work if the local's always form a block that
          // is also stored in fortran storage order.

          typename Interval<Dim>::iterator pos = allDomain_m.begin();

          for (int p = 0; p < numPatches; ++p, ++pos)
            {
              // The block iterator must still be valid; check this
              // before it is dereferenced below.

              PAssert(pos != allDomain_m.end());

              // Edge detection. If this element is at the upper
              // edge in the direction that we're sending, skip it
              // and continue.

              if ( (*pos)[d].first() == allDomain_m[d].last() ) continue;

              // The destination ID is one step "up" in the "d"
              // direction, which is at an offset in all_m of
              // blockstride_m[d]:

              const int sourceID = p;
              const int destID   = p + blockstride_m[d];

              // Check that our destination is in range.

              PAssert(destID < numPatches);

              // Calculate the domain of the overlapping cells that
              // need to be communicated. This is the total domain
              // in all directions but "d", where it is just the top
              // guard-cell width of the source domain.

              // (This causes copying of some uninitialized data,
              // since the first direction includes guards [which
              // haven't been filled] in the perpendicular directions,
              // but that data later gets overwritten by good data.
              // Could change this to use more conservative sets
              // of domains, but then the accumulation would have to
              // happen in reverse order [I think???].)

              Domain_t gcdom(all_m[p]->allocated());

              const int hi = all_m[p]->domain()[d].last();
              const int lo = hi - internalGuards_m.lower(d) + 1;

              gcdom[d] = Interval<1>(lo, hi);

              // Now, push IDs and source into cache, but only if the
              // source has context -1 or either end of the copy is on
              // this context.

              if (all_m[sourceID]->context() == -1 ||
                  all_m[sourceID]->context() == Pooma::context() ||
                  all_m[destID]->context() == Pooma::context())
                {
                  gcFillList_m.push_back(GCFillInfo(gcdom, sourceID, destID));
                }
            }
        }

      // Next we "send" down in every direction, meaning that we
      // fill the "upper" internal guard cells for domains that have
      // them.

      if (internalGuards_m.upper(d) > 0)
        {
          typename Interval<Dim>::iterator pos = allDomain_m.begin();

          for (int p = 0; p < numPatches; ++p, ++pos)
            {
              // The block iterator must still be valid; check this
              // before it is dereferenced below.

              PAssert(pos != allDomain_m.end());

              // Edge detection. If this element is at the lower
              // edge in the direction that we're sending, skip it
              // and continue.

              if ( (*pos)[d].first() == allDomain_m[d].first() ) continue;

              // The destination ID is one step "down" in the "d"
              // direction, which is at an offset in all_m of
              // blockstride_m[d]:

              const int sourceID = p;
              const int destID   = p - blockstride_m[d];

              // Check that destination is in range.

              PAssert(destID >= 0);

              // Calculate the domain of the overlapping cells that
              // need to be communicated: the total domain in all
              // directions but "d", where it is just the bottom
              // guard-cell width of the source domain.

              Domain_t gcdom(all_m[p]->allocated());

              const int lo = all_m[p]->domain()[d].first();
              const int hi = lo + internalGuards_m.upper(d) - 1;

              gcdom[d] = Interval<1>(lo, hi);

              // Now, push IDs and source into cache, under the same
              // context condition as for the upward copies.

              if (all_m[sourceID]->context() == -1 ||
                  all_m[sourceID]->context() == Pooma::context() ||
                  all_m[destID]->context() == Pooma::context())
                {
                  gcFillList_m.push_back(GCFillInfo(gcdom, sourceID, destID));
                }
            }
        }
    }
}



//-----------------------------------------------------------------------------
//
// template <int Dim>
// template <class Partitioner>
// bool UniformGridLayoutData<Dim>::
// repartition(const Partitioner &, const ContextMapper<Dim> &)
//
// Repartition the layout using a new Partitioner scheme.  The existing
// nodes are deleted and the domain lists are cleared out, the partitioner
// is invoked, and then all the observers are notified.  This can only be
// done with a uniform-grid partitioner (partition() asserts
// Partitioner::uniform).
//
//-----------------------------------------------------------------------------

template <int Dim>
template <class Partitioner>
bool UniformGridLayoutData<Dim>::
repartition(const Partitioner &p,
	    const ContextMapper<Dim>& cmap)
{
  // We can only repartition if we have been initialized to some domain.

  PAssert(domain_m.size() > 0);

  // Delete existing nodes and clear all the lists.

  for (typename List_t::iterator node = all_m.begin();
       node != all_m.end(); ++node)
    delete *node;
    
  all_m.clear();
  local_m.clear();
  remote_m.clear();

  // Do the new partitioning. partition() refills the node lists and,
  // when internal guards are in use, also clears and recalculates the
  // guard-cell fill list, so that work is not repeated here.

  partition(p,cmap);

  // Notify all users.

  notify(repartitionEvent);

  // Always reports success.

  return true;
}

// } // namespace POOMA


// ACL:rcsinfo
// ----------------------------------------------------------------------
// $RCSfile: UniformGridLayout.cpp,v $   $Author: luchini $
// $Revision: 1.34 $   $Date: 2000/08/15 17:30:31 $
// ----------------------------------------------------------------------
// ACL:rcsinfo

