// -*- C++ -*-
// ACL:license
// ----------------------------------------------------------------------
// This software and ancillary information (herein called "SOFTWARE")
// called POOMA (Parallel Object-Oriented Methods and Applications) is
// made available under the terms described here.  The SOFTWARE has been
// approved for release with associated LA-CC Number LA-CC-98-65.
// 
// Unless otherwise indicated, this SOFTWARE has been authored by an
// employee or employees of the University of California, operator of the
// Los Alamos National Laboratory under Contract No. W-7405-ENG-36 with
// the U.S. Department of Energy.  The U.S. Government has rights to use,
// reproduce, and distribute this SOFTWARE. The public may copy, distribute,
// prepare derivative works and publicly display this SOFTWARE without 
// charge, provided that this Notice and any statement of authorship are 
// reproduced on all copies.  Neither the Government nor the University 
// makes any warranty, express or implied, or assumes any liability or 
// responsibility for the use of this SOFTWARE.
// 
// If SOFTWARE is modified to produce derivative works, such modified
// SOFTWARE should be clearly marked, so as not to confuse it with the
// version available from LANL.
// 
// For more information about POOMA, send e-mail to pooma@acl.lanl.gov,
// or visit the POOMA web page at http://www.acl.lanl.gov/pooma/.
// ----------------------------------------------------------------------
// ACL:license

#ifndef POOMA_EVALUATOR_INLINEEVALUATOR_H
#define POOMA_EVALUATOR_INLINEEVALUATOR_H

//-----------------------------------------------------------------------------
// Class: InlineEvaluator
//-----------------------------------------------------------------------------


//-----------------------------------------------------------------------------
// Overview:
//
// InlineEvaluator, i.e. KernelEvaluator<InlineKernelTag>, evaluates
// expressions by inlining a simple loop.  It does no dependency checking
// or locking, has no support for where blocks, etc.
//-----------------------------------------------------------------------------

//-----------------------------------------------------------------------------
// Typedefs:
//-----------------------------------------------------------------------------

//-----------------------------------------------------------------------------
// Includes:
//-----------------------------------------------------------------------------

#include "Evaluator/KernelTags.h"
#include "Utilities/WrappedInt.h"
#include "Utilities/PAssert.h"

//-----------------------------------------------------------------------------
// Forward Declarations:
//-----------------------------------------------------------------------------

// Primary KernelEvaluator template, selected by a kernel tag type.
// Only declared here; this file supplies the InlineKernelTag
// specialization further down.
template<class KernelTag>
struct KernelEvaluator;

//-----------------------------------------------------------------------------
// Full Description:
//
// This class takes an expression through its 'evaluate' member
// functions and evaluates it by looping over the whole domain.
//
// This is the simplest possible evaluator. It makes a number of
// simplifying assumptions about the expressions it tries to evaluate
// and the context in which they are evaluated.  These assumptions let
// it do some things very efficiently, but limit the contexts in which
// it can be used.
//
// These assumptions are:
//
// 1. There are no where blocks. That means that the InlineEvaluator
// does not need to have any state.
//
// 2. The expression passed in can handle random access to all of its
// elements efficiently.  That basically means that it can only be
// used with BrickEngine or its equivalent.
//
//-----------------------------------------------------------------------------

template<>
struct KernelEvaluator<InlineKernelTag>
{
  //---------------------------------------------------------------------------
  // evaluate(lhs,op,rhs)
  //
  // Entry point used when no domain is supplied.  The domain is taken
  // from lhs.domain(), and its dimensionality (LHS::Domain_t::dimensions)
  // picks the matching loop nest below.  The NumInlineEvaluations
  // statistic is incremented after the loop nest has run.
  //---------------------------------------------------------------------------

  template<class LHS,class Op,class RHS>
  inline static void evaluate(const LHS& lhs,const Op& op,const RHS& rhs)
  {
    typedef typename LHS::Domain_t LhsDomain_t;
    typedef WrappedInt<LhsDomain_t::dimensions> DimensionTag_t;

    evaluate(lhs,op,rhs,lhs.domain(),DimensionTag_t());
    POOMA_INCREMENT_STATISTIC(NumInlineEvaluations)
  }

  //---------------------------------------------------------------------------
  // evaluate(lhs,op,rhs,domain)
  //
  // Entry point used when the caller supplies the domain explicitly.
  // It does no looping itself: it forwards to the loop nest selected by
  // Domain::dimensions and then increments the NumInlineEvaluations
  // statistic.
  //---------------------------------------------------------------------------

  template<class LHS,class Op,class RHS,class Domain>
  inline static void evaluate(const LHS& lhs,const Op& op,const RHS& rhs,
                              const Domain& domain)
  {
    typedef WrappedInt<Domain::dimensions> DimensionTag_t;

    evaluate(lhs,op,rhs,domain,DimensionTag_t());
    POOMA_INCREMENT_STATISTIC(NumInlineEvaluations)
  }

  //---------------------------------------------------------------------------
  // evaluate(lhs,op,rhs,domain,WrappedInt<N>)
  //
  // The workers that both entry points above dispatch to.  The extra
  // WrappedInt<N> argument is a tag carrying the dimensionality of the
  // domain; it exists only so that overload resolution can select the
  // right loop nest.  Note that these overloads do not touch the
  // NumInlineEvaluations statistic; only the two entry points do.
  //
  // Dimensions 1 through 7 are written out by hand instead of being
  // generated by a template metaprogram.  This is deliberate: it keeps
  // the burden on the compiler low.
  //
  // Everything is inline for efficiency; the intent is that, when used
  // at the user level, the compiler can recognize multiple uses of a
  // single Array on the right hand side.
  //
  // Each overload
  //   - asserts at compile time that Domain::unitStride == 1,
  //   - asserts at run time that first() == 0 in every dimension,
  //   - reads length() of every dimension once, up front,
  //   - runs one loop per dimension from 0 up to that length, with
  //     dimension 0 as the innermost loop, and
  //   - calls op(lhs(i...), rhs.read(i...)) for every index tuple.
  //
  // NOTE: because of the assertions above, these loops are only valid
  // for unit-stride domains that start at 0.
  //---------------------------------------------------------------------------

  // One-dimensional loop.
  template<class LHS,class Op,class RHS,class Domain>
  inline static void evaluate(const LHS& lhs,const Op& op,const RHS& rhs,
                              const Domain& domain,WrappedInt<1>)
  {
    CTAssert(Domain::unitStride == 1);
    PAssert(domain[0].first() == 0);

    const int len0 = domain[0].length();

    for (int j0 = 0; j0 < len0; ++j0)
      {
        op(lhs(j0),rhs.read(j0));
      }
  }

  // Two-dimensional loop nest.
  template<class LHS,class Op,class RHS,class Domain>
  inline static void evaluate(const LHS& lhs,const Op& op,const RHS& rhs,
                              const Domain& domain,WrappedInt<2>)
  {
    CTAssert(Domain::unitStride == 1);
    PAssert(domain[0].first() == 0);
    PAssert(domain[1].first() == 0);

    const int len0 = domain[0].length();
    const int len1 = domain[1].length();

    for (int j1 = 0; j1 < len1; ++j1)
      {
        for (int j0 = 0; j0 < len0; ++j0)
          {
            op(lhs(j0,j1),rhs.read(j0,j1));
          }
      }
  }

  // Three-dimensional loop nest.
  template<class LHS,class Op,class RHS,class Domain>
  inline static void evaluate(const LHS& lhs,const Op& op,const RHS& rhs,
                              const Domain& domain,WrappedInt<3>)
  {
    CTAssert(Domain::unitStride == 1);
    PAssert(domain[0].first() == 0);
    PAssert(domain[1].first() == 0);
    PAssert(domain[2].first() == 0);

    const int len0 = domain[0].length();
    const int len1 = domain[1].length();
    const int len2 = domain[2].length();

    for (int j2 = 0; j2 < len2; ++j2)
      {
        for (int j1 = 0; j1 < len1; ++j1)
          {
            for (int j0 = 0; j0 < len0; ++j0)
              {
                op(lhs(j0,j1,j2),rhs.read(j0,j1,j2));
              }
          }
      }
  }

  // Four-dimensional loop nest.
  template<class LHS,class Op,class RHS,class Domain>
  inline static void evaluate(const LHS& lhs,const Op& op,const RHS& rhs,
                              const Domain& domain,WrappedInt<4>)
  {
    CTAssert(Domain::unitStride == 1);
    PAssert(domain[0].first() == 0);
    PAssert(domain[1].first() == 0);
    PAssert(domain[2].first() == 0);
    PAssert(domain[3].first() == 0);

    const int len0 = domain[0].length();
    const int len1 = domain[1].length();
    const int len2 = domain[2].length();
    const int len3 = domain[3].length();

    for (int j3 = 0; j3 < len3; ++j3)
      {
        for (int j2 = 0; j2 < len2; ++j2)
          {
            for (int j1 = 0; j1 < len1; ++j1)
              {
                for (int j0 = 0; j0 < len0; ++j0)
                  {
                    op(lhs(j0,j1,j2,j3),rhs.read(j0,j1,j2,j3));
                  }
              }
          }
      }
  }

  // Five-dimensional loop nest.
  template<class LHS,class Op,class RHS,class Domain>
  inline static void evaluate(const LHS& lhs,const Op& op,const RHS& rhs,
                              const Domain& domain,WrappedInt<5>)
  {
    CTAssert(Domain::unitStride == 1);
    PAssert(domain[0].first() == 0);
    PAssert(domain[1].first() == 0);
    PAssert(domain[2].first() == 0);
    PAssert(domain[3].first() == 0);
    PAssert(domain[4].first() == 0);

    const int len0 = domain[0].length();
    const int len1 = domain[1].length();
    const int len2 = domain[2].length();
    const int len3 = domain[3].length();
    const int len4 = domain[4].length();

    for (int j4 = 0; j4 < len4; ++j4)
      {
        for (int j3 = 0; j3 < len3; ++j3)
          {
            for (int j2 = 0; j2 < len2; ++j2)
              {
                for (int j1 = 0; j1 < len1; ++j1)
                  {
                    for (int j0 = 0; j0 < len0; ++j0)
                      {
                        op(lhs(j0,j1,j2,j3,j4),rhs.read(j0,j1,j2,j3,j4));
                      }
                  }
              }
          }
      }
  }

  // Six-dimensional loop nest.
  template<class LHS,class Op,class RHS,class Domain>
  inline static void evaluate(const LHS& lhs,const Op& op,const RHS& rhs,
                              const Domain& domain,WrappedInt<6>)
  {
    CTAssert(Domain::unitStride == 1);
    PAssert(domain[0].first() == 0);
    PAssert(domain[1].first() == 0);
    PAssert(domain[2].first() == 0);
    PAssert(domain[3].first() == 0);
    PAssert(domain[4].first() == 0);
    PAssert(domain[5].first() == 0);

    const int len0 = domain[0].length();
    const int len1 = domain[1].length();
    const int len2 = domain[2].length();
    const int len3 = domain[3].length();
    const int len4 = domain[4].length();
    const int len5 = domain[5].length();

    for (int j5 = 0; j5 < len5; ++j5)
      {
        for (int j4 = 0; j4 < len4; ++j4)
          {
            for (int j3 = 0; j3 < len3; ++j3)
              {
                for (int j2 = 0; j2 < len2; ++j2)
                  {
                    for (int j1 = 0; j1 < len1; ++j1)
                      {
                        for (int j0 = 0; j0 < len0; ++j0)
                          {
                            op(lhs(j0,j1,j2,j3,j4,j5),
                               rhs.read(j0,j1,j2,j3,j4,j5));
                          }
                      }
                  }
              }
          }
      }
  }

  // Seven-dimensional loop nest.
  template<class LHS,class Op,class RHS,class Domain>
  inline static void evaluate(const LHS& lhs,const Op& op,const RHS& rhs,
                              const Domain& domain,WrappedInt<7>)
  {
    CTAssert(Domain::unitStride == 1);
    PAssert(domain[0].first() == 0);
    PAssert(domain[1].first() == 0);
    PAssert(domain[2].first() == 0);
    PAssert(domain[3].first() == 0);
    PAssert(domain[4].first() == 0);
    PAssert(domain[5].first() == 0);
    PAssert(domain[6].first() == 0);

    const int len0 = domain[0].length();
    const int len1 = domain[1].length();
    const int len2 = domain[2].length();
    const int len3 = domain[3].length();
    const int len4 = domain[4].length();
    const int len5 = domain[5].length();
    const int len6 = domain[6].length();

    for (int j6 = 0; j6 < len6; ++j6)
      {
        for (int j5 = 0; j5 < len5; ++j5)
          {
            for (int j4 = 0; j4 < len4; ++j4)
              {
                for (int j3 = 0; j3 < len3; ++j3)
                  {
                    for (int j2 = 0; j2 < len2; ++j2)
                      {
                        for (int j1 = 0; j1 < len1; ++j1)
                          {
                            for (int j0 = 0; j0 < len0; ++j0)
                              {
                                op(lhs(j0,j1,j2,j3,j4,j5,j6),
                                   rhs.read(j0,j1,j2,j3,j4,j5,j6));
                              }
                          }
                      }
                  }
              }
          }
      }
  }
};

//-----------------------------------------------------------------------------

#endif // POOMA_EVALUATOR_INLINEEVALUATOR_H

// ACL:rcsinfo
// ----------------------------------------------------------------------
// $RCSfile: InlineEvaluator.h,v $   $Author: swhaney $
// $Revision: 1.25 $   $Date: 2000/04/12 23:56:09 $
// ----------------------------------------------------------------------
// ACL:rcsinfo
