// MicroBenchmarks/LCALS/LCALSSuite.hxx - llvm-test-suite - Git at Google

 //
 // See README-LCALS_license.txt for access and distribution restrictions
 //

 //
 // Header file with enums, macros, routines and structures used to
 // compile and run loops in LCALS suite and to generate execution
 // statistics.
 //

 #ifndef LCALSSuite_HXX
 #define LCALSSuite_HXX

 #include "LCALSParams.hxx"
 #include "LCALSStats.hxx"

 #include <vector>
 #include <string>


 //
 // Enumeration defining unique id for each loop KERNEL in suite.
 //
 // IMPORTANT: Generally, this should not need modification unless
 //            new loops (i.e., kernels) are added to the suite.
 //
 // Note: To keep output understandable, keep this consistent with
 //       routine defineLoopSuiteRunInfo().
 //
 enum LoopKernelID
 {
    // Reference loop.  It must stay the first enumerator and must never
    // be commented out: this guarantees that kernel ids begin at zero, so
    // array indexing and data structures keyed by id stay correct.  It is
    // also not executed the same way as the other kernels.
    REF_LOOP = 0,

    // Subset A: loops extracted from LLNL application codes
    // (implemented in the runA<variant>Loops.cxx files).
    PRESSURE_CALC,
    PRESSURE_CALC_ALT,
    ENERGY_CALC,
    ENERGY_CALC_ALT,
    VOL3D_CALC,
    DEL_DOT_VEC_2D,
    COUPLE,
    FIR,

    // Subset B: "basic" loops
    // (implemented in the runB<variant>Loops.cxx files).
    INIT3,
    MULADDSUB,
    IF_QUAD,
    TRAP_INT,

    // Subset C: loops taken from the older Livermore Loops "C" suite
    // (implemented in the runC<variant>Loops.cxx files).
    HYDRO_1D,
    ICCG,
    INNER_PROD,
    BAND_LIN_EQ,
    TRIDIAG_ELIM,
    EOS,
    ADI,
    INT_PREDICT,
    DIFF_PREDICT,
    FIRST_SUM,
    FIRST_DIFF,
    PIC_2D,
    PIC_1D,
    HYDRO_2D,
    GEN_LIN_RECUR,
    DISC_ORD,
    MAT_X_MAT,
    PLANCKIAN,
    IMP_HYDRO_2D,
    FIND_FIRST_MIN,

    // Number of kernel ids.  It must stay the last enumerator and must
    // NEVER be commented out.
    NUM_LOOP_KERNELS
 };


 //
 // Enumeration defining unique id for each loop VARIANT in suite.
 //
 // IMPORTANT: Generally, this should not need modification unless
 //            new loop variants are added to the suite.
 //
 enum LoopVariantID
 {
    // Variants that make up the LCALS benchmark itself.
    RAW,
    RAW_OMP,
    FORALL_LAMBDA,
    FORALL_LAMBDA_OMP,

 #if defined(LCALS_DO_MISC)
    // Additional variants, present only when LCALS_DO_MISC is defined;
    // they are used in miscellaneous LCALS studies.
    FORALL_HYBRID_LAMBDA,
 #if 0  // not available yet
    FORALL_HYBRID_LAMBDA_OMP,
 #endif
    FORALL_FUNCTOR,
    FORALL_FUNCTOR_OMP,
 #if 0  // not available yet
    FORALL_HYBRID_FUNCTOR,
    FORALL_HYBRID_FUNCTOR_OMP,
 #endif
    RAW_FUNC,
    FORALL_LAMBDA_TYPEFIX,
    FORALL_LAMBDA_OMP_TYPEFIX,
    FORALL_HYBRID_LAMBDA_TYPEFIX,
 #endif  // LCALS_DO_MISC
 };


 //
 // Enumeration defining possible loop lengths to run.
 //
 enum LoopLength
 {
    LONG = 0,
    MEDIUM,
    SHORT,

    // Number of loop lengths listed above; must remain the last enumerator.
    NUM_LENGTHS
 };


 ////////////////////////////////////////////////////////////////////////////////
 //
 // The following macro constants define which loop VARIANTS can be compiled
 // (and potentially) run for a given compiler.
 //
 // NOTE: The Makefile sets the LCALS_COMPILER_* macro constant.
 //
 //  --> IMPORTANT: Actual selection of which loop variants are run is done
 //                 in main.cxx via the vector 'run_variants'.
 //
 ////////////////////////////////////////////////////////////////////////////////

 #if defined(LCALS_COMPILER_ICC) || defined(LCALS_COMPILER_GNU)
 //
 // Intel and GNU compilers: every family of loop variants is compiled.
 //
 #define COMPILE_RAW_VARIANTS
 #define COMPILE_LAMBDA_VARIANTS
 #define COMPILE_FUNCTOR_VARIANTS
 #define COMPILE_OMP_VARIANTS

 #elif defined(LCALS_COMPILER_XLC12)
 //
 // IBM xlC compilers: the lambda variants are switched off, because these
 // compilers were noted as not supporting lambda functions.
 //
 #define COMPILE_RAW_VARIANTS
 #undef COMPILE_LAMBDA_VARIANTS
 #define COMPILE_FUNCTOR_VARIANTS
 #define COMPILE_OMP_VARIANTS

 #elif defined(LCALS_COMPILER_CLANG)
 //
 // clang compilers: the OpenMP variants are switched off, because these
 // compilers were noted as not supporting OpenMP.
 //
 #define COMPILE_RAW_VARIANTS
 #define COMPILE_LAMBDA_VARIANTS
 #define COMPILE_FUNCTOR_VARIANTS
 #undef COMPILE_OMP_VARIANTS

 #else
 //
 // None of the LCALS_COMPILER_* macro constants was set by the build.
 //
 #error LCALS compiler is undefined!
 #endif


 //
 // The following macro constants are used to turn on/off compilation of
 // individual loop KERNELS in suite.  Names are consistent with the
 // LoopKernelID enum above.
 //

 //
 // Kernels compiled in every configuration.  These are the loops that have
 // OpenMP implementations (found in runOMP<variant>Loops.cxx files), so
 // they are the only ones enabled when LCALS_DO_OMP_ONLY is defined.
 //

 // Loop Subset A: Loops extracted from LLNL app codes
 // (implemented in runA<variant>Loops.cxx files).
 #define COMPILE_PRESSURE_CALC
 #define COMPILE_PRESSURE_CALC_ALT
 #define COMPILE_ENERGY_CALC
 #define COMPILE_ENERGY_CALC_ALT
 #define COMPILE_VOL3D_CALC
 #define COMPILE_DEL_DOT_VEC_2D
 #define COMPILE_COUPLE
 #define COMPILE_FIR

 // Loop Subset B: "Basic" Loops
 // (implemented in runB<variant>Loops.cxx files).
 #define COMPILE_INIT3
 #define COMPILE_MULADDSUB
 #define COMPILE_IF_QUAD
 #define COMPILE_TRAP_INT

 // Loop Subset C: the one kernel of this subset that is always compiled.
 #define COMPILE_PIC_2D

 #if !defined(LCALS_DO_OMP_ONLY)
 //
 // Remaining kernels of Loop Subset C (loops from the older Livermore
 // Loops "C" suite, implemented in runC<variant>Loops.cxx files).  They
 // are compiled only when the build is not restricted to the OpenMP set.
 //
 #define COMPILE_HYDRO_1D
 #define COMPILE_ICCG
 #define COMPILE_INNER_PROD
 #define COMPILE_BAND_LIN_EQ
 #define COMPILE_TRIDIAG_ELIM
 #define COMPILE_EOS
 #define COMPILE_ADI
 #define COMPILE_INT_PREDICT
 #define COMPILE_DIFF_PREDICT
 #define COMPILE_FIRST_SUM
 #define COMPILE_FIRST_DIFF
 #define COMPILE_PIC_1D
 #define COMPILE_HYDRO_2D
 #define COMPILE_GEN_LIN_RECUR
 #define COMPILE_DISC_ORD
 #define COMPILE_MAT_X_MAT
 #define COMPILE_PLANCKIAN
 #define COMPILE_IMP_HYDRO_2D
 #define COMPILE_FIND_FIRST_MIN

 #endif


 //////////////////////////////////////////////////////////////////
 //
 //  Structure holding double arrays and scalars used in loops.
 //
 //  Note: These are initialized in allocateLoopData().
 //
 ///////////////////////////////////////////////////////////////////

 struct LoopData
 {
    //
    // Record types that pair a data pointer with an integer id and a
    // length.  They make arrays easy to reinitialize (useful for
    // verifying result checksums, etc.).
    //
    struct RealArray
    {
       int        id;
       Real_ptr   data;
       Index_type len;
    };

    struct IndxArray
    {
       int         id;
       Index_type* data;
       Index_type  len;
    };

    struct ComplexArray
    {
       int         id;
       Complex_ptr data;
       Index_type  len;
    };

    // NOTE(review): presumably the largest loop length used by the suite;
    // confirm against allocateLoopData().
    Index_type max_loop_length;

    //
    // Number of data arrays of each form (and of Real scalars) used in
    // the loop suite.  They size the member arrays declared below.
    //
    // NOTE: These numbers may need to change to accommodate new loops,
    //       and other arrays may need to be added.
    //
    static const unsigned s_num_1D_Real_arrays       = 16;
    static const unsigned s_num_1D_Nx4_Real_arrays   = 2;
    static const unsigned s_num_1D_Indx_arrays       = 5;
    static const unsigned s_num_1D_Complex_arrays    = 5;

    static const unsigned s_num_2D_Nx25_Real_arrays  = 4;
    static const unsigned s_num_2D_7xN_Real_arrays   = 11;
    static const unsigned s_num_2D_64x64_Real_arrays = 1;

    static const unsigned s_num_3D_2xNx4_Real_arrays = 3;

    static const unsigned s_num_Real_scalars         = 10;

    //
    // The same data is reachable through two sets of members; the routine
    // allocateLoopData() in the file LCALSSuite.cxx shows how the two are
    // related.
    //
    //   1) The first set (array_* and scalar_Real) gives loop kernels
    //      easy access to pointers according to how the data is used,
    //      e.g., as arrays of various dimensions.
    //   2) The second set (the *Array_* records) makes (re)initialization
    //      and checksum computation easy: every entry can simply be
    //      walked as a 1-dim array, without knowing whether it is really
    //      being used as a 2- or 3-dimensional array.
    //

    //
    // Set 1: data arrays and scalars used in loop execution.
    //
    Real_ptr    array_1D_Real[s_num_1D_Real_arrays];
    Real_ptr    array_1D_Nx4_Real[s_num_1D_Nx4_Real_arrays];
    Index_type* array_1D_Indx[s_num_1D_Indx_arrays];
    Complex_ptr array_1D_Complex[s_num_1D_Complex_arrays];

    Real_ptr*   array_2D_Nx25_Real[s_num_2D_Nx25_Real_arrays];
    Real_ptr*   array_2D_7xN_Real[s_num_2D_7xN_Real_arrays];
    Real_ptr*   array_2D_64x64_Real[s_num_2D_64x64_Real_arrays];

    Real_ptr**  array_3D_2xNx4_Real[s_num_3D_2xNx4_Real_arrays];

    Real_type   scalar_Real[s_num_Real_scalars];

    //
    // Set 2: records describing the data arrays, used for data
    // initialization and checksum verification.
    //
    RealArray    RealArray_1D[s_num_1D_Real_arrays];
    RealArray    RealArray_1D_Nx4[s_num_1D_Nx4_Real_arrays];
    IndxArray    IndxArray_1D[s_num_1D_Indx_arrays];
    ComplexArray ComplexArray_1D[s_num_1D_Complex_arrays];

    RealArray    RealArray_2D_Nx25[s_num_2D_Nx25_Real_arrays];
    RealArray    RealArray_2D_7xN[s_num_2D_7xN_Real_arrays];
    RealArray    RealArray_2D_64x64[s_num_2D_64x64_Real_arrays];

    RealArray    RealArray_3D_2xNx4[s_num_3D_2xNx4_Real_arrays];

    RealArray    RealArray_scalars;
 };

 //
 // Returns a reference to the LoopData structure that holds the data
 // needed to execute loops.
 //
 // NOTE(review): where that object is defined and how long it lives is
 // not visible from this header -- confirm in the implementation.
 //
 LoopData& getLoopData();


 //
 //  Builds a vector of loop variant name strings from a vector of
 //  LoopVariantID enum values.
 //
 std::vector<std::string>
 getVariantNames(const std::vector<LoopVariantID>& lvids);

 //
 //  Maps a single LoopVariantID enum value to its name as a string.
 //  The enum form is used in main to help ensure correctness; the string
 //  form is used in the loop framework for flexibility.
 //
 std::string getVariantName(LoopVariantID lvid);


 //////////////////////////////////////////////////////////////////
 //
 //  Routines to define how loop suite will be run and
 //  to set up data for loop suite.
 //
 //////////////////////////////////////////////////////////////////

 //
 // Defines the specific details of how the loop suite is run.  The
 // individual loop lengths and sampling parameters are defined inside
 // this routine.
 //
 //   run_variants - loop variants selected to run.
 //   run_loop     - boolean array indicating which loop kernels to run.
 //   sample_frac, loop_length_factor
 //                - NOTE(review): presumably scale the sampling and the
 //                  loop lengths; confirm in the implementation.
 //
 void defineLoopSuiteRunInfo(
    const std::vector<LoopVariantID>& run_variants,
    bool run_loop[],
    double sample_frac,
    double loop_length_factor);


 //
 // allocateLoopData() allocates and initializes the arrays (and scalars)
 // used by the loops in the suite; freeLoopData() frees those arrays
 // when done.
 //
 void allocateLoopData();
 void freeLoopData();


 //
 // Routines to initialize and finalize loop data, statistics, timers, etc.
 //
 // They bracket the execution of each loop: loopInit() must be called
 // before a loop is executed and loopFinalize() after it.
 //
 void loopInit(unsigned iloop, LoopStat& stat);
 void loopInit(unsigned iloop); // overload that takes no LoopStat
 //
 void loopFinalize(unsigned iloop, LoopStat& stat, LoopLength ilength);


 //
 // Routines to run reference loops for figure of merit (FOM) calculations.
 //
 // NOTE(review): judging by the names, the first sets up the run
 // parameters for the reference loops and the second times them --
 // confirm in the implementation.
 //
 void defineReferenceLoopRunInfo();
 void computeReferenceLoopTimes();

 //
 // Entry point called in main to execute the loops that correspond to
 // the given variant ID and loop length.
 //
 //   lvid     - loop variant to execute.
 //   run_loop - boolean array indicating which loop kernels in the suite
 //              to execute.
 //   ilength  - loop length to run (see the LoopLength enum).
 //
 void runLoopVariant(LoopVariantID lvid,
                     bool run_loop[],
                     LoopLength ilength);


 //
 // Routines to run specific loop variants for suite.
 //
 // THESE SHOULD NOT BE CALLED BY ROUTINE ABOVE, NOT DIRECTLY!!!
 //
 // loop_stats is vector of LoopStat objects corresponding to loop variant.
 // run_loop boolean array indicates which loop kernels in suite to execute.
 // ilength indicates which loop length to run (see LoopLength enum).
 //
 void runARawLoops( std::vector<LoopStat>& loop_stats,
                    bool run_loop[],
                    LoopLength ilength );
 void runBRawLoops( std::vector<LoopStat>& loop_stats,
                    bool run_loop[],
                    LoopLength ilength );
 void runCRawLoops( std::vector<LoopStat>& loop_stats,
                    bool run_loop[],
                    LoopLength ilength );

 void runARawFuncLoops( std::vector<LoopStat>& loop_stats,
                        bool run_loop[],
                        LoopLength ilength );
 void runBRawFuncLoops( std::vector<LoopStat>& loop_stats,
                        bool run_loop[],
                        LoopLength ilength );
 void runCRawFuncLoops( std::vector<LoopStat>& loop_stats,
                        bool run_loop[],
                        LoopLength ilength );

 void runOMPRawLoops( std::vector<LoopStat>& loop_stats,
                      bool run_loop[],
                      LoopLength ilength );


 void runAForallLambdaLoops( std::vector<LoopStat>& loop_stats,
                             bool run_loop[],
                             LoopLength ilength );
 void runBForallLambdaLoops( std::vector<LoopStat>& loop_stats,
                             bool run_loop[],
                             LoopLength ilength );
 void runCForallLambdaLoops( std::vector<LoopStat>& loop_stats,
                             bool run_loop[],
                             LoopLength ilength );
 void runOMPForallLambdaLoops( std::vector<LoopStat>& loop_stats,
                               bool run_loop[],
                               LoopLength ilength );

 void runAForallLambdaLoops_TYPEFIX( std::vector<LoopStat>& loop_stats,
                                     bool run_loop[],
                                     LoopLength ilength );
 void runBForallLambdaLoops_TYPEFIX( std::vector<LoopStat>& loop_stats,
                                     bool run_loop[],
                                     LoopLength ilength );
 void runCForallLambdaLoops_TYPEFIX( std::vector<LoopStat>& loop_stats,
                                     bool run_loop[],
                                     LoopLength ilength );
 void runOMPForallLambdaLoops_TYPEFIX( std::vector<LoopStat>& loop_stats,
                                       bool run_loop[],
                                       LoopLength ilength );


 void runAForallFunctorLoops( std::vector<LoopStat>& loop_stats,
                              bool run_loop[],
                              LoopLength ilength );
 void runBForallFunctorLoops( std::vector<LoopStat>& loop_stats,
                              bool run_loop[],
                              LoopLength ilength );
 void runCForallFunctorLoops( std::vector<LoopStat>& loop_stats,
                              bool run_loop[],
                              LoopLength ilength );
 void runOMPForallFunctorLoops( std::vector<LoopStat>& loop_stats,
                                bool run_loop[],
                                LoopLength ilength );

 void runAForallHybridLambdaLoops( std::vector<LoopStat>& loop_stats,
                                   bool run_loop[],
                                   LoopLength ilength );
 void runBForallHybridLambdaLoops( std::vector<LoopStat>& loop_stats,
                                   bool run_loop[],
                                   LoopLength ilength );
 void runCForallHybridLambdaLoops( std::vector<LoopStat>& loop_stats,
                                   bool run_loop[],
                                   LoopLength ilength );

 void runAForallHybridLambdaLoops_TYPEFIX( std::vector<LoopStat>& loop_stats,
                                           bool run_loop[],
                                           LoopLength ilength );
 void runBForallHybridLambdaLoops_TYPEFIX( std::vector<LoopStat>& loop_stats,
                                           bool run_loop[],
                                           LoopLength ilength );
 void runCForallHybridLambdaLoops_TYPEFIX( std::vector<LoopStat>& loop_stats,
                                           bool run_loop[],
                                           LoopLength ilength );

 //
 // Recursively constructs directories based on 'path', which may be a
 // relative or an absolute path name.
 //
 // Returns true if the directory was created successfully, else false.
 //
 // NOTE(review): the result for a directory that already exists is not
 // stated here -- confirm in the implementation.
 //
 bool recursiveMkdir(const std::string& path);


 #endif  // closing endif for header file include guard
	//
	// See README-LCALS_license.txt for access and distribution restrictions
	//

	//
	// Header file with enums, macros, routines and structures used to
	// compile and run loops in LCALS suite and to generate execution
	// statistics.
	//

	#ifndef LCALSSuite_HXX
	#define LCALSSuite_HXX

	#include "LCALSParams.hxx"
	#include "LCALSStats.hxx"

	#include <vector>
	#include <string>


	//
	// Enumeration defining unique id for each loop KERNEL in suite.
	//
	// IMPORTANT: Generally, this should not need modification unless
	// new loops (i.e., kernels) are added to the suite.
	//
	// Note: To keep output understandable, keep this consistent with
	// routine defineLoopSuiteRunInfo().
	//
	enum LoopKernelID
	{
	   // Reference loop.  It must stay the first enumerator and must never
	   // be commented out: this guarantees that kernel ids begin at zero, so
	   // array indexing and data structures keyed by id stay correct.  It is
	   // also not executed the same way as the other kernels.
	   REF_LOOP = 0,

	   // Subset A: loops extracted from LLNL application codes
	   // (implemented in the runA<variant>Loops.cxx files).
	   PRESSURE_CALC,
	   PRESSURE_CALC_ALT,
	   ENERGY_CALC,
	   ENERGY_CALC_ALT,
	   VOL3D_CALC,
	   DEL_DOT_VEC_2D,
	   COUPLE,
	   FIR,

	   // Subset B: "basic" loops
	   // (implemented in the runB<variant>Loops.cxx files).
	   INIT3,
	   MULADDSUB,
	   IF_QUAD,
	   TRAP_INT,

	   // Subset C: loops taken from the older Livermore Loops "C" suite
	   // (implemented in the runC<variant>Loops.cxx files).
	   HYDRO_1D,
	   ICCG,
	   INNER_PROD,
	   BAND_LIN_EQ,
	   TRIDIAG_ELIM,
	   EOS,
	   ADI,
	   INT_PREDICT,
	   DIFF_PREDICT,
	   FIRST_SUM,
	   FIRST_DIFF,
	   PIC_2D,
	   PIC_1D,
	   HYDRO_2D,
	   GEN_LIN_RECUR,
	   DISC_ORD,
	   MAT_X_MAT,
	   PLANCKIAN,
	   IMP_HYDRO_2D,
	   FIND_FIRST_MIN,

	   // Number of kernel ids.  It must stay the last enumerator and must
	   // NEVER be commented out.
	   NUM_LOOP_KERNELS
	};


	//
	// Enumeration defining unique id for each loop VARIANT in suite.
	//
	// IMPORTANT: Generally, this should not need modification unless
	// new loop variants are added to the suite.
	//
	enum LoopVariantID
	{
	   // Variants that make up the LCALS benchmark itself.
	   RAW,
	   RAW_OMP,
	   FORALL_LAMBDA,
	   FORALL_LAMBDA_OMP,

	#if defined(LCALS_DO_MISC)
	   // Additional variants, present only when LCALS_DO_MISC is defined;
	   // they are used in miscellaneous LCALS studies.
	   FORALL_HYBRID_LAMBDA,
	#if 0  // not available yet
	   FORALL_HYBRID_LAMBDA_OMP,
	#endif
	   FORALL_FUNCTOR,
	   FORALL_FUNCTOR_OMP,
	#if 0  // not available yet
	   FORALL_HYBRID_FUNCTOR,
	   FORALL_HYBRID_FUNCTOR_OMP,
	#endif
	   RAW_FUNC,
	   FORALL_LAMBDA_TYPEFIX,
	   FORALL_LAMBDA_OMP_TYPEFIX,
	   FORALL_HYBRID_LAMBDA_TYPEFIX,
	#endif  // LCALS_DO_MISC
	};


	//
	// Enumeration defining possible loop lengths to run.
	//
	enum LoopLength
	{
	   LONG = 0,
	   MEDIUM,
	   SHORT,

	   // Number of loop lengths listed above; must remain the last enumerator.
	   NUM_LENGTHS
	};


	////////////////////////////////////////////////////////////////////////////////
	//
	// The following macro constants define which loop VARIANTS can be compiled
	// (and potentially) run for a given compiler.
	//
	// NOTE: The Makefile sets the LCALS_COMPILER_* macro constant.
	//
	// --> IMPORTANT: Actual selection of which loop variants are run is done
	// in main.cxx via the vector 'run_variants'.
	//
	////////////////////////////////////////////////////////////////////////////////

	#if defined(LCALS_COMPILER_ICC) || defined(LCALS_COMPILER_GNU)
	//
	// Intel and GNU compilers: every family of loop variants is compiled.
	//
	#define COMPILE_RAW_VARIANTS
	#define COMPILE_LAMBDA_VARIANTS
	#define COMPILE_FUNCTOR_VARIANTS
	#define COMPILE_OMP_VARIANTS

	#elif defined(LCALS_COMPILER_XLC12)
	//
	// IBM xlC compilers: the lambda variants are switched off, because these
	// compilers were noted as not supporting lambda functions.
	//
	#define COMPILE_RAW_VARIANTS
	#undef COMPILE_LAMBDA_VARIANTS
	#define COMPILE_FUNCTOR_VARIANTS
	#define COMPILE_OMP_VARIANTS

	#elif defined(LCALS_COMPILER_CLANG)
	//
	// clang compilers: the OpenMP variants are switched off, because these
	// compilers were noted as not supporting OpenMP.
	//
	#define COMPILE_RAW_VARIANTS
	#define COMPILE_LAMBDA_VARIANTS
	#define COMPILE_FUNCTOR_VARIANTS
	#undef COMPILE_OMP_VARIANTS

	#else
	//
	// None of the LCALS_COMPILER_* macro constants was set by the build.
	//
	#error LCALS compiler is undefined!
	#endif


	//
	// The following macro constants are used to turn on/off compilation of
	// individual loop KERNELS in suite. Names are consistent with the
	// LoopKernelID enum above.
	//

	//
	// Kernels compiled in every configuration.  These are the loops that have
	// OpenMP implementations (found in runOMP<variant>Loops.cxx files), so
	// they are the only ones enabled when LCALS_DO_OMP_ONLY is defined.
	//

	// Loop Subset A: Loops extracted from LLNL app codes
	// (implemented in runA<variant>Loops.cxx files).
	#define COMPILE_PRESSURE_CALC
	#define COMPILE_PRESSURE_CALC_ALT
	#define COMPILE_ENERGY_CALC
	#define COMPILE_ENERGY_CALC_ALT
	#define COMPILE_VOL3D_CALC
	#define COMPILE_DEL_DOT_VEC_2D
	#define COMPILE_COUPLE
	#define COMPILE_FIR

	// Loop Subset B: "Basic" Loops
	// (implemented in runB<variant>Loops.cxx files).
	#define COMPILE_INIT3
	#define COMPILE_MULADDSUB
	#define COMPILE_IF_QUAD
	#define COMPILE_TRAP_INT

	// Loop Subset C: the one kernel of this subset that is always compiled.
	#define COMPILE_PIC_2D

	#if !defined(LCALS_DO_OMP_ONLY)
	//
	// Remaining kernels of Loop Subset C (loops from the older Livermore
	// Loops "C" suite, implemented in runC<variant>Loops.cxx files).  They
	// are compiled only when the build is not restricted to the OpenMP set.
	//
	#define COMPILE_HYDRO_1D
	#define COMPILE_ICCG
	#define COMPILE_INNER_PROD
	#define COMPILE_BAND_LIN_EQ
	#define COMPILE_TRIDIAG_ELIM
	#define COMPILE_EOS
	#define COMPILE_ADI
	#define COMPILE_INT_PREDICT
	#define COMPILE_DIFF_PREDICT
	#define COMPILE_FIRST_SUM
	#define COMPILE_FIRST_DIFF
	#define COMPILE_PIC_1D
	#define COMPILE_HYDRO_2D
	#define COMPILE_GEN_LIN_RECUR
	#define COMPILE_DISC_ORD
	#define COMPILE_MAT_X_MAT
	#define COMPILE_PLANCKIAN
	#define COMPILE_IMP_HYDRO_2D
	#define COMPILE_FIND_FIRST_MIN

	#endif



	//////////////////////////////////////////////////////////////////
	//
	// Structure holding double arrays and scalars used in loops.
	//
	// Note: These are initialized in allocateLoopData().
	//
	///////////////////////////////////////////////////////////////////

	struct LoopData
	{
	   //
	   // Record types that pair a data pointer with an integer id and a
	   // length.  They make arrays easy to reinitialize (useful for
	   // verifying result checksums, etc.).
	   //
	   struct RealArray
	   {
	      int        id;
	      Real_ptr   data;
	      Index_type len;
	   };

	   struct IndxArray
	   {
	      int         id;
	      Index_type* data;
	      Index_type  len;
	   };

	   struct ComplexArray
	   {
	      int         id;
	      Complex_ptr data;
	      Index_type  len;
	   };

	   // NOTE(review): presumably the largest loop length used by the suite;
	   // confirm against allocateLoopData().
	   Index_type max_loop_length;

	   //
	   // Number of data arrays of each form (and of Real scalars) used in
	   // the loop suite.  They size the member arrays declared below.
	   //
	   // NOTE: These numbers may need to change to accommodate new loops,
	   //       and other arrays may need to be added.
	   //
	   static const unsigned s_num_1D_Real_arrays       = 16;
	   static const unsigned s_num_1D_Nx4_Real_arrays   = 2;
	   static const unsigned s_num_1D_Indx_arrays       = 5;
	   static const unsigned s_num_1D_Complex_arrays    = 5;

	   static const unsigned s_num_2D_Nx25_Real_arrays  = 4;
	   static const unsigned s_num_2D_7xN_Real_arrays   = 11;
	   static const unsigned s_num_2D_64x64_Real_arrays = 1;

	   static const unsigned s_num_3D_2xNx4_Real_arrays = 3;

	   static const unsigned s_num_Real_scalars         = 10;

	   //
	   // The same data is reachable through two sets of members; the routine
	   // allocateLoopData() in the file LCALSSuite.cxx shows how the two are
	   // related.
	   //
	   //   1) The first set (array_* and scalar_Real) gives loop kernels
	   //      easy access to pointers according to how the data is used,
	   //      e.g., as arrays of various dimensions.
	   //   2) The second set (the *Array_* records) makes (re)initialization
	   //      and checksum computation easy: every entry can simply be
	   //      walked as a 1-dim array, without knowing whether it is really
	   //      being used as a 2- or 3-dimensional array.
	   //

	   //
	   // Set 1: data arrays and scalars used in loop execution.
	   //
	   Real_ptr    array_1D_Real[s_num_1D_Real_arrays];
	   Real_ptr    array_1D_Nx4_Real[s_num_1D_Nx4_Real_arrays];
	   Index_type* array_1D_Indx[s_num_1D_Indx_arrays];
	   Complex_ptr array_1D_Complex[s_num_1D_Complex_arrays];

	   Real_ptr*   array_2D_Nx25_Real[s_num_2D_Nx25_Real_arrays];
	   Real_ptr*   array_2D_7xN_Real[s_num_2D_7xN_Real_arrays];
	   Real_ptr*   array_2D_64x64_Real[s_num_2D_64x64_Real_arrays];

	   Real_ptr**  array_3D_2xNx4_Real[s_num_3D_2xNx4_Real_arrays];

	   Real_type   scalar_Real[s_num_Real_scalars];

	   //
	   // Set 2: records describing the data arrays, used for data
	   // initialization and checksum verification.
	   //
	   RealArray    RealArray_1D[s_num_1D_Real_arrays];
	   RealArray    RealArray_1D_Nx4[s_num_1D_Nx4_Real_arrays];
	   IndxArray    IndxArray_1D[s_num_1D_Indx_arrays];
	   ComplexArray ComplexArray_1D[s_num_1D_Complex_arrays];

	   RealArray    RealArray_2D_Nx25[s_num_2D_Nx25_Real_arrays];
	   RealArray    RealArray_2D_7xN[s_num_2D_7xN_Real_arrays];
	   RealArray    RealArray_2D_64x64[s_num_2D_64x64_Real_arrays];

	   RealArray    RealArray_3D_2xNx4[s_num_3D_2xNx4_Real_arrays];

	   RealArray    RealArray_scalars;
	};

	//
	// Returns a reference to the LoopData structure that holds the data
	// needed to execute loops.
	//
	// NOTE(review): where that object is defined and how long it lives is
	// not visible from this header -- confirm in the implementation.
	//
	LoopData& getLoopData();


	//
	// Builds a vector of loop variant name strings from a vector of
	// LoopVariantID enum values.
	//
	std::vector<std::string>
	getVariantNames(const std::vector<LoopVariantID>& lvids);

	//
	// Maps a single LoopVariantID enum value to its name as a string.
	// The enum form is used in main to help ensure correctness; the string
	// form is used in the loop framework for flexibility.
	//
	std::string getVariantName(LoopVariantID lvid);


	//////////////////////////////////////////////////////////////////
	//
	// Routines to define how loop suite will be run and
	// to set up data for loop suite.
	//
	//////////////////////////////////////////////////////////////////

	//
	// Defines the specific details of how the loop suite is run.  The
	// individual loop lengths and sampling parameters are defined inside
	// this routine.
	//
	//   run_variants - loop variants selected to run.
	//   run_loop     - boolean array indicating which loop kernels to run.
	//   sample_frac, loop_length_factor
	//                - NOTE(review): presumably scale the sampling and the
	//                  loop lengths; confirm in the implementation.
	//
	void defineLoopSuiteRunInfo(
	   const std::vector<LoopVariantID>& run_variants,
	   bool run_loop[],
	   double sample_frac,
	   double loop_length_factor);


	//
	// allocateLoopData() allocates and initializes the arrays (and scalars)
	// used by the loops in the suite; freeLoopData() frees those arrays
	// when done.
	//
	void allocateLoopData();
	void freeLoopData();


	//
	// Routines to initialize and finalize loop data, statistics, timers, etc.
	//
	// They bracket the execution of each loop: loopInit() must be called
	// before a loop is executed and loopFinalize() after it.
	//
	void loopInit(unsigned iloop, LoopStat& stat);
	void loopInit(unsigned iloop); // overload that takes no LoopStat
	//
	void loopFinalize(unsigned iloop, LoopStat& stat, LoopLength ilength);


	//
	// Routines to run reference loops for figure of merit (FOM) calculations.
	//
	// NOTE(review): judging by the names, the first sets up the run
	// parameters for the reference loops and the second times them --
	// confirm in the implementation.
	//
	void defineReferenceLoopRunInfo();
	void computeReferenceLoopTimes();

	//
	// Entry point called in main to execute the loops that correspond to
	// the given variant ID and loop length.
	//
	//   lvid     - loop variant to execute.
	//   run_loop - boolean array indicating which loop kernels in the suite
	//              to execute.
	//   ilength  - loop length to run (see the LoopLength enum).
	//
	void runLoopVariant(LoopVariantID lvid,
	                    bool run_loop[],
	                    LoopLength ilength);


	//
	// Routines to run specific loop variants for suite.
	//
	// THESE SHOULD NOT BE CALLED BY ROUTINE ABOVE, NOT DIRECTLY!!!
	//
	// loop_stats is vector of LoopStat objects corresponding to loop variant.
	// run_loop boolean array indicates which loop kernels in suite to execute.
	// ilength indicates which loop length to run (see LoopLength enum).
	//
	void runARawLoops( std::vector<LoopStat>& loop_stats,
	bool run_loop[],
	LoopLength ilength );
	void runBRawLoops( std::vector<LoopStat>& loop_stats,
	bool run_loop[],
	LoopLength ilength );
	void runCRawLoops( std::vector<LoopStat>& loop_stats,
	bool run_loop[],
	LoopLength ilength );

	void runARawFuncLoops( std::vector<LoopStat>& loop_stats,
	bool run_loop[],
	LoopLength ilength );
	void runBRawFuncLoops( std::vector<LoopStat>& loop_stats,
	bool run_loop[],
	LoopLength ilength );
	void runCRawFuncLoops( std::vector<LoopStat>& loop_stats,
	bool run_loop[],
	LoopLength ilength );

	void runOMPRawLoops( std::vector<LoopStat>& loop_stats,
	bool run_loop[],
	LoopLength ilength );


	void runAForallLambdaLoops( std::vector<LoopStat>& loop_stats,
	bool run_loop[],
	LoopLength ilength );
	void runBForallLambdaLoops( std::vector<LoopStat>& loop_stats,
	bool run_loop[],
	LoopLength ilength );
	void runCForallLambdaLoops( std::vector<LoopStat>& loop_stats,
	bool run_loop[],
	LoopLength ilength );
	void runOMPForallLambdaLoops( std::vector<LoopStat>& loop_stats,
	bool run_loop[],
	LoopLength ilength );

	void runAForallLambdaLoops_TYPEFIX( std::vector<LoopStat>& loop_stats,
	bool run_loop[],
	LoopLength ilength );
	void runBForallLambdaLoops_TYPEFIX( std::vector<LoopStat>& loop_stats,
	bool run_loop[],
	LoopLength ilength );
	void runCForallLambdaLoops_TYPEFIX( std::vector<LoopStat>& loop_stats,
	bool run_loop[],
	LoopLength ilength );
	void runOMPForallLambdaLoops_TYPEFIX( std::vector<LoopStat>& loop_stats,
	bool run_loop[],
	LoopLength ilength );


	void runAForallFunctorLoops( std::vector<LoopStat>& loop_stats,
	bool run_loop[],
	LoopLength ilength );
	void runBForallFunctorLoops( std::vector<LoopStat>& loop_stats,
	bool run_loop[],
	LoopLength ilength );
	void runCForallFunctorLoops( std::vector<LoopStat>& loop_stats,
	bool run_loop[],
	LoopLength ilength );
	void runOMPForallFunctorLoops( std::vector<LoopStat>& loop_stats,
	bool run_loop[],
	LoopLength ilength );

	void runAForallHybridLambdaLoops( std::vector<LoopStat>& loop_stats,
	bool run_loop[],
	LoopLength ilength );
	void runBForallHybridLambdaLoops( std::vector<LoopStat>& loop_stats,
	bool run_loop[],
	LoopLength ilength );
	void runCForallHybridLambdaLoops( std::vector<LoopStat>& loop_stats,
	bool run_loop[],
	LoopLength ilength );

	void runAForallHybridLambdaLoops_TYPEFIX( std::vector<LoopStat>& loop_stats,
	bool run_loop[],
	LoopLength ilength );
	void runBForallHybridLambdaLoops_TYPEFIX( std::vector<LoopStat>& loop_stats,
	bool run_loop[],
	LoopLength ilength );
	void runCForallHybridLambdaLoops_TYPEFIX( std::vector<LoopStat>& loop_stats,
	bool run_loop[],
	LoopLength ilength );

	//
	// Recursively constructs directories based on 'path', which may be a
	// relative or an absolute path name.
	//
	// Returns true if the directory was created successfully, else false.
	//
	// NOTE(review): the result for a directory that already exists is not
	// stated here -- confirm in the implementation.
	//
	bool recursiveMkdir(const std::string& path);




	#endif // closing endif for header file include guard