blob: 4facdbae3523841492ab549f4e0520147d14e4c0 [file] [log] [blame]
//
// See README-LCALS_license.txt for access and distribution restrictions
//
//
// Source file with routines to allocate data for LCALS suite
// and define parameters controlling execution of each loop.
//
#include "LCALSSuite.hxx"
#include "LCALSStats.hxx"
#include "SubsetDataA.hxx"
#include<cstdlib>
#include<string>
#include<iostream>
#include<sys/types.h>
#include<sys/stat.h>
//#define LCALS_OMP_MEM_INIT
#undef LCALS_OMP_MEM_INIT
//
// File scope data holding structures used in loop suite.
//
// s_loop_data starts out null. defineLoopSuiteRunInfo() allocates the
// LoopData object the first time it runs, and getLoopData() returns a
// reference to the object this pointer refers to.
//
static LoopData* s_loop_data = 0;
//
// Default value for static ADomain member.
//
// defineLoopSuiteRunInfo() overwrites this with the loop_length_factor
// argument it receives.
//
double ADomain::loop_length_factor = 1.0;
//
// Prototypes for file scope routines used to manage loop data and
// checksums.
//
// Only declarations appear here; the definitions are expected further
// down in this file (not visible from this point).
//
namespace {
// allocAndInitData: one overload per array kind (real, index, complex).
// Each takes the array holder plus a length and returns a pointer of the
// matching element type. NOTE(review): presumably allocates 'len' entries
// and fills them; confirm against the definitions.
Real_ptr allocAndInitData(LoopData::RealArray& ra, Index_type len);
Index_type* allocAndInitData(LoopData::IndxArray& ia, Index_type len);
Complex_ptr allocAndInitData(LoopData::ComplexArray& ca, Index_type len);
// initData: one overload per array kind. loopInit() calls these on the
// arrays a kernel uses before that kernel is run.
void initData(LoopData::RealArray& ra);
void initData(LoopData::IndxArray& ia);
void initData(LoopData::ComplexArray& ca);
// Checksum bookkeeping for a loop's stat record at a given loop length.
void initChksum(LoopStat& stat, LoopLength ilength);
// updateChksum overloads: from a real array, from a single real value,
// and from a complex array. The array forms accept an optional
// scale_factor that defaults to 1.0.
void updateChksum(LoopStat& stat, LoopLength ilength,
const LoopData::RealArray& ra, Real_type scale_factor = 1.0);
void updateChksum(LoopStat& stat, LoopLength ilength,
Real_type val);
void updateChksum(LoopStat& stat, LoopLength ilength,
const LoopData::ComplexArray& ca, Real_type scale_factor = 1.0);
} // closing brace for unnamed namespace
//
// Accessor routine for suite kernel data.
//
// Returns the file-scope LoopData instance by reference. The pointer is
// dereferenced unconditionally, so defineLoopSuiteRunInfo() (which
// allocates the instance) must have been called beforehand.
//
LoopData& getLoopData()
{
   LoopData& suite_data = *s_loop_data;
   return suite_data;
}
//
// Define how suite will run and initialize stat structures for loops.
//
// NOTE: Loop lengths, loop sample counts (and weights for optimization
// evaluation) are defined here!
//
// These values should be set large enough to accurately generate
// execution timings (i.e., not too small to be masked by CPU timing
// resolution and overhead). The values set here were manually determined
// so that O(1) seconds of execution time is required to sample each loop
// on some of our fastest Intel machines.
//
void defineLoopSuiteRunInfo(const std::vector<LoopVariantID>& run_variants,
bool run_loop[],
double sample_frac,
double loop_length_factor)
{
#ifdef TESTSUITE
std::cout << "\n defineLoopSuiteRunInfo..." << std::endl;
#endif
std::vector<std::string> run_variant_names = getVariantNames(run_variants);
if ( s_loop_data == 0 ) {
s_loop_data = new LoopData();
}
//
//
// Enumeration defining loop groups for relative weighting of
// execution timing based on what we think is most important.
//
// In computation of figures of merit (FOM), loops with higher
// weights will reduce FOM value more for higher run-time than
// those with lower weights.
//
enum WeightGroup {
DATA_PARALLEL = 0,
ORDER_DEPENDENT,
TRANSCENDENTAL,
DATA_DEPENDENT,
POINTER_NEST,
COMPLEX,
NUM_WEIGHT_GROUPS // Keep this one last and NEVER comment out (!!)
};
//
// Initialize structure holding loop suite execution data.
//
LoopSuiteRunInfo& suite_info = getLoopSuiteRunInfo();
suite_info.loop_samp_frac = sample_frac;
suite_info.loop_weights.resize(NUM_WEIGHT_GROUPS);
suite_info.loop_weights[DATA_PARALLEL] = 2.0;
suite_info.loop_weights[ORDER_DEPENDENT] = 1.8;
suite_info.loop_weights[TRANSCENDENTAL] = 1.7;
suite_info.loop_weights[DATA_DEPENDENT] = 1.7;
suite_info.loop_weights[POINTER_NEST] = 1.4;
suite_info.loop_weights[COMPLEX] = 1.0;
suite_info.loop_length_names.resize(NUM_LENGTHS);
suite_info.loop_length_names[LONG] = std::string("LONG");
suite_info.loop_length_names[MEDIUM] = std::string("MEDIUM");
suite_info.loop_length_names[SHORT] = std::string("SHORT");
suite_info.num_loops_run.resize( run_variant_names.size() );
suite_info.tot_time.resize( run_variant_names.size() );
suite_info.fom_rel.resize( run_variant_names.size() );
suite_info.fom_rate.resize( run_variant_names.size() );
for (unsigned ilv = 0; ilv < run_variant_names.size(); ++ilv) {
suite_info.addLoopStats(run_variant_names[ilv]);
suite_info.num_loops_run[ilv].resize(NUM_LENGTHS, 0);
suite_info.tot_time[ilv].resize(NUM_LENGTHS, 0.0);
suite_info.fom_rel[ilv].resize(NUM_LENGTHS, 0.0);
suite_info.fom_rate[ilv].resize(NUM_LENGTHS, 0.0);
}
//
// Define common loop lengths for LONG, MEDIUM, SHORT loops.
//
// The values assigned here are propagated across all kernels
// (with a few exceptions) to simplify suite configuration en masse.
// These can also be set per-kernel below.
//
std::vector< int > shared_loop_length(NUM_LENGTHS);
shared_loop_length[LONG] = static_cast<int>(44217 * loop_length_factor);
shared_loop_length[MEDIUM] = static_cast<int>(5001 * loop_length_factor);
shared_loop_length[SHORT] = static_cast<int>(171 * loop_length_factor);
ADomain::loop_length_factor = loop_length_factor;
std::vector<double>& weight = suite_info.loop_weights;
Index_type max_loop_length = 0;
for (unsigned iloop = 0 ; iloop < suite_info.num_loops; ++iloop) {
std::string loop_name;
LoopStat loop_stat(suite_info.num_loop_lengths);
Index_type max_loop_indx = 0;
if ( run_loop[iloop] ) {
switch ( iloop ) {
case REF_LOOP : {
loop_name = std::string("REF_LOOP");
//
// Note: Reference loop stats are not used in
// in suite. Parameters are defined in
// defineReferenceLoopRunInfo( ) routine.
//
break;
}
//
// Parameters defining how loops in Subset A are run...
//
case PRESSURE_CALC :
case PRESSURE_CALC_ALT : {
if ( static_cast<LoopKernelID>(iloop) == PRESSURE_CALC ) {
loop_name = std::string("PRESSURE_CALC");
} else {
loop_name = std::string("PRESSURE_CALC_ALT");
}
loop_stat.loop_weight = weight[DATA_DEPENDENT];
for (int i = 0; i < NUM_LENGTHS; ++i) {
loop_stat.loop_length[i] = shared_loop_length[i];
}
max_loop_indx = loop_stat.loop_length[LONG];
loop_stat.samples_per_pass[LONG] = 15000;
loop_stat.samples_per_pass[MEDIUM] = 200000;
loop_stat.samples_per_pass[SHORT] = 10000000;
break;
}
case ENERGY_CALC :
case ENERGY_CALC_ALT : {
if ( static_cast<LoopKernelID>(iloop) == ENERGY_CALC ) {
loop_name = std::string("ENERGY_CALC");
} else {
loop_name = std::string("ENERGY_CALC_ALT");
}
loop_stat.loop_weight = weight[DATA_DEPENDENT];
for (int i = 0; i < NUM_LENGTHS; ++i) {
loop_stat.loop_length[i] = shared_loop_length[i];
}
max_loop_indx = loop_stat.loop_length[LONG];
loop_stat.samples_per_pass[LONG] = 3000;
loop_stat.samples_per_pass[MEDIUM] = 30000;
loop_stat.samples_per_pass[SHORT] = 1000000;
break;
}
case VOL3D_CALC : {
loop_name = std::string("VOL3D_CALC");
loop_stat.loop_weight = weight[ORDER_DEPENDENT];
Index_type ndims = 3;
ADomain Ldomain(LONG, ndims);
loop_stat.loop_length[LONG] = Ldomain.lpz - Ldomain.fpz + 1;
ADomain Mdomain(MEDIUM, ndims);
loop_stat.loop_length[MEDIUM] = Mdomain.lpz - Mdomain.fpz + 1;
ADomain Sdomain(SHORT, ndims);
loop_stat.loop_length[SHORT] = Sdomain.lpz - Sdomain.fpz + 1;
max_loop_indx = Ldomain.lpn;
loop_stat.samples_per_pass[LONG] = 6500;
loop_stat.samples_per_pass[MEDIUM] = 30000;
loop_stat.samples_per_pass[SHORT] = 800000;
break;
}
case DEL_DOT_VEC_2D : {
loop_name = std::string("DEL_DOT_VEC_2D");
loop_stat.loop_weight = weight[DATA_PARALLEL];
Index_type ndims = 2;
ADomain Ldomain(LONG, ndims);
loop_stat.loop_length[LONG] = Ldomain.n_real_zones;
ADomain Mdomain(MEDIUM, ndims);
loop_stat.loop_length[MEDIUM] = Mdomain.n_real_zones;
ADomain Sdomain(SHORT, ndims);
loop_stat.loop_length[SHORT] = Sdomain.n_real_zones;
max_loop_indx = Ldomain.lrn;
loop_stat.samples_per_pass[LONG] = 4000;
loop_stat.samples_per_pass[MEDIUM] = 25000;
loop_stat.samples_per_pass[SHORT] = 2000000;
break;
}
case COUPLE : {
loop_name = std::string("COUPLE");
loop_stat.loop_weight = weight[TRANSCENDENTAL];
Index_type ndims = 3;
ADomain Ldomain(LONG, ndims);
loop_stat.loop_length[LONG] = Ldomain.lpz - Ldomain.fpz + 1;
ADomain Mdomain(MEDIUM, ndims);
loop_stat.loop_length[MEDIUM] = Mdomain.lpz - Mdomain.fpz + 1;
ADomain Sdomain(SHORT, ndims);
loop_stat.loop_length[SHORT] = Sdomain.lpz - Sdomain.fpz + 1;
max_loop_indx = Ldomain.lrn;
loop_stat.samples_per_pass[LONG] = 2000;
loop_stat.samples_per_pass[MEDIUM] = 10000;
loop_stat.samples_per_pass[SHORT] = 600000;
break;
}
case FIR : {
loop_name = std::string("FIR");
loop_stat.loop_weight = weight[ORDER_DEPENDENT];
for (int i = 0; i < NUM_LENGTHS; ++i) {
loop_stat.loop_length[i] = shared_loop_length[i];
}
max_loop_indx = loop_stat.loop_length[LONG];
loop_stat.samples_per_pass[LONG] = 10000;
loop_stat.samples_per_pass[MEDIUM] = 80000;
loop_stat.samples_per_pass[SHORT] = 3000000;
break;
}
//
// Parameters defining how loops in Subset B are run...
//
case INIT3 : {
loop_name = std::string("INIT3");
loop_stat.loop_weight = weight[DATA_PARALLEL];
for (int i = 0; i < NUM_LENGTHS; ++i) {
loop_stat.loop_length[i] = shared_loop_length[i];
}
max_loop_indx = loop_stat.loop_length[LONG];
loop_stat.samples_per_pass[LONG] = 10000;
loop_stat.samples_per_pass[MEDIUM] = 110000;
loop_stat.samples_per_pass[SHORT] = 12000000;
break;
}
case MULADDSUB : {
loop_name = std::string("MULADDSUB");
loop_stat.loop_weight = weight[DATA_PARALLEL];
for (int i = 0; i < NUM_LENGTHS; ++i) {
loop_stat.loop_length[i] = shared_loop_length[i];
}
max_loop_indx = loop_stat.loop_length[LONG];
loop_stat.samples_per_pass[LONG] = 12000;
loop_stat.samples_per_pass[MEDIUM] = 140000;
loop_stat.samples_per_pass[SHORT] = 15000000;
break;
}
case IF_QUAD : {
loop_name = std::string("IF_QUAD");
loop_stat.loop_weight = weight[DATA_DEPENDENT];
for (int i = 0; i < NUM_LENGTHS; ++i) {
loop_stat.loop_length[i] = shared_loop_length[i];
}
max_loop_indx = loop_stat.loop_length[LONG];
loop_stat.samples_per_pass[LONG] = 3000;
loop_stat.samples_per_pass[MEDIUM] = 30000;
loop_stat.samples_per_pass[SHORT] = 1000000;
break;
}
case TRAP_INT : {
loop_name = std::string("TRAP_INT");
loop_stat.loop_weight = weight[ORDER_DEPENDENT];
for (int i = 0; i < NUM_LENGTHS; ++i) {
loop_stat.loop_length[i] = shared_loop_length[i];
}
max_loop_indx = loop_stat.loop_length[LONG];
loop_stat.samples_per_pass[LONG] = 4000;
loop_stat.samples_per_pass[MEDIUM] = 32000;
loop_stat.samples_per_pass[SHORT] = 1000000;
break;
}
//
// Parameters defining how loops in Subset C are run...
//
case HYDRO_1D : {
loop_name = std::string("HYDRO_1D");
loop_stat.loop_weight = weight[DATA_PARALLEL];
for (int i = 0; i < NUM_LENGTHS; ++i) {
loop_stat.loop_length[i] = shared_loop_length[i];
}
max_loop_indx = loop_stat.loop_length[LONG];
loop_stat.samples_per_pass[LONG] = 30000;
loop_stat.samples_per_pass[MEDIUM] = 320000;
loop_stat.samples_per_pass[SHORT] = 15000000;
break;
}
case ICCG : {
loop_name = std::string("ICCG");
loop_stat.loop_weight = weight[COMPLEX];
for (int i = 0; i < NUM_LENGTHS; ++i) {
loop_stat.loop_length[i] = shared_loop_length[i];
}
max_loop_indx = loop_stat.loop_length[LONG];
loop_stat.samples_per_pass[LONG] = 20000;
loop_stat.samples_per_pass[MEDIUM] = 200000;
loop_stat.samples_per_pass[SHORT] = 6000000;
break;
}
case INNER_PROD : {
loop_name = std::string("INNER_PROD");
loop_stat.loop_weight = weight[ORDER_DEPENDENT];
for (int i = 0; i < NUM_LENGTHS; ++i) {
loop_stat.loop_length[i] = shared_loop_length[i];
}
max_loop_indx = loop_stat.loop_length[LONG];
loop_stat.samples_per_pass[LONG] = 50000;
loop_stat.samples_per_pass[MEDIUM] = 600000;
loop_stat.samples_per_pass[SHORT] = 30000000;
break;
}
case BAND_LIN_EQ : {
loop_name = std::string("BAND_LIN_EQ");
loop_stat.loop_weight = weight[COMPLEX];
for (int i = 0; i < NUM_LENGTHS; ++i) {
loop_stat.loop_length[i] = shared_loop_length[i];
}
max_loop_indx = loop_stat.loop_length[LONG];
loop_stat.samples_per_pass[LONG] = 40000;
loop_stat.samples_per_pass[MEDIUM] = 600000;
loop_stat.samples_per_pass[SHORT] = 20000000;
break;
}
case TRIDIAG_ELIM : {
loop_name = std::string("TRIDIAG_ELIM");
loop_stat.loop_weight = weight[ORDER_DEPENDENT];
for (int i = 0; i < NUM_LENGTHS; ++i) {
loop_stat.loop_length[i] = shared_loop_length[i];
}
max_loop_indx = loop_stat.loop_length[LONG];
loop_stat.samples_per_pass[LONG] = 10000;
loop_stat.samples_per_pass[MEDIUM] = 100000;
loop_stat.samples_per_pass[SHORT] = 3000000;
break;
}
case EOS : {
loop_name = std::string("EOS");
loop_stat.loop_weight = weight[DATA_PARALLEL];
for (int i = 0; i < NUM_LENGTHS; ++i) {
loop_stat.loop_length[i] = shared_loop_length[i];
}
max_loop_indx = loop_stat.loop_length[LONG];
loop_stat.samples_per_pass[LONG] = 18000;
loop_stat.samples_per_pass[MEDIUM] = 140000;
loop_stat.samples_per_pass[SHORT] = 5000000;
break;
}
case ADI : {
loop_name = std::string("ADI");
loop_stat.loop_weight = weight[COMPLEX];
for (int i = 0; i < NUM_LENGTHS; ++i) {
loop_stat.loop_length[i] = shared_loop_length[i];
}
max_loop_indx = loop_stat.loop_length[LONG];
loop_stat.samples_per_pass[LONG] = 1000;
loop_stat.samples_per_pass[MEDIUM] = 9000;
loop_stat.samples_per_pass[SHORT] = 300000;
break;
}
case INT_PREDICT : {
loop_name = std::string("INT_PREDICT");
loop_stat.loop_weight = weight[POINTER_NEST];
for (int i = 0; i < NUM_LENGTHS; ++i) {
loop_stat.loop_length[i] = shared_loop_length[i];
}
max_loop_indx = loop_stat.loop_length[LONG];
loop_stat.samples_per_pass[LONG] = 3000;
loop_stat.samples_per_pass[MEDIUM] = 30000;
loop_stat.samples_per_pass[SHORT] = 2000000;
break;
}
case DIFF_PREDICT : {
loop_name = std::string("DIFF_PREDICT");
loop_stat.loop_weight = weight[POINTER_NEST];
for (int i = 0; i < NUM_LENGTHS; ++i) {
loop_stat.loop_length[i] = shared_loop_length[i];
}
max_loop_indx = loop_stat.loop_length[LONG];
loop_stat.samples_per_pass[LONG] = 2000;
loop_stat.samples_per_pass[MEDIUM] = 22000;
loop_stat.samples_per_pass[SHORT] = 1800000;
break;
}
case FIRST_SUM : {
loop_name = std::string("FIRST_SUM");
loop_stat.loop_weight = weight[ORDER_DEPENDENT];
for (int i = 0; i < NUM_LENGTHS; ++i) {
loop_stat.loop_length[i] = shared_loop_length[i];
}
max_loop_indx = loop_stat.loop_length[LONG];
loop_stat.samples_per_pass[LONG] = 30000;
loop_stat.samples_per_pass[MEDIUM] = 250000;
loop_stat.samples_per_pass[SHORT] = 8000000;
break;
}
case FIRST_DIFF : {
loop_name = std::string("FIRST_DIFF");
loop_stat.loop_weight = weight[DATA_PARALLEL];
for (int i = 0; i < NUM_LENGTHS; ++i) {
loop_stat.loop_length[i] = shared_loop_length[i];
}
max_loop_indx = loop_stat.loop_length[LONG];
loop_stat.samples_per_pass[LONG] = 30000;
loop_stat.samples_per_pass[MEDIUM] = 500000;
loop_stat.samples_per_pass[SHORT] = 30000000;
break;
}
case PIC_2D : {
loop_name = std::string("PIC_2D");
loop_stat.loop_weight = weight[COMPLEX];
for (int i = 0; i < NUM_LENGTHS; ++i) {
loop_stat.loop_length[i] = shared_loop_length[i];
}
max_loop_indx = loop_stat.loop_length[LONG];
loop_stat.samples_per_pass[LONG] = 2000;
loop_stat.samples_per_pass[MEDIUM] = 18000;
loop_stat.samples_per_pass[SHORT] = 700000;
break;
}
case PIC_1D : {
loop_name = std::string("PIC_1D");
loop_stat.loop_weight = weight[DATA_DEPENDENT];
for (int i = 0; i < NUM_LENGTHS; ++i) {
loop_stat.loop_length[i] = shared_loop_length[i];
}
max_loop_indx = loop_stat.loop_length[LONG];
loop_stat.samples_per_pass[LONG] = 3000;
loop_stat.samples_per_pass[MEDIUM] = 24000;
loop_stat.samples_per_pass[SHORT] = 1000000;
break;
}
case HYDRO_2D : {
loop_name = std::string("HYDRO_2D");
loop_stat.loop_weight = weight[ORDER_DEPENDENT];
for (int i = 0; i < NUM_LENGTHS; ++i) {
loop_stat.loop_length[i] = shared_loop_length[i];
}
max_loop_indx = loop_stat.loop_length[LONG];
loop_stat.samples_per_pass[LONG] = 300;
loop_stat.samples_per_pass[MEDIUM] = 2000;
loop_stat.samples_per_pass[SHORT] = 50000;
break;
}
case GEN_LIN_RECUR : {
loop_name = std::string("GEN_LIN_RECUR");
loop_stat.loop_weight = weight[ORDER_DEPENDENT];
for (int i = 0; i < NUM_LENGTHS; ++i) {
loop_stat.loop_length[i] = shared_loop_length[i];
}
max_loop_indx = loop_stat.loop_length[LONG];
loop_stat.samples_per_pass[LONG] = 4000;
loop_stat.samples_per_pass[MEDIUM] = 36000;
loop_stat.samples_per_pass[SHORT] = 1000000;
break;
}
case DISC_ORD : {
loop_name = std::string("DISC_ORD");
loop_stat.loop_weight = weight[ORDER_DEPENDENT];
for (int i = 0; i < NUM_LENGTHS; ++i) {
loop_stat.loop_length[i] = shared_loop_length[i];
}
max_loop_indx = loop_stat.loop_length[LONG];
loop_stat.samples_per_pass[LONG] = 1000;
loop_stat.samples_per_pass[MEDIUM] = 8000;
loop_stat.samples_per_pass[SHORT] = 200000;
break;
}
case MAT_X_MAT : {
loop_name = std::string("MAT_X_MAT");
loop_stat.loop_weight = weight[ORDER_DEPENDENT];
for (int i = 0; i < NUM_LENGTHS; ++i) {
loop_stat.loop_length[i] = shared_loop_length[i];
}
max_loop_indx = loop_stat.loop_length[LONG];
loop_stat.samples_per_pass[LONG] = 8;
loop_stat.samples_per_pass[MEDIUM] = 70;
loop_stat.samples_per_pass[SHORT] = 8000;
break;
}
case PLANCKIAN : {
loop_name = std::string("PLANCKIAN");
loop_stat.loop_weight = weight[TRANSCENDENTAL];
for (int i = 0; i < NUM_LENGTHS; ++i) {
loop_stat.loop_length[i] = shared_loop_length[i];
}
max_loop_indx = loop_stat.loop_length[LONG];
loop_stat.samples_per_pass[LONG] = 4000;
loop_stat.samples_per_pass[MEDIUM] = 30000;
loop_stat.samples_per_pass[SHORT] = 1000000;
break;
}
case IMP_HYDRO_2D : {
loop_name = std::string("IMP_HYDRO_2D");
loop_stat.loop_weight = weight[ORDER_DEPENDENT];
for (int i = 0; i < NUM_LENGTHS; ++i) {
loop_stat.loop_length[i] = shared_loop_length[i];
}
max_loop_indx = loop_stat.loop_length[LONG];
loop_stat.samples_per_pass[LONG] = 800;
loop_stat.samples_per_pass[MEDIUM] = 6000;
loop_stat.samples_per_pass[SHORT] = 150000;
break;
}
case FIND_FIRST_MIN : {
loop_name = std::string("FIND_FIRST_MIN");
loop_stat.loop_weight = weight[DATA_DEPENDENT];
for (int i = 0; i < NUM_LENGTHS; ++i) {
loop_stat.loop_length[i] = shared_loop_length[i];
}
max_loop_indx = loop_stat.loop_length[LONG];
loop_stat.samples_per_pass[LONG] = 50000;
loop_stat.samples_per_pass[MEDIUM] = 330000;
loop_stat.samples_per_pass[SHORT] = 8000000;
break;
}
default : {
std::cout << "\n Unknown loop id = " << iloop << std::endl;
}
} // switch statement on loop id
} // if loop with id is to be run
suite_info.loop_names.push_back(loop_name);
//
// Set max loop length to be largest loop index used over all loops.
//
max_loop_length =
std::max(max_loop_length, max_loop_indx);
//
// Set number of times each loop length will be run.
//
for (unsigned i = 0; i < suite_info.num_loop_lengths; ++i) {
loop_stat.samples_per_pass[i] = static_cast<int>(
loop_stat.samples_per_pass[i] * suite_info.loop_samp_frac /
loop_length_factor);
if ( suite_info.run_loop_length[i] ) {
loop_stat.loop_run_count[i] =
loop_stat.samples_per_pass[i] * suite_info.num_suite_passes;
} else {
loop_stat.loop_run_count[i] = 0;
}
}
//
// We add loop stat for each loop to maintain consistent array indexing.
// However, only loops specified to be run will be executed.
//
for (unsigned ilv = 0; ilv < run_variant_names.size(); ++ilv) {
suite_info.getLoopStats(run_variant_names[ilv]).push_back(loop_stat);
}
} // loop over loop IDs
defineReferenceLoopRunInfo();
s_loop_data->max_loop_length =
std::max(max_loop_length, suite_info.ref_loop_stat.loop_length[LONG]);
}
//
// Generate vector of loop variant names string from vector of
// LoopVariantID enum values.
//
std::vector<std::string> getVariantNames(
const std::vector<LoopVariantID>& lvids)
{
std::vector<std::string> run_variant_names;
for (unsigned ilv = 0; ilv < lvids.size(); ++ilv) {
std::string variant_name = getVariantName(lvids[ilv]);
run_variant_names.push_back(variant_name);
}
return run_variant_names;
}
//
// Generate loop variant name string from LoopVariantID enum value.
//
// An id with no case below is reported on std::cout and yields an empty
// string. The "Misc" variants are only recognized when LCALS_DO_MISC is
// defined.
//
std::string getVariantName(LoopVariantID lvid)
{
   switch ( lvid ) {

      // Benchmark variants
      //
      case RAW:
         return "Raw";

      case RAW_OMP:
         return "Raw_OMP";

      case FORALL_LAMBDA:
         return "Forall_Lambda";

      case FORALL_LAMBDA_OMP:
         return "Forall_Lambda_OMP";

#if defined(LCALS_DO_MISC)

      // Misc variants
      //
      case FORALL_HYBRID_LAMBDA:
         return "Hybrid_Lambda";

#if 0 // THESE ARE NOT AVAILABLE YET!!!
      case FORALL_HYBRID_LAMBDA_OMP:
         return "Hybrid_Lambda_OMP";
#endif

      case FORALL_FUNCTOR:
         return "Forall_Functor";

      case FORALL_FUNCTOR_OMP:
         return "Forall_Functor_OMP";

#if 0 // THESE ARE NOT AVAILABLE YET!!!
      case FORALL_HYBRID_FUNCTOR:
         return "Hybrid_Functor";

      case FORALL_HYBRID_FUNCTOR_OMP:
         return "Hybrid_Functor_OMP";
#endif

      case RAW_FUNC:
         return "Raw_Func";

      case FORALL_LAMBDA_TYPEFIX:
         return "Forall_Lambda_TYPEFIX";

      case FORALL_LAMBDA_OMP_TYPEFIX:
         return "Forall_Lambda_OMP_TYPEFIX";

      case FORALL_HYBRID_LAMBDA_TYPEFIX:
         return "Hybrid_Lambda_TYPEFIX";

#endif // if LCALS_DO_MISC

      default:
         std::cout << "\n Unknown loop variant id = " << lvid << std::endl;
         break;

   }

   // Only reached for an unrecognized id.
   return std::string();
}
#ifdef TEST_SUITE
//
// Execute loop variant identified by function args.
//
// The stat vector for the variant is looked up by the variant's name in
// the suite run info and handed, together with run_loop and ilength, to
// the run routine(s) for that variant. An id with no case below is
// reported on std::cout and nothing is run.
//
void runLoopVariant( LoopVariantID lvid,
                     bool run_loop[],
                     LoopLength ilength )
{
   LoopSuiteRunInfo& suite_info = getLoopSuiteRunInfo();
   std::string variant_name = getVariantName(lvid);
   std::vector<LoopStat>& stats = suite_info.getLoopStats(variant_name);

   switch ( lvid ) {

      // Benchmark variants
      //
      case RAW: {
         runARawLoops(stats, run_loop, ilength);
         runBRawLoops(stats, run_loop, ilength);
         runCRawLoops(stats, run_loop, ilength);
         break;
      }

      case RAW_OMP: {
         runOMPRawLoops(stats, run_loop, ilength);
         break;
      }

      case FORALL_LAMBDA: {
         runAForallLambdaLoops(stats, run_loop, ilength);
         runBForallLambdaLoops(stats, run_loop, ilength);
         runCForallLambdaLoops(stats, run_loop, ilength);
         break;
      }

      case FORALL_LAMBDA_OMP: {
         runOMPForallLambdaLoops(stats, run_loop, ilength);
         break;
      }

#if defined(LCALS_DO_MISC)

      // Misc variants
      //
      case FORALL_HYBRID_LAMBDA: {
         runAForallHybridLambdaLoops(stats, run_loop, ilength);
         runBForallHybridLambdaLoops(stats, run_loop, ilength);
         runCForallHybridLambdaLoops(stats, run_loop, ilength);
         break;
      }

#if 0 // THESE ARE NOT DEFINED YET!!!
      case FORALL_HYBRID_LAMBDA_OMP: {
         break;
      }
#endif

      case FORALL_FUNCTOR: {
         runAForallFunctorLoops(stats, run_loop, ilength);
         runBForallFunctorLoops(stats, run_loop, ilength);
         runCForallFunctorLoops(stats, run_loop, ilength);
         break;
      }

      case FORALL_FUNCTOR_OMP: {
         runOMPForallFunctorLoops(stats, run_loop, ilength);
         break;
      }

#if 0 // THESE ARE NOT DEFINED YET!!!
      case FORALL_HYBRID_FUNCTOR: {
         break;
      }
      case FORALL_HYBRID_FUNCTOR_OMP: {
         break;
      }
#endif

      case RAW_FUNC: {
         runARawFuncLoops(stats, run_loop, ilength);
         runBRawFuncLoops(stats, run_loop, ilength);
         runCRawFuncLoops(stats, run_loop, ilength);
         break;
      }

      case FORALL_LAMBDA_TYPEFIX: {
         runAForallLambdaLoops_TYPEFIX(stats, run_loop, ilength);
         runBForallLambdaLoops_TYPEFIX(stats, run_loop, ilength);
         runCForallLambdaLoops_TYPEFIX(stats, run_loop, ilength);
         break;
      }

      case FORALL_LAMBDA_OMP_TYPEFIX: {
         runOMPForallLambdaLoops_TYPEFIX(stats, run_loop, ilength);
         break;
      }

      case FORALL_HYBRID_LAMBDA_TYPEFIX: {
         runAForallHybridLambdaLoops_TYPEFIX(stats, run_loop, ilength);
         runBForallHybridLambdaLoops_TYPEFIX(stats, run_loop, ilength);
         runCForallHybridLambdaLoops_TYPEFIX(stats, run_loop, ilength);
         break;
      }

#endif // if LCALS_DO_MISC

      default: {
         std::cout << "\n Unknown loop variant id = " << lvid << std::endl;
      }

   }
}
#endif
//
// Initialize data to run loop with given ID. Note that this routine
// assumes that it is called before the loop with given ID is run and
// that data initialization calls in here are concistent with what is
// needed to execute loop.
//
// Loop data is initialized in this routine so all variants of loop
// tun the same way. Note that data arrays are initialized for
// each loop only under the circumstances that it is actually required.
//
//
void loopInit(unsigned iloop, LoopStat& stat)
{
LoopData& loop_data = getLoopData();
flushCache();
stat.loop_is_run = true;
switch ( iloop ) {
case REF_LOOP : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
initData(loop_data.RealArray_1D[2]);
break;
}
case PRESSURE_CALC :
case PRESSURE_CALC_ALT : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
initData(loop_data.RealArray_1D[2]);
initData(loop_data.RealArray_1D[3]);
initData(loop_data.RealArray_1D[4]);
initData(loop_data.RealArray_scalars);
break;
}
case ENERGY_CALC :
case ENERGY_CALC_ALT : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
initData(loop_data.RealArray_1D[2]);
initData(loop_data.RealArray_1D[3]);
initData(loop_data.RealArray_1D[4]);
initData(loop_data.RealArray_1D[5]);
initData(loop_data.RealArray_1D[6]);
initData(loop_data.RealArray_1D[7]);
initData(loop_data.RealArray_1D[8]);
initData(loop_data.RealArray_1D[9]);
initData(loop_data.RealArray_1D[10]);
initData(loop_data.RealArray_1D[11]);
initData(loop_data.RealArray_1D[12]);
initData(loop_data.RealArray_1D[13]);
initData(loop_data.RealArray_1D[14]);
initData(loop_data.RealArray_scalars);
break;
}
case VOL3D_CALC : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
initData(loop_data.RealArray_1D[2]);
initData(loop_data.RealArray_1D[3]);
break;
}
case DEL_DOT_VEC_2D : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
initData(loop_data.RealArray_1D[2]);
initData(loop_data.RealArray_1D[3]);
initData(loop_data.RealArray_1D[4]);
break;
}
case COUPLE : {
initData(loop_data.ComplexArray_1D[0]);
initData(loop_data.ComplexArray_1D[1]);
initData(loop_data.ComplexArray_1D[2]);
initData(loop_data.ComplexArray_1D[3]);
initData(loop_data.ComplexArray_1D[4]);
break;
}
case FIR : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
break;
}
case INIT3 : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
initData(loop_data.RealArray_1D[2]);
initData(loop_data.RealArray_1D[3]);
initData(loop_data.RealArray_1D[4]);
break;
}
case MULADDSUB : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
initData(loop_data.RealArray_1D[2]);
initData(loop_data.RealArray_1D[3]);
initData(loop_data.RealArray_1D[4]);
break;
}
case IF_QUAD : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
initData(loop_data.RealArray_1D[2]);
initData(loop_data.RealArray_1D[3]);
initData(loop_data.RealArray_1D[4]);
break;
}
case TRAP_INT : {
initData(loop_data.IndxArray_1D[0]);
initData(loop_data.RealArray_scalars);
break;
}
case HYDRO_1D : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
initData(loop_data.RealArray_1D[2]);
initData(loop_data.RealArray_scalars);
break;
}
case ICCG : {
initData(loop_data.RealArray_1D_Nx4[0]);
initData(loop_data.RealArray_1D_Nx4[1]);
break;
}
case INNER_PROD : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
break;
}
case BAND_LIN_EQ : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
break;
}
case TRIDIAG_ELIM : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
initData(loop_data.RealArray_1D[2]);
break;
}
case EOS : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
initData(loop_data.RealArray_1D[2]);
initData(loop_data.RealArray_1D[3]);
initData(loop_data.RealArray_scalars);
break;
}
case ADI : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
initData(loop_data.RealArray_1D[2]);
initData(loop_data.RealArray_3D_2xNx4[0]);
initData(loop_data.RealArray_3D_2xNx4[1]);
initData(loop_data.RealArray_3D_2xNx4[2]);
initData(loop_data.RealArray_scalars);
break;
}
case INT_PREDICT : {
initData(loop_data.RealArray_2D_Nx25[0]);
initData(loop_data.RealArray_scalars);
break;
}
case DIFF_PREDICT : {
initData(loop_data.RealArray_2D_Nx25[0]);
initData(loop_data.RealArray_2D_Nx25[1]);
break;
}
case FIRST_SUM : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
break;
}
case FIRST_DIFF : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
break;
}
case PIC_2D : {
initData(loop_data.RealArray_2D_Nx25[0]);
initData(loop_data.RealArray_2D_Nx25[1]);
initData(loop_data.RealArray_2D_Nx25[2]);
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
initData(loop_data.IndxArray_1D[0]);
initData(loop_data.IndxArray_1D[1]);
initData(loop_data.RealArray_2D_64x64[0]);
break;
}
case PIC_1D : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
initData(loop_data.RealArray_1D[2]);
initData(loop_data.RealArray_1D[3]);
initData(loop_data.RealArray_1D[4]);
initData(loop_data.RealArray_1D[5]);
initData(loop_data.RealArray_1D[6]);
initData(loop_data.RealArray_1D[7]);
initData(loop_data.RealArray_1D[8]);
initData(loop_data.RealArray_scalars);
initData(loop_data.IndxArray_1D[2]);
initData(loop_data.IndxArray_1D[3]);
initData(loop_data.IndxArray_1D[4]);
break;
}
case HYDRO_2D : {
initData(loop_data.RealArray_2D_7xN[0]);
initData(loop_data.RealArray_2D_7xN[1]);
initData(loop_data.RealArray_2D_7xN[2]);
initData(loop_data.RealArray_2D_7xN[3]);
initData(loop_data.RealArray_2D_7xN[4]);
initData(loop_data.RealArray_2D_7xN[5]);
initData(loop_data.RealArray_2D_7xN[6]);
initData(loop_data.RealArray_2D_7xN[7]);
initData(loop_data.RealArray_2D_7xN[8]);
initData(loop_data.RealArray_2D_7xN[9]);
initData(loop_data.RealArray_2D_7xN[10]);
break;
}
case GEN_LIN_RECUR : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
initData(loop_data.RealArray_1D[2]);
initData(loop_data.RealArray_scalars);
break;
}
case DISC_ORD : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
initData(loop_data.RealArray_1D[2]);
initData(loop_data.RealArray_1D[3]);
initData(loop_data.RealArray_1D[4]);
initData(loop_data.RealArray_1D[5]);
initData(loop_data.RealArray_1D[6]);
initData(loop_data.RealArray_1D[7]);
initData(loop_data.RealArray_1D[8]);
initData(loop_data.RealArray_scalars);
break;
}
case MAT_X_MAT : {
initData(loop_data.RealArray_2D_Nx25[0]);
initData(loop_data.RealArray_2D_Nx25[1]);
initData(loop_data.RealArray_2D_64x64[0]);
break;
}
case PLANCKIAN : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
initData(loop_data.RealArray_1D[2]);
initData(loop_data.RealArray_1D[3]);
initData(loop_data.RealArray_1D[4]);
break;
}
case IMP_HYDRO_2D : {
initData(loop_data.RealArray_2D_7xN[0]);
initData(loop_data.RealArray_2D_7xN[1]);
initData(loop_data.RealArray_2D_7xN[2]);
initData(loop_data.RealArray_2D_7xN[3]);
initData(loop_data.RealArray_2D_7xN[4]);
initData(loop_data.RealArray_2D_7xN[5]);
break;
}
case FIND_FIRST_MIN : {
initData(loop_data.RealArray_1D[0]);
break;
}
default : {
std::cout << "\n Unknown loop id = " << iloop << std::endl;
}
}
}
/* *********** LLVM Test Suite ************* *
* *
* Overloaded for use in the test suite. *
* Removes LoopStat argument and setting *
* the loop as run. Benchmark library *
* replaces the stat object for timing *
* statistics. *
* *
* ***************************************** */
void loopInit(unsigned iloop) //, LoopStat& stat)
{
LoopData& loop_data = getLoopData();
flushCache();
// stat.loop_is_run = true;
switch ( iloop ) {
case REF_LOOP : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
initData(loop_data.RealArray_1D[2]);
break;
}
//
// Initialize data for Loop Subset A...
//
case PRESSURE_CALC :
case PRESSURE_CALC_ALT : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
initData(loop_data.RealArray_1D[2]);
initData(loop_data.RealArray_1D[3]);
initData(loop_data.RealArray_1D[4]);
initData(loop_data.RealArray_scalars);
break;
}
case ENERGY_CALC :
case ENERGY_CALC_ALT : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
initData(loop_data.RealArray_1D[2]);
initData(loop_data.RealArray_1D[3]);
initData(loop_data.RealArray_1D[4]);
initData(loop_data.RealArray_1D[5]);
initData(loop_data.RealArray_1D[6]);
initData(loop_data.RealArray_1D[7]);
initData(loop_data.RealArray_1D[8]);
initData(loop_data.RealArray_1D[9]);
initData(loop_data.RealArray_1D[10]);
initData(loop_data.RealArray_1D[11]);
initData(loop_data.RealArray_1D[12]);
initData(loop_data.RealArray_1D[13]);
initData(loop_data.RealArray_1D[14]);
initData(loop_data.RealArray_scalars);
break;
}
case VOL3D_CALC : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
initData(loop_data.RealArray_1D[2]);
initData(loop_data.RealArray_1D[3]);
break;
}
case DEL_DOT_VEC_2D : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
initData(loop_data.RealArray_1D[2]);
initData(loop_data.RealArray_1D[3]);
initData(loop_data.RealArray_1D[4]);
break;
}
case COUPLE : {
initData(loop_data.ComplexArray_1D[0]);
initData(loop_data.ComplexArray_1D[1]);
initData(loop_data.ComplexArray_1D[2]);
initData(loop_data.ComplexArray_1D[3]);
initData(loop_data.ComplexArray_1D[4]);
break;
}
case FIR : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
break;
}
//
// Initialize data for Loop Subset B...
//
case INIT3 : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
initData(loop_data.RealArray_1D[2]);
initData(loop_data.RealArray_1D[3]);
initData(loop_data.RealArray_1D[4]);
break;
}
case MULADDSUB : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
initData(loop_data.RealArray_1D[2]);
initData(loop_data.RealArray_1D[3]);
initData(loop_data.RealArray_1D[4]);
break;
}
case IF_QUAD : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
initData(loop_data.RealArray_1D[2]);
initData(loop_data.RealArray_1D[3]);
initData(loop_data.RealArray_1D[4]);
break;
}
case TRAP_INT : {
initData(loop_data.IndxArray_1D[0]);
initData(loop_data.RealArray_scalars);
break;
}
//
// Initialize data for Loop Subset C...
//
case HYDRO_1D : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
initData(loop_data.RealArray_1D[2]);
initData(loop_data.RealArray_scalars);
break;
}
case ICCG : {
initData(loop_data.RealArray_1D_Nx4[0]);
initData(loop_data.RealArray_1D_Nx4[1]);
break;
}
case INNER_PROD : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
break;
}
case BAND_LIN_EQ : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
break;
}
case TRIDIAG_ELIM : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
initData(loop_data.RealArray_1D[2]);
break;
}
case EOS : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
initData(loop_data.RealArray_1D[2]);
initData(loop_data.RealArray_1D[3]);
initData(loop_data.RealArray_scalars);
break;
}
case ADI : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
initData(loop_data.RealArray_1D[2]);
initData(loop_data.RealArray_3D_2xNx4[0]);
initData(loop_data.RealArray_3D_2xNx4[1]);
initData(loop_data.RealArray_3D_2xNx4[2]);
initData(loop_data.RealArray_scalars);
break;
}
case INT_PREDICT : {
initData(loop_data.RealArray_2D_Nx25[0]);
initData(loop_data.RealArray_scalars);
break;
}
case DIFF_PREDICT : {
initData(loop_data.RealArray_2D_Nx25[0]);
initData(loop_data.RealArray_2D_Nx25[1]);
break;
}
case FIRST_SUM : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
break;
}
case FIRST_DIFF : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
break;
}
case PIC_2D : {
initData(loop_data.RealArray_2D_Nx25[0]);
initData(loop_data.RealArray_2D_Nx25[1]);
initData(loop_data.RealArray_2D_Nx25[2]);
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
initData(loop_data.IndxArray_1D[0]);
initData(loop_data.IndxArray_1D[1]);
initData(loop_data.RealArray_2D_64x64[0]);
break;
}
case PIC_1D : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
initData(loop_data.RealArray_1D[2]);
initData(loop_data.RealArray_1D[3]);
initData(loop_data.RealArray_1D[4]);
initData(loop_data.RealArray_1D[5]);
initData(loop_data.RealArray_1D[6]);
initData(loop_data.RealArray_1D[7]);
initData(loop_data.RealArray_1D[8]);
initData(loop_data.RealArray_scalars);
initData(loop_data.IndxArray_1D[2]);
initData(loop_data.IndxArray_1D[3]);
initData(loop_data.IndxArray_1D[4]);
break;
}
case HYDRO_2D : {
initData(loop_data.RealArray_2D_7xN[0]);
initData(loop_data.RealArray_2D_7xN[1]);
initData(loop_data.RealArray_2D_7xN[2]);
initData(loop_data.RealArray_2D_7xN[3]);
initData(loop_data.RealArray_2D_7xN[4]);
initData(loop_data.RealArray_2D_7xN[5]);
initData(loop_data.RealArray_2D_7xN[6]);
initData(loop_data.RealArray_2D_7xN[7]);
initData(loop_data.RealArray_2D_7xN[8]);
initData(loop_data.RealArray_2D_7xN[9]);
initData(loop_data.RealArray_2D_7xN[10]);
break;
}
case GEN_LIN_RECUR : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
initData(loop_data.RealArray_1D[2]);
initData(loop_data.RealArray_scalars);
break;
}
case DISC_ORD : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
initData(loop_data.RealArray_1D[2]);
initData(loop_data.RealArray_1D[3]);
initData(loop_data.RealArray_1D[4]);
initData(loop_data.RealArray_1D[5]);
initData(loop_data.RealArray_1D[6]);
initData(loop_data.RealArray_1D[7]);
initData(loop_data.RealArray_1D[8]);
initData(loop_data.RealArray_scalars);
break;
}
case MAT_X_MAT : {
initData(loop_data.RealArray_2D_Nx25[0]);
initData(loop_data.RealArray_2D_Nx25[1]);
initData(loop_data.RealArray_2D_64x64[0]);
break;
}
case PLANCKIAN : {
initData(loop_data.RealArray_1D[0]);
initData(loop_data.RealArray_1D[1]);
initData(loop_data.RealArray_1D[2]);
initData(loop_data.RealArray_1D[3]);
initData(loop_data.RealArray_1D[4]);
break;
}
case IMP_HYDRO_2D : {
initData(loop_data.RealArray_2D_7xN[0]);
initData(loop_data.RealArray_2D_7xN[1]);
initData(loop_data.RealArray_2D_7xN[2]);
initData(loop_data.RealArray_2D_7xN[3]);
initData(loop_data.RealArray_2D_7xN[4]);
initData(loop_data.RealArray_2D_7xN[5]);
break;
}
case FIND_FIRST_MIN : {
initData(loop_data.RealArray_1D[0]);
break;
}
default : {
std::cout << "\n Unknown loop id = " << iloop << std::endl;
}
} // switch statement on loop id
}
//
// Finalize data for loop with given ID. Note that this routine assumes
// that it is called after the loop with given ID is run and that checksum
// calls in here are consistent with what is needed for loop.
//
void loopFinalize(unsigned iloop, LoopStat& stat, LoopLength ilength)
{
#if defined(LCALS_VERIFY_CHECKSUM)

   // Start from a zero checksum for this loop length, then fold in the
   // arrays (or scalar) listed for the loop ID below. Without
   // LCALS_VERIFY_CHECKSUM this routine compiles to an empty body.
   initChksum(stat, ilength);

   LoopData& suite = getLoopData();

   // Short aliases for the array groups held by the suite data.
   LoopData::RealArray*    const real_1d      = suite.RealArray_1D;
   LoopData::RealArray*    const real_1d_nx4  = suite.RealArray_1D_Nx4;
   LoopData::RealArray*    const real_2d_nx25 = suite.RealArray_2D_Nx25;
   LoopData::RealArray*    const real_2d_7xn  = suite.RealArray_2D_7xN;
   LoopData::RealArray*    const real_2d_64   = suite.RealArray_2D_64x64;
   LoopData::RealArray*    const real_3d      = suite.RealArray_3D_2xNx4;
   LoopData::ComplexArray* const cplx_1d      = suite.ComplexArray_1D;

   switch ( iloop ) {

      case REF_LOOP : {
         // No checksum contribution for the reference loop.
         break;
      }

      //
      // Loop Subset A
      //

      case PRESSURE_CALC :
      case PRESSURE_CALC_ALT : {
         updateChksum(stat, ilength, real_1d[2]);
         break;
      }

      case ENERGY_CALC :
      case ENERGY_CALC_ALT : {
         updateChksum(stat, ilength, real_1d[0]);
         updateChksum(stat, ilength, real_1d[5]);
         break;
      }

      case VOL3D_CALC : {
         updateChksum(stat, ilength, real_1d[3]);
         break;
      }

      case DEL_DOT_VEC_2D : {
         updateChksum(stat, ilength, real_1d[4]);
         break;
      }

      case COUPLE : {
         for (unsigned a = 0; a < 3; ++a) {
            updateChksum(stat, ilength, cplx_1d[a]);
         }
         break;
      }

      case FIR : {
         updateChksum(stat, ilength, real_1d[0]);
         break;
      }

      //
      // Loop Subset B
      //

      case INIT3 : {
         for (unsigned a = 0; a < 3; ++a) {
            updateChksum(stat, ilength, real_1d[a]);
         }
         break;
      }

      case MULADDSUB : {
         for (unsigned a = 0; a < 3; ++a) {
            updateChksum(stat, ilength, real_1d[a]);
         }
         break;
      }

      case IF_QUAD : {
         for (unsigned a = 3; a < 5; ++a) {
            updateChksum(stat, ilength, real_1d[a]);
         }
         break;
      }

      case TRAP_INT : {
         updateChksum(stat, ilength, suite.scalar_Real[0]);
         break;
      }

      //
      // Loop Subset C
      //

      case HYDRO_1D : {
         updateChksum(stat, ilength, real_1d[0]);
         break;
      }

      case ICCG : {
         updateChksum(stat, ilength, real_1d_nx4[0]);
         break;
      }

      case INNER_PROD : {
         updateChksum(stat, ilength, suite.scalar_Real[0]);
         break;
      }

      case BAND_LIN_EQ : {
         updateChksum(stat, ilength, real_1d[0]);
         break;
      }

      case TRIDIAG_ELIM : {
         updateChksum(stat, ilength, real_1d[0]);
         break;
      }

      case EOS : {
         updateChksum(stat, ilength, real_1d[0]);
         break;
      }

      case ADI : {
         for (unsigned a = 0; a < 3; ++a) {
            updateChksum(stat, ilength, real_3d[a]);
         }
         break;
      }

      case INT_PREDICT : {
         updateChksum(stat, ilength, real_2d_nx25[0]);
         break;
      }

      case DIFF_PREDICT : {
         updateChksum(stat, ilength, real_2d_nx25[0]);
         break;
      }

      case FIRST_SUM : {
         updateChksum(stat, ilength, real_1d[0]);
         break;
      }

      case FIRST_DIFF : {
         updateChksum(stat, ilength, real_1d[0]);
         break;
      }

      case PIC_2D : {
         updateChksum(stat, ilength, real_2d_nx25[0]);
         updateChksum(stat, ilength, real_2d_64[0]);
         break;
      }

      case PIC_1D : {
         // Order matters for bit-identical sums: 6, then 1, then 7.
         updateChksum(stat, ilength, real_1d[6]);
         updateChksum(stat, ilength, real_1d[1]);
         updateChksum(stat, ilength, real_1d[7]);
         break;
      }

      case HYDRO_2D : {
         for (unsigned a = 9; a < 11; ++a) {
            updateChksum(stat, ilength, real_2d_7xn[a]);
         }
         break;
      }

      case GEN_LIN_RECUR : {
         updateChksum(stat, ilength, real_1d[0]);
         break;
      }

      case DISC_ORD : {
         updateChksum(stat, ilength, real_1d[7]);
         break;
      }

      case MAT_X_MAT : {
         updateChksum(stat, ilength, real_2d_nx25[0]);
         break;
      }

      case PLANCKIAN : {
         updateChksum(stat, ilength, real_1d[4]);
         break;
      }

      case IMP_HYDRO_2D : {
         updateChksum(stat, ilength, real_2d_7xn[0]);
         break;
      }

      case FIND_FIRST_MIN : {
         updateChksum(stat, ilength, suite.scalar_Real[0]);
         break;
      }

      default : {
         std::cout << "\n Unknown loop id = " << iloop << std::endl;
      }

   }  // switch statement on loop id

#endif  // if LCALS_VERIFY_CHECKSUM
}
//
// Allocate and initialize arrays (and scalars) used to execute loops in suite.
//
void allocateLoopData()
{
#ifdef TESTSUITE
std::cout << "\n allocateLoopData..." << std::endl;
#endif
unsigned num_aligned_segments =
(s_loop_data->max_loop_length + 20)/LCALS_DATA_ALIGN + 1;
unsigned aligned_chunksize = num_aligned_segments * LCALS_DATA_ALIGN;
//
// Allocate and initialize 1D loop length Real arrays.
//
for (unsigned i = 0; i < s_loop_data->s_num_1D_Real_arrays; ++i) {
Index_type data_len = aligned_chunksize;
LoopData::RealArray* rarray = s_loop_data->RealArray_1D;
rarray[i].id = i+1;
Real_ptr data = allocAndInitData(rarray[i], data_len);
s_loop_data->array_1D_Real[i] = data;
}
//
// Allocate and initialize 1D loop length X 4 Real arrays.
//
for (unsigned i = 0; i < s_loop_data->s_num_1D_Nx4_Real_arrays; ++i) {
Index_type data_len = aligned_chunksize*4;
LoopData::RealArray* rarray = s_loop_data->RealArray_1D_Nx4;
rarray[i].id = i+1;
Real_ptr data = allocAndInitData(rarray[i], data_len);
s_loop_data->array_1D_Nx4_Real[i] = data;
}
//
// Allocate and initialize 1D loop length Indx arrays.
//
for (unsigned i = 0; i < s_loop_data->s_num_1D_Indx_arrays; ++i) {
Index_type data_len = aligned_chunksize;
LoopData::IndxArray* iarray = s_loop_data->IndxArray_1D;
iarray[i].id = i;
Index_type* data = allocAndInitData(iarray[i], data_len);
s_loop_data->array_1D_Indx[i] = data;
}
//
// Allocate and initialize 1D loop length Complex arrays.
//
for (unsigned i = 0; i < s_loop_data->s_num_1D_Complex_arrays; ++i) {
Index_type data_len = aligned_chunksize;
LoopData::ComplexArray* carray = s_loop_data->ComplexArray_1D;
carray[i].id = i+1;
Complex_ptr data = allocAndInitData(carray[i], data_len);
s_loop_data->array_1D_Complex[i] = data;
}
//
// Allocate and initialize 2D loop length X 25 Real arrays.
//
for (unsigned i = 0; i < s_loop_data->s_num_2D_Nx25_Real_arrays; ++i) {
Index_type data_len = aligned_chunksize*25;
LoopData::RealArray* rarray = s_loop_data->RealArray_2D_Nx25;
rarray[i].id = i+1;
Real_ptr data = allocAndInitData(rarray[i], data_len);
s_loop_data->array_2D_Nx25_Real[i] = new Real_ptr[aligned_chunksize];
for (Index_type k = 0; k < aligned_chunksize; ++k) {
s_loop_data->array_2D_Nx25_Real[i][k] = &data[k*25];
}
}
//
// Allocate and initialize 2D 7 X loop length Real arrays.
//
for (unsigned i = 0; i < s_loop_data->s_num_2D_7xN_Real_arrays; ++i) {
Index_type data_len = 7*aligned_chunksize;
LoopData::RealArray* rarray = s_loop_data->RealArray_2D_7xN;
rarray[i].id = i+1;
Real_ptr data = allocAndInitData(rarray[i], data_len);
s_loop_data->array_2D_7xN_Real[i] = new Real_ptr[7];
for (Index_type k = 0; k < 7; ++k) {
s_loop_data->array_2D_7xN_Real[i][k] = &data[k*aligned_chunksize];
}
}
//
// Allocate and initialize 2D 64 X 64 Real arrays.
//
for (unsigned i = 0; i < s_loop_data->s_num_2D_64x64_Real_arrays; ++i) {
Index_type data_len = 64*64;
LoopData::RealArray* rarray = s_loop_data->RealArray_2D_64x64;
rarray[i].id = i+1;
Real_ptr data = allocAndInitData(rarray[i], data_len);
s_loop_data->array_2D_64x64_Real[i] = new Real_ptr[64];
for (Index_type k = 0; k < 64; ++k) {
s_loop_data->array_2D_64x64_Real[i][k] = &data[k*64];
}
}
//
// Allocate and initialize 3D 2 X loop length X 4 Real arrays.
//
for (unsigned i = 0; i < s_loop_data->s_num_3D_2xNx4_Real_arrays; ++i) {
Index_type data_len = 2*aligned_chunksize*4;
LoopData::RealArray* rarray = s_loop_data->RealArray_3D_2xNx4;
rarray[i].id = i+1;
Real_ptr data = allocAndInitData(rarray[i], data_len);
s_loop_data->array_3D_2xNx4_Real[i] = new Real_ptr*[2];
for (Index_type k = 0; k < 2; ++k) {
s_loop_data->array_3D_2xNx4_Real[i][k] = new Real_ptr[aligned_chunksize];
}
for (Index_type k = 0; k < 2; ++k) {
for (Index_type l = 0; l < aligned_chunksize; ++l) {
s_loop_data->array_3D_2xNx4_Real[i][k][l] = &data[k*l*4];
}
}
}
//
// Initialize Real scalars.
//
s_loop_data->RealArray_scalars.id = 21;
s_loop_data->RealArray_scalars.data = s_loop_data->scalar_Real;
s_loop_data->RealArray_scalars.len = s_loop_data->s_num_Real_scalars;
initData(s_loop_data->RealArray_scalars);
}
//
// Free arrays used in loop suite loop execution (allocated in routine above).
//
void freeLoopData()
{
   //
   // Nothing to release when the suite data does not exist. (The test was
   // previously inverted: it returned when data existed and dereferenced a
   // null pointer when it did not.)
   //
   if ( s_loop_data == 0 ) return;

#ifdef TESTSUITE
   std::cout << "\n freeLoopData..." << std::endl;
#endif

   //
   // De-allocate 1D loop length Real arrays.
   //
   for (unsigned i = 0; i < s_loop_data->s_num_1D_Real_arrays; ++i) {
#if defined(USE_PTR_CLASS)
      free( s_loop_data->array_1D_Real[i].get() );
#else
      free( s_loop_data->array_1D_Real[i] );
#endif
   }

   //
   // De-allocate 1D loop length X 4 Real arrays.
   //
   for (unsigned i = 0; i < s_loop_data->s_num_1D_Nx4_Real_arrays; ++i) {
#if defined(USE_PTR_CLASS)
      free( s_loop_data->array_1D_Nx4_Real[i].get() );
#else
      free( s_loop_data->array_1D_Nx4_Real[i] );
#endif
   }

   //
   // De-allocate 1D loop length Indx arrays.
   //
   for (unsigned i = 0; i < s_loop_data->s_num_1D_Indx_arrays; ++i) {
      free( s_loop_data->array_1D_Indx[i] );
   }

   //
   // De-allocate 1D loop length Complex arrays.
   // These are created with new[] (see allocAndInitData for ComplexArray),
   // so they must be released with delete[], not free().
   //
   for (unsigned i = 0; i < s_loop_data->s_num_1D_Complex_arrays; ++i) {
#if defined(USE_PTR_CLASS)
      delete [] s_loop_data->array_1D_Complex[i].get();
#else
      delete [] s_loop_data->array_1D_Complex[i];
#endif
   }

   //
   // De-allocate 2D loop length X 25 Real arrays (previously leaked).
   // Row pointer 0 is the start of the contiguous data buffer; the row
   // pointer table itself comes from new[].
   //
   for (unsigned i = 0; i < s_loop_data->s_num_2D_Nx25_Real_arrays; ++i) {
#if defined(USE_PTR_CLASS)
      free( s_loop_data->array_2D_Nx25_Real[i][0].get() );
#else
      free( s_loop_data->array_2D_Nx25_Real[i][0] );
#endif
      delete [] s_loop_data->array_2D_Nx25_Real[i];
   }

   //
   // De-allocate 2D 7 X loop length Real arrays.
   //
   for (unsigned i = 0; i < s_loop_data->s_num_2D_7xN_Real_arrays; ++i) {
#if defined(USE_PTR_CLASS)
      free( s_loop_data->array_2D_7xN_Real[i][0].get() );
#else
      free( s_loop_data->array_2D_7xN_Real[i][0] );
#endif
      delete [] s_loop_data->array_2D_7xN_Real[i];
   }

   //
   // De-allocate 2D 64 X 64 Real arrays.
   //
   for (unsigned i = 0; i < s_loop_data->s_num_2D_64x64_Real_arrays; ++i) {
#if defined(USE_PTR_CLASS)
      free( s_loop_data->array_2D_64x64_Real[i][0].get() );
#else
      free( s_loop_data->array_2D_64x64_Real[i][0] );
#endif
      delete [] s_loop_data->array_2D_64x64_Real[i];
   }

   //
   // De-allocate 3D 2 X loop length X 4 Real arrays.
   // Entry [0][0] is the start of the contiguous data buffer; it must be
   // read before the pointer tables are deleted.
   //
   for (unsigned i = 0; i < s_loop_data->s_num_3D_2xNx4_Real_arrays; ++i) {
#if defined(USE_PTR_CLASS)
      free( s_loop_data->array_3D_2xNx4_Real[i][0][0].get() );
#else
      free( s_loop_data->array_3D_2xNx4_Real[i][0][0] );
#endif
      for (Index_type k = 0; k < 2; ++k) {
         delete [] s_loop_data->array_3D_2xNx4_Real[i][k];
      }
      delete [] s_loop_data->array_3D_2xNx4_Real[i];
   }

   // Finally release the container and reset the handle so a second call
   // is a no-op.
   delete s_loop_data;
   s_loop_data = 0;
}
//
// Implementations of file scope routines used to manage loop data
// and checksums
//
namespace {
//
// Routines to allocate and initialize individual arrays consistently for
// checking results.
//
//
// Allocate len Real_type elements aligned to LCALS_DATA_ALIGN, record the
// buffer and its length in ra, fill it via initData(ra), and return it.
// ra.id must already be set by the caller (initData reads it).
// Allocation failure is fatal: it is reported and the program aborts.
//
Real_ptr allocAndInitData(LoopData::RealArray& ra, Index_type len)
{
   Real_ptr data = 0;

   // posix_memalign reports failure through its return value and leaves
   // the output pointer unusable, so the result must be checked before
   // initData writes through it.
   const int rc = posix_memalign( (void **)&data, LCALS_DATA_ALIGN,
                                  len*sizeof(Real_type) );
   if ( rc != 0 ) {
      std::cerr << "\n allocAndInitData: posix_memalign failed for "
                << len << " Real elements (error code " << rc << ")"
                << std::endl;
      std::abort();
   }

   ra.data = data;
   ra.len = len;
   initData(ra);
   return data;
}
//
// Allocate len Index_type elements aligned to LCALS_DATA_ALIGN, record the
// buffer and its length in ia, zero it via initData(ia), and return it.
// Allocation failure is fatal: it is reported and the program aborts.
//
Index_type* allocAndInitData(LoopData::IndxArray& ia, Index_type len)
{
   Index_type* data = 0;

   // posix_memalign reports failure through its return value and leaves
   // the output pointer unusable, so the result must be checked before
   // initData writes through it.
   const int rc = posix_memalign( (void **)&data, LCALS_DATA_ALIGN,
                                  len*sizeof(Index_type) );
   if ( rc != 0 ) {
      std::cerr << "\n allocAndInitData: posix_memalign failed for "
                << len << " Index elements (error code " << rc << ")"
                << std::endl;
      std::abort();
   }

   ia.data = data;
   ia.len = len;
   initData(ia);
   return data;
}
//
// Allocate len Complex_type elements with new[], record the buffer and its
// length in ca, fill it via initData(ca), and return the buffer.
// ca.id must already be set by the caller (initData reads it).
//
Complex_ptr allocAndInitData(LoopData::ComplexArray& ca, Index_type len)
{
   Complex_ptr storage = new Complex_type[len];

   ca.len  = len;
   ca.data = storage;
   initData(ca);

   return storage;
}
//
// Fill a Real array with its reproducible starting values:
//    data[j] = factor*(j + 1.1)/(j + 1.12345)
// where factor is 0.1 for odd array ids and 0.2 for even ids.
// The loop runs under OpenMP only when LCALS_OMP_MEM_INIT is defined.
//
void initData(LoopData::RealArray& ra)
{
   const int array_id = ra.id;
   const Real_type factor = ( array_id % 2 ? 0.1 : 0.2 );

   Real_ptr values = ra.data;
   const Index_type count = ra.len;

#if defined(LCALS_OMP_MEM_INIT)
   #pragma omp parallel for
#endif
   for (Index_type idx = 0; idx < count; ++idx) {
      values[idx] = factor*(idx + 1.1)/(idx + 1.12345);
   }
}
//
// Reset every element of an index array to zero.
// The loop runs under OpenMP only when LCALS_OMP_MEM_INIT is defined.
//
void initData(LoopData::IndxArray& ia)
{
   Index_type* const slots = ia.data;
   const Index_type count = ia.len;

#if defined(LCALS_OMP_MEM_INIT)
   #pragma omp parallel for
#endif
   for (Index_type idx = 0; idx < count; ++idx) {
      slots[idx] = 0;
   }
}
//
// Fill a Complex array with its reproducible starting values:
//    data[j] = factor*(j + 1.1)/(j + 1.12345)
// where factor is (0.1,0.2) for odd array ids and (0.2,0.3) for even ids.
// The loop runs under OpenMP only when LCALS_OMP_MEM_INIT is defined.
//
void initData(LoopData::ComplexArray& ca)
{
   const int array_id = ca.id;
   const Complex_type factor = ( array_id % 2 ? Complex_type(0.1,0.2) :
                                                Complex_type(0.2,0.3) );

   Complex_ptr values = ca.data;
   const Index_type count = ca.len;

#if defined(LCALS_OMP_MEM_INIT)
   #pragma omp parallel for
#endif
   for (Index_type idx = 0; idx < count; ++idx) {
      values[idx] = factor*(idx + 1.1)/(idx + 1.12345);
   }
}
//
// Routines to initialize loop check sum.
//
// Reset the checksum slot of stat for loop length ilength to zero, so that
// subsequent updateChksum calls accumulate from a clean value.
void initChksum(LoopStat& stat, LoopLength ilength)
{
stat.loop_chksum[ilength] = 0.0;
}
//
// Routines to update loop check sum.
//
//
// Add the position-weighted sum of a Real array to the checksum slot for
// loop length ilength:
//    sum over j of (j+1)*data[j]*scale_factor
// The running total is kept in long double and written back at the end.
//
void updateChksum(LoopStat& stat, LoopLength ilength,
                  const LoopData::RealArray& ra,
                  Real_type scale_factor)
{
   Real_ptr values = ra.data;
   const Index_type count = ra.len;

   long double running = stat.loop_chksum[ilength];
   for (Index_type idx = 0; idx < count; ++idx) {
      running += (idx+1)*values[idx]*scale_factor;
   }
   stat.loop_chksum[ilength] = running;
}
// Add a single scalar value to the checksum slot of stat for loop length
// ilength (no position weighting or scaling is applied).
void updateChksum(LoopStat& stat, LoopLength ilength,
Real_type val)
{
stat.loop_chksum[ilength] += val;
}
//
// Add the position-weighted sum of a Complex array to the checksum slot
// for loop length ilength:
//    sum over j of (j+1)*(real(data[j]) + imag(data[j]))*scale_factor
// The running total is kept in long double and written back at the end.
//
void updateChksum(LoopStat& stat, LoopLength ilength,
                  const LoopData::ComplexArray& ca,
                  Real_type scale_factor)
{
   Complex_ptr values = ca.data;
   const Index_type count = ca.len;

   long double running = stat.loop_chksum[ilength];
   for (Index_type idx = 0; idx < count; ++idx) {
      running += (idx+1)*(real(values[idx])+imag(values[idx]))*scale_factor;
   }
   stat.loop_chksum[ilength] = running;
}
} // closing brace for unnamed namespace
//
// Recursively construct directories for given path name.
//
//
// Returns true when, on return, every directory along path exists;
// returns false (after printing a diagnostic to std::cout) when path is
// empty, when some existing item along path is not a directory, or when
// a directory cannot be created. New directories get mode 0700
// (owner read/write/execute), subject to the process umask.
//
// The path is walked front to back, one prefix per component. This
// handles absolute paths, doubled separators ("a//b") and a trailing
// separator ("a/b/"), and a directory that already exists (including one
// created concurrently by another process) is accepted.
//
bool recursiveMkdir(const std::string& path)
{
   const mode_t mode = (S_IRUSR | S_IWUSR | S_IXUSR);
   const char separator = '/';

   if (path.empty()) {
      std::cout << " Cannot create directory = " << path << std::endl;
      return false;
   }

   const std::string::size_type length = path.length();

   for (std::string::size_type end = 1; end <= length; ++end) {

      // A prefix is complete at the end of the string or just before a
      // separator. Skip positions that directly follow a separator: they
      // add nothing new (root "/", doubled or trailing separators).
      const bool at_boundary = (end == length) || (path[end] == separator);
      if (!at_boundary || path[end - 1] == separator) {
         continue;
      }

      const std::string prefix = path.substr(0, end);
      struct stat status;

      if (stat(prefix.c_str(), &status) == 0) {
         if (!S_ISDIR(status.st_mode)) {
            std::cout << "Cannot create directories in path = " << path
                      << "\n because some intermediate item in path exists and"
                      << " is NOT a directory" << std::endl;
            return false;
         }
         continue;   // already a directory; move on to the next component
      }

      if (mkdir(prefix.c_str(), mode) != 0) {
         // mkdir can fail because someone else created the directory
         // between our stat and mkdir; accept that outcome.
         if (stat(prefix.c_str(), &status) != 0 || !S_ISDIR(status.st_mode)) {
            std::cout << " Cannot create directory = "
                      << prefix << std::endl;
            return false;
         }
      }
   }

   return true;
}