| // |
| // See README-LCALS_license.txt for access and distribution restrictions |
| // |
| |
| // |
| // Source file containing LCALS "B" subset raw loops using the google |
| // benchmark library |
| // |
| |
| #include <benchmark/benchmark.h> |
| #include "../LCALSSuite.hxx" |
| #include "../SubsetDataB.hxx" |
| |
| static void BM_INIT3_RAW(benchmark::State& state) { |
| |
| LoopData& loop_data = getLoopData(); |
| |
| loopInit(INIT3); |
| |
| Real_ptr out1 = loop_data.array_1D_Real[0]; |
| Real_ptr out2 = loop_data.array_1D_Real[1]; |
| Real_ptr out3 = loop_data.array_1D_Real[2]; |
| Real_ptr in1 = loop_data.array_1D_Real[3]; |
| Real_ptr in2 = loop_data.array_1D_Real[4]; |
| |
| for( auto _ : state) { |
| |
| for (Index_type i=0 ; i< state.range(0) ; i++ ) { |
| out1[i] = out2[i] = out3[i] = - in1[i] - in2[i]; |
| } |
| |
| } |
| } |
| |
| BENCHMARK(BM_INIT3_RAW)->Arg(171)->Arg(5001)-> |
| Arg(44217)->Unit(benchmark::kMicrosecond); |
| |
| |
| static void BM_MULADDSUB_RAW(benchmark::State& state) { |
| |
| LoopData& loop_data = getLoopData(); |
| |
| loopInit(MULADDSUB); |
| |
| Real_ptr out1 = loop_data.array_1D_Real[0]; |
| Real_ptr out2 = loop_data.array_1D_Real[1]; |
| Real_ptr out3 = loop_data.array_1D_Real[2]; |
| Real_ptr in1 = loop_data.array_1D_Real[3]; |
| Real_ptr in2 = loop_data.array_1D_Real[4]; |
| |
| for ( auto _ : state) { |
| |
| for (Index_type i=0 ; i< state.range(0) ; i++ ) { |
| out1[i] = in1[i] * in2[i] ; |
| out2[i] = in1[i] + in2[i] ; |
| out3[i] = in1[i] - in2[i] ; |
| } |
| |
| } |
| } |
| |
| BENCHMARK(BM_MULADDSUB_RAW)->Arg(171)->Arg(5001)-> |
| Arg(44217)->Unit(benchmark::kMicrosecond); |
| |
| |
| static void BM_IF_QUAD_RAW(benchmark::State& state) { |
| |
| LoopData& loop_data = getLoopData(); |
| |
| loopInit(IF_QUAD); |
| |
| Real_ptr a = loop_data.array_1D_Real[0]; |
| Real_ptr b = loop_data.array_1D_Real[1]; |
| Real_ptr c = loop_data.array_1D_Real[2]; |
| Real_ptr x1 = loop_data.array_1D_Real[3]; |
| Real_ptr x2 = loop_data.array_1D_Real[4]; |
| |
| for ( auto _ : state ) { |
| |
| for (Index_type i=0 ; i< state.range(0); i++ ) { |
| Real_type s = b[i]*b[i] - 4.0*a[i]*c[i]; |
| if ( s >= 0 ) { |
| s = sqrt(s); |
| x2[i] = (-b[i]+s)/(2.0*a[i]); |
| x1[i] = (-b[i]-s)/(2.0*a[i]); |
| } else { |
| x2[i] = 0.0; |
| x1[i] = 0.0; |
| } |
| } |
| |
| } |
| } |
| |
| BENCHMARK(BM_IF_QUAD_RAW)->Arg(171)->Arg(5001)-> |
| Arg(44217)->Unit(benchmark::kMicrosecond); |
| |
| |
| |
| static void BM_TRAP_INT_RAW(benchmark::State& state) { |
| |
| LoopData& loop_data = getLoopData(); |
| |
| loopInit(TRAP_INT); |
| |
| Real_type xn = loop_data.scalar_Real[0]; |
| Real_type x0 = loop_data.scalar_Real[1]; |
| Real_type xp = loop_data.scalar_Real[2]; |
| Real_type y = loop_data.scalar_Real[3]; |
| Real_type yp = loop_data.scalar_Real[4]; |
| |
| Index_type nx = loop_data.array_1D_Indx[0][0] + 1; |
| |
| const Real_type h = (xn - x0) / nx; |
| Real_type sumx = 0.5*( trap_int_func(x0, y, xp, yp) + |
| trap_int_func(xn, y, xp, yp) ); |
| |
| Real_type val = 0; |
| |
| for (auto _ : state) { |
| |
| for (Index_type i=0 ; i< state.range(0); i++ ) { |
| Real_type x = x0 + i*h; |
| sumx += trap_int_func(x, y, xp, yp); |
| } |
| benchmark::DoNotOptimize(val = sumx * h); |
| |
| } |
| } |
| |
| BENCHMARK(BM_TRAP_INT_RAW)->Arg(171)->Arg(5001)-> |
| Arg(44217)->Unit(benchmark::kMicrosecond); |