// blob: 0987a7521f2dae9e63d40b4e03aeb3cec3425e12 [file] [log] [blame]
//
// See README-LCALS_license.txt for access and distribution restrictions
//
//
// Source file containing LCALS "C" subset raw loops using the google
// benchmark library.
//
#include <benchmark/benchmark.h>
#include "../LCALSSuite.hxx"
// Benchmark: Livermore kernel 1 -- hydro fragment.
//
// Fortran reference:
//       DO 1 L = 1,Loop
//       DO 1 k = 1,n
//     1 X(k)= Q + Y(k)*(R*ZX(k+10) + T*ZX(k+11))
//
// state.range(0) is used as the trip count n; each benchmark iteration runs
// one complete 0-based sweep k = 0 .. n-1 of the loop above.
static void BM_HYDRO_1D_RAW(benchmark::State& state) {
  LoopData& data = getLoopData();
  loopInit(HYDRO_1D);  // presumably prepares the shared data for this kernel

  // Scalar coefficients.
  const Real_type q = data.scalar_Real[0];
  const Real_type r = data.scalar_Real[1];
  const Real_type t = data.scalar_Real[2];

  // Output (x) and input (y, zx) arrays.
  Real_ptr x  = data.array_1D_Real[0];
  Real_ptr y  = data.array_1D_Real[1];
  Real_ptr zx = data.array_1D_Real[2];

  for (auto _ : state) {
    for (Index_type k = 0; k < state.range(0); ++k) {
      // Reads zx up to index n+10, so zx must extend past n.
      x[k] = q + y[k]*( r*zx[k+10] + t*zx[k+11] );
    }
  }
}
BENCHMARK(BM_HYDRO_1D_RAW)
    ->Arg(171)
    ->Arg(5001)
    ->Arg(44217)
    ->Unit(benchmark::kMicrosecond);
// Benchmark: Livermore kernel 2 -- ICCG excerpt
// (Incomplete Cholesky Conjugate Gradient).
//
// Fortran reference:
//       DO 200 L= 1,Loop
//           II= n
//        IPNTP= 0
//   222   IPNT= IPNTP
//        IPNTP= IPNTP+II
//           II= II/2
//            i= IPNTP+1
//   CDIR$ IVDEP
//       DO 2 k= IPNT+2,IPNTP,2
//            i= i+1
//     2   X(i)= X(k) - V(k)*X(k-1) - V(k+1)*X(k+1)
//       IF( II.GT.1) GO TO 222
//   200 CONTINUE
//
// state.range(0) seeds II. Each pass of the do/while handles one segment
// [seg_begin, seg_end) and then halves the segment length; the loop stops once
// the length reaches zero.
static void BM_ICCG_RAW(benchmark::State& state) {
  LoopData& data = getLoopData();
  loopInit(ICCG);  // presumably prepares the shared data for this kernel

  Real_ptr x = data.array_1D_Nx4_Real[0];
  Real_ptr v = data.array_1D_Nx4_Real[1];

  for (auto _ : state) {
    Index_type seg_len = state.range(0);  // II
    Index_type seg_end = 0;               // IPNTP
    do {
      const Index_type seg_begin = seg_end;  // IPNT
      seg_end += seg_len;
      seg_len /= 2;

      // Results are written starting just past the end of the segment read.
      Index_type out = seg_end;  // i
      for (Index_type k = seg_begin + 1; k < seg_end; k += 2) {
        ++out;
        x[out] = x[k] - v[k  ]*x[k-1] - v[k+1]*x[k+1];
      }
    } while (seg_len > 0);
  }
}
BENCHMARK(BM_ICCG_RAW)
    ->Arg(171)
    ->Arg(5001)
    ->Arg(44217)
    ->Unit(benchmark::kMicrosecond);
// Benchmark: Livermore kernel 3 -- inner product.
//
// Fortran reference:
//       DO 3 L= 1,Loop
//            Q= 0.0
//       DO 3 k= 1,n
//     3      Q= Q + Z(k)*X(k)
//
// state.range(0) is used as the trip count n. The accumulator is reset at the
// start of every benchmark iteration so each iteration computes one full dot
// product of z and x.
static void BM_INNER_PROD_RAW(benchmark::State& state) {
  LoopData& loop_data = getLoopData();
  loopInit(INNER_PROD);  // presumably prepares the shared data for this kernel

  Real_ptr x = loop_data.array_1D_Real[0];
  Real_ptr z = loop_data.array_1D_Real[1];

  Real_type q = 0.0;
  for (auto _ : state) {
    q = 0.0;
    for (Index_type k = 0; k < state.range(0); k++) {
      // q is never read afterwards; DoNotOptimize keeps the compiler from
      // discarding the accumulation as dead code.
      benchmark::DoNotOptimize(q += z[k]*x[k]);
    }
  }
}
BENCHMARK(BM_INNER_PROD_RAW)->Arg(171)->Arg(5001)->
    Arg(44217)->Unit(benchmark::kMicrosecond);
// Benchmark: Livermore kernel 4 -- banded linear equations.
//
// Fortran reference:
//              m= (1001-7)/2
//       DO 444 L= 1,Loop
//       DO 444 k= 7,1001,m
//             lw= k-6
//           temp= X(k-1)
//   CDIR$ IVDEP
//       DO   4 j= 5,n,5
//           temp= temp - XZ(lw)*Y(j)
//     4       lw= lw+1
//         X(k-1)= Y(5)*temp
//   444 CONTINUE
//
// state.range(0) bounds the inner j loop. Note that this C++ form reads x[lw]
// where the Fortran listing names XZ(lw).
static void BM_BAND_LIN_EQ_RAW(benchmark::State& state) {
  LoopData& data = getLoopData();
  loopInit(BAND_LIN_EQ);  // presumably prepares the shared data for this kernel

  Real_ptr x = data.array_1D_Real[0];
  Real_ptr y = data.array_1D_Real[1];

  // Stride of the outer loop; fixed by the kernel definition.
  const Index_type m = ( 1001 - 7 ) / 2;

  for (auto _ : state) {
    for (Index_type k = 6; k < 1001; k += m) {
      Index_type lw = k - 6;
      Real_type temp = x[k-1];
      for (Index_type j = 4; j < state.range(0); j += 5) {
        temp -= x[lw]*y[j];
        ++lw;
      }
      x[k-1] = y[4]*temp;
    }
  }
}
BENCHMARK(BM_BAND_LIN_EQ_RAW)
    ->Arg(171)
    ->Arg(5001)
    ->Arg(44217)
    ->Unit(benchmark::kMicrosecond);
// Benchmark: Livermore kernel 5 -- tri-diagonal elimination, below diagonal.
//
// Fortran reference:
//       DO 5 L = 1,Loop
//       DO 5 i = 2,n
//     5 X(i)= Z(i)*(Y(i) - X(i-1))
//
// state.range(0) is used as n. Each element depends on the one just written,
// so the sweep is a first-order recurrence starting at index 1.
static void BM_TRIDIAG_ELIM_RAW(benchmark::State& state) {
  LoopData& data = getLoopData();
  loopInit(TRIDIAG_ELIM);  // presumably prepares the shared data for this kernel

  Real_ptr x = data.array_1D_Real[0];
  Real_ptr y = data.array_1D_Real[1];
  Real_ptr z = data.array_1D_Real[2];

  for (auto _ : state) {
    for (Index_type i = 1; i < state.range(0); ++i) {
      x[i] = z[i]*( y[i] - x[i-1] );
    }
  }
}
BENCHMARK(BM_TRIDIAG_ELIM_RAW)
    ->Arg(171)
    ->Arg(5001)
    ->Arg(44217)
    ->Unit(benchmark::kMicrosecond);
// Benchmark: Livermore kernel 7 -- equation of state fragment.
//
// Fortran reference:
//       DO 7 L= 1,Loop
//       DO 7 k= 1,n
//         X(k)=     U(k  ) + R*( Z(k  ) + R*Y(k  )) +
//      .        T*( U(k+3) + R*( U(k+2) + R*U(k+1)) +
//      .        T*( U(k+6) + Q*( U(k+5) + Q*U(k+4))))
//     7 CONTINUE
//
// state.range(0) is used as n. The kernel reads u up to index n+5, so u must
// extend past n.
static void BM_EOS_RAW(benchmark::State& state) {
  LoopData& data = getLoopData();
  loopInit(EOS);  // presumably prepares the shared data for this kernel

  // Scalar coefficients.
  const Real_type q = data.scalar_Real[0];
  const Real_type r = data.scalar_Real[1];
  const Real_type t = data.scalar_Real[2];

  // Output (x) and inputs (y, z, u).
  Real_ptr x = data.array_1D_Real[0];
  Real_ptr y = data.array_1D_Real[1];
  Real_ptr z = data.array_1D_Real[2];
  Real_ptr u = data.array_1D_Real[3];

  for (auto _ : state) {
    for (Index_type k = 0; k < state.range(0); ++k) {
      x[k] = u[k] + r*( z[k] + r*y[k] ) +
             t*( u[k+3] + r*( u[k+2] + r*u[k+1] ) +
                 t*( u[k+6] + q*( u[k+5] + q*u[k+4] ) ) );
    }
  }
}
BENCHMARK(BM_EOS_RAW)
    ->Arg(171)
    ->Arg(5001)
    ->Arg(44217)
    ->Unit(benchmark::kMicrosecond);
// Benchmark: Livermore kernel 8 -- ADI integration.
//
// Fortran reference:
//       DO  8 L = 1,Loop
//           nl1 = 1
//           nl2 = 2
//       DO  8 kx = 2,3
//   CDIR$ IVDEP
//       DO  8 ky = 2,n
//         DU1(ky)=U1(kx,ky+1,nl1) - U1(kx,ky-1,nl1)
//         DU2(ky)=U2(kx,ky+1,nl1) - U2(kx,ky-1,nl1)
//         DU3(ky)=U3(kx,ky+1,nl1) - U3(kx,ky-1,nl1)
//         U1(kx,ky,nl2)=U1(kx,ky,nl1) +A11*DU1(ky) +A12*DU2(ky) +A13*DU3(ky)
//      .       + SIG*(U1(kx+1,ky,nl1) -2.*U1(kx,ky,nl1) +U1(kx-1,ky,nl1))
//         U2(kx,ky,nl2)=U2(kx,ky,nl1) +A21*DU1(ky) +A22*DU2(ky) +A23*DU3(ky)
//      .       + SIG*(U2(kx+1,ky,nl1) -2.*U2(kx,ky,nl1) +U2(kx-1,ky,nl1))
//         U3(kx,ky,nl2)=U3(kx,ky,nl1) +A31*DU1(ky) +A32*DU2(ky) +A33*DU3(ky)
//      .       + SIG*(U3(kx+1,ky,nl1) -2.*U3(kx,ky,nl1) +U3(kx-1,ky,nl1))
//     8 CONTINUE
//
// state.range(0) bounds the ky loop. The 3-D arrays are indexed [nl][ky][kx],
// i.e. with the subscript order reversed relative to the Fortran listing.
static void BM_ADI_RAW(benchmark::State& state) {
  LoopData& data = getLoopData();
  loopInit(ADI);  // presumably prepares the shared data for this kernel

  // Per-ky difference scratch arrays.
  Real_ptr du1 = data.array_1D_Real[0];
  Real_ptr du2 = data.array_1D_Real[1];
  Real_ptr du3 = data.array_1D_Real[2];

  // The three fields; plane nl1 is read, plane nl2 is written.
  Real_ptr** u1 = data.array_3D_2xNx4_Real[0];
  Real_ptr** u2 = data.array_3D_2xNx4_Real[1];
  Real_ptr** u3 = data.array_3D_2xNx4_Real[2];

  // Coefficients: sig followed by the 3x3 matrix a(row)(col).
  const Real_type sig = data.scalar_Real[0];
  const Real_type a11 = data.scalar_Real[1];
  const Real_type a12 = data.scalar_Real[2];
  const Real_type a13 = data.scalar_Real[3];
  const Real_type a21 = data.scalar_Real[4];
  const Real_type a22 = data.scalar_Real[5];
  const Real_type a23 = data.scalar_Real[6];
  const Real_type a31 = data.scalar_Real[7];
  const Real_type a32 = data.scalar_Real[8];
  const Real_type a33 = data.scalar_Real[9];

  const Index_type nl1 = 0;
  const Index_type nl2 = 1;

  for (auto _ : state) {
    for (Index_type kx = 1; kx < 3; ++kx) {
      for (Index_type ky = 1; ky < state.range(0); ++ky) {
        // Central differences along ky.
        du1[ky] = u1[nl1][ky+1][kx] - u1[nl1][ky-1][kx];
        du2[ky] = u2[nl1][ky+1][kx] - u2[nl1][ky-1][kx];
        du3[ky] = u3[nl1][ky+1][kx] - u3[nl1][ky-1][kx];

        // Update each field: coupled difference terms plus a second
        // difference along kx scaled by sig.
        u1[nl2][ky][kx] =
            u1[nl1][ky][kx]+a11*du1[ky]+a12*du2[ky]+a13*du3[ky] + sig*
            (u1[nl1][ky][kx+1]-2.0*u1[nl1][ky][kx]+u1[nl1][ky][kx-1]);
        u2[nl2][ky][kx] =
            u2[nl1][ky][kx]+a21*du1[ky]+a22*du2[ky]+a23*du3[ky] + sig*
            (u2[nl1][ky][kx+1]-2.0*u2[nl1][ky][kx]+u2[nl1][ky][kx-1]);
        u3[nl2][ky][kx] =
            u3[nl1][ky][kx]+a31*du1[ky]+a32*du2[ky]+a33*du3[ky] + sig*
            (u3[nl1][ky][kx+1]-2.0*u3[nl1][ky][kx]+u3[nl1][ky][kx-1]);
      }
    }
  }
}
BENCHMARK(BM_ADI_RAW)
    ->Arg(171)
    ->Arg(5001)
    ->Arg(44217)
    ->Unit(benchmark::kMicrosecond);
// Benchmark: Livermore kernel 9 -- integrate predictors.
//
// Fortran reference:
//       DO 9  L = 1,Loop
//       DO 9  i = 1,n
//       PX( 1,i)= DM28*PX(13,i) + DM27*PX(12,i) + DM26*PX(11,i) +
//      .          DM25*PX(10,i) + DM24*PX( 9,i) + DM23*PX( 8,i) +
//      .          DM22*PX( 7,i) +  C0*(PX( 5,i) +      PX( 6,i))+ PX( 3,i)
//     9 CONTINUE
//
// state.range(0) is used as n. px is indexed [i][component], i.e. with the
// subscripts swapped and shifted to 0-based relative to the Fortran listing.
static void BM_INT_PREDICT_RAW(benchmark::State& state) {
  LoopData& data = getLoopData();
  loopInit(INT_PREDICT);  // presumably prepares the shared data for this kernel

  Real_ptr* px = data.array_2D_Nx25_Real[0];

  // Predictor weights.
  const Real_type dm22 = data.scalar_Real[0];
  const Real_type dm23 = data.scalar_Real[1];
  const Real_type dm24 = data.scalar_Real[2];
  const Real_type dm25 = data.scalar_Real[3];
  const Real_type dm26 = data.scalar_Real[4];
  const Real_type dm27 = data.scalar_Real[5];
  const Real_type dm28 = data.scalar_Real[6];
  const Real_type c0   = data.scalar_Real[7];

  for (auto _ : state) {
    for (Index_type i = 0; i < state.range(0); ++i) {
      px[i][0] = dm28*px[i][12] + dm27*px[i][11] + dm26*px[i][10] +
                 dm25*px[i][ 9] + dm24*px[i][ 8] + dm23*px[i][ 7] +
                 dm22*px[i][ 6] + c0*( px[i][ 4] + px[i][ 5]) + px[i][ 2];
    }
  }
}
BENCHMARK(BM_INT_PREDICT_RAW)
    ->Arg(171)
    ->Arg(5001)
    ->Arg(44217)
    ->Unit(benchmark::kMicrosecond);
// Benchmark: Livermore kernel 10 -- difference predictors.
//
// Fortran reference:
//       DO 10  L= 1,Loop
//       DO 10  i= 1,n
//       AR      =      CX(5,i)
//       BR      = AR - PX(5,i)
//       PX(5,i) = AR
//       CR      = BR - PX(6,i)
//       PX(6,i) = BR
//       AR      = CR - PX(7,i)
//       PX(7,i) = CR
//       BR      = AR - PX(8,i)
//       PX(8,i) = AR
//       CR      = BR - PX(9,i)
//       PX(9,i) = BR
//       AR      = CR - PX(10,i)
//       PX(10,i)= CR
//       BR      = AR - PX(11,i)
//       PX(11,i)= AR
//       CR      = BR - PX(12,i)
//       PX(12,i)= BR
//       PX(14,i)= CR - PX(13,i)
//       PX(13,i)= CR
//    10 CONTINUE
//
// state.range(0) is used as n. The rotating AR/BR/CR temporaries of the
// reference are written here as a chain of single-assignment values d0..d8;
// the sequence of loads and stores is the same.
static void BM_DIFF_PREDICT_RAW(benchmark::State& state) {
  LoopData& data = getLoopData();
  loopInit(DIFF_PREDICT);  // presumably prepares the shared data for this kernel

  Real_ptr* px = data.array_2D_Nx25_Real[0];
  Real_ptr* cx = data.array_2D_Nx25_Real[1];

  for (auto _ : state) {
    for (Index_type i = 0; i < state.range(0); ++i) {
      // Each step: new difference = previous value minus the stored one, then
      // the previous value replaces the stored one.
      const Real_type d0 = cx[i][ 4];
      const Real_type d1 = d0 - px[i][ 4];
      px[i][ 4] = d0;
      const Real_type d2 = d1 - px[i][ 5];
      px[i][ 5] = d1;
      const Real_type d3 = d2 - px[i][ 6];
      px[i][ 6] = d2;
      const Real_type d4 = d3 - px[i][ 7];
      px[i][ 7] = d3;
      const Real_type d5 = d4 - px[i][ 8];
      px[i][ 8] = d4;
      const Real_type d6 = d5 - px[i][ 9];
      px[i][ 9] = d5;
      const Real_type d7 = d6 - px[i][10];
      px[i][10] = d6;
      const Real_type d8 = d7 - px[i][11];
      px[i][11] = d7;
      px[i][13] = d8 - px[i][12];
      px[i][12] = d8;
    }
  }
}
BENCHMARK(BM_DIFF_PREDICT_RAW)
    ->Arg(171)
    ->Arg(5001)
    ->Arg(44217)
    ->Unit(benchmark::kMicrosecond);
// Benchmark: Livermore kernel 11 -- first sum.
//
// Fortran reference:
//       DO 11 L = 1,Loop
//           X(1)= Y(1)
//       DO 11 k = 2,n
//    11     X(k)= X(k-1) + Y(k)
//
// state.range(0) is used as n. Each benchmark iteration reseeds x[0] from y[0]
// and then builds the running sum of y into x.
static void BM_FIRST_SUM_RAW(benchmark::State& state) {
  LoopData& data = getLoopData();
  loopInit(FIRST_SUM);  // presumably prepares the shared data for this kernel

  Real_ptr x = data.array_1D_Real[0];
  Real_ptr y = data.array_1D_Real[1];

  for (auto _ : state) {
    x[0] = y[0];
    for (Index_type k = 1; k < state.range(0); ++k) {
      x[k] = x[k-1] + y[k];
    }
  }
}
BENCHMARK(BM_FIRST_SUM_RAW)
    ->Arg(171)
    ->Arg(5001)
    ->Arg(44217)
    ->Unit(benchmark::kMicrosecond);
// Benchmark: Livermore kernel 12 -- first difference.
//
// Fortran reference:
//       DO 12 L = 1,Loop
//       DO 12 k = 1,n
//    12     X(k)= Y(k+1) - Y(k)
//
// state.range(0) is used as n. The kernel reads y[n], so y must hold at least
// n+1 elements.
static void BM_FIRST_DIFF_RAW(benchmark::State& state) {
  LoopData& data = getLoopData();
  loopInit(FIRST_DIFF);  // presumably prepares the shared data for this kernel

  Real_ptr x = data.array_1D_Real[0];
  Real_ptr y = data.array_1D_Real[1];

  for (auto _ : state) {
    for (Index_type k = 0; k < state.range(0); ++k) {
      x[k] = y[k+1] - y[k];
    }
  }
}
BENCHMARK(BM_FIRST_DIFF_RAW)
    ->Arg(171)
    ->Arg(5001)
    ->Arg(44217)
    ->Unit(benchmark::kMicrosecond);
// Benchmark: Livermore kernel 13 -- 2-D PIC (Particle In Cell).
//
// Fortran reference:
//       DO  13     L= 1,Loop
//       DO  13    ip= 1,n
//                 i1= P(1,ip)
//                 j1= P(2,ip)
//                 i1=        1 + MOD2N(i1,64)
//                 j1=        1 + MOD2N(j1,64)
//            P(3,ip)= P(3,ip)  + B(i1,j1)
//            P(4,ip)= P(4,ip)  + C(i1,j1)
//            P(1,ip)= P(1,ip)  + P(3,ip)
//            P(2,ip)= P(2,ip)  + P(4,ip)
//                 i2= P(1,ip)
//                 j2= P(2,ip)
//                 i2=            MOD2N(i2,64)
//                 j2=            MOD2N(j2,64)
//            P(1,ip)= P(1,ip)  + Y(i2+32)
//            P(2,ip)= P(2,ip)  + Z(j2+32)
//                 i2= i2       + E(i2+32)
//                 j2= j2       + F(j2+32)
//          H(i2,j2) = H(i2,j2) + 1.0
//    13 CONTINUE
//
// state.range(0) is used as the particle count n. MOD2N(v,64) is realised as a
// bit-mask with 64-1.
static void BM_PIC_2D_RAW(benchmark::State& state) {
  LoopData& data = getLoopData();
  loopInit(PIC_2D);  // presumably prepares the shared data for this kernel

  // Particle records and the two tables added to components 2 and 3.
  Real_ptr* p = data.array_2D_Nx25_Real[0];
  Real_ptr* b = data.array_2D_Nx25_Real[1];
  Real_ptr* c = data.array_2D_Nx25_Real[2];

  // 1-D real and integer offset tables, addressed at (masked index + 32).
  Real_ptr y = data.array_1D_Real[0];
  Real_ptr z = data.array_1D_Real[1];
  Index_type* e = data.array_1D_Indx[0];
  Index_type* f = data.array_1D_Indx[1];

  // Per-cell counter incremented once per particle.
  Real_ptr* h = data.array_2D_64x64_Real[0];

  for (auto _ : state) {
    for (Index_type ip = 0; ip < state.range(0); ++ip) {
      // Cell indices from components 0/1, truncated and wrapped to 0..63.
      Index_type i1 = static_cast<Index_type>(p[ip][0]);
      Index_type j1 = static_cast<Index_type>(p[ip][1]);
      i1 &= 64 - 1;
      j1 &= 64 - 1;

      // Accumulate the table values, then advance components 0/1.
      p[ip][2] += b[j1][i1];
      p[ip][3] += c[j1][i1];
      p[ip][0] += p[ip][2];
      p[ip][1] += p[ip][3];

      // Recompute the wrapped cell indices from the updated components.
      Index_type i2 = static_cast<Index_type>(p[ip][0]);
      Index_type j2 = static_cast<Index_type>(p[ip][1]);
      i2 = ( i2 & (64 - 1) );
      j2 = ( j2 & (64 - 1) );

      p[ip][0] += y[i2+32];
      p[ip][1] += z[j2+32];

      // Apply the integer offsets and bump the counter for that cell.
      i2 += e[i2+32];
      j2 += f[j2+32];
      h[j2][i2] += 1.0;
    }
  }
}
BENCHMARK(BM_PIC_2D_RAW)
    ->Arg(171)
    ->Arg(5001)
    ->Arg(44217)
    ->Unit(benchmark::kMicrosecond);
// Benchmark: Livermore kernel 14 -- 1-D PIC (Particle In Cell).
//
// Fortran reference:
//       DO   14   L= 1,Loop
//       DO  141   k= 1,n
//             VX(k)= 0.0
//             XX(k)= 0.0
//             IX(k)= INT(  GRD(k))
//             XI(k)= REAL( IX(k))
//            EX1(k)= EX   ( IX(k))
//           DEX1(k)= DEX  ( IX(k))
//   141 CONTINUE
//       DO  142   k= 1,n
//             VX(k)= VX(k) + EX1(k) + (XX(k) - XI(k))*DEX1(k)
//             XX(k)= XX(k) + VX(k)  + FLX
//             IR(k)= XX(k)
//             RX(k)= XX(k) - IR(k)
//             IR(k)= MOD2N(  IR(k),2048) + 1
//             XX(k)= RX(k) + IR(k)
//   142 CONTINUE
//       DO  14    k= 1,n
//       RH(IR(k)  )= RH(IR(k)  ) + 1.0 - RX(k)
//       RH(IR(k)+1)= RH(IR(k)+1) + RX(k)
//    14 CONTINUE
//
// state.range(0) is used as n. Each benchmark iteration runs the three passes
// in order: gather, move, deposit.
static void BM_PIC_1D_RAW(benchmark::State& state) {
  LoopData& data = getLoopData();
  loopInit(PIC_1D);  // presumably prepares the shared data for this kernel

  Real_ptr vx   = data.array_1D_Real[0];
  Real_ptr xx   = data.array_1D_Real[1];
  Real_ptr xi   = data.array_1D_Real[2];
  Real_ptr ex   = data.array_1D_Real[3];
  Real_ptr ex1  = data.array_1D_Real[4];
  Real_ptr dex  = data.array_1D_Real[5];
  Real_ptr dex1 = data.array_1D_Real[6];
  Real_ptr rh   = data.array_1D_Real[7];
  Real_ptr rx   = data.array_1D_Real[8];

  const Real_type flx = data.scalar_Real[0];

  Index_type* ix  = data.array_1D_Indx[2];
  Index_type* ir  = data.array_1D_Indx[3];
  Index_type* grd = data.array_1D_Indx[4];

  for (auto _ : state) {
    // Pass 1: reset vx/xx and gather ex/dex at the 1-based index ix[k].
    for (Index_type k = 0; k < state.range(0); ++k) {
      vx[k] = 0.0;
      xx[k] = 0.0;
      ix[k] = static_cast<Index_type>(grd[k]);
      xi[k] = static_cast<Real_type>(ix[k]);
      ex1[k]  = ex[ ix[k] - 1 ];
      dex1[k] = dex[ ix[k] - 1 ];
    }

    // Pass 2: advance vx and xx, then split xx into an integer part wrapped
    // to 1..2048 (ir) and a fractional remainder (rx).
    for (Index_type k = 0; k < state.range(0); ++k) {
      vx[k] = vx[k] + ex1[k] + ( xx[k] - xi[k] )*dex1[k];
      xx[k] = xx[k] + vx[k] + flx;
      ir[k] = static_cast<Index_type>(xx[k]);
      rx[k] = xx[k] - ir[k];
      ir[k] = ( ir[k] & (2048 - 1) ) + 1;
      xx[k] = rx[k] + ir[k];
    }

    // Pass 3: deposit weights (1 - rx) and rx into two adjacent rh entries.
    for (Index_type k = 0; k < state.range(0); ++k) {
      rh[ ir[k] - 1 ] += 1.0 - rx[k];
      rh[ ir[k]     ] += rx[k];
    }
  }
}
BENCHMARK(BM_PIC_1D_RAW)
    ->Arg(171)
    ->Arg(5001)
    ->Arg(44217)
    ->Unit(benchmark::kMicrosecond);
// Benchmark: Livermore kernel 18 -- 2-D explicit hydrodynamics fragment.
//
// Fortran reference:
//       DO 75  L= 1,Loop
//              T= 0.0037
//              S= 0.0041
//             KN= 6
//             JN= n
//       DO 70  k= 2,KN
//       DO 70  j= 2,JN
//        ZA(j,k)= (ZP(j-1,k+1)+ZQ(j-1,k+1)-ZP(j-1,k)-ZQ(j-1,k))
//      .          *(ZR(j,k)+ZR(j-1,k))/(ZM(j-1,k)+ZM(j-1,k+1))
//        ZB(j,k)= (ZP(j-1,k)+ZQ(j-1,k)-ZP(j,k)-ZQ(j,k))
//      .          *(ZR(j,k)+ZR(j,k-1))/(ZM(j,k)+ZM(j-1,k))
//    70 CONTINUE
//       DO 72  k= 2,KN
//       DO 72  j= 2,JN
//        ZU(j,k)= ZU(j,k)+S*(ZA(j,k)*(ZZ(j,k)-ZZ(j+1,k))
//      .                 -ZA(j-1,k) *(ZZ(j,k)-ZZ(j-1,k))
//      .                 -ZB(j,k)   *(ZZ(j,k)-ZZ(j,k-1))
//      .                 +ZB(j,k+1) *(ZZ(j,k)-ZZ(j,k+1)))
//        ZV(j,k)= ZV(j,k)+S*(ZA(j,k)*(ZR(j,k)-ZR(j+1,k))
//      .                 -ZA(j-1,k) *(ZR(j,k)-ZR(j-1,k))
//      .                 -ZB(j,k)   *(ZR(j,k)-ZR(j,k-1))
//      .                 +ZB(j,k+1) *(ZR(j,k)-ZR(j,k+1)))
//    72 CONTINUE
//       DO 75  k= 2,KN
//       DO 75  j= 2,JN
//        ZR(j,k)= ZR(j,k)+T*ZU(j,k)
//        ZZ(j,k)= ZZ(j,k)+T*ZV(j,k)
//    75 CONTINUE
//
// state.range(0) is used as JN. Arrays are indexed [k][j]. Unlike the
// reference, the final stage stores into separate zrout/zzout arrays rather
// than updating zr/zz in place.
static void BM_HYDRO_2D_RAW(benchmark::State& state) {
  LoopData& data = getLoopData();
  loopInit(HYDRO_2D);  // presumably prepares the shared data for this kernel

  Real_ptr* za    = data.array_2D_7xN_Real[0];
  Real_ptr* zb    = data.array_2D_7xN_Real[1];
  Real_ptr* zm    = data.array_2D_7xN_Real[2];
  Real_ptr* zp    = data.array_2D_7xN_Real[3];
  Real_ptr* zq    = data.array_2D_7xN_Real[4];
  Real_ptr* zr    = data.array_2D_7xN_Real[5];
  Real_ptr* zu    = data.array_2D_7xN_Real[6];
  Real_ptr* zv    = data.array_2D_7xN_Real[7];
  Real_ptr* zz    = data.array_2D_7xN_Real[8];
  Real_ptr* zrout = data.array_2D_7xN_Real[9];
  Real_ptr* zzout = data.array_2D_7xN_Real[10];

  const Real_type t = 0.0037;
  const Real_type s = 0.0041;

  const Index_type kn = 6;
  const Index_type jn = state.range(0);

  for (auto _ : state) {
    // Stage 1 (DO 70): compute za and zb.
    for (Index_type k = 1; k < kn; ++k) {
      for (Index_type j = 1; j < jn; ++j) {
        za[k][j] = ( zp[k+1][j-1] +zq[k+1][j-1] -zp[k][j-1] -zq[k][j-1] )*
                   ( zr[k][j] +zr[k][j-1] ) / ( zm[k][j-1] +zm[k+1][j-1]);
        zb[k][j] = ( zp[k][j-1] +zq[k][j-1] -zp[k][j] -zq[k][j] ) *
                   ( zr[k][j] +zr[k-1][j] ) / ( zm[k][j] +zm[k][j-1]);
      }
    }

    // Stage 2 (DO 72): accumulate into zu and zv.
    for (Index_type k = 1; k < kn; ++k) {
      for (Index_type j = 1; j < jn; ++j) {
        zu[k][j] += s*( za[k][j]   *( zz[k][j] - zz[k][j+1] ) -
                        za[k][j-1] *( zz[k][j] - zz[k][j-1] ) -
                        zb[k][j]   *( zz[k][j] - zz[k-1][j] ) +
                        zb[k+1][j] *( zz[k][j] - zz[k+1][j] ) );
        zv[k][j] += s*( za[k][j]   *( zr[k][j] - zr[k][j+1] ) -
                        za[k][j-1] *( zr[k][j] - zr[k][j-1] ) -
                        zb[k][j]   *( zr[k][j] - zr[k-1][j] ) +
                        zb[k+1][j] *( zr[k][j] - zr[k+1][j] ) );
      }
    }

    // Stage 3 (DO 75): write the advanced fields to the output arrays.
    for (Index_type k = 1; k < kn; ++k) {
      for (Index_type j = 1; j < jn; ++j) {
        zrout[k][j] = zr[k][j] + t*zu[k][j];
        zzout[k][j] = zz[k][j] + t*zv[k][j];
      }
    }
  }
}
BENCHMARK(BM_HYDRO_2D_RAW)
    ->Arg(171)
    ->Arg(5001)
    ->Arg(44217)
    ->Unit(benchmark::kMicrosecond);
// Benchmark: Livermore kernel 19 -- general linear recurrence equations.
//
// Fortran reference:
//                KB5I= 0
//       DO 194 L= 1,Loop
//       DO 191 k= 1,n
//          B5(k+KB5I)= SA(k) +STB5*SB(k)
//                STB5= B5(k+KB5I) -STB5
//   191 CONTINUE
//   192 DO 193 i= 1,n
//                   k= n-i+1
//          B5(k+KB5I)= SA(k) +STB5*SB(k)
//                STB5= B5(k+KB5I) -STB5
//   193 CONTINUE
//   194 CONTINUE
//
// state.range(0) is used as n. stb5 is read once before timing starts and is
// carried from one sweep, and one benchmark iteration, into the next.
static void BM_GEN_LIN_RECUR_RAW(benchmark::State& state) {
  LoopData& data = getLoopData();
  loopInit(GEN_LIN_RECUR);  // presumably prepares the shared data for this kernel

  Real_ptr b5 = data.array_1D_Real[0];
  Real_ptr sa = data.array_1D_Real[1];
  Real_ptr sb = data.array_1D_Real[2];

  Real_type stb5 = data.scalar_Real[0];
  const Index_type kb5i = 0;  // offset into b5; fixed at zero here

  for (auto _ : state) {
    // Forward sweep, k = 0 .. n-1.
    for (Index_type k = 0; k < state.range(0); ++k) {
      b5[k+kb5i] = sa[k] + stb5*sb[k];
      stb5 = b5[k+kb5i] - stb5;
    }

    // Backward sweep, k = n-1 .. 0.
    for (Index_type i = 1; i <= state.range(0); ++i) {
      const Index_type k = state.range(0) - i;
      b5[k+kb5i] = sa[k] + stb5*sb[k];
      stb5 = b5[k+kb5i] - stb5;
    }
  }
}
BENCHMARK(BM_GEN_LIN_RECUR_RAW)
    ->Arg(171)
    ->Arg(5001)
    ->Arg(44217)
    ->Unit(benchmark::kMicrosecond);
// Benchmark: Livermore kernel 20 -- discrete ordinates transport,
// conditional recurrence on xx.
//
// Fortran reference:
//       DO 20 L= 1,Loop
//       DO 20 k= 1,n
//            DI= Y(k)-G(k)/( XX(k)+DK)
//            DN= 0.2
//       IF( DI.NE.0.0) DN= MAX( S,MIN( Z(k)/DI, T))
//          X(k)= ((W(k)+V(k)*DN)* XX(k)+U(k))/(VX(k)+V(k)*DN)
//       XX(k+1)= (X(k)- XX(k))*DN+ XX(k)
//    20 CONTINUE
//
// state.range(0) is used as n. xx[k+1] is written from xx[k], so xx must hold
// at least n+1 elements.
static void BM_DISC_ORD_RAW(benchmark::State& state) {
  LoopData& data = getLoopData();
  loopInit(DISC_ORD);  // presumably prepares the shared data for this kernel

  Real_ptr x  = data.array_1D_Real[0];
  Real_ptr y  = data.array_1D_Real[1];
  Real_ptr z  = data.array_1D_Real[2];
  Real_ptr u  = data.array_1D_Real[3];
  Real_ptr v  = data.array_1D_Real[4];
  Real_ptr w  = data.array_1D_Real[5];
  Real_ptr g  = data.array_1D_Real[6];
  Real_ptr xx = data.array_1D_Real[7];
  Real_ptr vx = data.array_1D_Real[9];  // slot 8 is not used by this kernel

  const Real_type s  = data.scalar_Real[0];
  const Real_type t  = data.scalar_Real[1];
  const Real_type dk = data.scalar_Real[2];

  for (auto _ : state) {
    for (Index_type k = 0; k < state.range(0); ++k) {
      const Real_type di = y[k] - g[k] / ( xx[k] + dk );

      // dn defaults to 0.2; otherwise it is z[k]/di capped at t from above
      // and then raised to at least s.
      Real_type dn = 0.2;
      if ( di != 0.0 ) {
        dn = z[k]/di;
        if ( t < dn ) { dn = t; }
        if ( s > dn ) { dn = s; }
      }

      x[k] = ( ( w[k] + v[k]*dn )* xx[k] + u[k] ) / ( vx[k] + v[k]*dn );
      xx[k+1] = ( x[k] - xx[k] )* dn + xx[k];
    }
  }
}
BENCHMARK(BM_DISC_ORD_RAW)
    ->Arg(171)
    ->Arg(5001)
    ->Arg(44217)
    ->Unit(benchmark::kMicrosecond);
// Benchmark: Livermore kernel 21 -- matrix*matrix product.
//
// Fortran reference:
//       DO 21 L= 1,Loop
//       DO 21 k= 1,25
//       DO 21 i= 1,25
//       DO 21 j= 1,n
//       PX(i,j)= PX(i,j) +VY(i,k) * CX(k,j)
//    21 CONTINUE
//
// state.range(0) is used as n (the innermost j extent). px and cx are indexed
// [j][...], vy is indexed [k][i].
static void BM_MAT_X_MAT_RAW(benchmark::State& state) {
  LoopData& data = getLoopData();
  loopInit(MAT_X_MAT);  // presumably prepares the shared data for this kernel

  Real_ptr* px = data.array_2D_Nx25_Real[0];
  Real_ptr* cx = data.array_2D_Nx25_Real[1];
  Real_ptr* vy = data.array_2D_64x64_Real[0];

  for (auto _ : state) {
    for (Index_type k = 0; k < 25; ++k) {
      for (Index_type i = 0; i < 25; ++i) {
        for (Index_type j = 0; j < state.range(0); ++j) {
          px[j][i] += vy[k][i] * cx[j][k];
        }
      }
    }
  }
}
BENCHMARK(BM_MAT_X_MAT_RAW)
    ->Arg(171)
    ->Arg(5001)
    ->Arg(44217)
    ->Unit(benchmark::kMicrosecond);
// Benchmark: Livermore kernel 22 -- Planckian distribution.
//
// Fortran reference:
//       EXPMAX= 20.0
//         U(n)= 0.99*EXPMAX*V(n)
//       DO 22 L= 1,Loop
//       DO 22 k= 1,n
//         Y(k)= U(k)/V(k)
//         W(k)= X(k)/( EXP( Y(k)) -1.0)
//    22 CONTINUE
//
// state.range(0) is used as n. The last element of u is overwritten once,
// before timing starts, exactly as in the reference preamble.
static void BM_PLANCKIAN_RAW(benchmark::State& state) {
  LoopData& data = getLoopData();
  loopInit(PLANCKIAN);  // presumably prepares the shared data for this kernel

  Real_ptr x = data.array_1D_Real[0];
  Real_ptr y = data.array_1D_Real[1];
  Real_ptr u = data.array_1D_Real[2];
  Real_ptr v = data.array_1D_Real[3];
  Real_ptr w = data.array_1D_Real[4];

  const Real_type expmax = 20.0;

  // One-time setup: u[n-1] = 0.99 * expmax * v[n-1].
  u[state.range(0)-1] = 0.99*expmax*v[state.range(0)-1];

  for (auto _ : state) {
    for (Index_type k = 0; k < state.range(0); ++k) {
      y[k] = u[k] / v[k];
      w[k] = x[k] / ( exp( y[k] ) -1.0 );
    }
  }
}
BENCHMARK(BM_PLANCKIAN_RAW)
    ->Arg(171)
    ->Arg(5001)
    ->Arg(44217)
    ->Unit(benchmark::kMicrosecond);
// Benchmark: Livermore kernel 23 -- 2-D implicit hydrodynamics fragment.
//
// Fortran reference:
//       DO 23  L= 1,Loop
//       DO 23  j= 2,6
//       DO 23  k= 2,n
//             QA= ZA(k,j+1)*ZR(k,j) +ZA(k,j-1)*ZB(k,j) +
//      .          ZA(k+1,j)*ZU(k,j) +ZA(k-1,j)*ZV(k,j) +ZZ(k,j)
//    23  ZA(k,j)= ZA(k,j) +.175*(QA -ZA(k,j))
//
// state.range(0) is used as n. Arrays are indexed [j][k]; za is updated in
// place, so each element sees already-updated neighbours at [j-1][k] and
// [j][k-1].
static void BM_IMP_HYDRO_2D_RAW(benchmark::State& state) {
  LoopData& data = getLoopData();
  loopInit(IMP_HYDRO_2D);  // presumably prepares the shared data for this kernel

  Real_ptr* za = data.array_2D_7xN_Real[0];
  Real_ptr* zb = data.array_2D_7xN_Real[1];
  Real_ptr* zr = data.array_2D_7xN_Real[2];
  Real_ptr* zu = data.array_2D_7xN_Real[3];
  Real_ptr* zv = data.array_2D_7xN_Real[4];
  Real_ptr* zz = data.array_2D_7xN_Real[5];

  for (auto _ : state) {
    for (Index_type j = 1; j < 6; ++j) {
      for (Index_type k = 1; k < state.range(0); ++k) {
        const Real_type qa =
            za[j+1][k]*zr[j][k] + za[j-1][k]*zb[j][k] +
            za[j][k+1]*zu[j][k] + za[j][k-1]*zv[j][k] + zz[j][k];
        za[j][k] += 0.175*( qa - za[j][k] );
      }
    }
  }
}
BENCHMARK(BM_IMP_HYDRO_2D_RAW)
    ->Arg(171)
    ->Arg(5001)
    ->Arg(44217)
    ->Unit(benchmark::kMicrosecond);
// Benchmark: Livermore kernel 24 -- find location of first minimum in array.
//
// Fortran reference:
//       X( n/2)= -1.0E+10
//       DO 24  L= 1,Loop
//              m= 1
//       DO 24  k= 2,n
//       IF( X(k).LT.X(m))  m= k
//    24 CONTINUE
//
// state.range(0) is used as n. The strict '<' keeps the earliest index when
// equal minima occur.
// NOTE(review): the reference plants -1.0E+10 at X(n/2); this function does
// not do so itself -- presumably loopInit handles it; confirm.
static void BM_FIND_FIRST_MIN_RAW(benchmark::State& state) {
  LoopData& loop_data = getLoopData();
  loopInit(FIND_FIRST_MIN);  // presumably prepares the shared data for this kernel

  Real_ptr x = loop_data.array_1D_Real[0];

  Index_type m = 0;
  for (auto _ : state) {
    m = 0;
    for (Index_type k = 1; k < state.range(0); k++) {
      // m is never read after the loop; DoNotOptimize keeps the compiler from
      // discarding the index update as dead code.
      if ( x[k] < x[m] ) benchmark::DoNotOptimize(m = k);
    }
  }
}
BENCHMARK(BM_FIND_FIRST_MIN_RAW)->Arg(171)->Arg(5001)->
    Arg(44217)->Unit(benchmark::kMicrosecond);