blob: 2627814c845b8ef17fb149b2ea02d6d86e5fd501 [file] [log] [blame]
/*
* This is an executable test containing a number of loops to measure
* the performance of a compiler. Arrays' length is LEN by default
* and if you want a different array length, you should replace every
* LEN by your desired number which must be a multiple of 40. If you
* want to increase the number of loop calls to have a longer run time
* you have to manipulate the constant value ntimes. There is a dummy
* function called in each loop to make all computations appear required.
* The time to execute this function is included in the time measurement
* for the output but it is negligible.
*
* The output includes three columns:
* Loop: The name of the loop
* Time(Sec): The time in seconds to run the loop
* Checksum: The checksum calculated when the test has run
*
* In this version of the codelets arrays are static type.
*
* All functions/loops are taken from "TEST SUITE FOR VECTORIZING COMPILERS"
* by David Callahan, Jack Dongarra and David Levine, except those whose
* function names have 4 digits.
*/
#define ntimes_default 200000
#define digits_default 6
#define _XOPEN_SOURCE 600
#include <stdlib.h>
#include <math.h>
#include <stdio.h>
#include <sys/param.h>
#include <sys/times.h>
#include <sys/types.h>
#include <time.h>
#include <string.h>
#include <assert.h>
// Base repetition count for the timed kernels; each kernel multiplies or
// divides it by its own factor in its outer "nl" loop.
static int ntimes = ntimes_default;
// Number of significant digits check() passes to printf's "%.*g".
static int digits = digits_default;
#include "types.h"
#ifndef TESTS
#define TESTS 0xFFFFFFFF
#endif
#include "tests.h"
#define lll LEN
/*
* Disable timing in the output so that we can use the output for correctness testing.
*/
#ifndef USE_CLOCK
#define clock() 0
#endif
// Five X_TYPE arrays of lll (== LEN) elements; filled by init("s000 ").
__attribute__ ((aligned(ALIGNMENT))) X_TYPE X[lll],Y[lll],Z[lll],U[lll],V[lll];
//TYPE* __restrict__ array;
// Flat LEN2*LEN2 scratch array; check(0) prints its sum.
TYPE array[LEN2*LEN2] __attribute__((aligned(ALIGNMENT)));
// NOTE(review): x is not referenced in this part of the file - presumably used by later kernels.
TYPE x[LEN] __attribute__((aligned(ALIGNMENT)));
// Scalar result slot; check(-1) and check(-12) print it.
TYPE temp;
// NOTE(review): temp_int is not referenced in this part of the file.
int temp_int;
// We place all of the data into one single global structure so that we can
// control its exact layout. Otherwise, the performance of the code can become
// very dependent on the exact addresses assigned to arrays at compile time, and
// probing that behavior is not the purpose of this set of benchmarks.
//
// We insert prime-multiple padding in between each array to help ensure that
// the relative offsets of the arrays are unlikely to trigger unmodelled
// architecture specific problems w.r.t. cache behavior or other CPU
// features. For example, this ensures that no two of the arrays will be
// 4K-aliased with each other, which can be important for some Intel processors.
// Single container for the benchmark work arrays. Member order is significant:
// it fixes the relative placement of the arrays, and the int pads between
// them have prime lengths (3, 5, 7, ... 23).
struct GlobalData {
// One-dimensional work arrays, LEN elements each.
__attribute__((aligned(ALIGNMENT))) TYPE a[LEN];
int pad1[3];
__attribute__((aligned(ALIGNMENT))) TYPE b[LEN];
int pad2[5];
__attribute__((aligned(ALIGNMENT))) TYPE c[LEN];
int pad3[7];
__attribute__((aligned(ALIGNMENT))) TYPE d[LEN];
int pad4[11];
__attribute__((aligned(ALIGNMENT))) TYPE e[LEN];
int pad5[13];
// Two-dimensional work arrays, LEN2 x LEN2 elements each.
__attribute__((aligned(ALIGNMENT))) TYPE aa[LEN2][LEN2];
int pad6[17];
__attribute__((aligned(ALIGNMENT))) TYPE bb[LEN2][LEN2];
int pad7[19];
__attribute__((aligned(ALIGNMENT))) TYPE cc[LEN2][LEN2];
int pad8[23];
__attribute__((aligned(ALIGNMENT))) TYPE tt[LEN2][LEN2];
} global_data;
// Constant pointers to the arrays stored inside global_data, so that the
// kernels can refer to them by their short names (a, b, ..., aa, bb, ...).
__attribute__((aligned(ALIGNMENT))) TYPE * const a = global_data.a;
__attribute__((aligned(ALIGNMENT))) TYPE * const b = global_data.b;
__attribute__((aligned(ALIGNMENT))) TYPE * const c = global_data.c;
__attribute__((aligned(ALIGNMENT))) TYPE * const d = global_data.d;
__attribute__((aligned(ALIGNMENT))) TYPE * const e = global_data.e;
// Pointers to rows of LEN2 elements, so aa[i][j] style indexing keeps working.
__attribute__((aligned(ALIGNMENT))) TYPE (* const aa)[LEN2] = global_data.aa;
__attribute__((aligned(ALIGNMENT))) TYPE (* const bb)[LEN2] = global_data.bb;
__attribute__((aligned(ALIGNMENT))) TYPE (* const cc)[LEN2] = global_data.cc;
__attribute__((aligned(ALIGNMENT))) TYPE (* const tt)[LEN2] = global_data.tt;
// NOTE(review): indx, xx and yy are not referenced in this part of the file -
// presumably used by later kernels.
int indx[LEN] __attribute__((aligned(ALIGNMENT)));
TYPE* __restrict__ xx;
TYPE* yy;
// Defined outside this part of the file. Every timed kernel calls dummy()
// once per outer iteration, passing the work arrays and a scalar.
int dummy(TYPE[LEN], TYPE[LEN], TYPE[LEN], TYPE[LEN], TYPE[LEN], TYPE[LEN2][LEN2], TYPE[LEN2][LEN2], TYPE[LEN2][LEN2], TYPE);
int dummy_media(short[], char[], int);
/*
 * Fill a one-dimensional array of LEN elements.
 *
 *   stride == -1 : arr[i] = 1 / (i + 1)
 *   stride == -2 : arr[i] = 1 / ((i + 1) * (i + 1))
 *   otherwise    : starting at index 0, every stride-th element is set to
 *                  value; the other elements are left untouched.
 *
 * Always returns 0.
 */
int set1d(TYPE arr[LEN], TYPE value, int stride)
{
    switch (stride) {
    case -1:
        for (int idx = 0; idx < LEN; ++idx)
            arr[idx] = 1. / (TYPE) (idx + 1);
        break;
    case -2:
        for (int idx = 0; idx < LEN; ++idx)
            arr[idx] = 1. / (TYPE) ((idx + 1) * (idx + 1));
        break;
    default:
        for (int idx = 0; idx < LEN; idx += stride)
            arr[idx] = value;
        break;
    }
    return 0;
}
/*
 * Fill part of a one-dimensional array.
 *
 * _n     : maximum number of elements to write. It is additionally capped at
 *          LEN, the bound this routine has always used, so callers that pass
 *          a larger count keep their previous behaviour.
 * arr    : first element to write; may point into the middle of an array.
 * value  : fill value for the strided mode.
 * stride : -1 -> arr[i] = 1 / (i + 1)
 *          -2 -> arr[i] = 1 / ((i + 1) * (i + 1))
 *          >0 -> arr[0], arr[stride], arr[2*stride], ... are set to value
 *          any other value writes nothing.
 *
 * Previously _n was ignored and LEN elements were always written, so a call
 * such as set1ds(LEN/2, &b[LEN/2], ...) ran past the end of the array.
 *
 * Always returns 0.
 */
int set1ds(int _n, TYPE arr[LEN], TYPE value, int stride)
{
    /* Number of elements the contiguous (-1 / -2) modes may touch. */
    const int limit = (_n < LEN) ? _n : LEN;

    if (stride == -1) {
        for (int i = 0; i < limit; i++) {
            arr[i] = 1. / (TYPE) (i+1);
        }
    } else if (stride == -2) {
        for (int i = 0; i < limit; i++) {
            arr[i] = 1. / (TYPE) ((i+1) * (i+1));
        }
    } else if (stride > 0) {
        /* Stop after _n stores or at the LEN index bound, whichever is first. */
        int written = 0;
        for (int i = 0; i < LEN && written < _n; i += stride, written++) {
            arr[i] = value;
        }
    }
    /* stride == 0 or stride < -2: nothing sensible to do, leave arr alone. */
    return 0;
}
/*
 * Initialize a LEN2 x LEN2 two-dimensional array.
 *
 *   stride == -1 : every element of row i becomes 1 / (i + 1)
 *   stride == -2 : every element of row i becomes 1 / ((i + 1) * (i + 1))
 *   otherwise    : in each row, every stride-th column starting at column 0
 *                  is set to value.
 *
 * Always returns 0.
 */
int set2d(TYPE arr[LEN2][LEN2], TYPE value, int stride)
{
    for (int row = 0; row < LEN2; ++row) {
        TYPE *line = arr[row];
        if (stride == -1) {
            for (int col = 0; col < LEN2; ++col)
                line[col] = 1. / (TYPE) (row + 1);
        } else if (stride == -2) {
            for (int col = 0; col < LEN2; ++col)
                line[col] = 1. / (TYPE) ((row + 1) * (row + 1));
        } else {
            for (int col = 0; col < LEN2; col += stride)
                line[col] = value;
        }
    }
    return 0;
}
/* Return the sum of the LEN elements of arr, accumulated from index 0 upward. */
TYPE sum1d(TYPE arr[LEN]){
    TYPE total = 0.;
    const TYPE *cursor = arr;
    const TYPE *const stop = arr + LEN;
    while (cursor != stop) {
        total += *cursor;
        ++cursor;
    }
    return total;
}
// Empty helper: takes no arguments, does no work and always returns 0.
static inline int s471s(void)
{
// -- dummy subroutine call made in s471
return 0;
}
/* Return the product of the two arguments. */
static inline TYPE f(TYPE a, TYPE b){
    const TYPE product = a * b;
    return product;
}
/*
 * Print one checksum, followed by " \n", using "%.*g" with `digits`
 * significant digits.
 *
 * The sums of a, b, c, d, e, aa, bb, cc and array are always computed; `name`
 * selects which one (or which combination) is printed:
 *   1..5 -> a..e           11/22/33 -> aa/bb/cc        0 -> array
 *   12 -> a+b   25 -> b+e   13 -> a+c   123 -> a+b+c
 *   1122 -> aa+bb   112233 -> aa+bb+cc   111 -> aa+a
 *   -1 -> temp   -12 -> temp+b
 * Any other value prints nothing.
 */
void check(int name){
    TYPE suma = 0, sumb = 0, sumc = 0, sumd = 0, sume = 0;
    for (int k = 0; k < LEN; ++k) {
        suma += a[k];
        sumb += b[k];
        sumc += c[k];
        sumd += d[k];
        sume += e[k];
    }

    TYPE sumaa = 0, sumbb = 0, sumcc = 0;
    for (int row = 0; row < LEN2; ++row) {
        for (int col = 0; col < LEN2; ++col) {
            sumaa += aa[row][col];
            sumbb += bb[row][col];
            sumcc += cc[row][col];
        }
    }

    TYPE sumarray = 0;
    for (int k = 0; k < LEN2*LEN2; ++k) {
        sumarray += array[k];
    }

    switch (name) {
    case 1:      printf("%.*g \n",digits,suma); break;
    case 2:      printf("%.*g \n",digits,sumb); break;
    case 3:      printf("%.*g \n",digits,sumc); break;
    case 4:      printf("%.*g \n",digits,sumd); break;
    case 5:      printf("%.*g \n",digits,sume); break;
    case 11:     printf("%.*g \n",digits,sumaa); break;
    case 22:     printf("%.*g \n",digits,sumbb); break;
    case 33:     printf("%.*g \n",digits,sumcc); break;
    case 0:      printf("%.*g \n",digits,sumarray); break;
    case 12:     printf("%.*g \n",digits,suma+sumb); break;
    case 25:     printf("%.*g \n",digits,sumb+sume); break;
    case 13:     printf("%.*g \n",digits,suma+sumc); break;
    case 123:    printf("%.*g \n",digits,suma+sumb+sumc); break;
    case 1122:   printf("%.*g \n",digits,sumaa+sumbb); break;
    case 112233: printf("%.*g \n",digits,sumaa+sumbb+sumcc); break;
    case 111:    printf("%.*g \n",digits,sumaa+suma); break;
    case -1:     printf("%.*g \n",digits,temp); break;
    case -12:    printf("%.*g \n",digits,temp+sumb); break;
    default:     break;
    }
}
// Set up the global work arrays for the kernel identified by name.
//
// name is compared with strcmp() against a fixed list of tag strings (most of
// them end in a space, e.g. "s111 "); the first match decides which arrays are
// filled, using set1d / set1ds / set2d with one of three modes:
//   unit  (stride  1) : constant fill with the given value
//   frac  (stride -1) : element i (row i for 2-D) becomes 1/(i+1)
//   frac2 (stride -2) : element i (row i for 2-D) becomes 1/((i+1)*(i+1))
// Arrays not mentioned in the matching branch keep whatever they held before.
// A name that matches no tag initializes nothing. Always returns 0.
int init(char* name)
{
// "any" is only passed together with frac / frac2, where set1d/set2d ignore the value.
TYPE any=0.;
TYPE zero=0.;
TYPE half=.5;
TYPE one=1.;
TYPE two=2.;
TYPE small = .000001;
int unit =1;
int frac=-1;
int frac2=-2;
if (!strcmp(name, "s000 ")) {
for (int i = 0; i < lll; i++) {
X[i] = 1+i;
Y[i] = 2+i;
Z[i] = 3+i;
U[i] = 4+i;
V[i] = 5+i;
}
} else if (!strcmp(name, "s111 ")) {
set1d(a, one,unit);
set1d(b, any,frac2);
set1d(c, any,frac2);
set1d(d, any,frac2);
set1d(e, any,frac2);
} else if (!strcmp(name, "s112 ")) {
set1d(a, one,unit);
set1d(b, any,frac2);
} else if (!strcmp(name, "s113 ")) {
set1d(a, one,unit);
set1d(b, any,frac2);
} else if (!strcmp(name, "s114 ")) {
set2d(aa, any,frac);
set2d(bb, any,frac2);
} else if (!strcmp(name, "s115 ")) {
set1d(a, one,unit);
set2d(aa,small,unit);
set2d(bb,small,unit);
set2d(cc,small,unit);
} else if (!strcmp(name, "s116 ")) {
set1d(a, one,unit);
} else if (!strcmp(name, "s118 ")) {
set1d(a, one,unit);
set2d(bb,small,unit);
} else if (!strcmp(name, "s119 ")) {
set2d(aa, one,unit);
set2d(bb, any,frac2);
} else if (!strcmp(name, "s121 ")) {
set1d(a, one,unit);
set1d(b, any,frac2);
} else if (!strcmp(name, "s122 ")) {
set1d(a, one,unit);
set1d(b, any,frac2);
} else if (!strcmp(name, "s123 ")) {
set1d(a,zero,unit);
set1d(b, one,unit);
set1d(c, one,unit);
set1d(d, any,frac);
set1d(e, any,frac);
} else if (!strcmp(name, "s124 ")) {
set1d(a,zero,unit);
set1d(b, one,unit);
set1d(c, one,unit);
set1d(d, any,frac);
set1d(e, any,frac);
} else if (!strcmp(name, "s125 ")) {
set1ds(LEN*LEN, array,zero,unit);
set2d(aa, one,unit);
set2d(bb,half,unit);
set2d(cc, two,unit);
} else if (!strcmp(name, "s126 ")) {
set2d(bb, one,unit);
set1ds(LEN*LEN,array,any,frac);
set2d(cc, any,frac);
} else if (!strcmp(name, "s127 ")) {
set1d(a,zero,unit);
set1d(b, one,unit);
set1d(c, any,frac);
set1d(d, any,frac);
set1d(e, any,frac);
} else if (!strcmp(name, "s128 ")) {
set1d(a,zero,unit);
set1d(b, two,unit);
set1d(c, one,unit);
set1d(d, one,unit);
} else if (!strcmp(name, "s131 ")) {
set1d(a, one,unit);
set1d(b, any,frac2);
} else if (!strcmp(name, "s132 ")) {
set2d(aa, one,unit);
set1d(b, any,frac);
set1d(c, any,frac);
} else if (!strcmp(name, "s141 ")) {
set1ds(LEN*LEN,array, one,unit);
set2d(bb, any,frac2);
} else if (!strcmp(name, "s151 ")) {
set1d(a, one,unit);
set1d(b, any,frac2);
} else if (!strcmp(name, "s152 ")) {
set1d(a, one,unit);
set1d(b,zero,unit);
set1d(c, any,frac);
set1d(d, any,frac);
set1d(e, any,frac);
} else if (!strcmp(name, "s161 ")) {
set1d(a, one,unit);
// even elements of b become +1, odd elements become -1
set1ds(LEN/2,&b[0], one,2);
set1ds(LEN/2,&b[1],-one,2);
set1d(c, one,unit);
set1d(d, any,frac);
set1d(e, any,frac);
} else if (!strcmp(name, "s162 ")) {
set1d(a, one,unit);
set1d(b, any,frac);
set1d(c, any,frac);
} else if (!strcmp(name, "s171 ")) {
set1d(a, one,unit);
set1d(b, any,frac2);
} else if (!strcmp(name, "s172 ")) {
set1d(a, one,unit);
set1d(b, any,frac2);
} else if (!strcmp(name, "s173 ")) {
set1d(a, one,unit);
set1d(b, any,frac2);
} else if (!strcmp(name, "s174 ")) {
set1d(a, one,unit);
set1d(b, any,frac2);
} else if (!strcmp(name, "s175 ")) {
set1d(a, one,unit);
set1d(b, any,frac2);
} else if (!strcmp(name, "s176 ")) {
set1d(a, one,unit);
set1d(b, any,frac);
set1d(c, any,frac);
} else if (!strcmp(name, "s211 ")) {
set1d(a,zero,unit);
set1d(b, one,unit);
set1d(c, any,frac);
set1d(d, any,frac);
set1d(e, any,frac);
} else if (!strcmp(name, "s212 ")) {
set1d(a, any,frac);
set1d(b, one,unit);
set1d(c, one,unit);
set1d(d, any,frac);
} else if (!strcmp(name, "s221 ")) {
set1d(a, one,unit);
set1d(b, any,frac);
set1d(c, any,frac);
set1d(d, any,frac);
} else if (!strcmp(name, "s222 ")) {
set1d(a,zero,unit);
set1d(b, one,unit);
set1d(c, one,unit);
} else if (!strcmp(name, "s231 ")) {
set2d(aa, one,unit);
set2d(bb, any,frac2);
} else if (!strcmp(name, "s232 ")) {
set2d(aa, one,unit);
set2d(bb,zero,unit);
} else if (!strcmp(name, "s233 ")) {
set2d(aa, any,frac);
set2d(bb, any,frac);
set2d(cc, any,frac);
} else if (!strcmp(name, "s234 ")) {
set2d(aa, one,unit);
set2d(bb, any,frac);
set2d(cc, any,frac);
} else if (!strcmp(name, "s235 ")) {
set1d(a, one,unit);
set1d(b, any,frac);
set1d(c, any,frac);
set2d(aa, one,unit);
set2d(bb, any, frac2);
} else if (!strcmp(name, "s241 ")) {
set1d(a, one,unit);
set1d(b, one,unit);
set1d(c, one,unit);
set1d(d, one,unit);
} else if (!strcmp(name, "s242 ")) {
set1d(a,small,unit);
set1d(b,small,unit);
set1d(c,small,unit);
set1d(d,small,unit);
} else if (!strcmp(name, "s243 ")) {
set1d(a,zero,unit);
set1d(b, one,unit);
set1d(c, any,frac);
set1d(d, any,frac);
set1d(e, any,frac);
} else if (!strcmp(name, "s244 ")) {
set1d(a,zero,unit);
set1d(b, one,unit);
set1d(c,small,unit);
set1d(d,small,unit);
} else if (!strcmp(name, "s251 ")) {
set1d(a,zero,unit);
set1d(b, one,unit);
set1d(c, any,frac);
set1d(d, any,frac);
set1d(e, any,frac);
} else if (!strcmp(name, "s252 ")) {
set1d(a,zero,unit);
set1d(b, one,unit);
set1d(c, one,unit);
} else if (!strcmp(name, "s253 ")) {
set1d(a, one,unit);
set1d(b,small,unit);
set1d(c, one,unit);
set1d(d, any,frac);
} else if (!strcmp(name, "s254 ")) {
set1d(a,zero,unit);
set1d(b, one,unit);
} else if (!strcmp(name, "s255 ")) {
set1d(a,zero,unit);
set1d(b, one,unit);
} else if (!strcmp(name, "s256 ")) {
set1d(a, one,unit);
set2d(aa, two,unit);
set2d(bb, one,unit);
} else if (!strcmp(name, "s257 ")) {
set1d(a, one,unit);
set2d(aa, two,unit);
set2d(bb, one,unit);
} else if (!strcmp(name, "s258 ")) {
set1d(a, any,frac);
set1d(b,zero,unit);
set1d(c, any,frac);
set1d(d, any,frac);
set1d(e,zero,unit);
set2d(aa, any,frac);
} else if (!strcmp(name, "s261 ")) {
set1d(a, one,unit);
set1d(b, any,frac2);
set1d(c, any,frac2);
set1d(d, one,unit);
} else if (!strcmp(name, "s271 ")) {
set1d(a, one,unit);
set1d(b, any,frac);
set1d(c, any,frac);
} else if (!strcmp(name, "s272 ")) {
set1d(a, one,unit);
set1d(b, one,unit);
set1d(c, any,frac);
set1d(d, any,frac);
set1d(e, two,unit);
} else if (!strcmp(name, "s273 ")) {
set1d(a, one,unit);
set1d(b, one,unit);
set1d(c, one,unit);
set1d(d,small,unit);
set1d(e, any,frac);
} else if (!strcmp(name, "s274 ")) {
set1d(a,zero,unit);
set1d(b, one,unit);
set1d(c, one,unit);
set1d(d, any,frac);
set1d(e, any,frac);
} else if (!strcmp(name, "s275 ")) {
set2d(aa, one,unit);
set2d(bb,small,unit);
set2d(cc,small,unit);
} else if (!strcmp(name, "s276 ")) {
set1d(a, one,unit);
set1d(b, any,frac);
set1d(c, any,frac);
set1d(d, any,frac);
} else if (!strcmp(name, "s277 ")) {
set1d(a, one,unit);
// NOTE(review): the second call starts at b[LEN/2]; the c initialization that
// follows must stay after it in case set1ds writes more than LEN/2 elements.
set1ds(LEN/2,b, one,unit);
set1ds(LEN/2,&b[LEN/2],-one,unit);
set1d(c, any,frac);
set1d(d, any,frac);
set1d(e, any,frac);
} else if (!strcmp(name, "s278 ")) {
// NOTE(review): keep the b initialization after the set1ds call that starts at a[LEN/2].
set1ds(LEN/2,a,-one,unit);
set1ds(LEN/2,&a[LEN/2],one,unit);
set1d(b, one,unit);
set1d(c, any,frac);
set1d(d, any,frac);
set1d(e, any,frac);
} else if (!strcmp(name, "s279 ")) {
// NOTE(review): keep the b initialization after the set1ds call that starts at a[LEN/2].
set1ds(LEN/2,a,-one,unit);
set1ds(LEN/2,&a[LEN/2],one,unit);
// set1d(a, -one,unit);
set1d(b, one,unit);
set1d(c, any,frac);
set1d(d, any,frac);
set1d(e, any,frac);
} else if (!strcmp(name, "s2710")) {
set1d(a, one,unit);
set1d(b, one,unit);
set1d(c, any,frac);
set1d(d, any,frac);
set1d(e, any,frac);
} else if (!strcmp(name, "s2711")) {
set1d(a, one,unit);
set1d(b, any,frac);
set1d(c, any,frac);
} else if (!strcmp(name, "s2712")) {
set1d(a, one,unit);
set1d(b, any,frac);
set1d(c, any,frac);
} else if (!strcmp(name, "s281 ")) {
set1d(a,zero,unit);
set1d(b, one,unit);
set1d(c, one,unit);
} else if (!strcmp(name, "s291 ")) {
set1d(a,zero,unit);
set1d(b, one,unit);
} else if (!strcmp(name, "s292 ")) {
set1d(a,zero,unit);
set1d(b, one,unit);
} else if (!strcmp(name, "s293 ")) {
set1d(a, any,frac);
} else if (!strcmp(name, "s2101")) {
set2d(aa, one,unit);
set2d(bb, any,frac);
set2d(cc, any,frac);
} else if (!strcmp(name, "s2102")) {
set2d(aa,zero,unit);
} else if (!strcmp(name, "s2111")) {
// set2d(aa, one,unit);
set2d(aa,zero,unit);
} else if (!strcmp(name, "s311 ")) {
set1d(a, any,frac);
} else if (!strcmp(name, "s312 ")) {
set1d(a,1.000001,unit);
} else if (!strcmp(name, "s313 ")) {
set1d(a, any,frac);
set1d(b, any,frac);
} else if (!strcmp(name, "s314 ")) {
set1d(a, any,frac);
} else if (!strcmp(name, "s315 ")) {
set1d(a, any,frac);
} else if (!strcmp(name, "s316 ")) {
set1d(a, any,frac);
} else if (!strcmp(name, "s317 ")) {
// recognized tag, but no array is initialized for it
} else if (!strcmp(name, "s318 ")) {
set1d(a, any,frac);
a[LEN-1] = -two;
} else if (!strcmp(name, "s319 ")) {
set1d(a,zero,unit);
set1d(b,zero,unit);
set1d(c, any,frac);
set1d(d, any,frac);
set1d(e, any,frac);
} else if (!strcmp(name, "s3110")) {
set2d(aa, any,frac);
aa[LEN2-1][LEN2-1] = two;
} else if (!strcmp(name, "s3111")) {
set1d(a, any,frac);
} else if (!strcmp(name, "s3112")) {
set1d(a, any,frac2);
set1d(b,zero,unit);
} else if (!strcmp(name, "s3113")) {
set1d(a, any,frac);
a[LEN-1] = -two;
} else if (!strcmp(name, "s321 ")) {
set1d(a, one,unit);
set1d(b,zero,unit);
} else if (!strcmp(name, "s322 ")) {
set1d(a, one,unit);
set1d(b,zero,unit);
set1d(c,zero,unit);
} else if (!strcmp(name, "s323 ")) {
set1d(a, one,unit);
set1d(b, one,unit);
set1d(c, any,frac);
set1d(d, any,frac);
set1d(e, any,frac);
} else if (!strcmp(name, "s331 ")) {
set1d(a, any,frac);
a[LEN-1] = -one;
} else if (!strcmp(name, "s332 ")) {
set1d(a, any,frac2);
a[LEN-1] = two;
} else if (!strcmp(name, "s341 ")) {
set1d(a,zero,unit);
set1d(b, any,frac);
} else if (!strcmp(name, "s342 ")) {
set1d(a, any,frac);
set1d(b, any,frac);
} else if (!strcmp(name, "s343 ")) {
set2d(aa, any,frac);
set2d(bb, one,unit);
} else if (!strcmp(name, "s351 ")) {
set1d(a, one,unit);
set1d(b, one,unit);
c[0] = 1.;
} else if (!strcmp(name, "s352 ")) {
set1d(a, any,frac);
set1d(b, any,frac);
} else if (!strcmp(name, "s353 ")) {
set1d(a, one,unit);
set1d(b, one,unit);
c[0] = 1.;
} else if (!strcmp(name, "s411 ")) {
set1d(a, one,unit);
set1d(b, any,frac);
set1d(c, any,frac);
} else if (!strcmp(name, "s412 ")) {
set1d(a, one,unit);
set1d(b, any,frac);
set1d(c, any,frac);
} else if (!strcmp(name, "s413 ")) {
set1d(a,zero,unit);
set1d(b, one,unit);
set1d(c, one,unit);
set1d(d, any,frac);
set1d(e, any,frac);
} else if (!strcmp(name, "s414 ")) {
set2d(aa, one,unit);
set2d(bb, any,frac);
set2d(cc, any,frac);
} else if (!strcmp(name, "s415 ")) {
set1d(a, one,unit);
set1d(b, any,frac);
set1d(c, any,frac);
a[LEN-1] = -one;
} else if (!strcmp(name, "s421 ")) {
set1d(a, any,frac2);
set1d(b, one,unit);
} else if (!strcmp(name, "s422 ")) {
set1d(array,one,unit);
set1d(a, any,frac2);
} else if (!strcmp(name, "s423 ")) {
set1d(array,zero,unit);
set1d(a, any,frac2);
} else if (!strcmp(name, "s424 ")) {
set1d(array,one,unit);
set1d(a, any,frac2);
} else if (!strcmp(name, "s431 ")) {
set1d(a, one,unit);
set1d(b, any,frac2);
} else if (!strcmp(name, "s432 ")) {
set1d(a, one,unit);
set1d(b, any,frac2);
} else if (!strcmp(name, "s441 ")) {
set1d(a, one,unit);
set1d(b, any,frac);
set1d(c, any,frac);
// d is meant to hold three bands: -1, then 0, then +1
set1ds(LEN/3, &d[0], -one,unit);
set1ds(LEN/3, &d[LEN/3], zero,unit);
set1ds(LEN/3+1, &d[(2*LEN/3)], one,unit);
} else if (!strcmp(name, "s442 ")) {
set1d(a, one,unit);
set1d(b, any,frac);
set1d(c, any,frac);
set1d(d, any,frac);
set1d(e, any,frac);
} else if (!strcmp(name, "s443 ")) {
set1d(a, one,unit);
set1d(b, any,frac);
set1d(c, any,frac);
} else if (!strcmp(name, "s451 ")) {
set1d(b, any,frac);
set1d(c, any,frac);
} else if (!strcmp(name, "s452 ")) {
set1d(a,zero,unit);
set1d(b, one,unit);
set1d(c,small,unit);
} else if (!strcmp(name, "s453 ")) {
set1d(a,zero,unit);
set1d(b, any,frac2);
} else if (!strcmp(name, "s471 ")) {
set1d(a, one,unit);
set1d(b, one,unit);
set1d(c, one,unit);
set1d(d, any,frac);
set1d(e, any,frac);
} else if (!strcmp(name, "s481 ")) {
set1d(a, one,unit);
set1d(b, any,frac);
set1d(c, any,frac);
set1d(d, any,frac);
} else if (!strcmp(name, "s482 ")) {
set1d(a, one,unit);
set1d(b, any,frac);
set1d(c, any,frac);
} else if (!strcmp(name, "s491 ")) {
set1d(a,zero,unit);
set1d(b, one,unit);
set1d(c, any,frac);
set1d(d, any,frac);
} else if (!strcmp(name, "s4112")) {
set1d(a, one,unit);
set1d(b, any,frac);
} else if (!strcmp(name, "s4113")) {
set1d(a,zero,unit);
set1d(b, one,unit);
set1d(c, any,frac2);
} else if (!strcmp(name, "s4114")) {
set1d(a,zero,unit);
set1d(b, one,unit);
set1d(c, any,frac);
set1d(d, any,frac);
} else if (!strcmp(name, "s4115")) {
set1d(a, any,frac);
set1d(b, any,frac);
} else if (!strcmp(name, "s4116")) {
set1d(a, any,frac);
set2d(aa, any,frac);
} else if (!strcmp(name, "s4117")) {
set1d(a,zero,unit);
set1d(b, one,unit);
set1d(c, any,frac);
set1d(d, any,frac);
} else if (!strcmp(name, "s4121")) {
set1d(a, one,unit);
set1d(b, any,frac);
set1d(c, any,frac);
} else if (!strcmp(name, "va ")) {
set1d(a,zero,unit);
set1d(b, any,frac2);
} else if (!strcmp(name, "vag ")) {
set1d(a,zero,unit);
set1d(b, any,frac2);
} else if (!strcmp(name, "vas ")) {
set1d(a,zero,unit);
set1d(b, any,frac2);
} else if (!strcmp(name, "vif ")) {
set1d(a,zero,unit);
set1d(b, any,frac2);
} else if (!strcmp(name, "vpv ")) {
set1d(a,zero,unit);
set1d(b, any,frac2);
} else if (!strcmp(name, "vtv ")) {
set1d(a, one,unit);
set1d(b, one,unit);
} else if (!strcmp(name, "vpvtv")) {
set1d(a, one,unit);
set1d(b, any,frac);
set1d(c, any,frac);
} else if (!strcmp(name, "vpvts")) {
set1d(a, one,unit);
set1d(b, any,frac2);
} else if (!strcmp(name, "vpvpv")) {
set1d(a, any,frac2);
set1d(b, one,unit);
set1d(c,-one,unit);
} else if (!strcmp(name, "vtvtv")) {
set1d(a, one,unit);
set1d(b, two,unit);
set1d(c,half,unit);
} else if (!strcmp(name, "vsumr")) {
set1d(a, any,frac);
} else if (!strcmp(name, "vdotr")) {
set1d(a, any,frac);
set1d(b, any,frac);
} else if (!strcmp(name, "vbor ")) {
set1d(a, any,frac);
set1d(b, any,frac);
// stride is frac here, so the values one/two/half are ignored by set1d
set1d(c, one,frac);
set1d(d, two,frac);
set1d(e,half,frac);
set2d(aa, any,frac);
} else {
// unknown name: nothing is initialized
}
return 0;
}
#if TESTS & LINEAR_DEPENDENCE
// Timed kernel S000: X[i] = Y[i] + 1 over all lll elements.
// Calls init("s000 "), repeats the loop 2*ntimes times with a dummy() call
// after each pass, then prints the label, the clock() difference divided by
// 1e6, and the checksum printed by check(1). Returns 0.
// NOTE(review): check(1) prints the sum of a, while this kernel writes X.
int s000()
{
// linear dependence testing
// no dependence - vectorizable
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s000 ");
start_t = clock();
for (int nl = 0; nl < 2*ntimes; nl++) {
for (int i = 0; i < lll; i++) {
// a[i] = b[i] + c[i];
// X[i] = (Y[i] * Z[i])+(U[i]*V[i]);
X[i] = Y[i] + 1;
}
// X..V are X_TYPE arrays; they are cast to TYPE* to fit dummy()'s prototype
dummy((TYPE*)X, (TYPE*)Y, (TYPE*)Z, (TYPE*)U, (TYPE*)V, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S000\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %1.1
// Timed kernel S111: a[i] = a[i-1] + b[i] for odd i only (reads even
// elements, writes odd ones).
// Calls init("s111 "), repeats the loop 2*ntimes times with a dummy() call
// after each pass, then prints the label, the clock() difference divided by
// 1e6, and the sum of a via check(1). Returns 0.
int s111()
{
// linear dependence testing
// no dependence - vectorizable
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s111 ");
start_t = clock();
for (int nl = 0; nl < 2*ntimes; nl++) {
// #pragma vector always
for (int i = 1; i < LEN; i += 2) {
a[i] = a[i - 1] + b[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S111\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// Timed kernel S1111: writes every second element of a (a[2*i]) from a
// combination of products of b[i], c[i] and d[i], for i in [0, LEN/2).
// Reuses the "s111 " initialization, repeats the loop 2*ntimes times with a
// dummy() call after each pass, then prints the label, the clock() difference
// divided by 1e6, and the sum of a via check(1). Returns 0.
int s1111()
{
// no dependence - vectorizable
// jump in data access
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init("s111 ");
start_t = clock();
for (int nl = 0; nl < 2*ntimes; nl++) {
for (int i = 0; i < LEN/2; i++) {
a[2*i] = c[i] * b[i] + d[i] * b[i] + c[i] * c[i] + d[i] * b[i] + d[i] * c[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif /1000000.0);
printf("S1111\t %.2f \t\t ", clock_dif_sec);
check(1);
return 0;
}
// %1.1
// Timed kernel S112: a[i+1] = a[i] + b[i], iterating i downward from LEN-2
// to 0.
// Calls init("s112 "), repeats the loop 3*ntimes times with a dummy() call
// after each pass, then prints the label, the clock() difference divided by
// 1e6, and the sum of a via check(1). Returns 0.
int s112()
{
// linear dependence testing
// loop reversal
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s112 ");
start_t = clock();
for (int nl = 0; nl < 3*ntimes; nl++) {
// #pragma vector always
for (int i = LEN - 2; i >= 0; i--) {
a[i+1] = a[i] + b[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S112\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// Timed kernel S1112: a[i] = b[i] + 1, iterating i downward from LEN-1 to 0.
// Reuses the "s112 " initialization, repeats the loop ntimes*3 times with a
// dummy() call after each pass, then prints the label, the clock() difference
// divided by 1e6, and the sum of a via check(1). Returns 0.
int s1112()
{
// linear dependence testing
// loop reversal
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init("s112 ");
start_t = clock();
for (int nl = 0; nl < ntimes*3; nl++) {
for (int i = LEN - 1; i >= 0; i--) {
a[i] = b[i] + (TYPE) 1.;
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif /1000000.0);
printf("S1112\t %.2f \t\t ", clock_dif_sec);
check(1);
return 0;
}
// %1.1
// Timed kernel S113: a[i] = a[0] + b[i] for i in [1, LEN); a[0] itself is
// never written by the loop.
// Calls init("s113 "), repeats the loop 4*ntimes times with a dummy() call
// after each pass, then prints the label, the clock() difference divided by
// 1e6, and the sum of a via check(1). Returns 0.
int s113()
{
// linear dependence testing
// a(i)=a(1) but no actual dependence cycle
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s113 ");
start_t = clock();
for (int nl = 0; nl < 4*ntimes; nl++) {
for (int i = 1; i < LEN; i++) {
a[i] = a[0] + b[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S113\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// Timed kernel S1113: a[i] = a[LEN/2] + b[i] for all i. The element a[LEN/2]
// is itself overwritten when i reaches LEN/2, so later iterations read the
// updated value.
// Reuses the "s113 " initialization, repeats the loop 2*ntimes times with a
// dummy() call after each pass, then prints the label, the clock() difference
// divided by 1e6, and the sum of a via check(1). Returns 0.
int s1113()
{
// linear dependence testing
// one iteration dependency on a(LEN/2) but still vectorizable
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s113 ");
start_t = clock();
for (int nl = 0; nl < 2*ntimes; nl++) {
for (int i = 0; i < LEN; i++) {
a[i] = a[LEN/2] + b[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S1113\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %1.1
// Timed kernel S114: for the strictly lower triangle (j < i),
// aa[i][j] = aa[j][i] + bb[i][j], i.e. each element is computed from its
// transposed counterpart.
// Calls init("s114 "), repeats the loop nest 200*(ntimes/LEN2) times with a
// dummy() call after each pass, then prints the label, the clock() difference
// divided by 1e6, and the sum of aa via check(11). Returns 0.
int s114()
{
// linear dependence testing
// transpose vectorization
// Jump in data access - not vectorizable
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s114 ");
start_t = clock();
for (int nl = 0; nl < 200*(ntimes/(LEN2)); nl++) {
for (int i = 0; i < LEN2; i++) {
for (int j = 0; j < i; j++) {
aa[i][j] = aa[j][i] + bb[i][j];
}
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S114\t %.2f \t\t", clock_dif_sec);;
check(11);
return 0;
}
// %1.1
// Timed kernel S115: for each j, a[i] -= aa[j][i] * a[j] for i in
// (j, LEN2); only the first LEN2 elements of a are touched.
// Calls init("s115 "), repeats the loop nest 1000*(ntimes/LEN2) times with a
// dummy() call after each pass, then prints the label, the clock() difference
// divided by 1e6, and the sum of a via check(1). Returns 0.
int s115()
{
// linear dependence testing
// triangular saxpy loop
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s115 ");
start_t = clock();
for (int nl = 0; nl < 1000*(ntimes/LEN2); nl++) {
for (int j = 0; j < LEN2; j++) {
for (int i = j+1; i < LEN2; i++) {
a[i] -= aa[j][i] * a[j];
}
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S115\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// Timed kernel S1115: aa[i][j] = aa[i][j]*cc[j][i] + bb[i][j] over the full
// LEN2 x LEN2 range; cc is read with its indices swapped.
// Reuses the "s115 " initialization, repeats the loop nest 100*(ntimes/LEN2)
// times with a dummy() call after each pass, then prints the label, the
// clock() difference divided by 1e6, and the sum of aa via check(11).
// Returns 0.
int s1115()
{
// linear dependence testing
// full (non-triangular) update with a transposed read of cc
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s115 ");
start_t = clock();
for (int nl = 0; nl < 100*(ntimes/LEN2); nl++) {
for (int i = 0; i < LEN2; i++) {
for (int j = 0; j < LEN2; j++) {
aa[i][j] = aa[i][j]*cc[j][i] + bb[i][j];
}
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S1115\t %.2f \t\t", clock_dif_sec);;
check(11);
return 0;
}
// %1.1
// Timed kernel S116: a[k] = a[k+1] * a[k], written as a loop that handles
// five consecutive elements per iteration (i advances by 5 while i < LEN-5).
// Calls init("s116 "), repeats the loop ntimes*10 times with a dummy() call
// after each pass, then prints the label, the clock() difference divided by
// 1e6, and the sum of a via check(1). Returns 0.
int s116()
{
// linear dependence testing
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s116 ");
start_t = clock();
for (int nl = 0; nl < ntimes*10; nl++) {
for (int i = 0; i < LEN - 5; i += 5) {
a[i] = a[i + 1] * a[i];
a[i + 1] = a[i + 2] * a[i + 1];
a[i + 2] = a[i + 3] * a[i + 2];
a[i + 3] = a[i + 4] * a[i + 3];
a[i + 4] = a[i + 5] * a[i + 4];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S116\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %1.1
// Timed kernel S118: for i in [1, LEN2), a[i] accumulates
// bb[j][i] * a[i-j-1] over j in [0, i-1], so each a[i] depends on all
// earlier elements of a.
// Calls init("s118 "), repeats the loop nest 200*(ntimes/LEN2) times with a
// dummy() call after each pass, then prints the label, the clock() difference
// divided by 1e6, and the sum of a via check(1). Returns 0.
int s118()
{
// linear dependence testing
// potential dot product recursion
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s118 ");
start_t = clock();
for (int nl = 0; nl < 200*(ntimes/LEN2); nl++) {
for (int i = 1; i < LEN2; i++) {
for (int j = 0; j <= i - 1; j++) {
a[i] += bb[j][i] * a[i-j-1];
}
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S118\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %1.1
// Timed kernel S119: aa[i][j] = aa[i-1][j-1] + bb[i][j] for i, j in
// [1, LEN2); each row reads the previous row shifted by one column.
// Calls init("s119 "), repeats the loop nest 200*(ntimes/LEN2) times with a
// dummy() call after each pass, then prints the label, the clock() difference
// divided by 1e6, and the sum of aa via check(11). Returns 0.
int s119()
{
// linear dependence testing
// no dependence - vectorizable
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init("s119 ");
start_t = clock();
for (int nl = 0; nl < 200*(ntimes/(LEN2)); nl++) {
for (int i = 1; i < LEN2; i++) {
for (int j = 1; j < LEN2; j++) {
aa[i][j] = aa[i-1][j-1] + bb[i][j];
}
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif /1000000.0);
printf("S119\t %.2f \t\t ", clock_dif_sec);
check(11);
return 0;
}
// Timed kernel S1119: aa[i][j] = aa[i-1][j] + bb[i][j] for i in [1, LEN2)
// and all j; each row reads the same column of the previous row.
// Reuses the "s119 " initialization, repeats the loop nest 200*(ntimes/LEN2)
// times with a dummy() call after each pass, then prints the label, the
// clock() difference divided by 1e6, and the sum of aa via check(11).
// Returns 0.
int s1119()
{
// linear dependence testing
// no dependence - vectorizable
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init("s119 ");
start_t = clock();
for (int nl = 0; nl < 200*(ntimes/(LEN2)); nl++) {
for (int i = 1; i < LEN2; i++) {
for (int j = 0; j < LEN2; j++) {
aa[i][j] = aa[i-1][j] + bb[i][j];
}
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif /1000000.0);
printf("S1119\t %.2f \t\t ", clock_dif_sec);
check(11);
return 0;
}
#endif // TESTS & LINEAR_DEPENDENCE
#if TESTS & INDUCTION_VARIABLE
// %1.2
// Timed kernel S121: a[i] = a[j] + b[i] with j = i + 1 held in a scalar
// declared outside the loops, for i in [0, LEN-1).
// Calls init("s121 "), repeats the loop 3*ntimes times with a dummy() call
// after each pass, then prints the label, the clock() difference divided by
// 1e6, and the sum of a via check(1). Returns 0.
int s121()
{
// induction variable recognition
// loop with possible ambiguity because of scalar store
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s121 ");
start_t = clock();
int j;
for (int nl = 0; nl < 3*ntimes; nl++) {
for (int i = 0; i < LEN-1; i++) {
j = i + 1;
a[i] = a[j] + b[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S121\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %1.2
// Timed kernel S122: a[i] += b[LEN - k], where i runs from n1-1 up to LEN
// in steps of n3 and k counts 1, 2, 3, ... (so b is read backward from its
// last element).
//   n1 : 1-based index of the first element of a to update
//   n3 : stride between updated elements
// NOTE(review): the loop indexes before a if n1 < 1 and never terminates if
// n3 <= 0; callers must pass n1 >= 1 and n3 >= 1.
// Calls init("s122 "), repeats the loop ntimes times with a dummy() call
// after each pass, then prints the label, the clock() difference divided by
// 1e6, and the sum of a via check(1). Returns 0.
int s122(int n1, int n3)
{
// induction variable recognition
// variable lower and upper bound, and stride
// reverse data access and jump in data access
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s122 ");
start_t = clock();
int j, k;
for (int nl = 0; nl < ntimes; nl++) {
j = 1;
k = 0;
for (int i = n1-1; i < LEN; i += n3) {
k += j;
a[i] += b[LEN - k];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S122\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %1.2
// Timed kernel S123: packs results into a through the index j. Each i in
// [0, LEN/2) always stores b[i] + d[i]*e[i], and additionally stores
// c[i] + d[i]*e[i] when c[i] > 0, so j advances by one or two per iteration.
// Calls init("s123 "), repeats the loop ntimes times with a dummy() call
// after each pass, then prints the label, the clock() difference divided by
// 1e6, and the sum of a via check(1). Returns 0.
int s123()
{
// induction variable recognition
// induction variable under an if
// not vectorizable, the condition cannot be speculated
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s123 ");
start_t = clock();
int j;
for (int nl = 0; nl < ntimes; nl++) {
j = -1;
for (int i = 0; i < (LEN/2); i++) {
j++;
a[j] = b[i] + d[i] * e[i];
if (c[i] > (TYPE)0.) {
j++;
a[j] = c[i] + d[i] * e[i];
}
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S123\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %1.2
// Timed kernel S124: j is incremented in both branches of the if, so it
// always equals i; a[j] receives b[i] + d[i]*e[i] when b[i] > 0 and
// c[i] + d[i]*e[i] otherwise.
// Calls init("s124 "), repeats the loop ntimes times with a dummy() call
// after each pass, then prints the label, the clock() difference divided by
// 1e6, and the sum of a via check(1). Returns 0.
int s124()
{
// induction variable recognition
// induction variable under both sides of if (same value)
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s124 ");
start_t = clock();
int j;
for (int nl = 0; nl < ntimes; nl++) {
j = -1;
for (int i = 0; i < LEN; i++) {
if (b[i] > (TYPE)0.) {
j++;
a[j] = b[i] + d[i] * e[i];
} else {
j++;
a[j] = c[i] + d[i] * e[i];
}
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S124\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %1.2
// Timed kernel S125: flattens a 2-D computation into the 1-D array:
// array[k] = aa[i][j] + bb[i][j]*cc[i][j], with k counting up by one for
// every (i, j) pair in row-major order.
// Calls init("s125 "), repeats the loop nest 100*(ntimes/LEN2) times with a
// dummy() call after each pass, then prints the label, the clock() difference
// divided by 1e6, and the sum of array via check(0). Returns 0.
int s125()
{
// induction variable recognition
// induction variable in two loops; collapsing possible
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s125 ");
start_t = clock();
int k;
for (int nl = 0; nl < 100*(ntimes/(LEN2)); nl++) {
k = -1;
for (int i = 0; i < LEN2; i++) {
for (int j = 0; j < LEN2; j++) {
k++;
array[k] = aa[i][j] + bb[i][j] * cc[i][j];
}
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S125\t %.2f \t\t", clock_dif_sec);;
check(0);
return 0;
}
// %1.2
// Timed kernel S126: walks bb column by column;
// bb[j][i] = bb[j-1][i] + array[k-1]*cc[j][i] for j in [1, LEN2), where k
// advances once per inner iteration and once more after each column.
// Calls init("s126 "), repeats the loop nest 10*(ntimes/LEN2) times with a
// dummy() call after each pass, then prints the label, the clock() difference
// divided by 1e6, and the sum of bb via check(22). Returns 0.
int s126()
{
// induction variable recognition
// induction variable in two loops; recurrence in inner loop
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s126 ");
start_t = clock();
int k;
for (int nl = 0; nl < 10*(ntimes/LEN2); nl++) {
k = 1;
for (int i = 0; i < LEN2; i++) {
for (int j = 1; j < LEN2; j++) {
bb[j][i] = bb[j-1][i] + array[k-1] * cc[j][i];
++k;
}
++k;
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S126\t %.2f \t\t", clock_dif_sec);;
check(22);
return 0;
}
// %1.2
// Timed kernel S127: j is incremented twice per iteration, so for i in
// [0, LEN/2) the loop writes a[2*i] = b[i] + c[i]*d[i] and
// a[2*i+1] = b[i] + d[i]*e[i].
// Calls init("s127 "), repeats the loop 2*ntimes times with a dummy() call
// after each pass, then prints the label, the clock() difference divided by
// 1e6, and the sum of a via check(1). Returns 0.
int s127()
{
// induction variable recognition
// induction variable with multiple increments
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s127 ");
start_t = clock();
int j;
for (int nl = 0; nl < 2*ntimes; nl++) {
j = -1;
for (int i = 0; i < LEN/2; i++) {
j++;
a[j] = b[i] + c[i] * d[i];
j++;
a[j] = b[i] + d[i] * e[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S127\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %1.2
// Timed kernel S128: k = j + 1 and j = k + 1 are updated together, so k
// takes the values 0, 2, 4, ...; for i in [0, LEN/2) the loop computes
// a[i] = b[k] - d[i] and then b[k] = a[i] + c[k].
// Calls init("s128 "), repeats the loop 2*ntimes times with a dummy() call
// (scalar argument 1.) after each pass, then prints the label, the clock()
// difference divided by 1e6, and the sum of a plus the sum of b via
// check(12). Returns 0.
int s128()
{
// induction variables
// coupled induction variables
// jump in data access
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s128 ");
start_t = clock();
int j, k;
for (int nl = 0; nl < 2*ntimes; nl++) {
j = -1;
for (int i = 0; i < LEN/2; i++) {
k = j + 1;
a[i] = b[k] - d[i];
j = k + 1;
b[k] = a[i] + c[k];
}
dummy(a, b, c, d, e, aa, bb, cc, 1.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S128\t %.2f \t\t", clock_dif_sec);;
check(12);
return 0;
}
#endif // TESTS & INDUCTION_VARIABLE
// %1.3
#if TESTS & GLOBAL_DATA_FLOW
// s131: a[i] = a[i + m] + b[i] for i in 0 .. LEN-2, where m is a local
// that is set to 1 and never changed (the offset is known only after
// forward substitution of m).
// The kernel is repeated 5*ntimes times with dummy() after each repetition.
// Prints "S131" plus elapsed clock() ticks / 1e6, calls check(1), returns 0.
int s131()
{
// global data flow analysis
// forward substitution
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s131 ");
start_t = clock();
int m = 1;
for (int nl = 0; nl < 5*ntimes; nl++) {
for (int i = 0; i < LEN - 1; i++) {
a[i] = a[i + m] + b[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S131\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %1.3
// s132: aa[j][i] = aa[k][i-1] + b[i] * c[1] for i in 1 .. LEN2-1, with the
// row indices held in locals: j = m = 0 and k = m + 1 = 1. Rows j and k
// are therefore distinct, which is only visible after propagating m.
// The kernel is repeated 400*ntimes times with dummy() after each.
// Prints "S132" plus elapsed clock() ticks / 1e6, calls check(11), returns 0.
int s132()
{
// global data flow analysis
// loop with multiple dimension ambiguous subscripts
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s132 ");
start_t = clock();
int m = 0;
int j = m;
int k = m+1;
for (int nl = 0; nl < 400*ntimes; nl++) {
for (int i= 1; i < LEN2; i++) {
aa[j][i] = aa[k][i-1] + b[i] * c[1];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S132\t %.2f \t\t", clock_dif_sec);;
check(11);
return 0;
}
// %1.4
// s141: accumulates bb[j][i] into the flat buffer array[] at an index k
// that grows non-linearly: k starts at (i+1)*i/2 + i and is advanced by
// j+1 after every inner iteration (j runs from i to LEN2-1).
// The kernel is repeated 200*(ntimes/LEN2) times with dummy() after each.
// Prints "S141" plus elapsed clock() ticks / 1e6, calls check(0), returns 0.
int s141()
{
// nonlinear dependence testing
// walk a row in a symmetric packed array
// element a(i,j) for (j>i) stored in location j*(j-1)/2+i
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s141 ");
start_t = clock();
int k;
for (int nl = 0; nl < 200*(ntimes/LEN2); nl++) {
for (int i = 0; i < LEN2; i++) {
// Written with 1-based terms (i+1); algebraically (i+1)*i/2 + i.
k = (i+1) * ((i+1) - 1) / 2 + (i+1)-1;
for (int j = i; j < LEN2; j++) {
array[k] += bb[j][i];
k += j+1;
}
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S141\t %.2f \t\t", clock_dif_sec);;
check(0);
return 0;
}
// %1.5
// s151s: helper kernel for s151. For i in 0 .. LEN-2 it stores
// a[i] = a[i + m] + b[i]; the offset m is supplied by the caller.
// Always returns 0.
// NOTE(review): with LEN-element arrays the read a[i + m] stays in bounds
// only for m == 0 or m == 1; s151 passes 1.
int s151s(TYPE a[LEN], TYPE b[LEN], int m)
{
for (int i = 0; i < LEN-1; i++) {
a[i] = a[i + m] + b[i];
}
return 0;
}
// s151: calls the helper s151s(a, b, 1) 5*ntimes times, so the loop body
// and its constant offset are only visible across the call boundary.
// dummy() is called after every helper call.
// Prints "S151" plus elapsed clock() ticks / 1e6, calls check(1), returns 0.
int s151()
{
// interprocedural data flow analysis
// passing parameter information into a subroutine
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s151 ");
start_t = clock();
for (int nl = 0; nl < 5*ntimes; nl++) {
s151s(a, b, 1);
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S151\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %1.5
// s152s: helper kernel for s152. Updates the single element
// a[i] += b[i] * c[i] at the caller-supplied index i (no bounds check).
// Always returns 0.
int s152s(TYPE a[LEN], TYPE b[LEN], TYPE c[LEN], int i)
{
a[i] += b[i] * c[i];
return 0;
}
// s152: for every i in 0 .. LEN-1 sets b[i] = d[i] * e[i] and then calls
// s152s(a, b, c, i), which adds b[i] * c[i] to a[i]. The element-wise
// update of a[] therefore happens inside a per-element call.
// The kernel is repeated ntimes times with dummy() after each repetition.
// Prints "S152" plus elapsed clock() ticks / 1e6, calls check(1), returns 0.
int s152()
{
// interprocedural data flow analysis
// collecting information from a subroutine
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s152 ");
start_t = clock();
for (int nl = 0; nl < ntimes; nl++) {
for (int i = 0; i < LEN; i++) {
b[i] = d[i] * e[i];
s152s(a, b, c, i);
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S152\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
#endif // TESTS & GLOBAL_DATA_FLOW
#if TESTS & CONTROL_FLOW
// %1.6
// s161: goto-based two-way branch per element, i in 0 .. LEN-2:
//   b[i] <  0  ->  c[i+1] = a[i] + d[i] * d[i]      (label L20)
//   otherwise  ->  a[i]   = c[i] + d[i] * e[i]
// Exactly one of the two stores executes in each iteration.
// The kernel is repeated ntimes/2 times with dummy() after each repetition.
// Prints "S161" plus elapsed clock() ticks / 1e6, calls check(13), returns 0.
int s161()
{
// control flow
// tests for recognition of loop independent dependences
// between statements in mutually exclusive regions.
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s161 ");
start_t = clock();
for (int nl = 0; nl < ntimes/2; nl++) {
for (int i = 0; i < LEN-1; ++i) {
if (b[i] < (TYPE)0.) {
goto L20;
}
a[i] = c[i] + d[i] * e[i];
goto L10;
L20:
c[i+1] = a[i] + d[i] * d[i];
L10:
;
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S161\t %.2f \t\t", clock_dif_sec);;
check(13);
return 0;
}
// s1161: variant of s161 that branches on c[i] and writes b[i] instead:
//   c[i] <  0  ->  b[i] = a[i] + d[i] * d[i]        (label L20)
//   otherwise  ->  a[i] = c[i] + d[i] * e[i]
// for i in 0 .. LEN-2, repeated ntimes times with dummy() after each.
// Prints "S1161" plus elapsed clock() ticks / 1e6, calls check(13), returns 0.
int s1161()
{
// control flow
// tests for recognition of loop independent dependences
// between statements in mutually exclusive regions.
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
// NOTE(review): the init tag is "s161 " although the printed label is
// S1161 - presumably the s161 setup is reused on purpose; confirm in init().
init( "s161 ");
start_t = clock();
for (int nl = 0; nl < ntimes; nl++) {
for (int i = 0; i < LEN-1; ++i) {
if (c[i] < (TYPE)0.) {
goto L20;
}
a[i] = c[i] + d[i] * e[i];
goto L10;
L20:
b[i] = a[i] + d[i] * d[i];
L10:
;
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S1161\t %.2f \t\t", clock_dif_sec);;
check(13);
return 0;
}
// %1.6
// s162: when k > 0, computes a[i] = a[i + k] + b[i] * c[i] for
// i in 0 .. LEN-2; when k <= 0 the inner loop is skipped entirely.
// The guard is what tells a compiler that the read is ahead of the write.
// Repeated ntimes times with dummy() after each repetition.
// Prints "S162" plus elapsed clock() ticks / 1e6, calls check(1), returns 0.
// NOTE(review): assuming a[] holds LEN elements, a[i + k] is in bounds
// only for k == 1; the call site is not visible here.
int s162(int k)
{
// control flow
// deriving assertions
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s162 ");
start_t = clock();
for (int nl = 0; nl < ntimes; nl++) {
if (k > 0) {
for (int i = 0; i < LEN-1; i++) {
a[i] = a[i + k] + b[i] * c[i];
}
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S162\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
#endif // TESTS & CONTROL_FLOW
#if TESTS & SYMBOLICS
// %1.7
// s171: strided update a[i * inc] += b[i] for i in 0 .. LEN-1, where the
// stride inc is a run-time parameter.
// Repeated ntimes times with dummy() after each repetition.
// Prints "S171" plus elapsed clock() ticks / 1e6, calls check(1), returns 0.
// NOTE(review): assuming a[] holds LEN elements, every index i * inc is in
// bounds only for inc == 0 or inc == 1; the call site is not visible here.
int s171(int inc)
{
// symbolics
// symbolic dependence tests
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s171 ");
start_t = clock();
for (int nl = 0; nl < ntimes; nl++) {
for (int i = 0; i < LEN; i++) {
a[i * inc] += b[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S171\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %1.7
// s172: a[i] += b[i] for i = n1-1, n1-1+n3, ... while i < LEN; both the
// start (n1, 1-based) and the step (n3) are run-time parameters.
// Repeated ntimes times with dummy() after each repetition.
// Prints "S172" plus elapsed clock() ticks / 1e6, calls check(1), returns 0.
// NOTE(review): the loop relies on n3 > 0 to terminate and on n1 >= 1 to
// keep the first index non-negative; neither is checked here.
int s172( int n1, int n3)
{
// symbolics
// vectorizable if n3 .ne. 0
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s172 ");
start_t = clock();
for (int nl = 0; nl < ntimes; nl++) {
for (int i = n1-1; i < LEN; i += n3) {
a[i] += b[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S172\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %1.7
// s173: a[i + k] = a[i] + b[i] for i in 0 .. LEN/2-1 with k = LEN/2, so the
// lower half of a[] is read and the upper half is written (no overlap).
// Repeated 10*ntimes times with dummy() after each repetition.
// Prints "S173" plus elapsed clock() ticks / 1e6, calls check(1), returns 0.
int s173()
{
// symbolics
// expression in loop bounds and subscripts
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s173 ");
start_t = clock();
int k = LEN/2;
for (int nl = 0; nl < 10*ntimes; nl++) {
for (int i = 0; i < LEN/2; i++) {
a[i+k] = a[i] + b[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S173\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %1.7
// s174: a[i + M] = a[i] + b[i] for i in 0 .. M-1. Because the trip count
// equals the offset, the elements read (0 .. M-1) never overlap the
// elements written (M .. 2M-1).
// Repeated 10*ntimes times with dummy() after each repetition.
// Prints "S174" plus elapsed clock() ticks / 1e6, calls check(1), returns 0.
// NOTE(review): assuming a[] holds LEN elements, M must not exceed LEN/2;
// the call site is not visible here.
int s174(int M)
{
// symbolics
// loop with subscript that may seem ambiguous
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s174 ");
start_t = clock();
for (int nl = 0; nl < 10*ntimes; nl++) {
for (int i = 0; i < M; i++) {
a[i+M] = a[i] + b[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S174\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %1.7
// s175: a[i] = a[i + inc] + b[i] for i = 0, inc, 2*inc, ... while
// i < LEN-1; the same run-time value inc is both the step and the offset.
// Repeated ntimes times with dummy() after each repetition.
// Prints "S175" plus elapsed clock() ticks / 1e6, calls check(1), returns 0.
// NOTE(review): the loop needs inc > 0 to terminate, and (assuming a[]
// holds LEN elements) the last read a[i + inc] can pass the end when
// inc > 1; the call site is not visible here.
int s175(int inc)
{
// symbolics
// symbolic dependence tests
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s175 ");
start_t = clock();
for (int nl = 0; nl < ntimes; nl++) {
for (int i = 0; i < LEN-1; i += inc) {
a[i] = a[i + inc] + b[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S175\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %1.7
// s176: convolution-style accumulation a[i] += b[i + m - j - 1] * c[j]
// for j in 0 .. LEN/2-1 (outer) and i in 0 .. m-1 (inner), with m = LEN/2.
// The b index ranges over 0 .. LEN-2.
// Repeated 4*(ntimes/LEN) times with dummy() after each repetition.
// Prints "S176" plus elapsed clock() ticks / 1e6, calls check(1), returns 0.
int s176()
{
// symbolics
// convolution
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s176 ");
start_t = clock();
int m = LEN/2;
for (int nl = 0; nl < 4*(ntimes/LEN); nl++) {
for (int j = 0; j < (LEN/2); j++) {
for (int i = 0; i < m; i++) {
a[i] += b[i+m-j-1] * c[j];
}
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S176\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
#endif // TESTS & SYMBOLICS
// **********************************************************
// * *
// * VECTORIZATION *
// * *
// **********************************************************
#if TESTS & STATEMENT_REORDERING
// %2.1
// s211: for i in 1 .. LEN-2,
//   a[i] = b[i-1] + c[i] * d[i]   (reads the b value written one
//                                   iteration earlier)
//   b[i] = b[i+1] - e[i] * d[i]   (reads a b value not yet overwritten)
// Repeated ntimes times with dummy() after each repetition.
// Prints "S211" plus elapsed clock() ticks / 1e6, calls check(12), returns 0.
int s211()
{
// statement reordering
// statement reordering allows vectorization
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s211 ");
start_t = clock();
for (int nl = 0; nl < ntimes; nl++) {
for (int i = 1; i < LEN-1; i++) {
a[i] = b[i - 1] + c[i] * d[i];
b[i] = b[i + 1] - e[i] * d[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S211\t %.2f \t\t", clock_dif_sec);;
check(12);
return 0;
}
// %2.1
// s212: for i in 0 .. LEN-2,
//   a[i] *= c[i]
//   b[i] += a[i+1] * d[i]   (reads a[i+1] before the next iteration
//                            scales it)
// Repeated ntimes times with dummy() after each repetition.
// Prints "S212" plus elapsed clock() ticks / 1e6, calls check(12), returns 0.
int s212()
{
// statement reordering
// dependency needing temporary
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s212 ");
start_t = clock();
for (int nl = 0; nl < ntimes; nl++) {
for (int i = 0; i < LEN-1; i++) {
a[i] *= c[i];
b[i] += a[i + 1] * d[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S212\t %.2f \t\t", clock_dif_sec);;
check(12);
return 0;
}
// s1213: for i in 1 .. LEN-2,
//   a[i] = b[i-1] + c[i]    (reads the b value written one iteration earlier)
//   b[i] = a[i+1] * d[i]    (reads an a value not yet overwritten)
// Repeated ntimes times with dummy() after each repetition. The init tag
// is "s212 " while the printed label is "S1213".
// Prints "S1213" plus elapsed clock() ticks / 1e6, calls check(12), returns 0.
int s1213()
{
// statement reordering
// dependency needing temporary
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s212 ");
start_t = clock();
for (int nl = 0; nl < ntimes; nl++) {
for (int i = 1; i < LEN-1; i++) {
a[i] = b[i-1]+c[i];
b[i] = a[i+1]*d[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S1213\t %.2f \t\t", clock_dif_sec);;
check(12);
return 0;
}
#endif // TESTS & STATEMENT_REORDERING
#if TESTS & LOOP_RESTRUCTURING
// %2.2
// s221: for i in 1 .. LEN-1,
//   a[i] += c[i] * d[i]              (independent per element)
//   b[i]  = b[i-1] + a[i] + d[i]     (running sum, depends on b[i-1])
// Repeated ntimes/2 times with dummy() after each repetition.
// Prints "S221" plus elapsed clock() ticks / 1e6, calls check(12), returns 0.
int s221()
{
// loop distribution
// loop that is partially recursive
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s221 ");
start_t = clock();
for (int nl = 0; nl < ntimes/2; nl++) {
for (int i = 1; i < LEN; i++) {
a[i] += c[i] * d[i];
b[i] = b[i - 1] + a[i] + d[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S221\t %.2f \t\t", clock_dif_sec);;
check(12);
return 0;
}
// s1221: b[i] = b[i-4] + a[i] for i in 4 .. LEN-1, i.e. a recurrence on
// b[] whose dependence distance is 4 elements.
// Repeated ntimes times with dummy() after each repetition. The init tag
// is "s221 " while the printed label is "S1221".
// Prints "S1221" plus elapsed clock() ticks / 1e6, calls check(12), returns 0.
int s1221()
{
// recurrence with a fixed dependence distance of 4
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s221 ");
start_t = clock();
for (int nl = 0; nl < ntimes; nl++) {
for (int i = 4; i < LEN; i++) {
b[i] = b[i - 4] + a[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S1221\t %.2f \t\t", clock_dif_sec);;
check(12);
return 0;
}
// %2.2
// s222: for i in 1 .. LEN-1,
//   a[i] += b[i] * c[i]
//   e[i]  = e[i-1] * e[i-1]     (recurrence on e, sits between the two
//                                independent a[] updates)
//   a[i] -= b[i] * c[i]
// Repeated ntimes/2 times with dummy() after each repetition.
// Prints "S222" plus elapsed clock() ticks / 1e6, calls check(12), returns 0.
// NOTE(review): the loop writes a[] and e[]; check() is defined elsewhere -
// confirm that argument 12 covers the arrays intended.
int s222()
{
// loop distribution
// partial loop vectorization, recurrence in middle
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s222 ");
start_t = clock();
for (int nl = 0; nl < ntimes/2; nl++) {
for (int i = 1; i < LEN; i++) {
a[i] += b[i] * c[i];
e[i] = e[i - 1] * e[i - 1];
a[i] -= b[i] * c[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S222\t %.2f \t\t", clock_dif_sec);;
check(12);
return 0;
}
// %2.3
// s231: aa[j][i] = aa[j-1][i] + bb[j][i] with i (column) as the outer loop
// and j (row, 1 .. LEN2-1) as the inner loop, so the recurrence runs down
// each column in the innermost loop.
// Repeated 100*(ntimes/LEN2) times with dummy() after each repetition.
// Prints "S231" plus elapsed clock() ticks / 1e6, calls check(11), returns 0.
int s231()
{
// loop interchange
// loop with data dependency
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s231 ");
start_t = clock();
for (int nl = 0; nl < 100*(ntimes/LEN2); nl++) {
for (int i = 0; i < LEN2; ++i) {
for (int j = 1; j < LEN2; j++) {
aa[j][i] = aa[j - 1][i] + bb[j][i];
}
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S231\t %.2f \t\t", clock_dif_sec);;
check(11);
return 0;
}
// %2.3
// s232: triangular nest, j in 1 .. LEN2-1 and i in 1 .. j:
//   aa[j][i] = aa[j][i-1] * aa[j][i-1] + bb[j][i]
// The recurrence runs along each row in the inner loop.
// Repeated 100*(ntimes/LEN2) times; dummy() is called with 1. here.
// Prints "S232" plus elapsed clock() ticks / 1e6, calls check(11), returns 0.
int s232()
{
// loop interchange
// interchanging of triangular loops
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s232 ");
start_t = clock();
for (int nl = 0; nl < 100*(ntimes/(LEN2)); nl++) {
for (int j = 1; j < LEN2; j++) {
for (int i = 1; i <= j; i++) {
aa[j][i] = aa[j][i-1]*aa[j][i-1]+bb[j][i];
}
}
dummy(a, b, c, d, e, aa, bb, cc, 1.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S232\t %.2f \t\t", clock_dif_sec);;
check(11);
return 0;
}
// s1232: triangular nest without a recurrence, j in 0 .. LEN2-1 and
// i in j .. LEN2-1:  aa[i][j] = bb[i][j] + cc[i][j].
// The inner loop walks down column j (row index i varies fastest).
// Repeated 100*(ntimes/LEN2) times; dummy() is called with 1. here.
// The init tag is "s232 " while the printed label is "S1232".
// Prints "S1232" plus elapsed clock() ticks / 1e6, calls check(11), returns 0.
int s1232()
{
// loop interchange
// interchanging of triangular loops
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s232 ");
start_t = clock();
for (int nl = 0; nl < 100*(ntimes/LEN2); nl++) {
for (int j = 0; j < LEN2; j++) {
for (int i = j; i < LEN2; i++) {
aa[i][j] = bb[i][j] + cc[i][j];
}
}
dummy(a, b, c, d, e, aa, bb, cc, 1.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S1232\t %.2f \t\t", clock_dif_sec);;
check(11);
return 0;
}
// %2.3
// s233: outer loop over i in 1 .. LEN2-1 containing two inner j loops:
//   aa[j][i] = aa[j-1][i] + cc[j][i]   (recurrence along j, the inner index)
//   bb[j][i] = bb[j][i-1] + cc[j][i]   (recurrence along i, the outer index)
// Repeated 100*(ntimes/LEN2) times with dummy() after each repetition.
// Prints "S233" plus elapsed clock() ticks / 1e6, calls check(1122), returns 0.
int s233()
{
// loop interchange
// interchanging with one of two inner loops
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s233 ");
start_t = clock();
for (int nl = 0; nl < 100*(ntimes/LEN2); nl++) {
for (int i = 1; i < LEN2; i++) {
for (int j = 1; j < LEN2; j++) {
aa[j][i] = aa[j-1][i] + cc[j][i];
}
for (int j = 1; j < LEN2; j++) {
bb[j][i] = bb[j][i-1] + cc[j][i];
}
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S233\t %.2f \t\t", clock_dif_sec);;
check(1122);
return 0;
}
// s2233: like s233, but the second inner loop swaps the subscripts:
//   aa[j][i] = aa[j-1][i] + cc[j][i]   (recurrence along j, the inner index)
//   bb[i][j] = bb[i-1][j] + cc[i][j]   (recurrence along i, the outer index)
// with i and j both in 1 .. LEN2-1.
// Repeated 100*(ntimes/LEN2) times with dummy() after each repetition.
// The init tag is "s233 " while the printed label is "S2233".
// Prints "S2233" plus elapsed clock() ticks / 1e6, calls check(1122), returns 0.
int s2233()
{
// loop interchange
// interchanging with one of two inner loops
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s233 ");
start_t = clock();
for (int nl = 0; nl < 100*(ntimes/LEN2); nl++) {
for (int i = 1; i < LEN2; i++) {
for (int j = 1; j < LEN2; j++) {
aa[j][i] = aa[j-1][i] + cc[j][i];
}
for (int j = 1; j < LEN2; j++) {
bb[i][j] = bb[i-1][j] + cc[i][j];
}
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S2233\t %.2f \t\t", clock_dif_sec);;
check(1122);
return 0;
}
// %2.3
// s235: imperfect nest. For each i in 0 .. LEN2-1 first a[i] += b[i] * c[i],
// then the inner j loop (1 .. LEN2-1) runs the column recurrence
//   aa[j][i] = aa[j-1][i] + bb[j][i] * a[i].
// Repeated 200*(ntimes/LEN2) times with dummy() after each repetition.
// Prints "S235" plus elapsed clock() ticks / 1e6, calls check(111), returns 0.
int s235()
{
// loop interchanging
// imperfectly nested loops
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s235 ");
start_t = clock();
for (int nl = 0; nl < 200*(ntimes/LEN2); nl++) {
for (int i = 0; i < LEN2; i++) {
a[i] += b[i] * c[i];
for (int j = 1; j < LEN2; j++) {
aa[j][i] = aa[j-1][i] + bb[j][i] * a[i];
}
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S235\t %.2f \t\t", clock_dif_sec);;
check(111);
return 0;
}
#endif // TESTS & LOOP_RESTRUCTURING
#if TESTS & NODE_SPLITTING
// %2.4
// s241: for i in 0 .. LEN-2,
//   a[i] = b[i] * c[i] * d[i]
//   b[i] = a[i] * a[i+1] * d[i]   (a[i+1] is still the old value here; it
//                                  is overwritten in the next iteration)
// Repeated 2*ntimes times with dummy() after each repetition.
// Prints "S241" plus elapsed clock() ticks / 1e6, calls check(12), returns 0.
int s241()
{
// node splitting
// preloading necessary to allow vectorization
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s241 ");
start_t = clock();
for (int nl = 0; nl < 2*ntimes; nl++) {
for (int i = 0; i < LEN-1; i++) {
a[i] = b[i] * c[i ] * d[i];
b[i] = a[i] * a[i+1] * d[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S241\t %.2f \t\t", clock_dif_sec);;
check(12);
return 0;
}
// %2.4
// s242: running sum a[i] = a[i-1] + s1 + s2 + b[i] + c[i] + d[i] for
// i in 1 .. LEN-1; s1 and s2 are loop-invariant scalars from the caller.
// Repeated ntimes/5 times with dummy() after each repetition.
// Prints "S242" plus elapsed clock() ticks / 1e6, calls check(1), returns 0.
int s242(TYPE s1, TYPE s2)
{
// node splitting
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s242 ");
start_t = clock();
for (int nl = 0; nl < ntimes/5; nl++) {
for (int i = 1; i < LEN; ++i) {
a[i] = a[i - 1] + s1 + s2 + b[i] + c[i] + d[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S242\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %2.4
// s243: for i in 0 .. LEN-2, three chained statements:
//   a[i] = b[i] + c[i] * d[i]
//   b[i] = a[i] + d[i] * e[i]
//   a[i] = b[i] + a[i+1] * d[i]   (a[i+1] is still the old value here)
// Repeated ntimes times with dummy() after each repetition.
// Prints "S243" plus elapsed clock() ticks / 1e6, calls check(12), returns 0.
int s243()
{
// node splitting
// false dependence cycle breaking
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s243 ");
start_t = clock();
for (int nl = 0; nl < ntimes; nl++) {
for (int i = 0; i < LEN-1; i++) {
a[i] = b[i] + c[i ] * d[i];
b[i] = a[i] + d[i ] * e[i];
a[i] = b[i] + a[i+1] * d[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S243\t %.2f \t\t", clock_dif_sec);;
check(12);
return 0;
}
// %2.4
// s244: for i in 0 .. LEN-2,
//   a[i]   = b[i] + c[i] * d[i]
//   b[i]   = c[i] + b[i]
//   a[i+1] = b[i] + a[i+1] * d[i]   (overwritten again by the first
//                                    statement of the next iteration)
// Repeated ntimes times with dummy() after each repetition.
// Prints "S244" plus elapsed clock() ticks / 1e6, calls check(12), returns 0.
int s244()
{
// node splitting
// false dependence cycle breaking
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s244 ");
start_t = clock();
for (int nl = 0; nl < ntimes; nl++) {
for (int i = 0; i < LEN-1; ++i) {
a[i] = b[i] + c[i] * d[i];
b[i] = c[i] + b[i];
a[i+1] = b[i] + a[i+1] * d[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S244\t %.2f \t\t", clock_dif_sec);;
check(12);
return 0;
}
// s1244: for i in 0 .. LEN-2,
//   a[i] = b[i] + c[i]*c[i] + b[i]*b[i] + c[i]
//   d[i] = a[i] + a[i+1]    (new a[i] plus the not-yet-updated a[i+1])
// Repeated ntimes times with dummy() after each repetition.
// The init tag is "s244 " while the printed label is "S1244".
// Prints "S1244" plus elapsed clock() ticks / 1e6, calls check(12), returns 0.
// NOTE(review): the loop writes a[] and d[]; check() is defined elsewhere -
// confirm that argument 12 covers the arrays intended.
int s1244()
{
// node splitting
// cycle with true and anti dependency
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s244 ");
start_t = clock();
for (int nl = 0; nl < ntimes; nl++) {
for (int i = 0; i < LEN-1; i++) {
a[i] = b[i] + c[i] * c[i] + b[i]*b[i] + c[i];
d[i] = a[i] + a[i+1];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S1244\t %.2f \t\t", clock_dif_sec);;
check(12);
return 0;
}
// s2244: for i in 0 .. LEN-2,
//   a[i+1] = b[i] + e[i]
//   a[i]   = b[i] + c[i]
// Each a[i+1] is overwritten by the second statement of the next iteration,
// so only the final a[LEN-1] keeps the value from the first statement.
// Repeated ntimes times with dummy() after each repetition.
// The init tag is "s244 " while the printed label is "S2244".
// Prints "S2244" plus elapsed clock() ticks / 1e6, calls check(12), returns 0.
int s2244()
{
// node splitting
// cycle with true and anti dependency
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s244 ");
start_t = clock();
for (int nl = 0; nl < ntimes; nl++) {
for (int i = 0; i < LEN-1; i++) {
a[i+1] = b[i] + e[i];
a[i] = b[i] + c[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S2244\t %.2f \t\t", clock_dif_sec);;
check(12);
return 0;
}
#endif // TESTS & NODE_SPLITTING
#if TESTS & EXPANSION
// %2.5
// s251: per element, the scalar temporary s = b[i] + c[i] * d[i] is
// computed and a[i] = s * s is stored, for i in 0 .. LEN-1. s is declared
// outside the loops but always written before it is read.
// Repeated 4*ntimes times with dummy() after each repetition.
// Prints "S251" plus elapsed clock() ticks / 1e6, calls check(1), returns 0.
int s251()
{
// scalar and array expansion
// scalar expansion
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s251 ");
start_t = clock();
TYPE s;
for (int nl = 0; nl < 4*ntimes; nl++) {
for (int i = 0; i < LEN; i++) {
s = b[i] + c[i] * d[i];
a[i] = s * s;
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S251\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// s1251: for i in 0 .. LEN-1,
//   s    = b[i] + c[i]     (old b[i], saved before it is overwritten)
//   b[i] = a[i] + d[i]     (old a[i])
//   a[i] = s * e[i]
// Repeated 4*ntimes times with dummy() after each repetition.
// The init tag is "s251 " while the printed label is "S1251".
// Prints "S1251" plus elapsed clock() ticks / 1e6, calls check(1), returns 0.
int s1251()
{
// scalar and array expansion
// scalar expansion
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s251 ");
start_t = clock();
TYPE s;
for (int nl = 0; nl < 4*ntimes; nl++) {
for (int i = 0; i < LEN; i++) {
s = b[i]+c[i];
b[i] = a[i]+d[i];
a[i] = s*e[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S1251\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// s2251: the scalar s is used before it is redefined, so each iteration
// consumes the value produced by the previous one:
//   a[i] = s * e[i];  s = b[i] + c[i];  b[i] = a[i] + d[i]
// for i in 0 .. LEN-1, with s reset to 0 at the start of every repetition.
// Repeated ntimes times with dummy() after each repetition.
// The init tag is "s251 " while the printed label is "S2251".
// Prints "S2251" plus elapsed clock() ticks / 1e6, calls check(1), returns 0.
int s2251()
{
// scalar and array expansion
// scalar expansion
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s251 ");
start_t = clock();
for (int nl = 0; nl < ntimes; nl++) {
TYPE s = (TYPE)0.0;
for (int i = 0; i < LEN; i++) {
a[i] = s*e[i];
s = b[i]+c[i];
b[i] = a[i]+d[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S2251\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// s3251: for i in 0 .. LEN-2,
//   a[i+1] = b[i] + c[i]
//   b[i]   = c[i] * e[i]
//   d[i]   = a[i] * e[i]   (for i > 0, a[i] was written by the previous
//                           iteration's first statement)
// Repeated ntimes times with dummy() after each repetition.
// The init tag is "s251 " while the printed label is "S3251".
// Prints "S3251" plus elapsed clock() ticks / 1e6, calls check(1), returns 0.
int s3251()
{
// scalar and array expansion
// scalar expansion
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s251 ");
start_t = clock();
for (int nl = 0; nl < ntimes; nl++) {
for (int i = 0; i < LEN-1; i++){
a[i+1] = b[i]+c[i];
b[i] = c[i]*e[i];
d[i] = a[i]*e[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S3251\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %2.5
// s252: a[i] = s + t where s = b[i] * c[i] is this iteration's product and
// t is the previous iteration's product (0 for i == 0); t is reset at the
// start of every repetition. i runs over 0 .. LEN-1.
// Repeated ntimes times with dummy() after each repetition.
// Prints "S252" plus elapsed clock() ticks / 1e6, calls check(1), returns 0.
int s252()
{
// scalar and array expansion
// loop with ambiguous scalar temporary
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s252 ");
start_t = clock();
TYPE t, s;
for (int nl = 0; nl < ntimes; nl++) {
t = (TYPE) 0.;
for (int i = 0; i < LEN; i++) {
s = b[i] * c[i];
a[i] = s + t;
t = s;
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S252\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %2.5
// s253: for i in 0 .. LEN-1, only where a[i] > b[i]:
//   s = a[i] - b[i] * d[i];  c[i] += s;  a[i] = s
// The scalar s is assigned and used only inside the conditional.
// Repeated ntimes times with dummy() after each repetition.
// Prints "S253" plus elapsed clock() ticks / 1e6, calls check(13), returns 0.
int s253()
{
// scalar and array expansion
// scalar expansion assigned under if
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s253 ");
start_t = clock();
TYPE s;
for (int nl = 0; nl < ntimes; nl++) {
for (int i = 0; i < LEN; i++) {
if (a[i] > b[i]) {
s = a[i] - b[i] * d[i];
c[i] += s;
a[i] = s;
}
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S253\t %.2f \t\t", clock_dif_sec);;
check(13);
return 0;
}
// %2.5
// s254: a[i] = (b[i] + x) * 0.5 where x holds the previous element of b,
// seeded with b[LEN-1] so the first iteration wraps around. i runs over
// 0 .. LEN-1 and x is re-seeded at the start of every repetition.
// Repeated 4*ntimes times with dummy() after each repetition.
// Prints "S254" plus elapsed clock() ticks / 1e6, calls check(1), returns 0.
int s254()
{
// scalar and array expansion
// carry around variable
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s254 ");
start_t = clock();
// This local scalar shadows the file-scope array x[] inside this function.
TYPE x;
for (int nl = 0; nl < 4*ntimes; nl++) {
x = b[LEN-1];
for (int i = 0; i < LEN; i++) {
a[i] = (b[i] + x) * (TYPE).5;
x = b[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S254\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %2.5
// s255: a[i] = (b[i] + x + y) * 0.333 where x and y hold the previous and
// second-previous elements of b, seeded with b[LEN-1] and b[LEN-2] so the
// first two iterations wrap around. i runs over 0 .. LEN-1.
// Repeated ntimes times with dummy() after each repetition.
// Prints "S255" plus elapsed clock() ticks / 1e6, calls check(1), returns 0.
int s255()
{
// scalar and array expansion
// carry around variables, 2 levels
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s255 ");
start_t = clock();
// The local scalar x shadows the file-scope array x[] inside this function.
TYPE x, y;
for (int nl = 0; nl < ntimes; nl++) {
x = b[LEN-1];
y = b[LEN-2];
for (int i = 0; i < LEN; i++) {
a[i] = (b[i] + x + y) * (TYPE).333;
y = x;
x = b[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S255\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %2.5
// s256: for each i in 0 .. LEN2-1 the inner j loop (1 .. LEN2-1) does
//   a[j]     = 1.0 - a[j-1]
//   cc[j][i] = a[j] + bb[j][i] * d[j]
// a[] is indexed by j only, so it is rewritten on every outer iteration
// and acts as a per-column temporary.
// Repeated 10*(ntimes/LEN2) times with dummy() after each repetition.
// Prints "S256" plus elapsed clock() ticks / 1e6, calls check(111), returns 0.
// NOTE(review): the loop writes a[] and cc[][]; check() is defined
// elsewhere - confirm that argument 111 covers the arrays intended.
int s256()
{
// scalar and array expansion
// array expansion
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s256 ");
start_t = clock();
for (int nl = 0; nl < 10*(ntimes/LEN2); nl++) {
for (int i = 0; i < LEN2; i++) {
for (int j = 1; j < LEN2; j++) {
a[j] = (TYPE)1.0 - a[j - 1];
cc[j][i] = a[j] + bb[j][i]*d[j];
}
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S256\t %.2f \t\t", clock_dif_sec);;
check(111);
return 0;
}
// %2.5
// s257: for each i in 1 .. LEN2-1 the inner j loop (0 .. LEN2-1) does
//   a[i]     = aa[j][i] - a[i-1]
//   aa[j][i] = a[i] + bb[j][i]
// a[i] is rewritten on every inner iteration; only the value from the last
// j survives and feeds a[i-1] of the next outer iteration.
// Repeated 10*(ntimes/LEN2) times with dummy() after each repetition.
// Prints "S257" plus elapsed clock() ticks / 1e6, calls check(111), returns 0.
int s257()
{
// scalar and array expansion
// array expansion
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s257 ");
start_t = clock();
for (int nl = 0; nl < 10*(ntimes/LEN2); nl++) {
for (int i = 1; i < LEN2; i++) {
for (int j = 0; j < LEN2; j++) {
a[i] = aa[j][i] - a[i-1];
aa[j][i] = a[i] + bb[j][i];
}
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S257\t %.2f \t\t", clock_dif_sec);;
check(111);
return 0;
}
// s258: the scalar s is updated to d[i] * d[i] only where a[i] > 0 and
// otherwise keeps its value from an earlier iteration (0 at the start of
// each repetition). Every iteration then stores
//   b[i] = s * c[i] + d[i]   and   e[i] = (s + 1) * aa[0][i]
// for i in 0 .. LEN-1.
// Repeated ntimes/10 times with dummy() after each repetition.
// Prints "S258" plus elapsed clock() ticks / 1e6, calls check(25), returns 0.
int s258()
{
// scalar and array expansion
// wrap-around scalar under an if
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s258 ");
start_t = clock();
TYPE s;
for (int nl = 0; nl < ntimes/10; nl++) {
s = 0.;
for (int i = 0; i < LEN; ++i) {
if (a[i] > 0.) {
s = d[i] * d[i];
}
b[i] = s * c[i] + d[i];
// NOTE(review): row 0 of aa is indexed with i up to LEN-1, while other
// kernels index aa with bounds of LEN2 - confirm against aa's declaration
// that this stays inside the intended storage.
e[i] = (s + (TYPE)1.) * aa[0][i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S258\t %.2f \t\t", clock_dif_sec);;
check(25);
return 0;
}
// %2.6
// s261: the scalar t is reused for two unrelated values per iteration:
//   t = a[i] + b[i];   a[i] = t + c[i-1]
//   t = c[i] * d[i];   c[i] = t
// for i in 1 .. LEN-1. The read of c[i-1] sees the value stored by the
// previous iteration.
// Repeated ntimes times with dummy() after each repetition.
// Prints "S261" plus elapsed clock() ticks / 1e6, calls check(25), returns 0.
// NOTE(review): the loop writes a[] and c[], yet the check argument is the
// same 25 used by s258 (which writes b[] and e[]); check() is defined
// elsewhere - confirm the argument is intended.
int s261()
{
// scalar and array expansion
// same scalar temporary reused for two different values (no if here)
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s261 ");
start_t = clock();
TYPE t;
for (int nl = 0; nl < ntimes; nl++) {
for (int i = 1; i < LEN; ++i) {
t = a[i] + b[i];
a[i] = t + c[i-1];
t = c[i] * d[i];
c[i] = t;
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S261\t %.2f \t\t", clock_dif_sec);;
check(25);
return 0;
}
#endif // TESTS & EXPANSION
#if TESTS & CONTROL_FLOW
// s271: a[i] += b[i] * c[i] only where b[i] > 0, for i in 0 .. LEN-1.
// Repeated 4*ntimes times with dummy() after each repetition.
// Prints "S271" plus elapsed clock() ticks / 1e6, calls check(1), returns 0.
int s271()
{
// control flow
// loop with singularity handling
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s271 ");
start_t = clock();
for (int nl = 0; nl < 4*ntimes; nl++) {
for (int i = 0; i < LEN; i++) {
if (b[i] > (TYPE)0.) {
a[i] += b[i] * c[i];
}
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S271\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %2.7
// s272: where e[i] >= t (t is a caller-supplied threshold), updates
//   a[i] += c[i] * d[i]   and   b[i] += c[i] * c[i]
// for i in 0 .. LEN-1. The condition reads only e[], which the loop never
// writes.
// Repeated ntimes times with dummy() after each repetition.
// Prints "S272" plus elapsed clock() ticks / 1e6, calls check(12), returns 0.
int s272(TYPE t)
{
// control flow
// loop with independent conditional
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s272 ");
start_t = clock();
for (int nl = 0; nl < ntimes; nl++) {
for (int i = 0; i < LEN; i++) {
if (e[i] >= t) {
a[i] += c[i] * d[i];
b[i] += c[i] * c[i];
}
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S272\t %.2f \t\t", clock_dif_sec);;
check(12);
return 0;
}
// %2.7
// s273: for i in 0 .. LEN-1,
//   a[i] += d[i] * e[i]
//   if the updated a[i] < 0:  b[i] += d[i] * e[i]
//   c[i] += a[i] * d[i]       (unconditional)
// The condition depends on the value just written to a[i].
// Repeated ntimes times with dummy() after each repetition.
// Prints "S273" plus elapsed clock() ticks / 1e6, calls check(123), returns 0.
int s273()
{
// control flow
// simple loop with dependent conditional
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s273 ");
start_t = clock();
for (int nl = 0; nl < ntimes; nl++) {
for (int i = 0; i < LEN; i++) {
a[i] += d[i] * e[i];
// Unbraced if: only the b[i] update on the next line is conditional.
if (a[i] < (TYPE)0.)
b[i] += d[i] * e[i];
c[i] += a[i] * d[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S273\t %.2f \t\t", clock_dif_sec);;
check(123);
return 0;
}
// %2.7
// s274: for i in 0 .. LEN-1, a[i] = c[i] + e[i] * d[i]; then
//   a[i] >  0  ->  b[i] = a[i] + b[i]
//   otherwise  ->  a[i] = d[i] * e[i]
// The branch depends on the value just written to a[i].
// Repeated ntimes times with dummy() after each repetition.
// Prints "S274" plus elapsed clock() ticks / 1e6, calls check(12), returns 0.
int s274()
{
// control flow
// complex loop with dependent conditional
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s274 ");
start_t = clock();
for (int nl = 0; nl < ntimes; nl++) {
for (int i = 0; i < LEN; i++) {
a[i] = c[i] + e[i] * d[i];
if (a[i] > (TYPE)0.) {
b[i] = a[i] + b[i];
} else {
a[i] = d[i] * e[i];
}
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S274\t %.2f \t\t", clock_dif_sec);;
check(11);
return 0;
}
// %2.7
// s275: for each column i in 0 .. LEN2-1 whose top element aa[0][i] is
// positive, runs the column recurrence
//   aa[j][i] = aa[j-1][i] + bb[j][i] * cc[j][i]   for j in 1 .. LEN2-1.
// Columns failing the test are left untouched.
// Repeated 10*(ntimes/LEN2) times with dummy() after each repetition.
// Prints "S275" plus elapsed clock() ticks / 1e6, calls check(11), returns 0.
int s275()
{
// control flow
// if around inner loop, interchanging needed
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s275 ");
start_t = clock();
for (int nl = 0; nl < 10*(ntimes/LEN2); nl++) {
for (int i = 0; i < LEN2; i++) {
if (aa[0][i] > (TYPE)0.) {
for (int j = 1; j < LEN2; j++) {
aa[j][i] = aa[j-1][i] + bb[j][i] * cc[j][i];
}
}
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S275\t %.2f \t\t", clock_dif_sec);;
check(11);
return 0;
}
// s2275: imperfect nest. For each i in 0 .. LEN2-1 the inner j loop
// (0 .. LEN2-1) does aa[j][i] = aa[j][i] + bb[j][i] * cc[j][i] (walking
// down column i), and after it a[i] = b[i] + c[i] * d[i] is stored.
// Repeated 100*(ntimes/LEN2) times with dummy() after each repetition.
// The init tag is "s275 " while the printed label is "S2275".
// Prints "S2275" plus elapsed clock() ticks / 1e6, calls check(11), returns 0.
int s2275()
{
// loop distribution is needed to be able to interchange
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s275 ");
start_t = clock();
for (int nl = 0; nl < 100*(ntimes/LEN2); nl++) {
for (int i = 0; i < LEN2; i++) {
for (int j = 0; j < LEN2; j++) {
aa[j][i] = aa[j][i] + bb[j][i] * cc[j][i];
}
a[i] = b[i] + c[i] * d[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S2275\t %.2f \t\t", clock_dif_sec);;
check(11);
return 0;
}
// %2.7
// s276: the branch depends only on the loop index. With mid = LEN/2,
//   i + 1 <  mid  ->  a[i] += b[i] * c[i]
//   otherwise     ->  a[i] += b[i] * d[i]
// for i in 0 .. LEN-1, so the index range splits at i = mid - 1.
// Repeated 4*ntimes times with dummy() after each repetition.
// Prints "S276" plus elapsed clock() ticks / 1e6, calls check(1), returns 0.
int s276()
{
// control flow
// if test using loop index
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s276 ");
start_t = clock();
int mid = (LEN/2);
for (int nl = 0; nl < 4*ntimes; nl++) {
for (int i = 0; i < LEN; i++) {
if (i+1 < mid) {
a[i] += b[i] * c[i];
} else {
a[i] += b[i] * d[i];
}
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S276\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %2.7
// s277: goto-based guards, for i in 0 .. LEN-2:
//   a[i] >= 0             -> skip the iteration (L20)
//   else if b[i] >= 0     -> only b[i+1] = c[i] + d[i] * e[i]   (L30)
//   else                  -> a[i] += c[i] * d[i], then fall into L30
// b[i+1] written here is the guard value b[i] tested by the next iteration.
// Repeated ntimes times with dummy() after each repetition.
// Prints "S277" plus elapsed clock() ticks / 1e6, calls check(12), returns 0.
int s277()
{
// control flow
// test for dependences arising from guard variable computation.
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s277 ");
start_t = clock();
for (int nl = 0; nl < ntimes; nl++) {
for (int i = 0; i < LEN-1; i++) {
if (a[i] >= (TYPE)0.) {
goto L20;
}
if (b[i] >= (TYPE)0.) {
goto L30;
}
a[i] += c[i] * d[i];
L30:
b[i+1] = c[i] + d[i] * e[i];
L20:
;
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S277\t %.2f \t\t", clock_dif_sec);;
check(12);
return 0;
}
// %2.7
// s278: goto-based if/else, for i in 0 .. LEN-1:
//   a[i] >  0  ->  c[i] = -c[i] + d[i] * e[i]   (L20)
//   otherwise  ->  b[i] = -b[i] + d[i] * e[i]
// Both paths join at L30, where a[i] = b[i] + c[i] * d[i] is stored.
// Repeated ntimes times with dummy() after each repetition.
// Prints "S278" plus elapsed clock() ticks / 1e6, calls check(123), returns 0.
int s278()
{
// control flow
// if/goto to block if-then-else
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s278 ");
start_t = clock();
for (int nl = 0; nl < ntimes; nl++) {
for (int i = 0; i < LEN; i++) {
if (a[i] > (TYPE)0.) {
goto L20;
}
b[i] = -b[i] + d[i] * e[i];
goto L30;
L20:
c[i] = -c[i] + d[i] * e[i];
L30:
a[i] = b[i] + c[i] * d[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S278\t %.2f \t\t", clock_dif_sec);;
check(123);
return 0;
}
// %2.7
// s279: goto-based nested branches, for i in 0 .. LEN-1:
//   a[i] > 0   ->  c[i] = -c[i] + e[i] * e[i]                  (L20)
//   otherwise  ->  b[i] = -b[i] + d[i] * d[i]; then, only if the new
//                  b[i] > a[i], c[i] += d[i] * e[i]
// All paths join at L30, where a[i] = b[i] + c[i] * d[i] is stored.
// Repeated ntimes/2 times with dummy() after each repetition.
// Prints "S279" plus elapsed clock() ticks / 1e6, calls check(123), returns 0.
int s279()
{
// control flow
// vector if/gotos
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s279 ");
start_t = clock();
for (int nl = 0; nl < ntimes/2; nl++) {
for (int i = 0; i < LEN; i++) {
if (a[i] > (TYPE)0.) {
goto L20;
}
b[i] = -b[i] + d[i] * d[i];
if (b[i] <= a[i]) {
goto L30;
}
c[i] += d[i] * e[i];
goto L30;
L20:
c[i] = -c[i] + e[i] * e[i];
L30:
a[i] = b[i] + c[i] * d[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S279\t %.2f \t\t", clock_dif_sec);;
check(123);
return 0;
}
// s1279: structured (no goto) nested condition, for i in 0 .. LEN-1:
//   if a[i] < 0 and b[i] > a[i]:  c[i] += d[i] * e[i]
// Only c[] is written.
// Repeated ntimes times with dummy() after each repetition.
// The init tag is "s279 " while the printed label is "S1279".
// Prints "S1279" plus elapsed clock() ticks / 1e6, calls check(123), returns 0.
int s1279()
{
// control flow
// nested ifs (structured counterpart of the if/goto kernels)
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s279 ");
start_t = clock();
for (int nl = 0; nl < ntimes; nl++) {
for (int i = 0; i < LEN; i++) {
if (a[i] < (TYPE)0.) {
if (b[i] > a[i]) {
c[i] += d[i] * e[i];
}
}
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S1279\t %.2f \t\t", clock_dif_sec);;
check(123);
return 0;
}
// %2.7
// s2710: mixes per-element conditions with loop-invariant ones, for
// i in 0 .. LEN-1:
//   a[i] > b[i]:  a[i] += b[i] * d[i]; then, on the compile-time test
//                 LEN > 10, either c[i] += d[i]*d[i] or c[i] = d[i]*e[i] + 1
//   otherwise:    b[i] = a[i] + e[i]*e[i]; then, on the invariant test
//                 x > 0, either c[i] = a[i] + d[i]*d[i] or c[i] += e[i]*e[i]
// The parameter x shadows the file-scope array x[] inside this function.
// Repeated ntimes/2 times with dummy() after each repetition.
// Prints "S2710" plus elapsed clock() ticks / 1e6, calls check(123), returns 0.
int s2710( TYPE x)
{
// control flow
// scalar and vector ifs
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s2710");
start_t = clock();
for (int nl = 0; nl < ntimes/2; nl++) {
for (int i = 0; i < LEN; i++) {
if (a[i] > b[i]) {
a[i] += b[i] * d[i];
if (LEN > 10) {
c[i] += d[i] * d[i];
} else {
c[i] = d[i] * e[i] + (TYPE)1.;
}
} else {
b[i] = a[i] + e[i] * e[i];
if (x > (TYPE)0.) {
c[i] = a[i] + d[i] * d[i];
} else {
c[i] += e[i] * e[i];
}
}
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S2710\t %.2f \t\t", clock_dif_sec);;
check(123);
return 0;
}
// %2.7
// s2711: a[i] += b[i] * c[i] only where b[i] != 0, for i in 0 .. LEN-1.
// When b[i] is zero the skipped update would add b[i] * c[i] = 0 * c[i].
// Repeated 4*ntimes times with dummy() after each repetition.
// Prints "S2711" plus elapsed clock() ticks / 1e6, calls check(1), returns 0.
int s2711()
{
// control flow
// semantic if removal
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s2711");
start_t = clock();
for (int nl = 0; nl < 4*ntimes; nl++) {
for (int i = 0; i < LEN; i++) {
if (b[i] != (TYPE)0.0) {
a[i] += b[i] * c[i];
}
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S2711\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %2.7
// s2712: a[i] += b[i] * c[i] only where a[i] > b[i], for i in 0 .. LEN-1.
// Repeated 4*ntimes times with dummy() after each repetition.
// Prints "S2712" plus elapsed clock() ticks / 1e6, calls check(1), returns 0.
int s2712()
{
// control flow
// if to elemental min
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s2712");
start_t = clock();
for (int nl = 0; nl < 4*ntimes; nl++) {
for (int i = 0; i < LEN; i++) {
if (a[i] > b[i]) {
a[i] += b[i] * c[i];
}
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S2712\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
#endif // TESTS & CONTROL_FLOW
#if TESTS & CROSSING_THRESHOLDS
// %2.8
// s281: for i in 0 .. LEN-1,
//   x = a[LEN-i-1] + b[i] * c[i];   a[i] = x - 1.0;   b[i] = x
// a[] is read from the far end while being written from the near end, so
// once i reaches the middle the reads see values written earlier in the
// same pass.
// Repeated ntimes times with dummy() after each repetition.
// Prints "S281" plus elapsed clock() ticks / 1e6, calls check(12), returns 0.
int s281()
{
// crossing thresholds
// index set splitting
// reverse data access
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s281 ");
start_t = clock();
// This local scalar shadows the file-scope array x[] inside this function.
TYPE x;
for (int nl = 0; nl < ntimes; nl++) {
for (int i = 0; i < LEN; i++) {
x = a[LEN-i-1] + b[i] * c[i];
a[i] = x-(TYPE)1.0;
b[i] = x;
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S281\t %.2f \t\t", clock_dif_sec);;
check(12);
return 0;
}
// s1281: variant of s281 without the reversed read.
// Per element: x = b[i]*c[i] + a[i]*d[i] + e[i]; a[i] = x - 1; b[i] = x.
// Repeated 4*ntimes times with a dummy() call after each pass. Prints the
// label "S1281" and elapsed time, calls check(12) and returns 0.
int s1281()
{
// crossing thresholds
// index set splitting
// reverse data access
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
// NOTE(review): initialised under the "s281 " name, not "s1281" —
// presumably intentional reuse of that test's setup; confirm against init().
init( "s281 ");
start_t = clock();
TYPE x;
for (int nl = 0; nl < 4*ntimes; nl++) {
for (int i = 0; i < LEN; i++) {
x = b[i]*c[i]+a[i]*d[i]+e[i];
a[i] = x-(TYPE)1.0;
b[i] = x;
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S1281\t %.2f \t\t", clock_dif_sec);;
check(12);
return 0;
}
// %2.9
// s291: a[i] = (b[i] + b[im1]) * 0.5, where im1 trails i by one iteration
// and wraps around to LEN-1 for the first element. Repeated 2*ntimes times
// with a dummy() call after each pass. Prints the label and elapsed time,
// calls check(1) and returns 0.
int s291()
{
// loop peeling
// wrap around variable, 1 level
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s291 ");
start_t = clock();
int im1;
for (int nl = 0; nl < 2*ntimes; nl++) {
// First iteration pairs b[0] with the last element.
im1 = LEN-1;
for (int i = 0; i < LEN; i++) {
a[i] = (b[i] + b[im1]) * (TYPE).5;
im1 = i;
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S291\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %2.9
// s292: a[i] = (b[i] + b[im1] + b[im2]) * 0.333, where im1 and im2 trail i
// by one and two iterations and start at LEN-1 and LEN-2. Repeated ntimes
// times with a dummy() call after each pass. Prints the label and elapsed
// time, calls check(1) and returns 0.
int s292()
{
// loop peeling
// wrap around variable, 2 levels
// similar to S291
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s292 ");
start_t = clock();
int im1, im2;
for (int nl = 0; nl < ntimes; nl++) {
im1 = LEN-1;
im2 = LEN-2;
for (int i = 0; i < LEN; i++) {
a[i] = (b[i] + b[im1] + b[im2]) * (TYPE).333;
// Shift the two trailing indices forward by one position.
im2 = im1;
im1 = i;
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S292\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %2.9
// s293: copies a[0] into every element of a[]. Repeated 4*ntimes times with
// a dummy() call after each pass. Prints the label and elapsed time, calls
// check(1) and returns 0.
int s293()
{
// loop peeling
// a(i)=a(0) with actual dependence cycle, loop is vectorizable
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s293 ");
start_t = clock();
for (int nl = 0; nl < 4*ntimes; nl++) {
for (int i = 0; i < LEN; i++) {
// Iteration 0 writes a[0] with its own value; later ones read it.
a[i] = a[0];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S293\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %2.10
// s2101: updates only the main diagonal, aa[i][i] += bb[i][i] * cc[i][i],
// for i in [0, LEN2). Repeated 10*ntimes times with a dummy() call after
// each pass. Prints the label and elapsed time, calls check(11) and
// returns 0.
int s2101()
{
// diagonals
// main diagonal calculation
// jump in data access
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s2101");
start_t = clock();
for (int nl = 0; nl < 10*ntimes; nl++) {
for (int i = 0; i < LEN2; i++) {
aa[i][i] += bb[i][i] * cc[i][i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S2101\t %.2f \t\t", clock_dif_sec);;
check(11);
return 0;
}
// %2.12
// s2102: builds an identity matrix in aa. For each i, column i is zeroed
// and then aa[i][i] is set to 1. Repeated 100*(ntimes/LEN2) times with a
// dummy() call after each pass. Prints the label and elapsed time, calls
// check(11) and returns 0.
int s2102()
{
// diagonals
// identity matrix, best results vectorize both inner and outer loops
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s2102");
start_t = clock();
for (int nl = 0; nl < 100*(ntimes/LEN2); nl++) {
for (int i = 0; i < LEN2; i++) {
// Inner loop walks down column i (first index varies).
for (int j = 0; j < LEN2; j++) {
aa[j][i] = (TYPE)0.;
}
aa[i][i] = (TYPE)1.;
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S2102\t %.2f \t\t", clock_dif_sec);;
check(11);
return 0;
}
// %2.11
// s2111: wavefront update aa[j][i] = aa[j][i-1] + aa[j-1][i] for i, j in
// [1, LEN2). Repeated 100*(ntimes/LEN2) times with a dummy() call after
// each pass. After timing, sums all of aa into the global temp (replacing
// an exact 0 by 3), calls check(-1) and returns 0.
int s2111()
{
// wavefronts, it will make jump in data access
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s2111");
start_t = clock();
for (int nl = 0; nl < 100*(ntimes/(LEN2)); nl++) {
for (int j = 1; j < LEN2; j++) {
for (int i = 1; i < LEN2; i++) {
// Each element depends on its left and upper neighbours.
aa[j][i] = aa[j][i-1] + aa[j-1][i];
}
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S2111\t %.2f \t\t", clock_dif_sec);;
// Reduce the whole matrix into temp; presumably check(-1) reports temp.
temp = 0.;
for (int i = 0; i < LEN2; i++)
for (int j = 0; j < LEN2; j++)
temp += aa[i][j];
if (temp == 0) temp = 3.;
check(-1);
return 0;
}
#endif // TESTS & CROSSING_THRESHOLDS
// **********************************************************
// *
// IDIOM RECOGNITION *
// *
// **********************************************************
#if TESTS & REDUCTIONS
// %3.1
// s311: sums a[0..LEN-1] into a scalar. Repeated ntimes*10 times; the sum
// is passed to dummy() after each pass. Prints the label and elapsed time,
// calls check(1) and returns 0.
int s311()
{
// reductions
// sum reduction
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s311 ");
start_t = clock();
TYPE sum;
for (int nl = 0; nl < ntimes*10; nl++) {
sum = (TYPE)0.;
for (int i = 0; i < LEN; i++) {
sum += a[i];
}
dummy(a, b, c, d, e, aa, bb, cc, sum);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S311\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// Returns the sum of the four elements A[0..3]. A must point to at least
// four readable elements. Used by s31111.
TYPE test(TYPE* A){
TYPE s = (TYPE)0.0;
// #pragma nosimd
for (int i = 0; i < 4; i++)
s += A[i];
return s;
}
// s31111: sums the first 32 elements of a[] as eight 4-element partial sums
// computed by test(). Repeated 2000*ntimes times; the sum is passed to
// dummy() after each pass. Prints the label "S31111" and elapsed time,
// calls check(1) and returns 0.
int s31111()
{
// reductions
// sum reduction
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
// NOTE(review): initialised under the "s311 " name — presumably reuses
// that test's array setup; confirm against init().
init( "s311 ");
start_t = clock();
TYPE sum;
for (int nl = 0; nl < 2000*ntimes; nl++) {
sum = (TYPE)0.;
// Eight consecutive 4-element groups: a[0..3], a[4..7], ..., a[28..31].
sum += test(a);
sum += test(&a[4]);
sum += test(&a[8]);
sum += test(&a[12]);
sum += test(&a[16]);
sum += test(&a[20]);
sum += test(&a[24]);
sum += test(&a[28]);
dummy(a, b, c, d, e, aa, bb, cc, sum);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S31111\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %3.1
// s312: multiplies a[0..LEN-1] into a scalar product. Repeated 10*ntimes
// times; the product is passed to dummy() after each pass. Prints the label
// and elapsed time, stores the product in the global temp, calls check(-1)
// and returns 0.
int s312()
{
// reductions
// product reduction
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s312 ");
start_t = clock();
TYPE prod;
for (int nl = 0; nl < 10*ntimes; nl++) {
prod = (TYPE)1.;
for (int i = 0; i < LEN; i++) {
prod *= a[i];
}
dummy(a, b, c, d, e, aa, bb, cc, prod);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S312\t %.2f \t\t", clock_dif_sec);;
// Presumably check(-1) reports the value left in temp.
temp = prod;
check(-1);
return 0;
}
// %3.1
// s313: dot product of a[] and b[] over [0, LEN). Repeated ntimes*5 times;
// the result is passed to dummy() after each pass. Prints the label and
// elapsed time, stores the result in the global temp, calls check(-1) and
// returns 0.
int s313()
{
// reductions
// dot product
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s313 ");
start_t = clock();
TYPE dot;
for (int nl = 0; nl < ntimes*5; nl++) {
dot = (TYPE)0.;
for (int i = 0; i < LEN; i++) {
dot += a[i] * b[i];
}
dummy(a, b, c, d, e, aa, bb, cc, dot);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S313\t %.2f \t\t", clock_dif_sec);;
temp = dot;
check(-1);
return 0;
}
// %3.1
// s314: finds the maximum of a[0..LEN-1] with an explicit compare-and-
// assign. Repeated ntimes*5 times; the maximum is passed to dummy() after
// each pass. Prints the label and elapsed time, stores the maximum in the
// global temp, calls check(-1) and returns 0.
int s314()
{
// reductions
// if to max reduction
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s314 ");
start_t = clock();
TYPE x;
for (int nl = 0; nl < ntimes*5; nl++) {
x = a[0];
for (int i = 0; i < LEN; i++) {
if (a[i] > x) {
x = a[i];
}
}
dummy(a, b, c, d, e, aa, bb, cc, x);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S314\t %.2f \t\t", clock_dif_sec);;
temp = x;
check(-1);
return 0;
}
// %3.1
// s315: finds the maximum of a[0..LEN-1] together with the index of its
// first occurrence. Before timing starts, a[i] is overwritten with
// (i * 7) % LEN. Repeated ntimes times; max + index is passed to dummy()
// after each pass. Prints the label and elapsed time, stores
// index + max + 1 in the global temp, calls check(-1) and returns 0.
int s315()
{
// reductions
// if to max with index reduction 1 dimension
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s315 ");
// Replace the initialised data with a permutation-like pattern (untimed).
for (int i = 0; i < LEN; i++)
a[i] = (i * 7) % LEN;
start_t = clock();
TYPE x, chksum;
int index;
for (int nl = 0; nl < ntimes; nl++) {
x = a[0];
index = 0;
for (int i = 0; i < LEN; ++i) {
// Strict '>' keeps the earliest index among equal maxima.
if (a[i] > x) {
x = a[i];
index = i;
}
}
chksum = x + (TYPE) index;
dummy(a, b, c, d, e, aa, bb, cc, chksum);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S315\t %.2f \t\t", clock_dif_sec);;
temp = index+x+1;
check(-1);
return 0;
}
// %3.1
// s316: finds the minimum of a[0..LEN-1] with an explicit compare-and-
// assign. Repeated ntimes*5 times; the minimum is passed to dummy() after
// each pass. Prints the label and elapsed time, stores the minimum in the
// global temp, calls check(-1) and returns 0.
int s316()
{
// reductions
// if to min reduction
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s316 ");
start_t = clock();
TYPE x;
for (int nl = 0; nl < ntimes*5; nl++) {
x = a[0];
// Scan starts at 1 because a[0] is the initial candidate.
for (int i = 1; i < LEN; ++i) {
if (a[i] < x) {
x = a[i];
}
}
dummy(a, b, c, d, e, aa, bb, cc, x);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S316\t %.2f \t\t", clock_dif_sec);;
temp = x;
check(-1);
return 0;
}
// %3.1
// s317: multiplies a scalar by the constant 0.99 LEN/2 times, starting
// from 1. No array is read in the kernel. Repeated 5*ntimes times; the
// product is passed to dummy() after each pass. Prints the label and
// elapsed time, stores the product in the global temp, calls check(-1) and
// returns 0.
int s317()
{
// reductions
// product reduction, vectorize with
// 1. scalar expansion of factor, and product reduction
// 2. closed form solution: q = factor**n
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s317 ");
start_t = clock();
TYPE q;
for (int nl = 0; nl < 5*ntimes; nl++) {
q = (TYPE)1.;
for (int i = 0; i < LEN/2; i++) {
q *= (TYPE).99;
}
dummy(a, b, c, d, e, aa, bb, cc, q);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S317\t %.2f \t\t", clock_dif_sec);;
temp = q;
check(-1);
return 0;
}
// %3.1
// s318: isamax-style reduction. Walks a[] with stride inc and tracks the
// largest absolute value and the loop counter at which it was first seen.
//
// inc: stride between inspected elements. The kernel reads a[0], a[inc],
//      ..., a[(LEN-1)*inc], so the caller must keep those indices in range.
//
// Repeated ntimes/2 times; max + index is passed to dummy() after each
// pass. Prints the label and elapsed time, stores max + index + 1 in the
// global temp, calls check(-1) and returns 0.
int s318( int inc)
{
// reductions
// isamax, max absolute value, increments not equal to 1
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s318 ");
start_t = clock();
int k, index;
TYPE max, chksum;
for (int nl = 0; nl < ntimes/2; nl++) {
k = 0;
index = 0;
// fabs() keeps the magnitude in floating point. The integer abs() used
// previously converted each element to int first, truncating fractions.
max = fabs(a[0]);
k += inc;
for (int i = 1; i < LEN; i++) {
// The goto form is kept deliberately: it is part of the benchmarked shape.
if (fabs(a[k]) <= max) {
goto L5;
}
index = i;
max = fabs(a[k]);
L5:
k += inc;
}
chksum = max + (TYPE) index;
dummy(a, b, c, d, e, aa, bb, cc, chksum);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S318\t %.2f \t\t", clock_dif_sec);;
temp = max + index+1;
check(-1);
return 0;
}
// %3.1
// s319: two array updates feeding one running sum. Per element:
// a[i] = c[i] + d[i]; b[i] = c[i] + e[i]; both results are added to sum.
// Repeated 2*ntimes times; sum is passed to dummy() after each pass. Prints
// the label and elapsed time, stores sum in the global temp, calls
// check(-1) and returns 0.
int s319()
{
// reductions
// coupled reductions
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s319 ");
start_t = clock();
TYPE sum;
for (int nl = 0; nl < 2*ntimes; nl++) {
sum = 0.;
for (int i = 0; i < LEN; i++) {
a[i] = c[i] + d[i];
sum += a[i];
b[i] = c[i] + e[i];
sum += b[i];
}
dummy(a, b, c, d, e, aa, bb, cc, sum);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S319\t %.2f \t\t", clock_dif_sec);;
temp = sum;
check(-1);
return 0;
}
// %3.1
// s3110: finds the maximum element of the LEN2 x LEN2 matrix aa and the row
// and column of its first occurrence in row-major scan order. Repeated
// 100*(ntimes/LEN2) times; max + xindex + yindex is passed to dummy() after
// each pass. Prints the label and elapsed time, stores
// max + xindex + 1 + yindex + 1 in the global temp, calls check(-1) and
// returns 0.
int s3110()
{
// reductions
// if to max with index reduction 2 dimensions
// similar to S315
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s3110");
start_t = clock();
int xindex, yindex;
TYPE max, chksum;
for (int nl = 0; nl < 100*(ntimes/(LEN2)); nl++) {
max = aa[(0)][0];
xindex = 0;
yindex = 0;
for (int i = 0; i < LEN2; i++) {
for (int j = 0; j < LEN2; j++) {
// Strict '>' keeps the first position among equal maxima.
if (aa[i][j] > max) {
max = aa[i][j];
xindex = i;
yindex = j;
}
}
}
chksum = max + (TYPE) xindex + (TYPE) yindex;
dummy(a, b, c, d, e, aa, bb, cc, chksum);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S3110\t %.2f \t\t", clock_dif_sec);;
temp = max + xindex+1 + yindex+1;
check(-1);
return 0;
}
// s13110: variant of s3110 that tracks only the maximum of aa. xindex and
// yindex are reset to 0 each pass and never updated, so they contribute 0
// to the checksum. Repeated 100*(ntimes/LEN2) times; the checksum is passed
// to dummy() after each pass. Prints the label "S13110" and elapsed time,
// stores max + 2 (max + xindex+1 + yindex+1) in the global temp, calls
// check(-1) and returns 0.
int s13110()
{
// reductions
// if to max with index reduction 2 dimensions
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
// NOTE(review): initialised under the "s3110" name — presumably reuses
// that test's array setup; confirm against init().
init( "s3110");
start_t = clock();
int xindex, yindex;
TYPE max, chksum;
for (int nl = 0; nl < 100*(ntimes/(LEN2)); nl++) {
max = aa[(0)][0];
xindex = 0;
yindex = 0;
for (int i = 0; i < LEN2; i++) {
for (int j = 0; j < LEN2; j++) {
if (aa[i][j] > max) {
max = aa[i][j];
}
}
}
chksum = max + (TYPE) xindex + (TYPE) yindex;
dummy(a, b, c, d, e, aa, bb, cc, chksum);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S13110\t %.2f \t\t", clock_dif_sec);;
temp = max + xindex+1 + yindex+1;
check(-1);
return 0;
}
// %3.1
// s3111: sums only the strictly positive elements of a[0..LEN-1]. Repeated
// ntimes/2 times; sum is passed to dummy() after each pass. Prints the
// label and elapsed time, stores sum in the global temp, calls check(-1)
// and returns 0.
int s3111()
{
// reductions
// conditional sum reduction
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s3111");
start_t = clock();
TYPE sum;
for (int nl = 0; nl < ntimes/2; nl++) {
sum = 0.;
for (int i = 0; i < LEN; i++) {
if (a[i] > (TYPE)0.) {
sum += a[i];
}
}
dummy(a, b, c, d, e, aa, bb, cc, sum);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S3111\t %.2f \t\t", clock_dif_sec);;
temp = sum;
check(-1);
return 0;
}
// %3.1
// s3112: prefix sum. b[i] receives the running total a[0] + ... + a[i].
// Repeated ntimes times; the final total is passed to dummy() after each
// pass. Prints the label and elapsed time, stores the total in the global
// temp, calls check(-12) and returns 0.
int s3112()
{
// reductions
// sum reduction saving running sums
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s3112");
start_t = clock();
TYPE sum;
for (int nl = 0; nl < ntimes; nl++) {
sum = (TYPE)0.0;
for (int i = 0; i < LEN; i++) {
sum += a[i];
b[i] = sum;
}
dummy(a, b, c, d, e, aa, bb, cc, sum);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S3112\t %.2f \t\t", clock_dif_sec);;
temp = sum;
check(-12);
return 0;
}
// %3.1
// s3113: finds the largest absolute value in a[0..LEN-1]. Repeated
// ntimes*4 times; the maximum is passed to dummy() after each pass. Prints
// the label and elapsed time, stores the maximum in the global temp, calls
// check(-1) and returns 0.
int s3113()
{
// reductions
// maximum of absolute value
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s3113");
start_t = clock();
TYPE max;
for (int nl = 0; nl < ntimes*4; nl++) {
// fabs() keeps the magnitude in floating point. The integer abs() used
// previously converted each element to int first, truncating fractions.
max = fabs(a[0]);
for (int i = 0; i < LEN; i++) {
if ((fabs(a[i])) > max) {
max = fabs(a[i]);
}
}
dummy(a, b, c, d, e, aa, bb, cc, max);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S3113\t %.2f \t\t", clock_dif_sec);;
temp = max;
check(-1);
return 0;
}
#endif // TESTS & REDUCTIONS
#if TESTS & RECURRENCES
// %3.2
// s321: first-order recurrence a[i] += a[i-1] * b[i] for i in [1, LEN).
// Each element depends on the one just written. Repeated ntimes times with
// a dummy() call after each pass. Prints the label and elapsed time, calls
// check(1) and returns 0.
int s321()
{
// recurrences
// first order linear recurrence
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s321 ");
start_t = clock();
for (int nl = 0; nl < ntimes; nl++) {
for (int i = 1; i < LEN; i++) {
a[i] += a[i-1] * b[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S321\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %3.2
// s322: second-order recurrence
// a[i] = a[i] + a[i-1] * b[i] + a[i-2] * c[i] for i in [2, LEN).
// Repeated ntimes/2 times with a dummy() call after each pass. Prints the
// label and elapsed time, calls check(1) and returns 0.
int s322()
{
// recurrences
// second order linear recurrence
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s322 ");
start_t = clock();
for (int nl = 0; nl < ntimes/2; nl++) {
for (int i = 2; i < LEN; i++) {
a[i] = a[i] + a[i - 1] * b[i] + a[i - 2] * c[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S322\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %3.2
// s323: coupled recurrence over i in [1, LEN):
// a[i] = b[i-1] + c[i] * d[i]; b[i] = a[i] + c[i] * e[i].
// a[i] depends on the b value written in the previous iteration. Repeated
// ntimes/2 times with a dummy() call after each pass. Prints the label and
// elapsed time, calls check(12) and returns 0.
int s323()
{
// recurrences
// coupled recurrence
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s323 ");
start_t = clock();
for (int nl = 0; nl < ntimes/2; nl++) {
for (int i = 1; i < LEN; i++) {
a[i] = b[i-1] + c[i] * d[i];
b[i] = a[i] + c[i] * e[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S323\t %.2f \t\t", clock_dif_sec);;
check(12);
return 0;
}
#endif // TESTS & RECURRENCES
#if TESTS & SEARCHING
// %3.3
// s331: finds the index of the last negative element of a[0..LEN-1], or -1
// when there is none. Repeated ntimes times; the index (as TYPE) is passed
// to dummy() after each pass. Prints the label and elapsed time, stores
// j + 1 in the global temp, calls check(-1) and returns 0.
int s331()
{
// search loops
// if to last-1
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s331 ");
start_t = clock();
int j;
TYPE chksum;
for (int nl = 0; nl < ntimes; nl++) {
j = -1;
for (int i = 0; i < LEN; i++) {
// No early exit: every later match overwrites j.
if (a[i] < (TYPE)0.) {
j = i;
}
}
chksum = (TYPE) j;
dummy(a, b, c, d, e, aa, bb, cc, chksum);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S331\t %.2f \t\t", clock_dif_sec);;
temp = j+1;
check(-1);
return 0;
}
// Returns the larger of the two integers; a1 is returned when they are equal.
int max(int a1, int b1)
{
    return (b1 > a1) ? b1 : a1;
}
// %3.3
// s332: finds the first element of a[0..LEN-1] that is greater than t.
//
// t: threshold compared against each element.
//
// On a match, index and value hold the position and the element; otherwise
// they keep the sentinels -2 and -1. Repeated ntimes times; value + index
// is passed to dummy() after each pass. Prints the label and elapsed time,
// stores value in the global temp, calls check(-1) and returns 0.
int s332( TYPE t)
{
// search loops
// first value greater than threshold
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s332 ");
start_t = clock();
int index;
TYPE value;
TYPE chksum;
for (int nl = 0; nl < ntimes; nl++) {
index = -2;
value = -1.;
for (int i = 0; i < LEN; i++) {
if (a[i] > t) {
index = i;
value = a[i];
// Leave the search loop at the first match.
goto L20;
}
}
L20:
chksum = value + (TYPE) index;
dummy(a, b, c, d, e, aa, bb, cc, chksum);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S332\t %.2f \t\t", clock_dif_sec);;
temp = value;
check(-1);
return 0;
}
#endif // TESTS & SEARCHING
#if TESTS & PACKING
// %3.4
// s341: packs the strictly positive elements of b[] into the front of a[],
// preserving their order. Repeated ntimes times with a dummy() call after
// each pass. Prints the label and elapsed time, calls check(1) and
// returns 0.
int s341()
{
// packing
// pack positive values
// not vectorizable, value of j is unknown at each iteration
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s341 ");
// Timer starts after init() so that setup is not measured.
start_t = clock();
int j;
for (int nl = 0; nl < ntimes; nl++) {
// j is the index of the last slot written in a[]; -1 means none yet.
j = -1;
for (int i = 0; i < LEN; i++) {
if (b[i] > (TYPE)0.) {
j++;
a[j] = b[i];
}
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S341\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %3.4
// s342: unpacking. Every strictly positive a[i] is replaced by the next
// unread element of b[], consumed in order from b[0]. Repeated ntimes times
// with a dummy() call after each pass. Prints the label and elapsed time,
// calls check(1) and returns 0.
int s342()
{
// packing
// unpacking
// not vectorizable, value of j is unknown at each iteration
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s342 ");
// Timer starts after init() so that setup is not measured.
start_t = clock();
int j = 0;
for (int nl = 0; nl < ntimes; nl++) {
// j is the index of the last element consumed from b[]; -1 means none yet.
j = -1;
for (int i = 0; i < LEN; i++) {
if (a[i] > (TYPE)0.) {
j++;
a[i] = b[j];
}
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S342\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %3.4
// s343: packs elements of the 2-d matrix aa into the 1-d global array[].
// aa[j][i] is copied whenever the matching bb[j][i] is strictly positive,
// scanning column by column. Repeated 10*(ntimes/LEN2) times with a dummy()
// call after each pass. Prints the label and elapsed time, calls check(0)
// and returns 0.
int s343()
{
// packing
// pack 2-d array into one dimension
// not vectorizable, value of k is unknown at each iteration
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s343 ");
// Timer starts after init() so that setup is not measured.
start_t = clock();
int k;
for (int nl = 0; nl < 10*(ntimes/LEN2); nl++) {
// k is the index of the last slot written in array[]; -1 means none yet.
k = -1;
for (int i = 0; i < LEN2; i++) {
for (int j = 0; j < LEN2; j++) {
if (bb[j][i] > (TYPE)0.) {
k++;
array[k] = aa[j][i];
}
}
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S343\t %.2f \t\t", clock_dif_sec);;
check(0);
return 0;
}
#endif // TESTS & PACKING
#if TESTS & LOOP_REROLLING
// %3.5
// s351: saxpy, a[i] += alpha * b[i], hand-unrolled by five, with
// alpha = c[0] read once after init(). The kernel touches a[i..i+4] per
// step, so LEN is expected to be a multiple of 5 (the file header requires
// a multiple of 40). Repeated 8*ntimes times with a dummy() call after each
// pass. Prints the label and elapsed time, calls check(1) and returns 0.
int s351()
{
// loop rerolling
// unrolled saxpy
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s351 ");
// Timer starts after init() so that setup is not measured.
start_t = clock();
TYPE alpha = c[0];
for (int nl = 0; nl < 8*ntimes; nl++) {
for (int i = 0; i < LEN; i += 5) {
a[i] += alpha * b[i];
a[i + 1] += alpha * b[i + 1];
a[i + 2] += alpha * b[i + 2];
a[i + 3] += alpha * b[i + 3];
a[i + 4] += alpha * b[i + 4];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S351\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// s1351: element-wise a[i] = b[i] + c[i], written with three restrict-
// qualified pointers that are bumped each iteration instead of indexed.
// Repeated 8*ntimes times with a dummy() call after each pass. Prints the
// label "S1351" and elapsed time, calls check(1) and returns 0.
int s1351()
{
// induction pointer recognition
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
// NOTE(review): initialised under the "s351 " name — presumably reuses
// that test's array setup; confirm against init().
init( "s351 ");
// Timer starts after init() so that setup is not measured.
start_t = clock();
for (int nl = 0; nl < 8*ntimes; nl++) {
// Pointers are reset to the array starts at the top of every pass.
TYPE* __restrict__ A = a;
TYPE* __restrict__ B = b;
TYPE* __restrict__ C = c;
for (int i = 0; i < LEN; i++) {
*A = *B+*C;
A++;
B++;
C++;
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S1351\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %3.5
// s352: dot product of a[] and b[], hand-unrolled by five. The kernel reads
// a[i..i+4] and b[i..i+4] per step, so LEN is expected to be a multiple of
// 5 (the file header requires a multiple of 40). Repeated 8*ntimes times;
// the result is passed to dummy() after each pass. Prints the label and
// elapsed time, stores the result in the global temp, calls check(-1) and
// returns 0.
int s352()
{
// loop rerolling
// unrolled dot product
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s352 ");
// Timer starts after init() so that setup is not measured.
start_t = clock();
TYPE dot;
for (int nl = 0; nl < 8*ntimes; nl++) {
dot = (TYPE)0.;
for (int i = 0; i < LEN; i += 5) {
dot = dot + a[i] * b[i] + a[i + 1] * b[i + 1] + a[i + 2]
* b[i + 2] + a[i + 3] * b[i + 3] + a[i + 4] * b[i + 4];
}
dummy(a, b, c, d, e, aa, bb, cc, dot);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S352\t %.2f \t\t", clock_dif_sec);;
temp = dot;
check(-1);
return 0;
}
// %3.5
// s353: sparse saxpy, a[i] += alpha * b[ip[i]], hand-unrolled by five, with
// alpha = c[0] read once after init().
//
// ip: index array with at least LEN entries; every ip[i] must be a valid
//     index into b[].
//
// Repeated ntimes times with a dummy() call after each pass. Prints the
// label and elapsed time, calls check(1) and returns 0.
int s353(int* __restrict__ ip)
{
// loop rerolling
// unrolled sparse saxpy
// gather is required
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s353 ");
// Timer starts after init() so that setup is not measured.
start_t = clock();
TYPE alpha = c[0];
for (int nl = 0; nl < ntimes; nl++) {
for (int i = 0; i < LEN; i += 5) {
a[i] += alpha * b[ip[i]];
a[i + 1] += alpha * b[ip[i + 1]];
a[i + 2] += alpha * b[ip[i + 2]];
a[i + 3] += alpha * b[ip[i + 3]];
a[i + 4] += alpha * b[ip[i + 4]];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S353\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
#endif // TESTS & LOOP_REROLLING
// **********************************************************
// *
// LANGUAGE COMPLETENESS *
// *
// **********************************************************
#if TESTS & EQUIVALENCING
// %4.1
// %4.2
// s421: xx[i] = yy[i+1] + a[i] for i in [0, LEN-1), with yy set to xx at
// the top of every pass, so both names refer to the same storage and each
// read is one element ahead of the write. Repeated 4*ntimes times with a
// dummy() call after each pass. After timing, sums xx[0..LEN-1] into the
// global temp, calls check(-1) and returns 0.
int s421()
{
// storage classes and equivalencing
// equivalence- no overlap
// NOTE(review): set1d() runs before init(); whether init() touches the
// storage behind xx is not visible here.
set1d(xx, 1., 1);
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s421 ");
start_t = clock();
for (int nl = 0; nl < 4*ntimes; nl++) {
yy = xx;
for (int i = 0; i < LEN - 1; i++) {
xx[i] = yy[i+1] + a[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 1.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S421\t %.2f \t\t", clock_dif_sec);;
temp = 0;
for (int i = 0; i < LEN; i++){
temp += xx[i];
}
check(-1);
return 0;
}
// s1421: b[i] = xx[i] + a[i] for i in [0, LEN/2), where xx points at the
// upper half of b (&b[LEN/2]); reads and writes never overlap. Repeated
// 8*ntimes times with a dummy() call after each pass. After timing, sums
// xx[0..LEN/2-1] into the global temp, calls check(-1) and returns 0.
int s1421()
{
// storage classes and equivalencing
// equivalence- no overlap
// This fills whatever xx pointed at on entry; xx is re-pointed below.
set1d(xx, 1., 1);
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
// NOTE(review): initialised under the "s421 " name — presumably reuses
// that test's array setup; confirm against init().
init( "s421 ");
start_t = clock();
xx = &b[LEN/2];
for (int nl = 0; nl < 8*ntimes; nl++) {
for (int i = 0; i < LEN/2; i++) {
b[i] = xx[i] + a[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 1.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S1421\t %.2f \t\t", clock_dif_sec);;
temp = 0;
for (int i = 0; i < LEN/2; i++){
temp += xx[i];
}
check(-1);
return 0;
}
// %4.2
// s422: xx[i] = array[i + 8] + a[i] for i in [0, LEN), with xx = array + 4.
// Each iteration therefore writes array[i + 4] and reads array[i + 8], four
// elements ahead of the write. Repeated 8*ntimes times with a dummy() call
// after each pass. After timing, sums xx[0..LEN-1] into the global temp,
// calls check(-1) and returns 0.
int s422()
{
// storage classes and equivalencing
// common and equivalence statement
// anti-dependence, threshold of 4
xx = array + 4;
set1d(xx, 0., 1);
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s422 ");
start_t = clock();
for (int nl = 0; nl < 8*ntimes; nl++) {
for (int i = 0; i < LEN; i++) {
xx[i] = array[i + 8] + a[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S422\t %.2f \t\t", clock_dif_sec);;
temp = 0;
for (int i = 0; i < LEN; i++){
temp += xx[i];
}
check(-1);
return 0;
}
// %4.2
// s423: array[i+1] = xx[i] + a[i] for i in [0, LEN-1), with
// xx = array + 64. Each iteration writes array[i + 1] and reads
// array[i + 64], 63 elements ahead of the write. Repeated 4*ntimes times
// with a dummy() call after each pass. After timing, sums array[0..LEN-1]
// into the global temp, calls check(-1) and returns 0.
int s423()
{
// storage classes and equivalencing
// common and equivalenced variables - with anti-dependence
int vl = 64;
xx = array+vl;
set1d(xx, 1., 1);
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s423 ");
start_t = clock();
for (int nl = 0; nl < 4*ntimes; nl++) {
for (int i = 0; i < LEN - 1; i++) {
array[i+1] = xx[i] + a[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 1.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S423\t %.2f \t\t", clock_dif_sec);;
temp = 0.;
for (int i = 0; i < LEN; i++){
temp += array[i];
}
check(-1);
return 0;
}
// %4.2
// s424: xx[i+1] = array[i] + a[i] for i in [0, LEN-1), with
// xx = array + 63. Each iteration writes array[i + 64] and reads array[i],
// so a value written now is read again 64 iterations later. Repeated
// 4*ntimes times with a dummy() call after each pass. After timing, sums
// xx[0..LEN-1] into the global temp, calls check(-1) and returns 0.
int s424()
{
// storage classes and equivalencing
// common and equivalenced variables - overlap
// vectorizable in strips of 64 or less
int vl = 63;
xx = array + vl;
set1d(xx, 0., 1);
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s424 ");
start_t = clock();
for (int nl = 0; nl < 4*ntimes; nl++) {
for (int i = 0; i < LEN - 1; i++) {
xx[i+1] = array[i] + a[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 1.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S424\t %.2f \t\t", clock_dif_sec);;
temp = 0.;
for (int i = 0; i < LEN; i++){
temp += xx[i];
}
check(-1);
return 0;
}
#endif // TESTS & EQUIVALENCING
// %4.3
#if TESTS & GLOBAL_DATA_FLOW
// s431: a[i] = a[i+k] + b[i] for i in [0, LEN), where k is computed from
// two local constants and evaluates to 0. Repeated ntimes*10 times with a
// dummy() call after each pass. Prints the label and elapsed time, calls
// check(1) and returns 0.
int s431()
{
// parameters
// parameter statement
int k1=1;
int k2=2;
// 2*1 - 2 == 0, so a[i+k] below is a[i].
int k=2*k1-k2;
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s431 ");
start_t = clock();
for (int nl = 0; nl < ntimes*10; nl++) {
for (int i = 0; i < LEN; i++) {
a[i] = a[i+k] + b[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S431\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
#endif // TESTS & GLOBAL_DATA_FLOW
// %4.4
#if TESTS & CONTROL_FLOW
// s441: three-way branch on the sign of d[i]. a[i] is incremented by
// b[i]*c[i] when d[i] < 0, by b[i]*b[i] when d[i] == 0, and by c[i]*c[i]
// otherwise. Repeated ntimes times with a dummy() call after each pass.
// Prints the label and elapsed time, calls check(1) and returns 0.
int s441()
{
// non-logical if's
// arithmetic if
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s441 ");
start_t = clock();
for (int nl = 0; nl < ntimes; nl++) {
for (int i = 0; i < LEN; i++) {
if (d[i] < (TYPE)0.) {
a[i] += b[i] * c[i];
} else if (d[i] == (TYPE)0.) {
a[i] += b[i] * b[i];
} else {
a[i] += c[i] * c[i];
}
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S441\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %4.4
// s442: computed-goto dispatch on indx[i]. a[i] is incremented by the
// square of b[i], c[i], d[i] or e[i] for indx[i] equal to 1, 2, 3 or 4.
// Repeated ntimes/2 times with a dummy() call after each pass. Prints the
// label and elapsed time, calls check(1) and returns 0.
int s442()
{
// non-logical if's
// computed goto
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s442 ");
start_t = clock();
for (int nl = 0; nl < ntimes/2; nl++) {
for (int i = 0; i < LEN; i++) {
// There is no default case: any value outside 1..4 falls out of the
// switch and runs the L15 branch below.
switch (indx[i]) {
case 1: goto L15;
case 2: goto L20;
case 3: goto L30;
case 4: goto L40;
}
L15:
a[i] += b[i] * b[i];
goto L50;
L20:
a[i] += c[i] * c[i];
goto L50;
L30:
a[i] += d[i] * d[i];
goto L50;
L40:
a[i] += e[i] * e[i];
L50:
;
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S442\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %4.4
// s443: two-way branch written with gotos. a[i] is incremented by
// b[i]*c[i] when d[i] <= 0 and by b[i]*b[i] otherwise. Repeated 2*ntimes
// times with a dummy() call after each pass. Prints the label and elapsed
// time, calls check(1) and returns 0.
int s443()
{
// non-logical if's
// arithmetic if
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s443 ");
start_t = clock();
for (int nl = 0; nl < 2*ntimes; nl++) {
for (int i = 0; i < LEN; i++) {
if (d[i] <= (TYPE)0.) {
goto L20;
} else {
goto L30;
}
L20:
a[i] += b[i] * c[i];
goto L50;
L30:
a[i] += b[i] * b[i];
L50:
;
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S443\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
#endif // TESTS & CONTROL_FLOW
#if TESTS & GLOBAL_DATA_FLOW
// %4.5
// s451: a[i] = sin(b[i]) + cos(c[i]) for i in [0, LEN). The float variants
// sinf/cosf are used when USE_FLOAT_TRIG is defined. Repeated ntimes/5
// times with a dummy() call after each pass. Prints the label and elapsed
// time, calls check(1) and returns 0.
int s451()
{
// intrinsic functions
// intrinsics
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s451 ");
start_t = clock();
for (int nl = 0; nl < ntimes/5; nl++) {
for (int i = 0; i < LEN; i++) {
#ifdef USE_FLOAT_TRIG
a[i] = sinf(b[i]) + cosf(c[i]);
#else
a[i] = sin(b[i]) + cos(c[i]);
#endif
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S451\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %4.5
// s452: a[i] = b[i] + c[i] * (i + 1) for i in [0, LEN), with the 1-based
// loop index converted to TYPE. Repeated 4*ntimes times with a dummy() call
// after each pass. Prints the label and elapsed time, calls check(1) and
// returns 0.
int s452()
{
// intrinsic functions
// seq function
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s452 ");
start_t = clock();
for (int nl = 0; nl < 4*ntimes; nl++) {
for (int i = 0; i < LEN; i++) {
a[i] = b[i] + c[i] * (TYPE) (i+1);
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S452\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
#endif // TESTS & GLOBAL_DATA_FLOW
#if TESTS & INDUCTION_VARIABLE
// %4.5
// s453: a[i] = s * b[i], where s is a floating-point accumulator that grows
// by 2 every iteration (so s == 2*(i+1) when a[i] is written). Repeated
// ntimes*2 times with a dummy() call after each pass. Prints the label and
// elapsed time, calls check(1) and returns 0.
int s453()
{
// induction variable recognition
TYPE s;
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s453 ");
start_t = clock();
for (int nl = 0; nl < ntimes*2; nl++) {
s = 0.;
for (int i = 0; i < LEN; i++) {
s += (TYPE)2.;
a[i] = s * b[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S453\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
#endif // TESTS & INDUCTION_VARIABLE
// %4.7
#if TESTS & GLOBAL_DATA_FLOW
// s471: two array updates separated by a call to s471s() (defined outside
// this section). Per element: x[i] = b[i] + d[i]*d[i], then s471s(), then
// b[i] = c[i] + d[i]*e[i]. Repeated ntimes/2 times with a dummy() call
// after each pass. After timing, sums x[0..LEN-1] into the global temp,
// calls check(-12) and returns 0.
int s471(){
// call statements
// Loop bound is held in a local variable rather than used as the constant.
int m = LEN;
set1d(x, 0., 1);
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s471 ");
start_t = clock();
for (int nl = 0; nl < ntimes/2; nl++) {
for (int i = 0; i < m; i++) {
x[i] = b[i] + d[i] * d[i];
s471s();
b[i] = c[i] + d[i] * e[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S471\t %.2f \t\t", clock_dif_sec);;
temp = 0.;
for (int i = 0; i < LEN; i++){
temp += x[i];
}
check(-12);
return 0;
}
#endif // TESTS & GLOBAL_DATA_FLOW
#if TESTS & CONTROL_FLOW
// %4.8
// s481: a[i] += b[i] * c[i] for i in [0, LEN), but the whole program is
// terminated with exit(0) as soon as a negative d[i] is met. Repeated
// ntimes times with a dummy() call after each pass. Prints the label and
// elapsed time, calls check(1) and returns 0.
int s481()
{
// non-local goto's
// stop statement
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s481 ");
start_t = clock();
for (int nl = 0; nl < ntimes; nl++) {
for (int i = 0; i < LEN; i++) {
// Terminates the process: nothing after this point runs.
if (d[i] < (TYPE)0.) {
exit (0);
}
a[i] += b[i] * c[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S481\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %4.8
// %4.8
// s482: a[i] += b[i] * c[i] for increasing i, leaving the loop right after
// the first element for which c[i] > b[i] has been updated. Repeated ntimes
// times with a dummy() call after each pass. Prints the label and elapsed
// time, calls check(1) and returns 0.
int s482()
{
// non-local goto's
// other loop exit with code before exit
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s482 ");
start_t = clock();
for (int nl = 0; nl < ntimes; nl++) {
for (int i = 0; i < LEN; i++) {
a[i] += b[i] * c[i];
// The update above has already happened for the element that exits.
if (c[i] > b[i]) break;
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S482\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
#endif // TESTS & CONTROL_FLOW
// Returns the smaller of the two integers; b is returned when they are equal.
int min(int a, int b){
    if (a < b) {
        return a;
    }
    return b;
}
#if TESTS & INDIRECT_ADDRESSING
// %4.9
// s491: scatter store a[ip[i]] = b[i] + c[i] * d[i] for i in [0, LEN).
//
// ip: index array with at least LEN entries; every ip[i] must be a valid
//     index into a[].
//
// Repeated ntimes times with a dummy() call after each pass. Prints the
// label and elapsed time, calls check(1) and returns 0.
int s491(int* __restrict__ ip)
{
// vector semantics
// indirect addressing on lhs, store in sequence
// scatter is required
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s491 ");
start_t = clock();
for (int nl = 0; nl < ntimes; nl++) {
for (int i = 0; i < LEN; i++) {
a[ip[i]] = b[i] + c[i] * d[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S491\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %4.11
// s4112: sparse saxpy a[i] += b[ip[i]] * s for i in [0, LEN).
//
// ip: index array with at least LEN entries; every ip[i] must be a valid
//     index into b[].
// s:  scalar multiplier.
//
// Repeated ntimes times with a dummy() call after each pass. Prints the
// label and elapsed time, calls check(1) and returns 0.
int s4112(int* __restrict__ ip, TYPE s)
{
// indirect addressing
// sparse saxpy
// gather is required
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s4112");
start_t = clock();
for (int nl = 0; nl < ntimes; nl++) {
for (int i = 0; i < LEN; i++) {
a[i] += b[ip[i]] * s;
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S4112\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %4.11
// s4113: a[ip[i]] = b[ip[i]] + c[i] for i in [0, LEN); the same index
// selects both the element read from b[] and the element written in a[].
//
// ip: index array with at least LEN entries; every ip[i] must be a valid
//     index into a[] and b[].
//
// Repeated ntimes times with a dummy() call after each pass. Prints the
// label and elapsed time, calls check(1) and returns 0.
int s4113(int* __restrict__ ip)
{
// indirect addressing
// indirect addressing on rhs and lhs
// gather and scatter is required
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s4113");
start_t = clock();
for (int nl = 0; nl < ntimes; nl++) {
for (int i = 0; i < LEN; i++) {
a[ip[i]] = b[ip[i]] + c[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S4113\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %4.11
// s4114: a[i] = b[i] + c[LEN - ip[i] - 1] * d[i] for i in [n1-1, LEN).
//
// ip: index array with at least LEN entries; LEN - ip[i] - 1 must be a
//     valid index into c[].
// n1: 1-based lower bound of the loop (iteration starts at n1 - 1).
//
// Repeated ntimes times with a dummy() call after each pass. Prints the
// label and elapsed time, calls check(1) and returns 0.
int s4114(int* ip, int n1)
{
// indirect addressing
// mix indirect addressing with variable lower and upper bounds
// gather is required
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s4114");
start_t = clock();
int k;
for (int nl = 0; nl < ntimes; nl++) {
for (int i = n1-1; i < LEN; i++) {
k = ip[i];
a[i] = b[i] + c[LEN-k+1-2] * d[i];
// Has no lasting effect: k is reloaded from ip[] at the top of the
// next iteration and is not read after the loop.
k += 5;
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S4114\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %4.11
// s4115: sparse dot product, sum of a[i] * b[ip[i]] over i in [0, LEN).
//
// ip: index array with at least LEN entries; every ip[i] must be a valid
//     index into b[].
//
// Repeated ntimes times with a dummy() call after each pass. Prints the
// label and elapsed time, stores the sum in the global temp, calls
// check(-1) and returns 0.
int s4115(int* __restrict__ ip)
{
// indirect addressing
// sparse dot product
// gather is required
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s4115");
start_t = clock();
TYPE sum;
for (int nl = 0; nl < ntimes; nl++) {
sum = 0.;
for (int i = 0; i < LEN; i++) {
sum += a[i] * b[ip[i]];
}
// NOTE(review): dummy() receives 0. here, not sum as in the s31x
// reductions; sum is only consumed through temp after the loop.
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S4115\t %.2f \t\t", clock_dif_sec);;
temp = sum;
check(-1);
return 0;
}
// %4.11
// s4116: sparse dot product between a shifted slice of a[] and gathered
// elements of one row of aa: sum of a[inc + i] * aa[j-1][ip[i]] over i in
// [0, LEN2-1).
//
// ip:  index array with at least LEN2-1 entries; every ip[i] must be a
//      valid column index into aa.
// j:   1-based row of aa to read (row j-1 is used).
// inc: offset added to i when indexing a[].
//
// Repeated 100*ntimes times with a dummy() call after each pass. Prints
// the label and elapsed time, stores the sum in the global temp, calls
// check(-1) and returns 0.
int s4116(int* __restrict__ ip, int j, int inc)
{
// indirect addressing
// more complicated sparse sdot
// gather is required
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s4116");
start_t = clock();
TYPE sum;
int off;
for (int nl = 0; nl < 100*ntimes; nl++) {
sum = 0.;
for (int i = 0; i < LEN2-1; i++) {
off = inc + i;
sum += a[off] * aa[j-1][ip[i]];
}
// NOTE(review): dummy() receives 0. here, not sum; sum is only
// consumed through temp after the loop.
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S4116\t %.2f \t\t", clock_dif_sec);;
temp = sum;
check(-1);
return 0;
}
// %4.11
// s4117: a[i] = b[i] + c[i/2] * d[i] for i in [0, LEN); the integer
// division makes each c[] element serve two consecutive iterations.
// Repeated ntimes times with a dummy() call after each pass. Prints the
// label and elapsed time, calls check(1) and returns 0.
int s4117()
{
// indirect addressing
// seq function
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "s4117");
start_t = clock();
for (int nl = 0; nl < ntimes; nl++) {
for (int i = 0; i < LEN; i++) {
a[i] = b[i] + c[i/2] * d[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("S4117\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
#endif // TESTS & INDIRECT_ADDRESSING
#if TESTS & GLOBAL_DATA_FLOW
// %4.12
// s4121: statement functions -- elementwise update through the helper f().
// Each pass adds f(b[i], c[i]) to a[i] for i in [0, LEN); the pass is
// repeated ntimes times, with dummy() called after every pass so the work
// appears required. f() is defined elsewhere in this file.
// Prints the loop label and the measured time (0.00 when clock() is compiled
// out via USE_CLOCK), then calls check(1). Always returns 0.
int s4121()
{
    init( "s4121");
    const clock_t t_begin = clock();

    for (int rep = 0; rep < ntimes; ++rep) {
        for (int idx = 0; idx < LEN; ++idx) {
            a[idx] += f(b[idx],c[idx]);
        }
        dummy(a, b, c, d, e, aa, bb, cc, 0.);
    }

    const clock_t t_end = clock();
    const clock_t elapsed = t_end - t_begin;
    const double seconds = (double) (elapsed/1000000.0);
    printf("S4121\t %.2f \t\t", seconds);
    check(1);
    return 0;
}
#endif // TESTS & GLOBAL_DATA_FLOW
#if TESTS & CONTROL_LOOPS
// %5.1
int va()
{
// control loops
// vector assignment
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "va ");
start_t = clock();
for (int nl = 0; nl < ntimes*10; nl++) {
for (int i = 0; i < LEN; i++) {
a[i] = b[i];
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("va\t %.2f \t\t", clock_dif_sec);;
check(1);
return 0;
}
// %5.1
// vag: control loop -- vector assignment with a gather.
// Each pass loads b[ip[i]] into a[i] for i in [0, LEN); the pass is repeated
// 2*ntimes times, with dummy() called after every pass so the work appears
// required.
//   ip: index vector used to gather from b (must hold at least LEN entries).
// Prints the loop label and the measured time (0.00 when clock() is compiled
// out via USE_CLOCK), then calls check(1). Always returns 0.
int vag( int* __restrict__ ip)
{
    init( "vag ");
    const clock_t t_begin = clock();

    for (int rep = 0; rep < 2*ntimes; ++rep) {
        for (int idx = 0; idx < LEN; ++idx) {
            a[idx] = b[ip[idx]];
        }
        dummy(a, b, c, d, e, aa, bb, cc, 0.);
    }

    const clock_t t_end = clock();
    const clock_t elapsed = t_end - t_begin;
    const double seconds = (double) (elapsed/1000000.0);
    printf("vag\t %.2f \t\t", seconds);
    check(1);
    return 0;
}
// %5.1
// vas: control loop -- vector assignment with a scatter.
// Each pass stores b[i] into a[ip[i]] for i in [0, LEN); the pass is
// repeated 2*ntimes times, with dummy() called after every pass so the work
// appears required.
//   ip: index vector giving the destination slot in a for each i (must hold
//       at least LEN entries).
// Prints the loop label and the measured time (0.00 when clock() is compiled
// out via USE_CLOCK), then calls check(1). Always returns 0.
int vas( int* __restrict__ ip)
{
    init( "vas ");
    const clock_t t_begin = clock();

    for (int rep = 0; rep < 2*ntimes; ++rep) {
        for (int idx = 0; idx < LEN; ++idx) {
            a[ip[idx]] = b[idx];
        }
        dummy(a, b, c, d, e, aa, bb, cc, 0.);
    }

    const clock_t t_end = clock();
    const clock_t elapsed = t_end - t_begin;
    const double seconds = (double) (elapsed/1000000.0);
    printf("vas\t %.2f \t\t", seconds);
    check(1);
    return 0;
}
// %5.1
// vif: control loop -- conditional vector assignment.
// Each pass copies b[i] into a[i] only where b[i] is strictly positive, for
// i in [0, LEN); the pass is repeated ntimes times, with dummy() called
// after every pass so the work appears required.
// Prints the loop label and the measured time (0.00 when clock() is compiled
// out via USE_CLOCK), then calls check(1). Always returns 0.
int vif()
{
    init( "vif ");
    const clock_t t_begin = clock();

    for (int rep = 0; rep < ntimes; ++rep) {
        for (int idx = 0; idx < LEN; ++idx) {
            if (b[idx] > (TYPE)0.) {
                a[idx] = b[idx];
            }
        }
        dummy(a, b, c, d, e, aa, bb, cc, 0.);
    }

    const clock_t t_end = clock();
    const clock_t elapsed = t_end - t_begin;
    const double seconds = (double) (elapsed/1000000.0);
    printf("vif\t %.2f \t\t", seconds);
    check(1);
    return 0;
}
// %5.1
// vpv: control loop -- vector plus vector.
// Each pass adds b[i] to a[i] for i in [0, LEN); the pass is repeated
// ntimes*10 times, with dummy() called after every pass so the work appears
// required.
// Prints the loop label and the measured time (0.00 when clock() is compiled
// out via USE_CLOCK), then calls check(1). Always returns 0.
int vpv()
{
    init( "vpv ");
    const clock_t t_begin = clock();

    for (int rep = 0; rep < ntimes*10; ++rep) {
        for (int idx = 0; idx < LEN; ++idx) {
            a[idx] += b[idx];
        }
        dummy(a, b, c, d, e, aa, bb, cc, 0.);
    }

    const clock_t t_end = clock();
    const clock_t elapsed = t_end - t_begin;
    const double seconds = (double) (elapsed/1000000.0);
    printf("vpv\t %.2f \t\t", seconds);
    check(1);
    return 0;
}
// %5.1
// vtv: control loop -- vector times vector.
// Each pass multiplies a[i] by b[i] in place for i in [0, LEN); the pass is
// repeated ntimes*10 times, with dummy() called after every pass so the work
// appears required.
// Prints the loop label and the measured time (0.00 when clock() is compiled
// out via USE_CLOCK), then calls check(1). Always returns 0.
int vtv()
{
    init( "vtv ");
    const clock_t t_begin = clock();

    for (int rep = 0; rep < ntimes*10; ++rep) {
        for (int idx = 0; idx < LEN; ++idx) {
            a[idx] *= b[idx];
        }
        dummy(a, b, c, d, e, aa, bb, cc, 0.);
    }

    const clock_t t_end = clock();
    const clock_t elapsed = t_end - t_begin;
    const double seconds = (double) (elapsed/1000000.0);
    printf("vtv\t %.2f \t\t", seconds);
    check(1);
    return 0;
}
// %5.1
// vpvtv: control loop -- vector plus vector times vector.
// Each pass adds b[i] * c[i] to a[i] for i in [0, LEN); the pass is repeated
// 4*ntimes times, with dummy() called after every pass so the work appears
// required.
// Prints the loop label and the measured time (0.00 when clock() is compiled
// out via USE_CLOCK), then calls check(1). Always returns 0.
int vpvtv()
{
    init( "vpvtv");
    const clock_t t_begin = clock();

    for (int rep = 0; rep < 4*ntimes; ++rep) {
        for (int idx = 0; idx < LEN; ++idx) {
            a[idx] += b[idx] * c[idx];
        }
        dummy(a, b, c, d, e, aa, bb, cc, 0.);
    }

    const clock_t t_end = clock();
    const clock_t elapsed = t_end - t_begin;
    const double seconds = (double) (elapsed/1000000.0);
    printf("vpvtv\t %.2f \t\t", seconds);
    check(1);
    return 0;
}
// %5.1
// vpvts: control loop -- vector plus vector times scalar.
// Each pass adds b[i] * s to a[i] for i in [0, LEN); the pass is repeated
// ntimes times, with dummy() called after every pass so the work appears
// required.
//   s: scalar multiplier applied to every element of b.
// Prints the loop label and the measured time (0.00 when clock() is compiled
// out via USE_CLOCK), then calls check(1). Always returns 0.
int vpvts( TYPE s)
{
    init( "vpvts");
    const clock_t t_begin = clock();

    for (int rep = 0; rep < ntimes; ++rep) {
        for (int idx = 0; idx < LEN; ++idx) {
            a[idx] += b[idx] * s;
        }
        dummy(a, b, c, d, e, aa, bb, cc, 0.);
    }

    const clock_t t_end = clock();
    const clock_t elapsed = t_end - t_begin;
    const double seconds = (double) (elapsed/1000000.0);
    printf("vpvts\t %.2f \t\t", seconds);
    check(1);
    return 0;
}
// %5.1
// vpvpv: control loop -- vector plus vector plus vector.
// Each pass adds b[i] + c[i] to a[i] for i in [0, LEN); the pass is repeated
// 4*ntimes times, with dummy() called after every pass so the work appears
// required.
// Prints the loop label and the measured time (0.00 when clock() is compiled
// out via USE_CLOCK), then calls check(1). Always returns 0.
int vpvpv()
{
    init( "vpvpv");
    const clock_t t_begin = clock();

    for (int rep = 0; rep < 4*ntimes; ++rep) {
        for (int idx = 0; idx < LEN; ++idx) {
            a[idx] += b[idx] + c[idx];
        }
        dummy(a, b, c, d, e, aa, bb, cc, 0.);
    }

    const clock_t t_end = clock();
    const clock_t elapsed = t_end - t_begin;
    const double seconds = (double) (elapsed/1000000.0);
    printf("vpvpv\t %.2f \t\t", seconds);
    check(1);
    return 0;
}
// %5.1
// vtvtv: control loop -- vector times vector times vector.
// Each pass replaces a[i] with a[i] * b[i] * c[i] for i in [0, LEN); the
// pass is repeated 4*ntimes times, with dummy() called after every pass so
// the work appears required.
// Prints the loop label and the measured time (0.00 when clock() is compiled
// out via USE_CLOCK), then calls check(1). Always returns 0.
int vtvtv()
{
    init( "vtvtv");
    const clock_t t_begin = clock();

    for (int rep = 0; rep < 4*ntimes; ++rep) {
        for (int idx = 0; idx < LEN; ++idx) {
            a[idx] = a[idx] * b[idx] * c[idx];
        }
        dummy(a, b, c, d, e, aa, bb, cc, 0.);
    }

    const clock_t t_end = clock();
    const clock_t elapsed = t_end - t_begin;
    const double seconds = (double) (elapsed/1000000.0);
    printf("vtvtv\t %.2f \t\t", seconds);
    check(1);
    return 0;
}
// %5.1
// vsumr: control loop -- vector sum reduction.
// Each pass sums a[i] for i in [0, LEN) starting from zero and hands the
// result to dummy(); the pass is repeated ntimes*10 times. The sum itself is
// not used after the loop.
// Prints the loop label and the measured time (0.00 when clock() is compiled
// out via USE_CLOCK), then calls check(1). Always returns 0.
int vsumr()
{
    init( "vsumr");
    const clock_t t_begin = clock();

    for (int rep = 0; rep < ntimes*10; ++rep) {
        // The accumulator restarts from zero on every pass.
        TYPE total = 0.;
        for (int idx = 0; idx < LEN; ++idx) {
            total += a[idx];
        }
        dummy(a, b, c, d, e, aa, bb, cc, total);
    }

    const clock_t t_end = clock();
    const clock_t elapsed = t_end - t_begin;
    const double seconds = (double) (elapsed/1000000.0);
    printf("vsumr\t %.2f \t\t", seconds);
    check(1);
    return 0;
}
// %5.1
// vdotr: control loop -- vector dot product reduction.
// Each pass accumulates a[i] * b[i] for i in [0, LEN) starting from zero and
// hands the result to dummy(); the pass is repeated ntimes*10 times. The
// last dot product is stored in the global temp before check(-1) is called.
// Prints the loop label and the measured time (0.00 when clock() is compiled
// out via USE_CLOCK). Always returns 0.
int vdotr()
{
    clock_t start_t, end_t, clock_dif;
    double clock_dif_sec;

    init( "vdotr");
    start_t = clock();

    // Initialized so that temp receives a defined value even when the
    // repetition count is <= 0 and the outer loop never runs.
    TYPE dot = 0.;
    for (int nl = 0; nl < ntimes*10; nl++) {
        dot = 0.;
        for (int i = 0; i < LEN; i++) {
            dot += a[i] * b[i];
        }
        dummy(a, b, c, d, e, aa, bb, cc, dot);
    }

    end_t = clock();
    clock_dif = end_t - start_t;
    clock_dif_sec = (double) (clock_dif/1000000.0);
    printf("vdotr\t %.2f \t\t", clock_dif_sec);
    temp = dot;
    check(-1);
    return 0;
}
// %5.1
int vbor()
{
// control loops
// basic operations rates, isolate arithmetic from memory traffic
// all combinations of three, 59 flops for 6 loads and 1 store.
clock_t start_t, end_t, clock_dif; double clock_dif_sec;
init( "vbor ");
start_t = clock();
TYPE a1, b1, c1, d1, e1, f1;
for (int nl = 0; nl < ntimes; nl++) {
for (int i = 0; i < LEN; i++) {
a1 = a[i];
b1 = b[i];
c1 = c[i];
d1 = d[i];
e1 = e[i];
f1 = aa[0][i];
a1 = a1 * b1 * c1 + a1 * b1 * d1 + a1 * b1 * e1 + a1 * b1 * f1 +
a1 * c1 * d1 + a1 * c1 * e1 + a1 * c1 * f1 + a1 * d1 * e1
+ a1 * d1 * f1 + a1 * e1 * f1;
b1 = b1 * c1 * d1 + b1 * c1 * e1 + b1 * c1 * f1 + b1 * d1 * e1 +
b1 * d1 * f1 + b1 * e1 * f1;
c1 = c1 * d1 * e1 + c1 * d1 * f1 + c1 * e1 * f1;
d1 = d1 * e1 * f1;
x[i] = a1 * b1 * c1 * d1;
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
end_t = clock(); clock_dif = end_t - start_t;
clock_dif_sec = (double) (clock_dif/1000000.0);
printf("vbor\t %.2f \t\t", clock_dif_sec);;
temp = 0.;
for (int i = 0; i < LEN; i++){
temp += x[i];
}
check(-1);
return 0;
}
#endif // TESTS & CONTROL_LOOPS
// set: one-time setup of the shared benchmark state.
//   ip: index vector of at least LEN ints, filled with a fixed permutation.
//   s1: receives 1.0.
//   s2: receives 2.0.
// Allocates the global xx buffer (LEN elements of TYPE, ALIGNMENT-aligned),
// prints a blank line, fills ip and indx, and seeds the data arrays through
// set1d()/set2d() (defined elsewhere). Terminates the program with
// EXIT_FAILURE if xx cannot be allocated.
void set(int* ip, TYPE* s1, TYPE* s2){
    // posix_memalign reports failure through its return value; stop here
    // rather than let later loops run against an unusable xx.
    if (posix_memalign((void **) &xx, ALIGNMENT, LEN*sizeof(TYPE)) != 0) {
        fprintf(stderr, "set: posix_memalign failed for xx\n");
        exit(EXIT_FAILURE);
    }
    printf("\n");

    // Within each group of five consecutive slots, ip holds the group's own
    // indices in the order +4, +2, +0, +3, +1. The loop writes five entries
    // per step, so LEN must be a multiple of 5 (the file header requires a
    // multiple of 40).
    for (int i = 0; i < LEN; i = i+5){
        ip[i] = (i+4);
        ip[i+1] = (i+2);
        ip[i+2] = (i);
        ip[i+3] = (i+3);
        ip[i+4] = (i+1);
    }

    set1d(a, 1.,1);
    set1d(b, 1.,1);
    set1d(c, 1.,1);
    set1d(d, 1.,1);
    set1d(e, 1.,1);
    set2d(aa, 0.,-1);
    set2d(bb, 0.,-1);
    set2d(cc, 0.,-1);

    // indx cycles through the values 1..4.
    for (int i = 0; i < LEN; i++){
        indx[i] = (i+1) % 4+1;
    }

    *s1 = 1.0;
    *s2 = 2.0;
}
// main: benchmark driver.
//   argv[1] (optional): overrides ntimes, the base repetition count.
//   argv[2] (optional): overrides digits.
// Allocates the shared index vector, calls set() to initialize the data,
// prints the table header, then runs every loop whose category bit is
// enabled in TESTS, in a fixed order. Returns 0 on success and 1 if the
// index vector cannot be allocated.
int main(int argc, char *argv[]){
    int n1 = 1;
    int n3 = 1;
    int* ip = NULL;
    TYPE s1,s2;

    // ip holds LEN ints, so it is sized by its own element type. The
    // allocation result is checked because every indirect-addressing loop
    // dereferences ip.
    if (posix_memalign((void **) &ip, ALIGNMENT, LEN*sizeof(*ip)) != 0) {
        fprintf(stderr, "main: posix_memalign failed for the index vector\n");
        return 1;
    }

    if (argc > 1) {
        ntimes = atoi(argv[1]);
    }
    printf("Running each loop %d times...\n", ntimes);
    if (argc > 2) {
        digits = atoi(argv[2]);
    }

    set(ip, &s1, &s2);
    printf("Loop \t Time(Sec) \t Checksum \n");

#if TESTS & LINEAR_DEPENDENCE
    s000();
    s111();
    s1111();
    s112();
    s1112();
    s113();
    s1113();
    s114();
    s115();
    s1115();
    s116();
    s118();
    s119();
    s1119();
#endif
#if TESTS & INDUCTION_VARIABLE
    s121();
    s122(n1,n3);
    s123();
    s124();
    s125();
    s126();
    s127();
    s128();
#endif
#if TESTS & GLOBAL_DATA_FLOW
    s131();
    s132();
    s141();
    s151();
    s152();
#endif
#if TESTS & CONTROL_FLOW
    s161();
    s1161();
    s162(n1);
#endif
#if TESTS & SYMBOLICS
    s171(n1);
    s172(n1,n3);
    s173();
    s174(LEN/2);
    s175(n1);
    s176();
#endif
#if TESTS & STATEMENT_REORDERING
    s211();
    s212();
    s1213();
#endif
#if TESTS & LOOP_RESTRUCTURING
    s221();
    s1221();
    s222();
    s231();
    s232();
    s1232();
    s233();
    s2233();
    s235();
#endif
#if TESTS & NODE_SPLITTING
    s241();
    s242(s1, s2);
    s243();
    s244();
    s1244();
    s2244();
#endif
#if TESTS & EXPANSION
    s251();
    s1251();
    s2251();
    s3251();
    s252();
    s253();
    s254();
    s255();
    s256();
    s257();
    s258();
    s261();
#endif
#if TESTS & CONTROL_FLOW
    s271();
    s272(s1);
    s273();
    s274();
    s275();
    s2275();
    s276();
    s277();
    s278();
    s279();
    s1279();
    s2710(s1);
    s2711();
    s2712();
#endif
#if TESTS & CROSSING_THRESHOLDS
    s281();
    s1281();
    s291();
    s292();
    s293();
    s2101();
    s2102();
    s2111();
#endif
#if TESTS & REDUCTIONS
    s311();
    s31111();
    s312();
    s313();
    s314();
    s315();
    s316();
    s317();
    s318(n1);
    s319();
    s3110();
    s13110();
    s3111();
    s3112();
    s3113();
#endif
#if TESTS & RECURRENCES
    s321();
    s322();
    s323();
#endif
#if TESTS & SEARCHING
    s331();
    s332(s1);
#endif
#if TESTS & PACKING
    s341();
    s342();
    s343();
#endif
#if TESTS & LOOP_REROLLING
    s351();
    s1351();
    s352();
    s353(ip);
#endif
#if TESTS & EQUIVALENCING
    s421();
    s1421();
    s422();
    s423();
    s424();
#endif
#if TESTS & GLOBAL_DATA_FLOW
    s431();
#endif
#if TESTS & CONTROL_FLOW
    s441();
    s442();
    s443();
#endif
#if TESTS & GLOBAL_DATA_FLOW
    s451();
    s452();
#endif
#if TESTS & INDUCTION_VARIABLE
    s453();
#endif
#if TESTS & GLOBAL_DATA_FLOW
    s471();
#endif
#if TESTS & CONTROL_FLOW
    s481();
    s482();
#endif
#if TESTS & INDIRECT_ADDRESSING
    s491(ip);
    s4112(ip, s1);
    s4113(ip);
    s4114(ip,n1);
    s4115(ip);
    s4116(ip, LEN2/2, n1);
    s4117();
#endif
#if TESTS & GLOBAL_DATA_FLOW
    s4121();
#endif
#if TESTS & CONTROL_LOOPS
    va();
    vag(ip);
    vas(ip);
    vif();
    vpv();
    vtv();
    vpvtv();
    vpvts(s1);
    vpvpv();
    vtvtv();
    vsumr();
    vdotr();
    vbor();
#endif

    // Release the index vector now that no loop needs it.
    free(ip);
    return 0;
}