| /*--------------------- Start flops.c source code ----------------------*/ |
| |
| /*****************************/ |
| /* flops.c */ |
| /* Version 2.0, 18 Dec 1992 */ |
| /* Al Aburto */ |
| /* aburto@nosc.mil */ |
| /*****************************/ |
| |
| /* |
| Flops.c is a 'c' program which attempts to estimate your system's |
| floating-point 'MFLOPS' rating for the FADD, FSUB, FMUL, and FDIV |
| operations based on specific 'instruction mixes' (discussed below). |
| The program provides an estimate of PEAK MFLOPS performance by making |
| maximal use of register variables with minimal interaction with main |
| memory. The execution loops are all small so that they will fit in |
| any cache. Flops.c can be used along with Linpack and the Livermore |
| kernels (which exercise memory much more extensively) to gain further |
| insight into the limits of system performance. The flops.c execution |
| modules also include various percent weightings of FDIV's (from 0% to |
| 25% FDIV's) so that the range of performance can be obtained when |
| using FDIV's. FDIV's, being computationally more intensive than |
| FADD's or FMUL's, can impact performance considerably on some systems. |
| |
| Flops.c consists of 8 independent modules (routines) which, except for |
| module 2, conduct numerical integration of various functions. Module |
| 2, estimates the value of pi based upon the Maclaurin series expansion |
| of atan(1). MFLOPS ratings are provided for each module, but the |
| program's overall results are summarized by the MFLOPS(1), MFLOPS(2), |
| MFLOPS(3), and MFLOPS(4) outputs. |
| |
| The MFLOPS(1) result is identical to the result provided by all |
| previous versions of flops.c. It is based only upon the results from |
| modules 2 and 3. Two problems surfaced in using MFLOPS(1). First, it |
| was difficult to completely 'vectorize' the result due to the |
| recurrence of the 's' variable in module 2. This problem is addressed |
| in the MFLOPS(2) result which does not use module 2, but maintains |
| nearly the same weighting of FDIV's (9.2%) as in MFLOPS(1) (9.6%). |
| The second problem with MFLOPS(1) centers around the percentage of |
| FDIV's (9.6%) which was viewed as too high for an important class of |
| problems. This concern is addressed in the MFLOPS(3) result (3.4% |
| FDIV's) and the MFLOPS(4) result, where NO FDIV's are conducted at all. |
| |
| The number of floating-point instructions per iteration (loop) is |
| given below for each module executed: |
| |
| MODULE FADD FSUB FMUL FDIV TOTAL Comment |
| 1 7 0 6 1 14 7.1% FDIV's |
| 2 3 2 1 1 7 difficult to vectorize. |
| 3 6 2 9 0 17 0.0% FDIV's |
| 4 7 0 8 0 15 0.0% FDIV's |
| 5 13 0 15 1 29 3.4% FDIV's |
| 6 13 0 16 0 29 0.0% FDIV's |
| 7 3 3 3 3 12 25.0% FDIV's |
| 8 13 0 17 0 30 0.0% FDIV's |
| |
| A*2+3 21 12 14 5 52 A=5, MFLOPS(1), Same as |
| 40.4% 23.1% 26.9% 9.6% previous versions of the |
| flops.c program. Includes |
| only Modules 2 and 3, does |
| 9.6% FDIV's, and is not |
| easily vectorizable. |
| |
| 1+3+4 58 14 66 14 152 A=4, MFLOPS(2), New output |
| +5+6+ 38.2% 9.2% 43.4% 9.2% does not include Module 2, |
| A*7 but does 9.2% FDIV's. |
| |
| 1+3+4 62 5 74 5 146 A=0, MFLOPS(3), New output |
| +5+6+ 42.5% 3.4% 50.7% 3.4% does not include Module 2, |
| 7+8 but does 3.4% FDIV's. |
| |
| 3+4+6 39 2 50 0 91 A=0, MFLOPS(4), New output |
| +8 42.9% 2.2% 54.9% 0.0% does not include Module 2, |
| and does NO FDIV's. |
| |
| NOTE: Various timer routines are included as indicated below. The |
| timer routines, with some comments, are attached at the end |
| of the main program. |
| |
| NOTE: Please do not remove any of the printouts. |
| |
| EXAMPLE COMPILATION: |
| UNIX based systems |
| cc -DUNIX -O flops.c -o flops |
| cc -DUNIX -DROPT flops.c -o flops |
| cc -DUNIX -fast -O4 flops.c -o flops |
| . |
| . |
| . |
| etc. |
| |
| Al Aburto |
| aburto@nosc.mil |
| */ |
| |
/*
 * Timer selection.
 *
 * One timer option must be active: either define it in this section or
 * pass it on the compiler command line (for example '-DUNIX').  GTODay is
 * the option enabled in this file.
 *
 * Option names listed for this program:
 *   Amiga, UNIX, UNIX_Old, VMS, BORLAND_C, MSC, MAC, IPSC, FORTRAN_SEC,
 *   GTODay, CTimer, UXPM, MAC_TMgr, PARIX, POSIX, WIN32, POSIX1
 *
 * NOTE(review): the timer routines that consume these macros are not part
 * of the code visible here -- confirm which options are still supported.
 */
#define GTODay
| |
| #include <stdio.h> |
| #include <math.h> |
| /* 'Uncomment' the line below to run */ |
| /* with 'register double' variables */ |
| /* defined, or compile with the */ |
| /* '-DROPT' option. Don't need this if */ |
| /* registers used automatically, but */ |
| /* you might want to try it anyway. */ |
| /* #define ROPT */ |
| |
/*
 * Timing state.  nulltime and TimeArray are documented as being needed by
 * dtime(); that routine is not part of the code visible here.
 */
double nulltime;
double TimeArray[3];

/*
 * Described as the threshold that determines the number of loops to run.
 * main() assigns it 1.0.
 */
double TLimit;

/*
 * Holds timing results and other information.  main() stores
 * 1.0E+06 / loops in T[1].
 */
double T[36];

/* Scratch results and small constants shared by the benchmark code. */
double sa, sb, sc, sd;
double one, two, three, four, five;
double piref, piprg;
double scale, pierr;

/*
 * Coefficient tables A*, B* and C*.
 * NOTE(review): not referenced by the code visible here; the magnitudes
 * are close to reciprocal factorials, so they are presumably polynomial
 * approximation coefficients for other modules -- confirm.
 */
double A0 = 1.0,
       A1 = -0.1666666666671334,
       A2 = 0.833333333809067E-2,
       A3 = 0.198412715551283E-3,
       A4 = 0.27557589750762E-5,
       A5 = 0.2507059876207E-7,
       A6 = 0.164105986683E-9;

double B0 = 1.0,
       B1 = -0.4999999999982,
       B2 = 0.4166666664651E-1,
       B3 = -0.1388888805755E-2,
       B4 = 0.24801428034E-4,
       B5 = -0.2754213324E-6,
       B6 = 0.20189405E-8;

double C0 = 1.0,
       C1 = 0.99999999668,
       C2 = 0.49999995173,
       C3 = 0.16666704243,
       C4 = 0.4166685027E-1,
       C5 = 0.832672635E-2,
       C6 = 0.140836136E-2,
       C7 = 0.17358267E-3,
       C8 = 0.3931683E-4;

/*
 * Module 1 integrand coefficients.  The denominator polynomial is
 * 1 + D1*u + E2*u^2 + E3*u^3 and the numerator is D1 + D2*u + D3*u^2;
 * D2 equals 2*E2 and D3 equals 3*E3.
 */
double D1 = 0.3999999946405E-1,
       D2 = 0.96E-3,
       D3 = 0.1233153E-5;

double E2 = 0.48E-3,
       E3 = 0.411051E-6;
| |
| int main() |
| { |
| |
| #ifdef ROPT |
| register double s,u,v,w,x; |
| #else |
| double s,u,v,w,x; |
| #endif |
| |
| long loops, NLimit; |
| register long i, m, n; |
| |
| printf("\n"); |
| printf(" FLOPS C Program (Double Precision), V2.0 18 Dec 1992\n\n"); |
| |
| /****************************/ |
| loops = 15625; /* Initial number of loops. */ |
| /* DO NOT CHANGE! */ |
| /****************************/ |
| |
| /****************************************************/ |
| /* Set Variable Values. */ |
| /* T[1] references all timing results relative to */ |
| /* one million loops. */ |
| /* */ |
| /* The program will execute from 31250 to 512000000 */ |
| /* loops based on a runtime of Module 1 of at least */ |
| /* TLimit = 15.0 seconds. That is, a runtime of 15 */ |
| /* seconds for Module 1 is used to determine the */ |
| /* number of loops to execute. */ |
| /* */ |
| /* No more than NLimit = 512000000 loops are allowed*/ |
| /****************************************************/ |
| |
| T[1] = 1.0E+06/(double)loops; |
| |
| TLimit = 1.0; |
| NLimit = 512000000; |
| |
| piref = 3.14159265358979324; |
| one = 1.0; |
| two = 2.0; |
| three = 3.0; |
| four = 4.0; |
| five = 5.0; |
| scale = one; |
| |
| printf(" Module Error RunTime MFLOPS\n"); |
| printf(" (usec)\n"); |
| |
| /*******************************************************/ |
| /* Module 1. Calculate integral of df(x)/f(x) defined */ |
| /* below. Result is ln(f(1)). There are 14 */ |
| /* double precision operations per loop */ |
| /* ( 7 +, 0 -, 6 *, 1 / ) that are included */ |
| /* in the timing. */ |
| /* 50.0% +, 00.0% -, 42.9% *, and 07.1% / */ |
| /*******************************************************/ |
| #ifdef SMALL_PROBLEM_SIZE |
| n = loops*200; |
| #else |
| n = loops*10000; |
| #endif |
| sa = 0.0; |
| |
| n = 2 * n; |
| x = one / (double)n; /*********************/ |
| s = 0.0; /* Loop 1. */ |
| v = 0.0; /*********************/ |
| w = one; |
| |
| for( i = 1 ; i <= n-1 ; i++ ) |
| { |
| v = v + w; |
| u = v * x; |
| s = s + (D1+u*(D2+u*D3))/(w+u*(D1+u*(E2+u*E3))); |
| } |
| |
| sa = (D1+D2+D3)/(one+D1+E2+E3); |
| sb = D1; |
| |
| sa = x * ( sa + sb + two * s ) / two; /* Module 1 Results. */ |
| sb = one / sa; /*********************/ |
| n = (long)( (double)( 40000 * (long)sb ) / scale ); |
| sc = sb - 25.2; |
| /********************/ |
| /* DO NOT REMOVE */ |
| /* THIS PRINTOUT! */ |
| /********************/ |
| printf(" 1 %13.4lf %10.4lf %10.4lf\n", |
| sc* /* stabilize output */ 1e-30, |
| 0* /* stabilize output */ 1e-30, |
| 0 * /* stabilize output */ 1e-30); |
| return 0; |
| } |
| |
| |