blob: 3f47a5c783f63320f0fd797899154ec1d2148547 [file] [log] [blame]
/*
fftbench
Standard C++ version
10 May 2003
Written by Scott Robert Ladd.
No rights reserved. This is public domain software, for use by anyone.
A number-crunching benchmark that can be used as a fitness test for
evolving optimal compiler options via genetic algorithm.
A simplified version of the FFT component from my standard libcoyote
library, this benchmark test FFT performance on complex<double> values.
Note that the code herein is design for the purpose of testing
computational performance; error handling and other such "niceties"
is virtually non-existent.
Actual benchmark results can be found at:
http://www.coyotegulch.com
Please do not use this information or algorithm in any way that might
upset the balance of the universe or to produce imaginary friends for
imaginary numbers.
*/
#include <cmath>
#include <complex>
#include <ctime>
#include <cstring>
#include <iostream>
#include <iomanip>
#include <stdexcept>
using namespace std;
//---------------------------------------------------------------------------
// number of loops to perform
#if defined(ACOVEA)
#if defined(arch_pentium4)
static const int TEST_SIZE = 262144;
#else
static const int TEST_SIZE = 131072;
#endif
#else
#ifdef SMALL_PROBLEM_SIZE
static const int TEST_SIZE = 50000;
#else
static const int TEST_SIZE = 2097152/4;
#endif
#endif
//---------------------------------------------------------------------------
// embedded random number generator; ala Park and Miller
static double random_double()
{
static long seed = 1325;
static const long IA = 16807;
static const long IM = 2147483647;
static const double AM = 4.65661287525E-9;
static const long IQ = 127773;
static const long IR = 2836;
static const long MASK = 123459876;
long k;
double result;
seed ^= MASK;
k = seed / IQ;
seed = IA * (seed - k * IQ) - IR * k;
if (seed < 0)
seed += IM;
result = AM * seed;
seed ^= MASK;
return result;
}
//---------------------------------------------------------------------------
template <class T>
class polynomial
{
public:
// creation constructor, uninitialized (coefficients will be unknown values)
polynomial(size_t degree);
// creation constructor, with initialization from array
polynomial(size_t degree, const T * coefficients);
// creation constructor, with single-value initialization constructor
polynomial(size_t degree, const T & value);
// copy constructor
polynomial(const polynomial<T> & source);
// destructor
virtual ~polynomial();
// assignment
polynomial<T> & operator = (const polynomial<T> & source);
// initialize to specific value
void initialize(const T & value = T(0));
// increase polynomial length
polynomial<T> & stretch(size_t degrees);
// interrogate for degree
size_t degree() const
{
return m_degree;
}
// get specific coefficient
T get(size_t term) const;
T & operator [] (size_t term);
// evaluate for a specific value
T operator() (const T & x) const;
// unitary operators
polynomial<T> operator - () const;
polynomial<T> operator + () const;
// binary mathematical operators
polynomial<T> operator + (const polynomial<T> & poly) const;
polynomial<T> operator - (const polynomial<T> & poly) const;
// constant required by FFT routines
static const complex<T> PI2I;
// returns largest power of two that holds n
static size_t log2(size_t n);
// reverses a sequence of bits
static size_t flip_bits(size_t k, size_t bits);
// stretches the length of a polynomial to a power of two
size_t stretch_fft();
// performs a reverse-bit copy of a polynomial<T> into a new polynomial< complex<T> >
static polynomial< complex<T> > bit_reverse(const polynomial<T> & poly);
static polynomial< complex<T> > bit_reverse(const polynomial< complex<T> > & poly);
// Fast Fourier Transform of polynomial<T> to polynomial< complex<T> >
static polynomial< complex<T> > fft(const polynomial<T> & poly);
// inverse FFT of polynomial< complex<T> > to polynomial<T>
static polynomial< complex<T> > inverse_fft(const polynomial< complex<T> > & poly);
// multiplication via FFT
polynomial<T> operator * (const polynomial<T> & poly) const;
// shorthand mathematical operators
polynomial<T> & operator += (const polynomial<T> & poly);
polynomial<T> & operator -= (const polynomial<T> & poly);
polynomial<T> & operator *= (const polynomial<T> & poly);
protected:
// coefficients
T * m_coeff;
// number of terms
size_t m_degree;
// acquire resources
void acquire();
// release resources
void release();
// deep copy
void deep_copy(const T * source);
};
// acquire resources
template <typename T>
inline void polynomial<T>::acquire()
{
m_coeff = new T [m_degree];
}
// release resources
template <typename T>
inline void polynomial<T>::release()
{
delete [] m_coeff;
}
// deep copy
template <typename T>
inline void polynomial<T>::deep_copy(const T * source)
{
for (size_t n = 0; n < m_degree; ++n)
m_coeff[n] = source[n];
}
// initialize to specific value
template <typename T>
inline void polynomial<T>::initialize(const T & value)
{
for (size_t n = 0; n < m_degree; ++n)
m_coeff[n] = value;
}
// creation constructor, uninitialized (coefficients will be unknown values)
template <typename T>
polynomial<T>::polynomial(size_t degree)
: m_coeff(NULL),
m_degree(degree)
{
acquire();
}
// creation constructor, with possible initialization from array
template <typename T>
polynomial<T>::polynomial(size_t degree, const T * coefficients)
: m_coeff(NULL),
m_degree(degree)
{
acquire();
if (coefficients != NULL)
deep_copy(coefficients);
}
// creation constructor, with single-value initialization constructor
template <typename T>
polynomial<T>::polynomial(size_t degree, const T & value)
: m_coeff(NULL),
m_degree(degree)
{
acquire();
initialize(value);
}
// copy constructor
template <typename T>
polynomial<T>::polynomial(const polynomial<T> & source)
: m_coeff(NULL),
m_degree(source.m_degree)
{
acquire();
deep_copy(source.m_coeff);
}
// destructor
template <typename T>
polynomial<T>::~polynomial()
{
release();
}
// assignment
template <typename T>
polynomial<T> & polynomial<T>::operator = (const polynomial<T> & source)
{
if (m_degree != source.m_degree)
{
release();
m_degree = source.m_degree;
acquire();
}
deep_copy(source.m_coeff);
return *this;
}
// increase polynomial length
template <typename T>
polynomial<T> & polynomial<T>::stretch(size_t degrees)
{
if (degrees != 0)
{
T * old_coeff = m_coeff;
size_t old_degree = m_degree;
m_degree += degrees;
acquire();
size_t n = 0;
for (; n < old_degree; ++n)
m_coeff[n] = old_coeff[n];
for (; n < m_degree; ++n)
m_coeff[n] = T(0);
}
return *this;
}
// get specific coefficient
template <typename T>
T polynomial<T>::get(size_t term) const
{
return m_coeff[term];
}
template <typename T>
T & polynomial<T>::operator [] (size_t term)
{
return m_coeff[term];
}
// evaluate for a specific value
template <typename T>
T polynomial<T>::operator() (const T & x) const
{
// using Horner's Rule
T y = m_coeff[m_degree - 1];
size_t i = m_degree - 2;
while (true)
{
y = x * y + m_coeff[i];
if (i > 0)
--i;
else
break;
}
return y;
}
// unitary operators
template <typename T>
polynomial<T> polynomial<T>::operator - () const
{
polynomial<T> result(m_degree); // uninitialized
for (size_t n = 0; n < m_degree; ++n)
result.m_coeff[n] = -m_coeff[n];
return result;
}
template <typename T>
polynomial<T> polynomial<T>::operator + () const
{
return polynomial<T>(*this);
}
// binary mathematical operators
template <typename T>
polynomial<T> polynomial<T>::operator + (const polynomial<T> & poly) const
{
if (m_degree >= poly.m_degree)
{
polynomial<T> result(*this);
for (size_t n = 0; n < m_degree; ++n)
result.m_coeff[n] += poly.m_coeff[n];
return result;
}
else
{
polynomial<T> result(poly);
for (size_t n = 0; n < m_degree; ++n)
result.m_coeff[n] += m_coeff[n];
return result;
}
}
template <typename T>
polynomial<T> polynomial<T>::operator - (const polynomial<T> & poly) const
{
if (m_degree >= poly.m_degree)
{
polynomial<T> result(*this);
for (size_t n = 0; n < m_degree; ++n)
result.m_coeff[n] -= poly.m_coeff[n];
return result;
}
else
{
polynomial<T> result(poly);
for (size_t n = 0; n < m_degree; ++n)
result.m_coeff[n] -= m_coeff[n];
return result;
}
}
// constant required by FFT routines
template <typename T>
const complex<T> polynomial<T>::PI2I(0.0,6.283185307179586);
// returns largest power of two that holds n
template <typename T>
size_t polynomial<T>::log2(size_t n)
{
// returns 1 if n == 0!
size_t x = 1, c = 0;
while (x < n)
{
++c;
x <<= 1;
if (x == 0)
break;
}
return c;
}
// reverses a sequence of bits
template <typename T>
size_t polynomial<T>::flip_bits(size_t k, size_t bits)
{
size_t lm = 1 << (bits - 1);
size_t rm = 1;
size_t r = 0;
while (lm)
{
if (k & rm)
r |= (lm);
lm >>= 1;
rm <<= 1;
}
return r;
}
// stretches the length of a polynomial to a power of two
template <class T>
size_t polynomial<T>::stretch_fft()
{
size_t n = 1;
while (true)
{
if (m_degree <= n)
break;
n <<= 1;
if (n == 0)
throw overflow_error("overflow in fft polynomial stretch");
}
n <<= 1;
n -= m_degree;
if (n > 0)
stretch(n);
return n;
}
// performs a reverse-bit copy of a polynomial<T> into a new polynomial< complex<T> >
template <typename T>
polynomial< complex<T> > polynomial<T>::bit_reverse(const polynomial<T> & poly)
{
size_t b = log2(poly.degree());
polynomial< complex<T> > result(poly.degree());
for (size_t n = 0; n < poly.degree(); ++n)
result[flip_bits(n,b)] = poly.get(n);
return result;
}
// performs a reverse-bit copy of a polynomial<T> into a new polynomial< complex<T> >
template <typename T>
polynomial< complex<T> > polynomial<T>::bit_reverse(const polynomial< complex<T> > & poly)
{
size_t b = log2(poly.degree());
polynomial< complex<T> > result(poly.degree());
for (size_t n = 0; n < poly.degree(); ++n)
result[flip_bits(n,b)] = poly.get(n);
return result;
}
// Fast Fourier Transform of polynomial<T> to polynomial< complex<T> >
template <typename T>
polynomial< complex<T> > polynomial<T>::fft(const polynomial<T> & poly)
{
size_t nl = log2(poly.degree());
size_t j, k, m, m2, s;
complex<T> wm, w, t, u;
polynomial< complex<T> > result = bit_reverse(poly);
m = 2;
m2 = 1;
for (s = 0; s < nl; ++s)
{
wm = exp(PI2I / complex<T>(m));
w = 1.0;
for (j = 0; j <= (m2 - 1); ++j)
{
for (k = j; k <= poly.degree() - 1; k += m)
{
t = w * result[k + m2];
u = result[k];
result[k] = u + t;
result[k + m2] = u - t;
}
w *= wm;
}
m <<= 1;
m2 <<= 1;
}
return result;
}
// inverse FFT of polynomial< complex<T> > to polynomial<T>
template <typename T>
polynomial< complex<T> > polynomial<T>::inverse_fft(const polynomial< complex<T> > & poly)
{
size_t nl = log2(poly.degree());
size_t j, k, m, m2, s;
complex<T> wm, w, t, u;
polynomial< complex<T> > result = bit_reverse(poly);
m = 2;
m2 = 1;
for (s = 0; s < nl; ++s)
{
wm = exp(-PI2I / complex<T>(m));
w = 1.0;
for (j = 0; j <= (m2 - 1); ++j)
{
for (k = j; k <= poly.degree() - 1; k += m)
{
t = w * result[k + m2];
u = result[k];
result[k] = u + t;
result[k + m2] = u - t;
}
w *= wm;
}
m <<= 1;
m2 <<= 1;
}
for (j = 0; j < poly.degree(); ++j)
result[j] /= double(poly.degree());
return result;
}
// multiplication by FFT
template <typename T>
polynomial<T> polynomial<T>::operator * (const polynomial<T> & poly) const
{
#if defined(BRUTE_FORCE)
// brute force algorithm
if (m_degree != poly.m_degree)
throw domain_error("can not multiply two polynomials with different lengths");
polynomial<T> result(2 * m_degree - 1, T(0));
for (size_t n1 = 0; n1 < m_degree; ++n1)
for (size_t n2 = 0; n2 < m_degree; ++n2)
result.m_coeff[n1 + n2] += m_coeff[n1] * poly.m_coeff[n2];
#else
// duplicate p1 and p2 to preserve original objects
polynomial<T> a1(*this);
polynomial<T> a2(poly);
// expand polynomials to next-largest power of two
if (a1.degree() > a2.degree())
a2.stretch(a1.stretch_fft());
else
a1.stretch(a2.stretch_fft());
// FFT polynomials
polynomial< complex<T> > dft1 = fft(a1);
polynomial< complex<T> > dft2 = fft(a2);
// multiply coefficients
size_t n2 = a1.degree();
for (size_t k = 0; k < n2; ++k)
dft1[k] *= dft2[k];
// inverse DFT to obtain result
dft2 = inverse_fft(dft1);
// convert back to complex<T>
--n2;
polynomial<T> result(n2);
for (size_t k = 0; k < n2; ++k)
result[k] = dft2[k].real();
#endif
return result;
}
// shorthand mathematical operators
template <typename T>
polynomial<T> & polynomial<T>::operator += (const polynomial<T> & poly)
{
size_t length = (m_degree <= poly.m_degree) ? m_degree : poly.m_degree;
for (size_t n = 0; n < length; ++n)
m_coeff[n] += poly.m_coeff[n];
return *this;
}
template <typename T>
polynomial<T> & polynomial<T>::operator -= (const polynomial<T> & poly)
{
size_t length = (m_degree <= poly.m_degree) ? m_degree : poly.m_degree;
for (size_t n = 0; n < length; ++n)
m_coeff[n] -= poly.m_coeff[n];
return *this;
}
template <typename T>
polynomial<T> & polynomial<T>::operator *= (const polynomial<T> & poly)
{
*this = (*this) * poly;
}
//---------------------------------------------------------------------------
// Entry point
int main(int argc, char ** argv)
{
// are we testing using a genetic algorithm?
bool ga_testing = false;
if (argc > 1)
{
for (int i = 1; i < argc; ++i)
{
if (!strcmp(argv[1],"-ga"))
{
ga_testing = true;
break;
}
}
}
// generate random polynomials
polynomial<double> poly1(TEST_SIZE);
polynomial<double> poly2(TEST_SIZE);
polynomial<double> poly3(TEST_SIZE * 2 - 1);
for (int n = 0; n < TEST_SIZE; ++n)
{
poly1[n] = random_double();
poly2[n] = random_double();
}
// get starting time
//struct timespec start, stop;
//clock_gettime(CLOCK_REALTIME,&start);
// what we're timing
poly3 = poly1 * poly2;
// get final time
//clock_gettime(CLOCK_REALTIME,&stop);
double run_time = 0;
//(stop.tv_sec - start.tv_sec) + (double)(stop.tv_nsec - start.tv_nsec) / 1000000000.0;
// report runtime
if (ga_testing)
cout << run_time;
else
cout << "\nfftbench (Std. C++) run time: " << run_time << "\n\n";
cout.flush();
return 0;
}