// MicroBenchmarks/LoopVectorization/MathFunctions.cpp - llvm-test-suite - Git at Google

 #include <iostream>
 #include <math.h>
 #include <memory>
 #include <random>

 #include "benchmark/benchmark.h"

 #define N 10000

 // Kernel under test: for every index i in [0, N), store Fn(A[i]) + Fn(B[i])
 // into C[i]. The loop carries no vectorization pragma, so the compiler's
 // default loop vectorization settings apply.
 //
 // A, B: input arrays, read at indices [0, N).
 // C:    output array, written at indices [0, N).
 // Fn:   unary function applied to each input element.
 template <typename T> static void run_fn_autovec(T *A, T *B, T *C, T (*Fn)(T)) {
   for (unsigned i = 0; i < N; i++) {
     C[i] = Fn(A[i]) + Fn(B[i]);
   }
 }

 // Baseline kernel: for every index i in [0, N), store Fn(A[i]) + Fn(B[i])
 // into C[i]. The loop body matches run_fn_autovec; the only difference is the
 // pragma below, which disables both vectorization and interleaving for it.
 //
 // A, B: input arrays, read at indices [0, N).
 // C:    output array, written at indices [0, N).
 // Fn:   unary function applied to each input element.
 template <typename T> static void run_fn_novec(T *A, T *B, T *C, T (*Fn)(T)) {
   // The pragma applies to the loop statement that immediately follows it.
 #pragma clang loop vectorize(disable) interleave(disable)
   for (unsigned i = 0; i < N; i++) {
     C[i] = Fn(A[i]) + Fn(B[i]);
   }
 }

 // Fill the first N elements of A, B and C with pseudo-random values.
 //
 // Values come from a std::uniform_real_distribution<T> constructed with the
 // bounds -100 and 100, fed by a std::mt19937 seeded with the constant 12345,
 // so every call produces the same sequence. For each index the draws happen
 // in the order A, then B, then C.
 template <typename T> static void init_data(T *A, T *B, T *C) {
   std::mt19937 Engine(12345);
   std::uniform_real_distribution<T> Uniform(-100, 100);
   for (unsigned Idx = 0; Idx != N; ++Idx) {
     A[Idx] = Uniform(Engine);
     B[Idx] = Uniform(Engine);
     C[Idx] = Uniform(Engine);
   }
 }

 // Benchmark body for the auto-vectorized kernel applied to Fn.
 //
 // Allocates three N-element arrays, fills them through init_data, and calls
 // run_fn_autovec once per benchmark iteration. When BENCH_AND_VERIFY is
 // defined, the novec and autovec kernels are first run on the same inputs
 // and their outputs compared; on a mismatch a message is written to
 // std::cerr and the process exits with status 1.
 //
 // state: benchmark iteration state driving the timed loop.
 // Fn:    unary math function handed to the kernel.
 template <typename T>
 static void __attribute__((always_inline))
 benchmark_fn_autovec(benchmark::State &state, T (*Fn)(T)) {
   std::unique_ptr<T[]> A(new T[N]);
   std::unique_ptr<T[]> B(new T[N]);
   std::unique_ptr<T[]> C(new T[N]);
   init_data(A.get(), B.get(), C.get());

 #ifdef BENCH_AND_VERIFY
   // Cross-check the vectorized kernel against the non-vectorized one.
   {
     std::unique_ptr<T[]> Reference(new T[N]);
     for (unsigned Idx = 0; Idx != N; ++Idx)
       Reference[Idx] = C[Idx];

     run_fn_novec(A.get(), B.get(), Reference.get(), Fn);
     run_fn_autovec(A.get(), B.get(), C.get(), Fn);
     for (unsigned Idx = 0; Idx != N; ++Idx) {
       // Values that compare equal always pass.
       if (C[Idx] == Reference[Idx])
         continue;
       // Unequal values are still accepted when fpclassify reports the same
       // floating-point class for both of them.
       if (fpclassify(C[Idx]) == fpclassify(Reference[Idx]))
         continue;
       std::cerr << "ERROR: autovec result different to scalar result "
                 << C[Idx] << " != " << Reference[Idx] << " at index " << Idx
                 << "\n";
       exit(1);
     }
   }
 #endif

   for (auto _ : state) {
     run_fn_autovec(A.get(), B.get(), C.get(), Fn);
     // Barriers provided by the benchmark library; presumably they keep the
     // compiler from discarding the kernel's loads and stores.
     benchmark::DoNotOptimize(A);
     benchmark::DoNotOptimize(B);
     benchmark::DoNotOptimize(C);
     benchmark::ClobberMemory();
   }
 }

 // Benchmark body for the kernel with loop vectorization disabled.
 //
 // Allocates three N-element arrays, fills them through init_data, and calls
 // run_fn_novec once per benchmark iteration.
 //
 // state: benchmark iteration state driving the timed loop.
 // Fn:    unary math function handed to the kernel.
 template <typename T>
 static void __attribute__((always_inline))
 benchmark_fn_novec(benchmark::State &state, T (*Fn)(T)) {
   std::unique_ptr<T[]> A(new T[N]);
   std::unique_ptr<T[]> B(new T[N]);
   std::unique_ptr<T[]> C(new T[N]);
   init_data(&A[0], &B[0], &C[0]);

   for (auto _ : state) {
     run_fn_novec(&A[0], &B[0], &C[0], Fn);
     benchmark::DoNotOptimize(A);
     benchmark::DoNotOptimize(B);
     benchmark::DoNotOptimize(C);
     // Use the same barrier sequence as benchmark_fn_autovec so the two timed
     // loops differ only in the kernel being measured.
     benchmark::ClobberMemory();
   }
 }

 // Define and register two benchmarks for math function fn at element type ty:
 //   BENCHMARK_<fn>_autovec_<ty>_  calls benchmark_fn_autovec<ty>(state, fn)
 //   BENCHMARK_<fn>_novec_<ty>_    calls benchmark_fn_novec<ty>(state, fn)
 // Each generated function is passed to BENCHMARK(...) and configured with
 // ->Unit(benchmark::kMicrosecond); the autovec variant is registered first.
 // The function names are built by token pasting, so fn and ty must each be a
 // single identifier.
 #define ADD_BENCHMARK(fn, ty)                                                  \
   void BENCHMARK_##fn##_autovec_##ty##_(benchmark::State &state) {             \
     benchmark_fn_autovec<ty>(state, fn);                                       \
   }                                                                            \
   BENCHMARK(BENCHMARK_##fn##_autovec_##ty##_)->Unit(benchmark::kMicrosecond);  \
                                                                                \
   void BENCHMARK_##fn##_novec_##ty##_(benchmark::State &state) {               \
     benchmark_fn_novec<ty>(state, fn);                                         \
   }                                                                            \
   BENCHMARK(BENCHMARK_##fn##_novec_##ty##_)->Unit(benchmark::kMicrosecond);

 // Benchmark registrations. Each math function appears as a pair: the
 // f-suffixed function with float elements, then the unsuffixed function with
 // double elements. Every ADD_BENCHMARK line produces an autovec and a novec
 // benchmark.
 ADD_BENCHMARK(expf, float)
 ADD_BENCHMARK(exp, double)

 ADD_BENCHMARK(acosf, float)
 ADD_BENCHMARK(acos, double)

 ADD_BENCHMARK(asinf, float)
 ADD_BENCHMARK(asin, double)

 ADD_BENCHMARK(atanf, float)
 ADD_BENCHMARK(atan, double)

 ADD_BENCHMARK(cbrtf, float)
 ADD_BENCHMARK(cbrt, double)

 ADD_BENCHMARK(erff, float)
 ADD_BENCHMARK(erf, double)

 ADD_BENCHMARK(cosf, float)
 ADD_BENCHMARK(cos, double)

 ADD_BENCHMARK(sinf, float)
 ADD_BENCHMARK(sin, double)

 ADD_BENCHMARK(sinhf, float)
 ADD_BENCHMARK(sinh, double)
	#include <iostream>
	#include <math.h>
	#include <memory>
	#include <random>

	#include "benchmark/benchmark.h"

	#define N 10000

	// Kernel under test: for every index i in [0, N), store Fn(A[i]) + Fn(B[i])
	// into C[i]. The loop carries no vectorization pragma, so the compiler's
	// default loop vectorization settings apply.
	//
	// A, B: input arrays, read at indices [0, N).
	// C:    output array, written at indices [0, N).
	// Fn:   unary function applied to each input element.
	//
	// The arrays are taken as T* and Fn as T (*)(T) so that T deduces to the
	// element type at call sites that pass &A[0] together with a T (*)(T).
	template <typename T> static void run_fn_autovec(T *A, T *B, T *C, T (*Fn)(T)) {
	  for (unsigned i = 0; i < N; i++) {
	    C[i] = Fn(A[i]) + Fn(B[i]);
	  }
	}

	// Baseline kernel: for every index i in [0, N), store Fn(A[i]) + Fn(B[i])
	// into C[i], with vectorization and interleaving disabled for the loop by
	// the pragma below.
	//
	// A, B: input arrays, read at indices [0, N).
	// C:    output array, written at indices [0, N).
	// Fn:   unary function applied to each input element.
	//
	// The arrays are taken as T* and Fn as T (*)(T) so that T deduces to the
	// element type at call sites that pass &A[0] together with a T (*)(T).
	template <typename T> static void run_fn_novec(T *A, T *B, T *C, T (*Fn)(T)) {
	  // The pragma applies to the loop statement that immediately follows it.
	#pragma clang loop vectorize(disable) interleave(disable)
	  for (unsigned i = 0; i < N; i++) {
	    C[i] = Fn(A[i]) + Fn(B[i]);
	  }
	}

	// Fill the first N elements of A, B and C with pseudo-random values.
	//
	// Values come from a std::uniform_real_distribution<T> constructed with the
	// bounds -100 and 100, fed by a std::mt19937 seeded with the constant 12345,
	// so every call produces the same sequence. For each index the draws happen
	// in the order A, then B, then C.
	//
	// All three arrays are taken as T* so that T is the element type, which is
	// also the type the distribution is instantiated with.
	template <typename T> static void init_data(T *A, T *B, T *C) {
	  std::uniform_real_distribution<T> dist(-100, 100);
	  std::mt19937 rng(12345);
	  for (unsigned i = 0; i < N; i++) {
	    A[i] = dist(rng);
	    B[i] = dist(rng);
	    C[i] = dist(rng);
	  }
	}

	// Benchmark body for the auto-vectorized kernel applied to Fn.
	//
	// Allocates three N-element arrays, fills them through init_data, and calls
	// run_fn_autovec once per benchmark iteration. When BENCH_AND_VERIFY is
	// defined, the novec and autovec kernels are first run on the same inputs
	// and their outputs compared; on a mismatch a message is written to
	// std::cerr and the process exits with status 1.
	//
	// state: benchmark iteration state driving the timed loop.
	// Fn:    unary math function handed to the kernel.
	template <typename T>
	static void __attribute__((always_inline))
	benchmark_fn_autovec(benchmark::State &state, T (*Fn)(T)) {
	  std::unique_ptr<T[]> A(new T[N]);
	  std::unique_ptr<T[]> B(new T[N]);
	  std::unique_ptr<T[]> C(new T[N]);
	  init_data(A.get(), B.get(), C.get());

	#ifdef BENCH_AND_VERIFY
	  // Cross-check the vectorized kernel against the non-vectorized one.
	  {
	    std::unique_ptr<T[]> Reference(new T[N]);
	    for (unsigned Idx = 0; Idx != N; ++Idx)
	      Reference[Idx] = C[Idx];

	    run_fn_novec(A.get(), B.get(), Reference.get(), Fn);
	    run_fn_autovec(A.get(), B.get(), C.get(), Fn);
	    for (unsigned Idx = 0; Idx != N; ++Idx) {
	      // Values that compare equal always pass.
	      if (C[Idx] == Reference[Idx])
	        continue;
	      // Unequal values are still accepted when fpclassify reports the same
	      // floating-point class for both of them.
	      if (fpclassify(C[Idx]) == fpclassify(Reference[Idx]))
	        continue;
	      std::cerr << "ERROR: autovec result different to scalar result "
	                << C[Idx] << " != " << Reference[Idx] << " at index " << Idx
	                << "\n";
	      exit(1);
	    }
	  }
	#endif

	  for (auto _ : state) {
	    run_fn_autovec(A.get(), B.get(), C.get(), Fn);
	    // Barriers provided by the benchmark library; presumably they keep the
	    // compiler from discarding the kernel's loads and stores.
	    benchmark::DoNotOptimize(A);
	    benchmark::DoNotOptimize(B);
	    benchmark::DoNotOptimize(C);
	    benchmark::ClobberMemory();
	  }
	}

	// Benchmark body for the kernel with loop vectorization disabled.
	//
	// Allocates three N-element arrays, fills them through init_data, and calls
	// run_fn_novec once per benchmark iteration.
	//
	// state: benchmark iteration state driving the timed loop.
	// Fn:    unary math function handed to the kernel.
	template <typename T>
	static void __attribute__((always_inline))
	benchmark_fn_novec(benchmark::State &state, T (*Fn)(T)) {
	  std::unique_ptr<T[]> A(new T[N]);
	  std::unique_ptr<T[]> B(new T[N]);
	  std::unique_ptr<T[]> C(new T[N]);
	  init_data(&A[0], &B[0], &C[0]);

	  for (auto _ : state) {
	    run_fn_novec(&A[0], &B[0], &C[0], Fn);
	    benchmark::DoNotOptimize(A);
	    benchmark::DoNotOptimize(B);
	    benchmark::DoNotOptimize(C);
	    // Use the same barrier sequence as benchmark_fn_autovec so the two timed
	    // loops differ only in the kernel being measured.
	    benchmark::ClobberMemory();
	  }
	}

	// Define and register two benchmarks for math function fn at element type ty:
	//   BENCHMARK_<fn>_autovec_<ty>_  calls benchmark_fn_autovec<ty>(state, fn)
	//   BENCHMARK_<fn>_novec_<ty>_    calls benchmark_fn_novec<ty>(state, fn)
	// Each generated function is passed to BENCHMARK(...) and configured with
	// ->Unit(benchmark::kMicrosecond); the autovec variant is registered first.
	// The function names are built by token pasting, so fn and ty must each be a
	// single identifier.
	#define ADD_BENCHMARK(fn, ty) \
	void BENCHMARK_##fn##_autovec_##ty##_(benchmark::State &state) { \
	benchmark_fn_autovec<ty>(state, fn); \
	} \
	BENCHMARK(BENCHMARK_##fn##_autovec_##ty##_)->Unit(benchmark::kMicrosecond); \
	\
	void BENCHMARK_##fn##_novec_##ty##_(benchmark::State &state) { \
	benchmark_fn_novec<ty>(state, fn); \
	} \
	BENCHMARK(BENCHMARK_##fn##_novec_##ty##_)->Unit(benchmark::kMicrosecond);

	// Benchmark registrations. Each math function appears as a pair: the
	// f-suffixed function with float elements, then the unsuffixed function with
	// double elements. Every ADD_BENCHMARK line produces an autovec and a novec
	// benchmark.
	ADD_BENCHMARK(expf, float)
	ADD_BENCHMARK(exp, double)

	ADD_BENCHMARK(acosf, float)
	ADD_BENCHMARK(acos, double)

	ADD_BENCHMARK(asinf, float)
	ADD_BENCHMARK(asin, double)

	ADD_BENCHMARK(atanf, float)
	ADD_BENCHMARK(atan, double)

	ADD_BENCHMARK(cbrtf, float)
	ADD_BENCHMARK(cbrt, double)

	ADD_BENCHMARK(erff, float)
	ADD_BENCHMARK(erf, double)

	ADD_BENCHMARK(cosf, float)
	ADD_BENCHMARK(cos, double)

	ADD_BENCHMARK(sinf, float)
	ADD_BENCHMARK(sin, double)

	ADD_BENCHMARK(sinhf, float)
	ADD_BENCHMARK(sinh, double)