blob: d8cd421bf306d84e436f7e04554122d8e5b8552d [file] [log] [blame]
#include <memory>
#include <random>
#define DEFINE_SCALAR_AND_VECTOR_FN2(Init, Loop) \
auto ScalarFn = [](auto *A, auto *B, unsigned TC) { \
Init _Pragma("clang loop vectorize(disable) interleave_count(1)") Loop \
}; \
auto VectorFn = [](auto *A, auto *B, unsigned TC) { \
Init _Pragma("clang loop vectorize(enable)") Loop \
};
#define DEFINE_SCALAR_AND_VECTOR_FN3(Loop) \
auto ScalarFn = [](auto *A, auto *B, auto *C, unsigned TC) { \
_Pragma("clang loop vectorize(disable) interleave_count(1)") Loop \
}; \
auto VectorFn = [](auto *A, auto *B, auto *C, unsigned TC) { \
_Pragma("clang loop vectorize(enable)") Loop \
};
#define DEFINE_NESTED_SCALAR_AND_VECTOR_FN4(InnerLoopCode) \
auto ScalarFn = [](auto *A, auto *B, unsigned OuterTC, unsigned InnerTC) { \
for (unsigned long i = 0; i < OuterTC; i++) { \
_Pragma("clang loop vectorize(disable) interleave_count(1)") \
for (unsigned long j = 0; j < InnerTC; j++) { \
InnerLoopCode \
} \
} \
}; \
auto VectorFn = [](auto *A, auto *B, unsigned OuterTC, unsigned InnerTC) { \
for (unsigned long i = 0; i < OuterTC; i++) { \
_Pragma("clang loop vectorize(enable)") \
for (unsigned long j = 0; j < InnerTC; j++) { \
InnerLoopCode \
} \
} \
};
#define DEFINE_NESTED_SCALAR_AND_VECTOR_FN5(InnerLoopCode) \
auto ScalarFn = [](auto *A, auto *B, unsigned OuterTC, unsigned InnerTC) { \
for (long i = OuterTC - 1; i >= 0; i--) { \
_Pragma("clang loop vectorize(disable) interleave_count(1)") \
for (unsigned long j = 0; j < InnerTC; j++) { \
InnerLoopCode \
} \
} \
}; \
auto VectorFn = [](auto *A, auto *B, unsigned OuterTC, unsigned InnerTC) { \
for (long i = OuterTC - 1; i >= 0; i--) { \
_Pragma("clang loop vectorize(enable)") \
for (unsigned long j = 0; j < InnerTC; j++) { \
InnerLoopCode \
} \
} \
};
static std::mt19937 rng;
// Initialize arrays A with random numbers.
template <typename Ty>
static void init_data(const std::unique_ptr<Ty[]> &A, unsigned N) {
std::uniform_int_distribution<uint64_t> distrib(
std::numeric_limits<Ty>::min(), std::numeric_limits<Ty>::max());
for (unsigned i = 0; i < N; i++)
A[i] = distrib(rng);
}
template <typename Ty>
static void check(const std::unique_ptr<Ty[]> &Reference,
const std::unique_ptr<Ty[]> &Tmp, unsigned NumElements) {
if (!std::equal(&Reference[0], &Reference[0] + NumElements, &Tmp[0])) {
std::cerr << "Miscompare\n";
exit(1);
}
}