blob: ead007625c2ecf09a6a9433a93e6978b9b07d5af [file] [log] [blame] [edit]
//===- PointerUnionBM.cpp - Benchmark for PointerUnion ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Benchmarks for PointerUnion operations with randomized data.
//
//===----------------------------------------------------------------------===//
#include "benchmark/benchmark.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/Support/Casting.h"
#include <cstddef>
#include <random>
#include <vector>
using namespace llvm;
namespace llvm {
namespace {
// Aligned slot types with controlled NumLowBitsAvailable.
template <int N> struct alignas(4) A4 {
int data;
}; // 2 bits.
template <int N> struct alignas(8) A8 {
int data;
}; // 3 bits.
template <int N> struct alignas(32) A32 {
int data;
}; // 5 bits.
template <typename T> T &getSlot() {
static T S{};
return S;
}
// Encodes a list of slot types for random fill.
template <typename... Ts> struct TypeList {};
template <typename UnionT, typename... SlotTs>
void fillRandom(UnionT *Arr, size_t Count, TypeList<SlotTs...>) {
constexpr size_t N = sizeof...(SlotTs);
UnionT Variants[N] = {UnionT(&getSlot<SlotTs>())...};
std::mt19937 Rng(42);
std::uniform_int_distribution<size_t> Dist(0, N - 1);
for (size_t I = 0; I < Count; ++I)
Arr[I] = Variants[Dist(Rng)];
}
template <typename UnionT, typename... SlotTs>
void fillRandomWithNulls(UnionT *Arr, size_t Count, TypeList<SlotTs...> TL) {
fillRandom(Arr, Count, TL);
std::mt19937 Rng(123);
// 50% null mix to exercise both null and non-null paths equally.
std::bernoulli_distribution Coin(0.5);
for (size_t I = 0; I < Count; ++I)
if (Coin(Rng))
Arr[I] = nullptr;
}
// A8Types<N> = TypeList<A8<0>, A8<1>, ..., A8<N-1>>.
// Used to generate randomized arrays with a uniform mix of N distinct types.
template <size_t... Is>
TypeList<A8<Is>...> makeA8TypesImpl(std::index_sequence<Is...>);
template <size_t N>
using A8Types = decltype(makeA8TypesImpl(std::make_index_sequence<N>{}));
// Splits N types into two tiers: N0 types in tier 0 (A4, 2-bit) and
// N1 types in tier 1 (A32, 5-bit). Tier 0 gets up to 3 types (the max
// for 2 low bits minus one escape code).
template <size_t N> struct TwoTierSplit {
static constexpr size_t N0 = N < 4 ? N - 1 : 3;
static constexpr size_t N1 = N - N0;
};
// Splits N types into three tiers: N0 types in tier 0 (A4, 2-bit),
// N1 = 1 type in tier 1 (A8, 3-bit), and N2 types in tier 2 (A32, 5-bit).
// Tier 0 gets up to 3 types; tier 1 always gets exactly 1.
template <size_t N> struct ThreeTierSplit {
static constexpr size_t N0 = N < 5 ? N - 2 : 3;
static constexpr size_t N1 = 1;
static constexpr size_t N2 = N - N0 - N1;
};
// A4A32Types<N> = TypeList<A4<0>, ..., A4<N0-1>, A32<0>, ..., A32<N1-1>>.
// Used to generate randomized arrays with a mix of A4 and A32 types.
template <size_t... I0, size_t... I1>
TypeList<A4<I0>..., A32<I1>...> makeA4A32TypesImpl(std::index_sequence<I0...>,
std::index_sequence<I1...>);
template <size_t N>
using A4A32Types = decltype(makeA4A32TypesImpl(
std::make_index_sequence<TwoTierSplit<N>::N0>{},
std::make_index_sequence<TwoTierSplit<N>::N1>{}));
// A4A8A32Types<N> = TypeList<A4<0>, ..., A8<0>, ..., A32<0>, ...>.
// Used to generate randomized arrays with a mix of A4, A8, and A32 types.
template <size_t... I0, size_t... I1, size_t... I2>
TypeList<A4<I0>..., A8<I1>..., A32<I2>...>
makeA4A8A32TypesImpl(std::index_sequence<I0...>, std::index_sequence<I1...>,
std::index_sequence<I2...>);
template <size_t N>
using A4A8A32Types = decltype(makeA4A8A32TypesImpl(
std::make_index_sequence<ThreeTierSplit<N>::N0>{},
std::make_index_sequence<ThreeTierSplit<N>::N1>{},
std::make_index_sequence<ThreeTierSplit<N>::N2>{}));
// Union type aliases.
template <size_t... Is>
auto makePtrUnion(std::index_sequence<Is...>) -> PointerUnion<A8<Is> *...>;
template <size_t N>
using PtrUnion = decltype(makePtrUnion(std::make_index_sequence<N>{}));
template <size_t... I0, size_t... I1>
auto makePtrUnion2T(std::index_sequence<I0...>, std::index_sequence<I1...>)
-> PointerUnion<A4<I0> *..., A32<I1> *...>;
template <size_t N>
using PtrUnion2T =
decltype(makePtrUnion2T(std::make_index_sequence<TwoTierSplit<N>::N0>{},
std::make_index_sequence<TwoTierSplit<N>::N1>{}));
template <size_t... I0, size_t... I1, size_t... I2>
auto makePtrUnion3T(std::index_sequence<I0...>, std::index_sequence<I1...>,
std::index_sequence<I2...>)
-> PointerUnion<A4<I0> *..., A8<I1> *..., A32<I2> *...>;
template <size_t N>
using PtrUnion3T =
decltype(makePtrUnion3T(std::make_index_sequence<ThreeTierSplit<N>::N0>{},
std::make_index_sequence<ThreeTierSplit<N>::N1>{},
std::make_index_sequence<ThreeTierSplit<N>::N2>{}));
// Isa: random type mix, uniform distribution (~1/N hit rate for each type).
template <typename UnionT, typename QueryT, typename TL>
static void BM_Isa(benchmark::State &State) {
constexpr size_t Batch = 1024;
std::vector<UnionT> Arr(Batch);
fillRandom(Arr.data(), Batch, TL{});
benchmark::DoNotOptimize(Arr.data());
for (auto _ : State) {
bool R = false;
for (size_t I = 0; I < Batch; ++I)
R ^= isa<QueryT *>(Arr[I]);
benchmark::DoNotOptimize(R);
}
State.SetItemsProcessed(State.iterations() * Batch);
}
// IsNull: random mix with ~50% nulls.
template <typename UnionT, typename TL>
static void BM_IsNull(benchmark::State &State) {
constexpr size_t Batch = 1024;
std::vector<UnionT> Arr(Batch);
fillRandomWithNulls(Arr.data(), Batch, TL{});
benchmark::DoNotOptimize(Arr.data());
for (auto _ : State) {
bool R = false;
for (size_t I = 0; I < Batch; ++I)
R ^= Arr[I].isNull();
benchmark::DoNotOptimize(R);
}
State.SetItemsProcessed(State.iterations() * Batch);
}
} // namespace
} // namespace llvm
// Registration -- N = 2, 4, 8. PtrUnion3T uses N = 3, 4, 8.
// Isa: PtrUnion (fixed-width tag).
BENCHMARK((BM_Isa<PtrUnion<2>, A8<0>, A8Types<2>>))->Name("Isa/PU/2/First");
BENCHMARK((BM_Isa<PtrUnion<2>, A8<1>, A8Types<2>>))->Name("Isa/PU/2/Last");
BENCHMARK((BM_Isa<PtrUnion<4>, A8<0>, A8Types<4>>))->Name("Isa/PU/4/First");
BENCHMARK((BM_Isa<PtrUnion<4>, A8<3>, A8Types<4>>))->Name("Isa/PU/4/Last");
BENCHMARK((BM_Isa<PtrUnion<8>, A8<0>, A8Types<8>>))->Name("Isa/PU/8/First");
BENCHMARK((BM_Isa<PtrUnion<8>, A8<7>, A8Types<8>>))->Name("Isa/PU/8/Last");
// Isa: PtrUnion2T (variable-width, 2 alignment tiers).
// Note: PtrUnion2T<N> uses variable-width encoding only when N > 3 (when
// fixed-width tags don't fit in 2 bits). PtrUnion2T<2> is still fixed-width.
BENCHMARK((BM_Isa<PtrUnion2T<2>, A4<0>, A4A32Types<2>>))
->Name("Isa/PU2T/2/Tier0");
BENCHMARK((BM_Isa<PtrUnion2T<2>, A32<0>, A4A32Types<2>>))
->Name("Isa/PU2T/2/Tier1");
BENCHMARK((BM_Isa<PtrUnion2T<4>, A4<0>, A4A32Types<4>>))
->Name("Isa/PU2T/4/Tier0");
BENCHMARK((BM_Isa<PtrUnion2T<4>, A32<0>, A4A32Types<4>>))
->Name("Isa/PU2T/4/Tier1");
BENCHMARK((BM_Isa<PtrUnion2T<8>, A4<0>, A4A32Types<8>>))
->Name("Isa/PU2T/8/Tier0");
BENCHMARK((BM_Isa<PtrUnion2T<8>, A32<4>, A4A32Types<8>>))
->Name("Isa/PU2T/8/Tier1");
// Isa: PtrUnion3T (variable-width, 3 alignment tiers).
// Note: PtrUnion3T<N> uses variable-width encoding only when N > 4 (when
// fixed-width tags don't fit in 2 bits). PtrUnion3T<3> and <4> are
// still fixed-width.
BENCHMARK((BM_Isa<PtrUnion3T<3>, A4<0>, A4A8A32Types<3>>))
->Name("Isa/PU3T/3/Tier0");
BENCHMARK((BM_Isa<PtrUnion3T<3>, A8<0>, A4A8A32Types<3>>))
->Name("Isa/PU3T/3/Tier1");
BENCHMARK((BM_Isa<PtrUnion3T<3>, A32<0>, A4A8A32Types<3>>))
->Name("Isa/PU3T/3/Tier2");
BENCHMARK((BM_Isa<PtrUnion3T<4>, A4<0>, A4A8A32Types<4>>))
->Name("Isa/PU3T/4/Tier0");
BENCHMARK((BM_Isa<PtrUnion3T<4>, A8<0>, A4A8A32Types<4>>))
->Name("Isa/PU3T/4/Tier1");
BENCHMARK((BM_Isa<PtrUnion3T<4>, A32<0>, A4A8A32Types<4>>))
->Name("Isa/PU3T/4/Tier2");
BENCHMARK((BM_Isa<PtrUnion3T<8>, A4<0>, A4A8A32Types<8>>))
->Name("Isa/PU3T/8/Tier0");
BENCHMARK((BM_Isa<PtrUnion3T<8>, A8<0>, A4A8A32Types<8>>))
->Name("Isa/PU3T/8/Tier1");
BENCHMARK((BM_Isa<PtrUnion3T<8>, A32<3>, A4A8A32Types<8>>))
->Name("Isa/PU3T/8/Tier2");
// IsNull: all suites.
BENCHMARK((BM_IsNull<PtrUnion<2>, A8Types<2>>))->Name("IsNull/PU/2");
BENCHMARK((BM_IsNull<PtrUnion<4>, A8Types<4>>))->Name("IsNull/PU/4");
BENCHMARK((BM_IsNull<PtrUnion<8>, A8Types<8>>))->Name("IsNull/PU/8");
BENCHMARK((BM_IsNull<PtrUnion2T<2>, A4A32Types<2>>))->Name("IsNull/PU2T/2");
BENCHMARK((BM_IsNull<PtrUnion2T<4>, A4A32Types<4>>))->Name("IsNull/PU2T/4");
BENCHMARK((BM_IsNull<PtrUnion2T<8>, A4A32Types<8>>))->Name("IsNull/PU2T/8");
BENCHMARK((BM_IsNull<PtrUnion3T<3>, A4A8A32Types<3>>))->Name("IsNull/PU3T/3");
BENCHMARK((BM_IsNull<PtrUnion3T<4>, A4A8A32Types<4>>))->Name("IsNull/PU3T/4");
BENCHMARK((BM_IsNull<PtrUnion3T<8>, A4A8A32Types<8>>))->Name("IsNull/PU3T/8");
BENCHMARK_MAIN();