blob: c9081f547812e90a54f44411591cb7ad4001245b [file] [log] [blame]
//===- TrieRawHashMapTest.cpp ---------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/TrieRawHashMap.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/SHA1.h"
#include "gtest/gtest.h"
using namespace llvm;
namespace llvm {
class TrieRawHashMapTestHelper {
public:
TrieRawHashMapTestHelper() = default;
void setTrie(ThreadSafeTrieRawHashMapBase *T) { Trie = T; }
ThreadSafeTrieRawHashMapBase::PointerBase getRoot() const {
return Trie->getRoot();
}
unsigned getStartBit(ThreadSafeTrieRawHashMapBase::PointerBase P) const {
return Trie->getStartBit(P);
}
unsigned getNumBits(ThreadSafeTrieRawHashMapBase::PointerBase P) const {
return Trie->getNumBits(P);
}
unsigned getNumSlotUsed(ThreadSafeTrieRawHashMapBase::PointerBase P) const {
return Trie->getNumSlotUsed(P);
}
unsigned getNumTries() const { return Trie->getNumTries(); }
std::string
getTriePrefixAsString(ThreadSafeTrieRawHashMapBase::PointerBase P) const {
return Trie->getTriePrefixAsString(P);
}
ThreadSafeTrieRawHashMapBase::PointerBase
getNextTrie(ThreadSafeTrieRawHashMapBase::PointerBase P) const {
return Trie->getNextTrie(P);
}
private:
ThreadSafeTrieRawHashMapBase *Trie = nullptr;
};
} // namespace llvm
namespace {
template <typename DataType, size_t HashSize = sizeof(uint64_t)>
class SimpleTrieHashMapTest : public TrieRawHashMapTestHelper,
public ::testing::Test {
public:
using NumType = DataType;
using HashType = std::array<uint8_t, HashSize>;
using TrieType = ThreadSafeTrieRawHashMap<DataType, sizeof(HashType)>;
TrieType &createTrie(size_t RootBits, size_t SubtrieBits) {
auto &Ret = Trie.emplace(RootBits, SubtrieBits);
TrieRawHashMapTestHelper::setTrie(&Ret);
return Ret;
}
void destroyTrie() { Trie.reset(); }
~SimpleTrieHashMapTest() { destroyTrie(); }
// Use the number itself as hash to test the pathological case.
static HashType hash(uint64_t Num) {
uint64_t HashN =
llvm::support::endian::byte_swap(Num, llvm::endianness::big);
HashType Hash;
memcpy(&Hash[0], &HashN, sizeof(HashType));
return Hash;
};
private:
std::optional<TrieType> Trie;
};
using SmallNodeTrieTest = SimpleTrieHashMapTest<uint64_t>;
TEST_F(SmallNodeTrieTest, TrieAllocation) {
NumType Numbers[] = {
0x0, std::numeric_limits<NumType>::max(), 0x1, 0x2,
0x3, std::numeric_limits<NumType>::max() - 1u,
};
unsigned ExpectedTries[] = {
1, // Allocate Root.
1, // Both on the root.
64, // 0 and 1 sinks all the way down.
64, // no new allocation needed.
65, // need a new node between 2 and 3.
65 + 63, // 63 new allocation to sink two big numbers all the way.
};
const char *ExpectedPrefix[] = {
"", // Root.
"", // Root.
"00000000000000[0000000]",
"00000000000000[0000000]",
"00000000000000[0000001]",
"ffffffffffffff[1111111]",
};
// Use root and subtrie sizes of 1 so this gets sunk quite deep.
auto &Trie = createTrie(/*RootBits=*/1, /*SubtrieBits=*/1);
for (unsigned I = 0; I < 6; ++I) {
// Lookup first to exercise hint code for deep tries.
TrieType::pointer Lookup = Trie.find(hash(Numbers[I]));
EXPECT_FALSE(Lookup);
Trie.insert(Lookup, TrieType::value_type(hash(Numbers[I]), Numbers[I]));
EXPECT_EQ(getNumTries(), ExpectedTries[I]);
EXPECT_EQ(getTriePrefixAsString(getNextTrie(getRoot())), ExpectedPrefix[I]);
}
}
TEST_F(SmallNodeTrieTest, TrieStructure) {
NumType Numbers[] = {
// Three numbers that will nest deeply to test (1) sinking subtries and
// (2) deep, non-trivial hints.
std::numeric_limits<NumType>::max(),
std::numeric_limits<NumType>::max() - 2u,
std::numeric_limits<NumType>::max() - 3u,
// One number to stay at the top-level.
0x37,
};
// Use root and subtrie sizes of 1 so this gets sunk quite deep.
auto &Trie = createTrie(/*RootBits=*/1, /*SubtrieBits=*/1);
for (NumType N : Numbers) {
// Lookup first to exercise hint code for deep tries.
TrieType::pointer Lookup = Trie.find(hash(N));
EXPECT_FALSE(Lookup);
Trie.insert(Lookup, TrieType::value_type(hash(N), N));
}
for (NumType N : Numbers) {
TrieType::pointer Lookup = Trie.find(hash(N));
EXPECT_TRUE(Lookup);
if (!Lookup)
continue;
EXPECT_EQ(hash(N), Lookup->Hash);
EXPECT_EQ(N, Lookup->Data);
// Confirm a subsequent insertion fails to overwrite by trying to insert a
// bad value.
auto Result = Trie.insert(Lookup, TrieType::value_type(hash(N), N - 1));
EXPECT_EQ(N, Result->Data);
}
// Check the trie so we can confirm the structure is correct. Each subtrie
// should have 2 slots. The root's index=0 should have the content for
// 0x37 directly, and index=1 should be a linked-list of subtries, finally
// ending with content for (max-2) and (max-3).
//
// Note: This structure is not exhaustive (too expensive to update tests),
// but it does test that the dump format is somewhat readable and that the
// basic structure is correct.
//
// Note: This test requires that the trie reads bytes starting from index 0
// of the array of uint8_t, and then reads each byte's bits from high to low.
// Check the Trie.
// We should allocated a total of 64 SubTries for 64 bit hash.
ASSERT_EQ(getNumTries(), 64u);
// Check the root trie. Two slots and both are used.
ASSERT_EQ(getNumSlotUsed(getRoot()), 2u);
// Check last subtrie.
// Last allocated trie is the next node in the allocation chain.
auto LastAlloctedSubTrie = getNextTrie(getRoot());
ASSERT_EQ(getTriePrefixAsString(LastAlloctedSubTrie),
"ffffffffffffff[1111110]");
ASSERT_EQ(getStartBit(LastAlloctedSubTrie), 63u);
ASSERT_EQ(getNumBits(LastAlloctedSubTrie), 1u);
ASSERT_EQ(getNumSlotUsed(LastAlloctedSubTrie), 2u);
}
TEST_F(SmallNodeTrieTest, TrieStructureSmallFinalSubtrie) {
NumType Numbers[] = {
// Three numbers that will nest deeply to test (1) sinking subtries and
// (2) deep, non-trivial hints.
std::numeric_limits<NumType>::max(),
std::numeric_limits<NumType>::max() - 2u,
std::numeric_limits<NumType>::max() - 3u,
// One number to stay at the top-level.
0x37,
};
// Use subtrie size of 5 to avoid hitting 64 evenly, making the final subtrie
// small.
auto &Trie = createTrie(/*RootBits=*/8, /*SubtrieBits=*/5);
for (NumType N : Numbers) {
// Lookup first to exercise hint code for deep tries.
TrieType::pointer Lookup = Trie.find(hash(N));
EXPECT_FALSE(Lookup);
Trie.insert(Lookup, TrieType::value_type(hash(N), N));
}
for (NumType N : Numbers) {
TrieType::pointer Lookup = Trie.find(hash(N));
ASSERT_TRUE(Lookup);
EXPECT_EQ(hash(N), Lookup->Hash);
EXPECT_EQ(N, Lookup->Data);
// Confirm a subsequent insertion fails to overwrite by trying to insert a
// bad value.
auto Result = Trie.insert(Lookup, TrieType::value_type(hash(N), N - 1));
EXPECT_EQ(N, Result->Data);
}
// Check the trie so we can confirm the structure is correct. The root
// should have 2^8=256 slots, most subtries should have 2^5=32 slots, and the
// deepest subtrie should have 2^1=2 slots (since (64-8)mod(5)=1).
// should have 2 slots. The root's index=0 should have the content for
// 0x37 directly, and index=1 should be a linked-list of subtries, finally
// ending with content for (max-2) and (max-3).
//
// Note: This structure is not exhaustive (too expensive to update tests),
// but it does test that the dump format is somewhat readable and that the
// basic structure is correct.
//
// Note: This test requires that the trie reads bytes starting from index 0
// of the array of uint8_t, and then reads each byte's bits from high to low.
// Check the Trie.
// 64 bit hash = 8 + 5 * 11 + 1, so 1 root, 11 8bit subtrie and 1 last level
// subtrie, 13 total.
ASSERT_EQ(getNumTries(), 13u);
// Check the root trie. Two slots and both are used.
ASSERT_EQ(getNumSlotUsed(getRoot()), 2u);
// Check last subtrie.
// Last allocated trie is the next node in the allocation chain.
auto LastAlloctedSubTrie = getNextTrie(getRoot());
ASSERT_EQ(getTriePrefixAsString(LastAlloctedSubTrie),
"ffffffffffffff[1111110]");
ASSERT_EQ(getStartBit(LastAlloctedSubTrie), 63u);
ASSERT_EQ(getNumBits(LastAlloctedSubTrie), 1u);
ASSERT_EQ(getNumSlotUsed(LastAlloctedSubTrie), 2u);
}
TEST_F(SmallNodeTrieTest, TrieDestructionLoop) {
// Test destroying large Trie. Make sure there is no recursion that can
// overflow the stack.
// Limit the tries to 2 slots (1 bit) to generate subtries at a higher rate.
auto &Trie = createTrie(/*NumRootBits=*/1, /*NumSubtrieBits=*/1);
// Fill them up. Pick a MaxN high enough to cause a stack overflow in debug
// builds.
static constexpr uint64_t MaxN = 100000;
for (uint64_t N = 0; N != MaxN; ++N) {
HashType Hash = hash(N);
Trie.insert(TrieType::pointer(), TrieType::value_type(Hash, NumType{N}));
}
// Destroy tries. If destruction is recursive and MaxN is high enough, these
// will both fail.
destroyTrie();
}
struct NumWithDestructorT {
uint64_t Num;
llvm::function_ref<void()> DestructorCallback;
~NumWithDestructorT() { DestructorCallback(); }
};
using NodeWithDestructorTrieTest = SimpleTrieHashMapTest<NumWithDestructorT>;
TEST_F(NodeWithDestructorTrieTest, TrieDestructionLoop) {
// Test destroying large Trie. Make sure there is no recursion that can
// overflow the stack.
// Limit the tries to 2 slots (1 bit) to generate subtries at a higher rate.
auto &Trie = createTrie(/*NumRootBits=*/1, /*NumSubtrieBits=*/1);
// Fill them up. Pick a MaxN high enough to cause a stack overflow in debug
// builds.
static constexpr uint64_t MaxN = 100000;
uint64_t DestructorCalled = 0;
auto DtorCallback = [&DestructorCalled]() { ++DestructorCalled; };
for (uint64_t N = 0; N != MaxN; ++N) {
HashType Hash = hash(N);
Trie.insert(TrieType::pointer(),
TrieType::value_type(Hash, NumType{N, DtorCallback}));
}
// Reset the count after all the temporaries get destroyed.
DestructorCalled = 0;
// Destroy tries. If destruction is recursive and MaxN is high enough, these
// will both fail.
destroyTrie();
// Count the number of destructor calls during `destroyTrie()`.
ASSERT_EQ(DestructorCalled, MaxN);
}
using NumStrNodeTrieTest = SimpleTrieHashMapTest<std::string>;
TEST_F(NumStrNodeTrieTest, TrieInsertLazy) {
for (unsigned RootBits : {2, 3, 6, 10}) {
for (unsigned SubtrieBits : {2, 3, 4}) {
auto &Trie = createTrie(RootBits, SubtrieBits);
for (int I = 0, E = 1000; I != E; ++I) {
TrieType::pointer Lookup;
HashType H = hash(I);
if (I & 1)
Lookup = Trie.find(H);
auto insertNum = [&](uint64_t Num) {
std::string S = Twine(I).str();
auto Hash = hash(Num);
return Trie.insertLazy(
Hash, [&](TrieType::LazyValueConstructor C) { C(std::move(S)); });
};
auto S1 = insertNum(I);
// The address of the Data should be the same.
EXPECT_EQ(&S1->Data, &insertNum(I)->Data);
auto insertStr = [&](std::string S) {
int Num = std::stoi(S);
return insertNum(Num);
};
std::string S2 = S1->Data;
// The address of the Data should be the same.
EXPECT_EQ(&S1->Data, &insertStr(S2)->Data);
}
for (int I = 0, E = 1000; I != E; ++I) {
std::string S = Twine(I).str();
TrieType::pointer Lookup = Trie.find(hash(I));
EXPECT_TRUE(Lookup);
if (!Lookup)
continue;
EXPECT_EQ(S, Lookup->Data);
}
}
}
}
} // end anonymous namespace