|  | //===- TrieRawHashMapTest.cpp ---------------------------------------------===// | 
|  | // | 
|  | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|  | // See https://llvm.org/LICENSE.txt for license information. | 
|  | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | #include "llvm/ADT/TrieRawHashMap.h" | 
|  | #include "llvm/ADT/Twine.h" | 
|  | #include "llvm/Support/Endian.h" | 
|  | #include "llvm/Support/SHA1.h" | 
|  | #include "gtest/gtest.h" | 
|  |  | 
|  | using namespace llvm; | 
|  |  | 
|  | namespace llvm { | 
|  | class TrieRawHashMapTestHelper { | 
|  | public: | 
|  | TrieRawHashMapTestHelper() = default; | 
|  |  | 
|  | void setTrie(ThreadSafeTrieRawHashMapBase *T) { Trie = T; } | 
|  |  | 
|  | ThreadSafeTrieRawHashMapBase::PointerBase getRoot() const { | 
|  | return Trie->getRoot(); | 
|  | } | 
|  | unsigned getStartBit(ThreadSafeTrieRawHashMapBase::PointerBase P) const { | 
|  | return Trie->getStartBit(P); | 
|  | } | 
|  | unsigned getNumBits(ThreadSafeTrieRawHashMapBase::PointerBase P) const { | 
|  | return Trie->getNumBits(P); | 
|  | } | 
|  | unsigned getNumSlotUsed(ThreadSafeTrieRawHashMapBase::PointerBase P) const { | 
|  | return Trie->getNumSlotUsed(P); | 
|  | } | 
|  | unsigned getNumTries() const { return Trie->getNumTries(); } | 
|  | std::string | 
|  | getTriePrefixAsString(ThreadSafeTrieRawHashMapBase::PointerBase P) const { | 
|  | return Trie->getTriePrefixAsString(P); | 
|  | } | 
|  | ThreadSafeTrieRawHashMapBase::PointerBase | 
|  | getNextTrie(ThreadSafeTrieRawHashMapBase::PointerBase P) const { | 
|  | return Trie->getNextTrie(P); | 
|  | } | 
|  |  | 
|  | private: | 
|  | ThreadSafeTrieRawHashMapBase *Trie = nullptr; | 
|  | }; | 
|  | } // namespace llvm | 
|  |  | 
|  | namespace { | 
|  | template <typename DataType, size_t HashSize = sizeof(uint64_t)> | 
|  | class SimpleTrieHashMapTest : public TrieRawHashMapTestHelper, | 
|  | public ::testing::Test { | 
|  | public: | 
|  | using NumType = DataType; | 
|  | using HashType = std::array<uint8_t, HashSize>; | 
|  | using TrieType = ThreadSafeTrieRawHashMap<DataType, sizeof(HashType)>; | 
|  |  | 
|  | TrieType &createTrie(size_t RootBits, size_t SubtrieBits) { | 
|  | auto &Ret = Trie.emplace(RootBits, SubtrieBits); | 
|  | TrieRawHashMapTestHelper::setTrie(&Ret); | 
|  | return Ret; | 
|  | } | 
|  |  | 
|  | void destroyTrie() { Trie.reset(); } | 
|  | ~SimpleTrieHashMapTest() { destroyTrie(); } | 
|  |  | 
|  | // Use the number itself as hash to test the pathological case. | 
|  | static HashType hash(uint64_t Num) { | 
|  | uint64_t HashN = | 
|  | llvm::support::endian::byte_swap(Num, llvm::endianness::big); | 
|  | HashType Hash; | 
|  | memcpy(&Hash[0], &HashN, sizeof(HashType)); | 
|  | return Hash; | 
|  | }; | 
|  |  | 
|  | private: | 
|  | std::optional<TrieType> Trie; | 
|  | }; | 
|  |  | 
|  | using SmallNodeTrieTest = SimpleTrieHashMapTest<uint64_t>; | 
|  |  | 
|  | TEST_F(SmallNodeTrieTest, TrieAllocation) { | 
|  | NumType Numbers[] = { | 
|  | 0x0, std::numeric_limits<NumType>::max(),      0x1, 0x2, | 
|  | 0x3, std::numeric_limits<NumType>::max() - 1u, | 
|  | }; | 
|  |  | 
|  | unsigned ExpectedTries[] = { | 
|  | 1,       // Allocate Root. | 
|  | 1,       // Both on the root. | 
|  | 64,      // 0 and 1 sinks all the way down. | 
|  | 64,      // no new allocation needed. | 
|  | 65,      // need a new node between 2 and 3. | 
|  | 65 + 63, // 63 new allocation to sink two big numbers all the way. | 
|  | }; | 
|  |  | 
|  | const char *ExpectedPrefix[] = { | 
|  | "", // Root. | 
|  | "", // Root. | 
|  | "00000000000000[0000000]", | 
|  | "00000000000000[0000000]", | 
|  | "00000000000000[0000001]", | 
|  | "ffffffffffffff[1111111]", | 
|  | }; | 
|  |  | 
|  | // Use root and subtrie sizes of 1 so this gets sunk quite deep. | 
|  | auto &Trie = createTrie(/*RootBits=*/1, /*SubtrieBits=*/1); | 
|  |  | 
|  | for (unsigned I = 0; I < 6; ++I) { | 
|  | // Lookup first to exercise hint code for deep tries. | 
|  | TrieType::pointer Lookup = Trie.find(hash(Numbers[I])); | 
|  | EXPECT_FALSE(Lookup); | 
|  |  | 
|  | Trie.insert(Lookup, TrieType::value_type(hash(Numbers[I]), Numbers[I])); | 
|  | EXPECT_EQ(getNumTries(), ExpectedTries[I]); | 
|  | EXPECT_EQ(getTriePrefixAsString(getNextTrie(getRoot())), ExpectedPrefix[I]); | 
|  | } | 
|  | } | 
|  |  | 
|  | TEST_F(SmallNodeTrieTest, TrieStructure) { | 
|  | NumType Numbers[] = { | 
|  | // Three numbers that will nest deeply to test (1) sinking subtries and | 
|  | // (2) deep, non-trivial hints. | 
|  | std::numeric_limits<NumType>::max(), | 
|  | std::numeric_limits<NumType>::max() - 2u, | 
|  | std::numeric_limits<NumType>::max() - 3u, | 
|  | // One number to stay at the top-level. | 
|  | 0x37, | 
|  | }; | 
|  |  | 
|  | // Use root and subtrie sizes of 1 so this gets sunk quite deep. | 
|  | auto &Trie = createTrie(/*RootBits=*/1, /*SubtrieBits=*/1); | 
|  |  | 
|  | for (NumType N : Numbers) { | 
|  | // Lookup first to exercise hint code for deep tries. | 
|  | TrieType::pointer Lookup = Trie.find(hash(N)); | 
|  | EXPECT_FALSE(Lookup); | 
|  |  | 
|  | Trie.insert(Lookup, TrieType::value_type(hash(N), N)); | 
|  | } | 
|  | for (NumType N : Numbers) { | 
|  | TrieType::pointer Lookup = Trie.find(hash(N)); | 
|  | EXPECT_TRUE(Lookup); | 
|  | if (!Lookup) | 
|  | continue; | 
|  | EXPECT_EQ(hash(N), Lookup->Hash); | 
|  | EXPECT_EQ(N, Lookup->Data); | 
|  |  | 
|  | // Confirm a subsequent insertion fails to overwrite by trying to insert a | 
|  | // bad value. | 
|  | auto Result = Trie.insert(Lookup, TrieType::value_type(hash(N), N - 1)); | 
|  | EXPECT_EQ(N, Result->Data); | 
|  | } | 
|  |  | 
|  | // Check the trie so we can confirm the structure is correct. Each subtrie | 
|  | // should have 2 slots. The root's index=0 should have the content for | 
|  | // 0x37 directly, and index=1 should be a linked-list of subtries, finally | 
|  | // ending with content for (max-2) and (max-3). | 
|  | // | 
|  | // Note: This structure is not exhaustive (too expensive to update tests), | 
|  | // but it does test that the dump format is somewhat readable and that the | 
|  | // basic structure is correct. | 
|  | // | 
|  | // Note: This test requires that the trie reads bytes starting from index 0 | 
|  | // of the array of uint8_t, and then reads each byte's bits from high to low. | 
|  |  | 
|  | // Check the Trie. | 
|  | // We should allocated a total of 64 SubTries for 64 bit hash. | 
|  | ASSERT_EQ(getNumTries(), 64u); | 
|  | // Check the root trie. Two slots and both are used. | 
|  | ASSERT_EQ(getNumSlotUsed(getRoot()), 2u); | 
|  | // Check last subtrie. | 
|  | // Last allocated trie is the next node in the allocation chain. | 
|  | auto LastAlloctedSubTrie = getNextTrie(getRoot()); | 
|  | ASSERT_EQ(getTriePrefixAsString(LastAlloctedSubTrie), | 
|  | "ffffffffffffff[1111110]"); | 
|  | ASSERT_EQ(getStartBit(LastAlloctedSubTrie), 63u); | 
|  | ASSERT_EQ(getNumBits(LastAlloctedSubTrie), 1u); | 
|  | ASSERT_EQ(getNumSlotUsed(LastAlloctedSubTrie), 2u); | 
|  | } | 
|  |  | 
|  | TEST_F(SmallNodeTrieTest, TrieStructureSmallFinalSubtrie) { | 
|  | NumType Numbers[] = { | 
|  | // Three numbers that will nest deeply to test (1) sinking subtries and | 
|  | // (2) deep, non-trivial hints. | 
|  | std::numeric_limits<NumType>::max(), | 
|  | std::numeric_limits<NumType>::max() - 2u, | 
|  | std::numeric_limits<NumType>::max() - 3u, | 
|  | // One number to stay at the top-level. | 
|  | 0x37, | 
|  | }; | 
|  |  | 
|  | // Use subtrie size of 5 to avoid hitting 64 evenly, making the final subtrie | 
|  | // small. | 
|  | auto &Trie = createTrie(/*RootBits=*/8, /*SubtrieBits=*/5); | 
|  |  | 
|  | for (NumType N : Numbers) { | 
|  | // Lookup first to exercise hint code for deep tries. | 
|  | TrieType::pointer Lookup = Trie.find(hash(N)); | 
|  | EXPECT_FALSE(Lookup); | 
|  |  | 
|  | Trie.insert(Lookup, TrieType::value_type(hash(N), N)); | 
|  | } | 
|  | for (NumType N : Numbers) { | 
|  | TrieType::pointer Lookup = Trie.find(hash(N)); | 
|  | ASSERT_TRUE(Lookup); | 
|  | EXPECT_EQ(hash(N), Lookup->Hash); | 
|  | EXPECT_EQ(N, Lookup->Data); | 
|  |  | 
|  | // Confirm a subsequent insertion fails to overwrite by trying to insert a | 
|  | // bad value. | 
|  | auto Result = Trie.insert(Lookup, TrieType::value_type(hash(N), N - 1)); | 
|  | EXPECT_EQ(N, Result->Data); | 
|  | } | 
|  |  | 
|  | // Check the trie so we can confirm the structure is correct. The root | 
|  | // should have 2^8=256 slots, most subtries should have 2^5=32 slots, and the | 
|  | // deepest subtrie should have 2^1=2 slots (since (64-8)mod(5)=1). | 
|  | // should have 2 slots. The root's index=0 should have the content for | 
|  | // 0x37 directly, and index=1 should be a linked-list of subtries, finally | 
|  | // ending with content for (max-2) and (max-3). | 
|  | // | 
|  | // Note: This structure is not exhaustive (too expensive to update tests), | 
|  | // but it does test that the dump format is somewhat readable and that the | 
|  | // basic structure is correct. | 
|  | // | 
|  | // Note: This test requires that the trie reads bytes starting from index 0 | 
|  | // of the array of uint8_t, and then reads each byte's bits from high to low. | 
|  |  | 
|  | // Check the Trie. | 
|  | // 64 bit hash = 8 + 5 * 11 + 1, so 1 root, 11 8bit subtrie and 1 last level | 
|  | // subtrie, 13 total. | 
|  | ASSERT_EQ(getNumTries(), 13u); | 
|  | // Check the root trie. Two slots and both are used. | 
|  | ASSERT_EQ(getNumSlotUsed(getRoot()), 2u); | 
|  | // Check last subtrie. | 
|  | // Last allocated trie is the next node in the allocation chain. | 
|  | auto LastAlloctedSubTrie = getNextTrie(getRoot()); | 
|  | ASSERT_EQ(getTriePrefixAsString(LastAlloctedSubTrie), | 
|  | "ffffffffffffff[1111110]"); | 
|  | ASSERT_EQ(getStartBit(LastAlloctedSubTrie), 63u); | 
|  | ASSERT_EQ(getNumBits(LastAlloctedSubTrie), 1u); | 
|  | ASSERT_EQ(getNumSlotUsed(LastAlloctedSubTrie), 2u); | 
|  | } | 
|  |  | 
|  | TEST_F(SmallNodeTrieTest, TrieDestructionLoop) { | 
|  | // Test destroying large Trie. Make sure there is no recursion that can | 
|  | // overflow the stack. | 
|  |  | 
|  | // Limit the tries to 2 slots (1 bit) to generate subtries at a higher rate. | 
|  | auto &Trie = createTrie(/*NumRootBits=*/1, /*NumSubtrieBits=*/1); | 
|  |  | 
|  | // Fill them up. Pick a MaxN high enough to cause a stack overflow in debug | 
|  | // builds. | 
|  | static constexpr uint64_t MaxN = 100000; | 
|  | for (uint64_t N = 0; N != MaxN; ++N) { | 
|  | HashType Hash = hash(N); | 
|  | Trie.insert(TrieType::pointer(), TrieType::value_type(Hash, NumType{N})); | 
|  | } | 
|  |  | 
|  | // Destroy tries. If destruction is recursive and MaxN is high enough, these | 
|  | // will both fail. | 
|  | destroyTrie(); | 
|  | } | 
|  |  | 
|  | struct NumWithDestructorT { | 
|  | uint64_t Num; | 
|  | llvm::function_ref<void()> DestructorCallback; | 
|  | ~NumWithDestructorT() { DestructorCallback(); } | 
|  | }; | 
|  |  | 
|  | using NodeWithDestructorTrieTest = SimpleTrieHashMapTest<NumWithDestructorT>; | 
|  |  | 
|  | TEST_F(NodeWithDestructorTrieTest, TrieDestructionLoop) { | 
|  | // Test destroying large Trie. Make sure there is no recursion that can | 
|  | // overflow the stack. | 
|  |  | 
|  | // Limit the tries to 2 slots (1 bit) to generate subtries at a higher rate. | 
|  | auto &Trie = createTrie(/*NumRootBits=*/1, /*NumSubtrieBits=*/1); | 
|  |  | 
|  | // Fill them up. Pick a MaxN high enough to cause a stack overflow in debug | 
|  | // builds. | 
|  | static constexpr uint64_t MaxN = 100000; | 
|  |  | 
|  | uint64_t DestructorCalled = 0; | 
|  | auto DtorCallback = [&DestructorCalled]() { ++DestructorCalled; }; | 
|  | for (uint64_t N = 0; N != MaxN; ++N) { | 
|  | HashType Hash = hash(N); | 
|  | Trie.insert(TrieType::pointer(), | 
|  | TrieType::value_type(Hash, NumType{N, DtorCallback})); | 
|  | } | 
|  | // Reset the count after all the temporaries get destroyed. | 
|  | DestructorCalled = 0; | 
|  |  | 
|  | // Destroy tries. If destruction is recursive and MaxN is high enough, these | 
|  | // will both fail. | 
|  | destroyTrie(); | 
|  |  | 
|  | // Count the number of destructor calls during `destroyTrie()`. | 
|  | ASSERT_EQ(DestructorCalled, MaxN); | 
|  | } | 
|  |  | 
|  | using NumStrNodeTrieTest = SimpleTrieHashMapTest<std::string>; | 
|  |  | 
|  | TEST_F(NumStrNodeTrieTest, TrieInsertLazy) { | 
|  | for (unsigned RootBits : {2, 3, 6, 10}) { | 
|  | for (unsigned SubtrieBits : {2, 3, 4}) { | 
|  | auto &Trie = createTrie(RootBits, SubtrieBits); | 
|  | for (int I = 0, E = 1000; I != E; ++I) { | 
|  | TrieType::pointer Lookup; | 
|  | HashType H = hash(I); | 
|  | if (I & 1) | 
|  | Lookup = Trie.find(H); | 
|  |  | 
|  | auto insertNum = [&](uint64_t Num) { | 
|  | std::string S = Twine(I).str(); | 
|  | auto Hash = hash(Num); | 
|  | return Trie.insertLazy( | 
|  | Hash, [&](TrieType::LazyValueConstructor C) { C(std::move(S)); }); | 
|  | }; | 
|  | auto S1 = insertNum(I); | 
|  | // The address of the Data should be the same. | 
|  | EXPECT_EQ(&S1->Data, &insertNum(I)->Data); | 
|  |  | 
|  | auto insertStr = [&](std::string S) { | 
|  | int Num = std::stoi(S); | 
|  | return insertNum(Num); | 
|  | }; | 
|  | std::string S2 = S1->Data; | 
|  | // The address of the Data should be the same. | 
|  | EXPECT_EQ(&S1->Data, &insertStr(S2)->Data); | 
|  | } | 
|  | for (int I = 0, E = 1000; I != E; ++I) { | 
|  | std::string S = Twine(I).str(); | 
|  | TrieType::pointer Lookup = Trie.find(hash(I)); | 
|  | EXPECT_TRUE(Lookup); | 
|  | if (!Lookup) | 
|  | continue; | 
|  | EXPECT_EQ(S, Lookup->Data); | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  | } // end anonymous namespace |