// llvm/unittests/Analysis/IR2VecTest.cpp - llvm-project - Git at Google

 //===- IR2VecTest.cpp - Unit tests for IR2Vec -----------------------------==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//

 #include "llvm/Analysis/IR2Vec.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/JSON.h"

 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 #include <map>
 #include <vector>

 using namespace llvm;
 using namespace ir2vec;
 using namespace ::testing;

 namespace {

 /// Thin Embedder subclass for tests. Its computeEmbeddings override ignores
 /// the instruction it is given and always hands back a default-constructed
 /// Embedding.
 class TestableEmbedder : public Embedder {
 public:
   TestableEmbedder(const Function &F, const Vocabulary &V) : Embedder(F, V) {}

   Embedding computeEmbeddings(const Instruction & /*I*/) const override {
     Embedding Result;
     return Result;
   }
 };

 TEST(EmbeddingTest, ConstructorsAndAccessors) {
   // Default construction yields an embedding with no elements.
   {
     Embedding Empty;
     EXPECT_TRUE(Empty.empty());
     EXPECT_EQ(Empty.size(), 0u);
   }

   // Construction from an lvalue std::vector<double>.
   {
     std::vector<double> Values = {1.0, 2.0, 3.0};
     Embedding FromLValue(Values);
     EXPECT_FALSE(FromLValue.empty());
     ASSERT_THAT(FromLValue, SizeIs(3u));
     EXPECT_THAT(FromLValue.getData(), ElementsAre(1.0, 2.0, 3.0));
     // Element access through operator[] must agree with the source data.
     EXPECT_EQ(FromLValue[0], 1.0);
     EXPECT_EQ(FromLValue[1], 2.0);
     EXPECT_EQ(FromLValue[2], 3.0);
   }

   // Construction from a temporary std::vector<double>.
   {
     Embedding FromRValue(std::vector<double>({4.0, 5.0}));
     ASSERT_THAT(FromRValue, SizeIs(2u));
     EXPECT_THAT(FromRValue.getData(), ElementsAre(4.0, 5.0));
   }

   // Construction from a braced list; also checks that operator[] is writable.
   {
     Embedding FromList({6.0, 7.0, 8.0, 9.0});
     ASSERT_THAT(FromList, SizeIs(4u));
     EXPECT_THAT(FromList.getData(), ElementsAre(6.0, 7.0, 8.0, 9.0));
     EXPECT_EQ(FromList[0], 6.0);
     FromList[0] = 6.5;
     EXPECT_EQ(FromList[0], 6.5);
   }

   // Construction from a size only: every element is expected to be 0.0.
   {
     Embedding Zeros(5);
     ASSERT_THAT(Zeros, SizeIs(5u));
     EXPECT_THAT(Zeros.getData(), ElementsAre(0.0, 0.0, 0.0, 0.0, 0.0));
   }

   // Construction from a size and a fill value.
   {
     Embedding Filled(5, 1.5);
     ASSERT_THAT(Filled, SizeIs(5u));
     EXPECT_THAT(Filled.getData(), ElementsAre(1.5, 1.5, 1.5, 1.5, 1.5));
   }

   // Iteration through mutable and const embeddings.
   {
     Embedding Mutable({6.5, 7.0, 8.0, 9.0});
     std::vector<double> SeenMutable;
     for (double Elem : Mutable)
       SeenMutable.push_back(Elem);
     EXPECT_THAT(SeenMutable, ElementsAre(6.5, 7.0, 8.0, 9.0));

     const Embedding Frozen = Mutable;
     std::vector<double> SeenConst;
     for (const double &Elem : Frozen)
       SeenConst.push_back(Elem);
     EXPECT_THAT(SeenConst, ElementsAre(6.5, 7.0, 8.0, 9.0));

     // First and last element via begin()/end() and cbegin()/cend().
     EXPECT_EQ(*Mutable.begin(), 6.5);
     EXPECT_EQ(*(Mutable.end() - 1), 9.0);
     EXPECT_EQ(*Frozen.cbegin(), 6.5);
     EXPECT_EQ(*(Frozen.cend() - 1), 9.0);
   }
 }

 TEST(EmbeddingTest, AddVectorsOutOfPlace) {
   Embedding Lhs = {1.0, 2.0, 3.0};
   Embedding Rhs = {0.5, 1.5, -1.0};

   // operator+ produces the element-wise sum in a separate object...
   Embedding Sum = Lhs + Rhs;
   EXPECT_THAT(Sum, ElementsAre(1.5, 3.5, 2.0));

   // ...while neither operand is modified.
   EXPECT_THAT(Lhs, ElementsAre(1.0, 2.0, 3.0));
   EXPECT_THAT(Rhs, ElementsAre(0.5, 1.5, -1.0));
 }

 TEST(EmbeddingTest, AddVectors) {
   Embedding Accum = {1.0, 2.0, 3.0};
   Embedding Delta = {0.5, 1.5, -1.0};

   // operator+= accumulates element-wise into the left-hand side.
   Accum += Delta;
   EXPECT_THAT(Accum, ElementsAre(1.5, 3.5, 2.0));

   // The right-hand side must be left as it was.
   EXPECT_THAT(Delta, ElementsAre(0.5, 1.5, -1.0));
 }

 TEST(EmbeddingTest, SubtractVectorsOutOfPlace) {
   Embedding Lhs = {1.0, 2.0, 3.0};
   Embedding Rhs = {0.5, 1.5, -1.0};

   // operator- produces the element-wise difference in a separate object...
   Embedding Diff = Lhs - Rhs;
   EXPECT_THAT(Diff, ElementsAre(0.5, 0.5, 4.0));

   // ...while neither operand is modified.
   EXPECT_THAT(Lhs, ElementsAre(1.0, 2.0, 3.0));
   EXPECT_THAT(Rhs, ElementsAre(0.5, 1.5, -1.0));
 }

 TEST(EmbeddingTest, SubtractVectors) {
   Embedding Accum = {1.0, 2.0, 3.0};
   Embedding Delta = {0.5, 1.5, -1.0};

   // operator-= subtracts element-wise from the left-hand side.
   Accum -= Delta;
   EXPECT_THAT(Accum, ElementsAre(0.5, 0.5, 4.0));

   // The right-hand side must be left as it was.
   EXPECT_THAT(Delta, ElementsAre(0.5, 1.5, -1.0));
 }

 TEST(EmbeddingTest, ScaleVector) {
   // In-place scaling by one half halves every element.
   Embedding Scaled = {1.0, 2.0, 3.0};
   Scaled *= 0.5f;
   EXPECT_THAT(Scaled, ElementsAre(0.5, 1.0, 1.5));
 }

 TEST(EmbeddingTest, ScaleVectorOutOfPlace) {
   Embedding Source = {1.0, 2.0, 3.0};

   // operator* returns the scaled copy...
   Embedding Halved = Source * 0.5f;
   EXPECT_THAT(Halved, ElementsAre(0.5, 1.0, 1.5));

   // ...and the source keeps its original values.
   EXPECT_THAT(Source, ElementsAre(1.0, 2.0, 3.0));
 }

 TEST(EmbeddingTest, AddScaledVector) {
   Embedding Accum = {1.0, 2.0, 3.0};
   Embedding Addend = {2.0, 0.5, -1.0};

   // Expected: Accum[i] + 0.5 * Addend[i] for every element.
   Accum.scaleAndAdd(Addend, 0.5f);
   EXPECT_THAT(Accum, ElementsAre(2.0, 2.25, 2.5));

   // The addend itself must not be touched.
   EXPECT_THAT(Addend, ElementsAre(2.0, 0.5, -1.0));
 }

 TEST(EmbeddingTest, ApproximatelyEqual) {
   Embedding Base = {1.0, 2.0, 3.0};

   // Per-element difference of 1e-7: accepted with the default tolerance.
   Embedding Nearby = {1.0000001, 2.0000001, 3.0000001};
   EXPECT_TRUE(Base.approximatelyEquals(Nearby));

   // Per-element difference of 2e-5: rejected at 1e-6, accepted at 3e-5.
   Embedding Shifted = {1.00002, 2.00002, 3.00002};
   EXPECT_FALSE(Base.approximatelyEquals(Shifted, 1e-6));
   EXPECT_TRUE(Base.approximatelyEquals(Shifted, 3e-5));

   // Per-element difference of 5e-7: accepted with the default tolerance.
   Embedding WithinDefault = {1.0000005, 2.0000005, 3.0000005};
   EXPECT_TRUE(Base.approximatelyEquals(WithinDefault));

   // Per-element difference of 1e-5: rejected at a tolerance of 1e-6.
   Embedding OutsideTight = {1.00001, 2.00001, 3.00001};
   EXPECT_FALSE(Base.approximatelyEquals(OutsideTight, 1e-6));

   // A single element that is far off is enough to fail the comparison.
   Embedding FarOff = {1.0, 2.0, 3.5};
   EXPECT_FALSE(Base.approximatelyEquals(FarOff, 0.01));

   // Identical contents compare equal even with a zero tolerance.
   Embedding Same = {1.0, 2.0, 3.0};
   EXPECT_TRUE(Base.approximatelyEquals(Same, 0.0));
   EXPECT_TRUE(Base.approximatelyEquals(Same));
 }

 #if GTEST_HAS_DEATH_TEST
 #ifndef NDEBUG
 TEST(EmbeddingTest, AccessOutOfBounds) {
   Embedding ThreeElems = {1.0, 2.0, 3.0};
   // Reads one past the end and at a negative index, then a write past the
   // end, are each expected to abort with the bounds message.
   EXPECT_DEATH(ThreeElems[3], "Index out of bounds");
   EXPECT_DEATH(ThreeElems[-1], "Index out of bounds");
   EXPECT_DEATH(ThreeElems[4] = 4.0, "Index out of bounds");
 }

 TEST(EmbeddingTest, MismatchedDimensionsAddVectorsOutOfPlace) {
   // operator+ on operands of size 2 and size 1 is expected to abort.
   Embedding TwoDim = {1.0, 2.0};
   Embedding OneDim = {1.0};
   EXPECT_DEATH(TwoDim + OneDim, "Vectors must have the same dimension");
 }

 TEST(EmbeddingTest, MismatchedDimensionsAddVectors) {
   // operator+= on operands of size 2 and size 1 is expected to abort.
   Embedding TwoDim = {1.0, 2.0};
   Embedding OneDim = {1.0};
   EXPECT_DEATH(TwoDim += OneDim, "Vectors must have the same dimension");
 }

 TEST(EmbeddingTest, MismatchedDimensionsSubtractVectors) {
   // operator-= on operands of size 2 and size 1 is expected to abort.
   Embedding TwoDim = {1.0, 2.0};
   Embedding OneDim = {1.0};
   EXPECT_DEATH(TwoDim -= OneDim, "Vectors must have the same dimension");
 }

 TEST(EmbeddingTest, MismatchedDimensionsAddScaledVector) {
   // scaleAndAdd on operands of size 2 and size 1 is expected to abort.
   Embedding TwoDim = {1.0, 2.0};
   Embedding OneDim = {1.0};
   EXPECT_DEATH(TwoDim.scaleAndAdd(OneDim, 1.0f),
                "Vectors must have the same dimension");
 }

 TEST(EmbeddingTest, MismatchedDimensionsApproximatelyEqual) {
   // approximatelyEquals on operands of size 2 and size 1 is expected to
   // abort rather than return false.
   Embedding TwoDim = {1.0, 2.0};
   Embedding OneDim = {1.010};
   EXPECT_DEATH(TwoDim.approximatelyEquals(OneDim),
                "Vectors must have the same dimension");
 }
 #endif // NDEBUG
 #endif // GTEST_HAS_DEATH_TEST

 TEST(IR2VecTest, CreateSymbolicEmbedder) {
   Vocabulary V(Vocabulary::createDummyVocabForTest());

   // An empty `void f()` declaration is enough to request an embedder.
   LLVMContext Ctx;
   Module M("M", Ctx);
   Function *F =
       Function::Create(FunctionType::get(Type::getVoidTy(Ctx), false),
                        Function::ExternalLinkage, "f", M);

   auto SymbolicEmb = Embedder::create(IR2VecKind::Symbolic, *F, V);
   EXPECT_NE(SymbolicEmb, nullptr);
 }

 TEST(IR2VecTest, CreateFlowAwareEmbedder) {
   Vocabulary V(Vocabulary::createDummyVocabForTest());

   // An empty `void f()` declaration is enough to request an embedder.
   LLVMContext Ctx;
   Module M("M", Ctx);
   Function *F =
       Function::Create(FunctionType::get(Type::getVoidTy(Ctx), false),
                        Function::ExternalLinkage, "f", M);

   auto FlowAwareEmb = Embedder::create(IR2VecKind::FlowAware, *F, V);
   EXPECT_NE(FlowAwareEmb, nullptr);
 }

 TEST(IR2VecTest, CreateInvalidMode) {
   Vocabulary V(Vocabulary::createDummyVocabForTest());

   LLVMContext Ctx;
   Module M("M", Ctx);
   Function *F =
       Function::Create(FunctionType::get(Type::getVoidTy(Ctx), false),
                        Function::ExternalLinkage, "f", M);

   // Forge a kind value that names no enumerator; create() is expected to
   // return a result that converts to false.
   const auto BogusKind = static_cast<IR2VecKind>(-1);
   auto Result = Embedder::create(BogusKind, *F, V);
   EXPECT_FALSE(static_cast<bool>(Result));
 }

 TEST(IR2VecTest, ZeroDimensionEmbedding) {
   Embedding Target;
   Embedding Operand;
   // Arithmetic on two empty embeddings must run without crashing and leave
   // the target empty.
   Target += Operand;
   Target -= Operand;
   Target.scaleAndAdd(Operand, 1.0f);
   EXPECT_TRUE(Target.empty());
 }

 // Fixture for IR2Vec tests requiring IR setup.
 class IR2VecTestFixture : public ::testing::Test {
 protected:
   std::unique_ptr<Vocabulary> V;
   LLVMContext Ctx;
   std::unique_ptr<Module> M;
   Function *F = nullptr;
   BasicBlock *BB = nullptr;
   Instruction *AddInst = nullptr;
   Instruction *RetInst = nullptr;

   void SetUp() override {
     V = std::make_unique<Vocabulary>(Vocabulary::createDummyVocabForTest(2));

     // Setup IR
     M = std::make_unique<Module>("TestM", Ctx);
     FunctionType *FTy = FunctionType::get(
         Type::getInt32Ty(Ctx), {Type::getInt32Ty(Ctx), Type::getInt32Ty(Ctx)},
         false);
     F = Function::Create(FTy, Function::ExternalLinkage, "f", M.get());
     BB = BasicBlock::Create(Ctx, "entry", F);
     Argument *Arg = F->getArg(0);
     llvm::Value *Const = ConstantInt::get(Type::getInt32Ty(Ctx), 42);

     AddInst = BinaryOperator::CreateAdd(Arg, Const, "add", BB);
     RetInst = ReturnInst::Create(Ctx, AddInst, BB);
   }
 };

 TEST_F(IR2VecTestFixture, GetInstVec_Symbolic) {
   auto Emb = Embedder::create(IR2VecKind::Symbolic, *F, *V);
   ASSERT_TRUE(static_cast<bool>(Emb));

   // Both instruction vectors carry the vocabulary's dimension (2).
   const auto &AddVec = Emb->getInstVector(*AddInst);
   const auto &RetVec = Emb->getInstVector(*RetInst);
   EXPECT_EQ(AddVec.size(), 2u);
   EXPECT_EQ(RetVec.size(), 2u);

   // Expected values for the dummy vocabulary in symbolic mode.
   const Embedding ExpectedAdd(2, 25.5);
   const Embedding ExpectedRet(2, 15.5);
   EXPECT_TRUE(AddVec.approximatelyEquals(ExpectedAdd));
   EXPECT_TRUE(RetVec.approximatelyEquals(ExpectedRet));
 }

 TEST_F(IR2VecTestFixture, GetInstVec_FlowAware) {
   auto Emb = Embedder::create(IR2VecKind::FlowAware, *F, *V);
   ASSERT_TRUE(static_cast<bool>(Emb));

   // Both instruction vectors carry the vocabulary's dimension (2).
   const auto &AddVec = Emb->getInstVector(*AddInst);
   const auto &RetVec = Emb->getInstVector(*RetInst);
   EXPECT_EQ(AddVec.size(), 2u);
   EXPECT_EQ(RetVec.size(), 2u);

   // Expected values for the dummy vocabulary in flow-aware mode.
   const Embedding ExpectedAdd(2, 25.5);
   const Embedding ExpectedRet(2, 32.6);
   EXPECT_TRUE(AddVec.approximatelyEquals(ExpectedAdd));
   EXPECT_TRUE(RetVec.approximatelyEquals(ExpectedRet));
 }

 TEST_F(IR2VecTestFixture, GetBBVector_Symbolic) {
   auto Emb = Embedder::create(IR2VecKind::Symbolic, *F, *V);
   ASSERT_TRUE(static_cast<bool>(Emb));

   const auto &BlockVec = Emb->getBBVector(*BB);
   EXPECT_EQ(BlockVec.size(), 2u);

   // The block vector is the sum of its instruction vectors:
   //   add {25.5, 25.5} + ret {15.5, 15.5} = {41.0, 41.0}
   const Embedding ExpectedBlock(2, 41.0);
   EXPECT_TRUE(BlockVec.approximatelyEquals(ExpectedBlock));
 }

 TEST_F(IR2VecTestFixture, GetBBVector_FlowAware) {
   auto Emb = Embedder::create(IR2VecKind::FlowAware, *F, *V);
   ASSERT_TRUE(static_cast<bool>(Emb));

   const auto &BlockVec = Emb->getBBVector(*BB);
   EXPECT_EQ(BlockVec.size(), 2u);

   // The block vector is the sum of its instruction vectors:
   //   add {25.5, 25.5} + ret {32.6, 32.6} = {58.1, 58.1}
   const Embedding ExpectedBlock(2, 58.1);
   EXPECT_TRUE(BlockVec.approximatelyEquals(ExpectedBlock));
 }

 TEST_F(IR2VecTestFixture, GetFunctionVector_Symbolic) {
   auto Emb = Embedder::create(IR2VecKind::Symbolic, *F, *V);
   ASSERT_TRUE(static_cast<bool>(Emb));

   const auto &FnVec = Emb->getFunctionVector();
   EXPECT_EQ(FnVec.size(), 2u);

   // With a single basic block the function vector equals the block vector,
   // i.e. {41.0, 41.0}.
   const Embedding ExpectedFn(2, 41.0);
   EXPECT_TRUE(FnVec.approximatelyEquals(ExpectedFn));
 }

 TEST_F(IR2VecTestFixture, GetFunctionVector_FlowAware) {
   auto Emb = Embedder::create(IR2VecKind::FlowAware, *F, *V);
   ASSERT_TRUE(static_cast<bool>(Emb));

   const auto &FnVec = Emb->getFunctionVector();
   EXPECT_EQ(FnVec.size(), 2u);

   // With a single basic block the function vector equals the block vector,
   // i.e. {58.1, 58.1}.
   const Embedding ExpectedFn(2, 58.1);
   EXPECT_TRUE(FnVec.approximatelyEquals(ExpectedFn));
 }

 TEST_F(IR2VecTestFixture, MultipleComputeEmbeddingsConsistency_Symbolic) {
   auto Emb = Embedder::create(IR2VecKind::Symbolic, *F, *V);
   ASSERT_TRUE(static_cast<bool>(Emb));

   // Snapshot every result BY VALUE. If getFunctionVector() returns a
   // reference to state owned by the embedder (NOTE(review): presumed, confirm
   // against IR2Vec.h), reference bindings would all alias one object and the
   // comparisons below -- in particular the one across invalidateEmbeddings()
   // -- would compare that object with itself. Copies are correct either way.
   const Embedding FuncVec1 = Emb->getFunctionVector();

   // Ask again without invalidating; the results must not drift.
   const Embedding FuncVec2 = Emb->getFunctionVector();
   const Embedding FuncVec3 = Emb->getFunctionVector();

   // All function vectors should be identical
   EXPECT_TRUE(FuncVec1.approximatelyEquals(FuncVec2));
   EXPECT_TRUE(FuncVec1.approximatelyEquals(FuncVec3));
   EXPECT_TRUE(FuncVec2.approximatelyEquals(FuncVec3));

   // After invalidation the vector is requested afresh and must match the
   // snapshot taken before invalidation.
   Emb->invalidateEmbeddings();
   const Embedding FuncVec4 = Emb->getFunctionVector();
   EXPECT_TRUE(FuncVec1.approximatelyEquals(FuncVec4));
 }

 TEST_F(IR2VecTestFixture, MultipleComputeEmbeddingsConsistency_FlowAware) {
   auto Emb = Embedder::create(IR2VecKind::FlowAware, *F, *V);
   ASSERT_TRUE(static_cast<bool>(Emb));

   // Snapshot every result BY VALUE. If getFunctionVector() returns a
   // reference to state owned by the embedder (NOTE(review): presumed, confirm
   // against IR2Vec.h), reference bindings would all alias one object and the
   // comparisons below -- in particular the one across invalidateEmbeddings()
   // -- would compare that object with itself. Copies are correct either way.
   const Embedding FuncVec1 = Emb->getFunctionVector();

   // Ask again without invalidating; the results must not drift.
   const Embedding FuncVec2 = Emb->getFunctionVector();
   const Embedding FuncVec3 = Emb->getFunctionVector();

   // All function vectors should be identical
   EXPECT_TRUE(FuncVec1.approximatelyEquals(FuncVec2));
   EXPECT_TRUE(FuncVec1.approximatelyEquals(FuncVec3));
   EXPECT_TRUE(FuncVec2.approximatelyEquals(FuncVec3));

   // After invalidation the vector is requested afresh and must match the
   // snapshot taken before invalidation.
   Emb->invalidateEmbeddings();
   const Embedding FuncVec4 = Emb->getFunctionVector();
   EXPECT_TRUE(FuncVec1.approximatelyEquals(FuncVec4));
 }

 // Short local aliases for the Vocabulary section sizes used by the slot and
 // string-key tests. They live in the anonymous namespace, so they already
 // have internal linkage without `static`.
 constexpr unsigned MaxOpcodes = Vocabulary::MaxOpcodes;
 // Only referenced from death tests that are compiled out under NDEBUG.
 [[maybe_unused]] constexpr unsigned MaxTypeIDs = Vocabulary::MaxTypeIDs;
 constexpr unsigned MaxCanonicalTypeIDs = Vocabulary::MaxCanonicalTypeIDs;
 constexpr unsigned MaxOperands = Vocabulary::MaxOperandKinds;
 constexpr unsigned MaxPredicateKinds = Vocabulary::MaxPredicateKinds;

 // X-macro table relating LLVM Type::TypeID enumerators to
 // Vocabulary::CanonicalTypeID enumerators and their canonical string keys.
 //
 // Each row invokes X(TypeIDTok, CanonEnum, CanonStr), where
 //   TypeIDTok - enumerator name inside Type (used as Type::TypeIDTok),
 //   CanonEnum - enumerator name inside Vocabulary::CanonicalTypeID,
 //   CanonStr  - the string key expected for that canonical type.
 //
 // Callers define a three-argument macro, pass it as X, and #undef it again.
 // Note that the TypeID and canonical names are not always spelled alike
 // (FixedVectorTyID is paired with VectorTy).
 #define IR2VEC_HANDLE_TYPE_BIMAP(X)                                            \
   X(VoidTyID, VoidTy, "VoidTy")                                                \
   X(IntegerTyID, IntegerTy, "IntegerTy")                                       \
   X(FloatTyID, FloatTy, "FloatTy")                                             \
   X(PointerTyID, PointerTy, "PointerTy")                                       \
   X(FunctionTyID, FunctionTy, "FunctionTy")                                    \
   X(StructTyID, StructTy, "StructTy")                                          \
   X(ArrayTyID, ArrayTy, "ArrayTy")                                             \
   X(FixedVectorTyID, VectorTy, "VectorTy")                                     \
   X(LabelTyID, LabelTy, "LabelTy")                                             \
   X(TokenTyID, TokenTy, "TokenTy")                                             \
   X(MetadataTyID, MetadataTy, "MetadataTy")

 TEST(IR2VecVocabularyTest, DummyVocabTest) {
   // Exercise dummy vocabularies of dimension 1 through 10: check their shape,
   // then feed each one through IR2VecVocabAnalysis and verify that the
   // resulting Vocabulary exposes the same embeddings in the same order.
   for (unsigned Dim = 1; Dim <= 10; ++Dim) {
     auto VocabVec = Vocabulary::createDummyVocabForTest(Dim);
     auto VocabVecSize = VocabVec.size();
     // All embeddings should have the same dimension
     for (const auto &Emb : VocabVec)
       EXPECT_EQ(Emb.size(), Dim);

     // Should have the correct total number of embeddings
     EXPECT_EQ(VocabVecSize, MaxOpcodes + MaxCanonicalTypeIDs + MaxOperands +
                                 MaxPredicateKinds);

     // Collect embeddings for later comparison before moving VocabVec
     std::vector<Embedding> ExpectedVocab;
     ExpectedVocab.reserve(VocabVecSize);
     for (const auto &Emb : VocabVec)
       ExpectedVocab.push_back(Emb);

     IR2VecVocabAnalysis VocabAnalysis(std::move(VocabVec));
     LLVMContext TestCtx;
     Module TestMod("TestModuleForVocabAnalysis", TestCtx);
     ModuleAnalysisManager MAM;
     Vocabulary Result = VocabAnalysis.run(TestMod, MAM);
     EXPECT_TRUE(Result.isValid());
     EXPECT_EQ(Result.getDimension(), Dim);
     EXPECT_EQ(Result.getCanonicalSize(), VocabVecSize);

     size_t CurPos = 0;
     for (const auto &Entry : Result) {
       // The size check above is non-fatal, so guard the index explicitly
       // instead of reading past the end of ExpectedVocab.
       ASSERT_LT(CurPos, ExpectedVocab.size());
       EXPECT_TRUE(Entry.approximatelyEquals(ExpectedVocab[CurPos], 0.01));
       ++CurPos;
     }
     // Every expected embedding must have been visited; otherwise a short
     // iteration would pass without comparing anything.
     EXPECT_EQ(CurPos, ExpectedVocab.size());
   }
 }

 // Checks Vocabulary::getIndex() for each kind of key. The expected layout,
 // as encoded by the assertions below, is four consecutive sections:
 //   [opcodes][canonical types][operand kinds][predicates]
 TEST(IR2VecVocabularyTest, SlotIdxMapping) {
   // Test getIndex for Opcodes
   // Instruction.def expands HANDLE_INST once per instruction; opcode NUM is
   // expected at slot NUM - 1.
 #define EXPECT_OPCODE_SLOT(NUM, OPCODE, CLASS)                                 \
   EXPECT_EQ(Vocabulary::getIndex(NUM), static_cast<unsigned>(NUM - 1));
 #define HANDLE_INST(NUM, OPCODE, CLASS) EXPECT_OPCODE_SLOT(NUM, OPCODE, CLASS)
 #include "llvm/IR/Instruction.def"
 #undef HANDLE_INST
 #undef EXPECT_OPCODE_SLOT

   // Test getIndex for Types
   // Type slots start right after the opcode section, offset by the
   // canonical type ID paired with each Type::TypeID in the bimap table.
 #define EXPECT_TYPE_SLOT(TypeIDTok, CanonEnum, CanonStr)                       \
   EXPECT_EQ(Vocabulary::getIndex(Type::TypeIDTok),                             \
             MaxOpcodes + static_cast<unsigned>(                                \
                              Vocabulary::CanonicalTypeID::CanonEnum));

   IR2VEC_HANDLE_TYPE_BIMAP(EXPECT_TYPE_SLOT)

 #undef EXPECT_TYPE_SLOT

   // Test getIndex for Value operands
   // A `void testFunc(i32)` function provides the Function, Argument and
   // (via an alloca in its entry block) pointer operands used below.
   LLVMContext Ctx;
   Module M("TestM", Ctx);
   FunctionType *FTy =
       FunctionType::get(Type::getVoidTy(Ctx), {Type::getInt32Ty(Ctx)}, false);
   Function *F = Function::Create(FTy, Function::ExternalLinkage, "testFunc", M);

   // Operand-kind slots follow the opcode and canonical-type sections.
 #define EXPECTED_VOCAB_OPERAND_SLOT(X)                                         \
   MaxOpcodes + MaxCanonicalTypeIDs + static_cast<unsigned>(X)
   // Test Function operand
   EXPECT_EQ(Vocabulary::getIndex(*F),
             EXPECTED_VOCAB_OPERAND_SLOT(Vocabulary::OperandKind::FunctionID));

   // Test Constant operand
   Constant *C = ConstantInt::get(Type::getInt32Ty(Ctx), 42);
   EXPECT_EQ(Vocabulary::getIndex(*C),
             EXPECTED_VOCAB_OPERAND_SLOT(Vocabulary::OperandKind::ConstantID));

   // Test Pointer operand
   // The alloca is inserted into BB, so it is not leaked by this test.
   BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
   AllocaInst *PtrVal = new AllocaInst(Type::getInt32Ty(Ctx), 0, "ptr", BB);
   EXPECT_EQ(Vocabulary::getIndex(*PtrVal),
             EXPECTED_VOCAB_OPERAND_SLOT(Vocabulary::OperandKind::PointerID));

   // Test Variable operand (function argument)
   Argument *Arg = F->getArg(0);
   EXPECT_EQ(Vocabulary::getIndex(*Arg),
             EXPECTED_VOCAB_OPERAND_SLOT(Vocabulary::OperandKind::VariableID));
 #undef EXPECTED_VOCAB_OPERAND_SLOT

   // Test getIndex for predicates
   // Predicate slots come last. FCMP predicates are expected first, indexed
   // by their distance from FIRST_FCMP_PREDICATE.
 #define EXPECTED_VOCAB_PREDICATE_SLOT(X)                                       \
   MaxOpcodes + MaxCanonicalTypeIDs + MaxOperands + static_cast<unsigned>(X)
   for (unsigned P = CmpInst::FIRST_FCMP_PREDICATE;
        P <= CmpInst::LAST_FCMP_PREDICATE; ++P) {
     CmpInst::Predicate Pred = static_cast<CmpInst::Predicate>(P);
     unsigned ExpectedIdx =
         EXPECTED_VOCAB_PREDICATE_SLOT((P - CmpInst::FIRST_FCMP_PREDICATE));
     EXPECT_EQ(Vocabulary::getIndex(Pred), ExpectedIdx);
   }
   // ICMP predicates are expected to start at offset LAST_FCMP_PREDICATE + 1
   // within the predicate section, indexed by their distance from
   // FIRST_ICMP_PREDICATE.
   auto ICMP_Start = CmpInst::LAST_FCMP_PREDICATE + 1;
   for (unsigned P = CmpInst::FIRST_ICMP_PREDICATE;
        P <= CmpInst::LAST_ICMP_PREDICATE; ++P) {
     CmpInst::Predicate Pred = static_cast<CmpInst::Predicate>(P);
     unsigned ExpectedIdx = EXPECTED_VOCAB_PREDICATE_SLOT(
         ICMP_Start + P - CmpInst::FIRST_ICMP_PREDICATE);
     EXPECT_EQ(Vocabulary::getIndex(Pred), ExpectedIdx);
   }
 #undef EXPECTED_VOCAB_PREDICATE_SLOT
 }

 #if GTEST_HAS_DEATH_TEST
 #ifndef NDEBUG
 TEST(IR2VecVocabularyTest, NumericIDMapInvalidInputs) {
   // Opcode IDs 0 and MaxOpcodes + 1 are both expected to be rejected.
   EXPECT_DEATH(Vocabulary::getIndex(0u), "Invalid opcode");
   EXPECT_DEATH(Vocabulary::getIndex(MaxOpcodes + 1), "Invalid opcode");

   // Type IDs forged at MaxTypeIDs and well beyond it are expected to be
   // rejected as well.
   const auto FirstBadTypeID = static_cast<Type::TypeID>(MaxTypeIDs);
   const auto FarBadTypeID = static_cast<Type::TypeID>(MaxTypeIDs + 10);
   EXPECT_DEATH(Vocabulary::getIndex(FirstBadTypeID), "Invalid type ID");
   EXPECT_DEATH(Vocabulary::getIndex(FarBadTypeID), "Invalid type ID");
 }
 #endif // NDEBUG
 #endif // GTEST_HAS_DEATH_TEST

 // Checks Vocabulary::getStringKey() across all four vocabulary sections
 // (opcodes, canonical types, operand kinds, predicates).
 TEST(IR2VecVocabularyTest, StringKeyGeneration) {
   // Spot checks on two fixed opcode slots.
   EXPECT_EQ(Vocabulary::getStringKey(0), "Ret");
   EXPECT_EQ(Vocabulary::getStringKey(12), "Add");

   // For every instruction in Instruction.def, the key at the opcode's slot
   // must equal the vocabulary key reported for that opcode.
 #define EXPECT_OPCODE(NUM, OPCODE, CLASS)                                      \
   EXPECT_EQ(Vocabulary::getStringKey(Vocabulary::getIndex(NUM)),               \
             Vocabulary::getVocabKeyForOpcode(NUM));
 #define HANDLE_INST(NUM, OPCODE, CLASS) EXPECT_OPCODE(NUM, OPCODE, CLASS)
 #include "llvm/IR/Instruction.def"
 #undef HANDLE_INST
 #undef EXPECT_OPCODE

   // Verify CanonicalTypeID -> string mapping
 #define EXPECT_CANONICAL_TYPE_NAME(TypeIDTok, CanonEnum, CanonStr)             \
   EXPECT_EQ(Vocabulary::getStringKey(                                          \
                 MaxOpcodes + static_cast<unsigned>(                            \
                                  Vocabulary::CanonicalTypeID::CanonEnum)),     \
             CanonStr);

   IR2VEC_HANDLE_TYPE_BIMAP(EXPECT_CANONICAL_TYPE_NAME)

 #undef EXPECT_CANONICAL_TYPE_NAME

   // Verify OperandKind -> string mapping
 #define HANDLE_OPERAND_KINDS(X)                                                \
   X(FunctionID, "Function")                                                    \
   X(PointerID, "Pointer")                                                      \
   X(ConstantID, "Constant")                                                    \
   X(VariableID, "Variable")

 #define EXPECT_OPERAND_KIND(EnumName, Str)                                     \
   EXPECT_EQ(Vocabulary::getStringKey(                                          \
                 MaxOpcodes + MaxCanonicalTypeIDs +                             \
                 static_cast<unsigned>(Vocabulary::OperandKind::EnumName)),     \
             Str);

   HANDLE_OPERAND_KINDS(EXPECT_OPERAND_KIND)

 #undef EXPECT_OPERAND_KIND
 #undef HANDLE_OPERAND_KINDS

   // The first two operand slots, addressed by raw offset rather than by
   // OperandKind enumerator.
   StringRef FuncArgKey =
       Vocabulary::getStringKey(MaxOpcodes + MaxCanonicalTypeIDs + 0);
   StringRef PtrArgKey =
       Vocabulary::getStringKey(MaxOpcodes + MaxCanonicalTypeIDs + 1);
   EXPECT_EQ(FuncArgKey, "Function");
   EXPECT_EQ(PtrArgKey, "Pointer");

   // Verify PredicateKind -> string mapping
   // The expected key is "<PredKind>_<predicate name>" at offset PredPos
   // inside the predicate section. PredPos is parenthesised so that any
   // expression can be passed safely.
 #define EXPECT_PREDICATE_KIND(PredNum, PredPos, PredKind)                      \
   do {                                                                         \
     std::string PredStr =                                                      \
         std::string(PredKind) + "_" +                                          \
         CmpInst::getPredicateName(static_cast<CmpInst::Predicate>(PredNum))    \
             .str();                                                            \
     unsigned Pos =                                                             \
         MaxOpcodes + MaxCanonicalTypeIDs + MaxOperands + (PredPos);            \
     EXPECT_EQ(Vocabulary::getStringKey(Pos), PredStr);                         \
   } while (0)

   // FCMP predicates occupy the start of the predicate section.
   for (unsigned P = CmpInst::FIRST_FCMP_PREDICATE;
        P <= CmpInst::LAST_FCMP_PREDICATE; ++P)
     EXPECT_PREDICATE_KIND(P, P - CmpInst::FIRST_FCMP_PREDICATE, "FCMP");

   // ICMP predicates follow at offset LAST_FCMP_PREDICATE + 1. The position
   // is computed from P (the same arithmetic SlotIdxMapping uses) rather than
   // by passing a post-incremented counter as a macro argument.
   const unsigned ICmpBase = CmpInst::LAST_FCMP_PREDICATE + 1;
   for (unsigned P = CmpInst::FIRST_ICMP_PREDICATE;
        P <= CmpInst::LAST_ICMP_PREDICATE; ++P)
     EXPECT_PREDICATE_KIND(P, ICmpBase + (P - CmpInst::FIRST_ICMP_PREDICATE),
                           "ICMP");

 #undef EXPECT_PREDICATE_KIND
 }

 TEST(IR2VecVocabularyTest, VocabularyDimensions) {
   // A dummy vocabulary built for dimension N must be valid and report N.
   for (unsigned Dim : {1u, 5u, 10u}) {
     Vocabulary V(Vocabulary::createDummyVocabForTest(Dim));
     EXPECT_TRUE(V.isValid());
     EXPECT_EQ(V.getDimension(), Dim);
   }
 }

 #if GTEST_HAS_DEATH_TEST
 #ifndef NDEBUG
 TEST(IR2VecVocabularyTest, InvalidAccess) {
   Vocabulary Vocab(Vocabulary::createDummyVocabForTest(2));

   // Indexing the vocabulary with 0 or with 100 is expected to abort with
   // the invalid-opcode message.
   EXPECT_DEATH(Vocab[0u], "Invalid opcode");
   EXPECT_DEATH(Vocab[100u], "Invalid opcode");
 }
 #endif // NDEBUG
 #endif // GTEST_HAS_DEATH_TEST

 TEST(IR2VecVocabularyTest, TypeIDStringKeyMapping) {
   Vocabulary V(Vocabulary::createDummyVocabForTest());

   // For each row of the type table: look up the slot of the Type::TypeID and
   // check that the string key stored at that slot is the canonical name.
   // The trailing ';' is needed because the table expands rows back to back.
 #define EXPECT_TYPE_TO_CANONICAL(TypeIDTok, CanonEnum, CanonStr)               \
   do {                                                                         \
     const unsigned Slot = V.getIndex(Type::TypeIDTok);                         \
     EXPECT_EQ(Vocabulary::getStringKey(Slot), CanonStr);                       \
   } while (0);

   IR2VEC_HANDLE_TYPE_BIMAP(EXPECT_TYPE_TO_CANONICAL)

 #undef EXPECT_TYPE_TO_CANONICAL
 }

 TEST(IR2VecVocabularyTest, InvalidVocabularyConstruction) {
   // Case 1: storage with too few sections. A single section of two
   // embeddings is supplied, whereas the vocabulary needs four sections.
   {
     std::vector<Embedding> OnlySection;
     OnlySection.push_back(Embedding(2, 1.0));
     OnlySection.push_back(Embedding(2, 2.0));

     std::vector<std::vector<Embedding>> TooFewSections;
     TooFewSections.push_back(std::move(OnlySection));

     VocabStorage ShortStorage(std::move(TooFewSections));
     Vocabulary FromShortStorage(std::move(ShortStorage));
     EXPECT_FALSE(FromShortStorage.isValid());
   }

   // Case 2: a default-constructed vocabulary is invalid, and (in builds with
   // assertions) querying its dimension aborts.
   {
     Vocabulary DefaultVocab;
     EXPECT_FALSE(DefaultVocab.isValid());
 #if GTEST_HAS_DEATH_TEST
 #ifndef NDEBUG
     EXPECT_DEATH(DefaultVocab.getDimension(), "IR2Vec Vocabulary is invalid");
 #endif // NDEBUG
 #endif // GTEST_HAS_DEATH_TEST
   }
 }

 TEST(VocabStorageTest, DefaultConstructor) {
   VocabStorage EmptyStorage;

   // A default-constructed storage is invalid and reports zero everywhere.
   EXPECT_FALSE(EmptyStorage.isValid());
   EXPECT_EQ(EmptyStorage.size(), 0u);
   EXPECT_EQ(EmptyStorage.getNumSections(), 0u);
   EXPECT_EQ(EmptyStorage.getDimension(), 0u);

   // With nothing stored, begin() and end() coincide.
   EXPECT_EQ(EmptyStorage.begin(), EmptyStorage.end());
 }

 TEST(VocabStorageTest, BasicConstruction) {
   // Three sections with 2, 1 and 3 embeddings, every embedding of dimension 3.
   std::vector<std::vector<Embedding>> Sections(3);

   Sections[0].emplace_back(std::vector<double>{1.0, 2.0, 3.0});
   Sections[0].emplace_back(std::vector<double>{4.0, 5.0, 6.0});

   Sections[1].emplace_back(std::vector<double>{7.0, 8.0, 9.0});

   Sections[2].emplace_back(std::vector<double>{10.0, 11.0, 12.0});
   Sections[2].emplace_back(std::vector<double>{13.0, 14.0, 15.0});
   Sections[2].emplace_back(std::vector<double>{16.0, 17.0, 18.0});

   VocabStorage Storage(std::move(Sections));

   EXPECT_TRUE(Storage.isValid());
   EXPECT_EQ(Storage.getNumSections(), 3u);
   EXPECT_EQ(Storage.getDimension(), 3u);
   EXPECT_EQ(Storage.size(), 6u); // 2 + 1 + 3 embeddings in total
 }

 TEST(VocabStorageTest, SectionAccess) {
   // Two sections of dimension-2 embeddings: two entries, then one entry.
   std::vector<std::vector<Embedding>> Sections(2);
   Sections[0].emplace_back(std::vector<double>{1.0, 2.0});
   Sections[0].emplace_back(std::vector<double>{3.0, 4.0});
   Sections[1].emplace_back(std::vector<double>{5.0, 6.0});

   VocabStorage Storage(std::move(Sections));

   // operator[] selects a section; its size is the number of embeddings in it.
   EXPECT_EQ(Storage[0].size(), 2u);
   EXPECT_EQ(Storage[1].size(), 1u);

   // A second index selects the embedding within the section.
   EXPECT_THAT(Storage[0][0].getData(), ElementsAre(1.0, 2.0));
   EXPECT_THAT(Storage[0][1].getData(), ElementsAre(3.0, 4.0));
   EXPECT_THAT(Storage[1][0].getData(), ElementsAre(5.0, 6.0));
 }

 #if GTEST_HAS_DEATH_TEST
 #ifndef NDEBUG
 TEST(VocabStorageTest, InvalidSectionAccess) {
   // Storage with exactly one section, so section 0 is the only valid index.
   std::vector<std::vector<Embedding>> Sections(1);
   Sections[0].emplace_back(std::vector<double>{1.0, 2.0});

   VocabStorage Storage(std::move(Sections));

   // Both the first invalid index and a far-off one are expected to abort.
   EXPECT_DEATH(Storage[1], "Invalid section ID");
   EXPECT_DEATH(Storage[10], "Invalid section ID");
 }

 TEST(VocabStorageTest, EmptySection) {
   // The first section is left empty; the second holds one embedding.
   std::vector<std::vector<Embedding>> Sections(2);
   Sections[1].emplace_back(std::vector<double>{1.0});

   EXPECT_DEATH(VocabStorage(std::move(Sections)),
                "Vocabulary section is empty");
 }

 TEST(VocabStorageTest, EmptyMiddleSection) {
   // Sections 0 and 2 hold one embedding each; section 1 in between is left
   // empty, which construction is expected to reject.
   std::vector<std::vector<Embedding>> Sections(3);
   Sections[0].emplace_back(std::vector<double>{1.0});
   Sections[2].emplace_back(std::vector<double>{2.0});

   EXPECT_DEATH(VocabStorage(std::move(Sections)),
                "Vocabulary section is empty");
 }

 TEST(VocabStorageTest, NoSections) {
   // Constructing storage from an empty list of sections is expected to abort.
   std::vector<std::vector<Embedding>> NoSectionsAtAll;

   EXPECT_DEATH(VocabStorage(std::move(NoSectionsAtAll)),
                "Vocabulary has no sections");
 }

 TEST(VocabStorageTest, MismatchedDimensionsAcrossSections) {
   std::vector<std::vector<Embedding>> Sections(2);

   // Section 0 uses dimension 2 throughout...
   Sections[0].emplace_back(std::vector<double>{1.0, 2.0});
   Sections[0].emplace_back(std::vector<double>{3.0, 4.0});

   // ...but section 1 holds an embedding of dimension 3.
   Sections[1].emplace_back(std::vector<double>{5.0, 6.0, 7.0});

   EXPECT_DEATH(VocabStorage(std::move(Sections)),
                "All embeddings must have the same dimension");
 }

 TEST(VocabStorageTest, MismatchedDimensionsWithinSection) {
   // A single section whose two embeddings disagree on dimension (2 vs. 3).
   std::vector<std::vector<Embedding>> Sections(1);
   Sections[0].emplace_back(std::vector<double>{1.0, 2.0});
   Sections[0].emplace_back(std::vector<double>{3.0, 4.0, 5.0});

   EXPECT_DEATH(VocabStorage(std::move(Sections)),
                "All embeddings must have the same dimension");
 }
 #endif // NDEBUG
 #endif // GTEST_HAS_DEATH_TEST

 TEST(VocabStorageTest, IteratorBasics) {
   // Two sections: {1,2},{3,4} in the first and {5,6} in the second.
   std::vector<std::vector<Embedding>> Sections(2);
   Sections[0].emplace_back(std::vector<double>{1.0, 2.0});
   Sections[0].emplace_back(std::vector<double>{3.0, 4.0});
   Sections[1].emplace_back(std::vector<double>{5.0, 6.0});

   VocabStorage Storage(std::move(Sections));

   auto Cursor = Storage.begin();
   auto Finish = Storage.end();

   // Non-empty storage: the cursor starts before the end, on the first
   // embedding of section 0.
   EXPECT_NE(Cursor, Finish);
   EXPECT_THAT((*Cursor).getData(), ElementsAre(1.0, 2.0));

   // One step: second embedding of section 0.
   ++Cursor;
   EXPECT_NE(Cursor, Finish);
   EXPECT_THAT((*Cursor).getData(), ElementsAre(3.0, 4.0));

   // Another step crosses into section 1.
   ++Cursor;
   EXPECT_NE(Cursor, Finish);
   EXPECT_THAT((*Cursor).getData(), ElementsAre(5.0, 6.0));

   // The final step lands exactly on end().
   ++Cursor;
   EXPECT_EQ(Cursor, Finish);
 }

 TEST(VocabStorageTest, IteratorTraversal) {
   // Three sections of one-dimensional embeddings holding 2, 1 and 3 entries.
   std::vector<std::vector<Embedding>> Sections(3);

   Sections[0].emplace_back(std::vector<double>{10.0});
   Sections[0].emplace_back(std::vector<double>{20.0});

   Sections[1].emplace_back(std::vector<double>{25.0});

   Sections[2].emplace_back(std::vector<double>{30.0});
   Sections[2].emplace_back(std::vector<double>{40.0});
   Sections[2].emplace_back(std::vector<double>{50.0});

   VocabStorage Storage(std::move(Sections));

   // Walk the storage with a range-for and record each embedding's only
   // element.
   std::vector<double> Seen;
   for (const auto &Entry : Storage) {
     EXPECT_EQ(Entry.size(), 1u);
     Seen.push_back(Entry[0]);
   }

   // The walk must cover every section, in section order.
   EXPECT_THAT(Seen, ElementsAre(10.0, 20.0, 25.0, 30.0, 40.0, 50.0));
 }

 TEST(VocabStorageTest, IteratorComparison) {
   // One section holding two one-dimensional embeddings.
   std::vector<std::vector<Embedding>> Sections(1);
   Sections[0].emplace_back(std::vector<double>{1.0});
   Sections[0].emplace_back(std::vector<double>{2.0});

   VocabStorage Storage(std::move(Sections));

   auto First = Storage.begin();
   auto Second = Storage.begin();
   auto Finish = Storage.end();

   // Two fresh begin() iterators are equal to each other but not to end().
   EXPECT_EQ(First, Second);
   EXPECT_NE(First, Finish);

   // Advancing only one of them makes the pair unequal.
   ++First;
   EXPECT_NE(First, Second);
   EXPECT_NE(First, Finish);

   // Advancing the other by the same amount restores equality.
   ++Second;
   EXPECT_EQ(First, Second);

   // One more step each reaches end(), where they are again equal.
   ++First;
   ++Second;
   EXPECT_EQ(First, Finish);
   EXPECT_EQ(Second, Finish);
   EXPECT_EQ(First, Second);
 }

 } // end anonymous namespace