lib/IR/StructuralHash.cpp - llvm-project/llvm - Git at Google

 //===-- StructuralHash.cpp - IR Hashing -------------------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//

 #include "llvm/IR/StructuralHash.h"
 #include "llvm/ADT/Hashing.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"

 using namespace llvm;

 namespace {

 // Basic hashing mechanism to detect structural change to the IR, used to verify
 // pass return status consistency with actual change. In addition to being used
 // by the MergeFunctions pass.

 class StructuralHashImpl {
   uint64_t Hash;

   void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }

   // This will produce different values on 32-bit and 64-bit systens as
   // hash_combine returns a size_t. However, this is only used for
   // detailed hashing which, in-tree, only needs to distinguish between
   // differences in functions.
   template <typename T> void hashArbitaryType(const T &V) {
     hash(hash_combine(V));
   }

   void hashType(Type *ValueType) {
     hash(ValueType->getTypeID());
     if (ValueType->isIntegerTy())
       hash(ValueType->getIntegerBitWidth());
   }

 public:
   StructuralHashImpl() : Hash(4) {}

   void updateOperand(Value *Operand) {
     hashType(Operand->getType());

     // The cases enumerated below are not exhaustive and are only aimed to
     // get decent coverage over the function.
     if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(Operand)) {
       hashArbitaryType(ConstInt->getValue());
     } else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(Operand)) {
       hashArbitaryType(ConstFP->getValue());
     } else if (Argument *Arg = dyn_cast<Argument>(Operand)) {
       hash(Arg->getArgNo());
     } else if (Function *Func = dyn_cast<Function>(Operand)) {
       // Hashing the name will be deterministic as LLVM's hashing infrastructure
       // has explicit support for hashing strings and will not simply hash
       // the pointer.
       hashArbitaryType(Func->getName());
     }
   }

   void updateInstruction(const Instruction &Inst, bool DetailedHash) {
     hash(Inst.getOpcode());

     if (!DetailedHash)
       return;

     hashType(Inst.getType());

     // Handle additional properties of specific instructions that cause
     // semantic differences in the IR.
     if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst))
       hash(ComparisonInstruction->getPredicate());

     for (const auto &Op : Inst.operands())
       updateOperand(Op);
   }

   // A function hash is calculated by considering only the number of arguments
   // and whether a function is varargs, the order of basic blocks (given by the
   // successors of each basic block in depth first order), and the order of
   // opcodes of each instruction within each of these basic blocks. This mirrors
   // the strategy FunctionComparator::compare() uses to compare functions by
   // walking the BBs in depth first order and comparing each instruction in
   // sequence. Because this hash currently does not look at the operands, it is
   // insensitive to things such as the target of calls and the constants used in
   // the function, which makes it useful when possibly merging functions which
   // are the same modulo constants and call targets.
   //
   // Note that different users of StructuralHash will want different behavior
   // out of it (i.e., MergeFunctions will want something different from PM
   // expensive checks for pass modification status). When modifying this
   // function, most changes should be gated behind an option and enabled
   // selectively.
   void update(const Function &F, bool DetailedHash) {
     // Declarations don't affect analyses.
     if (F.isDeclaration())
       return;

     hash(0x62642d6b6b2d6b72); // Function header

     hash(F.isVarArg());
     hash(F.arg_size());

     SmallVector<const BasicBlock *, 8> BBs;
     SmallPtrSet<const BasicBlock *, 16> VisitedBBs;

     // Walk the blocks in the same order as
     // FunctionComparator::cmpBasicBlocks(), accumulating the hash of the
     // function "structure." (BB and opcode sequence)
     BBs.push_back(&F.getEntryBlock());
     VisitedBBs.insert(BBs[0]);
     while (!BBs.empty()) {
       const BasicBlock *BB = BBs.pop_back_val();

       // This random value acts as a block header, as otherwise the partition of
       // opcodes into BBs wouldn't affect the hash, only the order of the
       // opcodes
       hash(45798);
       for (auto &Inst : *BB)
         updateInstruction(Inst, DetailedHash);

       for (const BasicBlock *Succ : successors(BB))
         if (VisitedBBs.insert(Succ).second)
           BBs.push_back(Succ);
     }
   }

   void update(const GlobalVariable &GV) {
     // Declarations and used/compiler.used don't affect analyses.
     // Since there are several `llvm.*` metadata, like `llvm.embedded.object`,
     // we ignore anything with the `.llvm` prefix
     if (GV.isDeclaration() || GV.getName().starts_with("llvm."))
       return;
     hash(23456); // Global header
     hash(GV.getValueType()->getTypeID());
   }

   void update(const Module &M, bool DetailedHash) {
     for (const GlobalVariable &GV : M.globals())
       update(GV);
     for (const Function &F : M)
       update(F, DetailedHash);
   }

   uint64_t getHash() const { return Hash; }
 };

 } // namespace

 IRHash llvm::StructuralHash(const Function &F, bool DetailedHash) {
   StructuralHashImpl H;
   H.update(F, DetailedHash);
   return H.getHash();
 }

 IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) {
   StructuralHashImpl H;
   H.update(M, DetailedHash);
   return H.getHash();
 }
	//===-- StructuralHash.cpp - IR Hashing -------------------------- C++ --===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//

	#include "llvm/IR/StructuralHash.h"
	#include "llvm/ADT/Hashing.h"
	#include "llvm/IR/Function.h"
	#include "llvm/IR/GlobalVariable.h"
	#include "llvm/IR/InstrTypes.h"
	#include "llvm/IR/Instructions.h"
	#include "llvm/IR/IntrinsicInst.h"
	#include "llvm/IR/Module.h"

	using namespace llvm;

	namespace {

	// Basic hashing mechanism to detect structural change to the IR, used to verify
	// pass return status consistency with actual change. In addition to being used
	// by the MergeFunctions pass.

	class StructuralHashImpl {
	uint64_t Hash;

	void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }

	// This will produce different values on 32-bit and 64-bit systens as
	// hash_combine returns a size_t. However, this is only used for
	// detailed hashing which, in-tree, only needs to distinguish between
	// differences in functions.
	template <typename T> void hashArbitaryType(const T &V) {
	hash(hash_combine(V));
	}

	void hashType(Type *ValueType) {
	hash(ValueType->getTypeID());
	if (ValueType->isIntegerTy())
	hash(ValueType->getIntegerBitWidth());
	}

	public:
	StructuralHashImpl() : Hash(4) {}

	void updateOperand(Value *Operand) {
	hashType(Operand->getType());

	// The cases enumerated below are not exhaustive and are only aimed to
	// get decent coverage over the function.
	if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(Operand)) {
	hashArbitaryType(ConstInt->getValue());
	} else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(Operand)) {
	hashArbitaryType(ConstFP->getValue());
	} else if (Argument *Arg = dyn_cast<Argument>(Operand)) {
	hash(Arg->getArgNo());
	} else if (Function *Func = dyn_cast<Function>(Operand)) {
	// Hashing the name will be deterministic as LLVM's hashing infrastructure
	// has explicit support for hashing strings and will not simply hash
	// the pointer.
	hashArbitaryType(Func->getName());
	}
	}

	void updateInstruction(const Instruction &Inst, bool DetailedHash) {
	hash(Inst.getOpcode());

	if (!DetailedHash)
	return;

	hashType(Inst.getType());

	// Handle additional properties of specific instructions that cause
	// semantic differences in the IR.
	if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst))
	hash(ComparisonInstruction->getPredicate());

	for (const auto &Op : Inst.operands())
	updateOperand(Op);
	}

	// A function hash is calculated by considering only the number of arguments
	// and whether a function is varargs, the order of basic blocks (given by the
	// successors of each basic block in depth first order), and the order of
	// opcodes of each instruction within each of these basic blocks. This mirrors
	// the strategy FunctionComparator::compare() uses to compare functions by
	// walking the BBs in depth first order and comparing each instruction in
	// sequence. Because this hash currently does not look at the operands, it is
	// insensitive to things such as the target of calls and the constants used in
	// the function, which makes it useful when possibly merging functions which
	// are the same modulo constants and call targets.
	//
	// Note that different users of StructuralHash will want different behavior
	// out of it (i.e., MergeFunctions will want something different from PM
	// expensive checks for pass modification status). When modifying this
	// function, most changes should be gated behind an option and enabled
	// selectively.
	void update(const Function &F, bool DetailedHash) {
	// Declarations don't affect analyses.
	if (F.isDeclaration())
	return;

	hash(0x62642d6b6b2d6b72); // Function header

	hash(F.isVarArg());
	hash(F.arg_size());

	SmallVector<const BasicBlock *, 8> BBs;
	SmallPtrSet<const BasicBlock *, 16> VisitedBBs;

	// Walk the blocks in the same order as
	// FunctionComparator::cmpBasicBlocks(), accumulating the hash of the
	// function "structure." (BB and opcode sequence)
	BBs.push_back(&F.getEntryBlock());
	VisitedBBs.insert(BBs[0]);
	while (!BBs.empty()) {
	const BasicBlock *BB = BBs.pop_back_val();

	// This random value acts as a block header, as otherwise the partition of
	// opcodes into BBs wouldn't affect the hash, only the order of the
	// opcodes
	hash(45798);
	for (auto &Inst : *BB)
	updateInstruction(Inst, DetailedHash);

	for (const BasicBlock *Succ : successors(BB))
	if (VisitedBBs.insert(Succ).second)
	BBs.push_back(Succ);
	}
	}

	void update(const GlobalVariable &GV) {
	// Declarations and used/compiler.used don't affect analyses.
	// Since there are several `llvm.*` metadata, like `llvm.embedded.object`,
	// we ignore anything with the `.llvm` prefix
	if (GV.isDeclaration() \|\| GV.getName().starts_with("llvm."))
	return;
	hash(23456); // Global header
	hash(GV.getValueType()->getTypeID());
	}

	void update(const Module &M, bool DetailedHash) {
	for (const GlobalVariable &GV : M.globals())
	update(GV);
	for (const Function &F : M)
	update(F, DetailedHash);
	}

	uint64_t getHash() const { return Hash; }
	};

	} // namespace

	IRHash llvm::StructuralHash(const Function &F, bool DetailedHash) {
	StructuralHashImpl H;
	H.update(F, DetailedHash);
	return H.getHash();
	}

	IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) {
	StructuralHashImpl H;
	H.update(M, DetailedHash);
	return H.getHash();
	}