blob: 75e775086493af1f1089c76d01fc191161fc32f6 [file] [log] [blame]
//===--------------- IRNormalizer.cpp - IR Normalizer ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the IRNormalizer class which aims to transform LLVM
/// Modules into a normal form by reordering and renaming instructions while
/// preserving the same semantics. The normalizer makes it easier to spot
/// semantic differences while diffing two modules which have undergone
/// different passes.
///
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/IRNormalizer.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils.h"
#include <algorithm>
#include <stack>
#define DEBUG_TYPE "normalize"
using namespace llvm;
namespace {
/// IRNormalizer aims to transform LLVM IR into normal form.
///
/// An instance keeps per-run state (the collected output instructions and the
/// set of instructions the naming walk has already visited), so a fresh
/// instance is created for every function that is normalized.
class IRNormalizer {
public:
  /// Normalizes \p F in place by renaming and (optionally) reordering.
  ///
  /// \returns Always true.
  bool runOnFunction(Function &F);

  /// \param Options Flags selecting which normalization steps are applied.
  explicit IRNormalizer(IRNormalizerOptions Options) : Options(Options) {}

private:
  /// Flags consulted by the naming, reordering and folding steps.
  const IRNormalizerOptions Options;

  // Random constant for hashing, so the state isn't zero.
  static constexpr uint64_t MagicHashConstant = 0x6acaa36bef8325c5ULL;

  /// Instructions already visited by nameInstruction(); each instruction is
  /// processed at most once.
  DenseSet<const Instruction *> NamedInstructions;

  /// Output instructions of the function, filled in by runOnFunction().
  SmallVector<Instruction *, 16> Outputs;

  /// \name Naming.
  /// @{
  void nameFunctionArguments(Function &F) const;
  void nameBasicBlocks(Function &F) const;
  void nameInstruction(Instruction *I);
  void nameAsInitialInstruction(Instruction *I) const;
  void nameAsRegularInstruction(Instruction *I);
  void foldInstructionName(Instruction *I) const;
  /// @}

  /// \name Reordering.
  /// @{
  void reorderInstructions(Function &F) const;
  void reorderDefinition(Instruction *Definition,
                         std::stack<Instruction *> &TopologicalSort,
                         SmallPtrSet<const Instruction *, 32> &Visited) const;
  void reorderInstructionOperandsByNames(Instruction *I) const;
  void reorderPHIIncomingValues(PHINode *Phi) const;
  /// @}

  /// \name Utility methods.
  /// @{
  template <typename T>
  void sortCommutativeOperands(Instruction *I, T &Operands) const;
  SmallVector<Instruction *, 16> collectOutputInstructions(Function &F) const;
  bool isOutput(const Instruction *I) const;
  bool isInitialInstruction(const Instruction *I) const;
  bool hasOnlyImmediateOperands(const Instruction *I) const;
  SetVector<int>
  getOutputFootprint(Instruction *I,
                     SmallPtrSet<const Instruction *, 32> &Visited) const;
  /// @}
};
} // namespace
/// Entry method to the IRNormalizer.
///
/// Runs the normalization steps in order: argument and basic block naming,
/// output collection, optional instruction reordering, recursive instruction
/// naming starting from the outputs, and finally a per-instruction sweep that
/// optionally reorders operands / PHI incoming values and folds the names.
///
/// \param F Function to normalize.
/// \returns Always true.
bool IRNormalizer::runOnFunction(Function &F) {
  nameFunctionArguments(F);
  nameBasicBlocks(F);

  Outputs = collectOutputInstructions(F);

  const bool MayReorder = !Options.PreserveOrder;
  if (MayReorder)
    reorderInstructions(F);

  // TODO: Reorder basic blocks via a topological sort.

  // Naming starts at each output and walks its operands recursively.
  for (Instruction *Output : Outputs)
    nameInstruction(Output);

  for (Instruction &Inst : instructions(F)) {
    if (MayReorder) {
      if (Options.ReorderOperands)
        reorderInstructionOperandsByNames(&Inst);
      if (auto *Phi = dyn_cast<PHINode>(&Inst))
        reorderPHIIncomingValues(Phi);
    }
    foldInstructionName(&Inst);
  }

  return true;
}
/// Numbers arguments.
///
/// An argument is renamed to "a<N>" when it is unnamed or when the RenameAll
/// option is set. N counts only the arguments that were actually renamed.
///
/// \param F Function whose arguments will be renamed.
void IRNormalizer::nameFunctionArguments(Function &F) const {
  int NextIndex = 0;
  for (Argument &Arg : F.args()) {
    // Keep names that are already present unless everything is renamed.
    if (!Options.RenameAll && !Arg.getName().empty())
      continue;
    Arg.setName("a" + Twine(NextIndex));
    ++NextIndex;
  }
}
/// Names basic blocks using a generated hash for each basic block in
/// a function considering the opcode and the order of output instructions.
///
/// A block is renamed only when it is unnamed or when the RenameAll option is
/// set; the hash is not computed for blocks that keep their name.
///
/// \param F Function containing basic blocks to rename.
void IRNormalizer::nameBasicBlocks(Function &F) const {
  for (BasicBlock &B : F) {
    // Decide first whether the block is renamed at all, so the hashing work
    // below is only done when its result is used.
    if (!Options.RenameAll && !B.getName().empty())
      continue;

    // Initialize to a magic constant, so the state isn't zero.
    uint64_t Hash = MagicHashConstant;
    // Hash considering output instruction opcodes.
    for (const Instruction &I : B)
      if (isOutput(&I))
        Hash = hashing::detail::hash_16_bytes(Hash, I.getOpcode());

    // Name basic block. Substring hash to make diffs more readable.
    B.setName("bb" + std::to_string(Hash).substr(0, 5));
  }
}
/// Names an instruction and, recursively, the instructions it depends on,
/// following the def-use tree from the initial instructions (defs) up to the
/// output (top-most user) instructions (depth-first).
///
/// \param I Instruction to be renamed.
void IRNormalizer::nameInstruction(Instruction *I) {
  // Every instruction is processed at most once. It is recorded before it is
  // named, so the recursive operand walk stops when it reaches an instruction
  // that is already being handled (e.g. through a phi node).
  if (!NamedInstructions.insert(I).second)
    return;

  if (isInitialInstruction(I))
    nameAsInitialInstruction(I);
  else
    nameAsRegularInstruction(I); // Anything else is a regular instruction.
}
/// Sorts the first two entries of \p Operands when \p I is commutative.
///
/// Nothing happens for non-commutative instructions or when \p Operands holds
/// fewer than two entries. Entries past the second one are left in place.
///
/// \param I Instruction the operands belong to.
/// \param Operands Container of sortable operand keys.
template <typename T>
void IRNormalizer::sortCommutativeOperands(Instruction *I, T &Operands) const {
  if (!I->isCommutative() || Operands.size() < 2)
    return;
  auto First = Operands.begin();
  llvm::sort(First, std::next(First, 2));
}
/// Names instruction following the scheme:
/// vl00000Callee(Operands)
///
/// Where 00000 is a hash calculated considering instruction's opcode and output
/// footprint. Callee's name is only included when instruction's type is
/// CallInst and the called function is known. For commutative instructions the
/// operand list is sorted before it is printed.
///
/// Void-typed instructions are skipped, and an instruction is renamed only
/// when it is unnamed or the RenameAll flag is raised.
///
/// \see getOutputFootprint()
/// \param I Instruction to be renamed.
void IRNormalizer::nameAsInitialInstruction(Instruction *I) const {
  const bool MayRename = Options.RenameAll || I->getName().empty();
  if (I->getType()->isVoidTy() || !MayRename)
    return;

  LLVM_DEBUG(dbgs() << "Naming initial instruction: " << *I << "\n");

  // Textual form of every operand except functions (callees).
  SmallVector<SmallString<64>, 4> OperandTexts;
  for (auto &Op : I->operands()) {
    if (isa<Function>(Op))
      continue;
    std::string Text;
    raw_string_ostream OS(Text);
    Op->printAsOperand(OS, false);
    OperandTexts.push_back(StringRef(OS.str()));
  }
  sortCommutativeOperands(I, OperandTexts);

  // Seed with the magic constant (so the state isn't zero), then mix in the
  // opcode followed by the output footprint.
  uint64_t Hash =
      hashing::detail::hash_16_bytes(MagicHashConstant, I->getOpcode());
  SmallPtrSet<const Instruction *, 32> Visited;
  for (const int OutputIndex : getOutputFootprint(I, Visited))
    Hash = hashing::detail::hash_16_bytes(Hash, OutputIndex);

  // Assemble "vl<hash><callee>(<operands>)".
  SmallString<256> Name;
  Name.append("vl" + std::to_string(Hash).substr(0, 5));
  if (const auto *CI = dyn_cast<CallInst>(I))
    if (const Function *Callee = CI->getCalledFunction())
      Name.append(Callee->getName());

  Name.append("(");
  for (size_t Idx = 0, End = OperandTexts.size(); Idx != End; ++Idx) {
    if (Idx != 0)
      Name.append(", ");
    Name.append(OperandTexts[Idx]);
  }
  Name.append(")");

  I->setName(Name);
}
/// Names instruction following the scheme:
/// op00000Callee(Operands)
///
/// Where 00000 is a hash calculated considering instruction's opcode, its
/// operands' opcodes and order. Callee's name is only included when
/// instruction's type is CallInst and the called function is known. For
/// commutative instructions the operand list is sorted.
///
/// Operands that are instructions are named first (recursively, through
/// nameInstruction()), because this instruction's name is built from theirs.
/// That walk happens even when this instruction itself keeps its name.
///
/// The new name is applied only when the instruction is not void-typed and is
/// either unnamed or the RenameAll flag is raised.
///
/// \param I Instruction to be renamed.
void IRNormalizer::nameAsRegularInstruction(Instruction *I) {
  LLVM_DEBUG(dbgs() << "Naming regular instruction: " << *I << "\n");

  // Printable operand names, and the opcodes of the operands that are
  // instructions (used for hashing).
  SmallVector<SmallString<128>, 4> OperandNames;
  SmallVector<int, 4> OperandOpcodes;

  for (auto &Op : I->operands()) {
    if (auto *OpInst = dyn_cast<Instruction>(Op)) {
      // Walk down the use-def chain so the operand is named before it is used
      // in this instruction's name.
      nameInstruction(OpInst);
      OperandNames.push_back(OpInst->getName());
      OperandOpcodes.push_back(OpInst->getOpcode());
      continue;
    }
    if (isa<Function>(Op))
      continue;
    // This must be an immediate value.
    std::string Text;
    raw_string_ostream OS(Text);
    Op->printAsOperand(OS, false);
    OperandNames.push_back(StringRef(OS.str()));
  }

  sortCommutativeOperands(I, OperandNames);
  sortCommutativeOperands(I, OperandOpcodes);

  // Seed with the magic constant (so the state isn't zero), then mix in the
  // instruction opcode followed by the operand opcodes.
  uint64_t Hash =
      hashing::detail::hash_16_bytes(MagicHashConstant, I->getOpcode());
  for (const int Opcode : OperandOpcodes)
    Hash = hashing::detail::hash_16_bytes(Hash, Opcode);

  // Assemble "op<hash><callee>(<operands>)".
  SmallString<512> Name;
  Name.append("op" + std::to_string(Hash).substr(0, 5));
  if (const auto *CI = dyn_cast<CallInst>(I))
    if (const Function *Callee = CI->getCalledFunction())
      Name.append(Callee->getName());

  Name.append("(");
  for (size_t Idx = 0, End = OperandNames.size(); Idx != End; ++Idx) {
    if (Idx != 0)
      Name.append(", ");
    Name.append(OperandNames[Idx]);
  }
  Name.append(")");

  const bool MayRename = I->getName().empty() || Options.RenameAll;
  if (MayRename && !I->getType()->isVoidTy())
    I->setName(Name);
}
/// Shortens instruction's name. This method removes called function name from
/// the instruction name and substitutes the call chain with a corresponding
/// list of operands.
///
/// Examples:
///   op00000Callee(op00001Callee(...), vl00000Callee(1, 2), ...)
///     -> op00000(op00001, vl00000, ...)
///   vl00000Callee(1, 2) -> vl00000(1, 2)
///
/// Only names starting with "op" are rewritten, and output instructions are
/// never folded. Unless the FoldPreOutputs option is set, instructions used
/// directly by an output instruction (pre-outputs) are left alone as well.
///
/// \param I Instruction whose name will be folded.
void IRNormalizer::foldInstructionName(Instruction *I) const {
  // Pre-outputs are only folded when FoldPreOutputs is raised.
  if (!Options.FoldPreOutputs) {
    const bool FeedsOutput = llvm::any_of(I->users(), [this](const User *U) {
      const auto *UserInst = dyn_cast<Instruction>(U);
      return UserInst && isOutput(UserInst);
    });
    if (FeedsOutput)
      return;
  }

  // Don't fold if it is an output instruction or has no op prefix.
  if (isOutput(I) || !I->getName().starts_with("op"))
    return;

  // Short names of the operands that are instructions; generated names
  // ("op"/"vl" prefix) are cut down to the prefix plus hash.
  SmallVector<SmallString<64>, 4> OperandNames;
  for (auto &Op : I->operands()) {
    const auto *OpInst = dyn_cast<Instruction>(Op);
    if (!OpInst)
      continue;
    const StringRef OpName = OpInst->getName();
    const bool HasNormalName =
        OpName.starts_with("op") || OpName.starts_with("vl");
    OperandNames.push_back(HasNormalName ? OpName.substr(0, 7) : OpName);
  }
  sortCommutativeOperands(I, OperandNames);

  SmallString<256> Name;
  Name.append(I->getName().substr(0, 7));
  Name.append("(");
  for (size_t Idx = 0, End = OperandNames.size(); Idx != End; ++Idx) {
    if (Idx != 0)
      Name.append(", ");
    Name.append(OperandNames[Idx]);
  }
  Name.append(")");

  I->setName(Name);
}
/// Reorders instructions by walking up the tree from each operand of an output
/// instruction and reducing the def-use distance.
/// This method assumes that output instructions were collected top-down,
/// otherwise the def-use chain may be broken.
/// This method is a wrapper for recursive reorderDefinition().
///
/// \see reorderDefinition()
/// \param F Function whose basic blocks are reordered one by one.
void IRNormalizer::reorderInstructions(Function &F) const {
  for (BasicBlock &BB : F) {
    LLVM_DEBUG(dbgs() << "Reordering instructions in basic block: "
                      << BB.getName() << "\n");
    // Find the source nodes of the DAG of instructions in this basic block.
    // Source nodes are instructions that have side effects, are terminators, or
    // don't have a parent in the DAG of instructions.
    //
    // We must iterate from the first to the last instruction otherwise side
    // effecting instructions could be reordered.
    std::stack<Instruction *> TopologicalSort;
    SmallPtrSet<const Instruction *, 32> Visited;

    // First pass: side effecting and terminating instructions.
    for (Instruction &I : BB) {
      if (isOutput(&I) || I.isTerminator()) {
        LLVM_DEBUG(dbgs() << "\tReordering from source effecting instruction: ";
                   I.dump());
        reorderDefinition(&I, TopologicalSort, Visited);
      }
    }

    // Second pass: whatever the first pass did not reach.
    //
    // TODO: Do a more intelligent sorting of these instructions. For example,
    // separate between dead instructions and instructions used in another
    // block. Use properties of the CFG to order instructions that are used
    // in another block.
    for (Instruction &I : BB) {
      if (!Visited.contains(&I)) {
        LLVM_DEBUG(dbgs() << "\tReordering from source instruction: ";
                   I.dump());
        reorderDefinition(&I, TopologicalSort, Visited);
      }
    }

    LLVM_DEBUG(dbgs() << "Inserting instructions into: " << BB.getName()
                      << "\n");
    // Reorder based on the topological sort. The insertion point is looked up
    // again for every instruction, so each popped instruction is placed in
    // front of the one placed just before it.
    while (!TopologicalSort.empty()) {
      Instruction *Next = TopologicalSort.top();
      TopologicalSort.pop();

      auto InsertPt = BB.getFirstNonPHIOrDbgOrAlloca();
      // Never insert in front of a convergence entry/loop intrinsic call.
      if (auto *Call = dyn_cast<CallInst>(&*InsertPt)) {
        const auto ID = Call->getIntrinsicID();
        if (ID == Intrinsic::experimental_convergence_entry ||
            ID == Intrinsic::experimental_convergence_loop)
          ++InsertPt;
      }
      Next->moveBefore(InsertPt);
    }
  }
}
/// Recursive helper of reorderInstructions(): visits \p Definition and the
/// operands defined in the same basic block (operands first), and pushes each
/// movable instruction onto \p TopologicalSort after its operands.
///
/// Instructions located before the block's first non-PHI/debug/alloca
/// instruction are not traversed. Terminators, musttail calls, bitcasts of
/// musttail calls and calls to the deoptimize / convergence entry / convergence
/// loop intrinsics are traversed but never pushed.
///
/// \param Definition Instruction to visit.
/// \param TopologicalSort Stack receiving the instructions to be moved.
/// \param Visited Instructions already visited in the current basic block.
void IRNormalizer::reorderDefinition(
    Instruction *Definition, std::stack<Instruction *> &TopologicalSort,
    SmallPtrSet<const Instruction *, 32> &Visited) const {
  // Visit every instruction only once.
  if (!Visited.insert(Definition).second)
    return;

  const BasicBlock *Parent = Definition->getParent();
  const auto FirstMovable = Parent->getFirstNonPHIOrDbgOrAlloca();
  if (FirstMovable == Parent->end())
    return; // TODO: Is this necessary?
  if (Definition->comesBefore(&*FirstMovable))
    return; // TODO: Do some kind of ordering for these instructions.

  // Operands go first. Only reorder instructions within the same basic block.
  for (auto &Operand : Definition->operands()) {
    auto *OpInst = dyn_cast<Instruction>(Operand);
    if (OpInst && OpInst->getParent() == Parent)
      reorderDefinition(OpInst, TopologicalSort, Visited);
  }

  LLVM_DEBUG(dbgs() << "\t\tNext in topological sort: "; Definition->dump());

  if (Definition->isTerminator())
    return;

  if (auto *Call = dyn_cast<CallInst>(Definition)) {
    if (Call->isMustTailCall())
      return;
    switch (Call->getIntrinsicID()) {
    case Intrinsic::experimental_deoptimize:
    case Intrinsic::experimental_convergence_entry:
    case Intrinsic::experimental_convergence_loop:
      return;
    default:
      break;
    }
  }

  if (auto *BitCast = dyn_cast<BitCastInst>(Definition))
    if (auto *Call = dyn_cast<CallInst>(BitCast->getOperand(0)))
      if (Call->isMustTailCall())
        return;

  TopologicalSort.push(Definition);
}
/// Reorders instruction's operands alphabetically.
///
/// Every operand is paired with a sort key: the value name for instructions,
/// the printed operand text otherwise. The actual sorting is delegated to
/// sortCommutativeOperands(), which only touches commutative instructions;
/// for any other instruction the operands are written back in their original
/// order, since changing the operand order there may change the semantics.
///
/// \param I Instruction whose operands will be reordered.
void IRNormalizer::reorderInstructionOperandsByNames(Instruction *I) const {
  // (sort key, operand) pairs in operand order.
  SmallVector<std::pair<std::string, Value *>, 4> KeyedOperands;

  for (auto &Op : I->operands()) {
    auto *V = dyn_cast<Value>(Op);
    if (!V)
      continue;
    if (isa<Instruction>(V)) {
      // Instructions are keyed by their name.
      KeyedOperands.emplace_back(V->getName().str(), V);
    } else {
      // Everything else is keyed by its printed operand form.
      std::string Text;
      raw_string_ostream OS(Text);
      Op->printAsOperand(OS, false);
      KeyedOperands.emplace_back(OS.str(), V);
    }
  }

  sortCommutativeOperands(I, KeyedOperands);

  // Write the operands back in their (possibly sorted) order.
  unsigned Slot = 0;
  for (auto &Op : I->operands())
    Op.set(KeyedOperands[Slot++].second);
}
/// Reorders PHI node's values according to the names of corresponding basic
/// blocks.
///
/// \param Phi PHI node to normalize.
void IRNormalizer::reorderPHIIncomingValues(PHINode *Phi) const {
  using Incoming = std::pair<Value *, BasicBlock *>;

  // Snapshot every (value, block) pair before anything is overwritten.
  SmallVector<Incoming, 2> Entries;
  for (BasicBlock *Pred : Phi->blocks())
    Entries.emplace_back(Phi->getIncomingValueForBlock(Pred), Pred);

  // Order the pairs by the name of the incoming block.
  llvm::sort(Entries, [](const Incoming &LHS, const Incoming &RHS) {
    return LHS.second->getName() < RHS.second->getName();
  });

  // Write the pairs back slot by slot.
  unsigned Slot = 0;
  for (const Incoming &Entry : Entries) {
    Phi->setIncomingBlock(Slot, Entry.second);
    Phi->setIncomingValue(Slot, Entry.first);
    ++Slot;
  }
}
/// Returns a vector of output instructions. An output is an instruction which
/// has side-effects or is ReturnInst. Uses isOutput().
///
/// \see isOutput()
/// \param F Function to collect outputs from.
/// \returns The outputs of \p F in instruction iteration order.
SmallVector<Instruction *, 16>
IRNormalizer::collectOutputInstructions(Function &F) const {
  // Output instructions are collected top-down in each function,
  // any change may break the def-use chain in reordering methods.
  SmallVector<Instruction *, 16> Collected;
  for (Instruction &Inst : instructions(F)) {
    if (!isOutput(&Inst))
      continue;
    Collected.push_back(&Inst);
  }
  return Collected;
}
/// Helper method checking whether the instruction may have side effects or is
/// ReturnInst.
///
/// \param I Considered instruction.
/// \returns True when \p I counts as an output instruction.
bool IRNormalizer::isOutput(const Instruction *I) const {
  // Returns are always outputs; anything else only if it may have side
  // effects.
  if (isa<ReturnInst>(I))
    return true;
  return I->mayHaveSideEffects();
}
/// Helper method checking whether the instruction has users and only
/// immediate operands.
///
/// \param I Considered instruction.
/// \returns True when \p I is used by something and none of its operands is
/// an instruction.
bool IRNormalizer::isInitialInstruction(const Instruction *I) const {
  // An unused value can never be an initial instruction.
  if (I->user_empty())
    return false;
  return hasOnlyImmediateOperands(I);
}
/// Helper method checking whether the instruction has only immediate operands.
///
/// \param I Considered instruction.
bool IRNormalizer::hasOnlyImmediateOperands(const Instruction *I) const {
for (const auto &Op : I->operands())
if (isa<Instruction>(Op))
return false; // Found non-immediate operand (instruction).
return true;
}
/// Helper method returning indices of the outputs using \p I (eliminates
/// repetitions). An index is the position of the output instruction counted
/// from the first instruction of the enclosing function, across all of its
/// basic blocks. Walks down the def-use tree recursively.
///
/// If \p I is itself an output, the result holds just its own index and its
/// users are not followed. Instructions already present in \p Visited
/// contribute nothing.
///
/// \param I Considered instruction.
/// \param Visited Set of visited instructions.
/// \returns Output indices in the order they were discovered.
SetVector<int> IRNormalizer::getOutputFootprint(
    Instruction *I, SmallPtrSet<const Instruction *, 32> &Visited) const {
  // Vector containing indexes of outputs (no repetitions),
  // which use I in the order of walking down the def-use tree.
  SetVector<int> Outputs;

  // Each instruction is expanded once per walk.
  if (!Visited.insert(I).second)
    return Outputs;

  if (isOutput(I)) {
    // Find the index of the output within its parent function. The scan stops
    // at the match, as an instruction occurs only once in a function.
    const Function *Func = I->getParent()->getParent();
    unsigned Count = 0;
    for (const BasicBlock &B : *Func) {
      for (const Instruction &E : B) {
        if (&E == I) {
          Outputs.insert(Count);
          return Outputs;
        }
        ++Count;
      }
    }
    // Returns to the used instruction.
    return Outputs;
  }

  // Not an output: merge the footprints of all instruction users.
  for (User *U : I->users())
    if (auto *UI = dyn_cast<Instruction>(U))
      Outputs.insert_range(getOutputFootprint(UI, Visited));

  // Return to the used instruction.
  return Outputs;
}
/// Pass entry point: normalizes \p F with a fresh IRNormalizer and reports
/// only the CFGAnalyses set as preserved.
///
/// \param F Function to normalize.
/// \param AM Analysis manager; not consulted by this pass.
PreservedAnalyses IRNormalizerPass::run(Function &F,
                                        FunctionAnalysisManager &AM) const {
  IRNormalizer Normalizer(Options);
  Normalizer.runOnFunction(F);

  PreservedAnalyses Preserved;
  Preserved.preserveSet<CFGAnalyses>();
  return Preserved;
}