| //===- bolt/Passes/BinaryPasses.h - Binary-level passes ---------*- C++ -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // The set of optimization/analysis passes that run on BinaryFunctions. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #ifndef BOLT_PASSES_BINARY_PASSES_H |
| #define BOLT_PASSES_BINARY_PASSES_H |
| |
| #include "bolt/Core/BinaryContext.h" |
| #include "bolt/Core/BinaryFunction.h" |
| #include "bolt/Core/DynoStats.h" |
| #include "llvm/Support/CommandLine.h" |
| #include <atomic> |
| #include <set> |
| #include <string> |
| #include <unordered_set> |
| |
| namespace llvm { |
| namespace bolt { |
| |
| /// An optimization/analysis pass that runs on functions. |
| class BinaryFunctionPass { |
| protected: |
| bool PrintPass; |
| |
| explicit BinaryFunctionPass(const bool PrintPass) : PrintPass(PrintPass) {} |
| |
| /// Control whether a specific function should be skipped during |
| /// optimization. |
| virtual bool shouldOptimize(const BinaryFunction &BF) const; |
| |
| public: |
| virtual ~BinaryFunctionPass() = default; |
| |
| /// The name of this pass |
| virtual const char *getName() const = 0; |
| |
| /// Control whether debug info is printed after this pass is completed. |
| bool printPass() const { return PrintPass; } |
| |
| /// Control whether debug info is printed for an individual function after |
| /// this pass is completed (printPass() must have returned true). |
| virtual bool shouldPrint(const BinaryFunction &BF) const; |
| |
| virtual Error runOnFunctions(BinaryContext &BC) = 0; |
| }; |
| |
| /// A pass to print program-wide dynostats. |
| class DynoStatsPrintPass : public BinaryFunctionPass { |
| protected: |
| DynoStats PrevDynoStats; |
| std::string Title; |
| |
| public: |
| DynoStatsPrintPass(const DynoStats &PrevDynoStats, const char *Title) |
| : BinaryFunctionPass(false), PrevDynoStats(PrevDynoStats), Title(Title) {} |
| |
| const char *getName() const override { |
| return "print dyno-stats after optimizations"; |
| } |
| |
| bool shouldPrint(const BinaryFunction &BF) const override { return false; } |
| |
| Error runOnFunctions(BinaryContext &BC) override { |
| const DynoStats NewDynoStats = |
| getDynoStats(BC.getBinaryFunctions(), BC.isAArch64()); |
| const bool Changed = (NewDynoStats != PrevDynoStats); |
| BC.outs() << "BOLT-INFO: program-wide dynostats " << Title |
| << (Changed ? "" : " (no change)") << ":\n\n" |
| << PrevDynoStats; |
| if (Changed) { |
| BC.outs() << '\n'; |
| NewDynoStats.print(BC.outs(), &PrevDynoStats, BC.InstPrinter.get()); |
| } |
| BC.outs() << '\n'; |
| return Error::success(); |
| } |
| }; |
| |
| /// The pass normalizes CFG by performing the following transformations: |
| /// * removes empty basic blocks |
| /// * merges duplicate edges and updates jump instructions |
| class NormalizeCFG : public BinaryFunctionPass { |
| std::atomic<uint64_t> NumBlocksRemoved{0}; |
| std::atomic<uint64_t> NumDuplicateEdgesMerged{0}; |
| |
| void runOnFunction(BinaryFunction &BF); |
| |
| public: |
| NormalizeCFG(const cl::opt<bool> &PrintPass) |
| : BinaryFunctionPass(PrintPass) {} |
| |
| const char *getName() const override { return "normalize CFG"; } |
| |
| Error runOnFunctions(BinaryContext &) override; |
| }; |
| |
| /// Detect and eliminate unreachable basic blocks. We could have those |
| /// filled with nops and they are used for alignment. |
| class EliminateUnreachableBlocks : public BinaryFunctionPass { |
| std::unordered_set<const BinaryFunction *> Modified; |
| std::atomic<unsigned> DeletedBlocks{0}; |
| std::atomic<uint64_t> DeletedBytes{0}; |
| void runOnFunction(BinaryFunction &Function); |
| |
| public: |
| EliminateUnreachableBlocks(const cl::opt<bool> &PrintPass) |
| : BinaryFunctionPass(PrintPass) {} |
| |
| const char *getName() const override { return "eliminate-unreachable"; } |
| bool shouldPrint(const BinaryFunction &BF) const override { |
| return BinaryFunctionPass::shouldPrint(BF) && Modified.count(&BF) > 0; |
| } |
| Error runOnFunctions(BinaryContext &) override; |
| }; |
| |
| // Reorder the basic blocks for each function based on hotness. |
| class ReorderBasicBlocks : public BinaryFunctionPass { |
| public: |
| /// Choose which strategy should the block layout heuristic prioritize when |
| /// facing conflicting goals. |
| enum LayoutType : char { |
| /// LT_NONE - do not change layout of basic blocks |
| LT_NONE = 0, /// no reordering |
| /// LT_REVERSE - reverse the order of basic blocks, meant for testing |
| /// purposes. The first basic block is left intact and the rest are |
| /// put in the reverse order. |
| LT_REVERSE, |
| /// LT_OPTIMIZE - optimize layout of basic blocks based on profile. |
| LT_OPTIMIZE, |
| /// LT_OPTIMIZE_BRANCH is an implementation of what is suggested in Pettis' |
| /// paper (PLDI '90) about block reordering, trying to minimize branch |
| /// mispredictions. |
| LT_OPTIMIZE_BRANCH, |
| /// LT_OPTIMIZE_CACHE piggybacks on the idea from Ispike paper (CGO '04) |
| /// that suggests putting frequently executed chains first in the layout. |
| LT_OPTIMIZE_CACHE, |
| // CACHE_PLUS and EXT_TSP are synonyms, emit warning of deprecation. |
| LT_OPTIMIZE_CACHE_PLUS, |
| /// Block reordering guided by the extended TSP metric. |
| LT_OPTIMIZE_EXT_TSP, |
| /// Create clusters and use random order for them. |
| LT_OPTIMIZE_SHUFFLE, |
| }; |
| |
| private: |
| /// Run the specified layout algorithm on the given function. Returns `true` |
| /// if the order of blocks was changed. |
| bool modifyFunctionLayout(BinaryFunction &Function, LayoutType Type, |
| bool MinBranchClusters) const; |
| |
| public: |
| explicit ReorderBasicBlocks(const cl::opt<bool> &PrintPass) |
| : BinaryFunctionPass(PrintPass) {} |
| |
| bool shouldOptimize(const BinaryFunction &BF) const override; |
| |
| const char *getName() const override { return "reorder-blocks"; } |
| bool shouldPrint(const BinaryFunction &BF) const override; |
| Error runOnFunctions(BinaryContext &BC) override; |
| }; |
| |
| /// Sync local branches with CFG. |
| class FixupBranches : public BinaryFunctionPass { |
| public: |
| explicit FixupBranches(const cl::opt<bool> &PrintPass) |
| : BinaryFunctionPass(PrintPass) {} |
| |
| const char *getName() const override { return "fix-branches"; } |
| Error runOnFunctions(BinaryContext &BC) override; |
| }; |
| |
| /// Fix the CFI state and exception handling information after all other |
| /// passes have completed. |
| class FinalizeFunctions : public BinaryFunctionPass { |
| public: |
| explicit FinalizeFunctions(const cl::opt<bool> &PrintPass) |
| : BinaryFunctionPass(PrintPass) {} |
| |
| const char *getName() const override { return "finalize-functions"; } |
| Error runOnFunctions(BinaryContext &BC) override; |
| }; |
| |
| /// Perform any necessary adjustments for functions that do not fit into their |
| /// original space in non-relocation mode. |
| class CheckLargeFunctions : public BinaryFunctionPass { |
| public: |
| explicit CheckLargeFunctions(const cl::opt<bool> &PrintPass) |
| : BinaryFunctionPass(PrintPass) {} |
| |
| const char *getName() const override { return "check-large-functions"; } |
| |
| Error runOnFunctions(BinaryContext &BC) override; |
| |
| bool shouldOptimize(const BinaryFunction &BF) const override; |
| }; |
| |
| /// Convert and remove all BOLT-related annotations before LLVM code emission. |
| class LowerAnnotations : public BinaryFunctionPass { |
| public: |
| explicit LowerAnnotations(const cl::opt<bool> &PrintPass) |
| : BinaryFunctionPass(PrintPass) {} |
| |
| const char *getName() const override { return "lower-annotations"; } |
| Error runOnFunctions(BinaryContext &BC) override; |
| }; |
| |
| /// Clean the state of the MC representation before sending it to emission |
| class CleanMCState : public BinaryFunctionPass { |
| public: |
| explicit CleanMCState(const cl::opt<bool> &PrintPass) |
| : BinaryFunctionPass(PrintPass) {} |
| |
| const char *getName() const override { return "clean-mc-state"; } |
| Error runOnFunctions(BinaryContext &BC) override; |
| }; |
| |
| /// An optimization to simplify conditional tail calls by removing |
| /// unnecessary branches. |
| /// |
| /// This optimization considers both of the following cases: |
| /// |
| /// foo: ... |
| /// jcc L1 original |
| /// ... |
| /// L1: jmp bar # TAILJMP |
| /// |
| /// -> |
| /// |
| /// foo: ... |
| /// jcc bar iff jcc L1 is expected |
| /// ... |
| /// |
| /// L1 is unreachable |
| /// |
| /// OR |
| /// |
| /// foo: ... |
| /// jcc L2 |
| /// L1: jmp dest # TAILJMP |
| /// L2: ... |
| /// |
| /// -> |
| /// |
| /// foo: jncc dest # TAILJMP |
| /// L2: ... |
| /// |
| /// L1 is unreachable |
| /// |
| /// For this particular case, the first basic block ends with |
| /// a conditional branch and has two successors, one fall-through |
| /// and one for when the condition is true. |
| /// The target of the conditional is a basic block with a single |
| /// unconditional branch (i.e. tail call) to another function. |
| /// We don't care about the contents of the fall-through block. |
| /// We assume that the target of the conditional branch is the |
| /// first successor. |
| class SimplifyConditionalTailCalls : public BinaryFunctionPass { |
| uint64_t NumCandidateTailCalls{0}; |
| uint64_t NumTailCallsPatched{0}; |
| uint64_t CTCExecCount{0}; |
| uint64_t CTCTakenCount{0}; |
| uint64_t NumOrigForwardBranches{0}; |
| uint64_t NumOrigBackwardBranches{0}; |
| uint64_t NumDoubleJumps{0}; |
| uint64_t DeletedBlocks{0}; |
| uint64_t DeletedBytes{0}; |
| std::unordered_set<const BinaryFunction *> Modified; |
| std::set<const BinaryBasicBlock *> BeenOptimized; |
| |
| bool shouldRewriteBranch(const BinaryBasicBlock *PredBB, |
| const MCInst &CondBranch, const BinaryBasicBlock *BB, |
| const bool DirectionFlag); |
| |
| uint64_t fixTailCalls(BinaryFunction &BF); |
| |
| public: |
| explicit SimplifyConditionalTailCalls(const cl::opt<bool> &PrintPass) |
| : BinaryFunctionPass(PrintPass) {} |
| |
| const char *getName() const override { |
| return "simplify-conditional-tail-calls"; |
| } |
| bool shouldPrint(const BinaryFunction &BF) const override { |
| return BinaryFunctionPass::shouldPrint(BF) && Modified.count(&BF) > 0; |
| } |
| Error runOnFunctions(BinaryContext &BC) override; |
| }; |
| |
| /// Convert instructions to the form with the minimum operand width. |
| class ShortenInstructions : public BinaryFunctionPass { |
| uint64_t shortenInstructions(BinaryFunction &Function); |
| |
| public: |
| explicit ShortenInstructions(const cl::opt<bool> &PrintPass) |
| : BinaryFunctionPass(PrintPass) {} |
| |
| const char *getName() const override { return "shorten-instructions"; } |
| |
| Error runOnFunctions(BinaryContext &BC) override; |
| }; |
| |
| /// Perform simple peephole optimizations. |
| class Peepholes : public BinaryFunctionPass { |
| public: |
| enum PeepholeOpts : char { |
| PEEP_NONE = 0x0, |
| PEEP_DOUBLE_JUMPS = 0x2, |
| PEEP_TAILCALL_TRAPS = 0x4, |
| PEEP_USELESS_BRANCHES = 0x8, |
| PEEP_ALL = 0xf |
| }; |
| |
| private: |
| uint64_t NumDoubleJumps{0}; |
| uint64_t TailCallTraps{0}; |
| uint64_t NumUselessCondBranches{0}; |
| |
| /// Add trap instructions immediately after indirect tail calls to prevent |
| /// the processor from decoding instructions immediate following the |
| /// tailcall. |
| void addTailcallTraps(BinaryFunction &Function); |
| |
| /// Remove useless duplicate successors. When the conditional |
| /// successor is the same as the unconditional successor, we can |
| /// remove the conditional successor and branch instruction. |
| void removeUselessCondBranches(BinaryFunction &Function); |
| |
| public: |
| explicit Peepholes(const cl::opt<bool> &PrintPass) |
| : BinaryFunctionPass(PrintPass) {} |
| |
| const char *getName() const override { return "peepholes"; } |
| Error runOnFunctions(BinaryContext &BC) override; |
| }; |
| |
| /// An optimization to simplify loads from read-only sections.The pass converts |
| /// load instructions with statically computed target address such as: |
| /// |
| /// mov 0x12f(%rip), %eax |
| /// |
| /// to their counterparts that use immediate operands instead of memory loads: |
| /// |
| /// mov $0x4007dc, %eax |
| /// |
| /// when the target address points somewhere inside a read-only section. |
| /// |
| class SimplifyRODataLoads : public BinaryFunctionPass { |
| uint64_t NumLoadsSimplified{0}; |
| uint64_t NumDynamicLoadsSimplified{0}; |
| uint64_t NumLoadsFound{0}; |
| uint64_t NumDynamicLoadsFound{0}; |
| std::unordered_set<const BinaryFunction *> Modified; |
| |
| bool simplifyRODataLoads(BinaryFunction &BF); |
| |
| public: |
| explicit SimplifyRODataLoads(const cl::opt<bool> &PrintPass) |
| : BinaryFunctionPass(PrintPass) {} |
| |
| const char *getName() const override { return "simplify-read-only-loads"; } |
| bool shouldPrint(const BinaryFunction &BF) const override { |
| return BinaryFunctionPass::shouldPrint(BF) && Modified.count(&BF) > 0; |
| } |
| Error runOnFunctions(BinaryContext &BC) override; |
| }; |
| |
| /// Assign output sections to all functions. |
| class AssignSections : public BinaryFunctionPass { |
| public: |
| explicit AssignSections() : BinaryFunctionPass(false) {} |
| |
| const char *getName() const override { return "assign-sections"; } |
| Error runOnFunctions(BinaryContext &BC) override; |
| }; |
| |
| /// Compute and report to the user the imbalance in flow equations for all |
| /// CFGs, so we can detect bad quality profile. Prints average and standard |
| /// deviation of the absolute differences of outgoing flow minus incoming flow |
| /// for blocks of interest (excluding prologues, epilogues, and BB frequency |
| /// lower than 100). |
| class PrintProfileStats : public BinaryFunctionPass { |
| public: |
| explicit PrintProfileStats(const cl::opt<bool> &PrintPass) |
| : BinaryFunctionPass(PrintPass) {} |
| |
| const char *getName() const override { return "profile-stats"; } |
| bool shouldPrint(const BinaryFunction &) const override { return false; } |
| Error runOnFunctions(BinaryContext &BC) override; |
| }; |
| |
| /// Prints a list of the top 100 functions sorted by a set of |
| /// dyno stats categories. |
| class PrintProgramStats : public BinaryFunctionPass { |
| public: |
| explicit PrintProgramStats() : BinaryFunctionPass(false) {} |
| |
| const char *getName() const override { return "print-stats"; } |
| bool shouldPrint(const BinaryFunction &) const override { return false; } |
| Error runOnFunctions(BinaryContext &BC) override; |
| }; |
| |
| /// Pass for lowering any instructions that we have raised and that have |
| /// to be lowered. |
| class InstructionLowering : public BinaryFunctionPass { |
| public: |
| explicit InstructionLowering(const cl::opt<bool> &PrintPass) |
| : BinaryFunctionPass(PrintPass) {} |
| |
| const char *getName() const override { return "inst-lowering"; } |
| |
| Error runOnFunctions(BinaryContext &BC) override; |
| }; |
| |
| /// Pass for stripping 'repz' from 'repz retq' sequence of instructions. |
| class StripRepRet : public BinaryFunctionPass { |
| public: |
| explicit StripRepRet(const cl::opt<bool> &PrintPass) |
| : BinaryFunctionPass(PrintPass) {} |
| |
| const char *getName() const override { return "strip-rep-ret"; } |
| |
| Error runOnFunctions(BinaryContext &BC) override; |
| }; |
| |
| /// Pass for inlining calls to memcpy using 'rep movsb' on X86. |
| class InlineMemcpy : public BinaryFunctionPass { |
| public: |
| explicit InlineMemcpy(const cl::opt<bool> &PrintPass) |
| : BinaryFunctionPass(PrintPass) {} |
| |
| const char *getName() const override { return "inline-memcpy"; } |
| |
| Error runOnFunctions(BinaryContext &BC) override; |
| }; |
| |
| /// Pass for specializing memcpy for a size of 1 byte. |
| class SpecializeMemcpy1 : public BinaryFunctionPass { |
| private: |
| std::vector<std::string> Spec; |
| |
| /// Return indices of the call sites to optimize. Count starts at 1. |
| /// Returns an empty set for all call sites in the function. |
| std::set<size_t> getCallSitesToOptimize(const BinaryFunction &) const; |
| |
| public: |
| explicit SpecializeMemcpy1(const cl::opt<bool> &PrintPass, |
| cl::list<std::string> &Spec) |
| : BinaryFunctionPass(PrintPass), Spec(Spec) {} |
| |
| bool shouldOptimize(const BinaryFunction &BF) const override; |
| |
| const char *getName() const override { return "specialize-memcpy"; } |
| |
| Error runOnFunctions(BinaryContext &BC) override; |
| }; |
| |
| /// Pass to remove nops in code |
| class RemoveNops : public BinaryFunctionPass { |
| void runOnFunction(BinaryFunction &Function); |
| |
| public: |
| explicit RemoveNops(const cl::opt<bool> &PrintPass) |
| : BinaryFunctionPass(PrintPass) {} |
| |
| const char *getName() const override { return "remove-nops"; } |
| |
| /// Pass entry point |
| Error runOnFunctions(BinaryContext &BC) override; |
| }; |
| |
| enum FrameOptimizationType : char { |
| FOP_NONE, /// Don't perform FOP. |
| FOP_HOT, /// Perform FOP on hot functions. |
| FOP_ALL /// Perform FOP on all functions. |
| }; |
| |
| } // namespace bolt |
| } // namespace llvm |
| |
| #endif |