// blob: 53a45ab58db32a93b242b2e5555c7888659f391b [file] [log] [blame]
//===- bolt/Rewrite/BinaryPassManager.cpp - Binary-level pass manager -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "bolt/Rewrite/BinaryPassManager.h"
#include "bolt/Passes/ADRRelaxationPass.h"
#include "bolt/Passes/Aligner.h"
#include "bolt/Passes/AllocCombiner.h"
#include "bolt/Passes/AsmDump.h"
#include "bolt/Passes/CMOVConversion.h"
#include "bolt/Passes/FixRelaxationPass.h"
#include "bolt/Passes/FrameOptimizer.h"
#include "bolt/Passes/Hugify.h"
#include "bolt/Passes/IdenticalCodeFolding.h"
#include "bolt/Passes/IndirectCallPromotion.h"
#include "bolt/Passes/Inliner.h"
#include "bolt/Passes/Instrumentation.h"
#include "bolt/Passes/JTFootprintReduction.h"
#include "bolt/Passes/LongJmp.h"
#include "bolt/Passes/LoopInversionPass.h"
#include "bolt/Passes/PLTCall.h"
#include "bolt/Passes/PatchEntries.h"
#include "bolt/Passes/RegReAssign.h"
#include "bolt/Passes/ReorderData.h"
#include "bolt/Passes/ReorderFunctions.h"
#include "bolt/Passes/RetpolineInsertion.h"
#include "bolt/Passes/SplitFunctions.h"
#include "bolt/Passes/StokeInfo.h"
#include "bolt/Passes/TailDuplication.h"
#include "bolt/Passes/ThreeWayBranch.h"
#include "bolt/Passes/ValidateInternalCalls.h"
#include "bolt/Passes/ValidateMemRefs.h"
#include "bolt/Passes/VeneerElimination.h"
#include "bolt/Utils/CommandLineOpts.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include <memory>
#include <numeric>
using namespace llvm;
// Command-line options consumed by BinaryFunctionPassManager::runAllPasses():
// flags that gate whether a pass is registered as enabled, and "print-*" flags
// that are handed to the individual pass constructors.
namespace opts {
// Options defined in other translation units and only referenced here.
extern cl::opt<bool> PrintAll;
extern cl::opt<bool> PrintDynoStats;
extern cl::opt<bool> DumpDotAll;
extern cl::opt<std::string> AsmDump;
extern cl::opt<bolt::PLTCall::OptType> PLT;
// Options defined in this file. Those declared 'static' are visible only in
// this translation unit; the others have external linkage.
// Forwarded to callWithDynoStats() for every pass in runPasses(), and also
// enables the DynoStatsPrintPass registration in runAllPasses().
static cl::opt<bool>
DynoStatsAll("dyno-stats-all",
cl::desc("print dyno stats after each stage"),
cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory));
// Enabled by default; gates the EliminateUnreachableBlocks pass.
static cl::opt<bool>
EliminateUnreachable("eliminate-unreachable",
cl::desc("eliminate unreachable code"), cl::init(true),
cl::cat(BoltOptCategory));
// Gates both registrations of IdenticalCodeFolding in runAllPasses().
cl::opt<bool> ICF("icf", cl::desc("fold functions with identical code"),
cl::cat(BoltOptCategory));
static cl::opt<bool> JTFootprintReductionFlag(
"jt-footprint-reduction",
cl::desc("make jump tables size smaller at the cost of using more "
"instructions at jump sites"),
cl::cat(BoltOptCategory));
// Passed to pass constructors in the position where other passes receive
// their dedicated print-* option (see runAllPasses()).
cl::opt<bool> NeverPrint("never-print", cl::desc("never print"),
cl::ReallyHidden, cl::cat(BoltOptCategory));
// Per-pass print options. Each one is handed to the constructor of the pass
// named in its description.
cl::opt<bool>
PrintAfterBranchFixup("print-after-branch-fixup",
cl::desc("print function after fixing local branches"),
cl::Hidden, cl::cat(BoltOptCategory));
static cl::opt<bool>
PrintAfterLowering("print-after-lowering",
cl::desc("print function after instruction lowering"),
cl::Hidden, cl::cat(BoltOptCategory));
cl::opt<bool>
PrintFinalized("print-finalized",
cl::desc("print function after CFG is finalized"),
cl::Hidden, cl::cat(BoltOptCategory));
// Shared by FrameOptimizerPass and AllocCombinerPass in runAllPasses().
static cl::opt<bool>
PrintFOP("print-fop",
cl::desc("print functions after frame optimizer pass"), cl::Hidden,
cl::cat(BoltOptCategory));
static cl::opt<bool>
PrintICF("print-icf", cl::desc("print functions after ICF optimization"),
cl::Hidden, cl::cat(BoltOptCategory));
static cl::opt<bool>
PrintICP("print-icp",
cl::desc("print functions after indirect call promotion"),
cl::Hidden, cl::cat(BoltOptCategory));
static cl::opt<bool>
PrintInline("print-inline",
cl::desc("print functions after inlining optimization"),
cl::Hidden, cl::cat(BoltOptCategory));
static cl::opt<bool> PrintJTFootprintReduction(
"print-after-jt-footprint-reduction",
cl::desc("print function after jt-footprint-reduction pass"),
cl::cat(BoltOptCategory));
static cl::opt<bool>
PrintLongJmp("print-longjmp",
cl::desc("print functions after longjmp pass"), cl::Hidden,
cl::cat(BoltOptCategory));
cl::opt<bool>
PrintNormalized("print-normalized",
cl::desc("print functions after CFG is normalized"),
cl::Hidden, cl::cat(BoltCategory));
// NOTE(review): this option is not referenced anywhere in the visible part of
// this file.
static cl::opt<bool> PrintOptimizeBodyless(
"print-optimize-bodyless",
cl::desc("print functions after bodyless optimization"), cl::Hidden,
cl::cat(BoltOptCategory));
static cl::opt<bool>
PrintPeepholes("print-peepholes",
cl::desc("print functions after peephole optimization"),
cl::Hidden, cl::cat(BoltOptCategory));
static cl::opt<bool>
PrintPLT("print-plt", cl::desc("print functions after PLT optimization"),
cl::Hidden, cl::cat(BoltOptCategory));
// Unlike the other print-* options, this one decides whether the
// PrintProfileStats pass is registered at all.
static cl::opt<bool>
PrintProfileStats("print-profile-stats",
cl::desc("print profile quality/bias analysis"),
cl::cat(BoltCategory));
static cl::opt<bool>
PrintRegReAssign("print-regreassign",
cl::desc("print functions after regreassign pass"),
cl::Hidden, cl::cat(BoltOptCategory));
cl::opt<bool>
PrintReordered("print-reordered",
cl::desc("print functions after layout optimization"),
cl::Hidden, cl::cat(BoltOptCategory));
static cl::opt<bool>
PrintReorderedFunctions("print-reordered-functions",
cl::desc("print functions after clustering"),
cl::Hidden, cl::cat(BoltOptCategory));
static cl::opt<bool> PrintRetpolineInsertion(
"print-retpoline-insertion",
cl::desc("print functions after retpoline insertion pass"),
cl::cat(BoltCategory));
static cl::opt<bool> PrintSCTC(
"print-sctc",
cl::desc("print functions after conditional tail call simplification"),
cl::Hidden, cl::cat(BoltOptCategory));
static cl::opt<bool> PrintSimplifyROLoads(
"print-simplify-rodata-loads",
cl::desc("print functions after simplification of RO data loads"),
cl::Hidden, cl::cat(BoltOptCategory));
static cl::opt<bool>
PrintSplit("print-split", cl::desc("print functions after code splitting"),
cl::Hidden, cl::cat(BoltOptCategory));
static cl::opt<bool>
PrintStoke("print-stoke", cl::desc("print functions after stoke analysis"),
cl::cat(BoltOptCategory));
static cl::opt<bool>
PrintFixRelaxations("print-fix-relaxations",
cl::desc("print functions after fix relaxations pass"),
cl::cat(BoltOptCategory));
static cl::opt<bool> PrintVeneerElimination(
"print-veneer-elimination",
cl::desc("print functions after veneer elimination pass"),
cl::cat(BoltOptCategory));
static cl::opt<bool>
PrintUCE("print-uce",
cl::desc("print functions after unreachable code elimination"),
cl::Hidden, cl::cat(BoltOptCategory));
// Pass-enabling options. Several share their identifier with the pass class
// they gate, so runAllPasses() refers to them with an explicit opts:: prefix.
static cl::opt<bool> RegReAssign(
"reg-reassign",
cl::desc(
"reassign registers so as to avoid using REX prefixes in hot code"),
cl::cat(BoltOptCategory));
// Enabled by default.
static cl::opt<bool> SimplifyConditionalTailCalls(
"simplify-conditional-tail-calls",
cl::desc("simplify conditional tail calls by removing unnecessary jumps"),
cl::init(true), cl::cat(BoltOptCategory));
static cl::opt<bool> SimplifyRODataLoads(
"simplify-rodata-loads",
cl::desc("simplify loads from read-only sections by replacing the memory "
"operand with the constant found in the corresponding section"),
cl::cat(BoltOptCategory));
// The SpecializeMemcpy1 pass is enabled only when this list is non-empty; the
// list itself is also passed to the pass constructor.
static cl::list<std::string>
SpecializeMemcpy1("memcpy1-spec",
cl::desc("list of functions with call sites for which to specialize memcpy() "
"for size 1"),
cl::value_desc("func1,func2:cs1:cs2,func3:cs1,..."),
cl::ZeroOrMore, cl::cat(BoltOptCategory));
static cl::opt<bool> Stoke("stoke", cl::desc("turn on the stoke analysis"),
cl::cat(BoltOptCategory));
// Gates the InlineMemcpy pass (the flag is spelled "inline-memcpy").
static cl::opt<bool> StringOps(
"inline-memcpy",
cl::desc("inline memcpy using 'rep movsb' instruction (X86-only)"),
cl::cat(BoltOptCategory));
// Enabled by default.
static cl::opt<bool> StripRepRet(
"strip-rep-ret",
cl::desc("strip 'repz' prefix from 'repz retq' sequence (on by default)"),
cl::init(true), cl::cat(BoltOptCategory));
// When set, runPasses() validates every function's CFG after each pass and
// exits with status 1 on the first failure.
static cl::opt<bool> VerifyCFG("verify-cfg",
cl::desc("verify the CFG after every pass"),
cl::Hidden, cl::cat(BoltOptCategory));
static cl::opt<bool> ThreeWayBranchFlag("three-way-branch",
cl::desc("reorder three way branches"),
cl::ReallyHidden,
cl::cat(BoltOptCategory));
static cl::opt<bool> CMOVConversionFlag("cmov-conversion",
cl::desc("fold jcc+mov into cmov"),
cl::ReallyHidden,
cl::cat(BoltOptCategory));
} // namespace opts
namespace llvm {
namespace bolt {
using namespace opts;
// Timer group name and description handed to the NamedRegionTimer that
// runPasses() creates around each pass.
const char BinaryFunctionPassManager::TimerGroupName[] = "passman";
const char BinaryFunctionPassManager::TimerGroupDesc[] =
"Binary Function Pass Manager";
void BinaryFunctionPassManager::runPasses() {
auto &BFs = BC.getBinaryFunctions();
for (size_t PassIdx = 0; PassIdx < Passes.size(); PassIdx++) {
const std::pair<const bool, std::unique_ptr<BinaryFunctionPass>>
&OptPassPair = Passes[PassIdx];
if (!OptPassPair.first)
continue;
const std::unique_ptr<BinaryFunctionPass> &Pass = OptPassPair.second;
std::string PassIdName =
formatv("{0:2}_{1}", PassIdx, Pass->getName()).str();
if (opts::Verbosity > 0)
outs() << "BOLT-INFO: Starting pass: " << Pass->getName() << "\n";
NamedRegionTimer T(Pass->getName(), Pass->getName(), TimerGroupName,
TimerGroupDesc, TimeOpts);
callWithDynoStats([this, &Pass] { Pass->runOnFunctions(BC); }, BFs,
Pass->getName(), opts::DynoStatsAll, BC.isAArch64());
if (opts::VerifyCFG &&
!std::accumulate(
BFs.begin(), BFs.end(), true,
[](const bool Valid,
const std::pair<const uint64_t, BinaryFunction> &It) {
return Valid && It.second.validateCFG();
})) {
errs() << "BOLT-ERROR: Invalid CFG detected after pass "
<< Pass->getName() << "\n";
exit(1);
}
if (opts::Verbosity > 0)
outs() << "BOLT-INFO: Finished pass: " << Pass->getName() << "\n";
if (!opts::PrintAll && !opts::DumpDotAll && !Pass->printPass())
continue;
const std::string Message = std::string("after ") + Pass->getName();
for (auto &It : BFs) {
BinaryFunction &Function = It.second;
if (!Pass->shouldPrint(Function))
continue;
Function.print(outs(), Message);
if (opts::DumpDotAll)
Function.dumpGraphForPass(PassIdName);
}
}
}
/// Build the complete BOLT pass pipeline for \p BC and run it.
///
/// Passes execute in the order they are registered below, so the registration
/// order encodes every inter-pass dependency and must be kept intact. A pass
/// registered with a second argument is enabled only when that argument is
/// true; architecture- and relocation-specific passes are registered
/// conditionally.
void BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
  BinaryFunctionPassManager PM(BC);

  // Snapshot the dyno stats before any pass runs; the DynoStatsPrintPass
  // registered further down receives this snapshot.
  const DynoStats StatsBeforePasses =
      getDynoStats(BC.getBinaryFunctions(), BC.isAArch64());

  PM.registerPass(std::make_unique<AsmDumpPass>(),
                  opts::AsmDump.getNumOccurrences());

  if (BC.isAArch64()) {
    PM.registerPass(std::make_unique<FixRelaxations>(PrintFixRelaxations));

    PM.registerPass(
        std::make_unique<VeneerElimination>(PrintVeneerElimination));
  }

  // Dependencies and ordering are managed by hand: passes run in exactly the
  // order in which they are registered here.

  // Registered early so that the statistics describe the original functions.
  PM.registerPass(std::make_unique<PrintProgramStats>(NeverPrint));

  if (opts::PrintProfileStats) {
    PM.registerPass(std::make_unique<PrintProfileStats>(NeverPrint));
  }

  PM.registerPass(std::make_unique<ValidateInternalCalls>(NeverPrint));

  PM.registerPass(std::make_unique<ValidateMemRefs>(NeverPrint));

  // Instrumentation takes precedence over hugify; at most one is registered.
  if (opts::Instrument) {
    PM.registerPass(std::make_unique<Instrumentation>(NeverPrint));
  } else if (opts::Hugify) {
    PM.registerPass(std::make_unique<HugePage>(NeverPrint));
  }

  PM.registerPass(std::make_unique<ShortenInstructions>(NeverPrint));

  PM.registerPass(std::make_unique<RemoveNops>(NeverPrint));

  PM.registerPass(std::make_unique<NormalizeCFG>(PrintNormalized));

  PM.registerPass(std::make_unique<StripRepRet>(NeverPrint),
                  opts::StripRepRet);

  PM.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF),
                  opts::ICF);

  PM.registerPass(
      std::make_unique<SpecializeMemcpy1>(NeverPrint, opts::SpecializeMemcpy1),
      !opts::SpecializeMemcpy1.empty());

  PM.registerPass(std::make_unique<InlineMemcpy>(NeverPrint),
                  opts::StringOps);

  PM.registerPass(std::make_unique<IndirectCallPromotion>(PrintICP));

  PM.registerPass(
      std::make_unique<JTFootprintReduction>(PrintJTFootprintReduction),
      opts::JTFootprintReductionFlag);

  PM.registerPass(
      std::make_unique<SimplifyRODataLoads>(PrintSimplifyROLoads),
      opts::SimplifyRODataLoads);

  PM.registerPass(std::make_unique<RegReAssign>(PrintRegReAssign),
                  opts::RegReAssign);

  PM.registerPass(std::make_unique<Inliner>(PrintInline));

  // Identical code folding is registered a second time, after inlining.
  PM.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF),
                  opts::ICF);

  PM.registerPass(std::make_unique<PLTCall>(PrintPLT));

  PM.registerPass(std::make_unique<ThreeWayBranch>(),
                  opts::ThreeWayBranchFlag);

  PM.registerPass(std::make_unique<ReorderBasicBlocks>(PrintReordered));

  PM.registerPass(std::make_unique<EliminateUnreachableBlocks>(PrintUCE),
                  opts::EliminateUnreachable);

  PM.registerPass(std::make_unique<SplitFunctions>(PrintSplit));

  PM.registerPass(std::make_unique<LoopInversionPass>());

  PM.registerPass(std::make_unique<TailDuplication>());

  PM.registerPass(std::make_unique<CMOVConversion>(),
                  opts::CMOVConversionFlag);

  // FixupBranches synchronizes local branches with the CFG. Any later pass
  // that breaks that synchronization must either re-run this pass or restore
  // branch consistency itself.
  PM.registerPass(std::make_unique<FixupBranches>(PrintAfterBranchFixup));

  // Function reordering should come close to last because it orders functions
  // by their estimated hot size. It must also follow every change to the call
  // graph, e.g. inlining.
  PM.registerPass(
      std::make_unique<ReorderFunctions>(PrintReorderedFunctions));

  // Emit the final dyno stats while the CFG and instruction analysis are
  // still intact.
  PM.registerPass(
      std::make_unique<DynoStatsPrintPass>(
          StatsBeforePasses, "after all optimizations before SCTC and FOP"),
      opts::PrintDynoStats || opts::DynoStatsAll);

  // StokeInfo extracts functions for stoke optimization and gathers liveness
  // information for them.
  PM.registerPass(std::make_unique<StokeInfo>(PrintStoke), opts::Stoke);

  // SCTC introduces conditional jumps into external functions. Rather than
  // extending the CFG to model that, the pass is kept isolated: it performs
  // double jump removal and unreachable code elimination on its own when
  // needed instead of relying on peepholes/UCE.
  // More generally, it should be the last optimization pass that modifies
  // branches or control flow. It is placed after function reordering so that
  // it can tell accurately whether calls are forward or backward.
  PM.registerPass(
      std::make_unique<SimplifyConditionalTailCalls>(PrintSCTC),
      opts::SimplifyConditionalTailCalls);

  PM.registerPass(std::make_unique<Peepholes>(PrintPeepholes));

  PM.registerPass(std::make_unique<AlignerPass>());

  // Reorder data held in one or more sections, driven by memory profiling
  // data.
  PM.registerPass(std::make_unique<ReorderData>());

  if (BC.isAArch64()) {
    PM.registerPass(std::make_unique<ADRRelaxationPass>());

    // Tighten branches according to the offset between each branch and its
    // target. No instructions may be added after this pass; otherwise
    // relocations could end up out of range and crash during linking.
    PM.registerPass(std::make_unique<LongJmpPass>(PrintLongJmp));
  }

  // Nominally the last pass, although the passes below are deliberately
  // registered after it.
  PM.registerPass(std::make_unique<FinalizeFunctions>(PrintFinalized));

  // FrameOptimizer implicitly depends on FinalizeFunctions: it moves values
  // around and has to update CFIs. That requires reading, interpreting and
  // rewriting CFI, so CFIs must already be placed according to the final
  // layout.
  PM.registerPass(std::make_unique<FrameOptimizerPass>(PrintFOP));

  PM.registerPass(std::make_unique<AllocCombinerPass>(PrintFOP));

  PM.registerPass(
      std::make_unique<RetpolineInsertion>(PrintRetpolineInsertion));

  // Give every function an output section.
  PM.registerPass(std::make_unique<AssignSections>());

  // Patch the original function entries (relocation mode only).
  if (BC.HasRelocations) {
    PM.registerPass(std::make_unique<PatchEntries>());
  }

  // Instruction lowering turns tail calls into jumps, which hides them from
  // function reordering. Using any CFG or instruction analysis past this
  // point is unsafe.
  PM.registerPass(
      std::make_unique<InstructionLowering>(PrintAfterLowering));

  // In non-relocation mode, functions that no longer fit into their original
  // space are marked non-simple when required (e.g. for a correct debug info
  // update).
  // NOTE: this pass depends on finalized code.
  if (!BC.HasRelocations) {
    PM.registerPass(std::make_unique<CheckLargeFunctions>(NeverPrint));
  }

  PM.registerPass(std::make_unique<LowerAnnotations>(NeverPrint));

  PM.runPasses();
}
} // namespace bolt
} // namespace llvm