blob: 397a38663948e9cbb2d7d2ed72947ade0f373e64 [file] [log] [blame]
//===- bolt/Passes/IndirectCallPromotion.h ----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The indirect call promotion (ICP) optimization pass.
//
//===----------------------------------------------------------------------===//
#ifndef BOLT_PASSES_INDIRECT_CALL_PROMOTION_H
#define BOLT_PASSES_INDIRECT_CALL_PROMOTION_H
#include "bolt/Passes/BinaryPasses.h"
namespace llvm {
namespace bolt {
/// Optimize indirect calls.
/// The indirect call promotion pass visits each indirect call and
/// examines a branch profile for each. If the most frequent targets
/// from that callsite exceed the specified threshold (default 90%),
/// the call is promoted. Otherwise, it is ignored. By default,
/// only one target is considered at each callsite.
///
/// When an candidate callsite is processed, we modify the callsite
/// to test for the most common call targets before calling through
/// the original generic call mechanism.
///
/// The CFG and layout are modified by ICP.
///
/// A few new command line options have been added:
/// -indirect-call-promotion=[none,call,jump-tables,all]
/// -indirect-call-promotion-threshold=<percentage>
/// -indirect-call-promotion-mispredict-threshold=<percentage>
/// -indirect-call-promotion-topn=<int>
///
/// The threshold is the minimum frequency of a call target needed
/// before ICP is triggered.
///
/// The mispredict threshold is used to disable the optimization at
/// any callsite where the branch predictor does a good enough job
/// that ICP wouldn't help regardless of the frequency of the most
/// common target.
///
/// The topn option controls the number of targets to consider for
/// each callsite, e.g. ICP is triggered if topn=2 and the total
/// frequency of the top two call targets exceeds the threshold.
///
/// The minimize code size option controls whether or not the hot
/// calls are to registers (callq %r10) or to function addresses
/// (callq $foo).
///
/// Example of ICP:
///
/// C++ code:
///
/// int B_count = 0;
/// int C_count = 0;
///
/// struct A { virtual void foo() = 0; }
/// struct B : public A { virtual void foo() { ++B_count; }; };
/// struct C : public A { virtual void foo() { ++C_count; }; };
///
/// A* a = ...
/// a->foo();
/// ...
///
/// original assembly:
///
/// B0: 49 8b 07 mov (%r15),%rax
/// 4c 89 ff mov %r15,%rdi
/// ff 10 callq *(%rax)
/// 41 83 e6 01 and $0x1,%r14d
/// 4d 89 e6 mov %r12,%r14
/// 4c 0f 44 f5 cmove %rbp,%r14
/// 4c 89 f7 mov %r14,%rdi
/// ...
///
/// after ICP:
///
/// B0: 49 8b 07 mov (%r15),%rax
/// 4c 89 ff mov %r15,%rdi
/// 48 81 38 e0 0b 40 00 cmpq $B::foo,(%rax)
/// 75 29 jne B3
/// B1: e8 45 03 00 00 callq $B::foo
/// B2: 41 83 e6 01 and $0x1,%r14d
/// 4d 89 e6 mov %r12,%r14
/// 4c 0f 44 f5 cmove %rbp,%r14
/// 4c 89 f7 mov %r14,%rdi
/// ...
///
/// B3: ff 10 callq *(%rax)
/// eb d6 jmp B2
///
class IndirectCallPromotion : public BinaryFunctionPass {
using BasicBlocksVector = std::vector<std::unique_ptr<BinaryBasicBlock>>;
using MethodInfoType = std::pair<std::vector<std::pair<MCSymbol *, uint64_t>>,
std::vector<MCInst *>>;
using JumpTableInfoType = std::vector<std::pair<uint64_t, uint64_t>>;
using SymTargetsType = std::vector<std::pair<MCSymbol *, uint64_t>>;
struct Location {
MCSymbol *Sym{nullptr};
uint64_t Addr{0};
bool isValid() const { return Sym || (!Sym && Addr != 0); }
Location() {}
explicit Location(MCSymbol *Sym) : Sym(Sym) {}
explicit Location(uint64_t Addr) : Addr(Addr) {}
};
struct Callsite {
Location From;
Location To;
uint64_t Mispreds{0};
uint64_t Branches{0};
// Indices in the jmp table (jt only)
std::vector<uint64_t> JTIndices;
bool isValid() const { return From.isValid() && To.isValid(); }
Callsite(BinaryFunction &BF, const IndirectCallProfile &ICP);
Callsite(const Location &From, const Location &To, uint64_t Mispreds,
uint64_t Branches, uint64_t JTIndex)
: From(From), To(To), Mispreds(Mispreds), Branches(Branches),
JTIndices(1, JTIndex) {}
};
std::unordered_set<const BinaryFunction *> Modified;
// Total number of calls from all callsites.
uint64_t TotalCalls{0};
// Total number of indirect calls from all callsites.
// (a fraction of TotalCalls)
uint64_t TotalIndirectCalls{0};
// Total number of jmp table calls from all callsites.
// (a fraction of TotalCalls)
uint64_t TotalIndirectJmps{0};
// Total number of callsites that use indirect calls.
// (the total number of callsites is not recorded)
uint64_t TotalIndirectCallsites{0};
// Total number of callsites that are jump tables.
uint64_t TotalJumpTableCallsites{0};
// Total number of indirect callsites that are optimized by ICP.
// (a fraction of TotalIndirectCallsites)
uint64_t TotalOptimizedIndirectCallsites{0};
// Total number of method callsites that can have loads eliminated.
mutable uint64_t TotalMethodLoadEliminationCandidates{0};
// Total number of method callsites that had loads eliminated.
uint64_t TotalMethodLoadsEliminated{0};
// Total number of jump table callsites that are optimized by ICP.
uint64_t TotalOptimizedJumpTableCallsites{0};
// Total number of indirect calls that are optimized by ICP.
// (a fraction of TotalCalls)
uint64_t TotalNumFrequentCalls{0};
// Total number of jump table calls that are optimized by ICP.
// (a fraction of TotalCalls)
uint64_t TotalNumFrequentJmps{0};
// Total number of jump table sites that can use hot indices.
mutable uint64_t TotalIndexBasedCandidates{0};
// Total number of jump table sites that use hot indices.
uint64_t TotalIndexBasedJumps{0};
void printDecision(llvm::raw_ostream &OS,
std::vector<IndirectCallPromotion::Callsite> &Targets,
unsigned N) const;
std::vector<Callsite> getCallTargets(BinaryBasicBlock &BB,
const MCInst &Inst) const;
size_t canPromoteCallsite(const BinaryBasicBlock &BB, const MCInst &Inst,
const std::vector<Callsite> &Targets,
uint64_t NumCalls);
void printCallsiteInfo(const BinaryBasicBlock &BB, const MCInst &Inst,
const std::vector<Callsite> &Targets, const size_t N,
uint64_t NumCalls) const;
JumpTableInfoType maybeGetHotJumpTableTargets(BinaryBasicBlock &BB,
MCInst &Inst,
MCInst *&TargetFetchInst,
const JumpTable *JT) const;
SymTargetsType findCallTargetSymbols(std::vector<Callsite> &Targets,
size_t &N, BinaryBasicBlock &BB,
MCInst &Inst,
MCInst *&TargetFetchInst) const;
MethodInfoType maybeGetVtableSyms(BinaryBasicBlock &BB, MCInst &Inst,
const SymTargetsType &SymTargets) const;
std::vector<std::unique_ptr<BinaryBasicBlock>>
rewriteCall(BinaryBasicBlock &IndCallBlock, const MCInst &CallInst,
MCPlusBuilder::BlocksVectorTy &&ICPcode,
const std::vector<MCInst *> &MethodFetchInsns) const;
BinaryBasicBlock *fixCFG(BinaryBasicBlock &IndCallBlock,
const bool IsTailCall, const bool IsJumpTable,
BasicBlocksVector &&NewBBs,
const std::vector<Callsite> &Targets) const;
public:
explicit IndirectCallPromotion(const cl::opt<bool> &PrintPass)
: BinaryFunctionPass(PrintPass) {}
const char *getName() const override { return "indirect-call-promotion"; }
bool shouldPrint(const BinaryFunction &BF) const override {
return BinaryFunctionPass::shouldPrint(BF) && Modified.count(&BF) > 0;
}
bool shouldOptimize(const BinaryFunction &BF) const override {
return BF.isSimple() && !BF.isIgnored() && BF.hasProfile() &&
!BF.hasUnknownControlFlow();
}
void runOnFunctions(BinaryContext &BC) override;
};
} // namespace bolt
} // namespace llvm
#endif