//===- bolt/Passes/LongJmp.cpp --------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the LongJmpPass class.
//
//===----------------------------------------------------------------------===//

#include "bolt/Passes/LongJmp.h"
#include "bolt/Core/ParallelUtilities.h"
#include "bolt/Utils/CommandLineOpts.h"
#include "llvm/Support/MathExtras.h"

#define DEBUG_TYPE "longjmp"

using namespace llvm;

namespace opts {
extern cl::OptionCategory BoltCategory;
extern cl::OptionCategory BoltOptCategory;
extern llvm::cl::opt<unsigned> AlignText;
extern cl::opt<unsigned> AlignFunctions;
extern cl::opt<bool> UseOldText;
extern cl::opt<bool> HotFunctionsAtEnd;

static cl::opt<bool>
    CompactCodeModel("compact-code-model",
                     cl::desc("generate code for binaries <128MB on AArch64"),
                     cl::init(false), cl::cat(BoltCategory));

static cl::opt<bool> GroupStubs("group-stubs",
                                cl::desc("share stubs across functions"),
                                cl::init(true), cl::cat(BoltOptCategory));
}

namespace llvm {
namespace bolt {

constexpr unsigned ColdFragAlign = 16;

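/// Rewrite the body of stub basic block \p StubBB so that it reaches \p Tgt
/// via a short-jump instruction sequence instead of a single branch.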
static void relaxStubToShortJmp(BinaryBasicBlock &StubBB, const MCSymbol *Tgt) {
  const BinaryContext &BC = StubBB.getFunction()->getBinaryContext();
  InstructionListType Seq;
  BC.MIB->createShortJmp(Seq, Tgt, BC.Ctx.get());
  StubBB.clear();
  StubBB.addInstructions(Seq.begin(), Seq.end());
}

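/// Rewrite the body of stub basic block \p StubBB so that it reaches \p Tgt
/// via a long-jump instruction sequence, the largest-range (and largest-size)
/// form of stub.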
static void relaxStubToLongJmp(BinaryBasicBlock &StubBB, const MCSymbol *Tgt) {
  const BinaryContext &BC = StubBB.getFunction()->getBinaryContext();
  InstructionListType Seq;
  BC.MIB->createLongJmp(Seq, Tgt, BC.Ctx.get());
  StubBB.clear();
  StubBB.addInstructions(Seq.begin(), Seq.end());
}

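/// Return the last hot basic block in the layout of \p Func, i.e. the block
/// that immediately precedes the first cold block, or nullptr if \p Func is
/// not split.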
static BinaryBasicBlock *getBBAtHotColdSplitPoint(BinaryFunction &Func) {
  if (!Func.isSplit() || Func.empty())
    return nullptr;

  assert(!(*Func.begin()).isCold() && "Entry cannot be cold");
  for (auto I = Func.getLayout().block_begin(),
            E = Func.getLayout().block_end();
       I != E; ++I) {
    auto Next = std::next(I);
    if (Next != E && (*Next)->isCold())
      return *I;
  }
  llvm_unreachable("No hot-cold split point found");
}

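/// Return true if \p Inst is a direct branch or call, i.e. a control-transfer
/// instruction whose target may end up out of encoding range and thus may
/// require a stub.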
static bool mayNeedStub(const BinaryContext &BC, const MCInst &Inst) {
  return (BC.MIB->isBranch(Inst) || BC.MIB->isCall(Inst)) &&
         !BC.MIB->isIndirectBranch(Inst) && !BC.MIB->isIndirectCall(Inst);
}

std::pair<std::unique_ptr<BinaryBasicBlock>, MCSymbol *>
LongJmpPass::createNewStub(BinaryBasicBlock &SourceBB, const MCSymbol *TgtSym,
                           bool TgtIsFunc, uint64_t AtAddress) {
  BinaryFunction &Func = *SourceBB.getFunction();
  const BinaryContext &BC = Func.getBinaryContext();
  const bool IsCold = SourceBB.isCold();
  MCSymbol *StubSym = BC.Ctx->createNamedTempSymbol("Stub");
  std::unique_ptr<BinaryBasicBlock> StubBB = Func.createBasicBlock(StubSym);
  MCInst Inst;
  BC.MIB->createUncondBranch(Inst, TgtSym, BC.Ctx.get());
  if (TgtIsFunc)
    BC.MIB->convertJmpToTailCall(Inst);
  StubBB->addInstruction(Inst);
  StubBB->setExecutionCount(0);

  // Register this stub in the stub maps, keeping each group sorted by address.
  auto registerInMap = [&](StubGroupsTy &Map) {
    StubGroupTy &StubGroup = Map[TgtSym];
    StubGroup.insert(
        llvm::lower_bound(
            StubGroup, std::make_pair(AtAddress, nullptr),
            [&](const std::pair<uint64_t, BinaryBasicBlock *> &LHS,
                const std::pair<uint64_t, BinaryBasicBlock *> &RHS) {
              return LHS.first < RHS.first;
            }),
        std::make_pair(AtAddress, StubBB.get()));
  };

  Stubs[&Func].insert(StubBB.get());
  StubBits[StubBB.get()] = BC.MIB->getUncondBranchEncodingSize();
  if (IsCold) {
    registerInMap(ColdLocalStubs[&Func]);
    if (opts::GroupStubs && TgtIsFunc)
      registerInMap(ColdStubGroups);
    ++NumColdStubs;
  } else {
    registerInMap(HotLocalStubs[&Func]);
    if (opts::GroupStubs && TgtIsFunc)
      registerInMap(HotStubGroups);
    ++NumHotStubs;
  }

  return std::make_pair(std::move(StubBB), StubSym);
}

BinaryBasicBlock *LongJmpPass::lookupStubFromGroup(
    const StubGroupsTy &StubGroups, const BinaryFunction &Func,
    const MCInst &Inst, const MCSymbol *TgtSym, uint64_t DotAddress) const {
  const BinaryContext &BC = Func.getBinaryContext();
  auto CandidatesIter = StubGroups.find(TgtSym);
  if (CandidatesIter == StubGroups.end())
    return nullptr;
  const StubGroupTy &Candidates = CandidatesIter->second;
  if (Candidates.empty())
    return nullptr;
  auto Cand = llvm::lower_bound(
      Candidates, std::make_pair(DotAddress, nullptr),
      [&](const std::pair<uint64_t, BinaryBasicBlock *> &LHS,
          const std::pair<uint64_t, BinaryBasicBlock *> &RHS) {
        return LHS.first < RHS.first;
      });
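  // lower_bound() finds the first stub at or above DotAddress; the previous
  // entry, if any, may be closer to the branch, so consider both and pick the
  // nearest candidate.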
  if (Cand == Candidates.end()) {
    Cand = std::prev(Cand);
  } else if (Cand != Candidates.begin()) {
    const StubTy *LeftCand = std::prev(Cand);
    if (Cand->first - DotAddress > DotAddress - LeftCand->first)
      Cand = LeftCand;
  }
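  // Reserve one bit of the encoding for the sign of the PC-relative offset.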
  int BitsAvail = BC.MIB->getPCRelEncodingSize(Inst) - 1;
  assert(BitsAvail < 63 && "PCRelEncodingSize is too large to use int64_t to "
                           "check for out-of-bounds.");
  int64_t MaxVal = (1ULL << BitsAvail) - 1;
  int64_t MinVal = -(1ULL << BitsAvail);
  uint64_t PCRelTgtAddress = Cand->first;
  int64_t PCOffset = (int64_t)(PCRelTgtAddress - DotAddress);

  LLVM_DEBUG({
    if (Candidates.size() > 1)
      dbgs() << "Considering stub group with " << Candidates.size()
             << " candidates. DotAddress is " << Twine::utohexstr(DotAddress)
             << ", chosen candidate address is "
             << Twine::utohexstr(Cand->first) << "\n";
  });
  return (PCOffset < MinVal || PCOffset > MaxVal) ? nullptr : Cand->second;
}

BinaryBasicBlock *
LongJmpPass::lookupGlobalStub(const BinaryBasicBlock &SourceBB,
                              const MCInst &Inst, const MCSymbol *TgtSym,
                              uint64_t DotAddress) const {
  const BinaryFunction &Func = *SourceBB.getFunction();
  const StubGroupsTy &StubGroups =
      SourceBB.isCold() ? ColdStubGroups : HotStubGroups;
  return lookupStubFromGroup(StubGroups, Func, Inst, TgtSym, DotAddress);
}

BinaryBasicBlock *LongJmpPass::lookupLocalStub(const BinaryBasicBlock &SourceBB,
                                               const MCInst &Inst,
                                               const MCSymbol *TgtSym,
                                               uint64_t DotAddress) const {
  const BinaryFunction &Func = *SourceBB.getFunction();
  const DenseMap<const BinaryFunction *, StubGroupsTy> &StubGroups =
      SourceBB.isCold() ? ColdLocalStubs : HotLocalStubs;
  const auto Iter = StubGroups.find(&Func);
  if (Iter == StubGroups.end())
    return nullptr;
  return lookupStubFromGroup(Iter->second, Func, Inst, TgtSym, DotAddress);
}

std::unique_ptr<BinaryBasicBlock>
LongJmpPass::replaceTargetWithStub(BinaryBasicBlock &BB, MCInst &Inst,
                                   uint64_t DotAddress,
                                   uint64_t StubCreationAddress) {
  const BinaryFunction &Func = *BB.getFunction();
  const BinaryContext &BC = Func.getBinaryContext();
  std::unique_ptr<BinaryBasicBlock> NewBB;
  const MCSymbol *TgtSym = BC.MIB->getTargetSymbol(Inst);
  assert(TgtSym && "getTargetSymbol failed");

  BinaryBasicBlock::BinaryBranchInfo BI{0, 0};
  BinaryBasicBlock *TgtBB = BB.getSuccessor(TgtSym, BI);
  auto LocalStubsIter = Stubs.find(&Func);

  // If the branch is already using a stub from another function, create a
  // local stub, since the foreign stub is now out of range.
  if (!TgtBB) {
    auto SSIter = SharedStubs.find(TgtSym);
    if (SSIter != SharedStubs.end()) {
      TgtSym = BC.MIB->getTargetSymbol(*SSIter->second->begin());
      --NumSharedStubs;
    }
  } else if (LocalStubsIter != Stubs.end() &&
             LocalStubsIter->second.count(TgtBB)) {
    // TgtBB and TgtSym are now the local out-of-range stub and its label, so
    // we are attempting to restore BB to its previous state without using
    // this stub.
    TgtSym = BC.MIB->getTargetSymbol(*TgtBB->begin());
    assert(TgtSym &&
           "First instruction is expected to contain a target symbol.");
    BinaryBasicBlock *TgtBBSucc = TgtBB->getSuccessor(TgtSym, BI);

    // TgtBB might have no successor, e.g. a stub for a function call.
    if (TgtBBSucc) {
      BB.replaceSuccessor(TgtBB, TgtBBSucc, BI.Count, BI.MispredictedCount);
      assert(TgtBB->getExecutionCount() >= BI.Count &&
             "At least equal or greater than the branch count.");
      TgtBB->setExecutionCount(TgtBB->getExecutionCount() - BI.Count);
    }

    TgtBB = TgtBBSucc;
  }

  BinaryBasicBlock *StubBB = lookupLocalStub(BB, Inst, TgtSym, DotAddress);
  // If not found, look it up in the globally shared stub maps if it is a
  // function call (TgtBB is not set).
  if (!StubBB && !TgtBB) {
    StubBB = lookupGlobalStub(BB, Inst, TgtSym, DotAddress);
    if (StubBB) {
      SharedStubs[StubBB->getLabel()] = StubBB;
      ++NumSharedStubs;
    }
  }
  MCSymbol *StubSymbol = StubBB ? StubBB->getLabel() : nullptr;

  if (!StubBB) {
    std::tie(NewBB, StubSymbol) =
        createNewStub(BB, TgtSym, /*is func?*/ !TgtBB, StubCreationAddress);
    StubBB = NewBB.get();
  }

  // Local branch
  if (TgtBB) {
    uint64_t OrigCount = BI.Count;
    uint64_t OrigMispreds = BI.MispredictedCount;
    BB.replaceSuccessor(TgtBB, StubBB, OrigCount, OrigMispreds);
    StubBB->setExecutionCount(StubBB->getExecutionCount() + OrigCount);
    if (NewBB) {
      StubBB->addSuccessor(TgtBB, OrigCount, OrigMispreds);
      StubBB->setIsCold(BB.isCold());
    }
    // Call / tail call
  } else {
    StubBB->setExecutionCount(StubBB->getExecutionCount() +
                              BB.getExecutionCount());
    if (NewBB) {
      assert(TgtBB == nullptr);
      StubBB->setIsCold(BB.isCold());
      // Set as an entry point because this block is valid but has no
      // predecessors.
      StubBB->getFunction()->addEntryPoint(*StubBB);
    }
  }
  BC.MIB->replaceBranchTarget(Inst, StubSymbol, BC.Ctx.get());

  return NewBB;
}

void LongJmpPass::updateStubGroups() {
  auto update = [&](StubGroupsTy &StubGroups) {
    for (auto &KeyVal : StubGroups) {
      for (StubTy &Elem : KeyVal.second)
        Elem.first = BBAddresses[Elem.second];
      llvm::sort(KeyVal.second, llvm::less_first());
    }
  };

  for (auto &KeyVal : HotLocalStubs)
    update(KeyVal.second);
  for (auto &KeyVal : ColdLocalStubs)
    update(KeyVal.second);
  update(HotStubGroups);
  update(ColdStubGroups);
}

void LongJmpPass::tentativeBBLayout(const BinaryFunction &Func) {
  const BinaryContext &BC = Func.getBinaryContext();
  uint64_t HotDot = HotAddresses[&Func];
  uint64_t ColdDot = ColdAddresses[&Func];
  bool Cold = false;
  for (const BinaryBasicBlock *BB : Func.getLayout().blocks()) {
    if (Cold || BB->isCold()) {
      Cold = true;
      BBAddresses[BB] = ColdDot;
      ColdDot += BC.computeCodeSize(BB->begin(), BB->end());
    } else {
      BBAddresses[BB] = HotDot;
      HotDot += BC.computeCodeSize(BB->begin(), BB->end());
    }
  }
}

uint64_t LongJmpPass::tentativeLayoutRelocColdPart(
    const BinaryContext &BC, std::vector<BinaryFunction *> &SortedFunctions,
    uint64_t DotAddress) {
  DotAddress = alignTo(DotAddress, llvm::Align(opts::AlignFunctions));
  for (BinaryFunction *Func : SortedFunctions) {
    if (!Func->isSplit())
      continue;
    DotAddress = alignTo(DotAddress, Func->getMinAlignment());
    uint64_t Pad =
        offsetToAlignment(DotAddress, llvm::Align(Func->getAlignment()));
    if (Pad <= Func->getMaxColdAlignmentBytes())
      DotAddress += Pad;
    ColdAddresses[Func] = DotAddress;
    LLVM_DEBUG(dbgs() << Func->getPrintName() << " cold tentative: "
                      << Twine::utohexstr(DotAddress) << "\n");
    DotAddress += Func->estimateColdSize();
    DotAddress = alignTo(DotAddress, Func->getConstantIslandAlignment());
    DotAddress += Func->estimateConstantIslandSize();
  }
  return DotAddress;
}

uint64_t LongJmpPass::tentativeLayoutRelocMode(
    const BinaryContext &BC, std::vector<BinaryFunction *> &SortedFunctions,
    uint64_t DotAddress) {
  // Compute the hot-cold frontier.
  int64_t LastHotIndex = -1u;
  uint32_t CurrentIndex = 0;
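  // With hot functions placed at the end, functions without a valid index
  // come first, so the frontier is the first function with a valid index;
  // otherwise it is the first function without one.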
  if (opts::HotFunctionsAtEnd) {
    for (BinaryFunction *BF : SortedFunctions) {
      if (BF->hasValidIndex()) {
        LastHotIndex = CurrentIndex;
        break;
      }

      ++CurrentIndex;
    }
  } else {
    for (BinaryFunction *BF : SortedFunctions) {
      if (!BF->hasValidIndex()) {
        LastHotIndex = CurrentIndex;
        break;
      }

      ++CurrentIndex;
    }
  }

  // Hot
  CurrentIndex = 0;
  bool ColdLayoutDone = false;
  auto runColdLayout = [&]() {
    DotAddress = tentativeLayoutRelocColdPart(BC, SortedFunctions, DotAddress);
    ColdLayoutDone = true;
    if (opts::HotFunctionsAtEnd)
      DotAddress = alignTo(DotAddress, opts::AlignText);
  };
  for (BinaryFunction *Func : SortedFunctions) {
    if (!BC.shouldEmit(*Func)) {
      HotAddresses[Func] = Func->getAddress();
      continue;
    }

    if (!ColdLayoutDone && CurrentIndex >= LastHotIndex)
      runColdLayout();

    DotAddress = alignTo(DotAddress, Func->getMinAlignment());
    uint64_t Pad =
        offsetToAlignment(DotAddress, llvm::Align(Func->getAlignment()));
    if (Pad <= Func->getMaxAlignmentBytes())
      DotAddress += Pad;
    HotAddresses[Func] = DotAddress;
    LLVM_DEBUG(dbgs() << Func->getPrintName() << " tentative: "
                      << Twine::utohexstr(DotAddress) << "\n");
    if (!Func->isSplit())
      DotAddress += Func->estimateSize();
    else
      DotAddress += Func->estimateHotSize();

    DotAddress = alignTo(DotAddress, Func->getConstantIslandAlignment());
    DotAddress += Func->estimateConstantIslandSize();
    ++CurrentIndex;
  }

  // Ensure that tentative code layout always runs for cold blocks.
  if (!ColdLayoutDone)
    runColdLayout();

  // BBs
  for (BinaryFunction *Func : SortedFunctions)
    tentativeBBLayout(*Func);

  return DotAddress;
}

void LongJmpPass::tentativeLayout(
    const BinaryContext &BC, std::vector<BinaryFunction *> &SortedFunctions) {
  uint64_t DotAddress = BC.LayoutStartAddress;

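  // Non-relocation mode: hot code keeps its original addresses; only the cold
  // parts of split functions are laid out, sequentially, starting at
  // LayoutStartAddress.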
  if (!BC.HasRelocations) {
    for (BinaryFunction *Func : SortedFunctions) {
      HotAddresses[Func] = Func->getAddress();
      DotAddress = alignTo(DotAddress, ColdFragAlign);
      ColdAddresses[Func] = DotAddress;
      if (Func->isSplit())
        DotAddress += Func->estimateColdSize();
      tentativeBBLayout(*Func);
    }

    return;
  }

  // Relocation mode
  uint64_t EstimatedTextSize = 0;
  if (opts::UseOldText) {
    EstimatedTextSize = tentativeLayoutRelocMode(BC, SortedFunctions, 0);

    // Initial padding
    if (EstimatedTextSize <= BC.OldTextSectionSize) {
      DotAddress = BC.OldTextSectionAddress;
      uint64_t Pad =
          offsetToAlignment(DotAddress, llvm::Align(opts::AlignText));
      if (Pad + EstimatedTextSize <= BC.OldTextSectionSize) {
        DotAddress += Pad;
      }
    }
  }

  if (!EstimatedTextSize || EstimatedTextSize > BC.OldTextSectionSize)
    DotAddress = alignTo(BC.LayoutStartAddress, opts::AlignText);

  tentativeLayoutRelocMode(BC, SortedFunctions, DotAddress);
}

bool LongJmpPass::usesStub(const BinaryFunction &Func,
                           const MCInst &Inst) const {
  const MCSymbol *TgtSym = Func.getBinaryContext().MIB->getTargetSymbol(Inst);
  const BinaryBasicBlock *TgtBB = Func.getBasicBlockForLabel(TgtSym);
  auto Iter = Stubs.find(&Func);
  if (Iter != Stubs.end())
    return Iter->second.count(TgtBB);
  return false;
}

uint64_t LongJmpPass::getSymbolAddress(const BinaryContext &BC,
                                       const MCSymbol *Target,
                                       const BinaryBasicBlock *TgtBB) const {
  if (TgtBB) {
    auto Iter = BBAddresses.find(TgtBB);
    assert(Iter != BBAddresses.end() && "Unrecognized BB");
    return Iter->second;
  }
  uint64_t EntryID = 0;
  const BinaryFunction *TargetFunc = BC.getFunctionForSymbol(Target, &EntryID);
  auto Iter = HotAddresses.find(TargetFunc);
  if (Iter == HotAddresses.end() || (TargetFunc && EntryID)) {
    // Fall back to BinaryContext's resolution for this symbol: it is either
    // not mapped to a BinaryFunction or is a secondary entry point.
    ErrorOr<uint64_t> ValueOrError = BC.getSymbolValue(*Target);
    assert(ValueOrError && "Unrecognized symbol");
    return *ValueOrError;
  }
  return Iter->second;
}

Error LongJmpPass::relaxStub(BinaryBasicBlock &StubBB, bool &Modified) {
  const BinaryFunction &Func = *StubBB.getFunction();
  const BinaryContext &BC = Func.getBinaryContext();
  const int Bits = StubBits[&StubBB];
  // Already working with the largest range?
  if (Bits == static_cast<int>(BC.AsmInfo->getCodePointerSize() * 8))
    return Error::success();

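  // A branch distance with no bits set above a mask fits the corresponding
  // branch form: either a single-instruction branch (reserving one bit for
  // the sign) or a short-jump sequence.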
  const static int RangeShortJmp = BC.MIB->getShortJmpEncodingSize();
  const static int RangeSingleInstr = BC.MIB->getUncondBranchEncodingSize();
  const static uint64_t ShortJmpMask = ~((1ULL << RangeShortJmp) - 1);
  const static uint64_t SingleInstrMask =
      ~((1ULL << (RangeSingleInstr - 1)) - 1);

  const MCSymbol *RealTargetSym = BC.MIB->getTargetSymbol(*StubBB.begin());
  const BinaryBasicBlock *TgtBB = Func.getBasicBlockForLabel(RealTargetSym);
  uint64_t TgtAddress = getSymbolAddress(BC, RealTargetSym, TgtBB);
  uint64_t DotAddress = BBAddresses[&StubBB];
  uint64_t PCRelTgtAddress = DotAddress > TgtAddress ? DotAddress - TgtAddress
                                                     : TgtAddress - DotAddress;
  // If it fits in one instruction, do not relax.
  if (!(PCRelTgtAddress & SingleInstrMask))
    return Error::success();

  // Fits a short jump.
  if (!(PCRelTgtAddress & ShortJmpMask)) {
    if (Bits >= RangeShortJmp)
      return Error::success();

    LLVM_DEBUG(dbgs() << "Relaxing stub to short jump. PCRelTgtAddress = "
                      << Twine::utohexstr(PCRelTgtAddress)
                      << " RealTargetSym = " << RealTargetSym->getName()
                      << "\n");
    relaxStubToShortJmp(StubBB, RealTargetSym);
    StubBits[&StubBB] = RangeShortJmp;
    Modified = true;
    return Error::success();
  }

  // The long jump uses an absolute address on AArch64, so it cannot be used
  // for PIC binaries.
  if (BC.isAArch64() && !BC.HasFixedLoadAddress)
    return createFatalBOLTError(
        "BOLT-ERROR: Unable to relax stub for PIC binary\n");

  LLVM_DEBUG(dbgs() << "Relaxing stub to long jump. PCRelTgtAddress = "
                    << Twine::utohexstr(PCRelTgtAddress)
                    << " RealTargetSym = " << RealTargetSym->getName() << "\n");
  relaxStubToLongJmp(StubBB, RealTargetSym);
  StubBits[&StubBB] = static_cast<int>(BC.AsmInfo->getCodePointerSize() * 8);
  Modified = true;
  return Error::success();
}

bool LongJmpPass::needsStub(const BinaryBasicBlock &BB, const MCInst &Inst,
                            uint64_t DotAddress) const {
  const BinaryFunction &Func = *BB.getFunction();
  const BinaryContext &BC = Func.getBinaryContext();
  const MCSymbol *TgtSym = BC.MIB->getTargetSymbol(Inst);
  assert(TgtSym && "getTargetSymbol failed");

  const BinaryBasicBlock *TgtBB = Func.getBasicBlockForLabel(TgtSym);
  // Check for shared stubs from foreign functions.
  if (!TgtBB) {
    auto SSIter = SharedStubs.find(TgtSym);
    if (SSIter != SharedStubs.end())
      TgtBB = SSIter->second;
  }

  int BitsAvail = BC.MIB->getPCRelEncodingSize(Inst) - 1;
  assert(BitsAvail < 63 && "PCRelEncodingSize is too large to use int64_t to "
                           "check for out-of-bounds.");
  int64_t MaxVal = (1ULL << BitsAvail) - 1;
  int64_t MinVal = -(1ULL << BitsAvail);

  uint64_t PCRelTgtAddress = getSymbolAddress(BC, TgtSym, TgtBB);
  int64_t PCOffset = (int64_t)(PCRelTgtAddress - DotAddress);

  return PCOffset < MinVal || PCOffset > MaxVal;
}

Error LongJmpPass::relax(BinaryFunction &Func, bool &Modified) {
  const BinaryContext &BC = Func.getBinaryContext();

  assert(BC.isAArch64() && "Unsupported arch");
  constexpr int InsnSize = 4; // AArch64
  std::vector<std::pair<BinaryBasicBlock *, std::unique_ptr<BinaryBasicBlock>>>
      Insertions;

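  // The frontier is the last hot block in the layout. Stubs for hot branches
  // into the cold area are placed there, keeping them out of the hot path.
  // FrontierAddress is the address just past the end of that block.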
  BinaryBasicBlock *Frontier = getBBAtHotColdSplitPoint(Func);
  uint64_t FrontierAddress = Frontier ? BBAddresses[Frontier] : 0;
  if (FrontierAddress)
    FrontierAddress += Frontier->getNumNonPseudos() * InsnSize;

  // Add necessary stubs for branch targets we know cannot be encoded in the
  // instruction itself.
  for (BinaryBasicBlock &BB : Func) {
    uint64_t DotAddress = BBAddresses[&BB];
    // Stubs themselves are relaxed in the next loop.
    if (Stubs[&Func].count(&BB))
      continue;

    for (MCInst &Inst : BB) {
      if (BC.MIB->isPseudo(Inst))
        continue;

      if (!mayNeedStub(BC, Inst)) {
        DotAddress += InsnSize;
        continue;
      }

      // Check and relax direct branch or call.
      if (!needsStub(BB, Inst, DotAddress)) {
        DotAddress += InsnSize;
        continue;
      }
      Modified = true;

      // Insert stubs close to the patched BB if this is a call, but far away
      // from the hot path if it is a branch, since the branch target is in
      // the cold region (but first check that the far-away stub will be in
      // range).
      BinaryBasicBlock *InsertionPoint = &BB;
      if (Func.isSimple() && !BC.MIB->isCall(Inst) && FrontierAddress &&
          !BB.isCold()) {
        int BitsAvail = BC.MIB->getPCRelEncodingSize(Inst) - 1;
        uint64_t Mask = ~((1ULL << BitsAvail) - 1);
        assert(FrontierAddress > DotAddress &&
               "Hot code should be before the frontier");
        uint64_t PCRelTgt = FrontierAddress - DotAddress;
        if (!(PCRelTgt & Mask))
          InsertionPoint = Frontier;
      }
      // Always put stubs at the end of the function if it is non-simple. We
      // can't change the layout of non-simple functions because they may have
      // jump tables that we do not control.
      if (!Func.isSimple())
        InsertionPoint = &*std::prev(Func.end());

      // Create a stub to handle a far-away target.
      Insertions.emplace_back(InsertionPoint,
                              replaceTargetWithStub(BB, Inst, DotAddress,
                                                    InsertionPoint == Frontier
                                                        ? FrontierAddress
                                                        : DotAddress));

      DotAddress += InsnSize;
    }
  }

  // Relax stubs if necessary.
  for (BinaryBasicBlock &BB : Func) {
    if (!Stubs[&Func].count(&BB) || !BB.isValid())
      continue;

    if (auto E = relaxStub(BB, Modified))
      return Error(std::move(E));
  }

  for (std::pair<BinaryBasicBlock *, std::unique_ptr<BinaryBasicBlock>> &Elmt :
       Insertions) {
    if (!Elmt.second)
      continue;
    std::vector<std::unique_ptr<BinaryBasicBlock>> NewBBs;
    NewBBs.emplace_back(std::move(Elmt.second));
    Func.insertBasicBlocks(Elmt.first, std::move(NewBBs), true);
  }

  return Error::success();
}

void LongJmpPass::relaxLocalBranches(BinaryFunction &BF) {
  BinaryContext &BC = BF.getBinaryContext();
  auto &MIB = BC.MIB;

  // Quick path: a non-split function that fits within the span of the
  // shortest branch needs no relaxation.
  if (!BF.isSplit() && BF.estimateSize() < ShortestJumpSpan)
    return;

  auto isBranchOffsetInRange = [&](const MCInst &Inst, int64_t Offset) {
    const unsigned Bits = MIB->getPCRelEncodingSize(Inst);
    return isIntN(Bits, Offset);
  };

  auto isBlockInRange = [&](const MCInst &Inst, uint64_t InstAddress,
                            const BinaryBasicBlock &BB) {
    const int64_t Offset = BB.getOutputStartAddress() - InstAddress;
    return isBranchOffsetInRange(Inst, Offset);
  };

  // Keep track of *all* function trampolines that are going to be added to the
  // function layout at the end of relaxation.
  std::vector<std::pair<BinaryBasicBlock *, std::unique_ptr<BinaryBasicBlock>>>
      FunctionTrampolines;

  // Function fragments are relaxed independently.
  for (FunctionFragment &FF : BF.getLayout().fragments()) {
    // Fill out code size estimation for the fragment. Use output BB address
    // ranges to store offsets from the start of the function fragment.
    uint64_t CodeSize = 0;
    for (BinaryBasicBlock *BB : FF) {
      BB->setOutputStartAddress(CodeSize);
      CodeSize += BB->estimateSize();
      BB->setOutputEndAddress(CodeSize);
    }

    // Dynamically-updated size of the fragment.
    uint64_t FragmentSize = CodeSize;

    // Size of the trampoline in bytes.
    constexpr uint64_t TrampolineSize = 4;

    // Trampolines created for the fragment. DestinationBB -> TrampolineBB.
    // NB: here we store only the first trampoline created for DestinationBB.
    DenseMap<const BinaryBasicBlock *, BinaryBasicBlock *> FragmentTrampolines;

    // Create trampoline code after \p BB or at the end of the fragment if BB
    // is nullptr. If \p UpdateOffsets is true, update FragmentSize and offsets
    // for basic blocks affected by the insertion of the trampoline.
    auto addTrampolineAfter = [&](BinaryBasicBlock *BB,
                                  BinaryBasicBlock *TargetBB, uint64_t Count,
                                  bool UpdateOffsets = true) {
      FunctionTrampolines.emplace_back(BB ? BB : FF.back(),
                                       BF.createBasicBlock());
      BinaryBasicBlock *TrampolineBB = FunctionTrampolines.back().second.get();

      MCInst Inst;
      {
        auto L = BC.scopeLock();
        MIB->createUncondBranch(Inst, TargetBB->getLabel(), BC.Ctx.get());
      }
      TrampolineBB->addInstruction(Inst);
      TrampolineBB->addSuccessor(TargetBB, Count);
      TrampolineBB->setExecutionCount(Count);
      const uint64_t TrampolineAddress =
          BB ? BB->getOutputEndAddress() : FragmentSize;
      TrampolineBB->setOutputStartAddress(TrampolineAddress);
      TrampolineBB->setOutputEndAddress(TrampolineAddress + TrampolineSize);
      TrampolineBB->setFragmentNum(FF.getFragmentNum());

      if (!FragmentTrampolines.lookup(TargetBB))
        FragmentTrampolines[TargetBB] = TrampolineBB;

      if (!UpdateOffsets)
        return TrampolineBB;

      FragmentSize += TrampolineSize;

      // If the trampoline was added at the end of the fragment, offsets of
      // other fragments should stay intact.
      if (!BB)
        return TrampolineBB;

      // Update offsets for blocks after BB.
      for (BinaryBasicBlock *IBB : FF) {
        if (IBB->getOutputStartAddress() >= TrampolineAddress) {
          IBB->setOutputStartAddress(IBB->getOutputStartAddress() +
                                     TrampolineSize);
          IBB->setOutputEndAddress(IBB->getOutputEndAddress() + TrampolineSize);
        }
      }

      // Update offsets for trampolines in this fragment that are placed after
      // the new trampoline. Note that trampoline blocks are not part of the
      // function/fragment layout until we add them right before the return
      // from relaxLocalBranches().
      for (auto &Pair : FunctionTrampolines) {
        BinaryBasicBlock *IBB = Pair.second.get();
        if (IBB->getFragmentNum() != TrampolineBB->getFragmentNum())
          continue;
        if (IBB == TrampolineBB)
          continue;
        if (IBB->getOutputStartAddress() >= TrampolineAddress) {
          IBB->setOutputStartAddress(IBB->getOutputStartAddress() +
                                     TrampolineSize);
          IBB->setOutputEndAddress(IBB->getOutputEndAddress() + TrampolineSize);
        }
      }

      return TrampolineBB;
    };

    // Pre-populate trampolines by splitting unconditional branches from the
    // containing basic block.
    for (BinaryBasicBlock *BB : FF) {
      MCInst *Inst = BB->getLastNonPseudoInstr();
      if (!Inst || !MIB->isUnconditionalBranch(*Inst))
        continue;

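      // Split the unconditional branch into its own trampoline block placed
      // right after BB. Removing the branch shrinks BB by exactly
      // TrampolineSize bytes and the trampoline occupies that same space, so
      // the offsets of the other blocks do not change (hence UpdateOffsets is
      // false).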
      const MCSymbol *TargetSymbol = MIB->getTargetSymbol(*Inst);
      BB->eraseInstruction(BB->findInstruction(Inst));
      BB->setOutputEndAddress(BB->getOutputEndAddress() - TrampolineSize);

      BinaryBasicBlock::BinaryBranchInfo BI;
      BinaryBasicBlock *TargetBB = BB->getSuccessor(TargetSymbol, BI);

      BinaryBasicBlock *TrampolineBB =
          addTrampolineAfter(BB, TargetBB, BI.Count, /*UpdateOffsets*/ false);
      BB->replaceSuccessor(TargetBB, TrampolineBB, BI.Count);
    }

    /// Relax the branch \p Inst in basic block \p BB that targets \p TargetBB.
    /// \p InstAddress contains offset of the branch from the start of the
    /// containing function fragment.
    auto relaxBranch = [&](BinaryBasicBlock *BB, MCInst &Inst,
                           uint64_t InstAddress, BinaryBasicBlock *TargetBB) {
      BinaryFunction *BF = BB->getParent();

      // Use branch taken count for optimal relaxation.
      const uint64_t Count = BB->getBranchInfo(*TargetBB).Count;
      assert(Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
             "Expected valid branch execution count");

      // Try to reuse an existing trampoline without introducing any new code.
      BinaryBasicBlock *TrampolineBB = FragmentTrampolines.lookup(TargetBB);
      if (TrampolineBB && isBlockInRange(Inst, InstAddress, *TrampolineBB)) {
        BB->replaceSuccessor(TargetBB, TrampolineBB, Count);
        TrampolineBB->setExecutionCount(TrampolineBB->getExecutionCount() +
                                        Count);
        auto L = BC.scopeLock();
        MIB->replaceBranchTarget(Inst, TrampolineBB->getLabel(), BC.Ctx.get());
        return;
      }

      // For cold branches, check if we can introduce a trampoline at the end
      // of the fragment that is within the branch reach. Note that such
      // trampoline may change address later and become unreachable in which
      // case we will need further relaxation.
      const int64_t OffsetToEnd = FragmentSize - InstAddress;
      if (Count == 0 && isBranchOffsetInRange(Inst, OffsetToEnd)) {
        TrampolineBB = addTrampolineAfter(nullptr, TargetBB, Count);
        BB->replaceSuccessor(TargetBB, TrampolineBB, Count);
        auto L = BC.scopeLock();
        MIB->replaceBranchTarget(Inst, TrampolineBB->getLabel(), BC.Ctx.get());

        return;
      }

      // Insert a new block after the current one and use it as a trampoline.
      TrampolineBB = addTrampolineAfter(BB, TargetBB, Count);

      // If the other successor is a fall-through, invert the condition code.
      const BinaryBasicBlock *const NextBB =
          BF->getLayout().getBasicBlockAfter(BB, /*IgnoreSplits*/ false);
      if (BB->getConditionalSuccessor(false) == NextBB) {
        BB->swapConditionalSuccessors();
        auto L = BC.scopeLock();
        MIB->reverseBranchCondition(Inst, NextBB->getLabel(), BC.Ctx.get());
      } else {
        auto L = BC.scopeLock();
        MIB->replaceBranchTarget(Inst, TrampolineBB->getLabel(), BC.Ctx.get());
      }
      BB->replaceSuccessor(TargetBB, TrampolineBB, Count);
    };

    bool MayNeedRelaxation;
    uint64_t NumIterations = 0;
    do {
      MayNeedRelaxation = false;
      ++NumIterations;
      for (auto BBI = FF.begin(); BBI != FF.end(); ++BBI) {
        BinaryBasicBlock *BB = *BBI;
        uint64_t NextInstOffset = BB->getOutputStartAddress();
        for (MCInst &Inst : *BB) {
          const size_t InstAddress = NextInstOffset;
          if (!MIB->isPseudo(Inst))
            NextInstOffset += 4;

          if (!mayNeedStub(BF.getBinaryContext(), Inst))
            continue;

          const size_t BitsAvailable = MIB->getPCRelEncodingSize(Inst);

          // Span of +/-128MB.
          if (BitsAvailable == LongestJumpBits)
            continue;

          const MCSymbol *TargetSymbol = MIB->getTargetSymbol(Inst);
          BinaryBasicBlock *TargetBB = BB->getSuccessor(TargetSymbol);
          assert(TargetBB &&
                 "Basic block target expected for conditional branch.");

          // Check if the relaxation is needed.
          if (TargetBB->getFragmentNum() == FF.getFragmentNum() &&
              isBlockInRange(Inst, InstAddress, *TargetBB))
            continue;

          relaxBranch(BB, Inst, InstAddress, TargetBB);

          MayNeedRelaxation = true;
        }
      }

      // We may have added new instructions, but if the whole fragment is
      // still smaller than the minimum branch span, every branch is in range.
      if (FragmentSize < ShortestJumpSpan)
        MayNeedRelaxation = false;

    } while (MayNeedRelaxation);

    LLVM_DEBUG({
      if (NumIterations > 2) {
        dbgs() << "BOLT-DEBUG: relaxed fragment " << FF.getFragmentNum().get()
               << " of " << BF << " in " << NumIterations << " iterations\n";
      }
    });
    (void)NumIterations;
  }

  // Add trampoline blocks from all fragments to the layout.
  DenseMap<BinaryBasicBlock *, std::vector<std::unique_ptr<BinaryBasicBlock>>>
      Insertions;
  for (std::pair<BinaryBasicBlock *, std::unique_ptr<BinaryBasicBlock>> &Pair :
       FunctionTrampolines) {
    if (!Pair.second)
      continue;
    Insertions[Pair.first].emplace_back(std::move(Pair.second));
  }

  for (auto &Pair : Insertions) {
    BF.insertBasicBlocks(Pair.first, std::move(Pair.second),
                         /*UpdateLayout*/ true, /*UpdateCFI*/ true,
                         /*RecomputeLPs*/ false);
  }
}

Error LongJmpPass::runOnFunctions(BinaryContext &BC) {

  if (opts::CompactCodeModel) {
    BC.outs()
        << "BOLT-INFO: relaxing branches for compact code model (<128MB)\n";

    ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
      relaxLocalBranches(BF);
    };

    ParallelUtilities::PredicateTy SkipPredicate =
        [&](const BinaryFunction &BF) {
          return !BC.shouldEmit(BF) || !BF.isSimple();
        };

    ParallelUtilities::runOnEachFunction(
        BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun,
        SkipPredicate, "RelaxLocalBranches");

    return Error::success();
  }

  BC.outs() << "BOLT-INFO: Starting stub-insertion pass\n";
  std::vector<BinaryFunction *> Sorted = BC.getSortedFunctions();
  bool Modified;
  uint32_t Iterations = 0;
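  // Iterate layout estimation and stub insertion until a fixed point:
  // inserting stubs grows the code, which may push other branch targets out
  // of range and require more stubs.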
  do {
    ++Iterations;
    Modified = false;
    tentativeLayout(BC, Sorted);
    updateStubGroups();
    for (BinaryFunction *Func : Sorted) {
      if (auto E = relax(*Func, Modified))
        return Error(std::move(E));
      // Don't ruin non-simple functions; they can't afford to have their
      // layout changed.
      if (Modified && Func->isSimple())
        Func->fixBranches();
    }
  } while (Modified);
  BC.outs() << "BOLT-INFO: Inserted " << NumHotStubs
            << " stubs in the hot area and " << NumColdStubs
            << " stubs in the cold area. Shared " << NumSharedStubs
            << " times, iterated " << Iterations << " times.\n";
  return Error::success();
}
} // namespace bolt
} // namespace llvm