| //===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file implements the BinaryContext class. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "bolt/Core/BinaryContext.h" |
| #include "bolt/Core/BinaryEmitter.h" |
| #include "bolt/Core/BinaryFunction.h" |
| #include "bolt/Utils/CommandLineOpts.h" |
| #include "bolt/Utils/Utils.h" |
| #include "llvm/ADT/STLExtras.h" |
| #include "llvm/ADT/Twine.h" |
| #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" |
| #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" |
| #include "llvm/DebugInfo/DWARF/DWARFUnit.h" |
| #include "llvm/MC/MCAssembler.h" |
| #include "llvm/MC/MCContext.h" |
| #include "llvm/MC/MCDisassembler/MCDisassembler.h" |
| #include "llvm/MC/MCInstPrinter.h" |
| #include "llvm/MC/MCObjectStreamer.h" |
| #include "llvm/MC/MCObjectWriter.h" |
| #include "llvm/MC/MCRegisterInfo.h" |
| #include "llvm/MC/MCSectionELF.h" |
| #include "llvm/MC/MCStreamer.h" |
| #include "llvm/MC/MCSubtargetInfo.h" |
| #include "llvm/MC/MCSymbol.h" |
| #include "llvm/Support/CommandLine.h" |
| #include "llvm/Support/Error.h" |
| #include "llvm/Support/Regex.h" |
| #include <algorithm> |
| #include <functional> |
| #include <iterator> |
| #include <unordered_set> |
| |
| using namespace llvm; |
| |
| #undef DEBUG_TYPE |
| #define DEBUG_TYPE "bolt" |
| |
| namespace opts { |
| |
| cl::opt<bool> NoHugePages("no-huge-pages", |
| cl::desc("use regular size pages for code alignment"), |
| cl::Hidden, cl::cat(BoltCategory)); |
| |
| static cl::opt<bool> |
| PrintDebugInfo("print-debug-info", |
| cl::desc("print debug info when printing functions"), |
| cl::Hidden, |
| cl::ZeroOrMore, |
| cl::cat(BoltCategory)); |
| |
| cl::opt<bool> PrintRelocations( |
| "print-relocations", |
| cl::desc("print relocations when printing functions/objects"), cl::Hidden, |
| cl::cat(BoltCategory)); |
| |
| static cl::opt<bool> |
| PrintMemData("print-mem-data", |
| cl::desc("print memory data annotations when printing functions"), |
| cl::Hidden, |
| cl::ZeroOrMore, |
| cl::cat(BoltCategory)); |
| |
| cl::opt<std::string> CompDirOverride( |
| "comp-dir-override", |
| cl::desc("overrides DW_AT_comp_dir, and provides an alternative base " |
| "location, which is used with DW_AT_dwo_name to construct a path " |
| "to *.dwo files."), |
| cl::Hidden, cl::init(""), cl::cat(BoltCategory)); |
| } // namespace opts |
| |
| namespace llvm { |
| namespace bolt { |
| |
| char BOLTError::ID = 0; |
| |
| BOLTError::BOLTError(bool IsFatal, const Twine &S) |
| : IsFatal(IsFatal), Msg(S.str()) {} |
| |
| void BOLTError::log(raw_ostream &OS) const { |
| if (IsFatal) |
| OS << "FATAL "; |
| StringRef ErrMsg = StringRef(Msg); |
| // Prepend our error prefix if it is missing |
| if (ErrMsg.empty()) { |
| OS << "BOLT-ERROR\n"; |
| } else { |
| if (!ErrMsg.starts_with("BOLT-ERROR")) |
| OS << "BOLT-ERROR: "; |
| OS << ErrMsg << "\n"; |
| } |
| } |
| |
| std::error_code BOLTError::convertToErrorCode() const { |
| return inconvertibleErrorCode(); |
| } |
| |
| Error createNonFatalBOLTError(const Twine &S) { |
| return make_error<BOLTError>(/*IsFatal*/ false, S); |
| } |
| |
| Error createFatalBOLTError(const Twine &S) { |
| return make_error<BOLTError>(/*IsFatal*/ true, S); |
| } |
| |
| void BinaryContext::logBOLTErrorsAndQuitOnFatal(Error E) { |
| handleAllErrors(Error(std::move(E)), [&](const BOLTError &E) { |
| if (!E.getMessage().empty()) |
| E.log(this->errs()); |
| if (E.isFatal()) |
| exit(1); |
| }); |
| } |
| |
| BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx, |
| std::unique_ptr<DWARFContext> DwCtx, |
| std::unique_ptr<Triple> TheTriple, |
| const Target *TheTarget, std::string TripleName, |
| std::unique_ptr<MCCodeEmitter> MCE, |
| std::unique_ptr<MCObjectFileInfo> MOFI, |
| std::unique_ptr<const MCAsmInfo> AsmInfo, |
| std::unique_ptr<const MCInstrInfo> MII, |
| std::unique_ptr<const MCSubtargetInfo> STI, |
| std::unique_ptr<MCInstPrinter> InstPrinter, |
| std::unique_ptr<const MCInstrAnalysis> MIA, |
| std::unique_ptr<MCPlusBuilder> MIB, |
| std::unique_ptr<const MCRegisterInfo> MRI, |
| std::unique_ptr<MCDisassembler> DisAsm, |
| JournalingStreams Logger) |
| : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)), |
| TheTriple(std::move(TheTriple)), TheTarget(TheTarget), |
| TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)), |
| AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)), |
| InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)), |
| MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)), |
| Logger(Logger), InitialDynoStats(isAArch64()) { |
| RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86; |
| PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize; |
| } |
| |
| BinaryContext::~BinaryContext() { |
| for (BinarySection *Section : Sections) |
| delete Section; |
| for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions) |
| delete InjectedFunction; |
| for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables) |
| delete JTI.second; |
| clearBinaryData(); |
| } |
| |
| /// Create BinaryContext for a given architecture \p ArchName and |
| /// triple \p TripleName. |
| Expected<std::unique_ptr<BinaryContext>> BinaryContext::createBinaryContext( |
| Triple TheTriple, StringRef InputFileName, SubtargetFeatures *Features, |
| bool IsPIC, std::unique_ptr<DWARFContext> DwCtx, JournalingStreams Logger) { |
| StringRef ArchName = ""; |
| std::string FeaturesStr = ""; |
| switch (TheTriple.getArch()) { |
| case llvm::Triple::x86_64: |
| if (Features) |
| return createFatalBOLTError( |
| "x86_64 target does not use SubtargetFeatures"); |
| ArchName = "x86-64"; |
| FeaturesStr = "+nopl"; |
| break; |
| case llvm::Triple::aarch64: |
| if (Features) |
| return createFatalBOLTError( |
| "AArch64 target does not use SubtargetFeatures"); |
| ArchName = "aarch64"; |
| FeaturesStr = "+all"; |
| break; |
| case llvm::Triple::riscv64: { |
| ArchName = "riscv64"; |
| if (!Features) |
| return createFatalBOLTError("RISCV target needs SubtargetFeatures"); |
| // We rely on relaxation for some transformations (e.g., promoting all calls |
| // to PseudoCALL and then making JITLink relax them). Since the relax |
| // feature is not stored in the object file, we manually enable it. |
| Features->AddFeature("relax"); |
| FeaturesStr = Features->getString(); |
| break; |
| } |
| default: |
| return createStringError(std::errc::not_supported, |
| "BOLT-ERROR: Unrecognized machine in ELF file"); |
| } |
| |
| const std::string TripleName = TheTriple.str(); |
| |
| std::string Error; |
| const Target *TheTarget = |
| TargetRegistry::lookupTarget(std::string(ArchName), TheTriple, Error); |
| if (!TheTarget) |
| return createStringError(make_error_code(std::errc::not_supported), |
| Twine("BOLT-ERROR: ", Error)); |
| |
| std::unique_ptr<const MCRegisterInfo> MRI( |
| TheTarget->createMCRegInfo(TripleName)); |
| if (!MRI) |
| return createStringError( |
| make_error_code(std::errc::not_supported), |
| Twine("BOLT-ERROR: no register info for target ", TripleName)); |
| |
| // Set up disassembler. |
| std::unique_ptr<MCAsmInfo> AsmInfo( |
| TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions())); |
| if (!AsmInfo) |
| return createStringError( |
| make_error_code(std::errc::not_supported), |
| Twine("BOLT-ERROR: no assembly info for target ", TripleName)); |
| // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump |
| // we want to emit such names as using @PLT without double quotes to convey |
| // variant kind to the assembler. BOLT doesn't rely on the linker so we can |
| // override the default AsmInfo behavior to emit names the way we want. |
| AsmInfo->setAllowAtInName(true); |
| |
| std::unique_ptr<const MCSubtargetInfo> STI( |
| TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr)); |
| if (!STI) |
| return createStringError( |
| make_error_code(std::errc::not_supported), |
| Twine("BOLT-ERROR: no subtarget info for target ", TripleName)); |
| |
| std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo()); |
| if (!MII) |
| return createStringError( |
| make_error_code(std::errc::not_supported), |
| Twine("BOLT-ERROR: no instruction info for target ", TripleName)); |
| |
| std::unique_ptr<MCContext> Ctx( |
| new MCContext(TheTriple, AsmInfo.get(), MRI.get(), STI.get())); |
| std::unique_ptr<MCObjectFileInfo> MOFI( |
| TheTarget->createMCObjectFileInfo(*Ctx, IsPIC)); |
| Ctx->setObjectFileInfo(MOFI.get()); |
| // We do not support X86 Large code model. Change this in the future. |
| bool Large = false; |
| if (TheTriple.getArch() == llvm::Triple::aarch64) |
| Large = true; |
| unsigned LSDAEncoding = |
| Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4; |
| if (IsPIC) { |
| LSDAEncoding = dwarf::DW_EH_PE_pcrel | |
| (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); |
| } |
| |
| std::unique_ptr<MCDisassembler> DisAsm( |
| TheTarget->createMCDisassembler(*STI, *Ctx)); |
| |
| if (!DisAsm) |
| return createStringError( |
| make_error_code(std::errc::not_supported), |
| Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); |
| |
| std::unique_ptr<const MCInstrAnalysis> MIA( |
| TheTarget->createMCInstrAnalysis(MII.get())); |
| if (!MIA) |
| return createStringError( |
| make_error_code(std::errc::not_supported), |
| Twine("BOLT-ERROR: failed to create instruction analysis for target ", |
| TripleName)); |
| |
| int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); |
| std::unique_ptr<MCInstPrinter> InstructionPrinter( |
| TheTarget->createMCInstPrinter(TheTriple, AsmPrinterVariant, *AsmInfo, |
| *MII, *MRI)); |
| if (!InstructionPrinter) |
| return createStringError( |
| make_error_code(std::errc::not_supported), |
| Twine("BOLT-ERROR: no instruction printer for target ", TripleName)); |
| InstructionPrinter->setPrintImmHex(true); |
| |
| std::unique_ptr<MCCodeEmitter> MCE( |
| TheTarget->createMCCodeEmitter(*MII, *Ctx)); |
| |
| auto BC = std::make_unique<BinaryContext>( |
| std::move(Ctx), std::move(DwCtx), std::make_unique<Triple>(TheTriple), |
| TheTarget, std::string(TripleName), std::move(MCE), std::move(MOFI), |
| std::move(AsmInfo), std::move(MII), std::move(STI), |
| std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI), |
| std::move(DisAsm), Logger); |
| |
| BC->LSDAEncoding = LSDAEncoding; |
| |
| BC->MAB = std::unique_ptr<MCAsmBackend>( |
| BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions())); |
| |
| BC->setFilename(InputFileName); |
| |
| BC->HasFixedLoadAddress = !IsPIC; |
| |
| BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>( |
| BC->TheTarget->createMCDisassembler(*BC->STI, *BC->Ctx)); |
| |
| if (!BC->SymbolicDisAsm) |
| return createStringError( |
| make_error_code(std::errc::not_supported), |
| Twine("BOLT-ERROR: no disassembler info for target ", TripleName)); |
| |
| return std::move(BC); |
| } |
| |
| bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const { |
| if (opts::HotText && |
| (SymbolName == "__hot_start" || SymbolName == "__hot_end")) |
| return true; |
| |
| if (opts::HotData && |
| (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end")) |
| return true; |
| |
| if (SymbolName == "_end") |
| return true; |
| |
| return false; |
| } |
| |
| std::unique_ptr<MCObjectWriter> |
| BinaryContext::createObjectWriter(raw_pwrite_stream &OS) { |
| return MAB->createObjectWriter(OS); |
| } |
| |
| bool BinaryContext::validateObjectNesting() const { |
| auto Itr = BinaryDataMap.begin(); |
| auto End = BinaryDataMap.end(); |
| bool Valid = true; |
| while (Itr != End) { |
| auto Next = std::next(Itr); |
| while (Next != End && |
| Itr->second->getSection() == Next->second->getSection() && |
| Itr->second->containsRange(Next->second->getAddress(), |
| Next->second->getSize())) { |
| if (Next->second->Parent != Itr->second) { |
| this->errs() << "BOLT-WARNING: object nesting incorrect for:\n" |
| << "BOLT-WARNING: " << *Itr->second << "\n" |
| << "BOLT-WARNING: " << *Next->second << "\n"; |
| Valid = false; |
| } |
| ++Next; |
| } |
| Itr = Next; |
| } |
| return Valid; |
| } |
| |
| bool BinaryContext::validateHoles() const { |
| bool Valid = true; |
| for (BinarySection &Section : sections()) { |
| for (const Relocation &Rel : Section.relocations()) { |
| uint64_t RelAddr = Rel.Offset + Section.getAddress(); |
| const BinaryData *BD = getBinaryDataContainingAddress(RelAddr); |
| if (!BD) { |
| this->errs() |
| << "BOLT-WARNING: no BinaryData found for relocation at address" |
| << " 0x" << Twine::utohexstr(RelAddr) << " in " << Section.getName() |
| << "\n"; |
| Valid = false; |
| } else if (!BD->getAtomicRoot()) { |
| this->errs() |
| << "BOLT-WARNING: no atomic BinaryData found for relocation at " |
| << "address 0x" << Twine::utohexstr(RelAddr) << " in " |
| << Section.getName() << "\n"; |
| Valid = false; |
| } |
| } |
| } |
| return Valid; |
| } |
| |
| void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) { |
| const uint64_t Address = GAI->second->getAddress(); |
| const uint64_t Size = GAI->second->getSize(); |
| |
| auto fixParents = [&](BinaryDataMapType::iterator Itr, |
| BinaryData *NewParent) { |
| BinaryData *OldParent = Itr->second->Parent; |
| Itr->second->Parent = NewParent; |
| ++Itr; |
| while (Itr != BinaryDataMap.end() && OldParent && |
| Itr->second->Parent == OldParent) { |
| Itr->second->Parent = NewParent; |
| ++Itr; |
| } |
| }; |
| |
| // Check if the previous symbol contains the newly added symbol. |
| if (GAI != BinaryDataMap.begin()) { |
| BinaryData *Prev = std::prev(GAI)->second; |
| while (Prev) { |
| if (Prev->getSection() == GAI->second->getSection() && |
| Prev->containsRange(Address, Size)) { |
| fixParents(GAI, Prev); |
| } else { |
| fixParents(GAI, nullptr); |
| } |
| Prev = Prev->Parent; |
| } |
| } |
| |
| // Check if the newly added symbol contains any subsequent symbols. |
| if (Size != 0) { |
| BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second; |
| auto Itr = std::next(GAI); |
| while ( |
| Itr != BinaryDataMap.end() && |
| BD->containsRange(Itr->second->getAddress(), Itr->second->getSize())) { |
| Itr->second->Parent = BD; |
| ++Itr; |
| } |
| } |
| } |
| |
| iterator_range<BinaryContext::binary_data_iterator> |
| BinaryContext::getSubBinaryData(BinaryData *BD) { |
| auto Start = std::next(BinaryDataMap.find(BD->getAddress())); |
| auto End = Start; |
| while (End != BinaryDataMap.end() && BD->isAncestorOf(End->second)) |
| ++End; |
| return make_range(Start, End); |
| } |
| |
| std::pair<const MCSymbol *, uint64_t> |
| BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF, |
| bool IsPCRel) { |
| if (isAArch64()) { |
| // Check if this is an access to a constant island and create bookkeeping |
| // to keep track of it and emit it later as part of this function. |
| if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address)) |
| return std::make_pair(IslandSym, 0); |
| |
| // Detect custom code written in assembly that refers to arbitrary |
| // constant islands from other functions. Write this reference so we |
| // can pull this constant island and emit it as part of this function |
| // too. |
| auto IslandIter = AddressToConstantIslandMap.lower_bound(Address); |
| |
| if (IslandIter != AddressToConstantIslandMap.begin() && |
| (IslandIter == AddressToConstantIslandMap.end() || |
| IslandIter->first > Address)) |
| --IslandIter; |
| |
| if (IslandIter != AddressToConstantIslandMap.end()) { |
| // Fall-back to referencing the original constant island in the presence |
| // of dynamic relocs, as we currently do not support cloning them. |
| // Notice: we might fail to link because of this, if the original constant |
| // island we are referring would be emitted too far away. |
| if (IslandIter->second->hasDynamicRelocationAtIsland()) { |
| MCSymbol *IslandSym = |
| IslandIter->second->getOrCreateIslandAccess(Address); |
| if (IslandSym) |
| return std::make_pair(IslandSym, 0); |
| } else if (MCSymbol *IslandSym = |
| IslandIter->second->getOrCreateProxyIslandAccess(Address, |
| BF)) { |
| BF.createIslandDependency(IslandSym, IslandIter->second); |
| return std::make_pair(IslandSym, 0); |
| } |
| } |
| } |
| |
| // Note that the address does not necessarily have to reside inside |
| // a section, it could be an absolute address too. |
| ErrorOr<BinarySection &> Section = getSectionForAddress(Address); |
| if (Section && Section->isText()) { |
| if (BF.containsAddress(Address, /*UseMaxSize=*/isAArch64())) { |
| if (Address != BF.getAddress()) { |
| // The address could potentially escape. Mark it as another entry |
| // point into the function. |
| if (opts::Verbosity >= 1) { |
| this->outs() << "BOLT-INFO: potentially escaped address 0x" |
| << Twine::utohexstr(Address) << " in function " << BF |
| << '\n'; |
| } |
| BF.HasInternalLabelReference = true; |
| return std::make_pair( |
| BF.addEntryPointAtOffset(Address - BF.getAddress()), 0); |
| } |
| } else { |
| addInterproceduralReference(&BF, Address); |
| } |
| } |
| |
| // With relocations, catch jump table references outside of the basic block |
| // containing the indirect jump. |
| if (HasRelocations) { |
| const MemoryContentsType MemType = analyzeMemoryAt(Address, BF); |
| if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) { |
| const MCSymbol *Symbol = |
| getOrCreateJumpTable(BF, Address, JumpTable::JTT_PIC); |
| |
| return std::make_pair(Symbol, 0); |
| } |
| } |
| |
| if (BinaryData *BD = getBinaryDataContainingAddress(Address)) |
| return std::make_pair(BD->getSymbol(), Address - BD->getAddress()); |
| |
| // TODO: use DWARF info to get size/alignment here? |
| MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, "DATAat"); |
| LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n'); |
| return std::make_pair(TargetSymbol, 0); |
| } |
| |
| MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address, |
| BinaryFunction &BF) { |
| if (!isX86()) |
| return MemoryContentsType::UNKNOWN; |
| |
| ErrorOr<BinarySection &> Section = getSectionForAddress(Address); |
| if (!Section) { |
| // No section - possibly an absolute address. Since we don't allow |
| // internal function addresses to escape the function scope - we |
| // consider it a tail call. |
| if (opts::Verbosity > 1) { |
| this->errs() << "BOLT-WARNING: no section for address 0x" |
| << Twine::utohexstr(Address) << " referenced from function " |
| << BF << '\n'; |
| } |
| return MemoryContentsType::UNKNOWN; |
| } |
| |
| if (Section->isVirtual()) { |
| // The contents are filled at runtime. |
| return MemoryContentsType::UNKNOWN; |
| } |
| |
| // No support for jump tables in code yet. |
| if (Section->isText()) |
| return MemoryContentsType::UNKNOWN; |
| |
| // Start with checking for PIC jump table. We expect non-PIC jump tables |
| // to have high 32 bits set to 0. |
| if (analyzeJumpTable(Address, JumpTable::JTT_PIC, BF)) |
| return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE; |
| |
| if (analyzeJumpTable(Address, JumpTable::JTT_NORMAL, BF)) |
| return MemoryContentsType::POSSIBLE_JUMP_TABLE; |
| |
| return MemoryContentsType::UNKNOWN; |
| } |
| |
| bool BinaryContext::analyzeJumpTable(const uint64_t Address, |
| const JumpTable::JumpTableType Type, |
| const BinaryFunction &BF, |
| const uint64_t NextJTAddress, |
| JumpTable::AddressesType *EntriesAsAddress, |
| bool *HasEntryInFragment) const { |
| // Target address of __builtin_unreachable. |
| const uint64_t UnreachableAddress = BF.getAddress() + BF.getSize(); |
| |
| // Is one of the targets __builtin_unreachable? |
| bool HasUnreachable = false; |
| |
| // Does one of the entries match function start address? |
| bool HasStartAsEntry = false; |
| |
| // Number of targets other than __builtin_unreachable. |
| uint64_t NumRealEntries = 0; |
| |
| // Size of the jump table without trailing __builtin_unreachable entries. |
| size_t TrimmedSize = 0; |
| |
| auto addEntryAddress = [&](uint64_t EntryAddress, bool Unreachable = false) { |
| if (!EntriesAsAddress) |
| return; |
| EntriesAsAddress->emplace_back(EntryAddress); |
| if (!Unreachable) |
| TrimmedSize = EntriesAsAddress->size(); |
| }; |
| |
| ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); |
| if (!Section) |
| return false; |
| |
| // The upper bound is defined by containing object, section limits, and |
| // the next jump table in memory. |
| uint64_t UpperBound = Section->getEndAddress(); |
| const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address); |
| if (JumpTableBD && JumpTableBD->getSize()) { |
| assert(JumpTableBD->getEndAddress() <= UpperBound && |
| "data object cannot cross a section boundary"); |
| UpperBound = JumpTableBD->getEndAddress(); |
| } |
| if (NextJTAddress) |
| UpperBound = std::min(NextJTAddress, UpperBound); |
| |
| LLVM_DEBUG({ |
| using JTT = JumpTable::JumpTableType; |
| dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n", |
| Address, BF.getPrintName(), |
| Type == JTT::JTT_PIC ? "PIC" : "Normal"); |
| }); |
| const uint64_t EntrySize = getJumpTableEntrySize(Type); |
| for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize; |
| EntryAddress += EntrySize) { |
| LLVM_DEBUG(dbgs() << " * Checking 0x" << Twine::utohexstr(EntryAddress) |
| << " -> "); |
| // Check if there's a proper relocation against the jump table entry. |
| if (HasRelocations) { |
| if (Type == JumpTable::JTT_PIC && |
| !DataPCRelocations.count(EntryAddress)) { |
| LLVM_DEBUG( |
| dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n"); |
| break; |
| } |
| if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(EntryAddress)) { |
| LLVM_DEBUG( |
| dbgs() |
| << "FAIL: JTT_NORMAL table, no relocation for this address\n"); |
| break; |
| } |
| } |
| |
| const uint64_t Value = |
| (Type == JumpTable::JTT_PIC) |
| ? Address + *getSignedValueAtAddress(EntryAddress, EntrySize) |
| : *getPointerAtAddress(EntryAddress); |
| |
| // __builtin_unreachable() case. |
| if (Value == UnreachableAddress) { |
| addEntryAddress(Value, /*Unreachable*/ true); |
| HasUnreachable = true; |
| LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value)); |
| continue; |
| } |
| |
| // Function start is another special case. It is allowed in the jump table, |
| // but we need at least one another regular entry to distinguish the table |
| // from, e.g. a function pointer array. |
| if (Value == BF.getAddress()) { |
| HasStartAsEntry = true; |
| addEntryAddress(Value); |
| continue; |
| } |
| |
| // Function or one of its fragments. |
| const BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value); |
| const bool DoesBelongToFunction = |
| BF.containsAddress(Value) || |
| (TargetBF && areRelatedFragments(TargetBF, &BF)); |
| if (!DoesBelongToFunction) { |
| LLVM_DEBUG({ |
| if (!BF.containsAddress(Value)) { |
| dbgs() << "FAIL: function doesn't contain this address\n"; |
| if (TargetBF) { |
| dbgs() << " ! function containing this address: " |
| << TargetBF->getPrintName() << '\n'; |
| if (TargetBF->isFragment()) { |
| dbgs() << " ! is a fragment"; |
| for (BinaryFunction *Parent : TargetBF->ParentFragments) |
| dbgs() << ", parent: " << Parent->getPrintName(); |
| dbgs() << '\n'; |
| } |
| } |
| } |
| }); |
| break; |
| } |
| |
| // Check there's an instruction at this offset. |
| if (TargetBF->getState() == BinaryFunction::State::Disassembled && |
| !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) { |
| LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value)); |
| break; |
| } |
| |
| ++NumRealEntries; |
| LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value)); |
| |
| if (TargetBF != &BF && HasEntryInFragment) |
| *HasEntryInFragment = true; |
| addEntryAddress(Value); |
| } |
| |
| // Trim direct/normal jump table to exclude trailing unreachable entries that |
| // can collide with a function address. |
| if (Type == JumpTable::JTT_NORMAL && EntriesAsAddress && |
| TrimmedSize != EntriesAsAddress->size() && |
| getBinaryFunctionAtAddress(UnreachableAddress)) |
| EntriesAsAddress->resize(TrimmedSize); |
| |
| // It's a jump table if the number of real entries is more than 1, or there's |
| // one real entry and one or more special targets. If there are only multiple |
| // special targets, then it's not a jump table. |
| return NumRealEntries + (HasUnreachable || HasStartAsEntry) >= 2; |
| } |
| |
| void BinaryContext::populateJumpTables() { |
| LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size() |
| << '\n'); |
| for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE; |
| ++JTI) { |
| JumpTable *JT = JTI->second; |
| |
| bool NonSimpleParent = false; |
| for (BinaryFunction *BF : JT->Parents) |
| NonSimpleParent |= !BF->isSimple(); |
| if (NonSimpleParent) |
| continue; |
| |
| uint64_t NextJTAddress = 0; |
| auto NextJTI = std::next(JTI); |
| if (NextJTI != JTE) |
| NextJTAddress = NextJTI->second->getAddress(); |
| |
| const bool Success = |
| analyzeJumpTable(JT->getAddress(), JT->Type, *(JT->Parents[0]), |
| NextJTAddress, &JT->EntriesAsAddress, &JT->IsSplit); |
| if (!Success) { |
| LLVM_DEBUG({ |
| dbgs() << "failed to analyze "; |
| JT->print(dbgs()); |
| if (NextJTI != JTE) { |
| dbgs() << "next "; |
| NextJTI->second->print(dbgs()); |
| } |
| }); |
| llvm_unreachable("jump table heuristic failure"); |
| } |
| for (BinaryFunction *Frag : JT->Parents) { |
| if (JT->IsSplit) |
| Frag->setHasIndirectTargetToSplitFragment(true); |
| for (uint64_t EntryAddress : JT->EntriesAsAddress) |
| // if target is builtin_unreachable |
| if (EntryAddress == Frag->getAddress() + Frag->getSize()) { |
| Frag->IgnoredBranches.emplace_back(EntryAddress - Frag->getAddress(), |
| Frag->getSize()); |
| } else if (EntryAddress >= Frag->getAddress() && |
| EntryAddress < Frag->getAddress() + Frag->getSize()) { |
| Frag->registerReferencedOffset(EntryAddress - Frag->getAddress()); |
| } |
| } |
| |
| // In strict mode, erase PC-relative relocation record. Later we check that |
| // all such records are erased and thus have been accounted for. |
| if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) { |
| for (uint64_t Address = JT->getAddress(); |
| Address < JT->getAddress() + JT->getSize(); |
| Address += JT->EntrySize) { |
| DataPCRelocations.erase(DataPCRelocations.find(Address)); |
| } |
| } |
| |
| // Mark to skip the function and all its fragments. |
| for (BinaryFunction *Frag : JT->Parents) |
| if (Frag->hasIndirectTargetToSplitFragment()) |
| addFragmentsToSkip(Frag); |
| } |
| |
| if (opts::StrictMode && DataPCRelocations.size()) { |
| LLVM_DEBUG({ |
| dbgs() << DataPCRelocations.size() |
| << " unclaimed PC-relative relocations left in data:\n"; |
| for (uint64_t Reloc : DataPCRelocations) |
| dbgs() << Twine::utohexstr(Reloc) << '\n'; |
| }); |
| assert(0 && "unclaimed PC-relative relocations left in data\n"); |
| } |
| clearList(DataPCRelocations); |
| } |
| |
| void BinaryContext::skipMarkedFragments() { |
| std::vector<BinaryFunction *> FragmentQueue; |
| // Copy the functions to FragmentQueue. |
| FragmentQueue.assign(FragmentsToSkip.begin(), FragmentsToSkip.end()); |
| auto addToWorklist = [&](BinaryFunction *Function) -> void { |
| if (FragmentsToSkip.count(Function)) |
| return; |
| FragmentQueue.push_back(Function); |
| addFragmentsToSkip(Function); |
| }; |
| // Functions containing split jump tables need to be skipped with all |
| // fragments (transitively). |
| for (size_t I = 0; I != FragmentQueue.size(); I++) { |
| BinaryFunction *BF = FragmentQueue[I]; |
| assert(FragmentsToSkip.count(BF) && |
| "internal error in traversing function fragments"); |
| if (opts::Verbosity >= 1) |
| this->errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n'; |
| BF->setSimple(false); |
| BF->setHasIndirectTargetToSplitFragment(true); |
| |
| llvm::for_each(BF->Fragments, addToWorklist); |
| llvm::for_each(BF->ParentFragments, addToWorklist); |
| } |
| if (!FragmentsToSkip.empty()) |
| this->errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size() |
| << " function" << (FragmentsToSkip.size() == 1 ? "" : "s") |
| << " due to cold fragments\n"; |
| } |
| |
| MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix, |
| uint64_t Size, |
| uint16_t Alignment, |
| unsigned Flags) { |
| auto Itr = BinaryDataMap.find(Address); |
| if (Itr != BinaryDataMap.end()) { |
| assert(Itr->second->getSize() == Size || !Size); |
| return Itr->second->getSymbol(); |
| } |
| |
| std::string Name = (Prefix + "0x" + Twine::utohexstr(Address)).str(); |
| assert(!GlobalSymbols.count(Name) && "created name is not unique"); |
| return registerNameAtAddress(Name, Address, Size, Alignment, Flags); |
| } |
| |
| MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) { |
| return Ctx->getOrCreateSymbol(Name); |
| } |
| |
| BinaryFunction *BinaryContext::createBinaryFunction( |
| const std::string &Name, BinarySection &Section, uint64_t Address, |
| uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) { |
| auto Result = BinaryFunctions.emplace( |
| Address, BinaryFunction(Name, Section, Address, Size, *this)); |
| assert(Result.second == true && "unexpected duplicate function"); |
| BinaryFunction *BF = &Result.first->second; |
| registerNameAtAddress(Name, Address, SymbolSize ? SymbolSize : Size, |
| Alignment); |
| setSymbolToFunctionMap(BF->getSymbol(), BF); |
| return BF; |
| } |
| |
| const MCSymbol * |
| BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address, |
| JumpTable::JumpTableType Type) { |
| // Two fragments of same function access same jump table |
| if (JumpTable *JT = getJumpTableContainingAddress(Address)) { |
| assert(JT->Type == Type && "jump table types have to match"); |
| assert(Address == JT->getAddress() && "unexpected non-empty jump table"); |
| |
| // Prevent associating a jump table to a specific fragment twice. |
| if (!llvm::is_contained(JT->Parents, &Function)) { |
| assert(llvm::all_of(JT->Parents, |
| [&](const BinaryFunction *BF) { |
| return areRelatedFragments(&Function, BF); |
| }) && |
| "cannot re-use jump table of a different function"); |
| // Duplicate the entry for the parent function for easy access |
| JT->Parents.push_back(&Function); |
| if (opts::Verbosity > 2) { |
| this->outs() << "BOLT-INFO: Multiple fragments access same jump table: " |
| << JT->Parents[0]->getPrintName() << "; " |
| << Function.getPrintName() << "\n"; |
| JT->print(this->outs()); |
| } |
| Function.JumpTables.emplace(Address, JT); |
| for (BinaryFunction *Parent : JT->Parents) |
| Parent->setHasIndirectTargetToSplitFragment(true); |
| } |
| |
| bool IsJumpTableParent = false; |
| (void)IsJumpTableParent; |
| for (BinaryFunction *Frag : JT->Parents) |
| if (Frag == &Function) |
| IsJumpTableParent = true; |
| assert(IsJumpTableParent && |
| "cannot re-use jump table of a different function"); |
| return JT->getFirstLabel(); |
| } |
| |
| // Re-use the existing symbol if possible. |
| MCSymbol *JTLabel = nullptr; |
| if (BinaryData *Object = getBinaryDataAtAddress(Address)) { |
| if (!isInternalSymbolName(Object->getSymbol()->getName())) |
| JTLabel = Object->getSymbol(); |
| } |
| |
| const uint64_t EntrySize = getJumpTableEntrySize(Type); |
| if (!JTLabel) { |
| const std::string JumpTableName = generateJumpTableName(Function, Address); |
| JTLabel = registerNameAtAddress(JumpTableName, Address, 0, EntrySize); |
| } |
| |
| LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName() |
| << " in function " << Function << '\n'); |
| |
| JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type, |
| JumpTable::LabelMapType{{0, JTLabel}}, |
| *getSectionForAddress(Address)); |
| JT->Parents.push_back(&Function); |
| if (opts::Verbosity > 2) |
| JT->print(this->outs()); |
| JumpTables.emplace(Address, JT); |
| |
| // Duplicate the entry for the parent function for easy access. |
| Function.JumpTables.emplace(Address, JT); |
| return JTLabel; |
| } |
| |
| std::pair<uint64_t, const MCSymbol *> |
| BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT, |
| const MCSymbol *OldLabel) { |
| auto L = scopeLock(); |
| unsigned Offset = 0; |
| bool Found = false; |
| for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) { |
| if (Elmt.second != OldLabel) |
| continue; |
| Offset = Elmt.first; |
| Found = true; |
| break; |
| } |
| assert(Found && "Label not found"); |
| (void)Found; |
| MCSymbol *NewLabel = Ctx->createNamedTempSymbol("duplicatedJT"); |
| JumpTable *NewJT = |
| new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type, |
| JumpTable::LabelMapType{{Offset, NewLabel}}, |
| *getSectionForAddress(JT->getAddress())); |
| NewJT->Parents = JT->Parents; |
| NewJT->Entries = JT->Entries; |
| NewJT->Counts = JT->Counts; |
| uint64_t JumpTableID = ++DuplicatedJumpTables; |
| // Invert it to differentiate from regular jump tables whose IDs are their |
| // addresses in the input binary memory space |
| JumpTableID = ~JumpTableID; |
| JumpTables.emplace(JumpTableID, NewJT); |
| Function.JumpTables.emplace(JumpTableID, NewJT); |
| return std::make_pair(JumpTableID, NewLabel); |
| } |
| |
| std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF, |
| uint64_t Address) { |
| size_t Id; |
| uint64_t Offset = 0; |
| if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) { |
| Offset = Address - JT->getAddress(); |
| auto JTLabelsIt = JT->Labels.find(Offset); |
| if (JTLabelsIt != JT->Labels.end()) |
| return std::string(JTLabelsIt->second->getName()); |
| |
| auto JTIdsIt = JumpTableIds.find(JT->getAddress()); |
| assert(JTIdsIt != JumpTableIds.end()); |
| Id = JTIdsIt->second; |
| } else { |
| Id = JumpTableIds[Address] = BF.JumpTables.size(); |
| } |
| return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(Id) + |
| (Offset ? ("." + std::to_string(Offset)) : "")); |
| } |
| |
| bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) { |
| // FIXME: aarch64 support is missing. |
| if (!isX86()) |
| return true; |
| |
| if (BF.getSize() == BF.getMaxSize()) |
| return true; |
| |
| ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData(); |
| assert(FunctionData && "cannot get function as data"); |
| |
| uint64_t Offset = BF.getSize(); |
| MCInst Instr; |
| uint64_t InstrSize = 0; |
| uint64_t InstrAddress = BF.getAddress() + Offset; |
| using std::placeholders::_1; |
| |
| // Skip instructions that satisfy the predicate condition. |
| auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) { |
| const uint64_t StartOffset = Offset; |
| for (; Offset < BF.getMaxSize(); |
| Offset += InstrSize, InstrAddress += InstrSize) { |
| if (!DisAsm->getInstruction(Instr, InstrSize, FunctionData->slice(Offset), |
| InstrAddress, nulls())) |
| break; |
| if (!Predicate(Instr)) |
| break; |
| } |
| |
| return Offset - StartOffset; |
| }; |
| |
| // Skip a sequence of zero bytes. |
| auto skipZeros = [&]() { |
| const uint64_t StartOffset = Offset; |
| for (; Offset < BF.getMaxSize(); ++Offset) |
| if ((*FunctionData)[Offset] != 0) |
| break; |
| |
| return Offset - StartOffset; |
| }; |
| |
| // Accept the whole padding area filled with breakpoints. |
| auto isBreakpoint = std::bind(&MCPlusBuilder::isBreakpoint, MIB.get(), _1); |
| if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize()) |
| return true; |
| |
| auto isNoop = std::bind(&MCPlusBuilder::isNoop, MIB.get(), _1); |
| |
| // Some functions have a jump to the next function or to the padding area |
| // inserted after the body. |
| auto isSkipJump = [&](const MCInst &Instr) { |
| uint64_t TargetAddress = 0; |
| if (MIB->isUnconditionalBranch(Instr) && |
| MIB->evaluateBranch(Instr, InstrAddress, InstrSize, TargetAddress)) { |
| if (TargetAddress >= InstrAddress + InstrSize && |
| TargetAddress <= BF.getAddress() + BF.getMaxSize()) { |
| return true; |
| } |
| } |
| return false; |
| }; |
| |
| // Skip over nops, jumps, and zero padding. Allow interleaving (this happens). |
| while (skipInstructions(isNoop) || skipInstructions(isSkipJump) || |
| skipZeros()) |
| ; |
| |
| if (Offset == BF.getMaxSize()) |
| return true; |
| |
| if (opts::Verbosity >= 1) { |
| this->errs() << "BOLT-WARNING: bad padding at address 0x" |
| << Twine::utohexstr(BF.getAddress() + BF.getSize()) |
| << " starting at offset " << (Offset - BF.getSize()) |
| << " in function " << BF << '\n' |
| << FunctionData->slice(BF.getSize(), |
| BF.getMaxSize() - BF.getSize()) |
| << '\n'; |
| } |
| |
| return false; |
| } |
| |
| void BinaryContext::adjustCodePadding() { |
| for (auto &BFI : BinaryFunctions) { |
| BinaryFunction &BF = BFI.second; |
| if (!shouldEmit(BF)) |
| continue; |
| |
| if (!hasValidCodePadding(BF)) { |
| if (HasRelocations) { |
| if (opts::Verbosity >= 1) { |
| this->outs() << "BOLT-INFO: function " << BF |
| << " has invalid padding. Ignoring the function.\n"; |
| } |
| BF.setIgnored(); |
| } else { |
| BF.setMaxSize(BF.getSize()); |
| } |
| } |
| } |
| } |
| |
| MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address, |
| uint64_t Size, |
| uint16_t Alignment, |
| unsigned Flags) { |
| // Register the name with MCContext. |
| MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name); |
| |
| auto GAI = BinaryDataMap.find(Address); |
| BinaryData *BD; |
| if (GAI == BinaryDataMap.end()) { |
| ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address); |
| BinarySection &Section = |
| SectionOrErr ? SectionOrErr.get() : absoluteSection(); |
| BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1, |
| Section, Flags); |
| GAI = BinaryDataMap.emplace(Address, BD).first; |
| GlobalSymbols[Name] = BD; |
| updateObjectNesting(GAI); |
| } else { |
| BD = GAI->second; |
| if (!BD->hasName(Name)) { |
| GlobalSymbols[Name] = BD; |
| BD->Symbols.push_back(Symbol); |
| } |
| } |
| |
| return Symbol; |
| } |
| |
| const BinaryData * |
| BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const { |
| auto NI = BinaryDataMap.lower_bound(Address); |
| auto End = BinaryDataMap.end(); |
| if ((NI != End && Address == NI->first) || |
| ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) { |
| if (NI->second->containsAddress(Address)) |
| return NI->second; |
| |
| // If this is a sub-symbol, see if a parent data contains the address. |
| const BinaryData *BD = NI->second->getParent(); |
| while (BD) { |
| if (BD->containsAddress(Address)) |
| return BD; |
| BD = BD->getParent(); |
| } |
| } |
| return nullptr; |
| } |
| |
| BinaryData *BinaryContext::getGOTSymbol() { |
| // First tries to find a global symbol with that name |
| BinaryData *GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_"); |
| if (GOTSymBD) |
| return GOTSymBD; |
| |
| // This symbol might be hidden from run-time link, so fetch the local |
| // definition if available. |
| GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_/1"); |
| if (!GOTSymBD) |
| return nullptr; |
| |
| // If the local symbol is not unique, fail |
| unsigned Index = 2; |
| SmallString<30> Storage; |
| while (const BinaryData *BD = |
| getBinaryDataByName(Twine("_GLOBAL_OFFSET_TABLE_/") |
| .concat(Twine(Index++)) |
| .toStringRef(Storage))) |
| if (BD->getAddress() != GOTSymBD->getAddress()) |
| return nullptr; |
| |
| return GOTSymBD; |
| } |
| |
| bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) { |
| auto NI = BinaryDataMap.find(Address); |
| assert(NI != BinaryDataMap.end()); |
| if (NI == BinaryDataMap.end()) |
| return false; |
| // TODO: it's possible that a jump table starts at the same address |
| // as a larger blob of private data. When we set the size of the |
| // jump table, it might be smaller than the total blob size. In this |
| // case we just leave the original size since (currently) it won't really |
| // affect anything. |
| assert((!NI->second->Size || NI->second->Size == Size || |
| (NI->second->isJumpTable() && NI->second->Size > Size)) && |
| "can't change the size of a symbol that has already had its " |
| "size set"); |
| if (!NI->second->Size) { |
| NI->second->Size = Size; |
| updateObjectNesting(NI); |
| return true; |
| } |
| return false; |
| } |
| |
| void BinaryContext::generateSymbolHashes() { |
| auto isPadding = [](const BinaryData &BD) { |
| StringRef Contents = BD.getSection().getContents(); |
| StringRef SymData = Contents.substr(BD.getOffset(), BD.getSize()); |
| return (BD.getName().starts_with("HOLEat") || |
| SymData.find_first_not_of(0) == StringRef::npos); |
| }; |
| |
| uint64_t NumCollisions = 0; |
| for (auto &Entry : BinaryDataMap) { |
| BinaryData &BD = *Entry.second; |
| StringRef Name = BD.getName(); |
| |
| if (!isInternalSymbolName(Name)) |
| continue; |
| |
| // First check if a non-anonymous alias exists and move it to the front. |
| if (BD.getSymbols().size() > 1) { |
| auto Itr = llvm::find_if(BD.getSymbols(), [&](const MCSymbol *Symbol) { |
| return !isInternalSymbolName(Symbol->getName()); |
| }); |
| if (Itr != BD.getSymbols().end()) { |
| size_t Idx = std::distance(BD.getSymbols().begin(), Itr); |
| std::swap(BD.getSymbols()[0], BD.getSymbols()[Idx]); |
| continue; |
| } |
| } |
| |
| // We have to skip 0 size symbols since they will all collide. |
| if (BD.getSize() == 0) { |
| continue; |
| } |
| |
| const uint64_t Hash = BD.getSection().hash(BD); |
| const size_t Idx = Name.find("0x"); |
| std::string NewName = |
| (Twine(Name.substr(0, Idx)) + "_" + Twine::utohexstr(Hash)).str(); |
| if (getBinaryDataByName(NewName)) { |
| // Ignore collisions for symbols that appear to be padding |
| // (i.e. all zeros or a "hole") |
| if (!isPadding(BD)) { |
| if (opts::Verbosity) { |
| this->errs() << "BOLT-WARNING: collision detected when hashing " << BD |
| << " with new name (" << NewName << "), skipping.\n"; |
| } |
| ++NumCollisions; |
| } |
| continue; |
| } |
| BD.Symbols.insert(BD.Symbols.begin(), Ctx->getOrCreateSymbol(NewName)); |
| GlobalSymbols[NewName] = &BD; |
| } |
| if (NumCollisions) { |
| this->errs() << "BOLT-WARNING: " << NumCollisions |
| << " collisions detected while hashing binary objects"; |
| if (!opts::Verbosity) |
| this->errs() << ". Use -v=1 to see the list."; |
| this->errs() << '\n'; |
| } |
| } |
| |
| bool BinaryContext::registerFragment(BinaryFunction &TargetFunction, |
| BinaryFunction &Function) { |
| assert(TargetFunction.isFragment() && "TargetFunction must be a fragment"); |
| if (TargetFunction.isChildOf(Function)) |
| return true; |
| TargetFunction.addParentFragment(Function); |
| Function.addFragment(TargetFunction); |
| FragmentClasses.unionSets(&TargetFunction, &Function); |
| if (!HasRelocations) { |
| TargetFunction.setSimple(false); |
| Function.setSimple(false); |
| } |
| if (opts::Verbosity >= 1) { |
| this->outs() << "BOLT-INFO: marking " << TargetFunction |
| << " as a fragment of " << Function << '\n'; |
| } |
| return true; |
| } |
| |
| void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF, |
| MCInst &LoadLowBits, |
| MCInst &LoadHiBits, |
| uint64_t Target) { |
| const MCSymbol *TargetSymbol; |
| uint64_t Addend = 0; |
| std::tie(TargetSymbol, Addend) = handleAddressRef(Target, BF, |
| /*IsPCRel*/ true); |
| int64_t Val; |
| MIB->replaceImmWithSymbolRef(LoadHiBits, TargetSymbol, Addend, Ctx.get(), Val, |
| ELF::R_AARCH64_ADR_PREL_PG_HI21); |
| MIB->replaceImmWithSymbolRef(LoadLowBits, TargetSymbol, Addend, Ctx.get(), |
| Val, ELF::R_AARCH64_ADD_ABS_LO12_NC); |
| } |
| |
| bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) { |
| BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address); |
| if (TargetFunction) |
| return false; |
| |
| ErrorOr<BinarySection &> Section = getSectionForAddress(Address); |
| assert(Section && "cannot get section for referenced address"); |
| if (!Section->isText()) |
| return false; |
| |
| bool Ret = false; |
| StringRef SectionContents = Section->getContents(); |
| uint64_t Offset = Address - Section->getAddress(); |
| const uint64_t MaxSize = SectionContents.size() - Offset; |
| const uint8_t *Bytes = |
| reinterpret_cast<const uint8_t *>(SectionContents.data()); |
| ArrayRef<uint8_t> Data(Bytes + Offset, MaxSize); |
| |
| auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions, |
| MCInst &Instruction, uint64_t Offset, |
| uint64_t AbsoluteInstrAddr, |
| uint64_t TotalSize) -> bool { |
| MCInst *TargetHiBits, *TargetLowBits; |
| uint64_t TargetAddress, Count; |
| Count = MIB->matchLinkerVeneer(Instructions.begin(), Instructions.end(), |
| AbsoluteInstrAddr, Instruction, TargetHiBits, |
| TargetLowBits, TargetAddress); |
| if (!Count) |
| return false; |
| |
| if (MatchOnly) |
| return true; |
| |
| // NOTE The target symbol was created during disassemble's |
| // handleExternalReference |
| const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, "FUNCat"); |
| BinaryFunction *Veneer = createBinaryFunction(VeneerSymbol->getName().str(), |
| *Section, Address, TotalSize); |
| addAdrpAddRelocAArch64(*Veneer, *TargetLowBits, *TargetHiBits, |
| TargetAddress); |
| MIB->addAnnotation(Instruction, "AArch64Veneer", true); |
| Veneer->addInstruction(Offset, std::move(Instruction)); |
| --Count; |
| for (auto It = Instructions.rbegin(); Count != 0; ++It, --Count) { |
| MIB->addAnnotation(It->second, "AArch64Veneer", true); |
| Veneer->addInstruction(It->first, std::move(It->second)); |
| } |
| |
| Veneer->getOrCreateLocalLabel(Address); |
| Veneer->setMaxSize(TotalSize); |
| Veneer->updateState(BinaryFunction::State::Disassembled); |
| LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" << Address |
| << "\n"); |
| return true; |
| }; |
| |
| uint64_t Size = 0, TotalSize = 0; |
| BinaryFunction::InstrMapType VeneerInstructions; |
| for (Offset = 0; Offset < MaxSize; Offset += Size) { |
| MCInst Instruction; |
| const uint64_t AbsoluteInstrAddr = Address + Offset; |
| if (!SymbolicDisAsm->getInstruction(Instruction, Size, Data.slice(Offset), |
| AbsoluteInstrAddr, nulls())) |
| break; |
| |
| TotalSize += Size; |
| if (MIB->isBranch(Instruction)) { |
| Ret = matchVeneer(VeneerInstructions, Instruction, Offset, |
| AbsoluteInstrAddr, TotalSize); |
| break; |
| } |
| |
| VeneerInstructions.emplace(Offset, std::move(Instruction)); |
| } |
| |
| return Ret; |
| } |
| |
| void BinaryContext::processInterproceduralReferences() { |
| for (const std::pair<BinaryFunction *, uint64_t> &It : |
| InterproceduralReferences) { |
| BinaryFunction &Function = *It.first; |
| uint64_t Address = It.second; |
| // Process interprocedural references from ignored functions in BAT mode |
| // (non-simple in non-relocation mode) to properly register entry points |
| if (!Address || (Function.isIgnored() && !HasBATSection)) |
| continue; |
| |
| BinaryFunction *TargetFunction = |
| getBinaryFunctionContainingAddress(Address); |
| if (&Function == TargetFunction) |
| continue; |
| |
| if (TargetFunction) { |
| if (TargetFunction->isFragment() && |
| !areRelatedFragments(TargetFunction, &Function)) { |
| this->errs() |
| << "BOLT-WARNING: interprocedural reference between unrelated " |
| "fragments: " |
| << Function.getPrintName() << " and " |
| << TargetFunction->getPrintName() << '\n'; |
| } |
| if (uint64_t Offset = Address - TargetFunction->getAddress()) |
| TargetFunction->addEntryPointAtOffset(Offset); |
| |
| continue; |
| } |
| |
| // Check if address falls in function padding space - this could be |
| // unmarked data in code. In this case adjust the padding space size. |
| ErrorOr<BinarySection &> Section = getSectionForAddress(Address); |
| assert(Section && "cannot get section for referenced address"); |
| |
| if (!Section->isText()) |
| continue; |
| |
| // PLT requires special handling and could be ignored in this context. |
| StringRef SectionName = Section->getName(); |
| if (SectionName == ".plt" || SectionName == ".plt.got") |
| continue; |
| |
| // Check if it is aarch64 veneer written at Address |
| if (isAArch64() && handleAArch64Veneer(Address)) |
| continue; |
| |
| if (opts::processAllFunctions()) { |
| this->errs() << "BOLT-ERROR: cannot process binaries with unmarked " |
| << "object in code at address 0x" |
| << Twine::utohexstr(Address) << " belonging to section " |
| << SectionName << " in current mode\n"; |
| exit(1); |
| } |
| |
| TargetFunction = getBinaryFunctionContainingAddress(Address, |
| /*CheckPastEnd=*/false, |
| /*UseMaxSize=*/true); |
| // We are not going to overwrite non-simple functions, but for simple |
| // ones - adjust the padding size. |
| if (TargetFunction && TargetFunction->isSimple()) { |
| this->errs() |
| << "BOLT-WARNING: function " << *TargetFunction |
| << " has an object detected in a padding region at address 0x" |
| << Twine::utohexstr(Address) << '\n'; |
| TargetFunction->setMaxSize(TargetFunction->getSize()); |
| } |
| } |
| |
| InterproceduralReferences.clear(); |
| } |
| |
| void BinaryContext::postProcessSymbolTable() { |
| fixBinaryDataHoles(); |
| bool Valid = true; |
| for (auto &Entry : BinaryDataMap) { |
| BinaryData *BD = Entry.second; |
| if ((BD->getName().starts_with("SYMBOLat") || |
| BD->getName().starts_with("DATAat")) && |
| !BD->getParent() && !BD->getSize() && !BD->isAbsolute() && |
| BD->getSection()) { |
| this->errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD |
| << "\n"; |
| Valid = false; |
| } |
| } |
| assert(Valid); |
| (void)Valid; |
| generateSymbolHashes(); |
| } |
| |
| void BinaryContext::foldFunction(BinaryFunction &ChildBF, |
| BinaryFunction &ParentBF) { |
| assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() && |
| "cannot merge functions with multiple entry points"); |
| |
| std::unique_lock<llvm::sys::RWMutex> WriteCtxLock(CtxMutex, std::defer_lock); |
| std::unique_lock<llvm::sys::RWMutex> WriteSymbolMapLock( |
| SymbolToFunctionMapMutex, std::defer_lock); |
| |
| const StringRef ChildName = ChildBF.getOneName(); |
| |
| // Move symbols over and update bookkeeping info. |
| for (MCSymbol *Symbol : ChildBF.getSymbols()) { |
| ParentBF.getSymbols().push_back(Symbol); |
| WriteSymbolMapLock.lock(); |
| SymbolToFunctionMap[Symbol] = &ParentBF; |
| WriteSymbolMapLock.unlock(); |
| // NB: there's no need to update BinaryDataMap and GlobalSymbols. |
| } |
| ChildBF.getSymbols().clear(); |
| |
| // Move other names the child function is known under. |
| llvm::move(ChildBF.Aliases, std::back_inserter(ParentBF.Aliases)); |
| ChildBF.Aliases.clear(); |
| |
| if (HasRelocations) { |
| // Merge execution counts of ChildBF into those of ParentBF. |
| // Without relocations, we cannot reliably merge profiles as both functions |
| // continue to exist and either one can be executed. |
| ChildBF.mergeProfileDataInto(ParentBF); |
| |
| std::shared_lock<llvm::sys::RWMutex> ReadBfsLock(BinaryFunctionsMutex, |
| std::defer_lock); |
| std::unique_lock<llvm::sys::RWMutex> WriteBfsLock(BinaryFunctionsMutex, |
| std::defer_lock); |
| // Remove ChildBF from the global set of functions in relocs mode. |
| ReadBfsLock.lock(); |
| auto FI = BinaryFunctions.find(ChildBF.getAddress()); |
| ReadBfsLock.unlock(); |
| |
| assert(FI != BinaryFunctions.end() && "function not found"); |
| assert(&ChildBF == &FI->second && "function mismatch"); |
| |
| WriteBfsLock.lock(); |
| ChildBF.clearDisasmState(); |
| FI = BinaryFunctions.erase(FI); |
| WriteBfsLock.unlock(); |
| |
| } else { |
| // In non-relocation mode we keep the function, but rename it. |
| std::string NewName = "__ICF_" + ChildName.str(); |
| |
| WriteCtxLock.lock(); |
| ChildBF.getSymbols().push_back(Ctx->getOrCreateSymbol(NewName)); |
| WriteCtxLock.unlock(); |
| |
| ChildBF.setFolded(&ParentBF); |
| } |
| |
| ParentBF.setHasFunctionsFoldedInto(); |
| } |
| |
| void BinaryContext::fixBinaryDataHoles() { |
| assert(validateObjectNesting() && "object nesting inconsistency detected"); |
| |
| for (BinarySection &Section : allocatableSections()) { |
| std::vector<std::pair<uint64_t, uint64_t>> Holes; |
| |
| auto isNotHole = [&Section](const binary_data_iterator &Itr) { |
| BinaryData *BD = Itr->second; |
| bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() && |
| (BD->getName().starts_with("SYMBOLat0x") || |
| BD->getName().starts_with("DATAat0x") || |
| BD->getName().starts_with("ANONYMOUS"))); |
| return !isHole && BD->getSection() == Section && !BD->getParent(); |
| }; |
| |
| auto BDStart = BinaryDataMap.begin(); |
| auto BDEnd = BinaryDataMap.end(); |
| auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd); |
| auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd); |
| |
| uint64_t EndAddress = Section.getAddress(); |
| |
| while (Itr != End) { |
| if (Itr->second->getAddress() > EndAddress) { |
| uint64_t Gap = Itr->second->getAddress() - EndAddress; |
| Holes.emplace_back(EndAddress, Gap); |
| } |
| EndAddress = Itr->second->getEndAddress(); |
| ++Itr; |
| } |
| |
| if (EndAddress < Section.getEndAddress()) |
| Holes.emplace_back(EndAddress, Section.getEndAddress() - EndAddress); |
| |
| // If there is already a symbol at the start of the hole, grow that symbol |
| // to cover the rest. Otherwise, create a new symbol to cover the hole. |
| for (std::pair<uint64_t, uint64_t> &Hole : Holes) { |
| BinaryData *BD = getBinaryDataAtAddress(Hole.first); |
| if (BD) { |
| // BD->getSection() can be != Section if there are sections that |
| // overlap. In this case it is probably safe to just skip the holes |
| // since the overlapping section will not(?) have any symbols in it. |
| if (BD->getSection() == Section) |
| setBinaryDataSize(Hole.first, Hole.second); |
| } else { |
| getOrCreateGlobalSymbol(Hole.first, "HOLEat", Hole.second, 1); |
| } |
| } |
| } |
| |
| assert(validateObjectNesting() && "object nesting inconsistency detected"); |
| assert(validateHoles() && "top level hole detected in object map"); |
| } |
| |
| void BinaryContext::printGlobalSymbols(raw_ostream &OS) const { |
| const BinarySection *CurrentSection = nullptr; |
| bool FirstSection = true; |
| |
| for (auto &Entry : BinaryDataMap) { |
| const BinaryData *BD = Entry.second; |
| const BinarySection &Section = BD->getSection(); |
| if (FirstSection || Section != *CurrentSection) { |
| uint64_t Address, Size; |
| StringRef Name = Section.getName(); |
| if (Section) { |
| Address = Section.getAddress(); |
| Size = Section.getSize(); |
| } else { |
| Address = BD->getAddress(); |
| Size = BD->getSize(); |
| } |
| OS << "BOLT-INFO: Section " << Name << ", " |
| << "0x" + Twine::utohexstr(Address) << ":" |
| << "0x" + Twine::utohexstr(Address + Size) << "/" << Size << "\n"; |
| CurrentSection = &Section; |
| FirstSection = false; |
| } |
| |
| OS << "BOLT-INFO: "; |
| const BinaryData *P = BD->getParent(); |
| while (P) { |
| OS << " "; |
| P = P->getParent(); |
| } |
| OS << *BD << "\n"; |
| } |
| } |
| |
| Expected<unsigned> BinaryContext::getDwarfFile( |
| StringRef Directory, StringRef FileName, unsigned FileNumber, |
| std::optional<MD5::MD5Result> Checksum, std::optional<StringRef> Source, |
| unsigned CUID, unsigned DWARFVersion) { |
| DwarfLineTable &Table = DwarfLineTablesCUMap[CUID]; |
| return Table.tryGetFile(Directory, FileName, Checksum, Source, DWARFVersion, |
| FileNumber); |
| } |
| |
| unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID, |
| const uint32_t SrcCUID, |
| unsigned FileIndex) { |
| DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(SrcCUID); |
| const DWARFDebugLine::LineTable *LineTable = |
| DwCtx->getLineTableForUnit(SrcUnit); |
| const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = |
| LineTable->Prologue.FileNames; |
| // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 |
| // means empty dir. |
| assert(FileIndex > 0 && FileIndex <= FileNames.size() && |
| "FileIndex out of range for the compilation unit."); |
| StringRef Dir = ""; |
| if (FileNames[FileIndex - 1].DirIdx != 0) { |
| if (std::optional<const char *> DirName = dwarf::toString( |
| LineTable->Prologue |
| .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) { |
| Dir = *DirName; |
| } |
| } |
| StringRef FileName = ""; |
| if (std::optional<const char *> FName = |
| dwarf::toString(FileNames[FileIndex - 1].Name)) |
| FileName = *FName; |
| assert(FileName != ""); |
| DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID); |
| return cantFail(getDwarfFile(Dir, FileName, 0, std::nullopt, std::nullopt, |
| DestCUID, DstUnit->getVersion())); |
| } |
| |
| std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() { |
| std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size()); |
| llvm::transform(llvm::make_second_range(BinaryFunctions), |
| SortedFunctions.begin(), |
| [](BinaryFunction &BF) { return &BF; }); |
| |
| llvm::stable_sort(SortedFunctions, |
| [](const BinaryFunction *A, const BinaryFunction *B) { |
| if (A->hasValidIndex() && B->hasValidIndex()) { |
| return A->getIndex() < B->getIndex(); |
| } |
| return A->hasValidIndex(); |
| }); |
| return SortedFunctions; |
| } |
| |
| std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() { |
| std::vector<BinaryFunction *> AllFunctions; |
| AllFunctions.reserve(BinaryFunctions.size() + InjectedBinaryFunctions.size()); |
| llvm::transform(llvm::make_second_range(BinaryFunctions), |
| std::back_inserter(AllFunctions), |
| [](BinaryFunction &BF) { return &BF; }); |
| llvm::copy(InjectedBinaryFunctions, std::back_inserter(AllFunctions)); |
| |
| return AllFunctions; |
| } |
| |
| std::optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) { |
| auto Iter = DWOCUs.find(DWOId); |
| if (Iter == DWOCUs.end()) |
| return std::nullopt; |
| |
| return Iter->second; |
| } |
| |
| DWARFContext *BinaryContext::getDWOContext() const { |
| if (DWOCUs.empty()) |
| return nullptr; |
| return &DWOCUs.begin()->second->getContext(); |
| } |
| |
| /// Handles DWO sections that can either be in .o, .dwo or .dwp files. |
| void BinaryContext::preprocessDWODebugInfo() { |
| for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { |
| DWARFUnit *const DwarfUnit = CU.get(); |
| if (std::optional<uint64_t> DWOId = DwarfUnit->getDWOId()) { |
| std::string DWOName = dwarf::toString( |
| DwarfUnit->getUnitDIE().find( |
| {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), |
| ""); |
| SmallString<16> AbsolutePath; |
| if (!opts::CompDirOverride.empty()) { |
| sys::path::append(AbsolutePath, opts::CompDirOverride); |
| sys::path::append(AbsolutePath, DWOName); |
| } |
| DWARFUnit *DWOCU = |
| DwarfUnit->getNonSkeletonUnitDIE(false, AbsolutePath).getDwarfUnit(); |
| if (!DWOCU->isDWOUnit()) { |
| this->outs() |
| << "BOLT-WARNING: Debug Fission: DWO debug information for " |
| << DWOName |
| << " was not retrieved and won't be updated. Please check " |
| "relative path.\n"; |
| continue; |
| } |
| DWOCUs[*DWOId] = DWOCU; |
| } |
| } |
| if (!DWOCUs.empty()) |
| this->outs() << "BOLT-INFO: processing split DWARF\n"; |
| } |
| |
| void BinaryContext::preprocessDebugInfo() { |
| struct CURange { |
| uint64_t LowPC; |
| uint64_t HighPC; |
| DWARFUnit *Unit; |
| |
| bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; } |
| }; |
| |
| // Building a map of address ranges to CUs similar to .debug_aranges and use |
| // it to assign CU to functions. |
| std::vector<CURange> AllRanges; |
| AllRanges.reserve(DwCtx->getNumCompileUnits()); |
| for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { |
| Expected<DWARFAddressRangesVector> RangesOrError = |
| CU->getUnitDIE().getAddressRanges(); |
| if (!RangesOrError) { |
| consumeError(RangesOrError.takeError()); |
| continue; |
| } |
| for (DWARFAddressRange &Range : *RangesOrError) { |
| // Parts of the debug info could be invalidated due to corresponding code |
| // being removed from the binary by the linker. Hence we check if the |
| // address is a valid one. |
| if (containsAddress(Range.LowPC)) |
| AllRanges.emplace_back(CURange{Range.LowPC, Range.HighPC, CU.get()}); |
| } |
| |
| ContainsDwarf5 |= CU->getVersion() >= 5; |
| ContainsDwarfLegacy |= CU->getVersion() < 5; |
| } |
| |
| llvm::sort(AllRanges); |
| for (auto &KV : BinaryFunctions) { |
| const uint64_t FunctionAddress = KV.first; |
| BinaryFunction &Function = KV.second; |
| |
| auto It = llvm::partition_point( |
| AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; }); |
| if (It != AllRanges.end() && It->LowPC <= FunctionAddress) |
| Function.setDWARFUnit(It->Unit); |
| } |
| |
| // Discover units with debug info that needs to be updated. |
| for (const auto &KV : BinaryFunctions) { |
| const BinaryFunction &BF = KV.second; |
| if (shouldEmit(BF) && BF.getDWARFUnit()) |
| ProcessedCUs.insert(BF.getDWARFUnit()); |
| } |
| |
| // Clear debug info for functions from units that we are not going to process. |
| for (auto &KV : BinaryFunctions) { |
| BinaryFunction &BF = KV.second; |
| if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit())) |
| BF.setDWARFUnit(nullptr); |
| } |
| |
| if (opts::Verbosity >= 1) { |
| this->outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of " |
| << DwCtx->getNumCompileUnits() << " CUs will be updated\n"; |
| } |
| |
| preprocessDWODebugInfo(); |
| |
| // Populate MCContext with DWARF files from all units. |
| StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix(); |
| for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) { |
| const uint64_t CUID = CU->getOffset(); |
| DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID); |
| BinaryLineTable.setLabel(Ctx->getOrCreateSymbol( |
| GlobalPrefix + "line_table_start" + Twine(CUID))); |
| |
| if (!ProcessedCUs.count(CU.get())) |
| continue; |
| |
| const DWARFDebugLine::LineTable *LineTable = |
| DwCtx->getLineTableForUnit(CU.get()); |
| const std::vector<DWARFDebugLine::FileNameEntry> &FileNames = |
| LineTable->Prologue.FileNames; |
| |
| uint16_t DwarfVersion = LineTable->Prologue.getVersion(); |
| if (DwarfVersion >= 5) { |
| std::optional<MD5::MD5Result> Checksum; |
| if (LineTable->Prologue.ContentTypes.HasMD5) |
| Checksum = LineTable->Prologue.FileNames[0].Checksum; |
| std::optional<const char *> Name = |
| dwarf::toString(CU->getUnitDIE().find(dwarf::DW_AT_name), nullptr); |
| if (std::optional<uint64_t> DWOID = CU->getDWOId()) { |
| auto Iter = DWOCUs.find(*DWOID); |
| assert(Iter != DWOCUs.end() && "DWO CU was not found."); |
| Name = dwarf::toString( |
| Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr); |
| } |
| BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum, |
| std::nullopt); |
| } |
| |
| BinaryLineTable.setDwarfVersion(DwarfVersion); |
| |
| // Assign a unique label to every line table, one per CU. |
| // Make sure empty debug line tables are registered too. |
| if (FileNames.empty()) { |
| cantFail(getDwarfFile("", "<unknown>", 0, std::nullopt, std::nullopt, |
| CUID, DwarfVersion)); |
| continue; |
| } |
| const uint32_t Offset = DwarfVersion < 5 ? 1 : 0; |
| for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) { |
| // Dir indexes start at 1, as DWARF file numbers, and a dir index 0 |
| // means empty dir. |
| StringRef Dir = ""; |
| if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5) |
| if (std::optional<const char *> DirName = dwarf::toString( |
| LineTable->Prologue |
| .IncludeDirectories[FileNames[I].DirIdx - Offset])) |
| Dir = *DirName; |
| StringRef FileName = ""; |
| if (std::optional<const char *> FName = |
| dwarf::toString(FileNames[I].Name)) |
| FileName = *FName; |
| assert(FileName != ""); |
| std::optional<MD5::MD5Result> Checksum; |
| if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5) |
| Checksum = LineTable->Prologue.FileNames[I].Checksum; |
| cantFail(getDwarfFile(Dir, FileName, 0, Checksum, std::nullopt, CUID, |
| DwarfVersion)); |
| } |
| } |
| } |
| |
| bool BinaryContext::shouldEmit(const BinaryFunction &Function) const { |
| if (Function.isPseudo()) |
| return false; |
| |
| if (opts::processAllFunctions()) |
| return true; |
| |
| if (Function.isIgnored()) |
| return false; |
| |
| // In relocation mode we will emit non-simple functions with CFG. |
| // If the function does not have a CFG it should be marked as ignored. |
| return HasRelocations || Function.isSimple(); |
| } |
| |
| void BinaryContext::dump(const MCInst &Inst) const { |
| if (LLVM_UNLIKELY(!InstPrinter)) { |
| dbgs() << "Cannot dump for InstPrinter is not initialized.\n"; |
| return; |
| } |
| InstPrinter->printInst(&Inst, 0, "", *STI, dbgs()); |
| dbgs() << "\n"; |
| } |
| |
| void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) { |
| uint32_t Operation = Inst.getOperation(); |
| switch (Operation) { |
| case MCCFIInstruction::OpSameValue: |
| OS << "OpSameValue Reg" << Inst.getRegister(); |
| break; |
| case MCCFIInstruction::OpRememberState: |
| OS << "OpRememberState"; |
| break; |
| case MCCFIInstruction::OpRestoreState: |
| OS << "OpRestoreState"; |
| break; |
| case MCCFIInstruction::OpOffset: |
| OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); |
| break; |
| case MCCFIInstruction::OpDefCfaRegister: |
| OS << "OpDefCfaRegister Reg" << Inst.getRegister(); |
| break; |
| case MCCFIInstruction::OpDefCfaOffset: |
| OS << "OpDefCfaOffset " << Inst.getOffset(); |
| break; |
| case MCCFIInstruction::OpDefCfa: |
| OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset(); |
| break; |
| case MCCFIInstruction::OpRelOffset: |
| OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset(); |
| break; |
| case MCCFIInstruction::OpAdjustCfaOffset: |
| OS << "OfAdjustCfaOffset " << Inst.getOffset(); |
| break; |
| case MCCFIInstruction::OpEscape: |
| OS << "OpEscape"; |
| break; |
| case MCCFIInstruction::OpRestore: |
| OS << "OpRestore Reg" << Inst.getRegister(); |
| break; |
| case MCCFIInstruction::OpUndefined: |
| OS << "OpUndefined Reg" << Inst.getRegister(); |
| break; |
| case MCCFIInstruction::OpRegister: |
| OS << "OpRegister Reg" << Inst.getRegister() << " Reg" |
| << Inst.getRegister2(); |
| break; |
| case MCCFIInstruction::OpWindowSave: |
| OS << "OpWindowSave"; |
| break; |
| case MCCFIInstruction::OpGnuArgsSize: |
| OS << "OpGnuArgsSize"; |
| break; |
| default: |
| OS << "Op#" << Operation; |
| break; |
| } |
| } |
| |
| MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const { |
| // For aarch64 and riscv, the ABI defines mapping symbols so we identify data |
| // in the code section (see IHI0056B). $x identifies a symbol starting code or |
| // the end of a data chunk inside code, $d identifies start of data. |
| if (isX86() || ELFSymbolRef(Symbol).getSize()) |
| return MarkerSymType::NONE; |
| |
| Expected<StringRef> NameOrError = Symbol.getName(); |
| Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType(); |
| |
| if (!TypeOrError || !NameOrError) |
| return MarkerSymType::NONE; |
| |
| if (*TypeOrError != SymbolRef::ST_Unknown) |
| return MarkerSymType::NONE; |
| |
| if (*NameOrError == "$x" || NameOrError->starts_with("$x.")) |
| return MarkerSymType::CODE; |
| |
| // $x<ISA> |
| if (isRISCV() && NameOrError->starts_with("$x")) |
| return MarkerSymType::CODE; |
| |
| if (*NameOrError == "$d" || NameOrError->starts_with("$d.")) |
| return MarkerSymType::DATA; |
| |
| return MarkerSymType::NONE; |
| } |
| |
| bool BinaryContext::isMarker(const SymbolRef &Symbol) const { |
| return getMarkerType(Symbol) != MarkerSymType::NONE; |
| } |
| |
| static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction, |
| const BinaryFunction *Function, |
| DWARFContext *DwCtx) { |
| DebugLineTableRowRef RowRef = |
| DebugLineTableRowRef::fromSMLoc(Instruction.getLoc()); |
| if (RowRef == DebugLineTableRowRef::NULL_ROW) |
| return; |
| |
| const DWARFDebugLine::LineTable *LineTable; |
| if (Function && Function->getDWARFUnit() && |
| Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) { |
| LineTable = Function->getDWARFLineTable(); |
| } else { |
| LineTable = DwCtx->getLineTableForUnit( |
| DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex)); |
| } |
| assert(LineTable && "line table expected for instruction with debug info"); |
| |
| const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1]; |
| StringRef FileName = ""; |
| if (std::optional<const char *> FName = |
| dwarf::toString(LineTable->Prologue.FileNames[Row.File - 1].Name)) |
| FileName = *FName; |
| OS << " # debug line " << FileName << ":" << Row.Line; |
| if (Row.Column) |
| OS << ":" << Row.Column; |
| if (Row.Discriminator) |
| OS << " discriminator:" << Row.Discriminator; |
| } |
| |
| void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction, |
| uint64_t Offset, |
| const BinaryFunction *Function, |
| bool PrintMCInst, bool PrintMemData, |
| bool PrintRelocations, |
| StringRef Endl) const { |
| OS << format(" %08" PRIx64 ": ", Offset); |
| if (MIB->isCFI(Instruction)) { |
| uint32_t Offset = Instruction.getOperand(0).getImm(); |
| OS << "\t!CFI\t$" << Offset << "\t; "; |
| if (Function) |
| printCFI(OS, *Function->getCFIFor(Instruction)); |
| OS << Endl; |
| return; |
| } |
| if (std::optional<uint32_t> DynamicID = |
| MIB->getDynamicBranchID(Instruction)) { |
| OS << "\tjit\t" << MIB->getTargetSymbol(Instruction)->getName() |
| << " # ID: " << DynamicID; |
| } else { |
| InstPrinter->printInst(&Instruction, 0, "", *STI, OS); |
| } |
| if (MIB->isCall(Instruction)) { |
| if (MIB->isTailCall(Instruction)) |
| OS << " # TAILCALL "; |
| if (MIB->isInvoke(Instruction)) { |
| const std::optional<MCPlus::MCLandingPad> EHInfo = |
| MIB->getEHInfo(Instruction); |
| OS << " # handler: "; |
| if (EHInfo->first) |
| OS << *EHInfo->first; |
| else |
| OS << '0'; |
| OS << "; action: " << EHInfo->second; |
| const int64_t GnuArgsSize = MIB->getGnuArgsSize(Instruction); |
| if (GnuArgsSize >= 0) |
| OS << "; GNU_args_size = " << GnuArgsSize; |
| } |
| } else if (MIB->isIndirectBranch(Instruction)) { |
| if (uint64_t JTAddress = MIB->getJumpTable(Instruction)) { |
| OS << " # JUMPTABLE @0x" << Twine::utohexstr(JTAddress); |
| } else { |
| OS << " # UNKNOWN CONTROL FLOW"; |
| } |
| } |
| if (std::optional<uint32_t> Offset = MIB->getOffset(Instruction)) |
| OS << " # Offset: " << *Offset; |
| if (std::optional<uint32_t> Size = MIB->getSize(Instruction)) |
| OS << " # Size: " << *Size; |
| if (MCSymbol *Label = MIB->getInstLabel(Instruction)) |
| OS << " # Label: " << *Label; |
| |
| MIB->printAnnotations(Instruction, OS); |
| |
| if (opts::PrintDebugInfo) |
| printDebugInfo(OS, Instruction, Function, DwCtx.get()); |
| |
| if ((opts::PrintRelocations || PrintRelocations) && Function) { |
| const uint64_t Size = computeCodeSize(&Instruction, &Instruction + 1); |
| Function->printRelocations(OS, Offset, Size); |
| } |
| |
| OS << Endl; |
| |
| if (PrintMCInst) { |
| Instruction.dump_pretty(OS, InstPrinter.get()); |
| OS << Endl; |
| } |
| } |
| |
| std::optional<uint64_t> |
| BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress, |
| uint64_t FileOffset) const { |
| // Find a segment with a matching file offset. |
| for (auto &KV : SegmentMapInfo) { |
| const SegmentInfo &SegInfo = KV.second; |
| // FileOffset is got from perf event, |
| // and it is equal to alignDown(SegInfo.FileOffset, pagesize). |
| // If the pagesize is not equal to SegInfo.Alignment. |
| // FileOffset and SegInfo.FileOffset should be aligned first, |
| // and then judge whether they are equal. |
| if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == |
| alignDown(FileOffset, SegInfo.Alignment)) { |
| // The function's offset from base address in VAS is aligned by pagesize |
| // instead of SegInfo.Alignment. Pagesize can't be got from perf events. |
| // However, The ELF document says that SegInfo.FileOffset should equal |
| // to SegInfo.Address, modulo the pagesize. |
| // Reference: https://refspecs.linuxfoundation.org/elf/elf.pdf |
| |
| // So alignDown(SegInfo.Address, pagesize) can be calculated by: |
| // alignDown(SegInfo.Address, pagesize) |
| // = SegInfo.Address - (SegInfo.Address % pagesize) |
| // = SegInfo.Address - (SegInfo.FileOffset % pagesize) |
| // = SegInfo.Address - SegInfo.FileOffset + |
| // alignDown(SegInfo.FileOffset, pagesize) |
| // = SegInfo.Address - SegInfo.FileOffset + FileOffset |
| return MMapAddress - (SegInfo.Address - SegInfo.FileOffset + FileOffset); |
| } |
| } |
| |
| return std::nullopt; |
| } |
| |
| ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) { |
| auto SI = AddressToSection.upper_bound(Address); |
| if (SI != AddressToSection.begin()) { |
| --SI; |
| uint64_t UpperBound = SI->first + SI->second->getSize(); |
| if (!SI->second->getSize()) |
| UpperBound += 1; |
| if (UpperBound > Address) |
| return *SI->second; |
| } |
| return std::make_error_code(std::errc::bad_address); |
| } |
| |
| ErrorOr<StringRef> |
| BinaryContext::getSectionNameForAddress(uint64_t Address) const { |
| if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address)) |
| return Section->getName(); |
| return std::make_error_code(std::errc::bad_address); |
| } |
| |
| BinarySection &BinaryContext::registerSection(BinarySection *Section) { |
| auto Res = Sections.insert(Section); |
| (void)Res; |
| assert(Res.second && "can't register the same section twice."); |
| |
| // Only register allocatable sections in the AddressToSection map. |
| if (Section->isAllocatable() && Section->getAddress()) |
| AddressToSection.insert(std::make_pair(Section->getAddress(), Section)); |
| NameToSection.insert( |
| std::make_pair(std::string(Section->getName()), Section)); |
| if (Section->hasSectionRef()) |
| SectionRefToBinarySection.insert( |
| std::make_pair(Section->getSectionRef(), Section)); |
| |
| LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n"); |
| return *Section; |
| } |
| |
| BinarySection &BinaryContext::registerSection(SectionRef Section) { |
| return registerSection(new BinarySection(*this, Section)); |
| } |
| |
| BinarySection & |
| BinaryContext::registerSection(const Twine &SectionName, |
| const BinarySection &OriginalSection) { |
| return registerSection( |
| new BinarySection(*this, SectionName, OriginalSection)); |
| } |
| |
| BinarySection & |
| BinaryContext::registerOrUpdateSection(const Twine &Name, unsigned ELFType, |
| unsigned ELFFlags, uint8_t *Data, |
| uint64_t Size, unsigned Alignment) { |
| auto NamedSections = getSectionByName(Name); |
| if (NamedSections.begin() != NamedSections.end()) { |
| assert(std::next(NamedSections.begin()) == NamedSections.end() && |
| "can only update unique sections"); |
| BinarySection *Section = NamedSections.begin()->second; |
| |
| LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> "); |
| const bool Flag = Section->isAllocatable(); |
| (void)Flag; |
| Section->update(Data, Size, Alignment, ELFType, ELFFlags); |
| LLVM_DEBUG(dbgs() << *Section << "\n"); |
| // FIXME: Fix section flags/attributes for MachO. |
| if (isELF()) |
| assert(Flag == Section->isAllocatable() && |
| "can't change section allocation status"); |
| return *Section; |
| } |
| |
| return registerSection( |
| new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags)); |
| } |
| |
| void BinaryContext::deregisterSectionName(const BinarySection &Section) { |
| auto NameRange = NameToSection.equal_range(Section.getName().str()); |
| while (NameRange.first != NameRange.second) { |
| if (NameRange.first->second == &Section) { |
| NameToSection.erase(NameRange.first); |
| break; |
| } |
| ++NameRange.first; |
| } |
| } |
| |
| void BinaryContext::deregisterUnusedSections() { |
| ErrorOr<BinarySection &> AbsSection = getUniqueSectionByName("<absolute>"); |
| for (auto SI = Sections.begin(); SI != Sections.end();) { |
| BinarySection *Section = *SI; |
| // We check getOutputData() instead of getOutputSize() because sometimes |
| // zero-sized .text.cold sections are allocated. |
| if (Section->hasSectionRef() || Section->getOutputData() || |
| (AbsSection && Section == &AbsSection.get())) { |
| ++SI; |
| continue; |
| } |
| |
| LLVM_DEBUG(dbgs() << "LLVM-DEBUG: deregistering " << Section->getName() |
| << '\n';); |
| deregisterSectionName(*Section); |
| SI = Sections.erase(SI); |
| delete Section; |
| } |
| } |
| |
| bool BinaryContext::deregisterSection(BinarySection &Section) { |
| BinarySection *SectionPtr = &Section; |
| auto Itr = Sections.find(SectionPtr); |
| if (Itr != Sections.end()) { |
| auto Range = AddressToSection.equal_range(SectionPtr->getAddress()); |
| while (Range.first != Range.second) { |
| if (Range.first->second == SectionPtr) { |
| AddressToSection.erase(Range.first); |
| break; |
| } |
| ++Range.first; |
| } |
| |
| deregisterSectionName(*SectionPtr); |
| Sections.erase(Itr); |
| delete SectionPtr; |
| return true; |
| } |
| return false; |
| } |
| |
| void BinaryContext::renameSection(BinarySection &Section, |
| const Twine &NewName) { |
| auto Itr = Sections.find(&Section); |
| assert(Itr != Sections.end() && "Section must exist to be renamed."); |
| Sections.erase(Itr); |
| |
| deregisterSectionName(Section); |
| |
| Section.Name = NewName.str(); |
| Section.setOutputName(Section.Name); |
| |
| NameToSection.insert(std::make_pair(Section.Name, &Section)); |
| |
| // Reinsert with the new name. |
| Sections.insert(&Section); |
| } |
| |
| void BinaryContext::printSections(raw_ostream &OS) const { |
| for (BinarySection *const &Section : Sections) |
| OS << "BOLT-INFO: " << *Section << "\n"; |
| } |
| |
| BinarySection &BinaryContext::absoluteSection() { |
| if (ErrorOr<BinarySection &> Section = getUniqueSectionByName("<absolute>")) |
| return *Section; |
| return registerOrUpdateSection("<absolute>", ELF::SHT_NULL, 0u); |
| } |
| |
| ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address, |
| size_t Size) const { |
| const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); |
| if (!Section) |
| return std::make_error_code(std::errc::bad_address); |
| |
| if (Section->isVirtual()) |
| return 0; |
| |
| DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), |
| AsmInfo->getCodePointerSize()); |
| auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); |
| return DE.getUnsigned(&ValueOffset, Size); |
| } |
| |
| ErrorOr<int64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address, |
| size_t Size) const { |
| const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); |
| if (!Section) |
| return std::make_error_code(std::errc::bad_address); |
| |
| if (Section->isVirtual()) |
| return 0; |
| |
| DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(), |
| AsmInfo->getCodePointerSize()); |
| auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress()); |
| return DE.getSigned(&ValueOffset, Size); |
| } |
| |
| void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol, |
| uint64_t Type, uint64_t Addend, |
| uint64_t Value) { |
| ErrorOr<BinarySection &> Section = getSectionForAddress(Address); |
| assert(Section && "cannot find section for address"); |
| Section->addRelocation(Address - Section->getAddress(), Symbol, Type, Addend, |
| Value); |
| } |
| |
| void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol, |
| uint64_t Type, uint64_t Addend, |
| uint64_t Value) { |
| ErrorOr<BinarySection &> Section = getSectionForAddress(Address); |
| assert(Section && "cannot find section for address"); |
| Section->addDynamicRelocation(Address - Section->getAddress(), Symbol, Type, |
| Addend, Value); |
| } |
| |
| bool BinaryContext::removeRelocationAt(uint64_t Address) { |
| ErrorOr<BinarySection &> Section = getSectionForAddress(Address); |
| assert(Section && "cannot find section for address"); |
| return Section->removeRelocationAt(Address - Section->getAddress()); |
| } |
| |
| const Relocation *BinaryContext::getRelocationAt(uint64_t Address) const { |
| ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); |
| if (!Section) |
| return nullptr; |
| |
| return Section->getRelocationAt(Address - Section->getAddress()); |
| } |
| |
| const Relocation * |
| BinaryContext::getDynamicRelocationAt(uint64_t Address) const { |
| ErrorOr<const BinarySection &> Section = getSectionForAddress(Address); |
| if (!Section) |
| return nullptr; |
| |
| return Section->getDynamicRelocationAt(Address - Section->getAddress()); |
| } |
| |
| void BinaryContext::markAmbiguousRelocations(BinaryData &BD, |
| const uint64_t Address) { |
| auto setImmovable = [&](BinaryData &BD) { |
| BinaryData *Root = BD.getAtomicRoot(); |
| LLVM_DEBUG(if (Root->isMoveable()) { |
| dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable " |
| << "due to ambiguous relocation referencing 0x" |
| << Twine::utohexstr(Address) << '\n'; |
| }); |
| Root->setIsMoveable(false); |
| }; |
| |
| if (Address == BD.getAddress()) { |
| setImmovable(BD); |
| |
| // Set previous symbol as immovable |
| BinaryData *Prev = getBinaryDataContainingAddress(Address - 1); |
| if (Prev && Prev->getEndAddress() == BD.getAddress()) |
| setImmovable(*Prev); |
| } |
| |
| if (Address == BD.getEndAddress()) { |
| setImmovable(BD); |
| |
| // Set next symbol as immovable |
| BinaryData *Next = getBinaryDataContainingAddress(BD.getEndAddress()); |
| if (Next && Next->getAddress() == BD.getEndAddress()) |
| setImmovable(*Next); |
| } |
| } |
| |
| BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol, |
| uint64_t *EntryDesc) { |
| std::shared_lock<llvm::sys::RWMutex> Lock(SymbolToFunctionMapMutex); |
| auto BFI = SymbolToFunctionMap.find(Symbol); |
| if (BFI == SymbolToFunctionMap.end()) |
| return nullptr; |
| |
| BinaryFunction *BF = BFI->second; |
| if (EntryDesc) |
| *EntryDesc = BF->getEntryIDForSymbol(Symbol); |
| |
| return BF; |
| } |
| |
| std::string |
| BinaryContext::generateBugReportMessage(StringRef Message, |
| const BinaryFunction &Function) const { |
| std::string Msg; |
| raw_string_ostream SS(Msg); |
| SS << "=======================================\n"; |
| SS << "BOLT is unable to proceed because it couldn't properly understand " |
| "this function.\n"; |
| SS << "If you are running the most recent version of BOLT, you may " |
| "want to " |
| "report this and paste this dump.\nPlease check that there is no " |
| "sensitive contents being shared in this dump.\n"; |
| SS << "\nOffending function: " << Function.getPrintName() << "\n\n"; |
| ScopedPrinter SP(SS); |
| SP.printBinaryBlock("Function contents", *Function.getData()); |
| SS << "\n"; |
| const_cast<BinaryFunction &>(Function).print(SS, ""); |
| SS << "ERROR: " << Message; |
| SS << "\n=======================================\n"; |
| return Msg; |
| } |
| |
| BinaryFunction * |
| BinaryContext::createInjectedBinaryFunction(const std::string &Name, |
| bool IsSimple) { |
| InjectedBinaryFunctions.push_back(new BinaryFunction(Name, *this, IsSimple)); |
| BinaryFunction *BF = InjectedBinaryFunctions.back(); |
| setSymbolToFunctionMap(BF->getSymbol(), BF); |
| BF->CurrentState = BinaryFunction::State::CFG; |
| return BF; |
| } |
| |
| std::pair<size_t, size_t> |
| BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) { |
| // Adjust branch instruction to match the current layout. |
| if (FixBranches) |
| BF.fixBranches(); |
| |
| // Create local MC context to isolate the effect of ephemeral code emission. |
| IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter(); |
| MCContext *LocalCtx = MCEInstance.LocalCtx.get(); |
| MCAsmBackend *MAB = |
| TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions()); |
| |
| SmallString<256> Code; |
| raw_svector_ostream VecOS(Code); |
| |
| std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS); |
| std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer( |
| *TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW), |
| std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI)); |
| |
| Streamer->initSections(false, *STI); |
| |
| MCSection *Section = MCEInstance.LocalMOFI->getTextSection(); |
| Section->setHasInstructions(true); |
| |
| // Create symbols in the LocalCtx so that they get destroyed with it. |
| MCSymbol *StartLabel = LocalCtx->createTempSymbol(); |
| MCSymbol *EndLabel = LocalCtx->createTempSymbol(); |
| |
| Streamer->switchSection(Section); |
| Streamer->emitLabel(StartLabel); |
| emitFunctionBody(*Streamer, BF, BF.getLayout().getMainFragment(), |
| /*EmitCodeOnly=*/true); |
| Streamer->emitLabel(EndLabel); |
| |
| using LabelRange = std::pair<const MCSymbol *, const MCSymbol *>; |
| SmallVector<LabelRange> SplitLabels; |
| for (FunctionFragment &FF : BF.getLayout().getSplitFragments()) { |
| MCSymbol *const SplitStartLabel = LocalCtx->createTempSymbol(); |
| MCSymbol *const SplitEndLabel = LocalCtx->createTempSymbol(); |
| SplitLabels.emplace_back(SplitStartLabel, SplitEndLabel); |
| |
| MCSectionELF *const SplitSection = LocalCtx->getELFSection( |
| BF.getCodeSectionName(FF.getFragmentNum()), ELF::SHT_PROGBITS, |
| ELF::SHF_EXECINSTR | ELF::SHF_ALLOC); |
| SplitSection->setHasInstructions(true); |
| Streamer->switchSection(SplitSection); |
| |
| Streamer->emitLabel(SplitStartLabel); |
| emitFunctionBody(*Streamer, BF, FF, /*EmitCodeOnly=*/true); |
| Streamer->emitLabel(SplitEndLabel); |
| } |
| |
| MCAssembler &Assembler = |
| static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler(); |
| Assembler.layout(); |
| |
| // Obtain fragment sizes. |
| std::vector<uint64_t> FragmentSizes; |
| // Main fragment size. |
| const uint64_t HotSize = Assembler.getSymbolOffset(*EndLabel) - |
| Assembler.getSymbolOffset(*StartLabel); |
| FragmentSizes.push_back(HotSize); |
| // Split fragment sizes. |
| uint64_t ColdSize = 0; |
| for (const auto &Labels : SplitLabels) { |
| uint64_t Size = Assembler.getSymbolOffset(*Labels.second) - |
| Assembler.getSymbolOffset(*Labels.first); |
| FragmentSizes.push_back(Size); |
| ColdSize += Size; |
| } |
| |
| // Populate new start and end offsets of each basic block. |
| uint64_t FragmentIndex = 0; |
| for (FunctionFragment &FF : BF.getLayout().fragments()) { |
| BinaryBasicBlock *PrevBB = nullptr; |
| for (BinaryBasicBlock *BB : FF) { |
| const uint64_t BBStartOffset = |
| Assembler.getSymbolOffset(*(BB->getLabel())); |
| BB->setOutputStartAddress(BBStartOffset); |
| if (PrevBB) |
| PrevBB->setOutputEndAddress(BBStartOffset); |
| PrevBB = BB; |
| } |
| if (PrevBB) |
| PrevBB->setOutputEndAddress(FragmentSizes[FragmentIndex]); |
| FragmentIndex++; |
| } |
| |
| // Clean-up the effect of the code emission. |
| for (const MCSymbol &Symbol : Assembler.symbols()) { |
| MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol); |
| MutableSymbol->setUndefined(); |
| MutableSymbol->setIsRegistered(false); |
| } |
| |
| return std::make_pair(HotSize, ColdSize); |
| } |
| |
| bool BinaryContext::validateInstructionEncoding( |
| ArrayRef<uint8_t> InputSequence) const { |
| MCInst Inst; |
| uint64_t InstSize; |
| DisAsm->getInstruction(Inst, InstSize, InputSequence, 0, nulls()); |
| assert(InstSize == InputSequence.size() && |
| "Disassembled instruction size does not match the sequence."); |
| |
| SmallString<256> Code; |
| SmallVector<MCFixup, 4> Fixups; |
| |
| MCE->encodeInstruction(Inst, Code, Fixups, *STI); |
| auto OutputSequence = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size()); |
| if (InputSequence != OutputSequence) { |
| if (opts::Verbosity > 1) { |
| this->errs() << "BOLT-WARNING: mismatched encoding detected\n" |
| << " input: " << InputSequence << '\n' |
| << " output: " << OutputSequence << '\n'; |
| } |
| return false; |
| } |
| |
| return true; |
| } |
| |
| uint64_t BinaryContext::getHotThreshold() const { |
| static uint64_t Threshold = 0; |
| if (Threshold == 0) { |
| Threshold = std::max( |
| (uint64_t)opts::ExecutionCountThreshold, |
| NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1); |
| } |
| return Threshold; |
| } |
| |
| BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress( |
| uint64_t Address, bool CheckPastEnd, bool UseMaxSize) { |
| auto FI = BinaryFunctions.upper_bound(Address); |
| if (FI == BinaryFunctions.begin()) |
| return nullptr; |
| --FI; |
| |
| const uint64_t UsedSize = |
| UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize(); |
| |
| if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0)) |
| return nullptr; |
| |
| return &FI->second; |
| } |
| |
| BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) { |
| // First, try to find a function starting at the given address. If the |
| // function was folded, this will get us the original folded function if it |
| // wasn't removed from the list, e.g. in non-relocation mode. |
| auto BFI = BinaryFunctions.find(Address); |
| if (BFI != BinaryFunctions.end()) |
| return &BFI->second; |
| |
| // We might have folded the function matching the object at the given |
| // address. In such case, we look for a function matching the symbol |
| // registered at the original address. The new function (the one that the |
| // original was folded into) will hold the symbol. |
| if (const BinaryData *BD = getBinaryDataAtAddress(Address)) { |
| uint64_t EntryID = 0; |
| BinaryFunction *BF = getFunctionForSymbol(BD->getSymbol(), &EntryID); |
| if (BF && EntryID == 0) |
| return BF; |
| } |
| return nullptr; |
| } |
| |
| /// Deregister JumpTable registered at a given \p Address and delete it. |
| void BinaryContext::deleteJumpTable(uint64_t Address) { |
| assert(JumpTables.count(Address) && "Must have a jump table at address"); |
| JumpTable *JT = JumpTables.at(Address); |
| for (BinaryFunction *Parent : JT->Parents) |
| Parent->JumpTables.erase(Address); |
| JumpTables.erase(Address); |
| delete JT; |
| } |
| |
| DebugAddressRangesVector BinaryContext::translateModuleAddressRanges( |
| const DWARFAddressRangesVector &InputRanges) const { |
| DebugAddressRangesVector OutputRanges; |
| |
| for (const DWARFAddressRange Range : InputRanges) { |
| auto BFI = BinaryFunctions.lower_bound(Range.LowPC); |
| while (BFI != BinaryFunctions.end()) { |
| const BinaryFunction &Function = BFI->second; |
| if (Function.getAddress() >= Range.HighPC) |
| break; |
| const DebugAddressRangesVector FunctionRanges = |
| Function.getOutputAddressRanges(); |
| llvm::move(FunctionRanges, std::back_inserter(OutputRanges)); |
| std::advance(BFI, 1); |
| } |
| } |
| |
| return OutputRanges; |
| } |
| |
| } // namespace bolt |
| } // namespace llvm |