| //===- bolt/Core/BinarySection.h - Section in a binary file -----*- C++ -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file contains the declaration of the BinarySection class, which |
| // represents a section in an executable file and contains its properties, |
| // flags, contents, and relocations. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #ifndef BOLT_CORE_BINARY_SECTION_H |
| #define BOLT_CORE_BINARY_SECTION_H |
| |
| #include "bolt/Core/DebugData.h" |
| #include "bolt/Core/Relocation.h" |
| #include "llvm/ADT/ArrayRef.h" |
| #include "llvm/ADT/STLExtras.h" |
| #include "llvm/BinaryFormat/ELF.h" |
| #include "llvm/Object/ELFObjectFile.h" |
| #include "llvm/Object/MachO.h" |
| #include "llvm/Object/ObjectFile.h" |
| #include "llvm/Support/raw_ostream.h" |
| #include <map> |
| #include <memory> |
| #include <set> |
| |
| namespace llvm { |
| class MCStreamer; |
| class MCSymbol; |
| |
| using namespace object; |
| |
| namespace bolt { |
| |
| class BinaryContext; |
| class BinaryData; |
| |
| /// A class to manage binary sections that also manages related relocations. |
| class BinarySection { |
| friend class BinaryContext; |
| |
| /// Count the number of sections created. |
| static uint64_t Count; |
| |
| BinaryContext &BC; // Owning BinaryContext |
| std::string Name; // Section name |
| const SectionRef Section; // SectionRef for input binary sections. |
| StringRef Contents; // Input section contents |
| const uint64_t Address; // Address of section in input binary (may be 0) |
| const uint64_t Size; // Input section size |
| uint64_t InputFileOffset{0}; // Offset in the input binary |
| unsigned Alignment; // alignment in bytes (must be > 0) |
| unsigned ELFType; // ELF section type |
| unsigned ELFFlags; // ELF section flags |
| |
| // Relocations associated with this section. Relocation offsets are |
| // wrt. to the original section address and size. |
| using RelocationSetType = std::set<Relocation, std::less<>>; |
| RelocationSetType Relocations; |
| |
| // Dynamic relocations associated with this section. Relocation offsets are |
| // from the original section address. |
| RelocationSetType DynamicRelocations; |
| |
| // Pending relocations for this section. |
| std::vector<Relocation> PendingRelocations; |
| |
| struct BinaryPatch { |
| uint64_t Offset; |
| SmallString<8> Bytes; |
| |
| BinaryPatch(uint64_t Offset, const SmallVectorImpl<char> &Bytes) |
| : Offset(Offset), Bytes(Bytes.begin(), Bytes.end()) {} |
| }; |
| std::vector<BinaryPatch> Patches; |
| /// Patcher used to apply simple changes to sections of the input binary. |
| std::unique_ptr<BinaryPatcher> Patcher; |
| |
| // Output info |
| bool IsFinalized{false}; // Has this section had output information |
| // finalized? |
| std::string OutputName; // Output section name (if the section has |
| // been renamed) |
| uint64_t OutputAddress{0}; // Section address for the rewritten binary. |
| uint64_t OutputSize{0}; // Section size in the rewritten binary. |
| uint64_t OutputFileOffset{0}; // File offset in the rewritten binary file. |
| StringRef OutputContents; // Rewritten section contents. |
| const uint64_t SectionNumber; // Order in which the section was created. |
| unsigned SectionID{-1u}; // Unique ID used for address mapping. |
| // Set by ExecutableFileMemoryManager. |
| uint32_t Index{0}; // Section index in the output file. |
| mutable bool IsReordered{false}; // Have the contents been reordered? |
| bool IsAnonymous{false}; // True if the name should not be included |
| // in the output file. |
| |
| uint64_t hash(const BinaryData &BD, |
| std::map<const BinaryData *, uint64_t> &Cache) const; |
| |
| // non-copyable |
| BinarySection(const BinarySection &) = delete; |
| BinarySection(BinarySection &&) = delete; |
| BinarySection &operator=(const BinarySection &) = delete; |
| BinarySection &operator=(BinarySection &&) = delete; |
| |
| static StringRef getName(SectionRef Section) { |
| return cantFail(Section.getName()); |
| } |
| static StringRef getContents(SectionRef Section) { |
| if (Section.getObject()->isELF() && |
| ELFSectionRef(Section).getType() == ELF::SHT_NOBITS) |
| return StringRef(); |
| |
| Expected<StringRef> ContentsOrErr = Section.getContents(); |
| if (!ContentsOrErr) { |
| Error E = ContentsOrErr.takeError(); |
| errs() << "BOLT-ERROR: cannot get section contents for " |
| << getName(Section) << ": " << E << ".\n"; |
| exit(1); |
| } |
| return *ContentsOrErr; |
| } |
| |
| /// Get the set of relocations refering to data in this section that |
| /// has been reordered. The relocation offsets will be modified to |
| /// reflect the new data locations. |
| RelocationSetType reorderRelocations(bool Inplace) const; |
| |
| /// Set output info for this section. |
| void update(uint8_t *NewData, uint64_t NewSize, unsigned NewAlignment, |
| unsigned NewELFType, unsigned NewELFFlags) { |
| assert(NewAlignment > 0 && "section alignment must be > 0"); |
| Alignment = NewAlignment; |
| ELFType = NewELFType; |
| ELFFlags = NewELFFlags; |
| OutputSize = NewSize; |
| OutputContents = StringRef(reinterpret_cast<const char *>(NewData), |
| NewData ? NewSize : 0); |
| IsFinalized = true; |
| } |
| |
| public: |
| /// Copy a section. |
| explicit BinarySection(BinaryContext &BC, const Twine &Name, |
| const BinarySection &Section) |
| : BC(BC), Name(Name.str()), Section(SectionRef()), |
| Contents(Section.getContents()), Address(Section.getAddress()), |
| Size(Section.getSize()), Alignment(Section.getAlignment()), |
| ELFType(Section.getELFType()), ELFFlags(Section.getELFFlags()), |
| Relocations(Section.Relocations), |
| PendingRelocations(Section.PendingRelocations), OutputName(Name.str()), |
| SectionNumber(++Count) {} |
| |
| BinarySection(BinaryContext &BC, SectionRef Section) |
| : BC(BC), Name(getName(Section)), Section(Section), |
| Contents(getContents(Section)), Address(Section.getAddress()), |
| Size(Section.getSize()), Alignment(Section.getAlignment().value()), |
| OutputName(Name), SectionNumber(++Count) { |
| if (isELF()) { |
| ELFType = ELFSectionRef(Section).getType(); |
| ELFFlags = ELFSectionRef(Section).getFlags(); |
| InputFileOffset = ELFSectionRef(Section).getOffset(); |
| } else if (isMachO()) { |
| auto *O = cast<MachOObjectFile>(Section.getObject()); |
| InputFileOffset = |
| O->is64Bit() ? O->getSection64(Section.getRawDataRefImpl()).offset |
| : O->getSection(Section.getRawDataRefImpl()).offset; |
| } |
| } |
| |
| // TODO: pass Data as StringRef/ArrayRef? use StringRef::copy method. |
| BinarySection(BinaryContext &BC, const Twine &Name, uint8_t *Data, |
| uint64_t Size, unsigned Alignment, unsigned ELFType, |
| unsigned ELFFlags) |
| : BC(BC), Name(Name.str()), |
| Contents(reinterpret_cast<const char *>(Data), Data ? Size : 0), |
| Address(0), Size(Size), Alignment(Alignment), ELFType(ELFType), |
| ELFFlags(ELFFlags), IsFinalized(true), OutputName(Name.str()), |
| OutputSize(Size), OutputContents(Contents), SectionNumber(++Count) { |
| assert(Alignment > 0 && "section alignment must be > 0"); |
| } |
| |
| ~BinarySection(); |
| |
| /// Helper function to generate the proper ELF flags from section properties. |
| static unsigned getFlags(bool IsReadOnly = true, bool IsText = false, |
| bool IsAllocatable = false) { |
| unsigned Flags = 0; |
| if (IsAllocatable) |
| Flags |= ELF::SHF_ALLOC; |
| if (!IsReadOnly) |
| Flags |= ELF::SHF_WRITE; |
| if (IsText) |
| Flags |= ELF::SHF_EXECINSTR; |
| return Flags; |
| } |
| |
| operator bool() const { return ELFType != ELF::SHT_NULL; } |
| |
| bool operator==(const BinarySection &Other) const { |
| return (Name == Other.Name && Address == Other.Address && |
| Size == Other.Size && getData() == Other.getData() && |
| Alignment == Other.Alignment && ELFType == Other.ELFType && |
| ELFFlags == Other.ELFFlags); |
| } |
| |
| bool operator!=(const BinarySection &Other) const { |
| return !operator==(Other); |
| } |
| |
| // Order sections by their immutable properties. |
| bool operator<(const BinarySection &Other) const { |
| // Allocatable before non-allocatable. |
| if (isAllocatable() != Other.isAllocatable()) |
| return isAllocatable() > Other.isAllocatable(); |
| |
| // Input sections take precedence. |
| if (hasSectionRef() != Other.hasSectionRef()) |
| return hasSectionRef() > Other.hasSectionRef(); |
| |
| // Compare allocatable input sections by their address. |
| if (hasSectionRef() && getAddress() != Other.getAddress()) |
| return getAddress() < Other.getAddress(); |
| if (hasSectionRef() && getAddress() && getSize() != Other.getSize()) |
| return getSize() < Other.getSize(); |
| |
| // Code before data. |
| if (isText() != Other.isText()) |
| return isText() > Other.isText(); |
| |
| // Read-only before writable. |
| if (isWritable() != Other.isWritable()) |
| return isWritable() < Other.isWritable(); |
| |
| // BSS at the end. |
| if (isBSS() != Other.isBSS()) |
| return isBSS() < Other.isBSS(); |
| |
| // Otherwise, preserve the order of creation. |
| return SectionNumber < Other.SectionNumber; |
| } |
| |
| /// |
| /// Basic property access. |
| /// |
| BinaryContext &getBinaryContext() { return BC; } |
| bool isELF() const; |
| bool isMachO() const; |
| StringRef getName() const { return Name; } |
| uint64_t getAddress() const { return Address; } |
| uint64_t getEndAddress() const { return Address + Size; } |
| uint64_t getSize() const { return Size; } |
| uint64_t getInputFileOffset() const { return InputFileOffset; } |
| Align getAlign() const { return Align(Alignment); } |
| uint64_t getAlignment() const { return Alignment; } |
| bool isText() const { |
| if (isELF()) |
| return (ELFFlags & ELF::SHF_EXECINSTR); |
| return hasSectionRef() && getSectionRef().isText(); |
| } |
| bool isData() const { |
| if (isELF()) |
| return (ELFType == ELF::SHT_PROGBITS && |
| (ELFFlags & (ELF::SHF_ALLOC | ELF::SHF_WRITE))); |
| return hasSectionRef() && getSectionRef().isData(); |
| } |
| bool isBSS() const { |
| return (ELFType == ELF::SHT_NOBITS && |
| (ELFFlags & (ELF::SHF_ALLOC | ELF::SHF_WRITE))); |
| } |
| bool isTLS() const { return (ELFFlags & ELF::SHF_TLS); } |
| bool isTBSS() const { return isBSS() && isTLS(); } |
| bool isVirtual() const { return ELFType == ELF::SHT_NOBITS; } |
| bool isRela() const { return ELFType == ELF::SHT_RELA; } |
| bool isWritable() const { return (ELFFlags & ELF::SHF_WRITE); } |
| bool isAllocatable() const { |
| if (isELF()) { |
| return (ELFFlags & ELF::SHF_ALLOC) && !isTBSS(); |
| } else { |
| // On non-ELF assume all sections are allocatable. |
| return true; |
| } |
| } |
| bool isReordered() const { return IsReordered; } |
| bool isAnonymous() const { return IsAnonymous; } |
| unsigned getELFType() const { return ELFType; } |
| unsigned getELFFlags() const { return ELFFlags; } |
| |
| uint8_t *getData() { |
| return reinterpret_cast<uint8_t *>( |
| const_cast<char *>(getContents().data())); |
| } |
| const uint8_t *getData() const { |
| return reinterpret_cast<const uint8_t *>(getContents().data()); |
| } |
| StringRef getContents() const { return Contents; } |
| void clearContents() { Contents = {}; } |
| bool hasSectionRef() const { return Section != SectionRef(); } |
| SectionRef getSectionRef() const { return Section; } |
| |
| /// Does this section contain the given \p Address? |
| /// Note: this is in terms of the original mapped binary addresses. |
| bool containsAddress(uint64_t Address) const { |
| return (getAddress() <= Address && Address < getEndAddress()) || |
| (getSize() == 0 && getAddress() == Address); |
| } |
| |
| /// Does this section contain the range [\p Address, \p Address + \p Size)? |
| /// Note: this is in terms of the original mapped binary addresses. |
| bool containsRange(uint64_t Address, uint64_t Size) const { |
| return containsAddress(Address) && Address + Size <= getEndAddress(); |
| } |
| |
| /// Iterate over all non-pending relocations for this section. |
| iterator_range<RelocationSetType::iterator> relocations() { |
| return make_range(Relocations.begin(), Relocations.end()); |
| } |
| |
| /// Iterate over all non-pending relocations for this section. |
| iterator_range<RelocationSetType::const_iterator> relocations() const { |
| return make_range(Relocations.begin(), Relocations.end()); |
| } |
| |
| /// Iterate over all dynamic relocations for this section. |
| iterator_range<RelocationSetType::iterator> dynamicRelocations() { |
| return make_range(DynamicRelocations.begin(), DynamicRelocations.end()); |
| } |
| |
| /// Iterate over all dynamic relocations for this section. |
| iterator_range<RelocationSetType::const_iterator> dynamicRelocations() const { |
| return make_range(DynamicRelocations.begin(), DynamicRelocations.end()); |
| } |
| |
| /// Does this section have any non-pending relocations? |
| bool hasRelocations() const { return !Relocations.empty(); } |
| |
| /// Does this section have any pending relocations? |
| bool hasPendingRelocations() const { return !PendingRelocations.empty(); } |
| |
| /// Remove non-pending relocation with the given /p Offset. |
| bool removeRelocationAt(uint64_t Offset) { |
| auto Itr = Relocations.find(Offset); |
| if (Itr != Relocations.end()) { |
| Relocations.erase(Itr); |
| return true; |
| } |
| return false; |
| } |
| |
| void clearRelocations(); |
| |
| /// Add a new relocation at the given /p Offset. |
| void addRelocation(uint64_t Offset, MCSymbol *Symbol, uint64_t Type, |
| uint64_t Addend, uint64_t Value = 0, |
| bool Pending = false) { |
| assert(Offset < getSize() && "offset not within section bounds"); |
| if (!Pending) { |
| Relocations.emplace(Relocation{Offset, Symbol, Type, Addend, Value}); |
| } else { |
| PendingRelocations.emplace_back( |
| Relocation{Offset, Symbol, Type, Addend, Value}); |
| } |
| } |
| |
| /// Add a dynamic relocation at the given /p Offset. |
| void addDynamicRelocation(uint64_t Offset, MCSymbol *Symbol, uint64_t Type, |
| uint64_t Addend, uint64_t Value = 0) { |
| assert(Offset < getSize() && "offset not within section bounds"); |
| DynamicRelocations.emplace(Relocation{Offset, Symbol, Type, Addend, Value}); |
| } |
| |
| /// Add relocation against the original contents of this section. |
| void addPendingRelocation(const Relocation &Rel) { |
| PendingRelocations.push_back(Rel); |
| } |
| |
| /// Add patch to the input contents of this section. |
| void addPatch(uint64_t Offset, const SmallVectorImpl<char> &Bytes) { |
| Patches.emplace_back(BinaryPatch(Offset, Bytes)); |
| } |
| |
| /// Register patcher for this section. |
| void registerPatcher(std::unique_ptr<BinaryPatcher> BPatcher) { |
| Patcher = std::move(BPatcher); |
| } |
| |
| /// Returns the patcher |
| BinaryPatcher *getPatcher() { return Patcher.get(); } |
| |
| /// Lookup the relocation (if any) at the given /p Offset. |
| const Relocation *getRelocationAt(uint64_t Offset) const { |
| auto Itr = Relocations.find(Offset); |
| return Itr != Relocations.end() ? &*Itr : nullptr; |
| } |
| |
| /// Lookup the relocation (if any) at the given /p Offset. |
| const Relocation *getDynamicRelocationAt(uint64_t Offset) const { |
| Relocation Key{Offset, 0, 0, 0, 0}; |
| auto Itr = DynamicRelocations.find(Key); |
| return Itr != DynamicRelocations.end() ? &*Itr : nullptr; |
| } |
| |
| uint64_t hash(const BinaryData &BD) const { |
| std::map<const BinaryData *, uint64_t> Cache; |
| return hash(BD, Cache); |
| } |
| |
| /// |
| /// Property accessors related to output data. |
| /// |
| |
| bool isFinalized() const { return IsFinalized; } |
| void setIsFinalized() { IsFinalized = true; } |
| StringRef getOutputName() const { return OutputName; } |
| uint64_t getOutputSize() const { return OutputSize; } |
| uint8_t *getOutputData() { |
| return reinterpret_cast<uint8_t *>( |
| const_cast<char *>(getOutputContents().data())); |
| } |
| const uint8_t *getOutputData() const { |
| return reinterpret_cast<const uint8_t *>(getOutputContents().data()); |
| } |
| StringRef getOutputContents() const { return OutputContents; } |
| uint64_t getAllocAddress() const { |
| return reinterpret_cast<uint64_t>(getOutputData()); |
| } |
| uint64_t getOutputAddress() const { return OutputAddress; } |
| uint64_t getOutputFileOffset() const { return OutputFileOffset; } |
| unsigned getSectionID() const { |
| assert(hasValidSectionID() && "trying to use uninitialized section id"); |
| return SectionID; |
| } |
| bool hasValidSectionID() const { return SectionID != -1u; } |
| bool hasValidIndex() { return Index != 0; } |
| uint32_t getIndex() const { return Index; } |
| |
| // mutation |
| void setOutputAddress(uint64_t Address) { OutputAddress = Address; } |
| void setOutputFileOffset(uint64_t Offset) { OutputFileOffset = Offset; } |
| void setSectionID(unsigned ID) { |
| assert(!hasValidSectionID() && "trying to set section id twice"); |
| SectionID = ID; |
| } |
| void setIndex(uint32_t I) { Index = I; } |
| void setOutputName(const Twine &Name) { OutputName = Name.str(); } |
| void setAnonymous(bool Flag) { IsAnonymous = Flag; } |
| |
| /// Emit the section as data, possibly with relocations. |
| /// Use name \p SectionName for the section during the emission. |
| void emitAsData(MCStreamer &Streamer, const Twine &SectionName) const; |
| |
| using SymbolResolverFuncTy = llvm::function_ref<uint64_t(const MCSymbol *)>; |
| |
| /// Flush all pending relocations to patch original contents of sections |
| /// that were not emitted via MCStreamer. |
| void flushPendingRelocations(raw_pwrite_stream &OS, |
| SymbolResolverFuncTy Resolver); |
| |
| /// Change contents of the section. |
| void updateContents(const uint8_t *Data, size_t NewSize) { |
| OutputContents = StringRef(reinterpret_cast<const char *>(Data), NewSize); |
| OutputSize = NewSize; |
| IsFinalized = true; |
| } |
| |
| /// Reorder the contents of this section according to /p Order. If |
| /// /p Inplace is true, the entire contents of the section is reordered, |
| /// otherwise the new contents contain only the reordered data. |
| void reorderContents(const std::vector<BinaryData *> &Order, bool Inplace); |
| |
| void print(raw_ostream &OS) const; |
| |
| /// Write the contents of an ELF note section given the name of the producer, |
| /// a number identifying the type of note and the contents of the note in |
| /// \p DescStr. |
| static std::string encodeELFNote(StringRef NameStr, StringRef DescStr, |
| uint32_t Type); |
| |
| /// Code for ELF notes written by producer 'BOLT' |
| enum { NT_BOLT_BAT = 1, NT_BOLT_INSTRUMENTATION_TABLES = 2 }; |
| }; |
| |
/// Allocate a new array of \p Size bytes with new[] and fill it with a copy of
/// the \p Size bytes starting at \p Data.
///
/// The array is returned to the caller; nothing in this function releases it.
/// For \p Size == 0 a valid (empty) allocation is still returned and \p Data
/// is not touched, so it may be null in that case.
inline uint8_t *copyByteArray(const uint8_t *Data, uint64_t Size) {
  auto *Array = new uint8_t[Size];
  // memcpy requires valid pointers even for a zero-length copy, so skip the
  // call for empty input (e.g. an empty buffer whose data pointer is null).
  if (Size != 0)
    memcpy(Array, Data, Size);
  return Array;
}
| |
| inline uint8_t *copyByteArray(StringRef Buffer) { |
| return copyByteArray(reinterpret_cast<const uint8_t *>(Buffer.data()), |
| Buffer.size()); |
| } |
| |
| inline uint8_t *copyByteArray(ArrayRef<char> Buffer) { |
| return copyByteArray(reinterpret_cast<const uint8_t *>(Buffer.data()), |
| Buffer.size()); |
| } |
| |
| inline raw_ostream &operator<<(raw_ostream &OS, const BinarySection &Section) { |
| Section.print(OS); |
| return OS; |
| } |
| |
| struct SDTMarkerInfo { |
| uint64_t PC; |
| uint64_t Base; |
| uint64_t Semaphore; |
| StringRef Provider; |
| StringRef Name; |
| StringRef Args; |
| |
| /// The offset of PC within the note section |
| unsigned PCOffset; |
| }; |
| |
| /// Linux Kernel special sections point to a specific instruction in many cases. |
| /// Unlike SDTMarkerInfo, these markers can come from different sections. |
| struct LKInstructionMarkerInfo { |
| uint64_t SectionOffset; |
| int32_t PCRelativeOffset; |
| bool IsPCRelative; |
| StringRef SectionName; |
| }; |
| |
| } // namespace bolt |
| } // namespace llvm |
| |
| #endif |