blob: 1d29a871d3628655660965d2ab42858bbd044ffd [file] [log] [blame]
//===- bolt/Core/DebugData.h - Debugging information handling ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains declaration of classes that represent and serialize
// DWARF-related entities.
//
//===----------------------------------------------------------------------===//
#ifndef BOLT_CORE_DEBUG_DATA_H
#define BOLT_CORE_DEBUG_DATA_H
#include "llvm/ADT/SmallVector.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
#include <map>
#include <mutex>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
#define DWARF2_FLAG_END_SEQUENCE (1 << 4)
namespace llvm {
namespace bolt {
struct AttrInfo {
DWARFFormValue V;
const DWARFAbbreviationDeclaration *AbbrevDecl;
uint64_t Offset;
uint32_t Size; // Size of the attribute.
};
/// Finds attributes FormValue and Offset.
///
/// \param DIE die to look up in.
/// \param AbbrevDecl abbrev declaration for the die.
/// \param Index an index in Abbrev declaration entry.
std::optional<AttrInfo>
findAttributeInfo(const DWARFDie DIE,
const DWARFAbbreviationDeclaration *AbbrevDecl,
uint32_t Index);
/// Finds attributes FormValue and Offset.
///
/// \param DIE die to look up in.
/// \param Attr the attribute to extract.
/// \return an optional AttrInfo with DWARFFormValue and Offset.
std::optional<AttrInfo> findAttributeInfo(const DWARFDie DIE,
dwarf::Attribute Attr);
// DWARF5 Header in order of encoding.
// Types represent encodnig sizes.
using UnitLengthType = uint32_t;
using VersionType = uint16_t;
using AddressSizeType = uint8_t;
using SegmentSelectorType = uint8_t;
using OffsetEntryCountType = uint32_t;
/// Get DWARF5 Header size.
/// Rangelists and Loclists have the same header.
constexpr uint32_t getDWARF5RngListLocListHeaderSize() {
return sizeof(UnitLengthType) + sizeof(VersionType) +
sizeof(AddressSizeType) + sizeof(SegmentSelectorType) +
sizeof(OffsetEntryCountType);
}
class BinaryContext;
/// Address range representation. Takes less space than DWARFAddressRange.
struct DebugAddressRange {
uint64_t LowPC{0};
uint64_t HighPC{0};
DebugAddressRange() = default;
DebugAddressRange(uint64_t LowPC, uint64_t HighPC)
: LowPC(LowPC), HighPC(HighPC) {}
};
static inline bool operator<(const DebugAddressRange &LHS,
const DebugAddressRange &RHS) {
return std::tie(LHS.LowPC, LHS.HighPC) < std::tie(RHS.LowPC, RHS.HighPC);
}
/// DebugAddressRangesVector - represents a set of absolute address ranges.
using DebugAddressRangesVector = SmallVector<DebugAddressRange, 2>;
/// Address range with location used by .debug_loc section.
/// More compact than DWARFLocationEntry and uses absolute addresses.
struct DebugLocationEntry {
uint64_t LowPC;
uint64_t HighPC;
SmallVector<uint8_t, 4> Expr;
};
using DebugLocationsVector = SmallVector<DebugLocationEntry, 4>;
/// References a row in a DWARFDebugLine::LineTable by the DWARF
/// Context index of the DWARF Compile Unit that owns the Line Table and the row
/// index. This is tied to our IR during disassembly so that we can later update
/// .debug_line information. RowIndex has a base of 1, which means a RowIndex
/// of 1 maps to the first row of the line table and a RowIndex of 0 is invalid.
struct DebugLineTableRowRef {
uint32_t DwCompileUnitIndex;
uint32_t RowIndex;
const static DebugLineTableRowRef NULL_ROW;
bool operator==(const DebugLineTableRowRef &Rhs) const {
return DwCompileUnitIndex == Rhs.DwCompileUnitIndex &&
RowIndex == Rhs.RowIndex;
}
bool operator!=(const DebugLineTableRowRef &Rhs) const {
return !(*this == Rhs);
}
static DebugLineTableRowRef fromSMLoc(const SMLoc &Loc) {
union {
decltype(Loc.getPointer()) Ptr;
DebugLineTableRowRef Ref;
} U;
U.Ptr = Loc.getPointer();
return U.Ref;
}
SMLoc toSMLoc() const {
union {
decltype(SMLoc().getPointer()) Ptr;
DebugLineTableRowRef Ref;
} U;
U.Ref = *this;
return SMLoc::getFromPointer(U.Ptr);
}
};
/// Common buffer vector used for debug info handling.
using DebugBufferVector = SmallVector<char, 16>;
/// Map of old CU offset to new offset and length.
struct CUInfo {
uint32_t Offset;
uint32_t Length;
};
using CUOffsetMap = std::map<uint32_t, CUInfo>;
enum class RangesWriterKind { DebugRangesWriter, DebugRangeListsWriter };
/// Serializes the .debug_ranges DWARF section.
class DebugRangesSectionWriter {
public:
DebugRangesSectionWriter();
DebugRangesSectionWriter(RangesWriterKind K) : Kind(K){};
virtual ~DebugRangesSectionWriter(){};
/// Add ranges with caching.
virtual uint64_t
addRanges(DebugAddressRangesVector &&Ranges,
std::map<DebugAddressRangesVector, uint64_t> &CachedRanges);
/// Add ranges and return offset into section.
virtual uint64_t addRanges(DebugAddressRangesVector &Ranges);
/// Returns an offset of an empty address ranges list that is always written
/// to .debug_ranges
uint64_t getEmptyRangesOffset() const { return EmptyRangesOffset; }
/// Returns the SectionOffset.
uint64_t getSectionOffset();
/// Returns a buffer containing Ranges.
virtual std::unique_ptr<DebugBufferVector> releaseBuffer() {
return std::move(RangesBuffer);
}
RangesWriterKind getKind() const { return Kind; }
static bool classof(const DebugRangesSectionWriter *Writer) {
return Writer->getKind() == RangesWriterKind::DebugRangesWriter;
}
/// Writes out range lists for a current CU being processed.
void virtual finalizeSection(){};
/// Needs to be invoked before each \p CU is processed.
void virtual initSection(DWARFUnit &CU){};
protected:
std::unique_ptr<DebugBufferVector> RangesBuffer;
std::unique_ptr<raw_svector_ostream> RangesStream;
std::mutex WriterMutex;
/// Current offset in the section (updated as new entries are written).
/// Starts with 16 since the first 16 bytes are reserved for an empty range.
uint32_t SectionOffset{0};
/// Offset of an empty address ranges list.
static constexpr uint64_t EmptyRangesOffset{0};
private:
RangesWriterKind Kind;
};
class DebugAddrWriter;
class DebugRangeListsSectionWriter : public DebugRangesSectionWriter {
public:
DebugRangeListsSectionWriter()
: DebugRangesSectionWriter(RangesWriterKind::DebugRangeListsWriter) {
RangesBuffer = std::make_unique<DebugBufferVector>();
RangesStream = std::make_unique<raw_svector_ostream>(*RangesBuffer);
};
virtual ~DebugRangeListsSectionWriter(){};
static void setAddressWriter(DebugAddrWriter *AddrW) { AddrWriter = AddrW; }
/// Add ranges with caching.
uint64_t addRanges(
DebugAddressRangesVector &&Ranges,
std::map<DebugAddressRangesVector, uint64_t> &CachedRanges) override;
/// Add ranges and return offset into section.
uint64_t addRanges(DebugAddressRangesVector &Ranges) override;
std::unique_ptr<DebugBufferVector> releaseBuffer() override {
return std::move(RangesBuffer);
}
/// Needs to be invoked before each \p CU is processed.
void initSection(DWARFUnit &CU) override;
/// Writes out range lists for a current CU being processed.
void finalizeSection() override;
// Returns true if section is empty.
bool empty() { return RangesBuffer->empty(); }
static bool classof(const DebugRangesSectionWriter *Writer) {
return Writer->getKind() == RangesWriterKind::DebugRangeListsWriter;
}
private:
static DebugAddrWriter *AddrWriter;
/// Used to find unique CU ID.
DWARFUnit *CU;
/// Current relative offset of range list entry within this CUs rangelist
/// body.
uint32_t CurrentOffset{0};
/// Contains relative offset of each range list entry.
SmallVector<uint32_t, 1> RangeEntries;
std::unique_ptr<DebugBufferVector> CUBodyBuffer;
std::unique_ptr<raw_svector_ostream> CUBodyStream;
};
/// Serializes the .debug_aranges DWARF section.
class DebugARangesSectionWriter {
public:
/// Add ranges for CU matching \p CUOffset.
void addCURanges(uint64_t CUOffset, DebugAddressRangesVector &&Ranges);
/// Writes .debug_aranges with the added ranges to the MCObjectWriter.
/// Takes in \p RangesStream to write into, and \p CUMap which maps CU
/// original offsets to new ones.
void writeARangesSection(raw_svector_ostream &RangesStream,
const CUOffsetMap &CUMap) const;
/// Resets the writer to a clear state.
void reset() { CUAddressRanges.clear(); }
/// Map DWARFCompileUnit index to ranges.
using CUAddressRangesType = std::map<uint64_t, DebugAddressRangesVector>;
/// Return ranges for a given CU.
const CUAddressRangesType &getCUAddressRanges() const {
return CUAddressRanges;
}
private:
/// Map from compile unit offset to the list of address intervals that belong
/// to that compile unit. Each interval is a pair
/// (first address, interval size).
CUAddressRangesType CUAddressRanges;
std::mutex CUAddressRangesMutex;
};
using IndexAddressPair = std::pair<uint32_t, uint64_t>;
using AddressToIndexMap = std::unordered_map<uint64_t, uint32_t>;
using IndexToAddressMap = std::unordered_map<uint32_t, uint64_t>;
using AddressSectionBuffer = SmallVector<char, 4>;
class DebugAddrWriter {
public:
DebugAddrWriter() = delete;
DebugAddrWriter(BinaryContext *BC_);
virtual ~DebugAddrWriter(){};
/// Given an address returns an index in .debug_addr.
/// Adds Address to map.
uint32_t getIndexFromAddress(uint64_t Address, DWARFUnit &CU);
/// Adds {\p Address, \p Index} to \p CU.
void addIndexAddress(uint64_t Address, uint32_t Index, DWARFUnit &CU);
/// Creates consolidated .debug_addr section, and builds DWOID to offset map.
virtual AddressSectionBuffer finalize();
/// Given DWARFUnit \p Unit returns offset of this CU in to .debug_addr
/// section.
virtual uint64_t getOffset(DWARFUnit &Unit);
/// Returns True if CU exists in the DebugAddrWriter.
bool doesCUExist(DWARFUnit &Unit) {
return DWOIdToOffsetMap.count(getCUID(Unit)) > 0;
}
/// Returns False if .debug_addr section was created..
bool isInitialized() const { return !AddressMaps.empty(); }
protected:
class AddressForDWOCU {
public:
AddressToIndexMap::iterator find(uint64_t Address) {
return AddressToIndex.find(Address);
}
AddressToIndexMap::iterator end() { return AddressToIndex.end(); }
AddressToIndexMap::iterator begin() { return AddressToIndex.begin(); }
IndexToAddressMap::iterator indexToAdddessEnd() {
return IndexToAddress.end();
}
IndexToAddressMap::iterator indexToAddressBegin() {
return IndexToAddress.begin();
}
uint32_t getNextIndex() {
while (IndexToAddress.count(CurrentIndex))
++CurrentIndex;
return CurrentIndex;
}
/// Inserts elements in to IndexToAddress and AddressToIndex.
/// Follows the same semantics as unordered_map insert.
std::pair<AddressToIndexMap::iterator, bool> insert(uint64_t Address,
uint32_t Index) {
IndexToAddress.insert({Index, Address});
return AddressToIndex.insert({Address, Index});
}
/// Updates AddressToIndex Map.
/// Follows the same symantics as unordered map [].
void updateAddressToIndex(uint64_t Address, uint32_t Index) {
AddressToIndex[Address] = Index;
}
/// Updates IndexToAddress Map.
/// Follows the same symantics as unordered map [].
void updateIndexToAddrss(uint64_t Address, uint32_t Index) {
IndexToAddress[Index] = Address;
}
void dump();
private:
AddressToIndexMap AddressToIndex;
IndexToAddressMap IndexToAddress;
uint32_t CurrentIndex{0};
};
virtual uint64_t getCUID(DWARFUnit &Unit) {
assert(Unit.getDWOId() && "Unit is not Skeleton CU.");
return *Unit.getDWOId();
}
BinaryContext *BC;
/// Maps DWOID to AddressForDWOCU.
std::unordered_map<uint64_t, AddressForDWOCU> AddressMaps;
/// Maps DWOID to offset within .debug_addr section.
std::unordered_map<uint64_t, uint64_t> DWOIdToOffsetMap;
/// Mutex used for parallel processing of debug info.
std::mutex WriterMutex;
};
class DebugAddrWriterDwarf5 : public DebugAddrWriter {
public:
DebugAddrWriterDwarf5() = delete;
DebugAddrWriterDwarf5(BinaryContext *BC) : DebugAddrWriter(BC) {}
/// Creates consolidated .debug_addr section, and builds DWOID to offset map.
AddressSectionBuffer finalize() override;
/// Given DWARFUnit \p Unit returns offset of this CU in to .debug_addr
/// section.
uint64_t getOffset(DWARFUnit &Unit) override;
protected:
/// Given DWARFUnit \p Unit returns either DWO ID or it's offset within
/// .debug_info.
uint64_t getCUID(DWARFUnit &Unit) override {
if (Unit.isDWOUnit()) {
DWARFUnit *SkeletonCU = Unit.getLinkedUnit();
return SkeletonCU->getOffset();
}
return Unit.getOffset();
}
};
/// This class is NOT thread safe.
using DebugStrOffsetsBufferVector = SmallVector<char, 16>;
class DebugStrOffsetsWriter {
public:
DebugStrOffsetsWriter() {
StrOffsetsBuffer = std::make_unique<DebugStrOffsetsBufferVector>();
StrOffsetsStream = std::make_unique<raw_svector_ostream>(*StrOffsetsBuffer);
}
/// Initializes Buffer and Stream.
void initialize(const DWARFSection &StrOffsetsSection,
const std::optional<StrOffsetsContributionDescriptor> Contr);
/// Update Str offset in .debug_str in .debug_str_offsets.
void updateAddressMap(uint32_t Index, uint32_t Address);
/// Writes out current sections entry into .debug_str_offsets.
void finalizeSection(DWARFUnit &Unit);
/// Returns False if no strings were added to .debug_str.
bool isFinalized() const { return !StrOffsetsBuffer->empty(); }
/// Returns buffer containing .debug_str_offsets.
std::unique_ptr<DebugStrOffsetsBufferVector> releaseBuffer() {
return std::move(StrOffsetsBuffer);
}
private:
std::unique_ptr<DebugStrOffsetsBufferVector> StrOffsetsBuffer;
std::unique_ptr<raw_svector_ostream> StrOffsetsStream;
std::map<uint32_t, uint32_t> IndexToAddressMap;
DenseSet<uint64_t> ProcessedBaseOffsets;
// Section size not including header.
uint32_t CurrentSectionSize{0};
bool StrOffsetSectionWasModified = false;
};
using DebugStrBufferVector = SmallVector<char, 16>;
class DebugStrWriter {
public:
DebugStrWriter() = delete;
DebugStrWriter(BinaryContext &BC) : BC(BC) { create(); }
std::unique_ptr<DebugStrBufferVector> releaseBuffer() {
return std::move(StrBuffer);
}
/// Adds string to .debug_str.
/// On first invokation it initializes internal data stractures.
uint32_t addString(StringRef Str);
/// Returns False if no strings were added to .debug_str.
bool isInitialized() const { return !StrBuffer->empty(); }
private:
/// Mutex used for parallel processing of debug info.
std::mutex WriterMutex;
/// Initializes Buffer and Stream.
void initialize();
/// Creats internal data stractures.
void create();
std::unique_ptr<DebugStrBufferVector> StrBuffer;
std::unique_ptr<raw_svector_ostream> StrStream;
BinaryContext &BC;
};
class DebugInfoBinaryPatcher;
class DebugAbbrevWriter;
enum class LocWriterKind { DebugLocWriter, DebugLoclistWriter };
/// Serializes part of a .debug_loc DWARF section with LocationLists.
class SimpleBinaryPatcher;
class DebugLocWriter {
protected:
DebugLocWriter(uint8_t DwarfVersion, LocWriterKind Kind)
: DwarfVersion(DwarfVersion), Kind(Kind) {
init();
}
public:
DebugLocWriter() { init(); };
virtual ~DebugLocWriter(){};
/// Writes out location lists and stores internal patches.
virtual void addList(AttrInfo &AttrVal, DebugLocationsVector &LocList,
DebugInfoBinaryPatcher &DebugInfoPatcher,
DebugAbbrevWriter &AbbrevWriter);
/// Writes out locations in to a local buffer, and adds Debug Info patches.
virtual void finalize(DebugInfoBinaryPatcher &DebugInfoPatcher,
DebugAbbrevWriter &AbbrevWriter);
/// Return internal buffer.
virtual std::unique_ptr<DebugBufferVector> getBuffer();
/// Returns DWARF version.
uint8_t getDwarfVersion() const { return DwarfVersion; }
/// Offset of an empty location list.
static constexpr uint32_t EmptyListOffset = 0;
LocWriterKind getKind() const { return Kind; }
static bool classof(const DebugLocWriter *Writer) {
return Writer->getKind() == LocWriterKind::DebugLocWriter;
}
protected:
std::unique_ptr<DebugBufferVector> LocBuffer;
std::unique_ptr<raw_svector_ostream> LocStream;
/// Current offset in the section (updated as new entries are written).
/// Starts with 0 here since this only writes part of a full location lists
/// section. In the final section, for DWARF4, the first 16 bytes are reserved
/// for an empty list.
static uint32_t LocSectionOffset;
uint8_t DwarfVersion{4};
LocWriterKind Kind{LocWriterKind::DebugLocWriter};
private:
/// Inits all the related data structures.
void init();
struct LocListDebugInfoPatchType {
uint64_t DebugInfoAttrOffset;
uint64_t LocListOffset;
};
using VectorLocListDebugInfoPatchType =
std::vector<LocListDebugInfoPatchType>;
/// The list of debug info patches to be made once individual
/// location list writers have been filled
VectorLocListDebugInfoPatchType LocListDebugInfoPatches;
};
class DebugLoclistWriter : public DebugLocWriter {
public:
~DebugLoclistWriter() {}
DebugLoclistWriter() = delete;
DebugLoclistWriter(DWARFUnit &Unit, uint8_t DV, bool SD)
: DebugLocWriter(DV, LocWriterKind::DebugLoclistWriter), CU(Unit),
IsSplitDwarf(SD) {
assert(DebugLoclistWriter::AddrWriter &&
"Please use SetAddressWriter to initialize "
"DebugAddrWriter before instantiation.");
if (DwarfVersion >= 5) {
LocBodyBuffer = std::make_unique<DebugBufferVector>();
LocBodyStream = std::make_unique<raw_svector_ostream>(*LocBodyBuffer);
} else {
// Writing out empty location list to which all references to empty
// location lists will point.
const char Zeroes[16] = {0};
*LocStream << StringRef(Zeroes, 16);
}
}
static void setAddressWriter(DebugAddrWriter *AddrW) { AddrWriter = AddrW; }
/// Stores location lists internally to be written out during finalize phase.
void addList(AttrInfo &AttrVal, DebugLocationsVector &LocList,
DebugInfoBinaryPatcher &DebugInfoPatcher,
DebugAbbrevWriter &AbbrevWriter) override;
/// Writes out locations in to a local buffer and applies debug info patches.
void finalize(DebugInfoBinaryPatcher &DebugInfoPatcher,
DebugAbbrevWriter &AbbrevWriter) override;
/// Returns CU ID.
/// For Skelton CU it is a CU Offset.
/// For DWO CU it is a DWO ID.
uint64_t getCUID() const {
return CU.isDWOUnit() ? *CU.getDWOId() : CU.getOffset();
}
LocWriterKind getKind() const { return DebugLocWriter::getKind(); }
static bool classof(const DebugLocWriter *Writer) {
return Writer->getKind() == LocWriterKind::DebugLoclistWriter;
}
bool isSplitDwarf() const { return IsSplitDwarf; }
constexpr static uint32_t InvalidIndex = UINT32_MAX;
private:
/// Writes out locations in to a local buffer and applies debug info patches.
void finalizeDWARF5(DebugInfoBinaryPatcher &DebugInfoPatcher,
DebugAbbrevWriter &AbbrevWriter);
static DebugAddrWriter *AddrWriter;
DWARFUnit &CU;
bool IsSplitDwarf{false};
// Used for DWARF5 to store location lists before being finalized.
std::unique_ptr<DebugBufferVector> LocBodyBuffer;
std::unique_ptr<raw_svector_ostream> LocBodyStream;
std::vector<uint32_t> RelativeLocListOffsets;
uint32_t NumberOfEntries{0};
static uint32_t LoclistBaseOffset;
};
enum class PatcherKind { SimpleBinaryPatcher, DebugInfoBinaryPatcher };
/// Abstract interface for classes that apply modifications to a binary string.
class BinaryPatcher {
public:
virtual ~BinaryPatcher() {}
/// Applies modifications to the copy of binary string \p BinaryContents .
/// Implementations do not need to guarantee that size of a new \p
/// BinaryContents remains unchanged.
virtual std::string patchBinary(StringRef BinaryContents) = 0;
};
/// Applies simple modifications to a binary string, such as directly replacing
/// the contents of a certain portion with a string or an integer.
class SimpleBinaryPatcher : public BinaryPatcher {
private:
std::vector<std::pair<uint32_t, std::string>> Patches;
/// Adds a patch to replace the contents of \p ByteSize bytes with the integer
/// \p NewValue encoded in little-endian, with the least-significant byte
/// being written at the offset \p Offset.
void addLEPatch(uint64_t Offset, uint64_t NewValue, size_t ByteSize);
/// RangeBase for DWO DebugInfo Patcher.
uint64_t RangeBase{0};
/// Gets reset to false when setRangeBase is invoked.
/// Gets set to true when getRangeBase is called
uint64_t WasRangeBaseUsed{false};
public:
virtual ~SimpleBinaryPatcher() {}
virtual PatcherKind getKind() const {
return PatcherKind::SimpleBinaryPatcher;
}
static bool classof(const SimpleBinaryPatcher *Patcher) {
return Patcher->getKind() == PatcherKind::SimpleBinaryPatcher;
}
/// Adds a patch to replace the contents of the binary string starting at the
/// specified \p Offset with the string \p NewValue.
/// The \p OldValueSize is the size of the old value that will be replaced.
void addBinaryPatch(uint64_t Offset, std::string &&NewValue,
uint32_t OldValueSize);
/// Adds a patch to replace the contents of a single byte of the string, at
/// the offset \p Offset, with the value \Value.
void addBytePatch(uint64_t Offset, uint8_t Value);
/// Adds a patch to put the integer \p NewValue encoded as a 64-bit
/// little-endian value at offset \p Offset.
virtual void addLE64Patch(uint64_t Offset, uint64_t NewValue);
/// Adds a patch to put the integer \p NewValue encoded as a 32-bit
/// little-endian value at offset \p Offset.
/// The \p OldValueSize is the size of the old value that will be replaced.
virtual void addLE32Patch(uint64_t Offset, uint32_t NewValue,
uint32_t OldValueSize = 4);
/// Add a patch at \p Offset with \p Value using unsigned LEB128 encoding with
/// size \p OldValueSize.
/// The \p OldValueSize is the size of the old value that will be replaced.
virtual void addUDataPatch(uint64_t Offset, uint64_t Value,
uint32_t OldValueSize);
/// Setting DW_AT_GNU_ranges_base
void setRangeBase(uint64_t Rb) {
WasRangeBaseUsed = false;
RangeBase = Rb;
}
/// Gets DW_AT_GNU_ranges_base
uint64_t getRangeBase() {
WasRangeBaseUsed = true;
return RangeBase;
}
/// Proxy for if we broke up low_pc/high_pc to ranges.
bool getWasRangBasedUsed() const { return WasRangeBaseUsed; }
/// This function takes in \p BinaryContents, applies patches to it and
/// returns an updated string.
std::string patchBinary(StringRef BinaryContents) override;
};
class DebugInfoBinaryPatcher : public SimpleBinaryPatcher {
public:
enum class DebugPatchKind {
PatchBaseClass,
PatchValue32,
PatchValue64to32,
PatchValue32GenericSize,
PatchValue64,
PatchValueVariable,
ReferencePatchValue,
DWARFUnitOffsetBaseLabel,
DestinationReferenceLabel,
NewDebugEntry
};
struct Patch {
Patch(uint32_t O, DebugPatchKind K) : Offset(O), Kind(K) {}
DebugPatchKind getKind() const { return Kind; }
static bool classof(const Patch *Writer) {
return Writer->getKind() == DebugPatchKind::PatchBaseClass;
}
uint32_t Offset;
DebugPatchKind Kind;
};
struct DebugPatch64to32 : public Patch {
DebugPatch64to32(uint32_t O, uint32_t V)
: Patch(O, DebugPatchKind::PatchValue64to32) {
Value = V;
}
DebugPatchKind getKind() const { return Kind; }
static bool classof(const Patch *Writer) {
return Writer->getKind() == DebugPatchKind::PatchValue64to32;
}
uint32_t Value;
};
struct DebugPatch32 : public Patch {
DebugPatch32(uint32_t O, uint32_t V)
: Patch(O, DebugPatchKind::PatchValue32) {
Value = V;
}
DebugPatchKind getKind() const { return Kind; }
static bool classof(const Patch *Writer) {
return Writer->getKind() == DebugPatchKind::PatchValue32;
}
uint32_t Value;
};
/// Patch for 4 byte entry, where original entry size is not 4 bytes or 8
/// bytes.
struct DebugPatch32GenericSize : public Patch {
DebugPatch32GenericSize(uint32_t O, uint32_t V, uint32_t OVS)
: Patch(O, DebugPatchKind::PatchValue32GenericSize) {
Value = V;
OldValueSize = OVS;
}
static bool classof(const Patch *Writer) {
return Writer->getKind() == DebugPatchKind::PatchValue32GenericSize;
}
uint32_t Value;
uint32_t OldValueSize;
};
struct DebugPatch64 : public Patch {
DebugPatch64(uint32_t O, uint64_t V)
: Patch(O, DebugPatchKind::PatchValue64) {
Value = V;
}
static bool classof(const Patch *Writer) {
return Writer->getKind() == DebugPatchKind::PatchValue64;
}
uint64_t Value;
};
struct DebugPatchVariableSize : public Patch {
DebugPatchVariableSize(uint32_t O, uint32_t OVS, uint32_t V)
: Patch(O, DebugPatchKind::PatchValueVariable) {
OldValueSize = OVS;
Value = V;
}
static bool classof(const Patch *Writer) {
return Writer->getKind() == DebugPatchKind::PatchValueVariable;
}
uint32_t OldValueSize;
uint32_t Value;
};
struct DebugPatchReference : public Patch {
struct Data {
uint32_t OldValueSize : 4;
uint32_t DirectRelative : 1;
uint32_t IndirectRelative : 1;
uint32_t DirectAbsolute : 1;
};
DebugPatchReference(uint32_t O, uint32_t OVS, uint32_t DO, dwarf::Form F)
: Patch(O, DebugPatchKind::ReferencePatchValue) {
PatchInfo.OldValueSize = OVS;
DestinationOffset = DO;
PatchInfo.DirectRelative =
F == dwarf::DW_FORM_ref1 || F == dwarf::DW_FORM_ref2 ||
F == dwarf::DW_FORM_ref4 || F == dwarf::DW_FORM_ref8;
PatchInfo.IndirectRelative = F == dwarf::DW_FORM_ref_udata;
PatchInfo.DirectAbsolute = F == dwarf::DW_FORM_ref_addr;
}
static bool classof(const Patch *Writer) {
return Writer->getKind() == DebugPatchKind::ReferencePatchValue;
}
Data PatchInfo;
uint32_t DestinationOffset;
};
struct DWARFUnitOffsetBaseLabel : public Patch {
DWARFUnitOffsetBaseLabel(uint32_t O)
: Patch(O, DebugPatchKind::DWARFUnitOffsetBaseLabel) {}
static bool classof(const Patch *Writer) {
return Writer->getKind() == DebugPatchKind::DWARFUnitOffsetBaseLabel;
}
};
struct DestinationReferenceLabel : public Patch {
DestinationReferenceLabel() = delete;
DestinationReferenceLabel(uint32_t O)
: Patch(O, DebugPatchKind::DestinationReferenceLabel) {}
static bool classof(const Patch *Writer) {
return Writer->getKind() == DebugPatchKind::DestinationReferenceLabel;
}
};
struct NewDebugEntry : public Patch {
NewDebugEntry() = delete;
NewDebugEntry(uint32_t O, std::string &&V)
: Patch(O, DebugPatchKind::NewDebugEntry) {
CurrentOrder = NewDebugEntry::OrderCounter++;
Value = std::move(V);
}
static bool classof(const Patch *Writer) {
return Writer->getKind() == DebugPatchKind::NewDebugEntry;
}
static uint32_t OrderCounter;
uint32_t CurrentOrder;
std::string Value;
};
PatcherKind getKind() const override {
return PatcherKind::DebugInfoBinaryPatcher;
}
static bool classof(const SimpleBinaryPatcher *Patcher) {
return Patcher->getKind() == PatcherKind::DebugInfoBinaryPatcher;
}
/// This function takes in \p BinaryContents, and re-writes it with new
/// patches inserted into it. It returns an updated string.
std::string patchBinary(StringRef BinaryContents) override;
/// Adds a patch to put the integer \p NewValue encoded as a 64-bit
/// little-endian value at offset \p Offset.
void addLE64Patch(uint64_t Offset, uint64_t NewValue) override;
/// Adds a patch to put the integer \p NewValue encoded as a 32-bit
/// little-endian value at offset \p Offset.
/// The \p OldValueSize is the size of the old value that will be replaced.
void addLE32Patch(uint64_t Offset, uint32_t NewValue,
uint32_t OldValueSize = 4) override;
/// Add a patch at \p Offset with \p Value using unsigned LEB128 encoding with
/// size \p OldValueSize.
/// The \p OldValueSize is the size of the old value that will be replaced.
void addUDataPatch(uint64_t Offset, uint64_t Value,
uint32_t OldValueSize) override;
/// Adds a label \p Offset for DWARF UNit.
/// Used to recompute relative references.
void addUnitBaseOffsetLabel(uint64_t Offset);
/// Adds a Label for destination. Either relative or explicit reference.
void addDestinationReferenceLabel(uint64_t Offset);
/// Adds a reference at \p Offset to patch with new fully resolved \p
/// DestinationOffset . The \p OldValueSize is the original size of entry in
/// the DIE. The \p Form is the form of the entry.
void addReferenceToPatch(uint64_t Offset, uint32_t DestinationOffset,
uint32_t OldValueSize, dwarf::Form Form);
/// Inserts a new uint32_t \p Value at the end of \p DIE .
void insertNewEntry(const DWARFDie &DIE, uint32_t);
/// Inserts a new encoded \p Value at the end of \p DIE .
void insertNewEntry(const DWARFDie &DIE, std::string &&Value);
/// Clears unordered set for DestinationLabels.
void clearDestinationLabels() { DestinationLabels.clear(); }
/// Sets DWARF Units offset, \p DWPOffset , within DWP file.
void setDWPOffset(uint64_t DWPOffset) { DWPUnitOffset = DWPOffset; }
/// When this function is invoked all of the DebugInfo Patches must be done.
/// Returns a map of old CU offsets to new offsets and new sizes.
CUOffsetMap computeNewOffsets(DWARFContext &DWCtx, bool IsDWOContext);
private:
struct PatchDeleter {
void operator()(Patch *P) const {
Patch *Pp = reinterpret_cast<Patch *>(P);
switch (Pp->Kind) {
default:
llvm_unreachable(
"Not all delete instances handled for Patch derived classes");
case DebugPatchKind::PatchValue32:
delete reinterpret_cast<DebugPatch32 *>(P);
break;
case DebugPatchKind::PatchValue64to32:
delete reinterpret_cast<DebugPatch64to32 *>(P);
break;
case DebugPatchKind::PatchValue32GenericSize:
delete reinterpret_cast<DebugPatch32GenericSize *>(P);
break;
case DebugPatchKind::PatchValue64:
delete reinterpret_cast<DebugPatch64 *>(P);
break;
case DebugPatchKind::PatchValueVariable:
delete reinterpret_cast<DebugPatchVariableSize *>(P);
break;
case DebugPatchKind::ReferencePatchValue:
delete reinterpret_cast<DebugPatchReference *>(P);
break;
case DebugPatchKind::DWARFUnitOffsetBaseLabel:
delete reinterpret_cast<DWARFUnitOffsetBaseLabel *>(P);
break;
case DebugPatchKind::DestinationReferenceLabel:
delete reinterpret_cast<DestinationReferenceLabel *>(P);
break;
case DebugPatchKind::NewDebugEntry:
delete reinterpret_cast<NewDebugEntry *>(P);
break;
}
}
};
using UniquePatchPtrType = std::unique_ptr<Patch, PatchDeleter>;
uint64_t DWPUnitOffset{0};
int32_t ChangeInSize{0};
std::vector<UniquePatchPtrType> DebugPatches;
/// Mutex used for parallel processing of debug info.
std::mutex WriterMutex;
/// Stores fully resolved addresses of DIEs that are being referenced.
std::unordered_set<uint32_t> DestinationLabels;
/// Map of original debug info references to new ones.
std::unordered_map<uint32_t, uint32_t> OldToNewOffset;
};
/// Class to facilitate modifying and writing abbreviation sections.
class DebugAbbrevWriter {
/// Mutex used for parallel processing of debug info.
std::mutex WriterMutex;
/// Offsets of abbreviation sets in normal .debug_abbrev section.
std::vector<uint64_t> AbbrevSetOffsets;
/// Abbrev data set for a single unit.
struct AbbrevData {
uint64_t Offset{0}; ///< Offset of the data in the final section.
std::unique_ptr<DebugBufferVector> Buffer;
std::unique_ptr<raw_svector_ostream> Stream;
};
/// Map original unit to abbreviations data.
std::unordered_map<const DWARFUnit *, AbbrevData *> UnitsAbbrevData;
/// Map from Hash Signature to AbbrevData.
llvm::StringMap<std::unique_ptr<AbbrevData>> AbbrevDataCache;
/// Attributes substitution (patch) information.
struct PatchInfo {
dwarf::Attribute OldAttr;
dwarf::Attribute NewAttr;
uint8_t NewAttrForm;
};
struct AbbrevEntry {
dwarf::Attribute Attr;
dwarf::Form Form;
};
using PatchesTy = std::unordered_map<const DWARFAbbreviationDeclaration *,
SmallVector<PatchInfo, 2>>;
std::unordered_map<const DWARFUnit *, PatchesTy> Patches;
using AbbrevEntryTy = std::unordered_map<const DWARFAbbreviationDeclaration *,
SmallVector<AbbrevEntry, 2>>;
std::unordered_map<const DWARFUnit *, AbbrevEntryTy> NewAbbrevEntries;
/// DWARF context containing abbreviations.
DWARFContext &Context;
/// DWO ID used to identify unit contribution in DWP.
std::optional<uint64_t> DWOId;
/// Add abbreviations from compile/type \p Unit to the writer.
void addUnitAbbreviations(DWARFUnit &Unit);
public:
/// Create an abbrev section writer for abbreviations in \p Context.
/// If no \p DWOId is given, all normal (non-DWO) abbreviations in the
/// \p Context are handled. Otherwise, only abbreviations associated with
/// the compile unit matching \p DWOId in DWP or DWO will be covered by
/// an instance of this class.
///
/// NOTE: Type unit abbreviations are not handled separately for DWOs.
/// Most of the time, using type units with DWO is not a good idea.
/// If type units are used, the caller is responsible for verifying
/// that abbreviations are shared by CU and TUs.
DebugAbbrevWriter(DWARFContext &Context,
std::optional<uint64_t> DWOId = std::nullopt)
: Context(Context), DWOId(DWOId) {}
DebugAbbrevWriter(const DebugAbbrevWriter &) = delete;
DebugAbbrevWriter &operator=(const DebugAbbrevWriter &) = delete;
DebugAbbrevWriter(DebugAbbrevWriter &&) = delete;
DebugAbbrevWriter &operator=(DebugAbbrevWriter &&) = delete;
virtual ~DebugAbbrevWriter() = default;
/// Substitute attribute \p AttrTag in abbreviation declaration \p Abbrev
/// belonging to CU \p Unit with new attribute \p NewAttrTag having
/// \p NewAttrForm form.
void addAttributePatch(const DWARFUnit &Unit,
const DWARFAbbreviationDeclaration *Abbrev,
dwarf::Attribute AttrTag, dwarf::Attribute NewAttrTag,
uint8_t NewAttrForm) {
assert(&Unit.getContext() == &Context &&
"cannot update attribute from a different DWARF context");
std::lock_guard<std::mutex> Lock(WriterMutex);
Patches[&Unit][Abbrev].emplace_back(
PatchInfo{AttrTag, NewAttrTag, NewAttrForm});
}
/// Adds attribute \p AttrTag and \p NewAttrForm in abbreviation declaration
/// \p Abbrev belonging to CU \p Unit .
void addAttribute(const DWARFUnit &Unit,
const DWARFAbbreviationDeclaration *Abbrev,
dwarf::Attribute AttrTag, dwarf::Form AttrForm) {
assert(&Unit.getContext() == &Context &&
"cannot update attribute from a different DWARF context");
std::lock_guard<std::mutex> Lock(WriterMutex);
bool AlreadyAdded =
llvm::any_of(NewAbbrevEntries[&Unit][Abbrev],
[&](AbbrevEntry &E) { return E.Attr == AttrTag; });
if (AlreadyAdded)
return;
NewAbbrevEntries[&Unit][Abbrev].emplace_back(
AbbrevEntry{AttrTag, AttrForm});
}
/// Return a buffer with concatenated abbrev sections for all CUs and TUs
/// in the associated DWARF context. Section offsets could be queried using
/// getAbbreviationsOffsetForUnit() interface. For DWP, we are using DWOId
/// to return only that unit's contribution to abbrev section.
std::unique_ptr<DebugBufferVector> finalize();
/// Return an offset in the finalized abbrev section corresponding to CU/TU.
uint64_t getAbbreviationsOffsetForUnit(const DWARFUnit &Unit) {
assert(!DWOId && "offsets are tracked for non-DWO units only");
assert(UnitsAbbrevData.count(&Unit) && "no abbrev data found for unit");
return UnitsAbbrevData[&Unit]->Offset;
}
};
/// Similar to MCDwarfLineEntry, but identifies the location by its address
/// instead of MCLabel.
class BinaryDwarfLineEntry : public MCDwarfLoc {
uint64_t Address;
public:
// Constructor to create an BinaryDwarfLineEntry given a symbol and the dwarf
// loc.
BinaryDwarfLineEntry(uint64_t Address, const MCDwarfLoc loc)
: MCDwarfLoc(loc), Address(Address) {}
uint64_t getAddress() const { return Address; }
};
/// Line number information for the output binary. One instance per CU.
///
/// For any given CU, we may:
/// 1. Generate new line table using:
/// a) emitted code: getMCLineSections().addEntry()
/// b) information from the input line table: addLineTableSequence()
/// or
/// 2. Copy line table from the input file: addRawContents().
class DwarfLineTable {
public:
/// Line number information on contiguous code region from the input binary.
/// It is represented by [FirstIndex, LastIndex] rows range in the input
/// line table, and the end address of the sequence used for issuing the end
/// of the sequence directive.
struct RowSequence {
uint32_t FirstIndex;
uint32_t LastIndex;
uint64_t EndAddress;
};
private:
MCDwarfLineTableHeader Header;
/// MC line tables for the code generated via MC layer.
MCLineSection MCLineSections;
/// Line info for the original code. To be merged with tables for new code.
const DWARFDebugLine::LineTable *InputTable{nullptr};
std::vector<RowSequence> InputSequences;
/// Raw data representing complete debug line section for the unit.
StringRef RawData;
/// DWARF Version
uint16_t DwarfVersion;
public:
/// Emit line info for all units in the binary context.
static void emit(BinaryContext &BC, MCStreamer &Streamer);
/// Emit the Dwarf file and the line tables for a given CU.
void emitCU(MCStreamer *MCOS, MCDwarfLineTableParams Params,
std::optional<MCDwarfLineStr> &LineStr, BinaryContext &BC) const;
Expected<unsigned> tryGetFile(StringRef &Directory, StringRef &FileName,
std::optional<MD5::MD5Result> Checksum,
std::optional<StringRef> Source,
uint16_t DwarfVersion,
unsigned FileNumber = 0) {
assert(RawData.empty() && "cannot use with raw data");
return Header.tryGetFile(Directory, FileName, Checksum, Source,
DwarfVersion, FileNumber);
}
/// Return label at the start of the emitted debug line for the unit.
MCSymbol *getLabel() const { return Header.Label; }
void setLabel(MCSymbol *Label) { Header.Label = Label; }
/// Sets the root file \p Directory, \p FileName, optional \p CheckSum, and
/// optional \p Source.
void setRootFile(StringRef Directory, StringRef FileName,
std::optional<MD5::MD5Result> Checksum,
std::optional<StringRef> Source) {
Header.setRootFile(Directory, FileName, Checksum, Source);
}
/// Access to MC line info.
MCLineSection &getMCLineSections() { return MCLineSections; }
/// Add line information using the sequence from the input line \p Table.
void addLineTableSequence(const DWARFDebugLine::LineTable *Table,
uint32_t FirstRow, uint32_t LastRow,
uint64_t EndOfSequenceAddress) {
assert((!InputTable || InputTable == Table) &&
"expected same table for CU");
InputTable = Table;
InputSequences.emplace_back(
RowSequence{FirstRow, LastRow, EndOfSequenceAddress});
}
/// Indicate that for the unit we should emit specified contents instead of
/// generating a new line info table.
void addRawContents(StringRef DebugLineContents) {
RawData = DebugLineContents;
}
/// Sets DWARF version for this line table.
void setDwarfVersion(uint16_t V) { DwarfVersion = V; }
// Returns DWARF Version for this line table.
uint16_t getDwarfVersion() const { return DwarfVersion; }
};
} // namespace bolt
} // namespace llvm
#endif