blob: 1e527ab3916e74237e0c5ce52a3a5dd9053744a6 [file] [log] [blame]
//===- DwarfTransformer.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <thread>
#include <unordered_set>
#include "llvm/DebugInfo/DIContext.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/DebugInfo/GSYM/DwarfTransformer.h"
#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
#include "llvm/DebugInfo/GSYM/GsymCreator.h"
#include "llvm/DebugInfo/GSYM/GsymReader.h"
#include "llvm/DebugInfo/GSYM/InlineInfo.h"
using namespace llvm;
using namespace gsym;
struct llvm::gsym::CUInfo {
const DWARFDebugLine::LineTable *LineTable;
const char *CompDir;
std::vector<uint32_t> FileCache;
uint64_t Language = 0;
uint8_t AddrSize = 0;
CUInfo(DWARFContext &DICtx, DWARFCompileUnit *CU) {
LineTable = DICtx.getLineTableForUnit(CU);
CompDir = CU->getCompilationDir();
FileCache.clear();
if (LineTable)
FileCache.assign(LineTable->Prologue.FileNames.size() + 1, UINT32_MAX);
DWARFDie Die = CU->getUnitDIE();
Language = dwarf::toUnsigned(Die.find(dwarf::DW_AT_language), 0);
AddrSize = CU->getAddressByteSize();
}
/// Return true if Addr is the highest address for a given compile unit. The
/// highest address is encoded as -1, of all ones in the address. These high
/// addresses are used by some linkers to indicate that a function has been
/// dead stripped or didn't end up in the linked executable.
bool isHighestAddress(uint64_t Addr) const {
if (AddrSize == 4)
return Addr == UINT32_MAX;
else if (AddrSize == 8)
return Addr == UINT64_MAX;
return false;
}
/// Convert a DWARF compile unit file index into a GSYM global file index.
///
/// Each compile unit in DWARF has its own file table in the line table
/// prologue. GSYM has a single large file table that applies to all files
/// from all of the info in a GSYM file. This function converts between the
/// two and caches and DWARF CU file index that has already been converted so
/// the first client that asks for a compile unit file index will end up
/// doing the conversion, and subsequent clients will get the cached GSYM
/// index.
uint32_t DWARFToGSYMFileIndex(GsymCreator &Gsym, uint32_t DwarfFileIdx) {
if (!LineTable)
return 0;
assert(DwarfFileIdx < FileCache.size());
uint32_t &GsymFileIdx = FileCache[DwarfFileIdx];
if (GsymFileIdx != UINT32_MAX)
return GsymFileIdx;
std::string File;
if (LineTable->getFileNameByIndex(
DwarfFileIdx, CompDir,
DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, File))
GsymFileIdx = Gsym.insertFile(File);
else
GsymFileIdx = 0;
return GsymFileIdx;
}
};
static DWARFDie GetParentDeclContextDIE(DWARFDie &Die) {
if (DWARFDie SpecDie =
Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_specification)) {
if (DWARFDie SpecParent = GetParentDeclContextDIE(SpecDie))
return SpecParent;
}
if (DWARFDie AbstDie =
Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_abstract_origin)) {
if (DWARFDie AbstParent = GetParentDeclContextDIE(AbstDie))
return AbstParent;
}
// We never want to follow parent for inlined subroutine - that would
// give us information about where the function is inlined, not what
// function is inlined
if (Die.getTag() == dwarf::DW_TAG_inlined_subroutine)
return DWARFDie();
DWARFDie ParentDie = Die.getParent();
if (!ParentDie)
return DWARFDie();
switch (ParentDie.getTag()) {
case dwarf::DW_TAG_namespace:
case dwarf::DW_TAG_structure_type:
case dwarf::DW_TAG_union_type:
case dwarf::DW_TAG_class_type:
case dwarf::DW_TAG_subprogram:
return ParentDie; // Found parent decl context DIE
case dwarf::DW_TAG_lexical_block:
return GetParentDeclContextDIE(ParentDie);
default:
break;
}
return DWARFDie();
}
/// Get the GsymCreator string table offset for the qualified name for the
/// DIE passed in. This function will avoid making copies of any strings in
/// the GsymCreator when possible. We don't need to copy a string when the
/// string comes from our .debug_str section or is an inlined string in the
/// .debug_info. If we create a qualified name string in this function by
/// combining multiple strings in the DWARF string table or info, we will make
/// a copy of the string when we add it to the string table.
static Optional<uint32_t> getQualifiedNameIndex(DWARFDie &Die,
uint64_t Language,
GsymCreator &Gsym) {
// If the dwarf has mangled name, use mangled name
if (auto LinkageName =
dwarf::toString(Die.findRecursively({dwarf::DW_AT_MIPS_linkage_name,
dwarf::DW_AT_linkage_name}),
nullptr))
return Gsym.insertString(LinkageName, /* Copy */ false);
StringRef ShortName(Die.getName(DINameKind::ShortName));
if (ShortName.empty())
return llvm::None;
// For C++ and ObjC, prepend names of all parent declaration contexts
if (!(Language == dwarf::DW_LANG_C_plus_plus ||
Language == dwarf::DW_LANG_C_plus_plus_03 ||
Language == dwarf::DW_LANG_C_plus_plus_11 ||
Language == dwarf::DW_LANG_C_plus_plus_14 ||
Language == dwarf::DW_LANG_ObjC_plus_plus ||
// This should not be needed for C, but we see C++ code marked as C
// in some binaries. This should hurt, so let's do it for C as well
Language == dwarf::DW_LANG_C))
return Gsym.insertString(ShortName, /* Copy */ false);
// Some GCC optimizations create functions with names ending with .isra.<num>
// or .part.<num> and those names are just DW_AT_name, not DW_AT_linkage_name
// If it looks like it could be the case, don't add any prefix
if (ShortName.startswith("_Z") &&
(ShortName.contains(".isra.") || ShortName.contains(".part.")))
return Gsym.insertString(ShortName, /* Copy */ false);
DWARFDie ParentDeclCtxDie = GetParentDeclContextDIE(Die);
if (ParentDeclCtxDie) {
std::string Name = ShortName.str();
while (ParentDeclCtxDie) {
StringRef ParentName(ParentDeclCtxDie.getName(DINameKind::ShortName));
if (!ParentName.empty()) {
// "lambda" names are wrapped in < >. Replace with { }
// to be consistent with demangled names and not to confuse with
// templates
if (ParentName.front() == '<' && ParentName.back() == '>')
Name = "{" + ParentName.substr(1, ParentName.size() - 2).str() + "}" +
"::" + Name;
else
Name = ParentName.str() + "::" + Name;
}
ParentDeclCtxDie = GetParentDeclContextDIE(ParentDeclCtxDie);
}
// Copy the name since we created a new name in a std::string.
return Gsym.insertString(Name, /* Copy */ true);
}
// Don't copy the name since it exists in the DWARF object file.
return Gsym.insertString(ShortName, /* Copy */ false);
}
static bool hasInlineInfo(DWARFDie Die, uint32_t Depth) {
bool CheckChildren = true;
switch (Die.getTag()) {
case dwarf::DW_TAG_subprogram:
// Don't look into functions within functions.
CheckChildren = Depth == 0;
break;
case dwarf::DW_TAG_inlined_subroutine:
return true;
default:
break;
}
if (!CheckChildren)
return false;
for (DWARFDie ChildDie : Die.children()) {
if (hasInlineInfo(ChildDie, Depth + 1))
return true;
}
return false;
}
static void parseInlineInfo(GsymCreator &Gsym, CUInfo &CUI, DWARFDie Die,
uint32_t Depth, FunctionInfo &FI,
InlineInfo &parent) {
if (!hasInlineInfo(Die, Depth))
return;
dwarf::Tag Tag = Die.getTag();
if (Tag == dwarf::DW_TAG_inlined_subroutine) {
// create new InlineInfo and append to parent.children
InlineInfo II;
DWARFAddressRange FuncRange =
DWARFAddressRange(FI.startAddress(), FI.endAddress());
Expected<DWARFAddressRangesVector> RangesOrError = Die.getAddressRanges();
if (RangesOrError) {
for (const DWARFAddressRange &Range : RangesOrError.get()) {
// Check that the inlined function is within the range of the function
// info, it might not be in case of split functions
if (FuncRange.LowPC <= Range.LowPC && Range.HighPC <= FuncRange.HighPC)
II.Ranges.insert(AddressRange(Range.LowPC, Range.HighPC));
}
}
if (II.Ranges.empty())
return;
if (auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym))
II.Name = *NameIndex;
II.CallFile = CUI.DWARFToGSYMFileIndex(
Gsym, dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_file), 0));
II.CallLine = dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_line), 0);
// parse all children and append to parent
for (DWARFDie ChildDie : Die.children())
parseInlineInfo(Gsym, CUI, ChildDie, Depth + 1, FI, II);
parent.Children.emplace_back(std::move(II));
return;
}
if (Tag == dwarf::DW_TAG_subprogram || Tag == dwarf::DW_TAG_lexical_block) {
// skip this Die and just recurse down
for (DWARFDie ChildDie : Die.children())
parseInlineInfo(Gsym, CUI, ChildDie, Depth + 1, FI, parent);
}
}
static void convertFunctionLineTable(raw_ostream &Log, CUInfo &CUI,
DWARFDie Die, GsymCreator &Gsym,
FunctionInfo &FI) {
std::vector<uint32_t> RowVector;
const uint64_t StartAddress = FI.startAddress();
const uint64_t EndAddress = FI.endAddress();
const uint64_t RangeSize = EndAddress - StartAddress;
const object::SectionedAddress SecAddress{
StartAddress, object::SectionedAddress::UndefSection};
if (!CUI.LineTable->lookupAddressRange(SecAddress, RangeSize, RowVector)) {
// If we have a DW_TAG_subprogram but no line entries, fall back to using
// the DW_AT_decl_file an d DW_AT_decl_line if we have both attributes.
if (auto FileIdx =
dwarf::toUnsigned(Die.findRecursively({dwarf::DW_AT_decl_file}))) {
if (auto Line =
dwarf::toUnsigned(Die.findRecursively({dwarf::DW_AT_decl_line}))) {
LineEntry LE(StartAddress, CUI.DWARFToGSYMFileIndex(Gsym, *FileIdx),
*Line);
FI.OptLineTable = LineTable();
FI.OptLineTable->push(LE);
// LE.Addr = EndAddress;
// FI.OptLineTable->push(LE);
}
}
return;
}
FI.OptLineTable = LineTable();
DWARFDebugLine::Row PrevRow;
for (uint32_t RowIndex : RowVector) {
// Take file number and line/column from the row.
const DWARFDebugLine::Row &Row = CUI.LineTable->Rows[RowIndex];
const uint32_t FileIdx = CUI.DWARFToGSYMFileIndex(Gsym, Row.File);
uint64_t RowAddress = Row.Address.Address;
// Watch out for a RowAddress that is in the middle of a line table entry
// in the DWARF. If we pass an address in between two line table entries
// we will get a RowIndex for the previous valid line table row which won't
// be contained in our function. This is usually a bug in the DWARF due to
// linker problems or LTO or other DWARF re-linking so it is worth emitting
// an error, but not worth stopping the creation of the GSYM.
if (!FI.Range.contains(RowAddress)) {
if (RowAddress < FI.Range.Start) {
Log << "error: DIE has a start address whose LowPC is between the "
"line table Row[" << RowIndex << "] with address "
<< HEX64(RowAddress) << " and the next one.\n";
Die.dump(Log, 0, DIDumpOptions::getForSingleDIE());
RowAddress = FI.Range.Start;
} else {
continue;
}
}
LineEntry LE(RowAddress, FileIdx, Row.Line);
if (RowIndex != RowVector[0] && Row.Address < PrevRow.Address) {
// We have seen full duplicate line tables for functions in some
// DWARF files. Watch for those here by checking the the last
// row was the function's end address (HighPC) and that the
// current line table entry's address is the same as the first
// line entry we already have in our "function_info.Lines". If
// so break out after printing a warning.
auto FirstLE = FI.OptLineTable->first();
if (FirstLE && *FirstLE == LE) {
Log << "warning: duplicate line table detected for DIE:\n";
Die.dump(Log, 0, DIDumpOptions::getForSingleDIE());
} else {
// Print out (ignore if os == nulls as this is expensive)
Log << "error: line table has addresses that do not "
<< "monotonically increase:\n";
for (uint32_t RowIndex2 : RowVector) {
CUI.LineTable->Rows[RowIndex2].dump(Log);
}
Die.dump(Log, 0, DIDumpOptions::getForSingleDIE());
}
break;
}
// Skip multiple line entries for the same file and line.
auto LastLE = FI.OptLineTable->last();
if (LastLE && LastLE->File == FileIdx && LastLE->Line == Row.Line)
continue;
// Only push a row if it isn't an end sequence. End sequence markers are
// included for the last address in a function or the last contiguous
// address in a sequence.
if (Row.EndSequence) {
// End sequence means that the next line entry could have a lower address
// that the previous entries. So we clear the previous row so we don't
// trigger the line table error about address that do not monotonically
// increase.
PrevRow = DWARFDebugLine::Row();
} else {
FI.OptLineTable->push(LE);
PrevRow = Row;
}
}
// If not line table rows were added, clear the line table so we don't encode
// on in the GSYM file.
if (FI.OptLineTable->empty())
FI.OptLineTable = llvm::None;
}
void DwarfTransformer::handleDie(raw_ostream &OS, CUInfo &CUI, DWARFDie Die) {
switch (Die.getTag()) {
case dwarf::DW_TAG_subprogram: {
Expected<DWARFAddressRangesVector> RangesOrError = Die.getAddressRanges();
if (!RangesOrError) {
consumeError(RangesOrError.takeError());
break;
}
const DWARFAddressRangesVector &Ranges = RangesOrError.get();
if (Ranges.empty())
break;
auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym);
if (!NameIndex) {
OS << "error: function at " << HEX64(Die.getOffset())
<< " has no name\n ";
Die.dump(OS, 0, DIDumpOptions::getForSingleDIE());
break;
}
// Create a function_info for each range
for (const DWARFAddressRange &Range : Ranges) {
// The low PC must be less than the high PC. Many linkers don't remove
// DWARF for functions that don't get linked into the final executable.
// If both the high and low pc have relocations, linkers will often set
// the address values for both to the same value to indicate the function
// has been remove. Other linkers have been known to set the one or both
// PC values to a UINT32_MAX for 4 byte addresses and UINT64_MAX for 8
// byte addresses to indicate the function isn't valid. The check below
// tries to watch for these cases and abort if it runs into them.
if (Range.LowPC >= Range.HighPC || CUI.isHighestAddress(Range.LowPC))
break;
// Many linkers can't remove DWARF and might set the LowPC to zero. Since
// high PC can be an offset from the low PC in more recent DWARF versions
// we need to watch for a zero'ed low pc which we do using
// ValidTextRanges below.
if (!Gsym.IsValidTextAddress(Range.LowPC)) {
// We expect zero and -1 to be invalid addresses in DWARF depending
// on the linker of the DWARF. This indicates a function was stripped
// and the debug info wasn't able to be stripped from the DWARF. If
// the LowPC isn't zero or -1, then we should emit an error.
if (Range.LowPC != 0) {
// Unexpected invalid address, emit an error
Log << "warning: DIE has an address range whose start address is "
"not in any executable sections (" <<
*Gsym.GetValidTextRanges() << ") and will not be processed:\n";
Die.dump(Log, 0, DIDumpOptions::getForSingleDIE());
}
break;
}
FunctionInfo FI;
FI.setStartAddress(Range.LowPC);
FI.setEndAddress(Range.HighPC);
FI.Name = *NameIndex;
if (CUI.LineTable) {
convertFunctionLineTable(OS, CUI, Die, Gsym, FI);
}
if (hasInlineInfo(Die, 0)) {
FI.Inline = InlineInfo();
FI.Inline->Name = *NameIndex;
FI.Inline->Ranges.insert(FI.Range);
parseInlineInfo(Gsym, CUI, Die, 0, FI, *FI.Inline);
}
Gsym.addFunctionInfo(std::move(FI));
}
} break;
default:
break;
}
for (DWARFDie ChildDie : Die.children())
handleDie(OS, CUI, ChildDie);
}
Error DwarfTransformer::convert(uint32_t NumThreads) {
size_t NumBefore = Gsym.getNumFunctionInfos();
if (NumThreads == 1) {
// Parse all DWARF data from this thread, use the same string/file table
// for everything
for (const auto &CU : DICtx.compile_units()) {
DWARFDie Die = CU->getUnitDIE(false);
CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get()));
handleDie(Log, CUI, Die);
}
} else {
// LLVM Dwarf parser is not thread-safe and we need to parse all DWARF up
// front before we start accessing any DIEs since there might be
// cross compile unit references in the DWARF. If we don't do this we can
// end up crashing.
// We need to call getAbbreviations sequentially first so that getUnitDIE()
// only works with its local data.
for (const auto &CU : DICtx.compile_units())
CU->getAbbreviations();
// Now parse all DIEs in case we have cross compile unit references in a
// thread pool.
ThreadPool pool(hardware_concurrency(NumThreads));
for (const auto &CU : DICtx.compile_units())
pool.async([&CU]() { CU->getUnitDIE(false /*CUDieOnly*/); });
pool.wait();
// Now convert all DWARF to GSYM in a thread pool.
std::mutex LogMutex;
for (const auto &CU : DICtx.compile_units()) {
DWARFDie Die = CU->getUnitDIE(false /*CUDieOnly*/);
if (Die) {
CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get()));
pool.async([this, CUI, &LogMutex, Die]() mutable {
std::string ThreadLogStorage;
raw_string_ostream ThreadOS(ThreadLogStorage);
handleDie(ThreadOS, CUI, Die);
ThreadOS.flush();
if (!ThreadLogStorage.empty()) {
// Print ThreadLogStorage lines into an actual stream under a lock
std::lock_guard<std::mutex> guard(LogMutex);
Log << ThreadLogStorage;
}
});
}
}
pool.wait();
}
size_t FunctionsAddedCount = Gsym.getNumFunctionInfos() - NumBefore;
Log << "Loaded " << FunctionsAddedCount << " functions from DWARF.\n";
return Error::success();
}
llvm::Error DwarfTransformer::verify(StringRef GsymPath) {
Log << "Verifying GSYM file \"" << GsymPath << "\":\n";
auto Gsym = GsymReader::openFile(GsymPath);
if (!Gsym)
return Gsym.takeError();
auto NumAddrs = Gsym->getNumAddresses();
DILineInfoSpecifier DLIS(
DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath,
DILineInfoSpecifier::FunctionNameKind::LinkageName);
std::string gsymFilename;
for (uint32_t I = 0; I < NumAddrs; ++I) {
auto FuncAddr = Gsym->getAddress(I);
if (!FuncAddr)
return createStringError(std::errc::invalid_argument,
"failed to extract address[%i]", I);
auto FI = Gsym->getFunctionInfo(*FuncAddr);
if (!FI)
return createStringError(std::errc::invalid_argument,
"failed to extract function info for address 0x%"
PRIu64, *FuncAddr);
for (auto Addr = *FuncAddr; Addr < *FuncAddr + FI->size(); ++Addr) {
const object::SectionedAddress SectAddr{
Addr, object::SectionedAddress::UndefSection};
auto LR = Gsym->lookup(Addr);
if (!LR)
return LR.takeError();
auto DwarfInlineInfos =
DICtx.getInliningInfoForAddress(SectAddr, DLIS);
uint32_t NumDwarfInlineInfos = DwarfInlineInfos.getNumberOfFrames();
if (NumDwarfInlineInfos == 0) {
DwarfInlineInfos.addFrame(
DICtx.getLineInfoForAddress(SectAddr, DLIS));
}
// Check for 1 entry that has no file and line info
if (NumDwarfInlineInfos == 1 &&
DwarfInlineInfos.getFrame(0).FileName == "<invalid>") {
DwarfInlineInfos = DIInliningInfo();
NumDwarfInlineInfos = 0;
}
if (NumDwarfInlineInfos > 0 &&
NumDwarfInlineInfos != LR->Locations.size()) {
Log << "error: address " << HEX64(Addr) << " has "
<< NumDwarfInlineInfos << " DWARF inline frames and GSYM has "
<< LR->Locations.size() << "\n";
Log << " " << NumDwarfInlineInfos << " DWARF frames:\n";
for (size_t Idx = 0; Idx < NumDwarfInlineInfos; ++Idx) {
const auto dii = DwarfInlineInfos.getFrame(Idx);
Log << " [" << Idx << "]: " << dii.FunctionName << " @ "
<< dii.FileName << ':' << dii.Line << '\n';
}
Log << " " << LR->Locations.size() << " GSYM frames:\n";
for (size_t Idx = 0, count = LR->Locations.size();
Idx < count; ++Idx) {
const auto &gii = LR->Locations[Idx];
Log << " [" << Idx << "]: " << gii.Name << " @ " << gii.Dir
<< '/' << gii.Base << ':' << gii.Line << '\n';
}
DwarfInlineInfos = DICtx.getInliningInfoForAddress(SectAddr, DLIS);
Gsym->dump(Log, *FI);
continue;
}
for (size_t Idx = 0, count = LR->Locations.size(); Idx < count;
++Idx) {
const auto &gii = LR->Locations[Idx];
if (Idx < NumDwarfInlineInfos) {
const auto dii = DwarfInlineInfos.getFrame(Idx);
gsymFilename = LR->getSourceFile(Idx);
// Verify function name
if (dii.FunctionName.find(gii.Name.str()) != 0)
Log << "error: address " << HEX64(Addr) << " DWARF function \""
<< dii.FunctionName.c_str()
<< "\" doesn't match GSYM function \"" << gii.Name << "\"\n";
// Verify source file path
if (dii.FileName != gsymFilename)
Log << "error: address " << HEX64(Addr) << " DWARF path \""
<< dii.FileName.c_str() << "\" doesn't match GSYM path \""
<< gsymFilename.c_str() << "\"\n";
// Verify source file line
if (dii.Line != gii.Line)
Log << "error: address " << HEX64(Addr) << " DWARF line "
<< dii.Line << " != GSYM line " << gii.Line << "\n";
}
}
}
}
return Error::success();
}