blob: 016f12cc216430a9619c68573b056dc125833d47 [file] [log] [blame]
//===-- llvm-objdump.cpp - Object file dumping utility for llvm -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This program is a utility that works like binutils "objdump", that is, it
// dumps out a plethora of information about an object file depending on the
// flags.
//
// The flags and output of this program should be near identical to those of
// binutils objdump.
//
//===----------------------------------------------------------------------===//
#include "llvm-objdump.h"
#include "COFFDump.h"
#include "ELFDump.h"
#include "MachODump.h"
#include "WasmDump.h"
#include "XCOFFDump.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/Symbolize/Symbolize.h"
#include "llvm/Demangle/Demangle.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/COFFImportFile.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/FaultMapParser.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/MachOUniversal.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/Wasm.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/StringSaver.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cctype>
#include <cstring>
#include <system_error>
#include <unordered_map>
#include <utility>
using namespace llvm;
using namespace llvm::object;
using namespace llvm::objdump;
#define DEBUG_TYPE "objdump"
// Command-line option declarations.
//
// Every option below is registered under ObjdumpCat via cl::cat(ObjdumpCat).
// Options declared `static` are private to this file; options qualified with
// `objdump::` are definitions of variables in the llvm::objdump namespace
// (presumably declared in llvm-objdump.h so other dumpers can read them --
// confirm against the header).
//
// Most long options have a single-letter cl::alias. The aliases are marked
// cl::NotHidden so they show up in the help output, and cl::Grouping so that
// they can be combined behind one dash.
static cl::OptionCategory ObjdumpCat("llvm-objdump Options");
// --adjust-vma=<offset>; defaults to 0.
static cl::opt<uint64_t> AdjustVMA(
"adjust-vma",
cl::desc("Increase the displayed address by the specified offset"),
cl::value_desc("offset"), cl::init(0), cl::cat(ObjdumpCat));
// --all-headers / -x
static cl::opt<bool>
AllHeaders("all-headers",
cl::desc("Display all available header information"),
cl::cat(ObjdumpCat));
static cl::alias AllHeadersShort("x", cl::desc("Alias for --all-headers"),
cl::NotHidden, cl::Grouping,
cl::aliasopt(AllHeaders));
// --arch-name; passed to TargetRegistry::lookupTarget() in getTarget().
static cl::opt<std::string>
ArchName("arch-name",
cl::desc("Target arch to disassemble for, "
"see --version for available targets"),
cl::cat(ObjdumpCat));
// --archive-headers / -a
cl::opt<bool>
objdump::ArchiveHeaders("archive-headers",
cl::desc("Display archive header information"),
cl::cat(ObjdumpCat));
static cl::alias ArchiveHeadersShort("a",
cl::desc("Alias for --archive-headers"),
cl::NotHidden, cl::Grouping,
cl::aliasopt(ArchiveHeaders));
// --demangle / -C; explicitly defaults to false.
cl::opt<bool> objdump::Demangle("demangle", cl::desc("Demangle symbols names"),
cl::init(false), cl::cat(ObjdumpCat));
static cl::alias DemangleShort("C", cl::desc("Alias for --demangle"),
cl::NotHidden, cl::Grouping,
cl::aliasopt(Demangle));
// --disassemble / -d
cl::opt<bool> objdump::Disassemble(
"disassemble",
cl::desc("Display assembler mnemonics for the machine instructions"),
cl::cat(ObjdumpCat));
static cl::alias DisassembleShort("d", cl::desc("Alias for --disassemble"),
cl::NotHidden, cl::Grouping,
cl::aliasopt(Disassemble));
// --disassemble-all / -D
// NOTE(review): the description text is identical to --disassemble's, so the
// help output does not distinguish the two options.
cl::opt<bool> objdump::DisassembleAll(
"disassemble-all",
cl::desc("Display assembler mnemonics for the machine instructions"),
cl::cat(ObjdumpCat));
static cl::alias DisassembleAllShort("D",
cl::desc("Alias for --disassemble-all"),
cl::NotHidden, cl::Grouping,
cl::aliasopt(DisassembleAll));
// --symbol-description (XCOFF only, per its description); defaults to false.
cl::opt<bool> objdump::SymbolDescription(
"symbol-description",
cl::desc("Add symbol description for disassembly. This "
"option is for XCOFF files only"),
cl::init(false), cl::cat(ObjdumpCat));
// --disassemble-symbols=<a,b,...>; comma-separated list.
static cl::list<std::string>
DisassembleSymbols("disassemble-symbols", cl::CommaSeparated,
cl::desc("List of symbols to disassemble. "
"Accept demangled names when --demangle is "
"specified, otherwise accept mangled names"),
cl::cat(ObjdumpCat));
// --disassemble-zeroes / -z
static cl::opt<bool> DisassembleZeroes(
"disassemble-zeroes",
cl::desc("Do not skip blocks of zeroes when disassembling"),
cl::cat(ObjdumpCat));
static cl::alias
DisassembleZeroesShort("z", cl::desc("Alias for --disassemble-zeroes"),
cl::NotHidden, cl::Grouping,
cl::aliasopt(DisassembleZeroes));
// --disassembler-options=<options> / -M<options>; comma-separated list. The
// short alias is additionally a cl::Prefix option, so its value may follow the
// letter directly.
static cl::list<std::string>
DisassemblerOptions("disassembler-options",
cl::desc("Pass target specific disassembler options"),
cl::value_desc("options"), cl::CommaSeparated,
cl::cat(ObjdumpCat));
static cl::alias
DisassemblerOptionsShort("M", cl::desc("Alias for --disassembler-options"),
cl::NotHidden, cl::Grouping, cl::Prefix,
cl::CommaSeparated,
cl::aliasopt(DisassemblerOptions));
// --dwarf=frames; the only accepted value maps to DIDT_DebugFrame, and the
// default is DIDT_Null.
cl::opt<DIDumpType> objdump::DwarfDumpType(
"dwarf", cl::init(DIDT_Null), cl::desc("Dump of dwarf debug sections:"),
cl::values(clEnumValN(DIDT_DebugFrame, "frames", ".debug_frame")),
cl::cat(ObjdumpCat));
// --dynamic-reloc / -R
static cl::opt<bool> DynamicRelocations(
"dynamic-reloc",
cl::desc("Display the dynamic relocation entries in the file"),
cl::cat(ObjdumpCat));
static cl::alias DynamicRelocationShort("R",
cl::desc("Alias for --dynamic-reloc"),
cl::NotHidden, cl::Grouping,
cl::aliasopt(DynamicRelocations));
// --fault-map-section
static cl::opt<bool>
FaultMapSection("fault-map-section",
cl::desc("Display contents of faultmap section"),
cl::cat(ObjdumpCat));
// --file-headers / -f
static cl::opt<bool>
FileHeaders("file-headers",
cl::desc("Display the contents of the overall file header"),
cl::cat(ObjdumpCat));
static cl::alias FileHeadersShort("f", cl::desc("Alias for --file-headers"),
cl::NotHidden, cl::Grouping,
cl::aliasopt(FileHeaders));
// --full-contents / -s
cl::opt<bool>
objdump::SectionContents("full-contents",
cl::desc("Display the content of each section"),
cl::cat(ObjdumpCat));
static cl::alias SectionContentsShort("s",
cl::desc("Alias for --full-contents"),
cl::NotHidden, cl::Grouping,
cl::aliasopt(SectionContents));
// Positional arguments: the input object files. cl::ZeroOrMore means the
// parser itself accepts an empty list.
static cl::list<std::string> InputFilenames(cl::Positional,
cl::desc("<input object files>"),
cl::ZeroOrMore,
cl::cat(ObjdumpCat));
// --line-numbers / -l
static cl::opt<bool>
PrintLines("line-numbers",
cl::desc("Display source line numbers with "
"disassembly. Implies disassemble object"),
cl::cat(ObjdumpCat));
static cl::alias PrintLinesShort("l", cl::desc("Alias for --line-numbers"),
cl::NotHidden, cl::Grouping,
cl::aliasopt(PrintLines));
// --macho / -m
static cl::opt<bool> MachOOpt("macho",
cl::desc("Use MachO specific object file parser"),
cl::cat(ObjdumpCat));
static cl::alias MachOm("m", cl::desc("Alias for --macho"), cl::NotHidden,
cl::Grouping, cl::aliasopt(MachOOpt));
// --mcpu=<cpu-name>; defaults to the empty string.
cl::opt<std::string> objdump::MCPU(
"mcpu", cl::desc("Target a specific cpu type (--mcpu=help for details)"),
cl::value_desc("cpu-name"), cl::init(""), cl::cat(ObjdumpCat));
// --mattr=<a1,+a2,-a3,...>; comma-separated list.
cl::list<std::string> objdump::MAttrs(
"mattr", cl::CommaSeparated,
cl::desc("Target specific attributes (--mattr=help for details)"),
cl::value_desc("a1,+a2,-a3,..."), cl::cat(ObjdumpCat));
// --no-show-raw-insn; also read by getInstStartColumn() to pick the column at
// which instruction text starts.
cl::opt<bool> objdump::NoShowRawInsn(
"no-show-raw-insn",
cl::desc(
"When disassembling instructions, do not print the instruction bytes."),
cl::cat(ObjdumpCat));
// --no-leading-addr
cl::opt<bool> objdump::NoLeadingAddr("no-leading-addr",
cl::desc("Print no leading address"),
cl::cat(ObjdumpCat));
// --raw-clang-ast
static cl::opt<bool> RawClangAST(
"raw-clang-ast",
cl::desc("Dump the raw binary contents of the clang AST section"),
cl::cat(ObjdumpCat));
// --reloc / -r
cl::opt<bool>
objdump::Relocations("reloc",
cl::desc("Display the relocation entries in the file"),
cl::cat(ObjdumpCat));
static cl::alias RelocationsShort("r", cl::desc("Alias for --reloc"),
cl::NotHidden, cl::Grouping,
cl::aliasopt(Relocations));
// --print-imm-hex
cl::opt<bool>
objdump::PrintImmHex("print-imm-hex",
cl::desc("Use hex format for immediate values"),
cl::cat(ObjdumpCat));
// --private-headers / -p
cl::opt<bool>
objdump::PrivateHeaders("private-headers",
cl::desc("Display format specific file headers"),
cl::cat(ObjdumpCat));
static cl::alias PrivateHeadersShort("p",
cl::desc("Alias for --private-headers"),
cl::NotHidden, cl::Grouping,
cl::aliasopt(PrivateHeaders));
// --section=<name> / -j<name>; may be given multiple times. Consulted by
// checkSectionFilter() and warnOnNoMatchForSections(). The short alias is a
// cl::Prefix option, so the name may follow the letter directly.
cl::list<std::string>
objdump::FilterSections("section",
cl::desc("Operate on the specified sections only. "
"With --macho dump segment,section"),
cl::cat(ObjdumpCat));
static cl::alias FilterSectionsj("j", cl::desc("Alias for --section"),
cl::NotHidden, cl::Grouping, cl::Prefix,
cl::aliasopt(FilterSections));
// --section-headers, with two aliases: --headers (a long alias, hence no
// cl::Grouping) and -h.
cl::opt<bool> objdump::SectionHeaders(
"section-headers",
cl::desc("Display summaries of the headers for each section."),
cl::cat(ObjdumpCat));
static cl::alias SectionHeadersShort("headers",
cl::desc("Alias for --section-headers"),
cl::NotHidden,
cl::aliasopt(SectionHeaders));
static cl::alias SectionHeadersShorter("h",
cl::desc("Alias for --section-headers"),
cl::NotHidden, cl::Grouping,
cl::aliasopt(SectionHeaders));
// --show-lma
static cl::opt<bool>
ShowLMA("show-lma",
cl::desc("Display LMA column when dumping ELF section headers"),
cl::cat(ObjdumpCat));
// --source / -S
static cl::opt<bool> PrintSource(
"source",
cl::desc(
"Display source inlined with disassembly. Implies disassemble object"),
cl::cat(ObjdumpCat));
static cl::alias PrintSourceShort("S", cl::desc("Alias for --source"),
cl::NotHidden, cl::Grouping,
cl::aliasopt(PrintSource));
// --start-address=<address> / --stop-address=<address>. The defaults (0 and
// UINT64_MAX) span the whole 64-bit address space.
static cl::opt<uint64_t>
StartAddress("start-address", cl::desc("Disassemble beginning at address"),
cl::value_desc("address"), cl::init(0), cl::cat(ObjdumpCat));
static cl::opt<uint64_t> StopAddress("stop-address",
cl::desc("Stop disassembly at address"),
cl::value_desc("address"),
cl::init(UINT64_MAX), cl::cat(ObjdumpCat));
// --syms / -t
cl::opt<bool> objdump::SymbolTable("syms", cl::desc("Display the symbol table"),
cl::cat(ObjdumpCat));
static cl::alias SymbolTableShort("t", cl::desc("Alias for --syms"),
cl::NotHidden, cl::Grouping,
cl::aliasopt(SymbolTable));
// --symbolize-operands
static cl::opt<bool> SymbolizeOperands(
"symbolize-operands",
cl::desc("Symbolize instruction operands when disassembling"),
cl::cat(ObjdumpCat));
// --dynamic-syms / -T
static cl::opt<bool> DynamicSymbolTable(
"dynamic-syms",
cl::desc("Display the contents of the dynamic symbol table"),
cl::cat(ObjdumpCat));
static cl::alias DynamicSymbolTableShort("T",
cl::desc("Alias for --dynamic-syms"),
cl::NotHidden, cl::Grouping,
cl::aliasopt(DynamicSymbolTable));
// --triple; when non-empty it overrides the triple derived from the object
// file, and getTarget() writes the resolved triple back into it.
cl::opt<std::string>
objdump::TripleName("triple",
cl::desc("Target triple to disassemble for, see "
"--version for available targets"),
cl::cat(ObjdumpCat));
// --unwind-info / -u
cl::opt<bool> objdump::UnwindInfo("unwind-info",
cl::desc("Display unwind information"),
cl::cat(ObjdumpCat));
static cl::alias UnwindInfoShort("u", cl::desc("Alias for --unwind-info"),
cl::NotHidden, cl::Grouping,
cl::aliasopt(UnwindInfo));
// --wide / -w; accepted but ignored (see the description). Unlike the other
// aliases, -w is not marked cl::NotHidden and has no description.
static cl::opt<bool>
Wide("wide", cl::desc("Ignored for compatibility with GNU objdump"),
cl::cat(ObjdumpCat));
static cl::alias WideShort("w", cl::Grouping, cl::aliasopt(Wide));
// --prefix
cl::opt<std::string> objdump::Prefix("prefix",
cl::desc("Add prefix to absolute paths"),
cl::cat(ObjdumpCat));
// Rendering mode selected by --debug-vars.
enum DebugVarsFormat {
// Option not given (the cl::init default of DbgVariables).
DVDisabled,
// Selected by a bare --debug-vars or by --debug-vars=unicode.
DVUnicode,
// Selected by --debug-vars=ascii; LiveVariablePrinter::getLineChar() then
// returns plain ASCII characters instead of box-drawing characters.
DVASCII,
};
// --debug-vars[=unicode|ascii]
//
// The value is optional (cl::ValueOptional): a bare --debug-vars maps to the
// empty-string enumerator and therefore selects DVUnicode. The option defaults
// to DVDisabled when it is not given at all.
static cl::opt<DebugVarsFormat> DbgVariables(
    "debug-vars", cl::init(DVDisabled),
    cl::desc("Print the locations (in registers or memory) of "
             "source-level variables alongside disassembly"),
    cl::ValueOptional,
    cl::values(clEnumValN(DVUnicode, "", "unicode"),
               clEnumValN(DVUnicode, "unicode", "unicode"),
               // The help text for this value previously read "unicode",
               // which mislabelled the ASCII rendering mode.
               clEnumValN(DVASCII, "ascii", "ascii")),
    cl::cat(ObjdumpCat));
// --debug-vars-indent=<n>; defaults to 40. Added to getInstStartColumn() by
// LiveVariablePrinter::getIndentLevel() to find the first live-range column.
static cl::opt<int>
DbgIndent("debug-vars-indent", cl::init(40),
cl::desc("Distance to indent the source-level variable display, "
"relative to the start of the disassembly"),
cl::cat(ObjdumpCat));
// Extra paragraph appended to the help output.
static cl::extrahelp
HelpResponse("\nPass @FILE as argument to read options from FILE.\n");
// NOTE(review): presumably filled from DisassembleSymbols elsewhere in the
// file; it is not written in the portion shown here -- confirm.
static StringSet<> DisasmSymbolSet;
// Names of all (non-empty-named) sections seen by checkSectionFilter() while
// a --section filter is active; read by warnOnNoMatchForSections().
StringSet<> objdump::FoundSectionSet;
// Prefix passed to WithColor::warning()/error() by the report* helpers.
static StringRef ToolName;
namespace {
// Result of checkSectionFilter(): whether a section passes the --section
// filter, and whether it should still count toward the running section index.
struct FilterResult {
// True if the section should not be skipped.
bool Keep;
// True if the index counter should be incremented, even if the section should
// be skipped. For example, sections may be skipped if they are not included
// in the --section flag, but we still want those to count toward the section
// count.
bool IncrementIndex;
};
} // namespace
/// Apply the --section/-j filter to section \p S.
///
/// With no filter active every section is kept and counted. Otherwise the
/// section's name is recorded in FoundSectionSet (unless it is empty) and the
/// section is kept only if the name appears in FilterSections. A section
/// whose name cannot be read is neither kept nor counted; the error is
/// silently consumed.
static FilterResult checkSectionFilter(object::SectionRef S) {
  const bool FilterActive = !FilterSections.empty();
  if (!FilterActive)
    return {/*Keep=*/true, /*IncrementIndex=*/true};

  Expected<StringRef> NameOrErr = S.getName();
  if (!NameOrErr) {
    consumeError(NameOrErr.takeError());
    return {/*Keep=*/false, /*IncrementIndex=*/false};
  }

  const StringRef Name = NameOrErr.get();
  // A StringSet cannot hold an empty key, so unnamed sections (for example
  // the section with index 0) are never recorded as "found".
  if (!Name.empty())
    FoundSectionSet.insert(Name);

  // Filtered-out sections still advance the index so that the numbering stays
  // stable regardless of which sections were requested.
  const bool Requested = is_contained(FilterSections, Name);
  return {/*Keep=*/Requested, /*IncrementIndex=*/true};
}
/// Build a SectionFilter over \p O that honours the --section/-j option.
///
/// If \p Idx is non-null it is reset here and then advanced by the predicate
/// for every section that checkSectionFilter() says should be counted. The
/// pointer is captured by the returned filter.
SectionFilter objdump::ToolSectionFilter(object::ObjectFile const &O,
                                         uint64_t *Idx) {
  // Seed the counter with UINT64_MAX: the first increment wraps around, so
  // the first counted section gets index zero.
  if (Idx != nullptr)
    *Idx = UINT64_MAX;

  auto Predicate = [Idx](object::SectionRef S) {
    const FilterResult Verdict = checkSectionFilter(S);
    if (Idx && Verdict.IncrementIndex)
      *Idx += 1;
    return Verdict.Keep;
  };
  return SectionFilter(Predicate, O);
}
/// Return a printable name for archive member \p C for use in diagnostics.
///
/// Falls back to "<file index: N>" (with N = \p Index) when the member's name
/// cannot be read.
std::string objdump::getFileNameForError(const object::Archive::Child &C,
                                         unsigned Index) {
  Expected<StringRef> NameOrErr = C.getName();
  if (!NameOrErr) {
    // We are already on an error path, so the failure to read the name is
    // dropped and the member is identified by its position instead.
    consumeError(NameOrErr.takeError());
    return "<file index: " + std::to_string(Index) + ">";
  }
  return std::string(NameOrErr.get());
}
void objdump::reportWarning(const Twine &Message, StringRef File) {
// Output order between errs() and outs() matters especially for archive
// files where the output is per member object.
outs().flush();
WithColor::warning(errs(), ToolName)
<< "'" << File << "': " << Message << "\n";
}
/// Print an error of the form "'<File>': <Message>" to stderr and terminate
/// the process with exit status 1.
LLVM_ATTRIBUTE_NORETURN void objdump::reportError(StringRef File,
                                                  const Twine &Message) {
  // Flush stdout before writing to stderr.
  outs().flush();
  raw_ostream &OS = WithColor::error(errs(), ToolName);
  OS << "'" << File << "': " << Message << "\n";
  exit(1);
}
/// Report \p E against a file and terminate the process with exit status 1.
///
/// The location is printed as "<ArchiveName>(<FileName>)" when an archive
/// name is given and as "'<FileName>'" otherwise, optionally followed by
/// " (for architecture <ArchitectureName>)". \p E must hold a failure.
LLVM_ATTRIBUTE_NORETURN void objdump::reportError(Error E, StringRef FileName,
                                                  StringRef ArchiveName,
                                                  StringRef ArchitectureName) {
  assert(E);
  // Flush stdout before writing to stderr.
  outs().flush();
  WithColor::error(errs(), ToolName);

  if (ArchiveName.empty())
    errs() << "'" << FileName << "'";
  else
    errs() << ArchiveName << "(" << FileName << ")";

  const bool HaveArchitecture = !ArchitectureName.empty();
  if (HaveArchitecture)
    errs() << " (for architecture " << ArchitectureName << ")";

  errs() << ": ";
  logAllUnhandledErrors(std::move(E), errs());
  exit(1);
}
static void reportCmdLineWarning(const Twine &Message) {
WithColor::warning(errs(), ToolName) << Message << "\n";
}
/// Print an error that is not tied to any input file and terminate the
/// process with exit status 1.
LLVM_ATTRIBUTE_NORETURN static void reportCmdLineError(const Twine &Message) {
  raw_ostream &OS = WithColor::error(errs(), ToolName);
  OS << Message << "\n";
  exit(1);
}
static void warnOnNoMatchForSections() {
SetVector<StringRef> MissingSections;
for (StringRef S : FilterSections) {
if (FoundSectionSet.count(S))
return;
// User may specify a unnamed section. Don't warn for it.
if (!S.empty())
MissingSections.insert(S);
}
// Warn only if no section in FilterSections is matched.
for (StringRef S : MissingSections)
reportCmdLineWarning("section '" + S +
"' mentioned in a -j/--section option, but not "
"found in any input file");
}
/// Look up the Target to use for \p Obj.
///
/// The triple comes from the object itself unless --triple was given, in
/// which case the normalized user triple is used (refined with the object's
/// ARM sub-architecture for arm/armeb objects). On success TripleName is
/// overwritten with the triple that was actually used. If no target is found
/// the error is reported against the object's file name via reportError(),
/// which does not return.
static const Target *getTarget(const ObjectFile *Obj) {
  // Work out which triple to look up.
  Triple TheTriple("unknown-unknown-unknown");
  if (!TripleName.empty()) {
    TheTriple.setTriple(Triple::normalize(TripleName));
    const auto ObjArch = Obj->getArch();
    if (ObjArch == Triple::arm || ObjArch == Triple::armeb)
      Obj->setARMSubArch(TheTriple);
  } else {
    TheTriple = Obj->makeTriple();
  }

  // Ask the registry for the matching target.
  std::string LookupError;
  const Target *Found =
      TargetRegistry::lookupTarget(ArchName, TheTriple, LookupError);
  if (Found == nullptr)
    reportError(Obj->getFileName(), "can't find target: " + LookupError);

  // Publish the triple that was actually used.
  TripleName = TheTriple.getTriple();
  return Found;
}
/// Ordering predicate: true if relocation \p A has a smaller offset than
/// relocation \p B.
bool objdump::isRelocAddressLess(RelocationRef A, RelocationRef B) {
  const auto OffsetA = A.getOffset();
  const auto OffsetB = B.getOffset();
  return OffsetA < OffsetB;
}
/// Write the textual value of \p Rel into \p Result by dispatching to the
/// helper for the object file format that owns the relocation.
///
/// The formats are probed in the order ELF, COFF, Wasm, MachO, XCOFF; any
/// other object file kind is treated as unreachable.
static Error getRelocationValueString(const RelocationRef &Rel,
                                      SmallVectorImpl<char> &Result) {
  const ObjectFile *Owner = Rel.getObject();
  if (isa<ELFObjectFileBase>(Owner))
    return getELFRelocationValueString(cast<ELFObjectFileBase>(Owner), Rel,
                                       Result);
  if (isa<COFFObjectFile>(Owner))
    return getCOFFRelocationValueString(cast<COFFObjectFile>(Owner), Rel,
                                        Result);
  if (isa<WasmObjectFile>(Owner))
    return getWasmRelocationValueString(cast<WasmObjectFile>(Owner), Rel,
                                        Result);
  if (isa<MachOObjectFile>(Owner))
    return getMachORelocationValueString(cast<MachOObjectFile>(Owner), Rel,
                                         Result);
  if (isa<XCOFFObjectFile>(Owner))
    return getXCOFFRelocationValueString(cast<XCOFFObjectFile>(Owner), Rel,
                                         Result);
  llvm_unreachable("unknown object file format");
}
/// Indicates whether this relocation should be hidden when listing
/// relocations, usually because it is the trailing part of a multipart
/// relocation that will be printed as part of the leading relocation.
///
/// Only MachO relocations are ever hidden; for every other object format
/// this returns false.
static bool getHidden(RelocationRef RelRef) {
  const auto *MachO = dyn_cast<MachOObjectFile>(RelRef.getObject());
  if (MachO == nullptr)
    return false;

  const unsigned Arch = MachO->getArch();
  const DataRefImpl Rel = RelRef.getRawDataRefImpl();
  const uint64_t Type = MachO->getRelocationType(Rel);

  switch (Arch) {
  // Arches that use the generic relocations: GENERIC_RELOC_PAIR is always
  // hidden.
  case Triple::x86:
  case Triple::arm:
  case Triple::ppc:
    return Type == MachO::GENERIC_RELOC_PAIR;

  // x86_64: X86_64_RELOC_UNSIGNED is hidden only when it directly follows an
  // X86_64_RELOC_SUBTRACTOR, so the very first relocation is never hidden.
  case Triple::x86_64: {
    if (Type != MachO::X86_64_RELOC_UNSIGNED || Rel.d.a == 0)
      return false;
    DataRefImpl Previous = Rel;
    --Previous.d.a;
    return MachO->getRelocationType(Previous) ==
           MachO::X86_64_RELOC_SUBTRACTOR;
  }

  default:
    return false;
  }
}
namespace {
/// Return the column at which instruction disassembly text starts, accounting
/// for whatever is printed to its left: 16 when raw instruction bytes are
/// suppressed (--no-show-raw-insn), otherwise 40 for x86 targets and 24 for
/// all other targets.
unsigned getInstStartColumn(const MCSubtargetInfo &STI) {
  if (NoShowRawInsn)
    return 16;
  if (STI.getTargetTriple().isX86())
    return 40;
  return 24;
}
/// Stores a single expression representing the location of a source-level
/// variable, along with the PC range for which that expression is valid.
struct LiveVariable {
DWARFLocationExpression LocExpr;
const char *VarName;
DWARFUnit *Unit;
const DWARFDie FuncDie;
LiveVariable(const DWARFLocationExpression &LocExpr, const char *VarName,
DWARFUnit *Unit, const DWARFDie FuncDie)
: LocExpr(LocExpr), VarName(VarName), Unit(Unit), FuncDie(FuncDie) {}
bool liveAtAddress(object::SectionedAddress Addr) {
if (LocExpr.Range == None)
return false;
return LocExpr.Range->SectionIndex == Addr.SectionIndex &&
LocExpr.Range->LowPC <= Addr.Address &&
LocExpr.Range->HighPC > Addr.Address;
}
void print(raw_ostream &OS, const MCRegisterInfo &MRI) const {
DataExtractor Data({LocExpr.Expr.data(), LocExpr.Expr.size()},
Unit->getContext().isLittleEndian(), 0);
DWARFExpression Expression(Data, Unit->getAddressByteSize());
Expression.printCompact(OS, MRI);
}
};
/// Helper class for printing source variable locations alongside disassembly.
///
/// Usage, as far as this class is concerned: feed DIEs in through
/// addCompileUnit(), call update() for each instruction, and use the print*
/// methods to draw the live-range columns to the right of each output line.
/// Every logical column occupies two physical characters.
class LiveVariablePrinter {
  // Information we want to track about one column in which we are printing a
  // variable live range.
  struct Column {
    unsigned VarIdx = NullVarIdx;
    bool LiveIn = false;
    bool LiveOut = false;
    bool MustDrawLabel = false;

    bool isActive() const { return VarIdx != NullVarIdx; }

    static constexpr unsigned NullVarIdx = std::numeric_limits<unsigned>::max();
  };

  // All live variables we know about in the object/image file.
  std::vector<LiveVariable> LiveVariables;

  // The columns we are currently drawing.
  IndexedMap<Column> ActiveCols;

  const MCRegisterInfo &MRI;
  const MCSubtargetInfo &STI;

  // Record every location expression of VarDie, which was found underneath
  // FuncDie. Expressions that carry their own range are recorded as-is;
  // range-less expressions are given the low/high PC range of FuncDie.
  void addVariable(DWARFDie FuncDie, DWARFDie VarDie) {
    // getLowAndHighPC() leaves its outputs untouched on failure, so
    // initialize them and remember whether the range is usable.
    uint64_t FuncLowPC = 0, FuncHighPC = 0, SectionIndex = 0;
    const bool HaveFuncRange =
        FuncDie.getLowAndHighPC(FuncLowPC, FuncHighPC, SectionIndex);
    const char *VarName = VarDie.getName(DINameKind::ShortName);
    DWARFUnit *U = VarDie.getDwarfUnit();

    Expected<DWARFLocationExpressionsVector> Locs =
        VarDie.getLocations(dwarf::DW_AT_location);
    if (!Locs) {
      // If the variable doesn't have any locations, just ignore it. We don't
      // report an error or warning here as that could be noisy on optimised
      // code.
      consumeError(Locs.takeError());
      return;
    }

    for (const DWARFLocationExpression &LocExpr : *Locs) {
      if (LocExpr.Range) {
        LiveVariables.emplace_back(LocExpr, VarName, U, FuncDie);
      } else if (HaveFuncRange) {
        // If the LocExpr does not have an associated range, it is valid for
        // the whole of the function.
        // TODO: technically it is not valid for any range covered by another
        // LocExpr, does that happen in reality?
        DWARFLocationExpression WholeFuncExpr{
            DWARFAddressRange(FuncLowPC, FuncHighPC, SectionIndex),
            LocExpr.Expr};
        LiveVariables.emplace_back(WholeFuncExpr, VarName, U, FuncDie);
      }
      // Otherwise the enclosing DIE has no low/high PC (for example it is
      // not a function), so there is no range this expression could be
      // live over; drop it rather than invent one.
    }
  }

  // Walk the children of D recursively, recording every variable and formal
  // parameter found at any depth.
  void addFunction(DWARFDie D) {
    for (const DWARFDie &Child : D.children()) {
      if (Child.getTag() == dwarf::DW_TAG_variable ||
          Child.getTag() == dwarf::DW_TAG_formal_parameter)
        addVariable(D, Child);
      else
        addFunction(Child);
    }
  }

  // Get the column number (in characters) at which the first live variable
  // line should be printed.
  unsigned getIndentLevel() const {
    return DbgIndent + getInstStartColumn(STI);
  }

  // Indent to the first live-range column to the right of the currently
  // printed line, and return the index of that column.
  // TODO: formatted_raw_ostream uses "column" to mean a number of characters
  // since the last \n, and we use it to mean the number of slots in which we
  // put live variable lines. Pick a less overloaded word.
  unsigned moveToFirstVarColumn(formatted_raw_ostream &OS) {
    // Logical column number: column zero is the first column we print in, each
    // logical column is 2 physical columns wide.
    unsigned FirstUnprintedLogicalColumn =
        std::max((int)(OS.getColumn() - getIndentLevel() + 1) / 2, 0);
    // Physical column number: the actual column number in characters, with
    // zero being the left-most side of the screen.
    unsigned FirstUnprintedPhysicalColumn =
        getIndentLevel() + FirstUnprintedLogicalColumn * 2;

    if (FirstUnprintedPhysicalColumn > OS.getColumn())
      OS.PadToColumn(FirstUnprintedPhysicalColumn);

    return FirstUnprintedLogicalColumn;
  }

  // Return the index of an inactive column, growing ActiveCols if every
  // existing column is in use.
  unsigned findFreeColumn() {
    for (unsigned ColIdx = 0; ColIdx < ActiveCols.size(); ++ColIdx)
      if (!ActiveCols[ColIdx].isActive())
        return ColIdx;

    size_t OldSize = ActiveCols.size();
    ActiveCols.grow(std::max<size_t>(OldSize * 2, 1));
    return OldSize;
  }

public:
  LiveVariablePrinter(const MCRegisterInfo &MRI, const MCSubtargetInfo &STI)
      : LiveVariables(), ActiveCols(Column()), MRI(MRI), STI(STI) {}

  /// Write every recorded variable, its range and its location expression to
  /// dbgs().
  void dump() const {
    for (const LiveVariable &LV : LiveVariables) {
      dbgs() << LV.VarName << " @ " << LV.LocExpr.Range << ": ";
      LV.print(dbgs(), MRI);
      dbgs() << "\n";
    }
  }

  /// Record the variables found under \p D. If \p D is itself a subprogram
  /// it is scanned directly; otherwise each of its children is scanned.
  void addCompileUnit(DWARFDie D) {
    if (D.getTag() == dwarf::DW_TAG_subprogram)
      addFunction(D);
    else
      for (const DWARFDie &Child : D.children())
        addFunction(Child);
  }

  /// Update to match the state of the instruction between ThisAddr and
  /// NextAddr. In the common case, any live range active at ThisAddr is
  /// live-in to the instruction, and any live range active at NextAddr is
  /// live-out of the instruction. If IncludeDefinedVars is false, then live
  /// ranges starting at NextAddr will be ignored.
  void update(object::SectionedAddress ThisAddr,
              object::SectionedAddress NextAddr, bool IncludeDefinedVars) {
    // First, check variables which have already been assigned a column, so
    // that we don't change their order.
    SmallSet<unsigned, 8> CheckedVarIdxs;
    for (unsigned ColIdx = 0, End = ActiveCols.size(); ColIdx < End; ++ColIdx) {
      if (!ActiveCols[ColIdx].isActive())
        continue;
      CheckedVarIdxs.insert(ActiveCols[ColIdx].VarIdx);
      LiveVariable &LV = LiveVariables[ActiveCols[ColIdx].VarIdx];
      ActiveCols[ColIdx].LiveIn = LV.liveAtAddress(ThisAddr);
      ActiveCols[ColIdx].LiveOut = LV.liveAtAddress(NextAddr);
      LLVM_DEBUG(dbgs() << "pass 1, " << ThisAddr.Address << "-"
                        << NextAddr.Address << ", " << LV.VarName << ", Col "
                        << ColIdx << ": LiveIn=" << ActiveCols[ColIdx].LiveIn
                        << ", LiveOut=" << ActiveCols[ColIdx].LiveOut << "\n");

      // A variable that is live neither before nor after this instruction
      // gives its column back.
      if (!ActiveCols[ColIdx].LiveIn && !ActiveCols[ColIdx].LiveOut)
        ActiveCols[ColIdx].VarIdx = Column::NullVarIdx;
    }

    // Next, look for variables which don't already have a column, but which
    // are now live.
    if (IncludeDefinedVars) {
      for (unsigned VarIdx = 0, End = LiveVariables.size(); VarIdx < End;
           ++VarIdx) {
        if (CheckedVarIdxs.count(VarIdx))
          continue;
        LiveVariable &LV = LiveVariables[VarIdx];
        bool LiveIn = LV.liveAtAddress(ThisAddr);
        bool LiveOut = LV.liveAtAddress(NextAddr);
        if (!LiveIn && !LiveOut)
          continue;

        unsigned ColIdx = findFreeColumn();
        LLVM_DEBUG(dbgs() << "pass 2, " << ThisAddr.Address << "-"
                          << NextAddr.Address << ", " << LV.VarName << ", Col "
                          << ColIdx << ": LiveIn=" << LiveIn
                          << ", LiveOut=" << LiveOut << "\n");
        ActiveCols[ColIdx].VarIdx = VarIdx;
        ActiveCols[ColIdx].LiveIn = LiveIn;
        ActiveCols[ColIdx].LiveOut = LiveOut;
        ActiveCols[ColIdx].MustDrawLabel = true;
      }
    }
  }

  enum class LineChar {
    RangeStart,
    RangeMid,
    RangeEnd,
    LabelVert,
    LabelCornerNew,
    LabelCornerActive,
    LabelHoriz,
  };

  /// Return the string used to draw \p C: a plain ASCII character when
  /// --debug-vars=ascii was requested, otherwise a UTF-8 encoded box-drawing
  /// character.
  const char *getLineChar(LineChar C) const {
    bool IsASCII = DbgVariables == DVASCII;
    switch (C) {
    case LineChar::RangeStart:
      return IsASCII ? "^" : (const char *)u8"\u2548";
    case LineChar::RangeMid:
      return IsASCII ? "|" : (const char *)u8"\u2503";
    case LineChar::RangeEnd:
      return IsASCII ? "v" : (const char *)u8"\u253b";
    case LineChar::LabelVert:
      return IsASCII ? "|" : (const char *)u8"\u2502";
    case LineChar::LabelCornerNew:
      return IsASCII ? "/" : (const char *)u8"\u250c";
    case LineChar::LabelCornerActive:
      return IsASCII ? "|" : (const char *)u8"\u2520";
    case LineChar::LabelHoriz:
      return IsASCII ? "-" : (const char *)u8"\u2500";
    }
    llvm_unreachable("Unhandled LineChar enum");
  }

  /// Print live ranges to the right of an existing line. This assumes the
  /// line is not an instruction, so doesn't start or end any live ranges, so
  /// we only need to print active ranges or empty columns. If AfterInst is
  /// true, this is being printed after the last instruction fed to update(),
  /// otherwise this is being printed before it.
  void printAfterOtherLine(formatted_raw_ostream &OS, bool AfterInst) {
    if (ActiveCols.size()) {
      unsigned FirstUnprintedColumn = moveToFirstVarColumn(OS);
      for (size_t ColIdx = FirstUnprintedColumn, End = ActiveCols.size();
           ColIdx < End; ++ColIdx) {
        // Every column, active or not, must emit exactly two characters so
        // that the columns to its right stay aligned.
        if (!ActiveCols[ColIdx].isActive())
          OS << " ";
        else if ((AfterInst && ActiveCols[ColIdx].LiveOut) ||
                 (!AfterInst && ActiveCols[ColIdx].LiveIn))
          OS << getLineChar(LineChar::RangeMid);
        else if (!AfterInst && ActiveCols[ColIdx].LiveOut)
          OS << getLineChar(LineChar::LabelVert);
        else
          OS << " ";
        OS << " ";
      }
    }
    OS << "\n";
  }

  /// Print any live variable range info needed to the right of a
  /// non-instruction line of disassembly. This is where we print the variable
  /// names and expressions, with thin line-drawing characters connecting them
  /// to the live range which starts at the next instruction. If MustPrint is
  /// true, we have to print at least one line (with the continuation of any
  /// already-active live ranges) because something has already been printed
  /// earlier on this line.
  void printBetweenInsts(formatted_raw_ostream &OS, bool MustPrint) {
    bool PrintedSomething = false;
    for (unsigned ColIdx = 0, End = ActiveCols.size(); ColIdx < End; ++ColIdx) {
      if (ActiveCols[ColIdx].isActive() && ActiveCols[ColIdx].MustDrawLabel) {
        // First we need to print the live range markers for any active
        // columns to the left of this one.
        OS.PadToColumn(getIndentLevel());
        for (unsigned ColIdx2 = 0; ColIdx2 < ColIdx; ++ColIdx2) {
          if (ActiveCols[ColIdx2].isActive()) {
            if (ActiveCols[ColIdx2].MustDrawLabel &&
                !ActiveCols[ColIdx2].LiveIn)
              OS << getLineChar(LineChar::LabelVert) << " ";
            else
              OS << getLineChar(LineChar::RangeMid) << " ";
          } else {
            // Empty column: two characters wide, like an active one.
            OS << "  ";
          }
        }

        // Then print the variable name and location of the new live range,
        // with box drawing characters joining it to the live range line.
        OS << getLineChar(ActiveCols[ColIdx].LiveIn
                              ? LineChar::LabelCornerActive
                              : LineChar::LabelCornerNew)
           << getLineChar(LineChar::LabelHoriz) << " ";
        WithColor(OS, raw_ostream::GREEN)
            << LiveVariables[ActiveCols[ColIdx].VarIdx].VarName;
        OS << " = ";
        {
          // Scoped so that the colour is reset before the columns to the
          // right are drawn.
          WithColor ExprColor(OS, raw_ostream::CYAN);
          LiveVariables[ActiveCols[ColIdx].VarIdx].print(OS, MRI);
        }

        // If there are any columns to the right of the expression we just
        // printed, then continue their live range lines.
        unsigned FirstUnprintedColumn = moveToFirstVarColumn(OS);
        for (unsigned ColIdx2 = FirstUnprintedColumn,
                      End2 = ActiveCols.size();
             ColIdx2 < End2; ++ColIdx2) {
          if (ActiveCols[ColIdx2].isActive() && ActiveCols[ColIdx2].LiveIn)
            OS << getLineChar(LineChar::RangeMid) << " ";
          else
            OS << "  ";
        }

        OS << "\n";
        PrintedSomething = true;
      }
    }

    // All pending labels have been drawn (or there were none).
    for (unsigned ColIdx = 0, End = ActiveCols.size(); ColIdx < End; ++ColIdx)
      if (ActiveCols[ColIdx].isActive())
        ActiveCols[ColIdx].MustDrawLabel = false;

    // If we must print something (because we printed a line/column number),
    // but don't have any new variables to print, then print a line which
    // just continues any existing live ranges.
    if (MustPrint && !PrintedSomething)
      printAfterOtherLine(OS, false);
  }

  /// Print the live variable ranges to the right of a disassembled instruction.
  void printAfterInst(formatted_raw_ostream &OS) {
    if (!ActiveCols.size())
      return;
    unsigned FirstUnprintedColumn = moveToFirstVarColumn(OS);
    for (unsigned ColIdx = FirstUnprintedColumn, End = ActiveCols.size();
         ColIdx < End; ++ColIdx) {
      if (!ActiveCols[ColIdx].isActive())
        // Empty column: two characters wide, like an active one.
        OS << "  ";
      else if (ActiveCols[ColIdx].LiveIn && ActiveCols[ColIdx].LiveOut)
        OS << getLineChar(LineChar::RangeMid) << " ";
      else if (ActiveCols[ColIdx].LiveOut)
        OS << getLineChar(LineChar::RangeStart) << " ";
      else if (ActiveCols[ColIdx].LiveIn)
        OS << getLineChar(LineChar::RangeEnd) << " ";
      else
        llvm_unreachable("var must be live in or out!");
    }
  }
};
/// Prints source-level context (function names, file:line markers and source
/// text) in between disassembled instructions.
///
/// A default-constructed printer has no symbolizer; printSourceLine() is then
/// a no-op.
class SourcePrinter {
protected:
  // Line info printed for the previous instruction; used to suppress
  // repeated output when consecutive instructions map to the same location.
  DILineInfo OldLineInfo;
  const ObjectFile *Obj = nullptr;
  std::unique_ptr<symbolize::LLVMSymbolizer> Symbolizer;
  // File name to file contents of source.
  std::unordered_map<std::string, std::unique_ptr<MemoryBuffer>> SourceCache;
  // Mark the line endings of the cached source.
  std::unordered_map<std::string, std::vector<StringRef>> LineCache;
  // Keep track of missing sources.
  StringSet<> MissingSources;
  // Only emit 'invalid debug info' warning once.
  bool WarnedInvalidDebugInfo = false;

private:
  /// Load the source referenced by \p LineInfo into SourceCache/LineCache.
  /// Returns false if the source could not be obtained.
  bool cacheSource(const DILineInfo &LineInfo);

  /// Print the function name and "file:line" marker for \p LineInfo.
  void printLines(formatted_raw_ostream &OS, const DILineInfo &LineInfo,
                  StringRef Delimiter, LiveVariablePrinter &LVP);

  /// Print the line of source text referenced by \p LineInfo.
  void printSources(formatted_raw_ostream &OS, const DILineInfo &LineInfo,
                    StringRef ObjectFilename, StringRef Delimiter,
                    LiveVariablePrinter &LVP);

public:
  SourcePrinter() = default;

  /// Create a printer that symbolizes addresses of \p Obj, asking the
  /// symbolizer for linkage names and honouring the global Demangle option.
  SourcePrinter(const ObjectFile *Obj, StringRef DefaultArch) : Obj(Obj) {
    symbolize::LLVMSymbolizer::Options SymbolizerOpts;
    SymbolizerOpts.PrintFunctions =
        DILineInfoSpecifier::FunctionNameKind::LinkageName;
    SymbolizerOpts.Demangle = Demangle;
    SymbolizerOpts.DefaultArch = std::string(DefaultArch);
    // Construct through make_unique rather than handing a raw `new` to reset().
    Symbolizer = std::make_unique<symbolize::LLVMSymbolizer>(SymbolizerOpts);
  }

  virtual ~SourcePrinter() = default;

  /// Print the source context for the instruction at \p Address.
  virtual void printSourceLine(formatted_raw_ostream &OS,
                               object::SectionedAddress Address,
                               StringRef ObjectFilename,
                               LiveVariablePrinter &LVP,
                               StringRef Delimiter = "; ");
};
/// Load the source text for \p LineInfo and index its lines.
///
/// The text comes from the embedded source in \p LineInfo when present,
/// otherwise from the file named by LineInfo.FileName. On success the buffer
/// is stored in SourceCache and one StringRef per line (without the trailing
/// "\n" or "\r\n") is appended to LineCache. Returns false, warning once per
/// file name, when the file cannot be read.
bool SourcePrinter::cacheSource(const DILineInfo &LineInfo) {
  std::unique_ptr<MemoryBuffer> Buffer;
  if (!LineInfo.Source) {
    auto BufferOrError = MemoryBuffer::getFile(LineInfo.FileName);
    if (!BufferOrError) {
      const bool FirstMiss = MissingSources.insert(LineInfo.FileName).second;
      if (FirstMiss)
        reportWarning("failed to find source " + LineInfo.FileName,
                      Obj->getFileName());
      return false;
    }
    Buffer = std::move(*BufferOrError);
  } else {
    Buffer = MemoryBuffer::getMemBuffer(*LineInfo.Source);
  }

  // Split the buffer into lines.
  const char *const Begin = Buffer->getBufferStart();
  const char *const Limit = Buffer->getBufferEnd();
  std::vector<StringRef> &Lines = LineCache[LineInfo.FileName];
  const char *LineStart = Begin;
  for (const char *Cur = Begin; Cur != Limit; ++Cur) {
    if (*Cur != '\n')
      continue;
    // Drop a carriage return that immediately precedes the newline.
    const bool HasCR = Begin < Cur && Cur[-1] == '\r';
    Lines.emplace_back(LineStart, Cur - LineStart - HasCR);
    LineStart = Cur + 1;
  }
  // Final line without a terminating newline.
  if (LineStart < Limit)
    Lines.emplace_back(LineStart, Limit - LineStart);

  SourceCache[LineInfo.FileName] = std::move(Buffer);
  return true;
}
/// Print the source context for the instruction at \p Address.
///
/// Symbolizes \p Address in the object this printer was created for, applies
/// the global Prefix to absolute file names, and then delegates to
/// printLines() and/or printSources() depending on the PrintLines and
/// PrintSource options. Does nothing if the printer has no symbolizer.
/// A symbolization failure is reported as a warning only once.
void SourcePrinter::printSourceLine(formatted_raw_ostream &OS,
                                    object::SectionedAddress Address,
                                    StringRef ObjectFilename,
                                    LiveVariablePrinter &LVP,
                                    StringRef Delimiter) {
  if (!Symbolizer)
    return;

  DILineInfo LineInfo = DILineInfo();
  Expected<DILineInfo> ExpectedLineInfo =
      Symbolizer->symbolizeCode(*Obj, Address);
  if (ExpectedLineInfo) {
    LineInfo = *ExpectedLineInfo;
  } else if (!WarnedInvalidDebugInfo) {
    WarnedInvalidDebugInfo = true;
    // TODO Untested.
    reportWarning("failed to parse debug information: " +
                      toString(ExpectedLineInfo.takeError()),
                  ObjectFilename);
  } else {
    // The warning has already been issued once. The error still has to be
    // consumed: destroying an Expected that holds an unhandled error aborts
    // in builds with ABI-breaking checks enabled.
    consumeError(ExpectedLineInfo.takeError());
  }

  // Re-root absolute source paths under the user-supplied prefix.
  if (!Prefix.empty() && sys::path::is_absolute_gnu(LineInfo.FileName)) {
    SmallString<128> FilePath;
    sys::path::append(FilePath, Prefix, LineInfo.FileName);
    LineInfo.FileName = std::string(FilePath);
  }

  if (PrintLines)
    printLines(OS, LineInfo, Delimiter, LVP);
  if (PrintSource)
    printSources(OS, LineInfo, ObjectFilename, Delimiter, LVP);
  // Remember what was printed so the next instruction can skip duplicates.
  OldLineInfo = LineInfo;
}
/// Print a "function():" header and a "file:line" marker for \p LineInfo.
///
/// The function header is printed when the function name is valid and differs
/// from the previous instruction's. The file:line marker is printed when the
/// location is valid and either the location changed or a function header was
/// just printed.
void SourcePrinter::printLines(formatted_raw_ostream &OS,
                               const DILineInfo &LineInfo, StringRef Delimiter,
                               LiveVariablePrinter &LVP) {
  const bool NewFunction =
      LineInfo.FunctionName != DILineInfo::BadString &&
      LineInfo.FunctionName != OldLineInfo.FunctionName;
  if (NewFunction) {
    const StringRef Func(LineInfo.FunctionName);
    OS << Delimiter << Func;
    // If demangling is successful, FunctionName will end with "()". Print it
    // only if demangling did not run or was unsuccessful.
    if (!Func.endswith("()"))
      OS << "()";
    OS << ":\n";
  }

  if (LineInfo.FileName == DILineInfo::BadString || LineInfo.Line == 0)
    return;

  const bool SameLocation = OldLineInfo.Line == LineInfo.Line &&
                            OldLineInfo.FileName == LineInfo.FileName;
  if (SameLocation && !NewFunction)
    return;

  OS << Delimiter << LineInfo.FileName << ":" << LineInfo.Line;
  LVP.printBetweenInsts(OS, true);
}
/// Print the line of source text that \p LineInfo refers to.
///
/// Nothing is printed when the location is invalid, unchanged from the
/// previous instruction, or when the source cannot be cached. A warning is
/// issued if the line number lies beyond the end of the cached source.
void SourcePrinter::printSources(formatted_raw_ostream &OS,
                                 const DILineInfo &LineInfo,
                                 StringRef ObjectFilename, StringRef Delimiter,
                                 LiveVariablePrinter &LVP) {
  if (LineInfo.FileName == DILineInfo::BadString || LineInfo.Line == 0)
    return;
  if (OldLineInfo.Line == LineInfo.Line &&
      OldLineInfo.FileName == LineInfo.FileName)
    return;

  // Populate the caches on first use of this file.
  const bool AlreadyCached =
      SourceCache.find(LineInfo.FileName) != SourceCache.end();
  if (!AlreadyCached && !cacheSource(LineInfo))
    return;

  auto Cached = LineCache.find(LineInfo.FileName);
  if (Cached == LineCache.end())
    return;

  const std::vector<StringRef> &Lines = Cached->second;
  if (LineInfo.Line > Lines.size()) {
    reportWarning(
        formatv(
            "debug info line number {0} exceeds the number of lines in {1}",
            LineInfo.Line, LineInfo.FileName),
        ObjectFilename);
    return;
  }

  // Vector begins at 0, line numbers are non-zero
  OS << Delimiter << Lines[LineInfo.Line - 1];
  LVP.printBetweenInsts(OS, true);
}
/// Returns true if \p Obj is an ELF object whose machine type is EM_AARCH64.
static bool isAArch64Elf(const ObjectFile *Obj) {
  if (const auto *ElfObj = dyn_cast<ELFObjectFileBase>(Obj))
    return ElfObj->getEMachine() == ELF::EM_AARCH64;
  return false;
}
/// Returns true if \p Obj is an ELF object whose machine type is EM_ARM.
static bool isArmElf(const ObjectFile *Obj) {
  if (const auto *ElfObj = dyn_cast<ELFObjectFileBase>(Obj))
    return ElfObj->getEMachine() == ELF::EM_ARM;
  return false;
}
/// Returns true if \p Obj is an ARM or AArch64 ELF object, the formats for
/// which this file collects $a/$t/$x/$d mapping symbols.
static bool hasMappingSymbols(const ObjectFile *Obj) {
  if (isArmElf(Obj))
    return true;
  return isAArch64Elf(Obj);
}
/// Print one relocation as "<address>: <type name>\t<value string>".
///
/// The address is printed with 16 hex digits when \p Is64Bits is set and with
/// 8 otherwise. A failure to format the relocation value is reported through
/// reportError() against \p FileName.
static void printRelocation(formatted_raw_ostream &OS, StringRef FileName,
                            const RelocationRef &Rel, uint64_t Address,
                            bool Is64Bits) {
  const char *const AddrFmt =
      Is64Bits ? "\t\t%016" PRIx64 ": " : "\t\t\t%08" PRIx64 ": ";

  SmallString<16> TypeName;
  Rel.getTypeName(TypeName);

  SmallString<32> ValueStr;
  if (Error Err = getRelocationValueString(Rel, ValueStr))
    reportError(std::move(Err), FileName);

  OS << format(AddrFmt, Address) << TypeName << "\t" << ValueStr;
}
/// Default instruction printer; selectPrettyPrinter() returns it for every
/// architecture without a dedicated subclass.
class PrettyPrinter {
public:
  virtual ~PrettyPrinter() = default;

  /// Print one instruction line: optional source context, live-variable
  /// continuation lines, the address (unless NoLeadingAddr), the raw bytes
  /// (unless NoShowRawInsn), padding up to the instruction column, and then
  /// the instruction text, or "<unknown>" when \p MI is null.
  virtual void
  printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes,
            object::SectionedAddress Address, formatted_raw_ostream &OS,
            StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP,
            StringRef ObjectFilename, std::vector<RelocationRef> *Rels,
            LiveVariablePrinter &LVP) {
    if (SP != nullptr && (PrintSource || PrintLines))
      SP->printSourceLine(OS, Address, ObjectFilename, LVP);
    LVP.printBetweenInsts(OS, false);

    const size_t LineStart = OS.tell();
    if (!NoLeadingAddr)
      OS << format("%8" PRIx64 ":", Address.Address);
    if (!NoShowRawInsn) {
      OS << ' ';
      dumpBytes(Bytes, OS);
    }

    // The output of printInst starts with a tab. Print some spaces so that
    // the tab has 1 column and advances to the target tab stop.
    const unsigned TabStop = getInstStartColumn(STI);
    const unsigned Column = OS.tell() - LineStart;
    unsigned Padding;
    if (Column < TabStop - 1)
      Padding = TabStop - 1 - Column;
    else
      Padding = 7 - Column % 8;
    OS.indent(Padding);

    if (!MI) {
      OS << "\t<unknown>";
      return;
    }

    // See MCInstPrinter::printInst. On targets where a PC relative immediate
    // is relative to the next instruction and the length of a MCInst is
    // difficult to measure (x86), this is the address of the next
    // instruction.
    uint64_t Addr = Address.Address;
    if (STI.getTargetTriple().isX86())
      Addr += Bytes.size();
    IP.printInst(MI, Addr, "", STI, OS);
  }
};
PrettyPrinter PrettyPrinterInst;
/// Instruction printer that selectPrettyPrinter() returns for Triple::hexagon.
/// The printed instruction text may span several lines (a packet); each line
/// is emitted with its own address/bytes lead and any matching relocation.
class HexagonPrettyPrinter : public PrettyPrinter {
public:
/// Print the lead of one output line: the address (unless NoLeadingAddr) and,
/// unless NoShowRawInsn, the first four raw bytes followed by the same bytes
/// assembled into a 32-bit word with Bytes[0] as the least significant byte.
/// Reads Bytes[0..3] unconditionally.
void printLead(ArrayRef<uint8_t> Bytes, uint64_t Address,
formatted_raw_ostream &OS) {
uint32_t opcode =
(Bytes[3] << 24) | (Bytes[2] << 16) | (Bytes[1] << 8) | Bytes[0];
if (!NoLeadingAddr)
OS << format("%8" PRIx64 ":", Address);
if (!NoShowRawInsn) {
OS << "\t";
dumpBytes(Bytes.slice(0, 4), OS);
OS << format("\t%08" PRIx32, opcode);
}
}
/// Print \p MI (or "<unknown>" when it is null) one line of instruction text
/// at a time. \p Bytes and \p Address are taken by value and advanced by 4
/// after every printed line. \p Rels is dereferenced unconditionally.
void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes,
object::SectionedAddress Address, formatted_raw_ostream &OS,
StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP,
StringRef ObjectFilename, std::vector<RelocationRef> *Rels,
LiveVariablePrinter &LVP) override {
if (SP && (PrintSource || PrintLines))
SP->printSourceLine(OS, Address, ObjectFilename, LVP, "");
if (!MI) {
printLead(Bytes, Address.Address, OS);
OS << " <unknown>";
return;
}
// Render the instruction text into a temporary string; the inner scope
// destroys the stream before Buffer is read.
std::string Buffer;
{
raw_string_ostream TempStream(Buffer);
IP.printInst(MI, Address.Address, "", STI, TempStream);
}
StringRef Contents(Buffer);
// Split off bundle attributes
auto PacketBundle = Contents.rsplit('\n');
// Split off first instruction from the rest
auto HeadTail = PacketBundle.first.split('\n');
// Preamble is printed before each instruction; it is switched to the plain
// form after the first one. Separator is empty only before the first line.
auto Preamble = " { ";
auto Separator = "";
// Hexagon's packets require relocations to be inline rather than
// clustered at the end of the packet.
std::vector<RelocationRef>::const_iterator RelCur = Rels->begin();
std::vector<RelocationRef>::const_iterator RelEnd = Rels->end();
// Advance RelCur past relocations whose offset is below the current address
// and print the first one whose offset equals it. RelCur is not advanced
// past the printed relocation.
auto PrintReloc = [&]() -> void {
while ((RelCur != RelEnd) && (RelCur->getOffset() <= Address.Address)) {
if (RelCur->getOffset() == Address.Address) {
printRelocation(OS, ObjectFilename, *RelCur, Address.Address, false);
return;
}
++RelCur;
}
};
// One iteration per line of instruction text.
while (!HeadTail.first.empty()) {
OS << Separator;
Separator = "\n";
if (SP && (PrintSource || PrintLines))
SP->printSourceLine(OS, Address, ObjectFilename, LVP, "");
printLead(Bytes, Address.Address, OS);
OS << Preamble;
Preamble = " ";
StringRef Inst;
// A '\v' inside a line separates two parts, which are printed joined
// by "; ".
auto Duplex = HeadTail.first.split('\v');
if (!Duplex.second.empty()) {
OS << Duplex.first;
OS << "; ";
Inst = Duplex.second;
}
else
Inst = HeadTail.first;
OS << Inst;
HeadTail = HeadTail.second.split('\n');
// After the last line, close the packet and append the text that was split
// off after the final newline.
if (HeadTail.first.empty())
OS << " } " << PacketBundle.second;
PrintReloc();
Bytes = Bytes.slice(4);
Address.Address += 4;
}
}
};
HexagonPrettyPrinter HexagonPrettyPrinterInst;
class AMDGCNPrettyPrinter : public PrettyPrinter {
public:
void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes,
object::SectionedAddress Address, formatted_raw_ostream &OS,
StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP,
StringRef ObjectFilename, std::vector<RelocationRef> *Rels,
LiveVariablePrinter &LVP) override {
if (SP && (PrintSource || PrintLines))
SP->printSourceLine(OS, Address, ObjectFilename, LVP);
if (MI) {
SmallString<40> InstStr;
raw_svector_ostream IS(InstStr);
IP.printInst(MI, Address.Address, "", STI, IS);
OS << left_justify(IS.str(), 60);
} else {
// an unrecognized encoding - this is probably data so represent it
// using the .long directive, or .byte directive if fewer than 4 bytes
// remaining
if (Bytes.size() >= 4) {
OS << format("\t.long 0x%08" PRIx32 " ",
support::endian::read32<support::little>(Bytes.data()));
OS.indent(42);
} else {
OS << format("\t.byte 0x%02" PRIx8, Bytes[0]);
for (unsigned int i = 1; i < Bytes.size(); i++)
OS << format(", 0x%02" PRIx8, Bytes[i]);
OS.indent(55 - (6 * Bytes.size()));
}
}
OS << format("// %012" PRIX64 ":", Address.Address);
if (Bytes.size() >= 4) {
// D should be casted to uint32_t here as it is passed by format to
// snprintf as vararg.
for (uint32_t D : makeArrayRef(
reinterpret_cast<const support::little32_t *>(Bytes.data()),
Bytes.size() / 4))
OS << format(" %08" PRIX32, D);
} else {
for (unsigned char B : Bytes)
OS << format(" %02" PRIX8, B);
}
if (!Annot.empty())
OS << " // " << Annot;
}
};
AMDGCNPrettyPrinter AMDGCNPrettyPrinterInst;
/// Instruction printer that selectPrettyPrinter() returns for the BPF
/// architectures. The leading address is printed in decimal as
/// Address / 8 rather than as a hexadecimal byte address.
class BPFPrettyPrinter : public PrettyPrinter {
public:
  void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes,
                 object::SectionedAddress Address, formatted_raw_ostream &OS,
                 StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP,
                 StringRef ObjectFilename, std::vector<RelocationRef> *Rels,
                 LiveVariablePrinter &LVP) override {
    if (SP != nullptr && (PrintSource || PrintLines))
      SP->printSourceLine(OS, Address, ObjectFilename, LVP);

    if (!NoLeadingAddr)
      OS << format("%8" PRId64 ":", Address.Address / 8);

    if (!NoShowRawInsn) {
      OS << "\t";
      dumpBytes(Bytes, OS);
    }

    if (!MI) {
      OS << "\t<unknown>";
      return;
    }
    IP.printInst(MI, Address.Address, "", STI, OS);
  }
};
BPFPrettyPrinter BPFPrettyPrinterInst;
/// Return the global pretty-printer instance matching the architecture of
/// \p Triple; architectures without a dedicated printer get the default one.
PrettyPrinter &selectPrettyPrinter(Triple const &Triple) {
  switch (Triple.getArch()) {
  case Triple::bpfel:
  case Triple::bpfeb:
    return BPFPrettyPrinterInst;
  case Triple::amdgcn:
    return AMDGCNPrettyPrinterInst;
  case Triple::hexagon:
    return HexagonPrettyPrinterInst;
  default:
    return PrettyPrinterInst;
  }
}
}
static uint8_t getElfSymbolType(const ObjectFile *Obj, const SymbolRef &Sym) {
assert(Obj->isELF());
if (auto *Elf32LEObj = dyn_cast<ELF32LEObjectFile>(Obj))
return unwrapOrError(Elf32LEObj->getSymbol(Sym.getRawDataRefImpl()),
Obj->getFileName())
->getType();
if (auto *Elf64LEObj = dyn_cast<ELF64LEObjectFile>(Obj))
return unwrapOrError(Elf64LEObj->getSymbol(Sym.getRawDataRefImpl()),
Obj->getFileName())
->getType();
if (auto *Elf32BEObj = dyn_cast<ELF32BEObjectFile>(Obj))
return unwrapOrError(Elf32BEObj->getSymbol(Sym.getRawDataRefImpl()),
Obj->getFileName())
->getType();
if (auto *Elf64BEObj = cast<ELF64BEObjectFile>(Obj))
return unwrapOrError(Elf64BEObj->getSymbol(Sym.getRawDataRefImpl()),
Obj->getFileName())
->getType();
llvm_unreachable("Unsupported binary format");
}
template <class ELFT> static void
addDynamicElfSymbols(const ELFObjectFile<ELFT> *Obj,
std::map<SectionRef, SectionSymbolsTy> &AllSymbols) {
for (auto Symbol : Obj->getDynamicSymbolIterators()) {
uint8_t SymbolType = Symbol.getELFType();
if (SymbolType == ELF::STT_SECTION)
continue;
uint64_t Address = unwrapOrError(Symbol.getAddress(), Obj->getFileName());
// ELFSymbolRef::getAddress() returns size instead of value for common
// symbols which is not desirable for disassembly output. Overriding.
if (SymbolType == ELF::STT_COMMON)
Address = unwrapOrError(Obj->getSymbol(Symbol.getRawDataRefImpl()),
Obj->getFileName())
->st_value;
StringRef Name = unwrapOrError(Symbol.getName(), Obj->getFileName());
if (Name.empty())
continue;
section_iterator SecI =
unwrapOrError(Symbol.getSection(), Obj->getFileName());
if (SecI == Obj->section_end())
continue;
AllSymbols[*SecI].emplace_back(Address, Name, SymbolType);
}
}
/// Dispatch on the concrete ELF class of \p Obj (32/64-bit, little/big
/// endian) and add its dynamic symbols to \p AllSymbols.
static void
addDynamicElfSymbols(const ObjectFile *Obj,
                     std::map<SectionRef, SectionSymbolsTy> &AllSymbols) {
  assert(Obj->isELF());

  if (auto *LE32 = dyn_cast<ELF32LEObjectFile>(Obj)) {
    addDynamicElfSymbols(LE32, AllSymbols);
    return;
  }
  if (auto *LE64 = dyn_cast<ELF64LEObjectFile>(Obj)) {
    addDynamicElfSymbols(LE64, AllSymbols);
    return;
  }
  if (auto *BE32 = dyn_cast<ELF32BEObjectFile>(Obj)) {
    addDynamicElfSymbols(BE32, AllSymbols);
    return;
  }
  if (auto *BE64 = cast<ELF64BEObjectFile>(Obj)) {
    addDynamicElfSymbols(BE64, AllSymbols);
    return;
  }
  llvm_unreachable("Unsupported binary format");
}
/// Add a synthetic "<name>@plt" symbol to \p AllSymbols for every PLT entry of
/// \p Obj whose target symbol has a non-empty name.
///
/// Does nothing unless the object has a section named ".plt" (the last such
/// section is used) and is an ELF object. Entries that do not resolve to a
/// readable symbol produce a warning. The generated names are stored in
/// \p Saver.
static void addPltEntries(const ObjectFile *Obj,
                          std::map<SectionRef, SectionSymbolsTy> &AllSymbols,
                          StringSaver &Saver) {
  Optional<SectionRef> Plt = None;
  for (const SectionRef &Section : Obj->sections()) {
    Expected<StringRef> SecNameOrErr = Section.getName();
    if (SecNameOrErr) {
      if (*SecNameOrErr == ".plt")
        Plt = Section;
    } else {
      consumeError(SecNameOrErr.takeError());
    }
  }
  if (!Plt)
    return;

  const auto *ElfObj = dyn_cast<ELFObjectFileBase>(Obj);
  if (!ElfObj)
    return;

  for (auto PltEntry : ElfObj->getPltAddresses()) {
    if (PltEntry.first) {
      SymbolRef Symbol(*PltEntry.first, ElfObj);
      uint8_t SymbolType = getElfSymbolType(Obj, Symbol);
      Expected<StringRef> NameOrErr = Symbol.getName();
      if (NameOrErr) {
        if (!NameOrErr->empty())
          AllSymbols[*Plt].emplace_back(
              PltEntry.second, Saver.save((*NameOrErr + "@plt").str()),
              SymbolType);
        continue;
      }
      // The warning has been reported in disassembleObject().
      consumeError(NameOrErr.takeError());
    }
    reportWarning("PLT entry at 0x" + Twine::utohexstr(PltEntry.second) +
                      " references an invalid symbol",
                  Obj->getFileName());
  }
}
// Normally the disassembly output will skip blocks of zeroes. This function
// returns the number of zero bytes that can be skipped when dumping the
// disassembly of the instructions in Buf.
static size_t countSkippableZeroBytes(ArrayRef<uint8_t> Buf) {
  // Measure the run of zero bytes at the start of the buffer.
  size_t ZeroRun = 0;
  for (uint8_t Byte : Buf) {
    if (Byte != 0)
      break;
    ++ZeroRun;
  }

  // Blocks of zero bytes are only skipped when at least 8 of them appear in
  // a row.
  if (ZeroRun < 8)
    return 0;

  // Skip zeroes in multiples of 4 so that an instruction which starts with a
  // zero byte is not truncated.
  return ZeroRun - ZeroRun % 4;
}
// Returns a map from sections to their relocations.
//
// Each relocation section's entries are appended to the vector of the section
// they apply to, provided that section passes the section filter. Every
// vector is kept sorted by relocation address.
static std::map<SectionRef, std::vector<RelocationRef>>
getRelocsMap(object::ObjectFile const &Obj) {
  std::map<SectionRef, std::vector<RelocationRef>> RelocsBySection;
  uint64_t NextIndex = 0;
  for (SectionRef Sec : Obj.sections()) {
    // Position of this section in iteration order; only used in the error
    // message below.
    const uint64_t SecIndex = NextIndex++;

    Expected<section_iterator> RelocatedOrErr = Sec.getRelocatedSection();
    if (!RelocatedOrErr)
      reportError(Obj.getFileName(),
                  "section (" + Twine(SecIndex) +
                      "): failed to get a relocated section: " +
                      toString(RelocatedOrErr.takeError()));

    section_iterator Target = *RelocatedOrErr;
    if (Target == Obj.section_end())
      continue;
    if (!checkSectionFilter(*Target).Keep)
      continue;

    std::vector<RelocationRef> &Relocs = RelocsBySection[*Target];
    append_range(Relocs, Sec.relocations());
    // Sort relocations by address.
    llvm::stable_sort(Relocs, isRelocAddressLess);
  }
  return RelocsBySection;
}
// Used for --adjust-vma to check if address should be adjusted by the
// specified value for a given section.
// For ELF we do not adjust non-allocatable sections like debug ones,
// because they are not loadable.
// TODO: implement for other file formats.
static bool shouldAdjustVA(const SectionRef &Section) {
const ObjectFile *Obj = Section.getObject();
if (Obj->isELF())
return ELFSectionRef(Section).getFlags() & ELF::SHF_ALLOC;
return false;
}
// A mapping symbol: its address paired with its kind character.
using MappingSymbolPair = std::pair<uint64_t, char>;

/// Return the kind of the last mapping symbol located at or before
/// \p Address. \p MappingSymbols must be sorted by address.
static char getMappingSymbolKind(ArrayRef<MappingSymbolPair> MappingSymbols,
                                 uint64_t Address) {
  // Find the first mapping symbol that lies strictly after Address.
  const auto *FirstAfter = partition_point(
      MappingSymbols, [Address](const MappingSymbolPair &Sym) {
        return Sym.first <= Address;
      });

  // Return zero for any address before the first mapping symbol; this means
  // we should use the default disassembly mode, depending on the target.
  if (FirstAfter == MappingSymbols.begin())
    return '\x00';
  return FirstAfter[-1].second;
}
/// Dump one data item located at offset \p Index of a section as an assembler
/// data directive, and return the number of bytes consumed.
///
/// The largest of .word (4 bytes), .short (2 bytes) or .byte (1 byte) that
/// still fits before \p End is used. Multi-byte values are read using the
/// endianness of \p Obj.
///
/// \param SectionAddr    address of the section; added to \p Index for the
///                       printed address.
/// \param Index          offset of the item within \p Bytes.
/// \param End            offset one past the last byte that may be dumped.
/// \param Bytes          contents of the whole section.
/// \param MappingSymbols unused.
static uint64_t dumpARMELFData(uint64_t SectionAddr, uint64_t Index,
                               uint64_t End, const ObjectFile *Obj,
                               ArrayRef<uint8_t> Bytes,
                               ArrayRef<MappingSymbolPair> MappingSymbols,
                               raw_ostream &OS) {
  support::endianness Endian =
      Obj->isLittleEndian() ? support::little : support::big;
  OS << format("%8" PRIx64 ":\t", SectionAddr + Index);
  if (Index + 4 <= End) {
    dumpBytes(Bytes.slice(Index, 4), OS);
    OS << "\t.word\t"
       << format_hex(support::endian::read32(Bytes.data() + Index, Endian),
                     10);
    return 4;
  }
  if (Index + 2 <= End) {
    dumpBytes(Bytes.slice(Index, 2), OS);
    OS << "\t\t.short\t"
       << format_hex(support::endian::read16(Bytes.data() + Index, Endian),
                     6);
    return 2;
  }
  dumpBytes(Bytes.slice(Index, 1), OS);
  // Print the byte at Index, the same one dumped above, rather than the first
  // byte of the section.
  OS << "\t\t.byte\t" << format_hex(Bytes[Index], 4);
  return 1;
}
/// Dump bytes [Index, End) of \p Bytes to outs() as rows of up to 8 bytes.
/// Each row starts with the address (SectionAddr + Index of its first byte),
/// followed by the bytes in hex and then the same bytes as text, with
/// non-printable bytes shown as '.'.
static void dumpELFData(uint64_t SectionAddr, uint64_t Index, uint64_t End,
ArrayRef<uint8_t> Bytes) {
// print out data up to 8 bytes at a time in hex and ascii
uint8_t AsciiData[9] = {'\0'};
uint8_t Byte;
// Number of bytes already printed on the current row.
int NumBytes = 0;
for (; Index < End; ++Index) {
// Start of a new row: print its address.
if (NumBytes == 0)
outs() << format("%8" PRIx64 ":", SectionAddr + Index);
Byte = Bytes.slice(Index)[0];
outs() << format(" %02x", Byte);
AsciiData[NumBytes] = isPrint(Byte) ? Byte : '.';
uint8_t IndentOffset = 0;
NumBytes++;
// On the last byte, force the row to be flushed even if it is short.
// NOTE(review): NumBytes appears never to exceed 8 here because it is reset
// to 0 whenever it reaches 8 - confirm the `NumBytes > 8` arm is dead.
if (Index == End - 1 || NumBytes > 8) {
// Indent the space for less than 8 bytes data.
// 2 spaces for byte and one for space between bytes
IndentOffset = 3 * (8 - NumBytes);
for (int Excess = NumBytes; Excess < 8; Excess++)
AsciiData[Excess] = '\0';
NumBytes = 8;
}
// Row complete (or forced above): print padding, the text column and a
// newline, then start a new row.
if (NumBytes == 8) {
AsciiData[8] = '\0';
outs() << std::string(IndentOffset, ' ') << " ";
outs() << reinterpret_cast<char *>(AsciiData);
outs() << '\n';
NumBytes = 0;
}
}
}
/// Build the SymbolInfoTy record for \p Symbol of \p Obj.
///
/// For XCOFF objects with SymbolDescription enabled, the record carries the
/// storage mapping class, symbol index and label flag. Otherwise it carries
/// the ELF symbol type, or STT_NOTYPE for non-ELF objects. Failures to read
/// the address or name go through unwrapOrError().
SymbolInfoTy objdump::createSymbolInfo(const ObjectFile *Obj,
                                       const SymbolRef &Symbol) {
  const StringRef FileName = Obj->getFileName();
  const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
  const StringRef Name = unwrapOrError(Symbol.getName(), FileName);

  if (!(Obj->isXCOFF() && SymbolDescription)) {
    uint8_t Type = (uint8_t)ELF::STT_NOTYPE;
    if (Obj->isELF())
      Type = getElfSymbolType(Obj, Symbol);
    return SymbolInfoTy(Addr, Name, Type);
  }

  const auto *XCOFFObj = cast<XCOFFObjectFile>(Obj);
  const DataRefImpl SymbolDRI = Symbol.getRawDataRefImpl();
  const uint32_t SymbolIndex = XCOFFObj->getSymbolIndex(SymbolDRI.p);
  Optional<XCOFF::StorageMappingClass> Smc =
      getXCOFFSymbolCsectSMC(XCOFFObj, Symbol);
  return SymbolInfoTy(Addr, Name, Smc, SymbolIndex,
                      isLabel(XCOFFObj, Symbol));
}
/// Build a placeholder SymbolInfoTy for \p Addr named \p Name. With XCOFF
/// symbol descriptions enabled the XCOFF form of the record is used (no
/// storage mapping class, no index, not a label); otherwise \p Type is stored.
static SymbolInfoTy createDummySymbolInfo(const ObjectFile *Obj,
                                          const uint64_t Addr, StringRef &Name,
                                          uint8_t Type) {
  const bool UseXCOFFForm = Obj->isXCOFF() && SymbolDescription;
  if (!UseXCOFFForm)
    return SymbolInfoTy(Addr, Name, Type);
  return SymbolInfoTy(Addr, Name, None, None, false);
}
/// Disassemble the section range [Start, End) (offsets relative to
/// \p SectionAddr) and record a label "L<n>" in \p Labels for every branch
/// target that falls inside that range. Labels are numbered in order of first
/// appearance. \p Labels is cleared first. Only X86 is supported; for other
/// targets the function returns without touching \p Labels.
static void
collectLocalBranchTargets(ArrayRef<uint8_t> Bytes, const MCInstrAnalysis *MIA,
                          MCDisassembler *DisAsm, MCInstPrinter *IP,
                          const MCSubtargetInfo *STI, uint64_t SectionAddr,
                          uint64_t Start, uint64_t End,
                          std::unordered_map<uint64_t, std::string> &Labels) {
  // So far only supports X86.
  if (!STI->getTargetTriple().isX86())
    return;

  Labels.clear();
  unsigned NextLabel = 0;

  // Work with absolute addresses from here on.
  Start += SectionAddr;
  End += SectionAddr;

  uint64_t Addr = Start;
  while (Addr < End) {
    // Disassemble a real instruction and record function-local branch labels.
    MCInst Inst;
    uint64_t Size;
    const bool Decoded = DisAsm->getInstruction(
        Inst, Size, Bytes.slice(Addr - SectionAddr), Addr, nulls());
    // Always make progress, even past undecodable bytes.
    if (Size == 0)
      Size = 1;

    if (Decoded && MIA) {
      uint64_t Target;
      if (MIA->evaluateBranch(Inst, Addr, Size, Target)) {
        const bool IsLocal = Target >= Start && Target < End;
        if (IsLocal && Labels.find(Target) == Labels.end())
          Labels[Target] = ("L" + Twine(NextLabel++)).str();
      }
    }
    Addr += Size;
  }
}
/// Return the final segment name of \p Section when \p MachO is non-null, and
/// an empty string otherwise.
static StringRef getSegmentName(const MachOObjectFile *MachO,
                                const SectionRef &Section) {
  if (!MachO)
    return "";
  return MachO->getSectionFinalSegmentName(Section.getRawDataRefImpl());
}
static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
MCContext &Ctx, MCDisassembler *PrimaryDisAsm,
MCDisassembler *SecondaryDisAsm,
const MCInstrAnalysis *MIA, MCInstPrinter *IP,
const MCSubtargetInfo *PrimarySTI,
const MCSubtargetInfo *SecondarySTI,
PrettyPrinter &PIP,
SourcePrinter &SP, bool InlineRelocs) {
const MCSubtargetInfo *STI = PrimarySTI;
MCDisassembler *DisAsm = PrimaryDisAsm;
bool PrimaryIsThumb = false;
if (isArmElf(Obj))
PrimaryIsThumb = STI->checkFeatures("+thumb-mode");
std::map<SectionRef, std::vector<RelocationRef>> RelocMap;
if (InlineRelocs)
RelocMap = getRelocsMap(*Obj);
bool Is64Bits = Obj->getBytesInAddress() > 4;
// Create a mapping from virtual address to symbol name. This is used to
// pretty print the symbols while disassembling.
std::map<SectionRef, SectionSymbolsTy> AllSymbols;
SectionSymbolsTy AbsoluteSymbols;
const StringRef FileName = Obj->getFileName();
const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(Obj);
for (const SymbolRef &Symbol : Obj->symbols()) {
Expected<StringRef> NameOrErr = Symbol.getName();
if (!NameOrErr) {
reportWarning(toString(NameOrErr.takeError()), FileName);
continue;
}
if (NameOrErr->empty() && !(Obj->isXCOFF() && SymbolDescription))
continue;
if (Obj->isELF() && getElfSymbolType(Obj, Symbol) == ELF::STT_SECTION)
continue;
// Don't ask a Mach-O STAB symbol for its section unless you know that
// STAB symbol's section field refers to a valid section index. Otherwise
// the symbol may error trying to load a section that does not exist.
if (MachO) {
DataRefImpl SymDRI = Symbol.getRawDataRefImpl();
uint8_t NType = (MachO->is64Bit() ?
MachO->getSymbol64TableEntry(SymDRI).n_type:
MachO->getSymbolTableEntry(SymDRI).n_type);
if (NType & MachO::N_STAB)
continue;
}
section_iterator SecI = unwrapOrError(Symbol.getSection(), FileName);
if (SecI != Obj->section_end())
AllSymbols[*SecI].push_back(createSymbolInfo(Obj, Symbol));
else
AbsoluteSymbols.push_back(createSymbolInfo(Obj, Symbol));
}
if (AllSymbols.empty() && Obj->isELF())
addDynamicElfSymbols(Obj, AllSymbols);
BumpPtrAllocator A;
StringSaver Saver(A);
addPltEntries(Obj, AllSymbols, Saver);
// Create a mapping from virtual address to section. An empty section can
// cause more than one section at the same address. Sort such sections to be
// before same-addressed non-empty sections so that symbol lookups prefer the
// non-empty section.
std::vector<std::pair<uint64_t, SectionRef>> SectionAddresses;
for (SectionRef Sec : Obj->sections())
SectionAddresses.emplace_back(Sec.getAddress(), Sec);
llvm::stable_sort(SectionAddresses, [](const auto &LHS, const auto &RHS) {
if (LHS.first != RHS.first)
return LHS.first < RHS.first;
return LHS.second.getSize() < RHS.second.getSize();
});
// Linked executables (.exe and .dll files) typically don't include a real
// symbol table but they might contain an export table.
if (const auto *COFFObj = dyn_cast<COFFObjectFile>(Obj)) {
for (const auto &ExportEntry : COFFObj->export_directories()) {
StringRef Name;
if (Error E = ExportEntry.getSymbolName(Name))
reportError(std::move(E), Obj->getFileName());
if (Name.empty())
continue;
uint32_t RVA;
if (Error E = ExportEntry.getExportRVA(RVA))
reportError(std::move(E), Obj->getFileName());
uint64_t VA = COFFObj->getImageBase() + RVA;
auto Sec = partition_point(
SectionAddresses, [VA](const std::pair<uint64_t, SectionRef> &O) {
return O.first <= VA;
});
if (Sec != SectionAddresses.begin()) {
--Sec;
AllSymbols[Sec->second].emplace_back(VA, Name, ELF::STT_NOTYPE);
} else
AbsoluteSymbols.emplace_back(VA, Name, ELF::STT_NOTYPE);
}
}
// Sort all the symbols; this allows us to use a simple binary search to find
// a symbol by address. Multiple symbols can have the same address. Use a
// stable sort to stabilize the output.
StringSet<> FoundDisasmSymbolSet;
for (std::pair<const SectionRef, SectionSymbolsTy> &SecSyms : AllSymbols)
llvm::stable_sort(SecSyms.second);
llvm::stable_sort(AbsoluteSymbols);
std::unique_ptr<DWARFContext> DICtx;
LiveVariablePrinter LVP(*Ctx.getRegisterInfo(), *STI);
if (DbgVariables != DVDisabled) {
DICtx = DWARFContext::create(*Obj);
for (const std::unique_ptr<DWARFUnit> &CU : DICtx->compile_units())
LVP.addCompileUnit(CU->getUnitDIE(false));
}
LLVM_DEBUG(LVP.dump());
for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
if (FilterSections.empty() && !DisassembleAll &&
(!Section.isText() || Section.isVirtual()))
continue;
uint64_t SectionAddr = Section.getAddress();
uint64_t SectSize = Section.getSize();
if (!SectSize)
continue;
// Get the list of all the symbols in this section.
SectionSymbolsTy &Symbols = AllSymbols[Section];
std::vector<MappingSymbolPair> MappingSymbols;
if (hasMappingSymbols(Obj)) {
for (const auto &Symb : Symbols) {
uint64_t Address = Symb.Addr;
StringRef Name = Symb.Name;
if (Name.startswith("$d"))
MappingSymbols.emplace_back(Address - SectionAddr, 'd');
if (Name.startswith("$x"))
MappingSymbols.emplace_back(Address - SectionAddr, 'x');
if (Name.startswith("$a"))
MappingSymbols.emplace_back(Address - SectionAddr, 'a');
if (Name.startswith("$t"))
MappingSymbols.emplace_back(Address - SectionAddr, 't');
}
}
llvm::sort(MappingSymbols);
if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) {
// AMDGPU disassembler uses symbolizer for printing labels
std::unique_ptr<MCRelocationInfo> RelInfo(
TheTarget->createMCRelocationInfo(TripleName, Ctx));
if (RelInfo) {
std::unique_ptr<MCSymbolizer> Symbolizer(
TheTarget->createMCSymbolizer(
TripleName, nullptr, nullptr, &Symbols, &Ctx, std::move(RelInfo)));
DisAsm->setSymbolizer(std::move(Symbolizer));
}
}
StringRef SegmentName = getSegmentName(MachO, Section);
StringRef SectionName = unwrapOrError(Section.getName(), Obj->getFileName());
// If the section has no symbol at the start, just insert a dummy one.
if (Symbols.empty() || Symbols[0].Addr != 0) {
Symbols.insert(Symbols.begin(),
createDummySymbolInfo(Obj, SectionAddr, SectionName,
Section.isText() ? ELF::STT_FUNC
: ELF::STT_OBJECT));
}
SmallString<40> Comments;
raw_svector_ostream CommentStream(Comments);
ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(
unwrapOrError(Section.getContents(), Obj->getFileName()));
uint64_t VMAAdjustment = 0;
if (shouldAdjustVA(Section))
VMAAdjustment = AdjustVMA;
uint64_t Size;
uint64_t Index;
bool PrintedSection = false;
std::vector<RelocationRef> Rels = RelocMap[Section];
std::vector<RelocationRef>::const_iterator RelCur = Rels.begin();
std::vector<RelocationRef>::const_iterator RelEnd = Rels.end();
// Disassemble symbol by symbol.
for (unsigned SI = 0, SE = Symbols.size(); SI != SE; ++SI) {
std::string SymbolName = Symbols[SI].Name.str();
if (Demangle)
SymbolName = demangle(SymbolName);
// Skip if --disassemble-symbols is not empty and the symbol is not in
// the list.
if (!DisasmSymbolSet.empty() && !DisasmSymbolSet.count(SymbolName))
continue;
uint64_t Start = Symbols[SI].Addr;
if (Start < SectionAddr || StopAddress <= Start)
continue;
else
FoundDisasmSymbolSet.insert(SymbolName);
// The end is the section end, the beginning of the next symbol, or
// --stop-address.
uint64_t End = std::min<uint64_t>(SectionAddr + SectSize, StopAddress);
if (SI + 1 < SE)
End = std::min(End, Symbols[SI + 1].Addr);
if (Start >= End || End <= StartAddress)
continue;
Start -= SectionAddr;
End -= SectionAddr;
if (!PrintedSection) {
PrintedSection = true;
outs() << "\nDisassembly of section ";
if (!SegmentName.empty())
outs() << SegmentName << ",";
outs() << SectionName << ":\n";
}
outs() << '\n';
if (!NoLeadingAddr)
outs() << format(Is64Bits ? "%016" PRIx64 " " : "%08" PRIx64 " ",
SectionAddr + Start + VMAAdjustment);
if (Obj->isXCOFF() && SymbolDescription) {
outs() << getXCOFFSymbolDescription(Symbols[SI], SymbolName) << ":\n";
} else
outs() << '<' << SymbolName << ">:\n";
// Don't print raw contents of a virtual section. A virtual section
// doesn't have any contents in the file.
if (Section.isVirtual()) {
outs() << "...\n";
continue;
}
auto Status = DisAsm->onSymbolStart(Symbols[SI], Size,
Bytes.slice(Start, End - Start),
SectionAddr + Start, CommentStream);
// To have round trippable disassembly, we fall back to decoding the
// remaining bytes as instructions.
//
// If there is a failure, we disassemble the failed region as bytes before
// falling back. The target is expected to print nothing in this case.
//
// If there is Success or SoftFail i.e no 'real' failure, we go ahead by
// Size bytes before falling back.
// So if the entire symbol is 'eaten' by the target:
// Start += Size // Now Start = End and we will never decode as
// // instructions
//
// Right now, most targets return None i.e ignore to treat a symbol
// separately. But WebAssembly decodes preludes for some symbols.
//
if (Status.hasValue()) {
if (Status.getValue() == MCDisassembler::Fail) {
outs() << "// Error in decoding " << SymbolName
<< " : Decoding failed region as bytes.\n";
for (uint64_t I = 0; I < Size; ++I) {
outs() << "\t.byte\t " << format_hex(Bytes[I], 1, /*Upper=*/true)
<< "\n";
}
}
} else {
Size = 0;
}
Start += Size;
Index = Start;
if (SectionAddr < StartAddress)
Index = std::max<uint64_t>(Index, StartAddress - SectionAddr);
// If there is a data/common symbol inside an ELF text section and we are
// only disassembling text (applicable all architectures), we are in a
// situation where we must print the data and not disassemble it.
if (Obj->isELF() && !DisassembleAll && Section.isText()) {
uint8_t SymTy = Symbols[SI].Type;
if (SymTy == ELF::STT_OBJECT || SymTy == ELF::STT_COMMON) {
dumpELFData(SectionAddr, Index, End, Bytes);
Index = End;
}
}
bool CheckARMELFData = hasMappingSymbols(Obj) &&
Symbols[SI].Type != ELF::STT_OBJECT &&
!DisassembleAll;
bool DumpARMELFData = false;
formatted_raw_ostream FOS(outs());
std::unordered_map<uint64_t, std::string> AllLabels;
if (SymbolizeOperands)
collectLocalBranchTargets(Bytes, MIA, DisAsm, IP, PrimarySTI,
SectionAddr, Index, End, AllLabels);
while (Index < End) {
// ARM and AArch64 ELF binaries can interleave data and text in the
// same section. We rely on the markers introduced to understand what
// we need to dump. If the data marker is within a function, it is
// denoted as a word/short etc.
if (CheckARMELFData) {
char Kind = getMappingSymbolKind(MappingSymbols, Index);
DumpARMELFData = Kind == 'd';
if (SecondarySTI) {
if (Kind == 'a') {
STI = PrimaryIsThumb ? SecondarySTI : PrimarySTI;
DisAsm = PrimaryIsThumb ? SecondaryDisAsm : PrimaryDisAsm;
} else if (Kind == 't') {
STI = PrimaryIsThumb ? PrimarySTI : SecondarySTI;
DisAsm = PrimaryIsThumb ? PrimaryDisAsm : SecondaryDisAsm;
}
}
}
if (DumpARMELFData) {
Size = dumpARMELFData(SectionAddr, Index, End, Obj, Bytes,
MappingSymbols, FOS);
} else {
// When -z or --disassemble-zeroes are given we always disassemble
// them. Otherwise we might want to skip zero bytes we see.
if (!DisassembleZeroes) {
uint64_t MaxOffset = End - Index;
// For --reloc: print zero blocks patched by relocations, so that
// relocations can be shown in the dump.
if (RelCur != RelEnd)
MaxOffset = RelCur->getOffset() - Index;
if (size_t N =
countSkippableZeroBytes(Bytes.slice(Index, MaxOffset))) {
FOS << "\t\t..." << '\n';
Index += N;
continue;
}
}
// Print local label if there's any.
auto Iter = AllLabels.find(SectionAddr + Index);
if (Iter != AllLabels.end())
FOS << "<" << Iter->second << ">:\n";
// Disassemble a real instruction or a data when disassemble all is
// provided
MCInst Inst;
bool Disassembled =
DisAsm->getInstruction(Inst, Size, Bytes.slice(Index),
SectionAddr + Index, CommentStream);
if (Size == 0)
Size = 1;
LVP.update({Index, Section.getIndex()},
{Index + Size, Section.getIndex()}, Index + Size != End);
PIP.printInst(
*IP, Disassembled ? &Inst : nullptr, Bytes.slice(Index, Size),
{SectionAddr + Index + VMAAdjustment, Section.getIndex()}, FOS,
"", *STI, &SP, Obj->getFileName(), &Rels, LVP);
FOS << CommentStream.str();
Comments.clear();
// If disassembly has failed, avoid analysing invalid/incomplete
// instruction information. Otherwise, try to resolve the target
// address (jump target or memory operand address) and print it on the
// right of the instruction.
if (Disassembled && MIA) {
uint64_t Target;
bool PrintTarget =
MIA->evaluateBranch(Inst, SectionAddr + Index, Size, Target);
if (!PrintTarget)
if (Optional<uint64_t> MaybeTarget =
MIA->evaluateMemoryOperandAddress(
Inst, SectionAddr + Index, Size)) {
Target = *MaybeTarget;
PrintTarget = true;
// Do not print real address when symbolizing.
if (!SymbolizeOperands)
FOS << " # " << Twine::utohexstr(Target);
}
if (PrintTarget) {
// In a relocatable object, the target's section must reside in
// the same section as the call instruction or it is accessed
// through a relocation.
//
// In a non-relocatable object, the target may be in any section.
// In that case, locate the section(s) containing the target
// address and find the symbol in one of those, if possible.
//
// N.B. We don't walk the relocations in the relocatable case yet.
std::vector<const SectionSymbolsTy *> TargetSectionSymbols;
if (!Obj->isRelocatableObject()) {
auto It = llvm::partition_point(
SectionAddresses,
[=](const std::pair<uint64_t, SectionRef> &O) {
return O.first <= Target;
});
uint64_t TargetSecAddr = 0;
while (It != SectionAddresses.begin()) {
--It;
if (TargetSecAddr == 0)
TargetSecAddr = It->first;
if (It->first != TargetSecAddr)
break;
TargetSectionSymbols.push_back(&AllSymbols[It->second]);
}
} else {
TargetSectionSymbols.push_back(&Symbols);
}
TargetSectionSymbols.push_back(&AbsoluteSymbols);
// Find the last symbol in the first candidate section whose
// offset is less than or equal to the target. If there are no
// such symbols, try in the next section and so on, before finally
// using the nearest preceding absolute symbol (if any), if there
// are no other valid symbols.
const SymbolInfoTy *TargetSym = nullptr;
for (const SectionSymbolsTy *TargetSymbols :
TargetSectionSymbols) {
auto It = llvm::partition_point(
*TargetSymbols,
[=](const SymbolInfoTy &O) { return O.Addr <= Target; });
if (It != TargetSymbols->begin()) {
TargetSym = &*(It - 1);
break;
}
}
// Print the labels corresponding to the target if there's any.
bool LabelAvailable = AllLabels.count(Target);
if (TargetSym != nullptr) {
uint64_t TargetAddress = TargetSym->Addr;
uint64_t Disp = Target - TargetAddress;
std::string TargetName = TargetSym->Name.str();
if (Demangle)
TargetName = demangle(TargetName);
FOS << " <";
if (!Disp) {
// Always Print the binary symbol precisely corresponding to
// the target address.
FOS << TargetName;
} else if (!LabelAvailable) {
// Always Print the binary symbol plus an offset if there's no
// local label corresponding to the target address.
FOS << TargetName << "+0x" << Twine::utohexstr(Disp);
} else {
FOS << AllLabels[Target];
}
FOS << ">";
} else if (LabelAvailable) {
FOS << " <" << AllLabels[Target] << ">";
}
}
}
}
LVP.printAfterInst(FOS);
FOS << "\n";
// Hexagon does this in pretty printer
if (Obj->getArch() != Triple::hexagon) {
// Print relocation for instruction and data.
while (RelCur != RelEnd) {
uint64_t Offset = RelCur->getOffset();
// If this relocation is hidden, skip it.
if (getHidden(*RelCur) || SectionAddr + Offset < StartAddress) {
++RelCur;
continue;
}
// Stop when RelCur's offset is past the disassembled
// instruction/data. Note that it's possible the disassembled data
// is not the complete data: we might see the relocation printed in
// the middle of the data, but this matches the binutils objdump
// output.
if (Offset >= Index + Size)
break;
// When --adjust-vma is used, update the address printed.
if (RelCur->getSymbol() != Obj->symbol_end()) {
Expected<section_iterator> SymSI =
RelCur->getSymbol()->getSection();
if (SymSI && *SymSI != Obj->section_end() &&
shouldAdjustVA(**SymSI))
Offset += AdjustVMA;
}
printRelocation(FOS, Obj->getFileName(), *RelCur,
SectionAddr + Offset, Is64Bits);
LVP.printAfterOtherLine(FOS, true);
++RelCur;
}
}
Index += Size;
}
}
}
StringSet<> MissingDisasmSymbolSet =
set_difference(DisasmSymbolSet, FoundDisasmSymbolSet);
for (StringRef Sym : MissingDisasmSymbolSet.keys())
reportWarning("failed to disassemble missing symbol " + Sym, FileName);
}
static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
const Target *TheTarget = getTarget(Obj);
// Package up features to be passed to target/subtarget
SubtargetFeatures Features = Obj->getFeatures();
if (!MAttrs.empty())
for (unsigned I = 0; I != MAttrs.size(); ++I)
Features.AddFeature(MAttrs[I]);
std::unique_ptr<const MCRegisterInfo> MRI(
TheTarget->createMCRegInfo(TripleName));
if (!MRI)
reportError(Obj->getFileName(),
"no register info for target " + TripleName);
// Set up disassembler.
MCTargetOptions MCOptions;
std::unique_ptr<const MCAsmInfo> AsmInfo(
TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
if (!AsmInfo)
reportError(Obj->getFileName(),
"no assembly info for target " + TripleName);
if (MCPU.empty())
MCPU = Obj->tryGetCPUName().getValueOr("").str();
std::unique_ptr<const MCSubtargetInfo> STI(
TheTarget->createMCSubtargetInfo(TripleName, MCPU, Features.getString()));
if (!STI)
reportError(Obj->getFileName(),
"no subtarget info for target " + TripleName);
std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
if (!MII)
reportError(Obj->getFileName(),
"no instruction info for target " + TripleName);
MCObjectFileInfo MOFI;
MCContext Ctx(AsmInfo.get(), MRI.get(), &MOFI);
// FIXME: for now initialize MCObjectFileInfo with default values
MOFI.InitMCObjectFileInfo(Triple(TripleName), false, Ctx);
std::unique_ptr<MCDisassembler> DisAsm(
TheTarget->createMCDisassembler(*STI, Ctx));
if (!DisAsm)
reportError(Obj->getFileName(), "no disassembler for target " + TripleName);
// If we have an ARM object file, we need a second disassembler, because
// ARM CPUs have two different instruction sets: ARM mode, and Thumb mode.
// We use mapping symbols to switch between the two assemblers, where
// appropriate.
std::unique_ptr<MCDisassembler> SecondaryDisAsm;
std::unique_ptr<const MCSubtargetInfo> SecondarySTI;
if (isArmElf(Obj) && !STI->checkFeatures("+mclass")) {
if (STI->checkFeatures("+thumb-mode"))
Features.AddFeature("-thumb-mode");
else
Features.AddFeature("+thumb-mode");
SecondarySTI.reset(TheTarget->createMCSubtargetInfo(TripleName, MCPU,
Features.getString()));
SecondaryDisAsm.reset(TheTarget->createMCDisassembler(*SecondarySTI, Ctx));
}
std::unique_ptr<const MCInstrAnalysis> MIA(
TheTarget->createMCInstrAnalysis(MII.get()));
int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
std::unique_ptr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
Triple(TripleName), AsmPrinterVariant, *AsmInfo, *MII, *MRI));
if (!IP)
reportError(Obj->getFileName(),
"no instruction printer for target " + TripleName);
IP->setPrintImmHex(PrintImmHex);
IP->setPrintBranchImmAsAddress(true);
IP->setSymbolizeOperands(SymbolizeOperands);
IP->setMCInstrAnalysis(MIA.get());
PrettyPrinter &PIP = selectPrettyPrinter(Triple(TripleName));
SourcePrinter SP(Obj, TheTarget->getName());
for (StringRef Opt : DisassemblerOptions)
if (!IP->applyTargetSpecificCLOption(Opt))
reportError(Obj->getFileName(),
"Unrecognized disassembler option: " + Opt);
disassembleObject(TheTarget, Obj, Ctx, DisAsm.get(), SecondaryDisAsm.get(),
MIA.get(), IP.get(), STI.get(), SecondarySTI.get(), PIP,
SP, InlineRelocs);
}
void objdump::printRelocations(const ObjectFile *Obj) {
StringRef Fmt = Obj->getBytesInAddress() > 4 ? "%016" PRIx64 :
"%08" PRIx64;
// Regular objdump doesn't print relocations in non-relocatable object
// files.
if (!Obj->isRelocatableObject())
return;
// Build a mapping from relocation target to a vector of relocation
// sections. Usually, there is an only one relocation section for
// each relocated section.
MapVector<SectionRef, std::vector<SectionRef>> SecToRelSec;
uint64_t Ndx;
for (const SectionRef &Section : ToolSectionFilter(*Obj, &Ndx)) {
if (Section.relocation_begin() == Section.relocation_end())
continue;
Expected<section_iterator> SecOrErr = Section.getRelocatedSection();
if (!SecOrErr)
reportError(Obj->getFileName(),
"section (" + Twine(Ndx) +
"): unable to get a relocation target: " +
toString(SecOrErr.takeError()));
SecToRelSec[**SecOrErr].push_back(Section);
}
for (std::pair<SectionRef, std::vector<SectionRef>> &P : SecToRelSec) {
StringRef SecName = unwrapOrError(P.first.getName(), Obj->getFileName());
outs() << "RELOCATION RECORDS FOR [" << SecName << "]:\n";
uint32_t OffsetPadding = (Obj->getBytesInAddress() > 4 ? 16 : 8);
uint32_t TypePadding = 24;
outs() << left_justify("OFFSET", OffsetPadding) << " "
<< left_justify("TYPE", TypePadding) << " "
<< "VALUE\n";
for (SectionRef Section : P.second) {
for (const RelocationRef &Reloc : Section.relocations()) {
uint64_t Address = Reloc.getOffset();
SmallString<32> RelocName;
SmallString<32> ValueStr;
if (Address < StartAddress || Address > StopAddress || getHidden(Reloc))
continue;
Reloc.getTypeName(RelocName);
if (Error E = getRelocationValueString(Reloc, ValueStr))
reportError(std::move(E), Obj->getFileName());
outs() << format(Fmt.data(), Address) << " "
<< left_justify(RelocName, TypePadding) << " " << ValueStr
<< "\n";
}
}
outs() << "\n";
}
}
void objdump::printDynamicRelocations(const ObjectFile *Obj) {
// For the moment, this option is for ELF only
if (!Obj->isELF())
return;
const auto *Elf = dyn_cast<ELFObjectFileBase>(Obj);
if (!Elf || Elf->getEType() != ELF::ET_DYN) {
reportError(Obj->getFileName(), "not a dynamic object");
return;
}
std::vector<SectionRef> DynRelSec = Obj->dynamic_relocation_sections();
if (DynRelSec.empty())
return;
outs() << "DYNAMIC RELOCATION RECORDS\n";
StringRef Fmt = Obj->getBytesInAddress() > 4 ? "%016" PRIx64 : "%08" PRIx64;
for (const SectionRef &Section : DynRelSec)
for (const RelocationRef &Reloc : Section.relocations()) {
uint64_t Address = Reloc.getOffset();
SmallString<32> RelocName;
SmallString<32> ValueStr;
Reloc.getTypeName(RelocName);
if (Error E = getRelocationValueString(Reloc, ValueStr))
reportError(std::move(E), Obj->getFileName());
outs() << format(Fmt.data(), Address) << " " << RelocName << " "
<< ValueStr << "\n";
}
}
// Returns true if the LMA column has to be shown when dumping section
// headers. The column only exists for ELF objects, and is shown when at
// least one section has an address that differs from its LMA or when the
// ShowLMA setting is on.
static bool shouldDisplayLMA(const ObjectFile *Obj) {
  if (!Obj->isELF())
    return false;

  bool AnySectionDiffers = false;
  for (const SectionRef &Sec : ToolSectionFilter(*Obj)) {
    if (Sec.getAddress() != getELFSectionLMA(Sec)) {
      AnySectionDiffers = true;
      break;
    }
  }
  return AnySectionDiffers || ShowLMA;
}
// Returns the width of the section name column: the length of the longest
// name among the sections selected by ToolSectionFilter, but never less
// than 13.
static size_t getMaxSectionNameWidth(const ObjectFile *Obj) {
  // Default column width for names is 13 even if no names are that long.
  size_t Widest = 13;
  for (const SectionRef &Sec : ToolSectionFilter(*Obj)) {
    const size_t Length =
        unwrapOrError(Sec.getName(), Obj->getFileName()).size();
    if (Length > Widest)
      Widest = Length;
  }
  return Widest;
}
// Prints one line per section selected by ToolSectionFilter: index, name,
// size, VMA, optionally LMA (see shouldDisplayLMA) and the section type
// (any combination of TEXT, DATA and BSS). The VMA is shifted by AdjustVMA
// for sections where shouldAdjustVA() holds.
void objdump::printSectionHeaders(const ObjectFile *Obj) {
  const size_t NameWidth = getMaxSectionNameWidth(Obj);
  const size_t AddressWidth = 2 * Obj->getBytesInAddress();
  const bool HasLMAColumn = shouldDisplayLMA(Obj);

  // Column headers; the LMA column sits between VMA and Type when present.
  outs() << "Sections:\n"
            "Idx "
         << left_justify("Name", NameWidth) << " Size "
         << left_justify("VMA", AddressWidth) << " ";
  if (HasLMAColumn)
    outs() << left_justify("LMA", AddressWidth) << " ";
  outs() << "Type\n";

  uint64_t Idx;
  for (const SectionRef &Section : ToolSectionFilter(*Obj, &Idx)) {
    StringRef Name = unwrapOrError(Section.getName(), Obj->getFileName());
    uint64_t VMA = Section.getAddress();
    if (shouldAdjustVA(Section))
      VMA += AdjustVMA;
    const uint64_t Size = Section.getSize();

    // Space separated list of the kinds this section belongs to.
    std::string Type = Section.isText() ? "TEXT" : "";
    if (Section.isData())
      Type += Type.empty() ? "DATA" : " DATA";
    if (Section.isBSS())
      Type += Type.empty() ? "BSS" : " BSS";

    // The '*' field width of a printf-style conversion must be supplied as an
    // int; format() forwards the argument with the type it is given, so the
    // size_t width has to be converted explicitly.
    outs() << format("%3" PRIu64 " %-*s %08" PRIx64 " ", Idx,
                     static_cast<int>(NameWidth), Name.str().c_str(), Size)
           << format_hex_no_prefix(VMA, AddressWidth) << " ";
    // The LMA is only computed when its column is displayed.
    if (HasLMAColumn)
      outs() << format_hex_no_prefix(getELFSectionLMA(Section), AddressWidth)
             << " ";
    outs() << Type << "\n";
  }
  outs() << "\n";
}
// Dumps every non-empty section selected by ToolSectionFilter as rows of up
// to 16 bytes: the row address, the bytes in hex (in groups of four) and the
// same bytes as characters, with '.' standing in for non-printable ones.
// For BSS sections only a note with the address range is printed.
void objdump::printSectionContents(const ObjectFile *Obj) {
  const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(Obj);

  for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
    StringRef Name = unwrapOrError(Section.getName(), Obj->getFileName());
    const uint64_t BaseAddr = Section.getAddress();
    const uint64_t Size = Section.getSize();
    if (Size == 0)
      continue;

    // Section title, prefixed with the segment name when there is one.
    outs() << "Contents of section ";
    StringRef SegmentName = getSegmentName(MachO, Section);
    if (!SegmentName.empty())
      outs() << SegmentName << ",";
    outs() << Name << ":\n";

    if (Section.isBSS()) {
      outs() << format("<skipping contents of bss section at [%04" PRIx64
                       ", %04" PRIx64 ")>\n",
                       BaseAddr, BaseAddr + Size);
      continue;
    }

    StringRef Contents =
        unwrapOrError(Section.getContents(), Obj->getFileName());

    // Dump out the content as hex and printable ascii characters.
    for (std::size_t RowStart = 0, Total = Contents.size(); RowStart < Total;
         RowStart += 16) {
      // substr() clamps to the end of the data, so the last row may hold
      // fewer than 16 bytes.
      const StringRef Row = Contents.substr(RowStart, 16);

      outs() << format(" %04" PRIx64 " ", BaseAddr + RowStart);

      // Dump line of hex; columns past the end of the data are blank.
      for (std::size_t Col = 0; Col < 16; ++Col) {
        if (Col != 0 && Col % 4 == 0)
          outs() << ' ';
        if (Col >= Row.size()) {
          outs() << " ";
          continue;
        }
        const char Byte = Row[Col];
        outs() << hexdigit((Byte >> 4) & 0xF, true)
               << hexdigit(Byte & 0xF, true);
      }

      // Print ascii.
      outs() << " ";
      for (const char Byte : Row) {
        if (isPrint(static_cast<unsigned char>(Byte) & 0xFF))
          outs() << Byte;
        else
          outs() << ".";
      }
      outs() << "\n";
    }
  }
}
void objdump::printSymbolTable(const ObjectFile *O, StringRef ArchiveName,
StringRef ArchitectureName, bool DumpDynamic) {
if (O->isCOFF() && !DumpDynamic) {
outs() << "SYMBOL TABLE:\n";
printCOFFSymbolTable(cast<const COFFObjectFile>(O));
return;
}
const StringRef FileName = O->getFileName();
if (!DumpDynamic) {
outs() << "SYMBOL TABLE:\n";
for (auto I = O->symbol_begin(); I != O->symbol_end(); ++I)
printSymbol(O, *I, FileName, ArchiveName, ArchitectureName, DumpDynamic);
return;
}
outs() << "DYNAMIC SYMBOL TABLE:\n";
if (!O->isELF()) {
reportWarning(
"this operation is not currently supported for this file format",
FileName);
return;
}
const ELFObjectFileBase *ELF = cast<const ELFObjectFileBase>(O);
for (auto I = ELF->getDynamicSymbolIterators().begin();
I != ELF->getDynamicSymbolIterators().end(); ++I)
printSymbol(O, *I, FileName, ArchiveName, ArchitectureName, DumpDynamic);
}
void objdump::printSymbol(const ObjectFile *O, const SymbolRef &Symbol,
StringRef FileName, StringRef ArchiveName,
StringRef ArchitectureName, bool DumpDynamic) {
const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(O);
uint64_t Address = unwrapOrError(Symbol.getAddress(), FileName, ArchiveName,
ArchitectureName);
if ((Address < StartAddress) || (Address > StopAddress))
return;
SymbolRef::Type Type =
unwrapOrError(Symbol.getType(), FileName, ArchiveName, ArchitectureName);
uint32_t Flags =
unwrapOrError(Symbol.getFlags(), FileName, ArchiveName, ArchitectureName);
// Don't ask a Mach-O STAB symbol for its section unless you know that
// STAB symbol's section field refers to a valid section index. Otherwise
// the symbol may error trying to load a section that does not exist.
bool IsSTAB = false;
if (MachO) {
DataRefImpl SymDRI = Symbol.getRawDataRefImpl();
uint8_t NType =
(MachO->is64Bit() ? MachO->getSymbol64TableEntry(SymDRI).n_type
: MachO->getSymbolTableEntry(SymDRI).n_type);
if (NType & MachO::N_STAB)
IsSTAB = true;
}
section_iterator Section = IsSTAB
? O->section_end()
: unwrapOrError(Symbol.getSection(), FileName,
ArchiveName, ArchitectureName);
StringRef Name;
if (Type == SymbolRef::ST_Debug && Section != O->section_end()) {
if (Expected<StringRef> NameOrErr = Section->getName())
Name = *NameOrErr;
else
consumeError(NameOrErr.takeError());
} else {
Name = unwrapOrError(Symbol.getName(), FileName, ArchiveName,
ArchitectureName);
}
bool Global = Flags & SymbolRef::SF_Global;
bool Weak = Flags & SymbolRef::SF_Weak;
bool Absolute = Flags & SymbolRef::SF_Absolute;
bool Common = Flags & SymbolRef::SF_Common;
bool Hidden = Flags & SymbolRef::SF_Hidden;
char GlobLoc = ' ';
if ((Section != O->section_end() || Absolute) && !Weak)
GlobLoc = Global ? 'g' : 'l';
char IFunc