[ELF] Add --why-extract= to query why archive members/lazy object files are extracted
Similar to D69607 but for archive member extraction unrelated to GC. This patch adds --why-extract=.
Prior art:
GNU ld -M prints
```
Archive member included to satisfy reference by file (symbol)
a.a(a.o) main.o (a)
b.a(b.o) (b())
```
-M is mainly for input section/symbol assignment <-> output section mapping
(often huge output) and the information may appear ad-hoc.
Apple ld64
```
__Z1bv forced load of b.a(b.o)
_a forced load of a.a(a.o)
```
It does not say which file made the reference.
Arm's proprietary linker
```
Selecting member vsnprintf.o(c_wfu.l) to define vsnprintf.
...
Loading member vsnprintf.o from c_wfu.l.
definition: vsnprintf
reference : _printf_a
```
---
--why-extract= gives the user the full data (which is much shorter than GNU ld
-Map). It is easy to track a chain of references to one archive member with a
one-liner, e.g.
```
% ld.lld main.o a_b.a b_c.a c.a -o /dev/null --why-extract=- | tee stdout
reference extracted symbol
main.o a_b.a(a_b.o) a
a_b.a(a_b.o) b_c.a(b_c.o) b()
b_c.a(b_c.o) c.a(c.o) c()
% ruby -ane 'BEGIN{p={}}; p[$F[1]]=[$F[0],$F[2]] if $.>1; END{x="c.a(c.o)"; while y=p[x]; puts "#{y[0]} extracts #{x} to resolve #{y[1]}"; x=y[0] end}' stdout
b_c.a(b_c.o) extracts c.a(c.o) to resolve c()
a_b.a(a_b.o) extracts b_c.a(b_c.o) to resolve b()
main.o extracts a_b.a(a_b.o) to resolve a
```
Archive member extraction happens before --gc-sections, so a reported reference
chain may not be a live path under --gc-sections, but I think it is a good
approximation in practice.
* Specifying a file avoids output interleaving with --verbose.
* Requiring `=` prevents accidentally overwriting an input file if the user forgets `=`. (Most compiler drivers' long options accept `=` but not ` `.)
Differential Revision: https://reviews.llvm.org/D109572
diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index a3d28d5..f9851d0 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -127,6 +127,7 @@
llvm::StringRef sysroot;
llvm::StringRef thinLTOCacheDir;
llvm::StringRef thinLTOIndexOnlyArg;
+ llvm::StringRef whyExtract;
llvm::StringRef ltoBasicBlockSections;
std::pair<llvm::StringRef, llvm::StringRef> thinLTOObjectSuffixReplace;
std::pair<llvm::StringRef, llvm::StringRef> thinLTOPrefixReplace;
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 34667c4..6607c0f 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -94,6 +94,7 @@
objectFiles.clear();
sharedFiles.clear();
backwardReferences.clear();
+ whyExtract.clear();
tar = nullptr;
memset(&in, 0, sizeof(in));
@@ -1171,6 +1172,7 @@
config->warnCommon = args.hasFlag(OPT_warn_common, OPT_no_warn_common, false);
config->warnSymbolOrdering =
args.hasFlag(OPT_warn_symbol_ordering, OPT_no_warn_symbol_ordering, true);
+ config->whyExtract = args.getLastArgValue(OPT_why_extract);
config->zCombreloc = getZFlag(args, "combreloc", "nocombreloc", true);
config->zCopyreloc = getZFlag(args, "copyreloc", "nocopyreloc", true);
config->zForceBti = hasZOption(args, "force-bti");
@@ -1696,13 +1698,16 @@
}
// Force Sym to be entered in the output.
-static void handleUndefined(Symbol *sym) {
+static void handleUndefined(Symbol *sym, const char *option) {
// Since a symbol may not be used inside the program, LTO may
// eliminate it. Mark the symbol as "used" to prevent it.
sym->isUsedInRegularObj = true;
- if (sym->isLazy())
- sym->fetch();
+ if (!sym->isLazy())
+ return;
+ sym->fetch();
+ if (!config->whyExtract.empty())
+ whyExtract.emplace_back(option, sym->file, *sym);
}
// As an extension to GNU linkers, lld supports a variant of `-u`
@@ -1725,7 +1730,7 @@
}
for (Symbol *sym : syms)
- handleUndefined(sym);
+ handleUndefined(sym, "--undefined-glob");
}
static void handleLibcall(StringRef name) {
@@ -2192,6 +2197,9 @@
e.message());
if (auto e = tryCreateFile(config->mapFile))
error("cannot open map file " + config->mapFile + ": " + e.message());
+ if (auto e = tryCreateFile(config->whyExtract))
+ error("cannot open --why-extract= file " + config->whyExtract + ": " +
+ e.message());
}
if (errorCount())
return;
@@ -2246,7 +2254,7 @@
// If an entry symbol is in a static archive, pull out that file now.
if (Symbol *sym = symtab->find(config->entry))
- handleUndefined(sym);
+ handleUndefined(sym, "--entry");
// Handle the `--undefined-glob <pattern>` options.
for (StringRef pat : args::getStrings(args, OPT_undefined_glob))
diff --git a/lld/ELF/MapFile.cpp b/lld/ELF/MapFile.cpp
index 239c6c3..c4690ae 100644
--- a/lld/ELF/MapFile.cpp
+++ b/lld/ELF/MapFile.cpp
@@ -215,6 +215,25 @@
}
}
+void elf::writeWhyExtract() {
+ if (config->whyExtract.empty())
+ return;
+
+ std::error_code ec;
+ raw_fd_ostream os(config->whyExtract, ec, sys::fs::OF_None);
+ if (ec) {
+ error("cannot open --why-extract= file " + config->whyExtract + ": " +
+ ec.message());
+ return;
+ }
+
+ os << "reference\textracted\tsymbol\n";
+ for (auto &entry : whyExtract) {
+ os << std::get<0>(entry) << '\t' << toString(std::get<1>(entry)) << '\t'
+ << toString(std::get<2>(entry)) << '\n';
+ }
+}
+
static void print(StringRef a, StringRef b) {
lld::outs() << left_justify(a, 49) << " " << b << "\n";
}
diff --git a/lld/ELF/MapFile.h b/lld/ELF/MapFile.h
index c4da18f..1b8c016 100644
--- a/lld/ELF/MapFile.h
+++ b/lld/ELF/MapFile.h
@@ -12,6 +12,7 @@
namespace lld {
namespace elf {
void writeMapFile();
+void writeWhyExtract();
void writeCrossReferenceTable();
void writeArchiveStats();
} // namespace elf
diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index b5c1be5..874399d 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -492,6 +492,8 @@
"Force load of all members in a static library",
"Do not force load of all members in a static library (default)">;
+def why_extract: JJ<"why-extract=">, HelpText<"Print to a file about why archive members are extracted">;
+
defm wrap : Eq<"wrap", "Redirect symbol references to __wrap_symbol and "
"__real_symbol references to symbol">,
MetaVarName<"<symbol>">;
diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp
index cef303f..5f95a1b 100644
--- a/lld/ELF/Symbols.cpp
+++ b/lld/ELF/Symbols.cpp
@@ -64,6 +64,8 @@
Defined *ElfSym::tlsModuleBase;
DenseMap<const Symbol *, std::pair<const InputFile *, const InputFile *>>
elf::backwardReferences;
+SmallVector<std::tuple<std::string, const InputFile *, const Symbol &>, 0>
+ elf::whyExtract;
static uint64_t getSymVA(const Symbol &sym, int64_t &addend) {
switch (sym.kind()) {
@@ -321,6 +323,11 @@
message(toString(sym->file) + s + sym->getName());
}
+static void recordWhyExtract(const InputFile *reference,
+ const InputFile &extracted, const Symbol &sym) {
+ whyExtract.emplace_back(toString(reference), &extracted, sym);
+}
+
void elf::maybeWarnUnorderableSymbol(const Symbol *sym) {
if (!config->warnSymbolOrdering)
return;
@@ -533,6 +540,9 @@
file->groupId < other.file->groupId;
fetch();
+ if (!config->whyExtract.empty())
+ recordWhyExtract(other.file, *file, *this);
+
// We don't report backward references to weak symbols as they can be
// overridden later.
//
@@ -742,7 +752,10 @@
return;
}
+ const InputFile *oldFile = file;
other.fetch();
+ if (!config->whyExtract.empty())
+ recordWhyExtract(oldFile, *file, *this);
}
void Symbol::resolveShared(const SharedSymbol &other) {
diff --git a/lld/ELF/Symbols.h b/lld/ELF/Symbols.h
index d486beb..4f5cc3f 100644
--- a/lld/ELF/Symbols.h
+++ b/lld/ELF/Symbols.h
@@ -20,6 +20,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/ELF.h"
+#include <tuple>
namespace lld {
// Returns a string representation for a symbol for diagnostics.
@@ -582,6 +583,11 @@
std::pair<const InputFile *, const InputFile *>>
backwardReferences;
+// A tuple of (reference, extractedFile, sym). Used by --why-extract=.
+extern SmallVector<std::tuple<std::string, const InputFile *, const Symbol &>,
+ 0>
+ whyExtract;
+
} // namespace elf
} // namespace lld
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index a9b0854f..0fdc648 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -622,11 +622,12 @@
for (OutputSection *sec : outputSections)
sec->addr = 0;
- // Handle --print-map(-M)/--Map, --cref and --print-archive-stats=. Dump them
- // before checkSections() because the files may be useful in case
- // checkSections() or openFile() fails, for example, due to an erroneous file
- // size.
+ // Handle --print-map(-M)/--Map, --why-extract=, --cref and
+ // --print-archive-stats=. Dump them before checkSections() because the files
+ // may be useful in case checkSections() or openFile() fails, for example, due
+ // to an erroneous file size.
writeMapFile();
+ writeWhyExtract();
writeCrossReferenceTable();
writeArchiveStats();