[lld-macho] Associate each Symbol with an InputFile

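This makes our error messages more informative: duplicate-symbol errors now
name both defining files, and undefined-symbol errors take the referencing file
from the symbol itself. The bigger motivation, however, is LTO symbol
resolution, which will come in an upcoming diff. The changes in this one are
largely mechanical.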

Reviewed By: #lld-macho, smeenai

Differential Revision: https://reviews.llvm.org/D94316

GitOrigin-RevId: 163dcd851309d0bfbbbe0b7810335f30d88ee879
diff --git a/MachO/Driver.cpp b/MachO/Driver.cpp
index 59ea302..6d4dbf1 100644
--- a/MachO/Driver.cpp
+++ b/MachO/Driver.cpp
@@ -285,7 +285,8 @@
     } else if (config->forceLoadObjC) {
       for (const object::Archive::Symbol &sym : file->symbols())
         if (sym.getName().startswith(objc::klass))
-          symtab->addUndefined(sym.getName(), /*isWeakRef=*/false);
+          symtab->addUndefined(sym.getName(), /*file=*/nullptr,
+                               /*isWeakRef=*/false);
 
       // TODO: no need to look for ObjC sections for a given archive member if
       // we already found that it contains an ObjC symbol. We should also
@@ -510,7 +511,7 @@
       continue;
 
     auto *isec = make<InputSection>();
-    isec->file = common->file;
+    isec->file = common->getFile();
     isec->name = section_names::common;
     isec->segname = segment_names::data;
     isec->align = common->align;
@@ -521,7 +522,7 @@
     isec->flags = S_ZEROFILL;
     inputSections.push_back(isec);
 
-    replaceSymbol<Defined>(sym, sym->getName(), isec, /*value=*/0,
+    replaceSymbol<Defined>(sym, sym->getName(), isec->file, isec, /*value=*/0,
                            /*isWeakDef=*/false,
                            /*isExternal=*/true, common->privateExtern);
   }
@@ -721,6 +722,7 @@
   target = createTargetInfo(args);
 
   config->entry = symtab->addUndefined(args.getLastArgValue(OPT_e, "_main"),
+                                       /*file=*/nullptr,
                                        /*isWeakRef=*/false);
   config->outputFile = args.getLastArgValue(OPT_o, "a.out");
   config->installName =
diff --git a/MachO/ExportTrie.cpp b/MachO/ExportTrie.cpp
index bd0c000..478e81a 100644
--- a/MachO/ExportTrie.cpp
+++ b/MachO/ExportTrie.cpp
@@ -44,7 +44,6 @@
 #include "llvm/Support/LEB128.h"
 
 using namespace llvm;
-using namespace llvm::MachO;
 using namespace lld;
 using namespace lld::macho;
 
@@ -62,6 +61,7 @@
   uint8_t flags = 0;
   ExportInfo(const Symbol &sym, uint64_t imageBase)
       : address(sym.getVA() - imageBase) {
+    using namespace llvm::MachO;
     // Set the symbol type.
     if (sym.isWeakDef())
       flags |= EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION;
diff --git a/MachO/InputFiles.cpp b/MachO/InputFiles.cpp
index d75a857..3fc3f43 100644
--- a/MachO/InputFiles.cpp
+++ b/MachO/InputFiles.cpp
@@ -336,23 +336,23 @@
 
   if (sym.n_type & (N_EXT | N_PEXT)) {
     assert((sym.n_type & N_EXT) && "invalid input");
-    return symtab->addDefined(name, isec, value, sym.n_desc & N_WEAK_DEF,
-                              sym.n_type & N_PEXT);
+    return symtab->addDefined(name, isec->file, isec, value,
+                              sym.n_desc & N_WEAK_DEF, sym.n_type & N_PEXT);
   }
-  return make<Defined>(name, isec, value, sym.n_desc & N_WEAK_DEF,
+  return make<Defined>(name, isec->file, isec, value, sym.n_desc & N_WEAK_DEF,
                        /*isExternal=*/false, /*isPrivateExtern=*/false);
 }
 
 // Absolute symbols are defined symbols that do not have an associated
 // InputSection. They cannot be weak.
 static macho::Symbol *createAbsolute(const structs::nlist_64 &sym,
-                                     StringRef name) {
+                                     InputFile *file, StringRef name) {
   if (sym.n_type & (N_EXT | N_PEXT)) {
     assert((sym.n_type & N_EXT) && "invalid input");
-    return symtab->addDefined(name, nullptr, sym.n_value, /*isWeakDef=*/false,
-                              sym.n_type & N_PEXT);
+    return symtab->addDefined(name, file, nullptr, sym.n_value,
+                              /*isWeakDef=*/false, sym.n_type & N_PEXT);
   }
-  return make<Defined>(name, nullptr, sym.n_value, /*isWeakDef=*/false,
+  return make<Defined>(name, file, nullptr, sym.n_value, /*isWeakDef=*/false,
                        /*isExternal=*/false, /*isPrivateExtern=*/false);
 }
 
@@ -362,12 +362,12 @@
   switch (type) {
   case N_UNDF:
     return sym.n_value == 0
-               ? symtab->addUndefined(name, sym.n_desc & N_WEAK_REF)
+               ? symtab->addUndefined(name, this, sym.n_desc & N_WEAK_REF)
                : symtab->addCommon(name, this, sym.n_value,
                                    1 << GET_COMM_ALIGN(sym.n_desc),
                                    sym.n_type & N_PEXT);
   case N_ABS:
-    return createAbsolute(sym, name);
+    return createAbsolute(sym, this, name);
   case N_PBUD:
   case N_INDR:
     error("TODO: support symbols of type " + std::to_string(type));
diff --git a/MachO/SymbolTable.cpp b/MachO/SymbolTable.cpp
index 2f0844f..78e807b 100644
--- a/MachO/SymbolTable.cpp
+++ b/MachO/SymbolTable.cpp
@@ -37,9 +37,9 @@
   return {sym, true};
 }
 
-Symbol *SymbolTable::addDefined(StringRef name, InputSection *isec,
-                                uint32_t value, bool isWeakDef,
-                                bool isPrivateExtern) {
+Symbol *SymbolTable::addDefined(StringRef name, InputFile *file,
+                                InputSection *isec, uint32_t value,
+                                bool isWeakDef, bool isPrivateExtern) {
   Symbol *s;
   bool wasInserted;
   bool overridesWeakDef = false;
@@ -54,8 +54,11 @@
           defined->privateExtern &= isPrivateExtern;
         return s;
       }
-      if (!defined->isWeakDef())
-        error("duplicate symbol: " + name);
+      if (!defined->isWeakDef()) {
+        error("duplicate symbol: " + name + "\n>>> defined in " +
+              toString(defined->getFile()) + "\n>>> defined in " +
+              toString(file));
+      }
     } else if (auto *dysym = dyn_cast<DylibSymbol>(s)) {
       overridesWeakDef = !isWeakDef && dysym->isWeakDef();
     }
@@ -64,13 +67,14 @@
   }
 
   Defined *defined =
-      replaceSymbol<Defined>(s, name, isec, value, isWeakDef,
+      replaceSymbol<Defined>(s, name, file, isec, value, isWeakDef,
                              /*isExternal=*/true, isPrivateExtern);
   defined->overridesWeakDef = overridesWeakDef;
   return s;
 }
 
-Symbol *SymbolTable::addUndefined(StringRef name, bool isWeakRef) {
+Symbol *SymbolTable::addUndefined(StringRef name, InputFile *file,
+                                  bool isWeakRef) {
   Symbol *s;
   bool wasInserted;
   std::tie(s, wasInserted) = insert(name);
@@ -78,7 +82,7 @@
   auto refState = isWeakRef ? RefState::Weak : RefState::Strong;
 
   if (wasInserted)
-    replaceSymbol<Undefined>(s, name, refState);
+    replaceSymbol<Undefined>(s, name, file, refState);
   else if (auto *lazy = dyn_cast<LazySymbol>(s))
     lazy->fetchArchiveMember();
   else if (auto *dynsym = dyn_cast<DylibSymbol>(s))
@@ -162,11 +166,12 @@
   return s;
 }
 
-void lld::macho::treatUndefinedSymbol(StringRef symbolName,
-                                      StringRef fileName) {
-  std::string message = ("undefined symbol: " + symbolName).str();
+void lld::macho::treatUndefinedSymbol(const Undefined &sym) {
+  std::string message = "undefined symbol: " + toString(sym);
+  std::string fileName = toString(sym.getFile());
+
   if (!fileName.empty())
-    message += ("\n>>> referenced by " + fileName).str();
+    message += "\n>>> referenced by " + fileName;
   switch (config->undefinedSymbolTreatment) {
   case UndefinedSymbolTreatment::suppress:
     break;
diff --git a/MachO/SymbolTable.h b/MachO/SymbolTable.h
index 871687f..15a7844 100644
--- a/MachO/SymbolTable.h
+++ b/MachO/SymbolTable.h
@@ -20,9 +20,11 @@
 class ArchiveFile;
 class DylibFile;
 class InputFile;
+class ObjFile;
 class InputSection;
 class MachHeaderSection;
 class Symbol;
+class Undefined;
 
 /*
  * Note that the SymbolTable handles name collisions by calling
@@ -32,10 +34,10 @@
  */
 class SymbolTable {
 public:
-  Symbol *addDefined(StringRef name, InputSection *isec, uint32_t value,
-                     bool isWeakDef, bool isPrivateExtern);
+  Symbol *addDefined(StringRef name, InputFile *, InputSection *,
+                     uint32_t value, bool isWeakDef, bool isPrivateExtern);
 
-  Symbol *addUndefined(StringRef name, bool isWeakRef);
+  Symbol *addUndefined(StringRef name, InputFile *, bool isWeakRef);
 
   Symbol *addCommon(StringRef name, InputFile *, uint64_t size, uint32_t align,
                     bool isPrivateExtern);
@@ -56,7 +58,7 @@
   std::vector<Symbol *> symVector;
 };
 
-extern void treatUndefinedSymbol(StringRef symbolName, StringRef fileName);
+void treatUndefinedSymbol(const Undefined &);
 
 extern SymbolTable *symtab;
 
diff --git a/MachO/Symbols.cpp b/MachO/Symbols.cpp
index 4c83188..69ca1f6 100644
--- a/MachO/Symbols.cpp
+++ b/MachO/Symbols.cpp
@@ -44,7 +44,7 @@
   return isec->getFileOffset() + value;
 }
 
-void LazySymbol::fetchArchiveMember() { file->fetch(sym); }
+void LazySymbol::fetchArchiveMember() { getFile()->fetch(sym); }
 
 uint64_t DSOHandle::getVA() const { return header->addr; }
 
diff --git a/MachO/Symbols.h b/MachO/Symbols.h
index 7f987c7..3299601 100644
--- a/MachO/Symbols.h
+++ b/MachO/Symbols.h
@@ -9,6 +9,7 @@
 #ifndef LLD_MACHO_SYMBOLS_H
 #define LLD_MACHO_SYMBOLS_H
 
+#include "InputFiles.h"
 #include "InputSection.h"
 #include "Target.h"
 #include "lld/Common/ErrorHandler.h"
@@ -21,8 +22,6 @@
 
 class InputSection;
 class MachHeaderSection;
-class DylibFile;
-class ArchiveFile;
 
 struct StringRefZ {
   StringRefZ(const char *s) : data(s), size(-1) {}
@@ -83,20 +82,23 @@
 
   uint32_t symtabIndex = UINT32_MAX;
 
+  InputFile *getFile() const { return file; }
+
 protected:
-  Symbol(Kind k, StringRefZ name)
-      : symbolKind(k), nameData(name.data), nameSize(name.size) {}
+  Symbol(Kind k, StringRefZ name, InputFile *file)
+      : symbolKind(k), nameData(name.data), nameSize(name.size), file(file) {}
 
   Kind symbolKind;
   const char *nameData;
   mutable uint32_t nameSize;
+  InputFile *file;
 };
 
 class Defined : public Symbol {
 public:
-  Defined(StringRefZ name, InputSection *isec, uint32_t value, bool isWeakDef,
-          bool isExternal, bool isPrivateExtern)
-      : Symbol(DefinedKind, name), isec(isec), value(value),
+  Defined(StringRefZ name, InputFile *file, InputSection *isec, uint32_t value,
+          bool isWeakDef, bool isExternal, bool isPrivateExtern)
+      : Symbol(DefinedKind, name, file), isec(isec), value(value),
         overridesWeakDef(false), privateExtern(isPrivateExtern),
         weakDef(isWeakDef), external(isExternal) {}
 
@@ -136,8 +138,8 @@
 
 class Undefined : public Symbol {
 public:
-  Undefined(StringRefZ name, RefState refState)
-      : Symbol(UndefinedKind, name), refState(refState) {
+  Undefined(StringRefZ name, InputFile *file, RefState refState)
+      : Symbol(UndefinedKind, name, file), refState(refState) {
     assert(refState != RefState::Unreferenced);
   }
 
@@ -167,7 +169,7 @@
 public:
   CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align,
                bool isPrivateExtern)
-      : Symbol(CommonKind, name), file(file), size(size),
+      : Symbol(CommonKind, name, file), size(size),
         align(align != 1 ? align : llvm::PowerOf2Ceil(size)),
         privateExtern(isPrivateExtern) {
     // TODO: cap maximum alignment
@@ -175,7 +177,6 @@
 
   static bool classof(const Symbol *s) { return s->kind() == CommonKind; }
 
-  InputFile *const file;
   const uint64_t size;
   const uint32_t align;
   const bool privateExtern;
@@ -185,18 +186,18 @@
 public:
   DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef,
               RefState refState, bool isTlv)
-      : Symbol(DylibKind, name), file(file), refState(refState),
-        weakDef(isWeakDef), tlv(isTlv) {}
+      : Symbol(DylibKind, name, file), refState(refState), weakDef(isWeakDef),
+        tlv(isTlv) {}
 
   bool isWeakDef() const override { return weakDef; }
   bool isWeakRef() const override { return refState == RefState::Weak; }
   bool isReferenced() const { return refState != RefState::Unreferenced; }
   bool isTlv() const override { return tlv; }
   bool hasStubsHelper() const { return stubsHelperIndex != UINT32_MAX; }
+  DylibFile *getFile() const { return cast<DylibFile>(file); }
 
   static bool classof(const Symbol *s) { return s->kind() == DylibKind; }
 
-  DylibFile *file;
   uint32_t stubsHelperIndex = UINT32_MAX;
   uint32_t lazyBindOffset = UINT32_MAX;
 
@@ -210,14 +211,14 @@
 class LazySymbol : public Symbol {
 public:
   LazySymbol(ArchiveFile *file, const llvm::object::Archive::Symbol &sym)
-      : Symbol(LazyKind, sym.getName()), file(file), sym(sym) {}
+      : Symbol(LazyKind, sym.getName(), file), sym(sym) {}
+
+  ArchiveFile *getFile() const { return cast<ArchiveFile>(file); }
+  void fetchArchiveMember();
 
   static bool classof(const Symbol *s) { return s->kind() == LazyKind; }
 
-  void fetchArchiveMember();
-
 private:
-  ArchiveFile *file;
   const llvm::object::Archive::Symbol sym;
 };
 
@@ -235,7 +236,7 @@
 class DSOHandle : public Symbol {
 public:
   DSOHandle(const MachHeaderSection *header)
-      : Symbol(DSOHandleKind, name), header(header) {}
+      : Symbol(DSOHandleKind, name, nullptr), header(header) {}
 
   const MachHeaderSection *header;
 
diff --git a/MachO/SyntheticSections.cpp b/MachO/SyntheticSections.cpp
index a5696fc..4fa03b5 100644
--- a/MachO/SyntheticSections.cpp
+++ b/MachO/SyntheticSections.cpp
@@ -265,15 +265,15 @@
 static void encodeDylibOrdinal(const DylibSymbol *dysym, Binding &lastBinding,
                                raw_svector_ostream &os) {
   using namespace llvm::MachO;
-  if (lastBinding.ordinal != dysym->file->ordinal) {
-    if (dysym->file->ordinal <= BIND_IMMEDIATE_MASK) {
+  if (lastBinding.ordinal != dysym->getFile()->ordinal) {
+    if (dysym->getFile()->ordinal <= BIND_IMMEDIATE_MASK) {
       os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
-                                 dysym->file->ordinal);
+                                 dysym->getFile()->ordinal);
     } else {
       os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
-      encodeULEB128(dysym->file->ordinal, os);
+      encodeULEB128(dysym->getFile()->ordinal, os);
     }
-    lastBinding.ordinal = dysym->file->ordinal;
+    lastBinding.ordinal = dysym->getFile()->ordinal;
   }
 }
 
@@ -442,10 +442,9 @@
   in.got->addEntry(stubBinder);
 
   inputSections.push_back(in.imageLoaderCache);
-  dyldPrivate =
-      make<Defined>("__dyld_private", in.imageLoaderCache, 0,
-                    /*isWeakDef=*/false,
-                    /*isExternal=*/false, /*isPrivateExtern=*/false);
+  dyldPrivate = make<Defined>("__dyld_private", nullptr, in.imageLoaderCache, 0,
+                              /*isWeakDef=*/false,
+                              /*isExternal=*/false, /*isPrivateExtern=*/false);
 }
 
 ImageLoaderCacheSection::ImageLoaderCacheSection() {
@@ -522,12 +521,12 @@
   uint64_t offset = in.lazyPointers->addr - dataSeg->firstSection()->addr +
                     sym.stubsIndex * WordSize;
   encodeULEB128(offset, os);
-  if (sym.file->ordinal <= MachO::BIND_IMMEDIATE_MASK) {
+  if (sym.getFile()->ordinal <= MachO::BIND_IMMEDIATE_MASK) {
     os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
-                               sym.file->ordinal);
+                               sym.getFile()->ordinal);
   } else {
     os << static_cast<uint8_t>(MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
-    encodeULEB128(sym.file->ordinal, os);
+    encodeULEB128(sym.getFile()->ordinal, os);
   }
 
   uint8_t flags = MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM;
@@ -786,7 +785,7 @@
       nList->n_desc |= defined->isExternalWeakDef() ? MachO::N_WEAK_DEF : 0;
     } else if (auto *dysym = dyn_cast<DylibSymbol>(entry.sym)) {
       uint16_t n_desc = nList->n_desc;
-      MachO::SET_LIBRARY_ORDINAL(n_desc, dysym->file->ordinal);
+      MachO::SET_LIBRARY_ORDINAL(n_desc, dysym->getFile()->ordinal);
       nList->n_type = MachO::N_EXT;
       n_desc |= dysym->isWeakRef() ? MachO::N_WEAK_REF : 0;
       nList->n_desc = n_desc;
diff --git a/MachO/Writer.cpp b/MachO/Writer.cpp
index 81df8de..2c06a1a 100644
--- a/MachO/Writer.cpp
+++ b/MachO/Writer.cpp
@@ -439,8 +439,8 @@
       if (target->hasAttr(r.type, RelocAttrBits::SUBTRAHEND))
         continue;
       if (auto *sym = r.referent.dyn_cast<lld::macho::Symbol *>()) {
-        if (isa<Undefined>(sym))
-          treatUndefinedSymbol(toString(*sym), toString(isec->file));
+        if (auto *undefined = dyn_cast<Undefined>(sym))
+          treatUndefinedSymbol(*undefined);
         else if (target->validateSymbolRelocation(sym, isec, r))
           prepareSymbolRelocation(sym, isec, r);
       } else {
@@ -458,7 +458,8 @@
       if (defined->overridesWeakDef)
         in.weakBinding->addNonWeakDefinition(defined);
     } else if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) {
-      dysym->file->refState = std::max(dysym->file->refState, dysym->refState);
+      dysym->getFile()->refState =
+          std::max(dysym->getFile()->refState, dysym->refState);
     }
   }
 }
diff --git a/test/MachO/invalid/abs-duplicate.s b/test/MachO/invalid/abs-duplicate.s
index ba0bbdb..1cd11d6 100644
--- a/test/MachO/invalid/abs-duplicate.s
+++ b/test/MachO/invalid/abs-duplicate.s
@@ -4,7 +4,9 @@
 # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/weakfoo.s -o %t/weakfoo.o
 # RUN: not %lld -lSystem %t/test.o %t/weakfoo.o -o %t/test 2>&1 | FileCheck %s
 
-# CHECK: lld: error: duplicate symbol: _weakfoo
+# CHECK:      error: duplicate symbol: _weakfoo
+# CHECK-NEXT: >>> defined in {{.*}}/test.o
+# CHECK-NEXT: >>> defined in {{.*}}/weakfoo.o
 
 #--- weakfoo.s
 .globl _weakfoo
diff --git a/test/MachO/invalid/duplicate-symbol.s b/test/MachO/invalid/duplicate-symbol.s
index 392630a..fc9fd76 100644
--- a/test/MachO/invalid/duplicate-symbol.s
+++ b/test/MachO/invalid/duplicate-symbol.s
@@ -1,10 +1,12 @@
 # REQUIRES: x86
 # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
 # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t-dup.o
-# RUN: not %lld -o /dev/null %t-dup.o %t.o 2>&1 | FileCheck %s
-# RUN: not %lld -o /dev/null %t.o %t.o 2>&1 | FileCheck %s
+# RUN: not %lld -o /dev/null %t-dup.o %t.o 2>&1 | FileCheck %s -DFILE_1=%t-dup.o -DFILE_2=%t.o
+# RUN: not %lld -o /dev/null %t.o %t.o 2>&1 | FileCheck %s -DFILE_1=%t.o -DFILE_2=%t.o
 
-# CHECK: error: duplicate symbol: _main
+# CHECK:      error: duplicate symbol: _main
+# CHECK-NEXT: >>> defined in [[FILE_1]]
+# CHECK-NEXT: >>> defined in [[FILE_2]]
 
 .text
 .global _main