[WebAssembly] Don't load weak undefined symbols from archive files

Summary: Fixes PR40494

Subscribers: dschuff, jgravelle-google, aheejin, sunfish, llvm-commits

Differential Revision: https://reviews.llvm.org/D57370

git-svn-id: https://llvm.org/svn/llvm-project/lld/trunk@352554 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/test/wasm/archive-weak-undefined.ll b/test/wasm/archive-weak-undefined.ll
new file mode 100644
index 0000000..3b92612
--- /dev/null
+++ b/test/wasm/archive-weak-undefined.ll
@@ -0,0 +1,19 @@
+; RUN: llc -filetype=obj %s -o %t.o
+; RUN: llc -filetype=obj %S/Inputs/ret32.ll -o %t.a1.o
+; RUN: rm -f %t.a
+; RUN: llvm-ar rcs %t.a %t.a1.o
+; RUN: wasm-ld %t.o %t.a -o %t.wasm
+; RUN: obj2yaml %t.wasm | FileCheck %s
+
+target triple = "wasm32-unknown-unknown"
+
+declare extern_weak i32 @ret32()
+
+define void @_start() {
+entry:
+  %call1 = call i32 @ret32()
+  ret void
+}
+
+; CHECK: Name: undefined function ret32
+; CHECK-NOT: Name: ret32
diff --git a/wasm/Driver.cpp b/wasm/Driver.cpp
index 164fe65..eb32802 100644
--- a/wasm/Driver.cpp
+++ b/wasm/Driver.cpp
@@ -297,22 +297,31 @@
 // the call instruction that passes Wasm validation.
 static void handleWeakUndefines() {
   for (Symbol *Sym : Symtab->getSymbols()) {
-    if (!Sym->isUndefined() || !Sym->isWeak())
-      continue;
-    auto *FuncSym = dyn_cast<FunctionSymbol>(Sym);
-    if (!FuncSym)
+    if (!Sym->isUndefWeak())
       continue;
 
-    // It is possible for undefined functions not to have a signature (eg. if
-    // added via "--undefined"), but weak undefined ones do have a signature.
-    assert(FuncSym->Signature);
-    const WasmSignature &Sig = *FuncSym->Signature;
+    const WasmSignature *Sig = nullptr;
+
+    if (auto *FuncSym = dyn_cast<FunctionSymbol>(Sym)) {
+      // It is possible for undefined functions not to have a signature (eg. if
+      // added via "--undefined"), but weak undefined ones do have a signature.
+      assert(FuncSym->Signature);
+      Sig = FuncSym->Signature;
+    } else if (auto *LazySym = dyn_cast<LazySymbol>(Sym)) {
+      // Lazy symbols may not be functions and therefore can have a null
+      // signature.
+      Sig = LazySym->Signature;
+    }
+
+    if (!Sig)
+      continue;
+
 
     // Add a synthetic dummy for weak undefined functions.  These dummies will
     // be GC'd if not used as the target of any "call" instructions.
     std::string SymName = toString(*Sym);
     StringRef DebugName = Saver.save("undefined function " + SymName);
-    auto *Func = make<SyntheticFunction>(Sig, Sym->getName(), DebugName);
+    auto *Func = make<SyntheticFunction>(*Sig, Sym->getName(), DebugName);
     Func->setBody(UnreachableFn);
     // Ensure it compares equal to the null pointer, and so that table relocs
     // don't pull in the stub body (only call-operand relocs should do that).
diff --git a/wasm/SymbolTable.cpp b/wasm/SymbolTable.cpp
index fccaaf3..f490d4c 100644
--- a/wasm/SymbolTable.cpp
+++ b/wasm/SymbolTable.cpp
@@ -241,10 +241,12 @@
 
   if (shouldReplace(S, File, Flags)) {
     // If the new defined function doesn't have signture (i.e. bitcode
-    // functions) but the old symbols does then preserve the old signature
+    // functions) but the old symbol does then preserve the old signature
     const WasmSignature *OldSig = nullptr;
     if (auto* F = dyn_cast<FunctionSymbol>(S))
       OldSig = F->Signature;
+    if (auto *L = dyn_cast<LazySymbol>(S))
+      OldSig = L->Signature;
     auto NewSym = replaceSymbol<DefinedFunction>(S, Name, Flags, File, Function);
     if (!NewSym->Signature)
       NewSym->Signature = OldSig;
@@ -377,15 +379,31 @@
   std::tie(S, WasInserted) = insert(Name, nullptr);
 
   if (WasInserted) {
-    replaceSymbol<LazySymbol>(S, Name, File, *Sym);
+    replaceSymbol<LazySymbol>(S, Name, 0, File, *Sym);
     return;
   }
 
-  // If there is an existing undefined symbol, load a new one from the archive.
-  if (S->isUndefined()) {
-    LLVM_DEBUG(dbgs() << "replacing existing undefined\n");
-    File->addMember(Sym);
+  if (!S->isUndefined())
+    return;
+
+  // The existing symbol is undefined, load a new one from the archive,
+  // unless the the existing symbol is weak in which case replace the undefined
+  // symbols with a LazySymbol.
+  if (S->isWeak()) {
+    const WasmSignature *OldSig = nullptr;
+    // In the case of an UndefinedFunction we need to preserve the expected
+    // signature.
+    if (auto *F = dyn_cast<UndefinedFunction>(S))
+      OldSig = F->Signature;
+    LLVM_DEBUG(dbgs() << "replacing existing weak undefined symbol\n");
+    auto NewSym = replaceSymbol<LazySymbol>(S, Name, WASM_SYMBOL_BINDING_WEAK,
+                                            File, *Sym);
+    NewSym->Signature = OldSig;
+    return;
   }
+
+  LLVM_DEBUG(dbgs() << "replacing existing undefined\n");
+  File->addMember(Sym);
 }
 
 bool SymbolTable::addComdat(StringRef Name) {
diff --git a/wasm/Symbols.h b/wasm/Symbols.h
index 28bcb5b..63c0239 100644
--- a/wasm/Symbols.h
+++ b/wasm/Symbols.h
@@ -63,6 +63,13 @@
   bool isWeak() const;
   bool isHidden() const;
 
+  // True if this is an undefined weak symbol. This only works once
+  // all input files have been added.
+  bool isUndefWeak() const {
+    // See comment on lazy symbols for details.
+    return isWeak() && (isUndefined() || isLazy());
+  }
+
   // Returns the symbol name.
   StringRef getName() const { return Name; }
 
@@ -313,15 +320,31 @@
   InputEvent *Event;
 };
 
+// LazySymbol represents a symbol that is not yet in the link, but we know where
+// to find it if needed. If the resolver finds both Undefined and Lazy for the
+// same name, it will ask the Lazy to load a file.
+//
+// A special complication is the handling of weak undefined symbols. They should
+// not load a file, but we have to remember we have seen both the weak undefined
+// and the lazy. We represent that with a lazy symbol with a weak binding. This
+// means that code looking for undefined symbols normally also has to take lazy
+// symbols into consideration.
 class LazySymbol : public Symbol {
 public:
-  LazySymbol(StringRef Name, InputFile *File,
+  LazySymbol(StringRef Name, uint32_t Flags, InputFile *File,
              const llvm::object::Archive::Symbol &Sym)
-      : Symbol(Name, LazyKind, 0, File), ArchiveSymbol(Sym) {}
+      : Symbol(Name, LazyKind, Flags, File), ArchiveSymbol(Sym) {}
 
   static bool classof(const Symbol *S) { return S->kind() == LazyKind; }
   void fetch();
 
+  // Lazy symbols can have a signature because they can replace an
+  // UndefinedFunction which which case we need to be able to preserve the
+  // signture.
+  // TODO(sbc): This repetition of the signature field is inelegant.  Revisit
+  // the use of class hierarchy to represent symbol taxonomy.
+  const WasmSignature *Signature = nullptr;
+
 private:
   llvm::object::Archive::Symbol ArchiveSymbol;
 };