Add --undefined-glob which is an --undefined with wildcard pattern match

This patch adds a new command line option, `--undefined-glob`, to lld.
That option is a variant of `--undefined` but accepts wildcard
patterns, so that all symbols that match a given pattern are
handled as if they were given by `-u`.

`-u foo` forces symbol foo to be resolved: if foo is not a defined symbol
and there's a static archive that contains a definition of foo, the
archive member defining it is pulled into the link.

Now, you can specify a wildcard pattern as an argument for `--undefined-glob`.
So, if you want to include all JNI symbols (which start with "Java_"), you
can do that by passing `--undefined-glob "Java_*"` to the linker, for example.

In this patch, I use the same glob pattern matcher that the version script
processor uses, so it supports not only `*` but also `?` and `[...]`.

Differential Revision: https://reviews.llvm.org/D63244

git-svn-id: https://llvm.org/svn/llvm-project/lld/trunk@363396 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/ELF/Driver.cpp b/ELF/Driver.cpp
index 1517e60..e4c1b79 100644
--- a/ELF/Driver.cpp
+++ b/ELF/Driver.cpp
@@ -50,6 +50,7 @@
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compression.h"
+#include "llvm/Support/GlobPattern.h"
 #include "llvm/Support/LEB128.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/TarWriter.h"
@@ -1319,12 +1320,8 @@
 }
 
 // Force Sym to be entered in the output. Used for -u or equivalent.
-static void handleUndefined(StringRef Name) {
-  Symbol *Sym = Symtab->find(Name);
-  if (!Sym)
-    return;
-
-  // Since symbol S may not be used inside the program, LTO may
+static void handleUndefined(Symbol *Sym) {
+  // Since a symbol may not be used inside the program, LTO may
   // eliminate it. Mark the symbol as "used" to prevent it.
   Sym->IsUsedInRegularObj = true;
 
@@ -1332,6 +1329,29 @@
     Sym->fetch();
 }
 
+// As an extension to GNU linkers, lld supports a variant of `-u` that takes
+// a wildcard pattern: every symbol matching it is handled as if it were
+// given by `-u`. An invalid pattern is reported via error().
+static void handleUndefinedGlob(StringRef Arg) {
+  Expected<GlobPattern> Pat = GlobPattern::create(Arg);
+  if (!Pat) {
+    error("--undefined-glob: " + toString(Pat.takeError()));
+    return;
+  }
+
+  std::vector<Symbol *> Syms;
+  Symtab->forEachSymbol([&](Symbol *Sym) {
+    // Calling Sym->fetch() from here is not safe because it may
+    // add new symbols to the symbol table, invalidating the current
+    // iterator. So we only record matches here and handle them below.
+    if (Pat->match(Sym->getName()))
+      Syms.push_back(Sym);
+  });
+
+  for (Symbol *Sym : Syms)
+    handleUndefined(Sym);
+}
+
 static void handleLibcall(StringRef Name) {
   Symbol *Sym = Symtab->find(Name);
   if (!Sym || !Sym->isLazy())
@@ -1698,11 +1718,17 @@
     addUndefined(Name);
 
   // Handle the `--undefined <sym>` options.
-  for (StringRef S : Config->Undefined)
-    handleUndefined(S);
+  for (StringRef Arg : Config->Undefined)
+    if (Symbol *Sym = Symtab->find(Arg))
+      handleUndefined(Sym);
 
   // If an entry symbol is in a static archive, pull out that file now.
-  handleUndefined(Config->Entry);
+  if (Symbol *Sym = Symtab->find(Config->Entry))
+    handleUndefined(Sym);
+
+  // Handle the `--undefined-glob <pattern>` options.
+  for (StringRef Pat : args::getStrings(Args, OPT_undefined_glob))
+    handleUndefinedGlob(Pat);
 
   // If any of our inputs are bitcode files, the LTO code generator may create
   // references to certain library functions that might not be explicit in the
diff --git a/ELF/Options.td b/ELF/Options.td
index 81eb5cd..6c6222c 100644
--- a/ELF/Options.td
+++ b/ELF/Options.td
@@ -366,6 +366,9 @@
 defm undefined: Eq<"undefined", "Force undefined symbol during linking">,
   MetaVarName<"<symbol>">;
 
+defm undefined_glob: Eq<"undefined-glob", "Force undefined symbols matching a glob pattern during linking">,
+  MetaVarName<"<pattern>">;
+
 defm unresolved_symbols:
   Eq<"unresolved-symbols", "Determine how to handle unresolved symbols">;
 
diff --git a/docs/ld.lld.1 b/docs/ld.lld.1
index 64b1ad7..c432aac 100644
--- a/docs/ld.lld.1
+++ b/docs/ld.lld.1
@@ -504,9 +504,23 @@
 Trace references to
 .Ar symbol .
 .It Fl -undefined Ns = Ns Ar symbol , Fl u Ar symbol
-Force
+If
 .Ar symbol
-to be an undefined symbol during linking.
+is not defined after symbol resolution, and there's a static library
+that contains an object file defining the symbol, load the member
+to include the object file in the output file.
+.It Fl -undefined-glob Ns = Ns Ar pattern
+Synonym for
+.Fl -undefined ,
+except that it takes a glob pattern. In a glob pattern,
+.Cm *
+matches zero or more characters,
+.Cm ?
+matches any single character, and
+.Cm [...]
+matches any one of the characters within the brackets. All symbols that match
+a given pattern are handled as if they were given as arguments of
+.Fl -undefined .
 .It Fl -unresolved-symbols Ns = Ns Ar value
 Determine how to handle unresolved symbols.
 .It Fl -use-android-relr-tags
diff --git a/test/ELF/undefined-glob.s b/test/ELF/undefined-glob.s
new file mode 100644
index 0000000..0f423b5
--- /dev/null
+++ b/test/ELF/undefined-glob.s
@@ -0,0 +1,58 @@
+# REQUIRES: x86
+
+# RUN: echo '.globl foo1; foo1:' | \
+# RUN:   llvm-mc -filetype=obj -triple=x86_64-unknown-linux - -o %t1.o
+# RUN: echo '.globl foo2; foo2:' | \
+# RUN:   llvm-mc -filetype=obj -triple=x86_64-unknown-linux - -o %t2.o
+# RUN: echo '.globl foo32; foo32:' | \
+# RUN:   llvm-mc -filetype=obj -triple=x86_64-unknown-linux - -o %t3.o
+# RUN: echo '.globl bar; bar:' | \
+# RUN:   llvm-mc -filetype=obj -triple=x86_64-unknown-linux - -o %t4.o
+# RUN: rm -f %t.a
+# RUN: llvm-ar rcs %t.a %t1.o %t2.o %t3.o %t4.o
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o
+
+# RUN: ld.lld -o %t.exe %t.o %t.a
+# RUN: llvm-readobj --symbols %t.exe | FileCheck --check-prefix=NO-OPT %s
+
+# NO-OPT-NOT: foo
+# NO-OPT-NOT: bar
+
+# RUN: ld.lld -o %t.exe %t.o %t.a --undefined-glob foo1
+# RUN: llvm-readobj --symbols %t.exe | FileCheck --check-prefix=FOO1 %s
+
+# FOO1: foo1
+# FOO1-NOT: foo2
+
+# RUN: ld.lld -o %t.exe %t.o %t.a --undefined-glob 'foo*'
+# RUN: llvm-readobj --symbols %t.exe | FileCheck --check-prefix=FOO-STAR %s
+
+# FOO-STAR: foo1
+# FOO-STAR: foo2
+# FOO-STAR: foo32
+# FOO-STAR-NOT: bar
+
+# RUN: ld.lld -o %t.exe %t.o %t.a --undefined-glob 'foo?'
+# RUN: llvm-readobj --symbols %t.exe | FileCheck --check-prefix=FOO-Q %s
+
+# FOO-Q: foo1
+# FOO-Q: foo2
+# FOO-Q-NOT: foo32
+# FOO-Q-NOT: bar
+
+# RUN: ld.lld -o %t.exe %t.o %t.a --undefined-glob 'foo[13]*'
+# RUN: llvm-readobj --symbols %t.exe | FileCheck --check-prefix=FOO13 %s
+
+# FOO13: foo1
+# FOO13-NOT: foo2
+# FOO13: foo32
+# FOO13-NOT: bar
+
+# RUN: not ld.lld -o %t.exe %t.o %t.a --undefined-glob '[' 2>&1 | \
+# RUN:   FileCheck -check-prefix=BAD-PATTERN %s
+
+# BAD-PATTERN: --undefined-glob: invalid glob pattern: [
+
+.globl _start
+_start: