ELF: New symbol table design.

This patch implements a new design for the symbol table that stores
SymbolBodies within a memory region of the Symbol object. Symbols are mutated
by constructing SymbolBodies in place over existing SymbolBodies, rather
than by mutating pointers. As mentioned in the initial proposal [1], this
memory layout helps reduce the cache miss rate by improving memory locality.

Performance numbers:

           old(s) new(s)
Without debug info:
chrome      7.178  6.432 (-11.5%)
LLVMgold.so 0.505  0.502 (-0.5%)
clang       0.954  0.827 (-15.4%)
llvm-as     0.052  0.045 (-15.5%)
With debug info:
scylla      5.695  5.613 (-1.5%)
clang      14.396 14.143 (-1.8%)

Performance counter results show that the fewer required indirections is
indeed the cause of the improved performance. For example, when linking
chrome, stalled cycles decreases from 14,556,444,002 to 12,959,238,310, and
instructions per cycle increases from 0.78 to 0.83. We are also executing
many fewer instructions (15,516,401,933 down to 15,002,434,310), probably
because we spend less time allocating SymbolBodies.

The new mechanism by which symbols are added to the symbol table is by calling
add* functions on the SymbolTable.

In this patch, I handle local symbols by storing them inside "unparented"
SymbolBodies. This is suboptimal, but if we do want to try to avoid allocating
these SymbolBodies, we can probably do that separately.

I also removed a few members from the SymbolBody class that were only being
used to pass information from the input file to the symbol table.

This patch implements the new design for the ELF linker only. I intend to
prepare a similar patch for the COFF linker.

[1] http://lists.llvm.org/pipermail/llvm-dev/2016-April/098832.html

Differential Revision: http://reviews.llvm.org/D19752

llvm-svn: 268178
diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp
index f73a9c7..d4e2026 100644
--- a/lld/ELF/Symbols.cpp
+++ b/lld/ELF/Symbols.cpp
@@ -79,7 +79,7 @@
     return 0;
   case SymbolBody::LazyArchiveKind:
   case SymbolBody::LazyObjectKind:
-    assert(Body.Backref->IsUsedInRegularObj && "lazy symbol reached writer");
+    assert(Body.symbol()->IsUsedInRegularObj && "lazy symbol reached writer");
     return 0;
   case SymbolBody::DefinedBitcodeKind:
     llvm_unreachable("should have been replaced");
@@ -89,22 +89,19 @@
 
 SymbolBody::SymbolBody(Kind K, uint32_t NameOffset, uint8_t StOther,
                        uint8_t Type)
-    : SymbolKind(K), Type(Type), Binding(STB_LOCAL), StOther(StOther),
+    : SymbolKind(K), IsLocal(true), Type(Type), StOther(StOther),
       NameOffset(NameOffset) {
   init();
 }
 
-SymbolBody::SymbolBody(Kind K, StringRef Name, uint8_t Binding, uint8_t StOther,
-                       uint8_t Type)
-    : SymbolKind(K), Type(Type), Binding(Binding), StOther(StOther),
+SymbolBody::SymbolBody(Kind K, StringRef Name, uint8_t StOther, uint8_t Type)
+    : SymbolKind(K), IsLocal(false), Type(Type), StOther(StOther),
       Name({Name.data(), Name.size()}) {
-  assert(!isLocal());
   init();
 }
 
 void SymbolBody::init() {
   NeedsCopyOrPltAddr = false;
-  CanOmitFromDynSym = false;
 }
 
 // Returns true if a symbol can be replaced at load-time by a symbol
@@ -122,14 +119,14 @@
     return false;
 
   // Only symbols that appear in dynsym can be preempted.
-  if (!Backref->includeInDynsym())
+  if (!symbol()->includeInDynsym())
     return false;
 
   // Normally only default visibility symbols can be preempted, but -Bsymbolic
   // means that not even they can be preempted.
   if (Config->Bsymbolic || (Config->BsymbolicFunctions && isFunc()))
     return !isDefined();
-  return Backref->Visibility == STV_DEFAULT;
+  return symbol()->Visibility == STV_DEFAULT;
 }
 
 template <class ELFT>
@@ -177,79 +174,35 @@
   return 0;
 }
 
-// Returns 1, 0 or -1 if this symbol should take precedence
-// over the Other, tie or lose, respectively.
-int SymbolBody::compare(SymbolBody *Other) {
-  assert(!isLazy() && !Other->isLazy());
-  std::tuple<bool, bool, bool> L(isDefined(), !isShared(), !isWeak());
-  std::tuple<bool, bool, bool> R(Other->isDefined(), !Other->isShared(),
-                                 !Other->isWeak());
-
-  // Compare the two by symbol type.
-  if (L > R)
-    return -Other->compare(this);
-  if (L != R)
-    return -1;
-  if (!isDefined() || isShared() || isWeak())
-    return 1;
-
-  // If both are equal in terms of symbol type, then at least
-  // one of them must be a common symbol. Otherwise, they conflict.
-  auto *A = dyn_cast<DefinedCommon>(this);
-  auto *B = dyn_cast<DefinedCommon>(Other);
-  if (!A && !B)
-    return 0;
-
-  // If both are common, the larger one is chosen.
-  if (A && B) {
-    if (Config->WarnCommon)
-      warning("multiple common of " + A->getName());
-    A->Alignment = B->Alignment = std::max(A->Alignment, B->Alignment);
-    return A->Size < B->Size ? -1 : 1;
-  }
-
-  // Non-common symbols takes precedence over common symbols.
-  if (Config->WarnCommon)
-    warning("common " + this->getName() + " is overridden");
-  return A ? -1 : 1;
-}
-
-Defined::Defined(Kind K, StringRef Name, uint8_t Binding, uint8_t StOther,
-                 uint8_t Type)
-    : SymbolBody(K, Name, Binding, StOther, Type) {}
+Defined::Defined(Kind K, StringRef Name, uint8_t StOther, uint8_t Type)
+    : SymbolBody(K, Name, StOther, Type) {}
 
 Defined::Defined(Kind K, uint32_t NameOffset, uint8_t StOther, uint8_t Type)
     : SymbolBody(K, NameOffset, StOther, Type) {}
 
-DefinedBitcode::DefinedBitcode(StringRef Name, bool IsWeak, uint8_t StOther)
-    : Defined(DefinedBitcodeKind, Name, IsWeak ? STB_WEAK : STB_GLOBAL,
-              StOther, 0 /* Type */) {}
+DefinedBitcode::DefinedBitcode(StringRef Name, uint8_t StOther, uint8_t Type,
+                               BitcodeFile *F)
+    : Defined(DefinedBitcodeKind, Name, StOther, Type), File(F) {}
 
 bool DefinedBitcode::classof(const SymbolBody *S) {
   return S->kind() == DefinedBitcodeKind;
 }
 
-Undefined::Undefined(StringRef Name, uint8_t Binding, uint8_t StOther,
-                     uint8_t Type, bool IsBitcode)
-    : SymbolBody(SymbolBody::UndefinedKind, Name, Binding, StOther, Type) {
-  this->IsUndefinedBitcode = IsBitcode;
-}
+Undefined::Undefined(StringRef Name, uint8_t StOther, uint8_t Type)
+    : SymbolBody(SymbolBody::UndefinedKind, Name, StOther, Type) {}
 
 Undefined::Undefined(uint32_t NameOffset, uint8_t StOther, uint8_t Type)
-    : SymbolBody(SymbolBody::UndefinedKind, NameOffset, StOther, Type) {
-  this->IsUndefinedBitcode = false;
-}
+    : SymbolBody(SymbolBody::UndefinedKind, NameOffset, StOther, Type) {}
 
 template <typename ELFT>
 DefinedSynthetic<ELFT>::DefinedSynthetic(StringRef N, uintX_t Value,
                                          OutputSectionBase<ELFT> &Section)
-    : Defined(SymbolBody::DefinedSyntheticKind, N, STB_GLOBAL, STV_HIDDEN,
-              0 /* Type */),
+    : Defined(SymbolBody::DefinedSyntheticKind, N, STV_HIDDEN, 0 /* Type */),
       Value(Value), Section(Section) {}
 
 DefinedCommon::DefinedCommon(StringRef N, uint64_t Size, uint64_t Alignment,
-                             uint8_t Binding, uint8_t StOther, uint8_t Type)
-    : Defined(SymbolBody::DefinedCommonKind, N, Binding, StOther, Type),
+                             uint8_t StOther, uint8_t Type)
+    : Defined(SymbolBody::DefinedCommonKind, N, StOther, Type),
       Alignment(Alignment), Size(Size) {}
 
 std::unique_ptr<InputFile> Lazy::getFile() {
@@ -301,8 +254,8 @@
 bool Symbol::includeInDynsym() const {
   if (Visibility != STV_DEFAULT && Visibility != STV_PROTECTED)
     return false;
-  return (ExportDynamic && VersionScriptGlobal) || Body->isShared() ||
-         (Body->isUndefined() && Config->Shared);
+  return (ExportDynamic && VersionScriptGlobal) || body()->isShared() ||
+         (body()->isUndefined() && Config->Shared);
 }
 
 template uint32_t SymbolBody::template getVA<ELF32LE>(uint32_t) const;