[llvm-objcopy] Make removeSectionReferences batched

Summary:
Removing a large number of sections from a file with a lot of symbols can have abysmal (i.e. O(n^2)) performance, e.g. when running `--only-section` to extract one section out of a large file.

This comes from iterating over all symbols in the symbol table each time we remove a section, to remove references to the section we just removed.
Instead, do just one pass of symbol removal by passing a hash set of all the sections we'd like to remove references to.

This fixes a regression when running llvm-objcopy -j <one section> on an object file with many sections and symbols -- on my machine, running `objcopy -j .keep_me huge-input.o /tmp/foo.o` takes .3s with GNU objcopy, 1.3s with an updated llvm-objcopy, and 7+ minutes with llvm-objcopy prior to this patch.

Reviewers: MaskRay, jhenderson, jakehehrlich, alexshap, espindola

Reviewed By: MaskRay, jhenderson

Subscribers: echristo, emaste, arichardson, mgrang, jdoerfert, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D58296

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@354597 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/tools/llvm-objcopy/ELF/Object.cpp b/tools/llvm-objcopy/ELF/Object.cpp
index 8a065d5..201b767 100644
--- a/tools/llvm-objcopy/ELF/Object.cpp
+++ b/tools/llvm-objcopy/ELF/Object.cpp
@@ -25,6 +25,7 @@
 #include <cstddef>
 #include <cstdint>
 #include <iterator>
+#include <unordered_set>
 #include <utility>
 #include <vector>
 
@@ -49,7 +50,8 @@
   Phdr.p_align = Seg.Align;
 }
 
-Error SectionBase::removeSectionReferences(const SectionBase *Sec) {
+Error SectionBase::removeSectionReferences(
+    function_ref<bool(const SectionBase *)> ToRemove) {
   return Error::success();
 }
 
@@ -432,17 +434,17 @@
   Size += this->EntrySize;
 }
 
-Error SymbolTableSection::removeSectionReferences(const SectionBase *Sec) {
-  if (SectionIndexTable == Sec)
+Error SymbolTableSection::removeSectionReferences(
+    function_ref<bool(const SectionBase *)> ToRemove) {
+  if (ToRemove(SectionIndexTable))
     SectionIndexTable = nullptr;
-  if (SymbolNames == Sec) {
+  if (ToRemove(SymbolNames))
     return createStringError(llvm::errc::invalid_argument,
                              "String table %s cannot be removed because it is "
                              "referenced by the symbol table %s",
                              SymbolNames->Name.data(), this->Name.data());
-  }
   return removeSymbols(
-      [Sec](const Symbol &Sym) { return Sym.DefinedIn == Sec; });
+      [ToRemove](const Symbol &Sym) { return ToRemove(Sym.DefinedIn); });
 }
 
 void SymbolTableSection::updateSymbols(function_ref<void(Symbol &)> Callable) {
@@ -546,8 +548,8 @@
 
 template <class SymTabType>
 Error RelocSectionWithSymtabBase<SymTabType>::removeSectionReferences(
-    const SectionBase *Sec) {
-  if (Symbols == Sec)
+    function_ref<bool(const SectionBase *)> ToRemove) {
+  if (ToRemove(Symbols))
     return createStringError(llvm::errc::invalid_argument,
                              "Symbol table %s cannot be removed because it is "
                              "referenced by the relocation section %s.",
@@ -646,8 +648,9 @@
   Visitor.visit(*this);
 }
 
-Error Section::removeSectionReferences(const SectionBase *Sec) {
-  if (LinkSection == Sec)
+Error Section::removeSectionReferences(
+    function_ref<bool(const SectionBase *)> ToRemove) {
+  if (ToRemove(LinkSection))
     return createStringError(llvm::errc::invalid_argument,
                              "Section %s cannot be removed because it is "
                              "referenced by the section %s",
@@ -1351,13 +1354,19 @@
   // Now make sure there are no remaining references to the sections that will
   // be removed. Sometimes it is impossible to remove a reference so we emit
   // an error here instead.
+  std::unordered_set<const SectionBase *> RemoveSections;
+  RemoveSections.reserve(std::distance(Iter, std::end(Sections)));
   for (auto &RemoveSec : make_range(Iter, std::end(Sections))) {
     for (auto &Segment : Segments)
       Segment->removeSection(RemoveSec.get());
-    for (auto &KeepSec : make_range(std::begin(Sections), Iter))
-      if (Error E = KeepSec->removeSectionReferences(RemoveSec.get()))
-        return E;
+    RemoveSections.insert(RemoveSec.get());
   }
+  for (auto &KeepSec : make_range(std::begin(Sections), Iter))
+    if (Error E = KeepSec->removeSectionReferences(
+            [&RemoveSections](const SectionBase *Sec) {
+              return RemoveSections.find(Sec) != RemoveSections.end();
+            }))
+      return E;
   // Now finally get rid of them all togethor.
   Sections.erase(Iter, std::end(Sections));
   return Error::success();
diff --git a/tools/llvm-objcopy/ELF/Object.h b/tools/llvm-objcopy/ELF/Object.h
index 0953eb7..ec400b5 100644
--- a/tools/llvm-objcopy/ELF/Object.h
+++ b/tools/llvm-objcopy/ELF/Object.h
@@ -273,7 +273,9 @@
 
   virtual void initialize(SectionTableRef SecTable);
   virtual void finalize();
-  virtual Error removeSectionReferences(const SectionBase *Sec);
+  // Remove references to these sections. The list of sections must be sorted.
+  virtual Error
+  removeSectionReferences(function_ref<bool(const SectionBase *)> ToRemove);
   virtual Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove);
   virtual void accept(SectionVisitor &Visitor) const = 0;
   virtual void accept(MutableSectionVisitor &Visitor) = 0;
@@ -334,7 +336,8 @@
 
   void accept(SectionVisitor &Visitor) const override;
   void accept(MutableSectionVisitor &Visitor) override;
-  Error removeSectionReferences(const SectionBase *Sec) override;
+  Error removeSectionReferences(
+      function_ref<bool(const SectionBase *)> ToRemove) override;
   void initialize(SectionTableRef SecTable) override;
   void finalize() override;
 };
@@ -521,7 +524,8 @@
   Symbol *getSymbolByIndex(uint32_t Index);
   void updateSymbols(function_ref<void(Symbol &)> Callable);
 
-  Error removeSectionReferences(const SectionBase *Sec) override;
+  Error removeSectionReferences(
+      function_ref<bool(const SectionBase *)> ToRemove) override;
   void initialize(SectionTableRef SecTable) override;
   void finalize() override;
   void accept(SectionVisitor &Visitor) const override;
@@ -573,7 +577,8 @@
   RelocSectionWithSymtabBase() = default;
 
 public:
-  Error removeSectionReferences(const SectionBase *Sec) override;
+  Error removeSectionReferences(
+      function_ref<bool(const SectionBase *)> ToRemove) override;
   void initialize(SectionTableRef SecTable) override;
   void finalize() override;
 };