[ThinLTOCodeGenerator] Add support for index-based WPD

Differential revision: https://reviews.llvm.org/D68950


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@375219 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/LTO/ThinLTOCodeGenerator.cpp b/lib/LTO/ThinLTOCodeGenerator.cpp
index db3ca90..d151de1 100644
--- a/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -53,6 +53,7 @@
 #include "llvm/Transforms/IPO/FunctionImport.h"
 #include "llvm/Transforms/IPO/Internalize.h"
 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
 #include "llvm/Transforms/ObjCARC.h"
 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
 
@@ -225,7 +226,8 @@
 }
 
 static void optimizeModule(Module &TheModule, TargetMachine &TM,
-                           unsigned OptLevel, bool Freestanding) {
+                           unsigned OptLevel, bool Freestanding,
+                           ModuleSummaryIndex *Index) {
   // Populate the PassManager
   PassManagerBuilder PMB;
   PMB.LibraryInfo = new TargetLibraryInfoImpl(TM.getTargetTriple());
@@ -239,6 +241,7 @@
   // Already did this in verifyLoadedModule().
   PMB.VerifyInput = false;
   PMB.VerifyOutput = false;
+  PMB.ImportSummary = Index;
 
   legacy::PassManager PM;
 
@@ -433,7 +436,7 @@
     saveTempBitcode(TheModule, SaveTempsDir, count, ".3.imported.bc");
   }
 
-  optimizeModule(TheModule, TM, OptLevel, Freestanding);
+  optimizeModule(TheModule, TM, OptLevel, Freestanding, &Index);
 
   saveTempBitcode(TheModule, SaveTempsDir, count, ".4.opt.bc");
 
@@ -577,29 +580,36 @@
   return CombinedIndex;
 }
 
-static void internalizeAndPromoteInIndex(
-    const StringMap<FunctionImporter::ExportSetTy> &ExportLists,
-    const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
-    const DenseMap<GlobalValue::GUID, const GlobalValueSummary *>
-        &PrevailingCopy,
-    ModuleSummaryIndex &Index) {
-  auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) {
+struct IsExported {
+  const StringMap<FunctionImporter::ExportSetTy> &ExportLists;
+  const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols;
+
+  IsExported(const StringMap<FunctionImporter::ExportSetTy> &ExportLists,
+             const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols)
+      : ExportLists(ExportLists), GUIDPreservedSymbols(GUIDPreservedSymbols) {}
+
+  bool operator()(StringRef ModuleIdentifier, GlobalValue::GUID GUID) const {
     const auto &ExportList = ExportLists.find(ModuleIdentifier);
     return (ExportList != ExportLists.end() &&
             ExportList->second.count(GUID)) ||
            GUIDPreservedSymbols.count(GUID);
-  };
+  }
+};
 
-  auto isPrevailing = [&](GlobalValue::GUID GUID, const GlobalValueSummary *S) {
+struct IsPrevailing {
+  const DenseMap<GlobalValue::GUID, const GlobalValueSummary *> &PrevailingCopy;
+  IsPrevailing(const DenseMap<GlobalValue::GUID, const GlobalValueSummary *>
+                   &PrevailingCopy)
+      : PrevailingCopy(PrevailingCopy) {}
+
+  bool operator()(GlobalValue::GUID GUID, const GlobalValueSummary *S) const {
     const auto &Prevailing = PrevailingCopy.find(GUID);
     // Not in map means that there was only one copy, which must be prevailing.
     if (Prevailing == PrevailingCopy.end())
       return true;
     return Prevailing->second == S;
   };
-
-  thinLTOInternalizeAndPromoteInIndex(Index, isExported, isPrevailing);
-}
+};
 
 static void computeDeadSymbolsInIndex(
     ModuleSummaryIndex &Index,
@@ -656,8 +666,9 @@
 
   // Promote the exported values in the index, so that they are promoted
   // in the module.
-  internalizeAndPromoteInIndex(ExportLists, GUIDPreservedSymbols,
-                               PrevailingCopy, Index);
+  thinLTOInternalizeAndPromoteInIndex(
+      Index, IsExported(ExportLists, GUIDPreservedSymbols),
+      IsPrevailing(PrevailingCopy));
 
   promoteModule(TheModule, Index);
 }
@@ -814,8 +825,9 @@
 
   // Promote the exported values in the index, so that they are promoted
   // in the module.
-  internalizeAndPromoteInIndex(ExportLists, GUIDPreservedSymbols,
-                               PrevailingCopy, Index);
+  thinLTOInternalizeAndPromoteInIndex(
+      Index, IsExported(ExportLists, GUIDPreservedSymbols),
+      IsPrevailing(PrevailingCopy));
 
   promoteModule(TheModule, Index);
 
@@ -834,7 +846,8 @@
   initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
 
   // Optimize now
-  optimizeModule(TheModule, *TMBuilder.create(), OptLevel, Freestanding);
+  optimizeModule(TheModule, *TMBuilder.create(), OptLevel, Freestanding,
+                 nullptr);
 }
 
 /// Write out the generated object file, either from CacheEntryPath or from
@@ -955,6 +968,15 @@
   // Synthesize entry counts for functions in the combined index.
   computeSyntheticCounts(*Index);
 
+  // Perform index-based WPD. This will return immediately if there are
+  // no index entries in the typeIdMetadata map (e.g. if we are instead
+  // performing IR-based WPD in hybrid regular/thin LTO mode).
+  std::map<ValueInfo, std::vector<VTableSlotSummary>> LocalWPDTargetsMap;
+  std::set<GlobalValue::GUID> ExportedGUIDs;
+  runWholeProgramDevirtOnIndex(*Index, ExportedGUIDs, LocalWPDTargetsMap);
+  for (auto GUID : ExportedGUIDs)
+    GUIDPreservedSymbols.insert(GUID);
+
   // Collect the import/export lists for all modules from the call-graph in the
   // combined index.
   StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
@@ -979,8 +1001,12 @@
   // Use global summary-based analysis to identify symbols that can be
   // internalized (because they aren't exported or preserved as per callback).
   // Changes are made in the index, consumed in the ThinLTO backends.
-  internalizeAndPromoteInIndex(ExportLists, GUIDPreservedSymbols,
-                               PrevailingCopy, *Index);
+  updateIndexWPDForExports(*Index,
+                           IsExported(ExportLists, GUIDPreservedSymbols),
+                           LocalWPDTargetsMap);
+  thinLTOInternalizeAndPromoteInIndex(
+      *Index, IsExported(ExportLists, GUIDPreservedSymbols),
+      IsPrevailing(PrevailingCopy));
 
   // Make sure that every module has an entry in the ExportLists, ImportList,
   // GVSummary and ResolvedODR maps to enable threaded access to these maps
diff --git a/test/ThinLTO/X86/devirt_promote_legacy.ll b/test/ThinLTO/X86/devirt_promote_legacy.ll
new file mode 100644
index 0000000..1d17b3f
--- /dev/null
+++ b/test/ThinLTO/X86/devirt_promote_legacy.ll
@@ -0,0 +1,57 @@
+; REQUIRES: x86-registered-target
+
+; Test devirtualization requiring promotion of local targets, where the
+; promotion is required by one devirtualization and needs to be updated
+; for a second devirtualization in the defining module as a post-pass
+; update.
+
+; Generate unsplit module with summary for ThinLTO index-based WPD.
+; RUN: opt -thinlto-bc -o %t3.o %s
+; RUN: opt -thinlto-bc -o %t4.o %p/Inputs/devirt_promote.ll
+
+; RUN: llvm-lto -thinlto-action=run %t3.o %t4.o --thinlto-save-temps=%t5. \
+; RUN:   --pass-remarks=. \
+; RUN:   --exported-symbol=test \
+; RUN:   --exported-symbol=test2 \
+; RUN:   --exported-symbol=_ZTV1B 2>&1 | FileCheck %s --check-prefix=REMARK
+; RUN: llvm-dis %t5.0.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR1
+; RUN: llvm-dis %t5.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR2
+
+; We should devirt call to _ZN1A1nEi once in importing module and once
+; in original (exporting) module.
+; REMARK-COUNT-2: single-impl: devirtualized a call to _ZN1A1nEi.llvm.
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+%struct.A = type { i32 (...)** }
+
+; CHECK-IR1-LABEL: define i32 @test
+define i32 @test(%struct.A* %obj, i32 %a) {
+entry:
+  %0 = bitcast %struct.A* %obj to i8***
+  %vtable = load i8**, i8*** %0
+  %1 = bitcast i8** %vtable to i8*
+  %p = call i1 @llvm.type.test(i8* %1, metadata !"_ZTS1A")
+  call void @llvm.assume(i1 %p)
+  %fptrptr = getelementptr i8*, i8** %vtable, i32 1
+  %2 = bitcast i8** %fptrptr to i32 (%struct.A*, i32)**
+  %fptr1 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %2, align 8
+
+  ; Check that the call was devirtualized.
+  ; CHECK-IR1: %call = tail call i32 bitcast (void ()* @_ZN1A1nEi
+  %call = tail call i32 %fptr1(%struct.A* nonnull %obj, i32 %a)
+
+  ret i32 %call
+}
+; CHECK-IR1-LABEL: ret i32
+; CHECK-IR1-LABEL: }
+
+; CHECK-IR2: define i32 @test2
+; Check that the call was devirtualized.
+; CHECK-IR2:   %call4 = tail call i32 @_ZN1A1nEi
+
+declare i1 @llvm.type.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+attributes #0 = { noinline optnone }