[ThinLTO] Import virtual method with single implementation in hybrid mode

Differential revision: https://reviews.llvm.org/D68782


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@375083 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Transforms/IPO/WholeProgramDevirt.cpp b/lib/Transforms/IPO/WholeProgramDevirt.cpp
index e1578cc..ec54e52 100644
--- a/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -383,11 +383,6 @@
            !SummaryTypeCheckedLoadUsers.empty();
   }
 
-  void markSummaryHasTypeTestAssumeUsers() {
-    SummaryHasTypeTestAssumeUsers = true;
-    AllCallSitesDevirted = false;
-  }
-
   void addSummaryTypeCheckedLoadUser(FunctionSummary *FS) {
     SummaryTypeCheckedLoadUsers.push_back(FS);
     AllCallSitesDevirted = false;
@@ -395,7 +390,8 @@
 
   void addSummaryTypeTestAssumeUser(FunctionSummary *FS) {
     SummaryTypeTestAssumeUsers.push_back(FS);
-    markSummaryHasTypeTestAssumeUsers();
+    SummaryHasTypeTestAssumeUsers = true;
+    AllCallSitesDevirted = false;
   }
 
   void markDevirt() {
@@ -504,7 +500,8 @@
 
   void applySingleImplDevirt(VTableSlotInfo &SlotInfo, Constant *TheFn,
                              bool &IsExported);
-  bool trySingleImplDevirt(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
+  bool trySingleImplDevirt(ModuleSummaryIndex *ExportSummary,
+                           MutableArrayRef<VirtualCallTarget> TargetsForSlot,
                            VTableSlotInfo &SlotInfo,
                            WholeProgramDevirtResolution *Res);
 
@@ -923,9 +920,38 @@
     Apply(P.second);
 }
 
+static bool AddCalls(VTableSlotInfo &SlotInfo, const ValueInfo &Callee) {
+  // We cannot add calls to a callee whose definition we have not seen.
+  if (Callee.getSummaryList().empty())
+    return false;
+
+  // Add a call edge to Callee in each summary user of this slot so that the
+  // devirtualized target is eligible for import; report whether any user
+  // is in a different module than the callee's first summary.
+  // FIXME: Annotate type tests with hotness; hot for now to favor inlining.
+  CalleeInfo HotEdge(CalleeInfo::HotnessType::Hot, /* RelBF = */ 0);
+  auto &CalleeSummary = Callee.getSummaryList()[0];
+  bool CrossModule = false;
+  auto RecordEdge = [&](FunctionSummary *CallerFS) {
+    CallerFS->addCall({Callee, HotEdge});
+    CrossModule |= CalleeSummary->modulePath() != CallerFS->modulePath();
+  };
+  auto RecordEdges = [&](CallSiteInfo &CSInfo) {
+    for (auto *CallerFS : CSInfo.SummaryTypeCheckedLoadUsers)
+      RecordEdge(CallerFS);
+    for (auto *CallerFS : CSInfo.SummaryTypeTestAssumeUsers)
+      RecordEdge(CallerFS);
+  };
+  RecordEdges(SlotInfo.CSInfo);
+  for (auto &ConstEntry : SlotInfo.ConstCSInfo)
+    RecordEdges(ConstEntry.second);
+  return CrossModule;
+}
+
 bool DevirtModule::trySingleImplDevirt(
-    MutableArrayRef<VirtualCallTarget> TargetsForSlot,
-    VTableSlotInfo &SlotInfo, WholeProgramDevirtResolution *Res) {
+    ModuleSummaryIndex *ExportSummary,
+    MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
+    WholeProgramDevirtResolution *Res) {
   // See if the program contains a single implementation of this virtual
   // function.
   Function *TheFn = TargetsForSlot[0].Fn;
@@ -965,6 +991,10 @@
     TheFn->setVisibility(GlobalValue::HiddenVisibility);
     TheFn->setName(NewName);
   }
+  if (ValueInfo TheFnVI = ExportSummary->getValueInfo(TheFn->getGUID()))
+    // Any needed promotion of 'TheFn' has already been done during
+    // LTO unit split, so we can ignore the return value of AddCalls.
+    AddCalls(SlotInfo, TheFnVI);
 
   Res->TheKind = WholeProgramDevirtResolution::SingleImpl;
   Res->SingleImplName = TheFn->getName();
@@ -1000,27 +1030,7 @@
     DevirtTargets.insert(TheFn);
 
   auto &S = TheFn.getSummaryList()[0];
-  bool IsExported = false;
-
-  // Insert calls into the summary index so that the devirtualized targets
-  // are eligible for import.
-  // FIXME: Annotate type tests with hotness. For now, mark these as hot
-  // to better ensure we have the opportunity to inline them.
-  CalleeInfo CI(CalleeInfo::HotnessType::Hot, /* RelBF = */ 0);
-  auto AddCalls = [&](CallSiteInfo &CSInfo) {
-    for (auto *FS : CSInfo.SummaryTypeCheckedLoadUsers) {
-      FS->addCall({TheFn, CI});
-      IsExported |= S->modulePath() != FS->modulePath();
-    }
-    for (auto *FS : CSInfo.SummaryTypeTestAssumeUsers) {
-      FS->addCall({TheFn, CI});
-      IsExported |= S->modulePath() != FS->modulePath();
-    }
-  };
-  AddCalls(SlotInfo.CSInfo);
-  for (auto &P : SlotInfo.ConstCSInfo)
-    AddCalls(P.second);
-
+  bool IsExported = AddCalls(SlotInfo, TheFn);
   if (IsExported)
     ExportedGUIDs.insert(TheFn.getGUID());
 
@@ -1847,8 +1857,7 @@
         // FIXME: Only add live functions.
         for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
           for (Metadata *MD : MetadataByGUID[VF.GUID]) {
-            CallSlots[{MD, VF.Offset}]
-                .CSInfo.markSummaryHasTypeTestAssumeUsers();
+            CallSlots[{MD, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS);
           }
         }
         for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
@@ -1861,7 +1870,7 @@
           for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
             CallSlots[{MD, VC.VFunc.Offset}]
                 .ConstCSInfo[VC.Args]
-                .markSummaryHasTypeTestAssumeUsers();
+                .addSummaryTypeTestAssumeUser(FS);
           }
         }
         for (const FunctionSummary::ConstVCall &VC :
@@ -1893,7 +1902,7 @@
                        cast<MDString>(S.first.TypeID)->getString())
                    .WPDRes[S.first.ByteOffset];
 
-      if (!trySingleImplDevirt(TargetsForSlot, S.second, Res)) {
+      if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) {
         DidVirtualConstProp |=
             tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first);
 
diff --git a/test/ThinLTO/X86/Inputs/devirt_single_hybrid_bar.ll b/test/ThinLTO/X86/Inputs/devirt_single_hybrid_bar.ll
new file mode 100644
index 0000000..414e7d5
--- /dev/null
+++ b/test/ThinLTO/X86/Inputs/devirt_single_hybrid_bar.ll
@@ -0,0 +1,58 @@
+; ModuleID = 'bar.cpp'
+source_filename = "bar.cpp"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.A = type { i32 (...)** }
+
+$_ZNK1A1fEv = comdat any
+
+$_ZTV1A = comdat any
+
+$_ZTS1A = comdat any
+
+$_ZTI1A = comdat any
+
+@_ZTV1A = linkonce_odr hidden unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* bitcast ({ i8*, i8* }* @_ZTI1A to i8*), i8* bitcast (i32 (%struct.A*)* @_ZNK1A1fEv to i8*)] }, comdat, align 8, !type !0, !type !1
+@_ZTVN10__cxxabiv117__class_type_infoE = external dso_local global i8*
+@_ZTS1A = linkonce_odr hidden constant [3 x i8] c"1A\00", comdat, align 1
+@_ZTI1A = linkonce_odr hidden constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([3 x i8], [3 x i8]* @_ZTS1A, i32 0, i32 0) }, comdat, align 8
+
+; bar(): builds an A on the stack, calls foo(&A) and returns the result + 10.
+define hidden i32 @_Z3barv() local_unnamed_addr {
+entry:
+  %b = alloca %struct.A, align 8
+  %0 = bitcast %struct.A* %b to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %0)
+  %1 = getelementptr inbounds %struct.A, %struct.A* %b, i64 0, i32 0
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1A, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %1, align 8, !tbaa !4
+  %call = call i32 @_Z3fooP1A(%struct.A* nonnull %b)
+  %add = add nsw i32 %call, 10
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %0)
+  ret i32 %add
+}
+
+; Function Attrs: argmemonly nounwind willreturn
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
+
+declare dso_local i32 @_Z3fooP1A(%struct.A*) local_unnamed_addr
+
+; Function Attrs: argmemonly nounwind willreturn
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
+
+; A::f() const: the virtual function stored in @_ZTV1A; always returns 3.
+define linkonce_odr hidden i32 @_ZNK1A1fEv(%struct.A* %this) unnamed_addr comdat align 2 {
+entry:
+  ret i32 3
+}
+
+!llvm.module.flags = !{!2}
+!llvm.ident = !{!3}
+
+!0 = !{i64 16, !"_ZTS1A"}
+!1 = !{i64 16, !"_ZTSM1AKFivE.virtual"}
+!2 = !{i32 1, !"wchar_size", i32 4}
+!3 = !{!"clang version 10.0.0 (trunk 373596)"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"vtable pointer", !6, i64 0}
+!6 = !{!"Simple C++ TBAA"}
diff --git a/test/ThinLTO/X86/Inputs/devirt_single_hybrid_foo.ll b/test/ThinLTO/X86/Inputs/devirt_single_hybrid_foo.ll
new file mode 100644
index 0000000..d29f74b
--- /dev/null
+++ b/test/ThinLTO/X86/Inputs/devirt_single_hybrid_foo.ll
@@ -0,0 +1,35 @@
+; ModuleID = 'foo.cpp'
+source_filename = "foo.cpp"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.A = type { i32 (...)** }
+
+; foo(A *pA): type-tests pA's vtable against _ZTS1A, calls its first virtual function and returns the result + 10.
+define hidden i32 @_Z3fooP1A(%struct.A* %pA) local_unnamed_addr {
+entry:
+  %0 = bitcast %struct.A* %pA to i32 (%struct.A*)***
+  %vtable = load i32 (%struct.A*)**, i32 (%struct.A*)*** %0, align 8, !tbaa !2
+  %1 = bitcast i32 (%struct.A*)** %vtable to i8*
+  %2 = tail call i1 @llvm.type.test(i8* %1, metadata !"_ZTS1A")
+  tail call void @llvm.assume(i1 %2)
+  %3 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable, align 8
+  %call = tail call i32 %3(%struct.A* %pA)
+  %add = add nsw i32 %call, 10
+  ret i32 %add
+}
+
+; Function Attrs: nounwind readnone willreturn
+declare i1 @llvm.type.test(i8*, metadata)
+
+; Function Attrs: nounwind willreturn
+declare void @llvm.assume(i1)
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 10.0.0 (trunk 373596)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"vtable pointer", !4, i64 0}
+!4 = !{!"Simple C++ TBAA"}
diff --git a/test/ThinLTO/X86/devirt_single_hybrid.ll b/test/ThinLTO/X86/devirt_single_hybrid.ll
new file mode 100644
index 0000000..5b2df6c
--- /dev/null
+++ b/test/ThinLTO/X86/devirt_single_hybrid.ll
@@ -0,0 +1,55 @@
+; Check that we import and inline a virtual method with a single
+; implementation when we're running hybrid LTO.
+;
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit %s -o %t-main.bc
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit %p/Inputs/devirt_single_hybrid_foo.ll -o %t-foo.bc
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit %p/Inputs/devirt_single_hybrid_bar.ll -o %t-bar.bc
+; RUN: llvm-lto2 run -save-temps %t-main.bc %t-foo.bc %t-bar.bc -pass-remarks=. -o %t \
+; RUN:    -r=%t-foo.bc,_Z3fooP1A,pl \
+; RUN:    -r=%t-main.bc,main,plx \
+; RUN:    -r=%t-main.bc,_Z3barv,l \
+; RUN:    -r=%t-bar.bc,_Z3barv,pl \
+; RUN:    -r=%t-bar.bc,_Z3fooP1A, \
+; RUN:    -r=%t-bar.bc,_ZNK1A1fEv,pl \
+; RUN:    -r=%t-bar.bc,_ZTV1A,l \
+; RUN:    -r=%t-bar.bc,_ZTVN10__cxxabiv117__class_type_infoE, \
+; RUN:    -r=%t-bar.bc,_ZTS1A,pl \
+; RUN:    -r=%t-bar.bc,_ZTI1A,pl \
+; RUN:    -r=%t-bar.bc,_ZNK1A1fEv, \
+; RUN:    -r=%t-bar.bc,_ZTV1A,pl \
+; RUN:    -r=%t-bar.bc,_ZTI1A, 2>&1 | FileCheck %s --check-prefix=REMARK
+; RUN: llvm-dis %t.1.3.import.bc -o - | FileCheck %s --check-prefix=IMPORT
+; RUN: llvm-dis %t.1.5.precodegen.bc -o - | FileCheck %s --check-prefix=CODEGEN
+
+; REMARK-COUNT-3: single-impl: devirtualized a call to _ZNK1A1fEv
+
+; IMPORT:       define available_externally hidden i32 @_ZNK1A1fEv(%struct.A* %this)
+; IMPORT-NEXT:  entry:
+; IMPORT-NEXT:      ret i32 3
+
+; CODEGEN:        define hidden i32 @main()
+; CODEGEN-NEXT:   entry:
+; CODEGEN-NEXT:     ret i32 23
+
+; Virtual method should have been optimized out
+; CODEGEN-NOT: _ZNK1A1fEv
+
+; ModuleID = 'main.cpp'
+source_filename = "main.cpp"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; main(): returns bar()'s result, which should fold to 23 (3 + 10 + 10) once the virtual call is devirtualized and inlined.
+define hidden i32 @main() local_unnamed_addr {
+entry:
+  %call = tail call i32 @_Z3barv()
+  ret i32 %call
+}
+
+declare dso_local i32 @_Z3barv() local_unnamed_addr
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 10.0.0 (trunk 373596)"}