[ctxprof] Flatten indirect call info in pre-thinlink compilation (#134766)
Same idea as in #134723 - flatten indirect call info into `"VP"` `MD_prof` metadata for the thinlinker, for cases that aren't covered by a contextual profile. If we don't perform indirect call promotion (ICP) on an indirect call target in the specialized module, the call will fall through to the copy of that target outside the specialized module. If the graph under that target also has some indirect calls then, in the absence of this pass, we'd have a steeper performance regression, because none of those calls would have a chance to be ICPed.
diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
index 023b5a9..6f1c369 100644
--- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h
+++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
@@ -21,6 +21,10 @@
class CtxProfAnalysis;
+/// Call target GUID -> accumulated count for a single callsite.
+using FlatIndirectTargets = DenseMap<GlobalValue::GUID, uint64_t>;
+/// Function GUID -> (callsite index -> FlatIndirectTargets). This is the shape
+/// returned by PGOContextualProfile::flattenVirtCalls().
+using CtxProfFlatIndirectCallProfile =
+ DenseMap<GlobalValue::GUID, DenseMap<uint32_t, FlatIndirectTargets>>;
+
/// The instrumented contextual profile, produced by the CtxProfAnalysis.
class PGOContextualProfile {
friend class CtxProfAnalysis;
@@ -101,6 +105,7 @@
void visit(ConstVisitor, const Function *F = nullptr) const;
const CtxProfFlatProfile flatten() const;
+ /// Flatten the per-context callsite information: for each function GUID and
+ /// each of its callsite indices, sum the entry counts of every callee GUID
+ /// over all the contexts in which that function appears.
+ const CtxProfFlatIndirectCallProfile flattenVirtCalls() const;
bool invalidate(Module &, const PreservedAnalyses &PA,
ModuleAnalysisManager::Invalidator &) {
diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp b/llvm/lib/Analysis/CtxProfAnalysis.cpp
index 9ca2f7d..d203e27 100644
--- a/llvm/lib/Analysis/CtxProfAnalysis.cpp
+++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp
@@ -334,6 +334,20 @@
return Flat;
}
+// Walk every context in the profile and, keyed by the context's function GUID
+// and then by callsite index, accumulate the entry count of each callee GUID
+// recorded at that callsite.
+const CtxProfFlatIndirectCallProfile
+PGOContextualProfile::flattenVirtCalls() const {
+  CtxProfFlatIndirectCallProfile Flattened;
+  auto Accumulate = [&Flattened](const PGOCtxProfContext &Ctx) {
+    // Looked up unconditionally, so a context without callsites still gets an
+    // (empty) entry for its function.
+    auto &PerCallsite = Flattened[Ctx.guid()];
+    for (const auto &[CallsiteID, Callees] : Ctx.callsites()) {
+      for (const auto &[CalleeGuid, CalleeCtx] : Callees)
+        PerCallsite[CallsiteID][CalleeGuid] += CalleeCtx.getEntrycount();
+    }
+  };
+  preorderVisit<const PGOCtxProfContext::CallTargetMapTy,
+                const PGOCtxProfContext>(Profiles.Contexts, Accumulate);
+  return Flattened;
+}
+
void CtxProfAnalysis::collectIndirectCallPromotionList(
CallBase &IC, Result &Profile,
SetVector<std::pair<CallBase *, Function *>> &Candidates) {
diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
index ffe0f38..05f364a 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
@@ -36,9 +36,12 @@
#include "llvm/Transforms/Scalar/DCE.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <deque>
+#include <functional>
using namespace llvm;
+#define DEBUG_TYPE "ctx_prof_flatten"
+
namespace {
class ProfileAnnotator final {
@@ -414,6 +417,57 @@
I.eraseFromParent();
}
+// Attach "VP" (value profile) MD_prof metadata to the indirect call CB, built
+// from the flattened per-callsite target counts in FlatProf.
+//
+//  - M: the module, forwarded to annotateValueSite.
+//  - CB: the indirect call to annotate.
+//  - FlatProf: callsite index -> (target GUID -> count), for the function
+//    containing CB.
+//  - Ins: the callsite instrumentation associated with CB; its index is the
+//    key looked up in FlatProf.
+//
+// Does nothing when FlatProf has no entry for this callsite index.
+void annotateIndirectCall(
+    Module &M, CallBase &CB,
+    const DenseMap<uint32_t, FlatIndirectTargets> &FlatProf,
+    const InstrProfCallsite &Ins) {
+  auto Idx = Ins.getIndex()->getZExtValue();
+  auto FIt = FlatProf.find(Idx);
+  if (FIt == FlatProf.end())
+    return;
+  const auto &Targets = FIt->second;
+  SmallVector<InstrProfValueData, 2> Data;
+  Data.reserve(Targets.size());
+  uint64_t Sum = 0;
+  for (const auto &[Guid, Count] : Targets) {
+    Data.push_back({/*.Value=*/Guid, /*.Count=*/Count});
+    Sum += Count;
+  }
+
+  // Hottest targets first. Targets is a DenseMap and llvm::sort is not a
+  // stable sort, so break ties between equal counts on the GUID; otherwise the
+  // order of equally hot targets in the emitted metadata would depend on the
+  // map's iteration order.
+  llvm::sort(Data,
+             [](const InstrProfValueData &A, const InstrProfValueData &B) {
+               if (A.Count != B.Count)
+                 return A.Count > B.Count;
+               return A.Value < B.Value;
+             });
+  llvm::annotateValueSite(M, CB, Data, Sum,
+                          InstrProfValueKind::IPVK_IndirectCallTarget,
+                          Data.size());
+  LLVM_DEBUG(dbgs() << "[ctxprof] flat indirect call prof: " << CB
+                    << CB.getMetadata(LLVMContext::MD_prof) << "\n");
+}
+
+// For every defined function in M that has flattened callsite data in CtxProf,
+// annotate each of its instrumented indirect calls with "VP" metadata.
+//
+// Passes conventionally report a "Changed" bool. This helper returns nothing
+// because the calling pass' run already assumes something will change (some
+// profile gets added), so reporting "false" where applicable wouldn't add
+// much.
+void annotateIndirectCalls(Module &M, const CtxProfAnalysis::Result &CtxProf) {
+  const auto FlatIndCalls = CtxProf.flattenVirtCalls();
+  for (auto &Fn : M) {
+    if (Fn.isDeclaration())
+      continue;
+    auto ProfIt = FlatIndCalls.find(AssignGUIDPass::getGUID(Fn));
+    if (ProfIt == FlatIndCalls.end())
+      continue;
+    const auto &CallsiteProf = ProfIt->second;
+    for (auto &Block : Fn)
+      for (auto &Inst : Block) {
+        auto *Call = dyn_cast<CallBase>(&Inst);
+        if (Call && Call->isIndirectCall())
+          if (auto *Instr = CtxProfAnalysis::getCallsiteInstrumentation(*Call))
+            annotateIndirectCall(M, *Call, CallsiteProf, *Instr);
+      }
+  }
+}
+
} // namespace
PreservedAnalyses PGOCtxProfFlatteningPass::run(Module &M,
@@ -437,6 +491,8 @@
if (!IsPreThinlink && !CtxProf.isInSpecializedModule())
return PreservedAnalyses::none();
+ if (IsPreThinlink)
+ annotateIndirectCalls(M, CtxProf);
const auto FlattenedProfile = CtxProf.flatten();
for (auto &F : M) {
diff --git a/llvm/test/Analysis/CtxProfAnalysis/flatten-insert-icp-mdprof.ll b/llvm/test/Analysis/CtxProfAnalysis/flatten-insert-icp-mdprof.ll
new file mode 100644
index 0000000..13beddc
--- /dev/null
+++ b/llvm/test/Analysis/CtxProfAnalysis/flatten-insert-icp-mdprof.ll
@@ -0,0 +1,50 @@
+; REQUIRES:x86_64-linux
+
+; Test flattening indirect calls into "VP" MD_prof metadata, in prelink.
+
+; RUN: split-file %s %t
+; RUN: llvm-ctxprof-util fromYAML --input %t/profile.yaml --output %t/profile.ctxprofdata
+; RUN: opt -passes=ctx-prof-flatten-prethinlink %t/example.ll -use-ctx-profile=%t/profile.ctxprofdata \
+; RUN: -S -o - | FileCheck %s --check-prefix=PRELINK
+
+; PRELINK: call void @llvm.instrprof.callsite(ptr @foo, i64 1234, i32 2, i32 0, ptr %p)
+; PRELINK-NEXT: call void %p(), !prof ![[VPPROF:[0-9]+]]
+; PRELINK-NEXT: call void @llvm.instrprof.callsite(ptr @foo, i64 1234, i32 2, i32 1, ptr @bar)
+; PRELINK-NEXT: call void @bar(){{$}}
+; PRELINK: ![[VPPROF]] = !{!"VP", i32 0, i64 5, i64 5678, i64 4, i64 5555, i64 1}
+
+; RUN: cp %t/example.ll %t/1234.ll
+; RUN: opt -passes=ctx-prof-flatten %t/1234.ll -use-ctx-profile=%t/profile.ctxprofdata \
+; RUN: -S -o - | FileCheck %s --check-prefix=POSTLINK
+; RUN: opt -passes=ctx-prof-flatten %t/example.ll -use-ctx-profile=%t/profile.ctxprofdata \
+; RUN: -S -o - | FileCheck %s --check-prefix=POSTLINK
+
+; POSTLINK-NOT: call void %p(), !prof
+;--- example.ll
+
+declare !guid !0 void @bar()
+
+define void @foo(ptr %p) !guid !1 {
+ call void @llvm.instrprof.increment(ptr @foo, i64 1234, i32 1, i32 0)
+ call void @llvm.instrprof.callsite(ptr @foo, i64 1234, i32 2, i32 0, ptr %p)
+ call void %p()
+ call void @llvm.instrprof.callsite(ptr @foo, i64 1234, i32 2, i32 1, ptr @bar)
+ call void @bar()
+ ret void
+}
+
+!0 = !{i64 8888}
+!1 = !{i64 1234}
+
+;--- profile.yaml
+Contexts:
+ - Guid: 1234
+ TotalRootEntryCount: 5
+ Counters: [5]
+ Callsites:
+ - - Guid: 5555
+ Counters: [1]
+ - Guid: 5678
+ Counters: [4]
+ - - Guid: 8888
+ Counters: [5]