[SampleFDO] add suffix elision control for fcn names

Summary:
Add hooks for determining the policy used to decide whether/how
to chop off symbol 'suffixes' when locating a given function
in a sample profile.

Prior to this change, any function symbols of the form "X.Y" were
elided/truncated into just "X" when looking up things in a sample
profile data file.

With this change, the policy on suffixes can be changed by adding a
new attribute "sample-profile-suffix-elision-policy" to the function:
this attribute can have the value "all" (the default), "selected", or
"none". A value of "all" preserves the previous behavior (chop off
everything after the first "." character, then treat that as the
symbol name). A value of "selected" chops off only the rightmost
".llvm.XXXX" suffix (where "XXX" is any string not containing a "."
char). A value of "none" indicates that names should be left as is.

Subscribers: jdoerfert, wmi, mtrofin, danielcdh, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D58832

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@356146 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/include/llvm/ProfileData/SampleProf.h b/include/llvm/ProfileData/SampleProf.h
index ca3e2de..7e32f1d 100644
--- a/include/llvm/ProfileData/SampleProf.h
+++ b/include/llvm/ProfileData/SampleProf.h
@@ -410,6 +410,34 @@
     return getNameInModule(Name, M);
   }
 
+  /// Return the canonical name for a function, taking into account
+  /// suffix elision policy attributes.
+  static StringRef getCanonicalFnName(const Function &F) {
+    static const char *knownSuffixes[] = { ".llvm.", ".part." };
+    auto AttrName = "sample-profile-suffix-elision-policy";
+    auto Attr = F.getFnAttribute(AttrName).getValueAsString();
+    if (Attr == "" || Attr == "all") {
+      return F.getName().split('.').first;
+    } else if (Attr == "selected") {
+      StringRef Cand(F.getName());
+      for (const auto &Suf : knownSuffixes) {
+        StringRef Suffix(Suf);
+        auto It = Cand.rfind(Suffix);
+        if (It == StringRef::npos)
+          return Cand;
+        auto Dit = Cand.rfind('.');
+        if (Dit == It + Suffix.size() - 1)
+          Cand = Cand.substr(0, It);
+      }
+      return Cand;
+    } else if (Attr == "none") {
+      return F.getName();
+    } else {
+      assert(false && "internal error: unknown suffix elision policy");
+    }
+    return F.getName();
+  }
+
   /// Translate \p Name into its original name in Module.
   /// When the Format is not SPF_Compact_Binary, \p Name needs no translation.
   /// When the Format is SPF_Compact_Binary, \p Name in current FunctionSamples
@@ -465,11 +493,9 @@
         /// built in post-thin-link phase and var promotion has been done,
         /// we need to add the substring of function name without the suffix
         /// into the GUIDToFuncNameMap.
-        auto pos = OrigName.find('.');
-        if (pos != StringRef::npos) {
-          StringRef NewName = OrigName.substr(0, pos);
-          GUIDToFuncNameMap.insert({Function::getGUID(NewName), NewName});
-        }
+        StringRef CanonName = getCanonicalFnName(F);
+        if (CanonName != OrigName)
+          GUIDToFuncNameMap.insert({Function::getGUID(CanonName), CanonName});
       }
       CurrentModule = &M;
     }
diff --git a/include/llvm/ProfileData/SampleProfReader.h b/include/llvm/ProfileData/SampleProfReader.h
index 32b8f91..969cdea 100644
--- a/include/llvm/ProfileData/SampleProfReader.h
+++ b/include/llvm/ProfileData/SampleProfReader.h
@@ -286,10 +286,11 @@
 
   /// Return the samples collected for function \p F.
   FunctionSamples *getSamplesFor(const Function &F) {
-    // The function name may have been updated by adding suffix. In sample
-    // profile, the function names are all stripped, so we need to strip
-    // the function name suffix before matching with profile.
-    return getSamplesFor(F.getName().split('.').first);
+    // The function name may have been updated by adding suffix. Call
+    // a helper to (optionally) strip off suffixes so that we can
+    // match against the original function name in the profile.
+    StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
+    return getSamplesFor(CanonName);
   }
 
   /// Return the samples collected for function \p F.
diff --git a/lib/ProfileData/SampleProfReader.cpp b/lib/ProfileData/SampleProfReader.cpp
index 2e43c95..192b6c7 100644
--- a/lib/ProfileData/SampleProfReader.cpp
+++ b/lib/ProfileData/SampleProfReader.cpp
@@ -593,8 +593,8 @@
 void SampleProfileReaderCompactBinary::collectFuncsToUse(const Module &M) {
   FuncsToUse.clear();
   for (auto &F : M) {
-    StringRef Fname = F.getName().split('.').first;
-    FuncsToUse.insert(Fname);
+    StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
+    FuncsToUse.insert(CanonName);
   }
 }
 
diff --git a/unittests/ProfileData/SampleProfTest.cpp b/unittests/ProfileData/SampleProfTest.cpp
index 6e59593..a9a8b11 100644
--- a/unittests/ProfileData/SampleProfTest.cpp
+++ b/unittests/ProfileData/SampleProfTest.cpp
@@ -199,6 +199,78 @@
     VerifySummary(*PS);
     delete PS;
   }
+
+  void addFunctionSamples(StringMap<FunctionSamples> *Smap, const char *Fname,
+                          uint64_t TotalSamples, uint64_t HeadSamples) {
+    StringRef Name(Fname);
+    FunctionSamples FcnSamples;
+    FcnSamples.setName(Name);
+    FcnSamples.addTotalSamples(TotalSamples);
+    FcnSamples.addHeadSamples(HeadSamples);
+    FcnSamples.addBodySamples(1, 0, HeadSamples);
+    (*Smap)[Name] = FcnSamples;
+  }
+
+  StringMap<FunctionSamples> setupFcnSamplesForElisionTest(StringRef Policy) {
+    StringMap<FunctionSamples> Smap;
+    addFunctionSamples(&Smap, "foo", uint64_t(20301), uint64_t(1437));
+    if (Policy == "" || Policy == "all")
+      return Smap;
+    addFunctionSamples(&Smap, "foo.bar", uint64_t(20303), uint64_t(1439));
+    if (Policy == "selected")
+      return Smap;
+    addFunctionSamples(&Smap, "foo.llvm.2465", uint64_t(20305), uint64_t(1441));
+    return Smap;
+  }
+
+  void createFunctionWithSampleProfileElisionPolicy(Module *M,
+                                                    const char *Fname,
+                                                    StringRef Policy) {
+    FunctionType *FnType =
+        FunctionType::get(Type::getVoidTy(Context), {}, false);
+    auto Inserted = M->getOrInsertFunction(Fname, FnType);
+    auto Fcn = cast<Function>(Inserted.getCallee());
+    if (Policy != "")
+      Fcn->addFnAttr("sample-profile-suffix-elision-policy", Policy);
+  }
+
+  void setupModuleForElisionTest(Module *M, StringRef Policy) {
+    createFunctionWithSampleProfileElisionPolicy(M, "foo", Policy);
+    createFunctionWithSampleProfileElisionPolicy(M, "foo.bar", Policy);
+    createFunctionWithSampleProfileElisionPolicy(M, "foo.llvm.2465", Policy);
+  }
+
+  void testSuffixElisionPolicy(SampleProfileFormat Format, StringRef Policy,
+                               const StringMap<uint64_t> &Expected) {
+    SmallVector<char, 128> ProfilePath;
+    std::error_code EC;
+    EC = llvm::sys::fs::createTemporaryFile("profile", "", ProfilePath);
+    ASSERT_TRUE(NoError(EC));
+    StringRef ProfileFile(ProfilePath.data(), ProfilePath.size());
+
+    Module M("my_module", Context);
+    setupModuleForElisionTest(&M, Policy);
+    StringMap<FunctionSamples> ProfMap = setupFcnSamplesForElisionTest(Policy);
+
+    // write profile
+    createWriter(Format, ProfileFile);
+    EC = Writer->write(ProfMap);
+    ASSERT_TRUE(NoError(EC));
+    Writer->getOutputStream().flush();
+
+    // read profile
+    readProfile(M, ProfileFile);
+    EC = Reader->read();
+    ASSERT_TRUE(NoError(EC));
+
+    for (auto I = Expected.begin(); I != Expected.end(); ++I) {
+      uint64_t Esamples = uint64_t(-1);
+      FunctionSamples *Samples = Reader->getSamplesFor(I->getKey());
+      if (Samples != nullptr)
+        Esamples = Samples->getTotalSamples();
+      ASSERT_EQ(I->getValue(), Esamples);
+    }
+  }
 };
 
 TEST_F(SampleProfTest, roundtrip_text_profile) {
@@ -251,4 +323,77 @@
   ASSERT_EQ(BodySamples.get(), Max);
 }
 
+TEST_F(SampleProfTest, default_suffix_elision_text) {
+  // Default suffix elision policy: strip everything after first dot.
+  // This implies that all suffix variants will map to "foo", so
+  // we don't expect to see any entries for them in the sample
+  // profile.
+  StringMap<uint64_t> Expected;
+  Expected["foo"] = uint64_t(20301);
+  Expected["foo.bar"] = uint64_t(-1);
+  Expected["foo.llvm.2465"] = uint64_t(-1);
+  testSuffixElisionPolicy(SampleProfileFormat::SPF_Text, "", Expected);
+}
+
+TEST_F(SampleProfTest, default_suffix_elision_compact_binary) {
+  // Default suffix elision policy: strip everything after first dot.
+  // This implies that all suffix variants will map to "foo", so
+  // we don't expect to see any entries for them in the sample
+  // profile.
+  StringMap<uint64_t> Expected;
+  Expected["foo"] = uint64_t(20301);
+  Expected["foo.bar"] = uint64_t(-1);
+  Expected["foo.llvm.2465"] = uint64_t(-1);
+  testSuffixElisionPolicy(SampleProfileFormat::SPF_Compact_Binary, "",
+                          Expected);
+}
+
+TEST_F(SampleProfTest, selected_suffix_elision_text) {
+  // Profile is created and searched using the "selected"
+  // suffix elision policy: we only strip a .XXX suffix if
+  // it matches a pattern known to be generated by the compiler
+  // (e.g. ".llvm.<digits>").
+  StringMap<uint64_t> Expected;
+  Expected["foo"] = uint64_t(20301);
+  Expected["foo.bar"] = uint64_t(20303);
+  Expected["foo.llvm.2465"] = uint64_t(-1);
+  testSuffixElisionPolicy(SampleProfileFormat::SPF_Text, "selected", Expected);
+}
+
+TEST_F(SampleProfTest, selected_suffix_elision_compact_binary) {
+  // Profile is created and searched using the "selected"
+  // suffix elision policy: we only strip a .XXX suffix if
+  // it matches a pattern known to be generated by the compiler
+  // (e.g. ".llvm.<digits>").
+  StringMap<uint64_t> Expected;
+  Expected["foo"] = uint64_t(20301);
+  Expected["foo.bar"] = uint64_t(20303);
+  Expected["foo.llvm.2465"] = uint64_t(-1);
+  testSuffixElisionPolicy(SampleProfileFormat::SPF_Compact_Binary, "selected",
+                          Expected);
+}
+
+TEST_F(SampleProfTest, none_suffix_elision_text) {
+  // Profile is created and searched using the "none"
+  // suffix elision policy: no stripping of suffixes at all.
+  // Here we expect to see all variants in the profile.
+  StringMap<uint64_t> Expected;
+  Expected["foo"] = uint64_t(20301);
+  Expected["foo.bar"] = uint64_t(20303);
+  Expected["foo.llvm.2465"] = uint64_t(20305);
+  testSuffixElisionPolicy(SampleProfileFormat::SPF_Text, "none", Expected);
+}
+
+TEST_F(SampleProfTest, none_suffix_elision_compact_binary) {
+  // Profile is created and searched using the "none"
+  // suffix elision policy: no stripping of suffixes at all.
+  // Here we expect to see all variants in the profile.
+  StringMap<uint64_t> Expected;
+  Expected["foo"] = uint64_t(20301);
+  Expected["foo.bar"] = uint64_t(20303);
+  Expected["foo.llvm.2465"] = uint64_t(20305);
+  testSuffixElisionPolicy(SampleProfileFormat::SPF_Compact_Binary, "none",
+                          Expected);
+}
+
 } // end anonymous namespace