|  | //===- SampleProfileMatcher.cpp - Sampling-based Stale Profile Matcher ----===// | 
|  | // | 
|  | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|  | // See https://llvm.org/LICENSE.txt for license information. | 
|  | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  | // | 
|  | // This file implements the SampleProfileMatcher used for stale | 
|  | // profile matching. | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | #include "llvm/Transforms/IPO/SampleProfileMatcher.h" | 
|  | #include "llvm/Demangle/Demangle.h" | 
|  | #include "llvm/IR/IntrinsicInst.h" | 
|  | #include "llvm/IR/MDBuilder.h" | 
|  | #include "llvm/Support/CommandLine.h" | 
|  | #include "llvm/Transforms/Utils/LongestCommonSequence.h" | 
|  |  | 
|  | using namespace llvm; | 
|  | using namespace sampleprof; | 
|  |  | 
|  | #define DEBUG_TYPE "sample-profile-matcher" | 
|  |  | 
|  | static cl::opt<unsigned> FuncProfileSimilarityThreshold( | 
|  | "func-profile-similarity-threshold", cl::Hidden, cl::init(80), | 
|  | cl::desc("Consider a profile matches a function if the similarity of their " | 
|  | "callee sequences is above the specified percentile.")); | 
|  |  | 
|  | static cl::opt<unsigned> MinFuncCountForCGMatching( | 
|  | "min-func-count-for-cg-matching", cl::Hidden, cl::init(5), | 
|  | cl::desc("The minimum number of basic blocks required for a function to " | 
|  | "run stale profile call graph matching.")); | 
|  |  | 
|  | static cl::opt<unsigned> MinCallCountForCGMatching( | 
|  | "min-call-count-for-cg-matching", cl::Hidden, cl::init(3), | 
|  | cl::desc("The minimum number of call anchors required for a function to " | 
|  | "run stale profile call graph matching.")); | 
|  |  | 
|  | static cl::opt<bool> LoadFuncProfileforCGMatching( | 
|  | "load-func-profile-for-cg-matching", cl::Hidden, cl::init(true), | 
|  | cl::desc( | 
|  | "Load top-level profiles that the sample reader initially skipped for " | 
|  | "the call-graph matching (only meaningful for extended binary " | 
|  | "format)")); | 
|  |  | 
|  | extern cl::opt<bool> SalvageStaleProfile; | 
|  | extern cl::opt<bool> SalvageUnusedProfile; | 
|  | extern cl::opt<bool> PersistProfileStaleness; | 
|  | extern cl::opt<bool> ReportProfileStaleness; | 
|  |  | 
|  | static cl::opt<unsigned> SalvageStaleProfileMaxCallsites( | 
|  | "salvage-stale-profile-max-callsites", cl::Hidden, cl::init(UINT_MAX), | 
|  | cl::desc("The maximum number of callsites in a function, above which stale " | 
|  | "profile matching will be skipped.")); | 
|  |  | 
|  | void SampleProfileMatcher::findIRAnchors(const Function &F, | 
|  | AnchorMap &IRAnchors) const { | 
|  | // For inlined code, recover the original callsite and callee by finding the | 
|  | // top-level inline frame. e.g. For frame stack "main:1 @ foo:2 @ bar:3", the | 
|  | // top-level frame is "main:1", the callsite is "1" and the callee is "foo". | 
|  | auto FindTopLevelInlinedCallsite = [](const DILocation *DIL) { | 
|  | assert((DIL && DIL->getInlinedAt()) && "No inlined callsite"); | 
|  | const DILocation *PrevDIL = nullptr; | 
|  | do { | 
|  | PrevDIL = DIL; | 
|  | DIL = DIL->getInlinedAt(); | 
|  | } while (DIL->getInlinedAt()); | 
|  |  | 
|  | LineLocation Callsite = FunctionSamples::getCallSiteIdentifier( | 
|  | DIL, FunctionSamples::ProfileIsFS); | 
|  | StringRef CalleeName = PrevDIL->getSubprogramLinkageName(); | 
|  | return std::make_pair(Callsite, FunctionId(CalleeName)); | 
|  | }; | 
|  |  | 
|  | auto GetCanonicalCalleeName = [](const CallBase *CB) { | 
|  | StringRef CalleeName = UnknownIndirectCallee; | 
|  | if (Function *Callee = CB->getCalledFunction()) | 
|  | CalleeName = FunctionSamples::getCanonicalFnName(Callee->getName()); | 
|  | return CalleeName; | 
|  | }; | 
|  |  | 
|  | // Extract profile matching anchors in the IR. | 
|  | for (auto &BB : F) { | 
|  | for (auto &I : BB) { | 
|  | DILocation *DIL = I.getDebugLoc(); | 
|  | if (!DIL) | 
|  | continue; | 
|  |  | 
|  | if (FunctionSamples::ProfileIsProbeBased) { | 
|  | if (auto Probe = extractProbe(I)) { | 
|  | // Flatten inlined IR for the matching. | 
|  | if (DIL->getInlinedAt()) { | 
|  | IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL)); | 
|  | } else { | 
|  | // Use empty StringRef for basic block probe. | 
|  | StringRef CalleeName; | 
|  | if (const auto *CB = dyn_cast<CallBase>(&I)) { | 
|  | // Skip the probe inst whose callee name is "llvm.pseudoprobe". | 
|  | if (!isa<IntrinsicInst>(&I)) | 
|  | CalleeName = GetCanonicalCalleeName(CB); | 
|  | } | 
|  | LineLocation Loc = LineLocation(Probe->Id, 0); | 
|  | IRAnchors.emplace(Loc, FunctionId(CalleeName)); | 
|  | } | 
|  | } | 
|  | } else { | 
|  | // TODO: For line-number based profile(AutoFDO), currently only support | 
|  | // find callsite anchors. In future, we need to parse all the non-call | 
|  | // instructions to extract the line locations for profile matching. | 
|  | if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I)) | 
|  | continue; | 
|  |  | 
|  | if (DIL->getInlinedAt()) { | 
|  | IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL)); | 
|  | } else { | 
|  | LineLocation Callsite = FunctionSamples::getCallSiteIdentifier( | 
|  | DIL, FunctionSamples::ProfileIsFS); | 
|  | StringRef CalleeName = GetCanonicalCalleeName(dyn_cast<CallBase>(&I)); | 
|  | IRAnchors.emplace(Callsite, FunctionId(CalleeName)); | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS, | 
|  | AnchorMap &ProfileAnchors) const { | 
|  | auto isInvalidLineOffset = [](uint32_t LineOffset) { | 
|  | return LineOffset & 0x8000; | 
|  | }; | 
|  |  | 
|  | auto InsertAnchor = [](const LineLocation &Loc, const FunctionId &CalleeName, | 
|  | AnchorMap &ProfileAnchors) { | 
|  | auto Ret = ProfileAnchors.try_emplace(Loc, CalleeName); | 
|  | if (!Ret.second) { | 
|  | // For multiple callees, which indicates it's an indirect call, we use a | 
|  | // dummy name(UnknownIndirectCallee) as the indrect callee name. | 
|  | Ret.first->second = FunctionId(UnknownIndirectCallee); | 
|  | } | 
|  | }; | 
|  |  | 
|  | for (const auto &I : FS.getBodySamples()) { | 
|  | const LineLocation &Loc = I.first; | 
|  | if (isInvalidLineOffset(Loc.LineOffset)) | 
|  | continue; | 
|  | for (const auto &C : I.second.getCallTargets()) | 
|  | InsertAnchor(Loc, C.first, ProfileAnchors); | 
|  | } | 
|  |  | 
|  | for (const auto &I : FS.getCallsiteSamples()) { | 
|  | const LineLocation &Loc = I.first; | 
|  | if (isInvalidLineOffset(Loc.LineOffset)) | 
|  | continue; | 
|  | for (const auto &C : I.second) | 
|  | InsertAnchor(Loc, C.first, ProfileAnchors); | 
|  | } | 
|  | } | 
|  |  | 
|  | bool SampleProfileMatcher::functionHasProfile(const FunctionId &IRFuncName, | 
|  | Function *&FuncWithoutProfile) { | 
|  | FuncWithoutProfile = nullptr; | 
|  | auto R = FunctionsWithoutProfile.find(IRFuncName); | 
|  | if (R != FunctionsWithoutProfile.end()) | 
|  | FuncWithoutProfile = R->second; | 
|  | return !FuncWithoutProfile; | 
|  | } | 
|  |  | 
|  | bool SampleProfileMatcher::isProfileUnused(const FunctionId &ProfileFuncName) { | 
|  | return SymbolMap->find(ProfileFuncName) == SymbolMap->end(); | 
|  | } | 
|  |  | 
|  | bool SampleProfileMatcher::functionMatchesProfile( | 
|  | const FunctionId &IRFuncName, const FunctionId &ProfileFuncName, | 
|  | bool FindMatchedProfileOnly) { | 
|  | if (IRFuncName == ProfileFuncName) | 
|  | return true; | 
|  | if (!SalvageUnusedProfile) | 
|  | return false; | 
|  |  | 
|  | // If IR function doesn't have profile and the profile is unused, try | 
|  | // matching them. | 
|  | Function *IRFunc = nullptr; | 
|  | if (functionHasProfile(IRFuncName, IRFunc) || | 
|  | !isProfileUnused(ProfileFuncName)) | 
|  | return false; | 
|  |  | 
|  | assert(FunctionId(IRFunc->getName()) != ProfileFuncName && | 
|  | "IR function should be different from profile function to match"); | 
|  | return functionMatchesProfile(*IRFunc, ProfileFuncName, | 
|  | FindMatchedProfileOnly); | 
|  | } | 
|  |  | 
|  | LocToLocMap | 
|  | SampleProfileMatcher::longestCommonSequence(const AnchorList &AnchorList1, | 
|  | const AnchorList &AnchorList2, | 
|  | bool MatchUnusedFunction) { | 
|  | LocToLocMap MatchedAnchors; | 
|  | llvm::longestCommonSequence<LineLocation, FunctionId>( | 
|  | AnchorList1, AnchorList2, | 
|  | [&](const FunctionId &A, const FunctionId &B) { | 
|  | return functionMatchesProfile( | 
|  | A, B, | 
|  | !MatchUnusedFunction // Find matched function only | 
|  | ); | 
|  | }, | 
|  | [&](LineLocation A, LineLocation B) { | 
|  | MatchedAnchors.try_emplace(A, B); | 
|  | }); | 
|  | return MatchedAnchors; | 
|  | } | 
|  |  | 
|  | void SampleProfileMatcher::matchNonCallsiteLocs( | 
|  | const LocToLocMap &MatchedAnchors, const AnchorMap &IRAnchors, | 
|  | LocToLocMap &IRToProfileLocationMap) { | 
|  | auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) { | 
|  | // Skip the unchanged location mapping to save memory. | 
|  | if (From != To) | 
|  | IRToProfileLocationMap.insert({From, To}); | 
|  | }; | 
|  |  | 
|  | // Use function's beginning location as the initial anchor. | 
|  | int32_t LocationDelta = 0; | 
|  | SmallVector<LineLocation> LastMatchedNonAnchors; | 
|  | for (const auto &IR : IRAnchors) { | 
|  | const auto &Loc = IR.first; | 
|  | bool IsMatchedAnchor = false; | 
|  | // Match the anchor location in lexical order. | 
|  | auto R = MatchedAnchors.find(Loc); | 
|  | if (R != MatchedAnchors.end()) { | 
|  | const auto &Candidate = R->second; | 
|  | InsertMatching(Loc, Candidate); | 
|  | LLVM_DEBUG(dbgs() << "Callsite with callee:" << IR.second.stringRef() | 
|  | << " is matched from " << Loc << " to " << Candidate | 
|  | << "\n"); | 
|  | LocationDelta = Candidate.LineOffset - Loc.LineOffset; | 
|  |  | 
|  | // Match backwards for non-anchor locations. | 
|  | // The locations in LastMatchedNonAnchors have been matched forwards | 
|  | // based on the previous anchor, spilt it evenly and overwrite the | 
|  | // second half based on the current anchor. | 
|  | for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2; | 
|  | I < LastMatchedNonAnchors.size(); I++) { | 
|  | const auto &L = LastMatchedNonAnchors[I]; | 
|  | uint32_t CandidateLineOffset = L.LineOffset + LocationDelta; | 
|  | LineLocation Candidate(CandidateLineOffset, L.Discriminator); | 
|  | InsertMatching(L, Candidate); | 
|  | LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L | 
|  | << " to " << Candidate << "\n"); | 
|  | } | 
|  |  | 
|  | IsMatchedAnchor = true; | 
|  | LastMatchedNonAnchors.clear(); | 
|  | } | 
|  |  | 
|  | // Match forwards for non-anchor locations. | 
|  | if (!IsMatchedAnchor) { | 
|  | uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta; | 
|  | LineLocation Candidate(CandidateLineOffset, Loc.Discriminator); | 
|  | InsertMatching(Loc, Candidate); | 
|  | LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to " | 
|  | << Candidate << "\n"); | 
|  | LastMatchedNonAnchors.emplace_back(Loc); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | // Filter the non-call locations from IRAnchors and ProfileAnchors and write | 
|  | // them into a list for random access later. | 
|  | void SampleProfileMatcher::getFilteredAnchorList( | 
|  | const AnchorMap &IRAnchors, const AnchorMap &ProfileAnchors, | 
|  | AnchorList &FilteredIRAnchorsList, AnchorList &FilteredProfileAnchorList) { | 
|  | for (const auto &I : IRAnchors) { | 
|  | if (I.second.stringRef().empty()) | 
|  | continue; | 
|  | FilteredIRAnchorsList.emplace_back(I); | 
|  | } | 
|  |  | 
|  | for (const auto &I : ProfileAnchors) | 
|  | FilteredProfileAnchorList.emplace_back(I); | 
|  | } | 
|  |  | 
|  | // Call target name anchor based profile fuzzy matching. | 
|  | // Input: | 
|  | // For IR locations, the anchor is the callee name of direct callsite; For | 
|  | // profile locations, it's the call target name for BodySamples or inlinee's | 
|  | // profile name for CallsiteSamples. | 
|  | // Matching heuristic: | 
|  | // First match all the anchors using the diff algorithm, then split the | 
|  | // non-anchor locations between the two anchors evenly, first half are matched | 
|  | // based on the start anchor, second half are matched based on the end anchor. | 
|  | // For example, given: | 
|  | // IR locations:      [1, 2(foo), 3, 5, 6(bar), 7] | 
|  | // Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9] | 
|  | // The matching gives: | 
|  | //   [1,    2(foo), 3,  5,  6(bar), 7] | 
|  | //    |     |       |   |     |     | | 
|  | //   [1, 2, 3(foo), 4,  7,  8(bar), 9] | 
|  | // The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9]. | 
|  | void SampleProfileMatcher::runStaleProfileMatching( | 
|  | const Function &F, const AnchorMap &IRAnchors, | 
|  | const AnchorMap &ProfileAnchors, LocToLocMap &IRToProfileLocationMap, | 
|  | bool RunCFGMatching, bool RunCGMatching) { | 
|  | if (!RunCFGMatching && !RunCGMatching) | 
|  | return; | 
|  | LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName() | 
|  | << "\n"); | 
|  | assert(IRToProfileLocationMap.empty() && | 
|  | "Run stale profile matching only once per function"); | 
|  |  | 
|  | AnchorList FilteredProfileAnchorList; | 
|  | AnchorList FilteredIRAnchorsList; | 
|  | getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList, | 
|  | FilteredProfileAnchorList); | 
|  |  | 
|  | if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty()) | 
|  | return; | 
|  |  | 
|  | if (FilteredIRAnchorsList.size() > SalvageStaleProfileMaxCallsites || | 
|  | FilteredProfileAnchorList.size() > SalvageStaleProfileMaxCallsites) { | 
|  | LLVM_DEBUG(dbgs() << "Skip stale profile matching for " << F.getName() | 
|  | << " because the number of callsites in the IR is " | 
|  | << FilteredIRAnchorsList.size() | 
|  | << " and in the profile is " | 
|  | << FilteredProfileAnchorList.size() << "\n"); | 
|  | return; | 
|  | } | 
|  |  | 
|  | // Match the callsite anchors by finding the longest common subsequence | 
|  | // between IR and profile. | 
|  | // Define a match between two anchors as follows: | 
|  | // 1) The function names of anchors are the same. | 
|  | // 2) The similarity between the anchor functions is above a threshold if | 
|  | // RunCGMatching is set. | 
|  | // For 2), we only consider the anchor functions from IR and profile don't | 
|  | // appear on either side to reduce the matching scope. Note that we need to | 
|  | // use IR anchor as base(A side) to align with the order of | 
|  | // IRToProfileLocationMap. | 
|  | LocToLocMap MatchedAnchors = | 
|  | longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList, | 
|  | RunCGMatching /* Match unused functions */); | 
|  |  | 
|  | // CFG level matching: | 
|  | // Apply the callsite matchings to infer matching for the basic | 
|  | // block(non-callsite) locations and write the result to | 
|  | // IRToProfileLocationMap. | 
|  | if (RunCFGMatching) | 
|  | matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap); | 
|  | } | 
|  |  | 
|  | void SampleProfileMatcher::runOnFunction(Function &F) { | 
|  | // We need to use flattened function samples for matching. | 
|  | // Unlike IR, which includes all callsites from the source code, the callsites | 
|  | // in profile only show up when they are hit by samples, i,e. the profile | 
|  | // callsites in one context may differ from those in another context. To get | 
|  | // the maximum number of callsites, we merge the function profiles from all | 
|  | // contexts, aka, the flattened profile to find profile anchors. | 
|  | const auto *FSForMatching = getFlattenedSamplesFor(F); | 
|  | if (SalvageUnusedProfile && !FSForMatching) { | 
|  | // Apply the matching in place to find the new function's matched profile. | 
|  | auto R = FuncToProfileNameMap.find(&F); | 
|  | if (R != FuncToProfileNameMap.end()) { | 
|  | FSForMatching = getFlattenedSamplesFor(R->second); | 
|  | // Try to find the salvaged top-level profiles that are explicitly loaded | 
|  | // for the matching, see "functionMatchesProfileHelper" for the details. | 
|  | if (!FSForMatching && LoadFuncProfileforCGMatching) | 
|  | FSForMatching = Reader.getSamplesFor(R->second.stringRef()); | 
|  | } | 
|  | } | 
|  | if (!FSForMatching) | 
|  | return; | 
|  |  | 
|  | // Anchors for IR. It's a map from IR location to callee name, callee name is | 
|  | // empty for non-call instruction and use a dummy name(UnknownIndirectCallee) | 
|  | // for unknown indrect callee name. | 
|  | AnchorMap IRAnchors; | 
|  | findIRAnchors(F, IRAnchors); | 
|  | // Anchors for profile. It's a map from callsite location to a set of callee | 
|  | // name. | 
|  | AnchorMap ProfileAnchors; | 
|  | findProfileAnchors(*FSForMatching, ProfileAnchors); | 
|  |  | 
|  | // Compute the callsite match states for profile staleness report. | 
|  | if (ReportProfileStaleness || PersistProfileStaleness) | 
|  | recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, nullptr); | 
|  |  | 
|  | if (!SalvageStaleProfile) | 
|  | return; | 
|  | // For probe-based profiles, run matching only when profile checksum is | 
|  | // mismatched. | 
|  | bool ChecksumMismatch = FunctionSamples::ProfileIsProbeBased && | 
|  | !ProbeManager->profileIsValid(F, *FSForMatching); | 
|  | bool RunCFGMatching = | 
|  | !FunctionSamples::ProfileIsProbeBased || ChecksumMismatch; | 
|  | bool RunCGMatching = SalvageUnusedProfile; | 
|  | // For imported functions, the checksum metadata(pseudo_probe_desc) are | 
|  | // dropped, so we leverage function attribute(profile-checksum-mismatch) to | 
|  | // transfer the info: add the attribute during pre-link phase and check it | 
|  | // during post-link phase(see "profileIsValid"). | 
|  | if (ChecksumMismatch && LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) | 
|  | F.addFnAttr("profile-checksum-mismatch"); | 
|  |  | 
|  | // The matching result will be saved to IRToProfileLocationMap, create a | 
|  | // new map for each function. | 
|  | auto &IRToProfileLocationMap = getIRToProfileLocationMap(F); | 
|  | runStaleProfileMatching(F, IRAnchors, ProfileAnchors, IRToProfileLocationMap, | 
|  | RunCFGMatching, RunCGMatching); | 
|  | // Find and update callsite match states after matching. | 
|  | if (RunCFGMatching && (ReportProfileStaleness || PersistProfileStaleness)) | 
|  | recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, | 
|  | &IRToProfileLocationMap); | 
|  | } | 
|  |  | 
|  | void SampleProfileMatcher::recordCallsiteMatchStates( | 
|  | const Function &F, const AnchorMap &IRAnchors, | 
|  | const AnchorMap &ProfileAnchors, | 
|  | const LocToLocMap *IRToProfileLocationMap) { | 
|  | bool IsPostMatch = IRToProfileLocationMap != nullptr; | 
|  | auto &CallsiteMatchStates = | 
|  | FuncCallsiteMatchStates[FunctionSamples::getCanonicalFnName(F.getName())]; | 
|  |  | 
|  | auto MapIRLocToProfileLoc = [&](const LineLocation &IRLoc) { | 
|  | // IRToProfileLocationMap is null in pre-match phrase. | 
|  | if (!IRToProfileLocationMap) | 
|  | return IRLoc; | 
|  | const auto &ProfileLoc = IRToProfileLocationMap->find(IRLoc); | 
|  | if (ProfileLoc != IRToProfileLocationMap->end()) | 
|  | return ProfileLoc->second; | 
|  | else | 
|  | return IRLoc; | 
|  | }; | 
|  |  | 
|  | for (const auto &I : IRAnchors) { | 
|  | // After fuzzy profile matching, use the matching result to remap the | 
|  | // current IR callsite. | 
|  | const auto &ProfileLoc = MapIRLocToProfileLoc(I.first); | 
|  | const auto &IRCalleeId = I.second; | 
|  | const auto &It = ProfileAnchors.find(ProfileLoc); | 
|  | if (It == ProfileAnchors.end()) | 
|  | continue; | 
|  | const auto &ProfCalleeId = It->second; | 
|  | if (IRCalleeId == ProfCalleeId) { | 
|  | auto It = CallsiteMatchStates.find(ProfileLoc); | 
|  | if (It == CallsiteMatchStates.end()) | 
|  | CallsiteMatchStates.emplace(ProfileLoc, MatchState::InitialMatch); | 
|  | else if (IsPostMatch) { | 
|  | if (It->second == MatchState::InitialMatch) | 
|  | It->second = MatchState::UnchangedMatch; | 
|  | else if (It->second == MatchState::InitialMismatch) | 
|  | It->second = MatchState::RecoveredMismatch; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | // Check if there are any callsites in the profile that does not match to any | 
|  | // IR callsites. | 
|  | for (const auto &I : ProfileAnchors) { | 
|  | const auto &Loc = I.first; | 
|  | assert(!I.second.stringRef().empty() && "Callees should not be empty"); | 
|  | auto It = CallsiteMatchStates.find(Loc); | 
|  | if (It == CallsiteMatchStates.end()) | 
|  | CallsiteMatchStates.emplace(Loc, MatchState::InitialMismatch); | 
|  | else if (IsPostMatch) { | 
|  | // Update the state if it's not matched(UnchangedMatch or | 
|  | // RecoveredMismatch). | 
|  | if (It->second == MatchState::InitialMismatch) | 
|  | It->second = MatchState::UnchangedMismatch; | 
|  | else if (It->second == MatchState::InitialMatch) | 
|  | It->second = MatchState::RemovedMatch; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | void SampleProfileMatcher::countMismatchedFuncSamples(const FunctionSamples &FS, | 
|  | bool IsTopLevel) { | 
|  | const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID()); | 
|  | // Skip the function that is external or renamed. | 
|  | if (!FuncDesc) | 
|  | return; | 
|  |  | 
|  | if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) { | 
|  | if (IsTopLevel) | 
|  | NumStaleProfileFunc++; | 
|  | // Given currently all probe ids are after block probe ids, once the | 
|  | // checksum is mismatched, it's likely all the callites are mismatched and | 
|  | // dropped. We conservatively count all the samples as mismatched and stop | 
|  | // counting the inlinees' profiles. | 
|  | MismatchedFunctionSamples += FS.getTotalSamples(); | 
|  | return; | 
|  | } | 
|  |  | 
|  | // Even the current-level function checksum is matched, it's possible that the | 
|  | // nested inlinees' checksums are mismatched that affect the inlinee's sample | 
|  | // loading, we need to go deeper to check the inlinees' function samples. | 
|  | // Similarly, count all the samples as mismatched if the inlinee's checksum is | 
|  | // mismatched using this recursive function. | 
|  | for (const auto &I : FS.getCallsiteSamples()) | 
|  | for (const auto &CS : I.second) | 
|  | countMismatchedFuncSamples(CS.second, false); | 
|  | } | 
|  |  | 
|  | void SampleProfileMatcher::countMismatchedCallsiteSamples( | 
|  | const FunctionSamples &FS) { | 
|  | auto It = FuncCallsiteMatchStates.find(FS.getFuncName()); | 
|  | // Skip it if no mismatched callsite or this is an external function. | 
|  | if (It == FuncCallsiteMatchStates.end() || It->second.empty()) | 
|  | return; | 
|  | const auto &CallsiteMatchStates = It->second; | 
|  |  | 
|  | auto findMatchState = [&](const LineLocation &Loc) { | 
|  | auto It = CallsiteMatchStates.find(Loc); | 
|  | if (It == CallsiteMatchStates.end()) | 
|  | return MatchState::Unknown; | 
|  | return It->second; | 
|  | }; | 
|  |  | 
|  | auto AttributeMismatchedSamples = [&](const enum MatchState &State, | 
|  | uint64_t Samples) { | 
|  | if (isMismatchState(State)) | 
|  | MismatchedCallsiteSamples += Samples; | 
|  | else if (State == MatchState::RecoveredMismatch) | 
|  | RecoveredCallsiteSamples += Samples; | 
|  | }; | 
|  |  | 
|  | // The non-inlined callsites are saved in the body samples of function | 
|  | // profile, go through it to count the non-inlined callsite samples. | 
|  | for (const auto &I : FS.getBodySamples()) | 
|  | AttributeMismatchedSamples(findMatchState(I.first), I.second.getSamples()); | 
|  |  | 
|  | // Count the inlined callsite samples. | 
|  | for (const auto &I : FS.getCallsiteSamples()) { | 
|  | auto State = findMatchState(I.first); | 
|  | uint64_t CallsiteSamples = 0; | 
|  | for (const auto &CS : I.second) | 
|  | CallsiteSamples += CS.second.getTotalSamples(); | 
|  | AttributeMismatchedSamples(State, CallsiteSamples); | 
|  |  | 
|  | if (isMismatchState(State)) | 
|  | continue; | 
|  |  | 
|  | // When the current level of inlined call site matches the profiled call | 
|  | // site, we need to go deeper along the inline tree to count mismatches from | 
|  | // lower level inlinees. | 
|  | for (const auto &CS : I.second) | 
|  | countMismatchedCallsiteSamples(CS.second); | 
|  | } | 
|  | } | 
|  |  | 
|  | void SampleProfileMatcher::countMismatchCallsites(const FunctionSamples &FS) { | 
|  | auto It = FuncCallsiteMatchStates.find(FS.getFuncName()); | 
|  | // Skip it if no mismatched callsite or this is an external function. | 
|  | if (It == FuncCallsiteMatchStates.end() || It->second.empty()) | 
|  | return; | 
|  | const auto &MatchStates = It->second; | 
|  | [[maybe_unused]] bool OnInitialState = | 
|  | isInitialState(MatchStates.begin()->second); | 
|  | for (const auto &I : MatchStates) { | 
|  | TotalProfiledCallsites++; | 
|  | assert( | 
|  | (OnInitialState ? isInitialState(I.second) : isFinalState(I.second)) && | 
|  | "Profile matching state is inconsistent"); | 
|  |  | 
|  | if (isMismatchState(I.second)) | 
|  | NumMismatchedCallsites++; | 
|  | else if (I.second == MatchState::RecoveredMismatch) | 
|  | NumRecoveredCallsites++; | 
|  | } | 
|  | } | 
|  |  | 
|  | void SampleProfileMatcher::countCallGraphRecoveredSamples( | 
|  | const FunctionSamples &FS, | 
|  | std::unordered_set<FunctionId> &CallGraphRecoveredProfiles) { | 
|  | if (CallGraphRecoveredProfiles.count(FS.getFunction())) { | 
|  | NumCallGraphRecoveredFuncSamples += FS.getTotalSamples(); | 
|  | return; | 
|  | } | 
|  |  | 
|  | for (const auto &CM : FS.getCallsiteSamples()) { | 
|  | for (const auto &CS : CM.second) { | 
|  | countCallGraphRecoveredSamples(CS.second, CallGraphRecoveredProfiles); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | void SampleProfileMatcher::computeAndReportProfileStaleness() { | 
|  | if (!ReportProfileStaleness && !PersistProfileStaleness) | 
|  | return; | 
|  |  | 
|  | std::unordered_set<FunctionId> CallGraphRecoveredProfiles; | 
|  | if (SalvageUnusedProfile) { | 
|  | for (const auto &I : FuncToProfileNameMap) { | 
|  | CallGraphRecoveredProfiles.insert(I.second); | 
|  | if (GlobalValue::isAvailableExternallyLinkage(I.first->getLinkage())) | 
|  | continue; | 
|  | NumCallGraphRecoveredProfiledFunc++; | 
|  | } | 
|  | } | 
|  |  | 
|  | // Count profile mismatches for profile staleness report. | 
|  | for (const auto &F : M) { | 
|  | if (skipProfileForFunction(F)) | 
|  | continue; | 
|  | // As the stats will be merged by linker, skip reporting the metrics for | 
|  | // imported functions to avoid repeated counting. | 
|  | if (GlobalValue::isAvailableExternallyLinkage(F.getLinkage())) | 
|  | continue; | 
|  | const auto *FS = Reader.getSamplesFor(F); | 
|  | if (!FS) | 
|  | continue; | 
|  | TotalProfiledFunc++; | 
|  | TotalFunctionSamples += FS->getTotalSamples(); | 
|  |  | 
|  | if (SalvageUnusedProfile && !CallGraphRecoveredProfiles.empty()) | 
|  | countCallGraphRecoveredSamples(*FS, CallGraphRecoveredProfiles); | 
|  |  | 
|  | // Checksum mismatch is only used in pseudo-probe mode. | 
|  | if (FunctionSamples::ProfileIsProbeBased) | 
|  | countMismatchedFuncSamples(*FS, true); | 
|  |  | 
|  | // Count mismatches and samples for calliste. | 
|  | countMismatchCallsites(*FS); | 
|  | countMismatchedCallsiteSamples(*FS); | 
|  | } | 
|  |  | 
|  | if (ReportProfileStaleness) { | 
|  | if (FunctionSamples::ProfileIsProbeBased) { | 
|  | errs() << "(" << NumStaleProfileFunc << "/" << TotalProfiledFunc | 
|  | << ") of functions' profile are invalid and (" | 
|  | << MismatchedFunctionSamples << "/" << TotalFunctionSamples | 
|  | << ") of samples are discarded due to function hash mismatch.\n"; | 
|  | } | 
|  | if (SalvageUnusedProfile) { | 
|  | errs() << "(" << NumCallGraphRecoveredProfiledFunc << "/" | 
|  | << TotalProfiledFunc << ") of functions' profile are matched and (" | 
|  | << NumCallGraphRecoveredFuncSamples << "/" << TotalFunctionSamples | 
|  | << ") of samples are reused by call graph matching.\n"; | 
|  | } | 
|  |  | 
|  | errs() << "(" << (NumMismatchedCallsites + NumRecoveredCallsites) << "/" | 
|  | << TotalProfiledCallsites | 
|  | << ") of callsites' profile are invalid and (" | 
|  | << (MismatchedCallsiteSamples + RecoveredCallsiteSamples) << "/" | 
|  | << TotalFunctionSamples | 
|  | << ") of samples are discarded due to callsite location mismatch.\n"; | 
|  | errs() << "(" << NumRecoveredCallsites << "/" | 
|  | << (NumRecoveredCallsites + NumMismatchedCallsites) | 
|  | << ") of callsites and (" << RecoveredCallsiteSamples << "/" | 
|  | << (RecoveredCallsiteSamples + MismatchedCallsiteSamples) | 
|  | << ") of samples are recovered by stale profile matching.\n"; | 
|  | } | 
|  |  | 
|  | if (PersistProfileStaleness) { | 
|  | LLVMContext &Ctx = M.getContext(); | 
|  | MDBuilder MDB(Ctx); | 
|  |  | 
|  | SmallVector<std::pair<StringRef, uint64_t>> ProfStatsVec; | 
|  | if (FunctionSamples::ProfileIsProbeBased) { | 
|  | ProfStatsVec.emplace_back("NumStaleProfileFunc", NumStaleProfileFunc); | 
|  | ProfStatsVec.emplace_back("TotalProfiledFunc", TotalProfiledFunc); | 
|  | ProfStatsVec.emplace_back("MismatchedFunctionSamples", | 
|  | MismatchedFunctionSamples); | 
|  | ProfStatsVec.emplace_back("TotalFunctionSamples", TotalFunctionSamples); | 
|  | } | 
|  |  | 
|  | if (SalvageUnusedProfile) { | 
|  | ProfStatsVec.emplace_back("NumCallGraphRecoveredProfiledFunc", | 
|  | NumCallGraphRecoveredProfiledFunc); | 
|  | ProfStatsVec.emplace_back("NumCallGraphRecoveredFuncSamples", | 
|  | NumCallGraphRecoveredFuncSamples); | 
|  | } | 
|  |  | 
|  | ProfStatsVec.emplace_back("NumMismatchedCallsites", NumMismatchedCallsites); | 
|  | ProfStatsVec.emplace_back("NumRecoveredCallsites", NumRecoveredCallsites); | 
|  | ProfStatsVec.emplace_back("TotalProfiledCallsites", TotalProfiledCallsites); | 
|  | ProfStatsVec.emplace_back("MismatchedCallsiteSamples", | 
|  | MismatchedCallsiteSamples); | 
|  | ProfStatsVec.emplace_back("RecoveredCallsiteSamples", | 
|  | RecoveredCallsiteSamples); | 
|  |  | 
|  | auto *MD = MDB.createLLVMStats(ProfStatsVec); | 
|  | auto *NMD = M.getOrInsertNamedMetadata("llvm.stats"); | 
|  | NMD->addOperand(MD); | 
|  | } | 
|  | } | 
|  |  | 
|  | void SampleProfileMatcher::findFunctionsWithoutProfile() { | 
|  | // TODO: Support MD5 profile. | 
|  | if (FunctionSamples::UseMD5) | 
|  | return; | 
|  | StringSet<> NamesInProfile; | 
|  | if (auto NameTable = Reader.getNameTable()) { | 
|  | for (auto Name : *NameTable) | 
|  | NamesInProfile.insert(Name.stringRef()); | 
|  | } | 
|  |  | 
|  | for (auto &F : M) { | 
|  | // Skip declarations, as even if the function can be matched, we have | 
|  | // nothing to do with it. | 
|  | if (F.isDeclaration()) | 
|  | continue; | 
|  |  | 
|  | StringRef CanonFName = FunctionSamples::getCanonicalFnName(F.getName()); | 
|  | const auto *FS = getFlattenedSamplesFor(F); | 
|  | if (FS) | 
|  | continue; | 
|  |  | 
|  | // For extended binary, functions fully inlined may not be loaded in the | 
|  | // top-level profile, so check the NameTable which has the all symbol names | 
|  | // in profile. | 
|  | if (NamesInProfile.count(CanonFName)) | 
|  | continue; | 
|  |  | 
|  | // For extended binary, non-profiled function symbols are in the profile | 
|  | // symbol list table. | 
|  | if (PSL && PSL->contains(CanonFName)) | 
|  | continue; | 
|  |  | 
|  | LLVM_DEBUG(dbgs() << "Function " << CanonFName | 
|  | << " is not in profile or profile symbol list.\n"); | 
|  | FunctionsWithoutProfile[FunctionId(CanonFName)] = &F; | 
|  | } | 
|  | } | 
|  |  | 
|  | bool SampleProfileMatcher::functionMatchesProfileHelper( | 
|  | const Function &IRFunc, const FunctionId &ProfFunc) { | 
|  | // The value is in the range [0, 1]. The bigger the value is, the more similar | 
|  | // two sequences are. | 
|  | float Similarity = 0.0; | 
|  |  | 
|  | // Match the functions if they have the same base name(after demangling) and | 
|  | // skip the similarity check. | 
|  | ItaniumPartialDemangler Demangler; | 
|  | // Helper lambda to demangle and get the base name. If the demangling failed, | 
|  | // return an empty string. | 
|  | auto GetBaseName = [&](StringRef FName) { | 
|  | auto FunctionName = FName.str(); | 
|  | if (Demangler.partialDemangle(FunctionName.c_str())) | 
|  | return std::string(); | 
|  | size_t BaseNameSize = 0; | 
|  | // The demangler API follows the __cxa_demangle one, and thus needs a | 
|  | // pointer that originates from malloc (or nullptr) and the caller is | 
|  | // responsible for free()-ing the buffer. | 
|  | char *BaseNamePtr = Demangler.getFunctionBaseName(nullptr, &BaseNameSize); | 
|  | std::string Result = (BaseNamePtr && BaseNameSize) | 
|  | ? std::string(BaseNamePtr, BaseNameSize) | 
|  | : std::string(); | 
|  | free(BaseNamePtr); | 
|  | return Result; | 
|  | }; | 
|  | auto IRBaseName = GetBaseName(IRFunc.getName()); | 
|  | auto ProfBaseName = GetBaseName(ProfFunc.stringRef()); | 
|  | if (!IRBaseName.empty() && IRBaseName == ProfBaseName) { | 
|  | LLVM_DEBUG(dbgs() << "The functions " << IRFunc.getName() << "(IR) and " | 
|  | << ProfFunc << "(Profile) share the same base name: " | 
|  | << IRBaseName << ".\n"); | 
|  | return true; | 
|  | } | 
|  |  | 
|  | const auto *FSForMatching = getFlattenedSamplesFor(ProfFunc); | 
|  | // With extbinary profile format, initial profile loading only reads profile | 
|  | // based on current function names in the module. | 
|  | // However, if a function is renamed, sample loader skips to load its original | 
|  | // profile(which has a different name), we will miss this case. To address | 
|  | // this, we load the top-level profile candidate explicitly for the matching. | 
|  | if (!FSForMatching && LoadFuncProfileforCGMatching) { | 
|  | DenseSet<StringRef> TopLevelFunc({ProfFunc.stringRef()}); | 
|  | if (std::error_code EC = Reader.read(TopLevelFunc)) | 
|  | return false; | 
|  | FSForMatching = Reader.getSamplesFor(ProfFunc.stringRef()); | 
|  | LLVM_DEBUG({ | 
|  | if (FSForMatching) | 
|  | dbgs() << "Read top-level function " << ProfFunc | 
|  | << " for call-graph matching\n"; | 
|  | }); | 
|  | } | 
|  | if (!FSForMatching) | 
|  | return false; | 
|  | // The check for similarity or checksum may not be reliable if the function is | 
|  | // tiny, we use the number of basic block as a proxy for the function | 
|  | // complexity and skip the matching if it's too small. | 
|  | if (IRFunc.size() < MinFuncCountForCGMatching || | 
|  | FSForMatching->getBodySamples().size() < MinFuncCountForCGMatching) | 
|  | return false; | 
|  |  | 
|  | // For probe-based function, we first trust the checksum info. If the checksum | 
|  | // doesn't match, we continue checking for similarity. | 
|  | if (FunctionSamples::ProfileIsProbeBased) { | 
|  | const auto *FuncDesc = ProbeManager->getDesc(IRFunc); | 
|  | if (FuncDesc && | 
|  | !ProbeManager->profileIsHashMismatched(*FuncDesc, *FSForMatching)) { | 
|  | LLVM_DEBUG(dbgs() << "The checksums for " << IRFunc.getName() | 
|  | << "(IR) and " << ProfFunc << "(Profile) match.\n"); | 
|  |  | 
|  | return true; | 
|  | } | 
|  | } | 
|  |  | 
|  | AnchorMap IRAnchors; | 
|  | findIRAnchors(IRFunc, IRAnchors); | 
|  | AnchorMap ProfileAnchors; | 
|  | findProfileAnchors(*FSForMatching, ProfileAnchors); | 
|  |  | 
|  | AnchorList FilteredIRAnchorsList; | 
|  | AnchorList FilteredProfileAnchorList; | 
|  | getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList, | 
|  | FilteredProfileAnchorList); | 
|  |  | 
|  | // Similarly skip the matching if the num of anchors is not enough. | 
|  | if (FilteredIRAnchorsList.size() < MinCallCountForCGMatching || | 
|  | FilteredProfileAnchorList.size() < MinCallCountForCGMatching) | 
|  | return false; | 
|  |  | 
|  | // Use the diff algorithm to find the LCS between IR and profile. | 
|  |  | 
|  | // Don't recursively match the callee function to avoid infinite matching, | 
|  | // callee functions will be handled later since it's processed in top-down | 
|  | // order . | 
|  | LocToLocMap MatchedAnchors = | 
|  | longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList, | 
|  | false /* Match unused functions */); | 
|  |  | 
|  | Similarity = static_cast<float>(MatchedAnchors.size()) / | 
|  | FilteredProfileAnchorList.size(); | 
|  |  | 
|  | LLVM_DEBUG(dbgs() << "The similarity between " << IRFunc.getName() | 
|  | << "(IR) and " << ProfFunc << "(profile) is " | 
|  | << format("%.2f", Similarity) << "\n"); | 
|  | assert((Similarity >= 0 && Similarity <= 1.0) && | 
|  | "Similarity value should be in [0, 1]"); | 
|  | return Similarity * 100 > FuncProfileSimilarityThreshold; | 
|  | } | 
|  |  | 
|  | // If FindMatchedProfileOnly is set to true, only use the processed function | 
|  | // results. This is used for skipping the repeated recursive matching. | 
|  | bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc, | 
|  | const FunctionId &ProfFunc, | 
|  | bool FindMatchedProfileOnly) { | 
|  | auto R = FuncProfileMatchCache.find({&IRFunc, ProfFunc}); | 
|  | if (R != FuncProfileMatchCache.end()) | 
|  | return R->second; | 
|  |  | 
|  | if (FindMatchedProfileOnly) | 
|  | return false; | 
|  |  | 
|  | bool Matched = functionMatchesProfileHelper(IRFunc, ProfFunc); | 
|  | FuncProfileMatchCache[{&IRFunc, ProfFunc}] = Matched; | 
|  | if (Matched) { | 
|  | FuncToProfileNameMap[&IRFunc] = ProfFunc; | 
|  | LLVM_DEBUG(dbgs() << "Function:" << IRFunc.getName() | 
|  | << " matches profile:" << ProfFunc << "\n"); | 
|  | } | 
|  |  | 
|  | return Matched; | 
|  | } | 
|  |  | 
|  | void SampleProfileMatcher::UpdateWithSalvagedProfiles() { | 
|  | DenseSet<StringRef> ProfileSalvagedFuncs; | 
|  | // Update FuncNameToProfNameMap and SymbolMap. | 
|  | for (auto &I : FuncToProfileNameMap) { | 
|  | assert(I.first && "New function is null"); | 
|  | FunctionId FuncName(I.first->getName()); | 
|  | ProfileSalvagedFuncs.insert(I.second.stringRef()); | 
|  | FuncNameToProfNameMap->emplace(FuncName, I.second); | 
|  |  | 
|  | // We need to remove the old entry to avoid duplicating the function | 
|  | // processing. | 
|  | SymbolMap->erase(FuncName); | 
|  | SymbolMap->emplace(I.second, I.first); | 
|  | } | 
|  |  | 
|  | // With extbinary profile format, initial profile loading only reads profile | 
|  | // based on current function names in the module, so we need to load top-level | 
|  | // profiles for functions with different profile name explicitly after | 
|  | // function-profile name map is established with stale profile matching. | 
|  | Reader.read(ProfileSalvagedFuncs); | 
|  | Reader.setFuncNameToProfNameMap(*FuncNameToProfNameMap); | 
|  | } | 
|  |  | 
|  | void SampleProfileMatcher::runOnModule() { | 
|  | ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles, | 
|  | FunctionSamples::ProfileIsCS); | 
|  | if (SalvageUnusedProfile) | 
|  | findFunctionsWithoutProfile(); | 
|  |  | 
|  | // Process the matching in top-down order so that the caller matching result | 
|  | // can be used to the callee matching. | 
|  | std::vector<Function *> TopDownFunctionList; | 
|  | TopDownFunctionList.reserve(M.size()); | 
|  | buildTopDownFuncOrder(CG, TopDownFunctionList); | 
|  | for (auto *F : TopDownFunctionList) { | 
|  | if (skipProfileForFunction(*F)) | 
|  | continue; | 
|  | runOnFunction(*F); | 
|  | } | 
|  |  | 
|  | if (SalvageUnusedProfile) | 
|  | UpdateWithSalvagedProfiles(); | 
|  |  | 
|  | if (SalvageStaleProfile) | 
|  | distributeIRToProfileLocationMap(); | 
|  |  | 
|  | computeAndReportProfileStaleness(); | 
|  | } | 
|  |  | 
|  | void SampleProfileMatcher::distributeIRToProfileLocationMap( | 
|  | FunctionSamples &FS) { | 
|  | const auto ProfileMappings = FuncMappings.find(FS.getFuncName()); | 
|  | if (ProfileMappings != FuncMappings.end()) { | 
|  | FS.setIRToProfileLocationMap(&(ProfileMappings->second)); | 
|  | } | 
|  |  | 
|  | for (auto &Callees : | 
|  | const_cast<CallsiteSampleMap &>(FS.getCallsiteSamples())) { | 
|  | for (auto &FS : Callees.second) { | 
|  | distributeIRToProfileLocationMap(FS.second); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | // Use a central place to distribute the matching results. Outlined and inlined | 
|  | // profile with the function name will be set to the same pointer. | 
|  | void SampleProfileMatcher::distributeIRToProfileLocationMap() { | 
|  | for (auto &I : Reader.getProfiles()) { | 
|  | distributeIRToProfileLocationMap(I.second); | 
|  | } | 
|  | } |