//===- SampleProfileMatcher.cpp - Sampling-based Stale Profile Matcher ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the SampleProfileMatcher used for stale
// profile matching.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/SampleProfileMatcher.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Support/CommandLine.h"

using namespace llvm;
using namespace sampleprof;

#define DEBUG_TYPE "sample-profile-matcher"

static cl::opt<unsigned> FuncProfileSimilarityThreshold(
    "func-profile-similarity-threshold", cl::Hidden, cl::init(80),
    cl::desc("Consider a profile matches a function if the similarity of their "
             "callee sequences is above the specified percentile."));

static cl::opt<unsigned> MinFuncCountForCGMatching(
    "min-func-count-for-cg-matching", cl::Hidden, cl::init(5),
    cl::desc("The minimum number of basic blocks required for a function to "
             "run stale profile call graph matching."));

static cl::opt<unsigned> MinCallCountForCGMatching(
    "min-call-count-for-cg-matching", cl::Hidden, cl::init(3),
    cl::desc("The minimum number of call anchors required for a function to "
             "run stale profile call graph matching."));

static cl::opt<bool> LoadFuncProfileforCGMatching(
    "load-func-profile-for-cg-matching", cl::Hidden, cl::init(false),
    cl::desc(
        "Load top-level profiles that the sample reader initially skipped for "
        "the call-graph matching (only meaningful for extended binary "
        "format)"));

extern cl::opt<bool> SalvageStaleProfile;
extern cl::opt<bool> SalvageUnusedProfile;
extern cl::opt<bool> PersistProfileStaleness;
extern cl::opt<bool> ReportProfileStaleness;

static cl::opt<unsigned> SalvageStaleProfileMaxCallsites(
    "salvage-stale-profile-max-callsites", cl::Hidden, cl::init(UINT_MAX),
    cl::desc("The maximum number of callsites in a function, above which stale "
             "profile matching will be skipped."));

void SampleProfileMatcher::findIRAnchors(const Function &F,
                                         AnchorMap &IRAnchors) const {
  // For inlined code, recover the original callsite and callee by finding the
  // top-level inline frame. e.g. For frame stack "main:1 @ foo:2 @ bar:3", the
  // top-level frame is "main:1", the callsite is "1" and the callee is "foo".
  auto FindTopLevelInlinedCallsite = [](const DILocation *DIL) {
    assert((DIL && DIL->getInlinedAt()) && "No inlined callsite");
    const DILocation *PrevDIL = nullptr;
    do {
      PrevDIL = DIL;
      DIL = DIL->getInlinedAt();
    } while (DIL->getInlinedAt());

    LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(
        DIL, FunctionSamples::ProfileIsFS);
    StringRef CalleeName = PrevDIL->getSubprogramLinkageName();
    return std::make_pair(Callsite, FunctionId(CalleeName));
  };

  auto GetCanonicalCalleeName = [](const CallBase *CB) {
    StringRef CalleeName = UnknownIndirectCallee;
    if (Function *Callee = CB->getCalledFunction())
      CalleeName = FunctionSamples::getCanonicalFnName(Callee->getName());
    return CalleeName;
  };

  // Extract profile matching anchors in the IR.
  for (auto &BB : F) {
    for (auto &I : BB) {
      DILocation *DIL = I.getDebugLoc();
      if (!DIL)
        continue;

      if (FunctionSamples::ProfileIsProbeBased) {
        if (auto Probe = extractProbe(I)) {
          // Flatten inlined IR for the matching.
          if (DIL->getInlinedAt()) {
            IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
          } else {
            // Use empty StringRef for basic block probe.
            StringRef CalleeName;
            if (const auto *CB = dyn_cast<CallBase>(&I)) {
              // Skip the probe inst whose callee name is "llvm.pseudoprobe".
              if (!isa<IntrinsicInst>(&I))
                CalleeName = GetCanonicalCalleeName(CB);
            }
            LineLocation Loc = LineLocation(Probe->Id, 0);
            IRAnchors.emplace(Loc, FunctionId(CalleeName));
          }
        }
      } else {
        // TODO: For line-number based profile(AutoFDO), currently only support
        // find callsite anchors. In future, we need to parse all the non-call
        // instructions to extract the line locations for profile matching.
        if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I))
          continue;

        if (DIL->getInlinedAt()) {
          IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
        } else {
          LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(
              DIL, FunctionSamples::ProfileIsFS);
          StringRef CalleeName = GetCanonicalCalleeName(dyn_cast<CallBase>(&I));
          IRAnchors.emplace(Callsite, FunctionId(CalleeName));
        }
      }
    }
  }
}

void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS,
                                              AnchorMap &ProfileAnchors) const {
  auto isInvalidLineOffset = [](uint32_t LineOffset) {
    return LineOffset & 0x8000;
  };

  auto InsertAnchor = [](const LineLocation &Loc, const FunctionId &CalleeName,
                         AnchorMap &ProfileAnchors) {
    auto Ret = ProfileAnchors.try_emplace(Loc, CalleeName);
    if (!Ret.second) {
      // For multiple callees, which indicates it's an indirect call, we use a
      // dummy name(UnknownIndirectCallee) as the indrect callee name.
      Ret.first->second = FunctionId(UnknownIndirectCallee);
    }
  };

  for (const auto &I : FS.getBodySamples()) {
    const LineLocation &Loc = I.first;
    if (isInvalidLineOffset(Loc.LineOffset))
      continue;
    for (const auto &C : I.second.getCallTargets())
      InsertAnchor(Loc, C.first, ProfileAnchors);
  }

  for (const auto &I : FS.getCallsiteSamples()) {
    const LineLocation &Loc = I.first;
    if (isInvalidLineOffset(Loc.LineOffset))
      continue;
    for (const auto &C : I.second)
      InsertAnchor(Loc, C.first, ProfileAnchors);
  }
}

bool SampleProfileMatcher::functionHasProfile(const FunctionId &IRFuncName,
                                              Function *&FuncWithoutProfile) {
  FuncWithoutProfile = nullptr;
  auto R = FunctionsWithoutProfile.find(IRFuncName);
  if (R != FunctionsWithoutProfile.end())
    FuncWithoutProfile = R->second;
  return !FuncWithoutProfile;
}

bool SampleProfileMatcher::isProfileUnused(const FunctionId &ProfileFuncName) {
  return SymbolMap->find(ProfileFuncName) == SymbolMap->end();
}

bool SampleProfileMatcher::functionMatchesProfile(
    const FunctionId &IRFuncName, const FunctionId &ProfileFuncName,
    bool FindMatchedProfileOnly) {
  if (IRFuncName == ProfileFuncName)
    return true;
  if (!SalvageUnusedProfile)
    return false;

  // If IR function doesn't have profile and the profile is unused, try
  // matching them.
  Function *IRFunc = nullptr;
  if (functionHasProfile(IRFuncName, IRFunc) ||
      !isProfileUnused(ProfileFuncName))
    return false;

  assert(FunctionId(IRFunc->getName()) != ProfileFuncName &&
         "IR function should be different from profile function to match");
  return functionMatchesProfile(*IRFunc, ProfileFuncName,
                                FindMatchedProfileOnly);
}

LocToLocMap
SampleProfileMatcher::longestCommonSequence(const AnchorList &AnchorList1,
                                            const AnchorList &AnchorList2,
                                            bool MatchUnusedFunction) {
  int32_t Size1 = AnchorList1.size(), Size2 = AnchorList2.size(),
          MaxDepth = Size1 + Size2;
  auto Index = [&](int32_t I) { return I + MaxDepth; };

  LocToLocMap EqualLocations;
  if (MaxDepth == 0)
    return EqualLocations;

  // Backtrack the SES result.
  auto Backtrack = [&](const std::vector<std::vector<int32_t>> &Trace,
                       const AnchorList &AnchorList1,
                       const AnchorList &AnchorList2,
                       LocToLocMap &EqualLocations) {
    int32_t X = Size1, Y = Size2;
    for (int32_t Depth = Trace.size() - 1; X > 0 || Y > 0; Depth--) {
      const auto &P = Trace[Depth];
      int32_t K = X - Y;
      int32_t PrevK = K;
      if (K == -Depth || (K != Depth && P[Index(K - 1)] < P[Index(K + 1)]))
        PrevK = K + 1;
      else
        PrevK = K - 1;

      int32_t PrevX = P[Index(PrevK)];
      int32_t PrevY = PrevX - PrevK;
      while (X > PrevX && Y > PrevY) {
        X--;
        Y--;
        EqualLocations.insert({AnchorList1[X].first, AnchorList2[Y].first});
      }

      if (Depth == 0)
        break;

      if (Y == PrevY)
        X--;
      else if (X == PrevX)
        Y--;
      X = PrevX;
      Y = PrevY;
    }
  };

  // The greedy LCS/SES algorithm.

  // An array contains the endpoints of the furthest reaching D-paths.
  std::vector<int32_t> V(2 * MaxDepth + 1, -1);
  V[Index(1)] = 0;
  // Trace is used to backtrack the SES result.
  std::vector<std::vector<int32_t>> Trace;
  for (int32_t Depth = 0; Depth <= MaxDepth; Depth++) {
    Trace.push_back(V);
    for (int32_t K = -Depth; K <= Depth; K += 2) {
      int32_t X = 0, Y = 0;
      if (K == -Depth || (K != Depth && V[Index(K - 1)] < V[Index(K + 1)]))
        X = V[Index(K + 1)];
      else
        X = V[Index(K - 1)] + 1;
      Y = X - K;
      while (X < Size1 && Y < Size2 &&
             functionMatchesProfile(
                 AnchorList1[X].second, AnchorList2[Y].second,
                 !MatchUnusedFunction /* Find matched function only */))
        X++, Y++;

      V[Index(K)] = X;

      if (X >= Size1 && Y >= Size2) {
        // Length of an SES is D.
        Backtrack(Trace, AnchorList1, AnchorList2, EqualLocations);
        return EqualLocations;
      }
    }
  }
  // Length of an SES is greater than MaxDepth.
  return EqualLocations;
}

void SampleProfileMatcher::matchNonCallsiteLocs(
    const LocToLocMap &MatchedAnchors, const AnchorMap &IRAnchors,
    LocToLocMap &IRToProfileLocationMap) {
  auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) {
    // Skip the unchanged location mapping to save memory.
    if (From != To)
      IRToProfileLocationMap.insert({From, To});
  };

  // Use function's beginning location as the initial anchor.
  int32_t LocationDelta = 0;
  SmallVector<LineLocation> LastMatchedNonAnchors;
  for (const auto &IR : IRAnchors) {
    const auto &Loc = IR.first;
    bool IsMatchedAnchor = false;
    // Match the anchor location in lexical order.
    auto R = MatchedAnchors.find(Loc);
    if (R != MatchedAnchors.end()) {
      const auto &Candidate = R->second;
      InsertMatching(Loc, Candidate);
      LLVM_DEBUG(dbgs() << "Callsite with callee:" << IR.second.stringRef()
                        << " is matched from " << Loc << " to " << Candidate
                        << "\n");
      LocationDelta = Candidate.LineOffset - Loc.LineOffset;

      // Match backwards for non-anchor locations.
      // The locations in LastMatchedNonAnchors have been matched forwards
      // based on the previous anchor, spilt it evenly and overwrite the
      // second half based on the current anchor.
      for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2;
           I < LastMatchedNonAnchors.size(); I++) {
        const auto &L = LastMatchedNonAnchors[I];
        uint32_t CandidateLineOffset = L.LineOffset + LocationDelta;
        LineLocation Candidate(CandidateLineOffset, L.Discriminator);
        InsertMatching(L, Candidate);
        LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L
                          << " to " << Candidate << "\n");
      }

      IsMatchedAnchor = true;
      LastMatchedNonAnchors.clear();
    }

    // Match forwards for non-anchor locations.
    if (!IsMatchedAnchor) {
      uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta;
      LineLocation Candidate(CandidateLineOffset, Loc.Discriminator);
      InsertMatching(Loc, Candidate);
      LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to "
                        << Candidate << "\n");
      LastMatchedNonAnchors.emplace_back(Loc);
    }
  }
}

// Filter the non-call locations from IRAnchors and ProfileAnchors and write
// them into a list for random access later.
void SampleProfileMatcher::getFilteredAnchorList(
    const AnchorMap &IRAnchors, const AnchorMap &ProfileAnchors,
    AnchorList &FilteredIRAnchorsList, AnchorList &FilteredProfileAnchorList) {
  for (const auto &I : IRAnchors) {
    if (I.second.stringRef().empty())
      continue;
    FilteredIRAnchorsList.emplace_back(I);
  }

  for (const auto &I : ProfileAnchors)
    FilteredProfileAnchorList.emplace_back(I);
}

// Call target name anchor based profile fuzzy matching.
// Input:
// For IR locations, the anchor is the callee name of direct callsite; For
// profile locations, it's the call target name for BodySamples or inlinee's
// profile name for CallsiteSamples.
// Matching heuristic:
// First match all the anchors using the diff algorithm, then split the
// non-anchor locations between the two anchors evenly, first half are matched
// based on the start anchor, second half are matched based on the end anchor.
// For example, given:
// IR locations:      [1, 2(foo), 3, 5, 6(bar), 7]
// Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9]
// The matching gives:
//   [1,    2(foo), 3,  5,  6(bar), 7]
//    |     |       |   |     |     |
//   [1, 2, 3(foo), 4,  7,  8(bar), 9]
// The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9].
void SampleProfileMatcher::runStaleProfileMatching(
    const Function &F, const AnchorMap &IRAnchors,
    const AnchorMap &ProfileAnchors, LocToLocMap &IRToProfileLocationMap,
    bool RunCFGMatching, bool RunCGMatching) {
  if (!RunCFGMatching && !RunCGMatching)
    return;
  LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName()
                    << "\n");
  assert(IRToProfileLocationMap.empty() &&
         "Run stale profile matching only once per function");

  AnchorList FilteredProfileAnchorList;
  AnchorList FilteredIRAnchorsList;
  getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
                        FilteredProfileAnchorList);

  if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty())
    return;

  if (FilteredIRAnchorsList.size() > SalvageStaleProfileMaxCallsites ||
      FilteredProfileAnchorList.size() > SalvageStaleProfileMaxCallsites) {
    LLVM_DEBUG(dbgs() << "Skip stale profile matching for " << F.getName()
                      << " because the number of callsites in the IR is "
                      << FilteredIRAnchorsList.size()
                      << " and in the profile is "
                      << FilteredProfileAnchorList.size() << "\n");
    return;
  }

  // Match the callsite anchors by finding the longest common subsequence
  // between IR and profile.
  // Define a match between two anchors as follows:
  // 1) The function names of anchors are the same.
  // 2) The similarity between the anchor functions is above a threshold if
  // RunCGMatching is set.
  // For 2), we only consider the anchor functions from IR and profile don't
  // appear on either side to reduce the matching scope. Note that we need to
  // use IR anchor as base(A side) to align with the order of
  // IRToProfileLocationMap.
  LocToLocMap MatchedAnchors =
      longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList,
                            RunCGMatching /* Match unused functions */);

  // CFG level matching:
  // Apply the callsite matchings to infer matching for the basic
  // block(non-callsite) locations and write the result to
  // IRToProfileLocationMap.
  if (RunCFGMatching)
    matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap);
}

void SampleProfileMatcher::runOnFunction(Function &F) {
  // We need to use flattened function samples for matching.
  // Unlike IR, which includes all callsites from the source code, the callsites
  // in profile only show up when they are hit by samples, i,e. the profile
  // callsites in one context may differ from those in another context. To get
  // the maximum number of callsites, we merge the function profiles from all
  // contexts, aka, the flattened profile to find profile anchors.
  const auto *FSForMatching = getFlattenedSamplesFor(F);
  if (SalvageUnusedProfile && !FSForMatching) {
    // Apply the matching in place to find the new function's matched profile.
    auto R = FuncToProfileNameMap.find(&F);
    if (R != FuncToProfileNameMap.end()) {
      FSForMatching = getFlattenedSamplesFor(R->second);
      // Try to find the salvaged top-level profiles that are explicitly loaded
      // for the matching, see "functionMatchesProfileHelper" for the details.
      if (!FSForMatching && LoadFuncProfileforCGMatching)
        FSForMatching = Reader.getSamplesFor(R->second.stringRef());
    }
  }
  if (!FSForMatching)
    return;

  // Anchors for IR. It's a map from IR location to callee name, callee name is
  // empty for non-call instruction and use a dummy name(UnknownIndirectCallee)
  // for unknown indrect callee name.
  AnchorMap IRAnchors;
  findIRAnchors(F, IRAnchors);
  // Anchors for profile. It's a map from callsite location to a set of callee
  // name.
  AnchorMap ProfileAnchors;
  findProfileAnchors(*FSForMatching, ProfileAnchors);

  // Compute the callsite match states for profile staleness report.
  if (ReportProfileStaleness || PersistProfileStaleness)
    recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, nullptr);

  if (!SalvageStaleProfile)
    return;
  // For probe-based profiles, run matching only when profile checksum is
  // mismatched.
  bool ChecksumMismatch = FunctionSamples::ProfileIsProbeBased &&
                          !ProbeManager->profileIsValid(F, *FSForMatching);
  bool RunCFGMatching =
      !FunctionSamples::ProfileIsProbeBased || ChecksumMismatch;
  bool RunCGMatching = SalvageUnusedProfile;
  // For imported functions, the checksum metadata(pseudo_probe_desc) are
  // dropped, so we leverage function attribute(profile-checksum-mismatch) to
  // transfer the info: add the attribute during pre-link phase and check it
  // during post-link phase(see "profileIsValid").
  if (ChecksumMismatch && LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink)
    F.addFnAttr("profile-checksum-mismatch");

  // The matching result will be saved to IRToProfileLocationMap, create a
  // new map for each function.
  auto &IRToProfileLocationMap = getIRToProfileLocationMap(F);
  runStaleProfileMatching(F, IRAnchors, ProfileAnchors, IRToProfileLocationMap,
                          RunCFGMatching, RunCGMatching);
  // Find and update callsite match states after matching.
  if (RunCFGMatching && (ReportProfileStaleness || PersistProfileStaleness))
    recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors,
                              &IRToProfileLocationMap);
}

void SampleProfileMatcher::recordCallsiteMatchStates(
    const Function &F, const AnchorMap &IRAnchors,
    const AnchorMap &ProfileAnchors,
    const LocToLocMap *IRToProfileLocationMap) {
  bool IsPostMatch = IRToProfileLocationMap != nullptr;
  auto &CallsiteMatchStates =
      FuncCallsiteMatchStates[FunctionSamples::getCanonicalFnName(F.getName())];

  auto MapIRLocToProfileLoc = [&](const LineLocation &IRLoc) {
    // IRToProfileLocationMap is null in pre-match phrase.
    if (!IRToProfileLocationMap)
      return IRLoc;
    const auto &ProfileLoc = IRToProfileLocationMap->find(IRLoc);
    if (ProfileLoc != IRToProfileLocationMap->end())
      return ProfileLoc->second;
    else
      return IRLoc;
  };

  for (const auto &I : IRAnchors) {
    // After fuzzy profile matching, use the matching result to remap the
    // current IR callsite.
    const auto &ProfileLoc = MapIRLocToProfileLoc(I.first);
    const auto &IRCalleeId = I.second;
    const auto &It = ProfileAnchors.find(ProfileLoc);
    if (It == ProfileAnchors.end())
      continue;
    const auto &ProfCalleeId = It->second;
    if (IRCalleeId == ProfCalleeId) {
      auto It = CallsiteMatchStates.find(ProfileLoc);
      if (It == CallsiteMatchStates.end())
        CallsiteMatchStates.emplace(ProfileLoc, MatchState::InitialMatch);
      else if (IsPostMatch) {
        if (It->second == MatchState::InitialMatch)
          It->second = MatchState::UnchangedMatch;
        else if (It->second == MatchState::InitialMismatch)
          It->second = MatchState::RecoveredMismatch;
      }
    }
  }

  // Check if there are any callsites in the profile that does not match to any
  // IR callsites.
  for (const auto &I : ProfileAnchors) {
    const auto &Loc = I.first;
    assert(!I.second.stringRef().empty() && "Callees should not be empty");
    auto It = CallsiteMatchStates.find(Loc);
    if (It == CallsiteMatchStates.end())
      CallsiteMatchStates.emplace(Loc, MatchState::InitialMismatch);
    else if (IsPostMatch) {
      // Update the state if it's not matched(UnchangedMatch or
      // RecoveredMismatch).
      if (It->second == MatchState::InitialMismatch)
        It->second = MatchState::UnchangedMismatch;
      else if (It->second == MatchState::InitialMatch)
        It->second = MatchState::RemovedMatch;
    }
  }
}

void SampleProfileMatcher::countMismatchedFuncSamples(const FunctionSamples &FS,
                                                      bool IsTopLevel) {
  const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID());
  // Skip the function that is external or renamed.
  if (!FuncDesc)
    return;

  if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) {
    if (IsTopLevel)
      NumStaleProfileFunc++;
    // Given currently all probe ids are after block probe ids, once the
    // checksum is mismatched, it's likely all the callites are mismatched and
    // dropped. We conservatively count all the samples as mismatched and stop
    // counting the inlinees' profiles.
    MismatchedFunctionSamples += FS.getTotalSamples();
    return;
  }

  // Even the current-level function checksum is matched, it's possible that the
  // nested inlinees' checksums are mismatched that affect the inlinee's sample
  // loading, we need to go deeper to check the inlinees' function samples.
  // Similarly, count all the samples as mismatched if the inlinee's checksum is
  // mismatched using this recursive function.
  for (const auto &I : FS.getCallsiteSamples())
    for (const auto &CS : I.second)
      countMismatchedFuncSamples(CS.second, false);
}

void SampleProfileMatcher::countMismatchedCallsiteSamples(
    const FunctionSamples &FS) {
  auto It = FuncCallsiteMatchStates.find(FS.getFuncName());
  // Skip it if no mismatched callsite or this is an external function.
  if (It == FuncCallsiteMatchStates.end() || It->second.empty())
    return;
  const auto &CallsiteMatchStates = It->second;

  auto findMatchState = [&](const LineLocation &Loc) {
    auto It = CallsiteMatchStates.find(Loc);
    if (It == CallsiteMatchStates.end())
      return MatchState::Unknown;
    return It->second;
  };

  auto AttributeMismatchedSamples = [&](const enum MatchState &State,
                                        uint64_t Samples) {
    if (isMismatchState(State))
      MismatchedCallsiteSamples += Samples;
    else if (State == MatchState::RecoveredMismatch)
      RecoveredCallsiteSamples += Samples;
  };

  // The non-inlined callsites are saved in the body samples of function
  // profile, go through it to count the non-inlined callsite samples.
  for (const auto &I : FS.getBodySamples())
    AttributeMismatchedSamples(findMatchState(I.first), I.second.getSamples());

  // Count the inlined callsite samples.
  for (const auto &I : FS.getCallsiteSamples()) {
    auto State = findMatchState(I.first);
    uint64_t CallsiteSamples = 0;
    for (const auto &CS : I.second)
      CallsiteSamples += CS.second.getTotalSamples();
    AttributeMismatchedSamples(State, CallsiteSamples);

    if (isMismatchState(State))
      continue;

    // When the current level of inlined call site matches the profiled call
    // site, we need to go deeper along the inline tree to count mismatches from
    // lower level inlinees.
    for (const auto &CS : I.second)
      countMismatchedCallsiteSamples(CS.second);
  }
}

void SampleProfileMatcher::countMismatchCallsites(const FunctionSamples &FS) {
  auto It = FuncCallsiteMatchStates.find(FS.getFuncName());
  // Skip it if no mismatched callsite or this is an external function.
  if (It == FuncCallsiteMatchStates.end() || It->second.empty())
    return;
  const auto &MatchStates = It->second;
  [[maybe_unused]] bool OnInitialState =
      isInitialState(MatchStates.begin()->second);
  for (const auto &I : MatchStates) {
    TotalProfiledCallsites++;
    assert(
        (OnInitialState ? isInitialState(I.second) : isFinalState(I.second)) &&
        "Profile matching state is inconsistent");

    if (isMismatchState(I.second))
      NumMismatchedCallsites++;
    else if (I.second == MatchState::RecoveredMismatch)
      NumRecoveredCallsites++;
  }
}

void SampleProfileMatcher::countCallGraphRecoveredSamples(
    const FunctionSamples &FS,
    std::unordered_set<FunctionId> &CallGraphRecoveredProfiles) {
  if (CallGraphRecoveredProfiles.count(FS.getFunction())) {
    NumCallGraphRecoveredFuncSamples += FS.getTotalSamples();
    return;
  }

  for (const auto &CM : FS.getCallsiteSamples()) {
    for (const auto &CS : CM.second) {
      countCallGraphRecoveredSamples(CS.second, CallGraphRecoveredProfiles);
    }
  }
}

void SampleProfileMatcher::computeAndReportProfileStaleness() {
  if (!ReportProfileStaleness && !PersistProfileStaleness)
    return;

  std::unordered_set<FunctionId> CallGraphRecoveredProfiles;
  if (SalvageUnusedProfile) {
    for (const auto &I : FuncToProfileNameMap) {
      CallGraphRecoveredProfiles.insert(I.second);
      if (GlobalValue::isAvailableExternallyLinkage(I.first->getLinkage()))
        continue;
      NumCallGraphRecoveredProfiledFunc++;
    }
  }

  // Count profile mismatches for profile staleness report.
  for (const auto &F : M) {
    if (skipProfileForFunction(F))
      continue;
    // As the stats will be merged by linker, skip reporting the metrics for
    // imported functions to avoid repeated counting.
    if (GlobalValue::isAvailableExternallyLinkage(F.getLinkage()))
      continue;
    const auto *FS = Reader.getSamplesFor(F);
    if (!FS)
      continue;
    TotalProfiledFunc++;
    TotalFunctionSamples += FS->getTotalSamples();

    if (SalvageUnusedProfile && !CallGraphRecoveredProfiles.empty())
      countCallGraphRecoveredSamples(*FS, CallGraphRecoveredProfiles);

    // Checksum mismatch is only used in pseudo-probe mode.
    if (FunctionSamples::ProfileIsProbeBased)
      countMismatchedFuncSamples(*FS, true);

    // Count mismatches and samples for calliste.
    countMismatchCallsites(*FS);
    countMismatchedCallsiteSamples(*FS);
  }

  if (ReportProfileStaleness) {
    if (FunctionSamples::ProfileIsProbeBased) {
      errs() << "(" << NumStaleProfileFunc << "/" << TotalProfiledFunc
             << ") of functions' profile are invalid and ("
             << MismatchedFunctionSamples << "/" << TotalFunctionSamples
             << ") of samples are discarded due to function hash mismatch.\n";
    }
    if (SalvageUnusedProfile) {
      errs() << "(" << NumCallGraphRecoveredProfiledFunc << "/"
             << TotalProfiledFunc << ") of functions' profile are matched and ("
             << NumCallGraphRecoveredFuncSamples << "/" << TotalFunctionSamples
             << ") of samples are reused by call graph matching.\n";
    }

    errs() << "(" << (NumMismatchedCallsites + NumRecoveredCallsites) << "/"
           << TotalProfiledCallsites
           << ") of callsites' profile are invalid and ("
           << (MismatchedCallsiteSamples + RecoveredCallsiteSamples) << "/"
           << TotalFunctionSamples
           << ") of samples are discarded due to callsite location mismatch.\n";
    errs() << "(" << NumRecoveredCallsites << "/"
           << (NumRecoveredCallsites + NumMismatchedCallsites)
           << ") of callsites and (" << RecoveredCallsiteSamples << "/"
           << (RecoveredCallsiteSamples + MismatchedCallsiteSamples)
           << ") of samples are recovered by stale profile matching.\n";
  }

  if (PersistProfileStaleness) {
    LLVMContext &Ctx = M.getContext();
    MDBuilder MDB(Ctx);

    SmallVector<std::pair<StringRef, uint64_t>> ProfStatsVec;
    if (FunctionSamples::ProfileIsProbeBased) {
      ProfStatsVec.emplace_back("NumStaleProfileFunc", NumStaleProfileFunc);
      ProfStatsVec.emplace_back("TotalProfiledFunc", TotalProfiledFunc);
      ProfStatsVec.emplace_back("MismatchedFunctionSamples",
                                MismatchedFunctionSamples);
      ProfStatsVec.emplace_back("TotalFunctionSamples", TotalFunctionSamples);
    }

    if (SalvageUnusedProfile) {
      ProfStatsVec.emplace_back("NumCallGraphRecoveredProfiledFunc",
                                NumCallGraphRecoveredProfiledFunc);
      ProfStatsVec.emplace_back("NumCallGraphRecoveredFuncSamples",
                                NumCallGraphRecoveredFuncSamples);
    }

    ProfStatsVec.emplace_back("NumMismatchedCallsites", NumMismatchedCallsites);
    ProfStatsVec.emplace_back("NumRecoveredCallsites", NumRecoveredCallsites);
    ProfStatsVec.emplace_back("TotalProfiledCallsites", TotalProfiledCallsites);
    ProfStatsVec.emplace_back("MismatchedCallsiteSamples",
                              MismatchedCallsiteSamples);
    ProfStatsVec.emplace_back("RecoveredCallsiteSamples",
                              RecoveredCallsiteSamples);

    auto *MD = MDB.createLLVMStats(ProfStatsVec);
    auto *NMD = M.getOrInsertNamedMetadata("llvm.stats");
    NMD->addOperand(MD);
  }
}

void SampleProfileMatcher::findFunctionsWithoutProfile() {
  // TODO: Support MD5 profile.
  if (FunctionSamples::UseMD5)
    return;
  StringSet<> NamesInProfile;
  if (auto NameTable = Reader.getNameTable()) {
    for (auto Name : *NameTable)
      NamesInProfile.insert(Name.stringRef());
  }

  for (auto &F : M) {
    // Skip declarations, as even if the function can be matched, we have
    // nothing to do with it.
    if (F.isDeclaration())
      continue;

    StringRef CanonFName = FunctionSamples::getCanonicalFnName(F.getName());
    const auto *FS = getFlattenedSamplesFor(F);
    if (FS)
      continue;

    // For extended binary, functions fully inlined may not be loaded in the
    // top-level profile, so check the NameTable which has the all symbol names
    // in profile.
    if (NamesInProfile.count(CanonFName))
      continue;

    // For extended binary, non-profiled function symbols are in the profile
    // symbol list table.
    if (PSL && PSL->contains(CanonFName))
      continue;

    LLVM_DEBUG(dbgs() << "Function " << CanonFName
                      << " is not in profile or profile symbol list.\n");
    FunctionsWithoutProfile[FunctionId(CanonFName)] = &F;
  }
}

bool SampleProfileMatcher::functionMatchesProfileHelper(
    const Function &IRFunc, const FunctionId &ProfFunc) {
  // The value is in the range [0, 1]. The bigger the value is, the more similar
  // two sequences are.
  float Similarity = 0.0;

  const auto *FSForMatching = getFlattenedSamplesFor(ProfFunc);
  // With extbinary profile format, initial profile loading only reads profile
  // based on current function names in the module.
  // However, if a function is renamed, sample loader skips to load its original
  // profile(which has a different name), we will miss this case. To address
  // this, we load the top-level profile candidate explicitly for the matching.
  if (!FSForMatching && LoadFuncProfileforCGMatching) {
    DenseSet<StringRef> TopLevelFunc({ProfFunc.stringRef()});
    if (std::error_code EC = Reader.read(TopLevelFunc))
      return false;
    FSForMatching = Reader.getSamplesFor(ProfFunc.stringRef());
    LLVM_DEBUG({
      if (FSForMatching)
        dbgs() << "Read top-level function " << ProfFunc
               << " for call-graph matching\n";
    });
  }
  if (!FSForMatching)
    return false;
  // The check for similarity or checksum may not be reliable if the function is
  // tiny, we use the number of basic block as a proxy for the function
  // complexity and skip the matching if it's too small.
  if (IRFunc.size() < MinFuncCountForCGMatching ||
      FSForMatching->getBodySamples().size() < MinFuncCountForCGMatching)
    return false;

  // For probe-based function, we first trust the checksum info. If the checksum
  // doesn't match, we continue checking for similarity.
  if (FunctionSamples::ProfileIsProbeBased) {
    const auto *FuncDesc = ProbeManager->getDesc(IRFunc);
    if (FuncDesc &&
        !ProbeManager->profileIsHashMismatched(*FuncDesc, *FSForMatching)) {
      LLVM_DEBUG(dbgs() << "The checksums for " << IRFunc.getName()
                        << "(IR) and " << ProfFunc << "(Profile) match.\n");

      return true;
    }
  }

  AnchorMap IRAnchors;
  findIRAnchors(IRFunc, IRAnchors);
  AnchorMap ProfileAnchors;
  findProfileAnchors(*FSForMatching, ProfileAnchors);

  AnchorList FilteredIRAnchorsList;
  AnchorList FilteredProfileAnchorList;
  getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
                        FilteredProfileAnchorList);

  // Similarly skip the matching if the num of anchors is not enough.
  if (FilteredIRAnchorsList.size() < MinCallCountForCGMatching ||
      FilteredProfileAnchorList.size() < MinCallCountForCGMatching)
    return false;

  // Use the diff algorithm to find the LCS between IR and profile.

  // Don't recursively match the callee function to avoid infinite matching,
  // callee functions will be handled later since it's processed in top-down
  // order .
  LocToLocMap MatchedAnchors =
      longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList,
                            false /* Match unused functions */);

  Similarity =
      static_cast<float>(MatchedAnchors.size()) * 2 /
      (FilteredIRAnchorsList.size() + FilteredProfileAnchorList.size());

  LLVM_DEBUG(dbgs() << "The similarity between " << IRFunc.getName()
                    << "(IR) and " << ProfFunc << "(profile) is "
                    << format("%.2f", Similarity) << "\n");
  assert((Similarity >= 0 && Similarity <= 1.0) &&
         "Similarity value should be in [0, 1]");
  return Similarity * 100 > FuncProfileSimilarityThreshold;
}

// If FindMatchedProfileOnly is set to true, only use the processed function
// results. This is used for skipping the repeated recursive matching.
bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc,
                                                  const FunctionId &ProfFunc,
                                                  bool FindMatchedProfileOnly) {
  auto R = FuncProfileMatchCache.find({&IRFunc, ProfFunc});
  if (R != FuncProfileMatchCache.end())
    return R->second;

  if (FindMatchedProfileOnly)
    return false;

  bool Matched = functionMatchesProfileHelper(IRFunc, ProfFunc);
  FuncProfileMatchCache[{&IRFunc, ProfFunc}] = Matched;
  if (Matched) {
    FuncToProfileNameMap[&IRFunc] = ProfFunc;
    LLVM_DEBUG(dbgs() << "Function:" << IRFunc.getName()
                      << " matches profile:" << ProfFunc << "\n");
  }

  return Matched;
}

void SampleProfileMatcher::UpdateWithSalvagedProfiles() {
  DenseSet<StringRef> ProfileSalvagedFuncs;
  // Update FuncNameToProfNameMap and SymbolMap.
  for (auto &I : FuncToProfileNameMap) {
    assert(I.first && "New function is null");
    FunctionId FuncName(I.first->getName());
    ProfileSalvagedFuncs.insert(I.second.stringRef());
    FuncNameToProfNameMap->emplace(FuncName, I.second);

    // We need to remove the old entry to avoid duplicating the function
    // processing.
    SymbolMap->erase(FuncName);
    SymbolMap->emplace(I.second, I.first);
  }

  // With extbinary profile format, initial profile loading only reads profile
  // based on current function names in the module, so we need to load top-level
  // profiles for functions with different profile name explicitly after
  // function-profile name map is established with stale profile matching.
  Reader.read(ProfileSalvagedFuncs);
  Reader.setFuncNameToProfNameMap(*FuncNameToProfNameMap);
}

void SampleProfileMatcher::runOnModule() {
  ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
                                   FunctionSamples::ProfileIsCS);
  if (SalvageUnusedProfile)
    findFunctionsWithoutProfile();

  // Process the matching in top-down order so that the caller matching result
  // can be used to the callee matching.
  std::vector<Function *> TopDownFunctionList;
  TopDownFunctionList.reserve(M.size());
  buildTopDownFuncOrder(CG, TopDownFunctionList);
  for (auto *F : TopDownFunctionList) {
    if (skipProfileForFunction(*F))
      continue;
    runOnFunction(*F);
  }

  if (SalvageUnusedProfile)
    UpdateWithSalvagedProfiles();

  if (SalvageStaleProfile)
    distributeIRToProfileLocationMap();

  computeAndReportProfileStaleness();
}

void SampleProfileMatcher::distributeIRToProfileLocationMap(
    FunctionSamples &FS) {
  const auto ProfileMappings = FuncMappings.find(FS.getFuncName());
  if (ProfileMappings != FuncMappings.end()) {
    FS.setIRToProfileLocationMap(&(ProfileMappings->second));
  }

  for (auto &Callees :
       const_cast<CallsiteSampleMap &>(FS.getCallsiteSamples())) {
    for (auto &FS : Callees.second) {
      distributeIRToProfileLocationMap(FS.second);
    }
  }
}

// Use a central place to distribute the matching results. Outlined and inlined
// profile with the function name will be set to the same pointer.
void SampleProfileMatcher::distributeIRToProfileLocationMap() {
  for (auto &I : Reader.getProfiles()) {
    distributeIRToProfileLocationMap(I.second);
  }
}
