| //=-- ProfilesummaryBuilder.cpp - Profile summary computation ---------------=// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file contains support for computing profile summary data. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "llvm/IR/Attributes.h" |
| #include "llvm/IR/Function.h" |
| #include "llvm/IR/Metadata.h" |
| #include "llvm/IR/Type.h" |
| #include "llvm/ProfileData/InstrProf.h" |
| #include "llvm/ProfileData/ProfileCommon.h" |
| #include "llvm/ProfileData/SampleProf.h" |
| #include "llvm/Support/Casting.h" |
| #include "llvm/Support/CommandLine.h" |
| |
| using namespace llvm; |
| |
| cl::opt<bool> UseContextLessSummary( |
| "profile-summary-contextless", cl::Hidden, cl::init(false), cl::ZeroOrMore, |
| cl::desc("Merge context profiles before calculating thresholds.")); |
| |
| // The following two parameters determine the threshold for a count to be |
| // considered hot/cold. These two parameters are percentile values (multiplied |
| // by 10000). If the counts are sorted in descending order, the minimum count to |
| // reach ProfileSummaryCutoffHot gives the threshold to determine a hot count. |
| // Similarly, the minimum count to reach ProfileSummaryCutoffCold gives the |
| // threshold for determining cold count (everything <= this threshold is |
| // considered cold). |
| cl::opt<int> ProfileSummaryCutoffHot( |
| "profile-summary-cutoff-hot", cl::Hidden, cl::init(990000), cl::ZeroOrMore, |
| cl::desc("A count is hot if it exceeds the minimum count to" |
| " reach this percentile of total counts.")); |
| |
| cl::opt<int> ProfileSummaryCutoffCold( |
| "profile-summary-cutoff-cold", cl::Hidden, cl::init(999999), cl::ZeroOrMore, |
| cl::desc("A count is cold if it is below the minimum count" |
| " to reach this percentile of total counts.")); |
| |
| cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold( |
| "profile-summary-huge-working-set-size-threshold", cl::Hidden, |
| cl::init(15000), cl::ZeroOrMore, |
| cl::desc("The code working set size is considered huge if the number of" |
| " blocks required to reach the -profile-summary-cutoff-hot" |
| " percentile exceeds this count.")); |
| |
| cl::opt<unsigned> ProfileSummaryLargeWorkingSetSizeThreshold( |
| "profile-summary-large-working-set-size-threshold", cl::Hidden, |
| cl::init(12500), cl::ZeroOrMore, |
| cl::desc("The code working set size is considered large if the number of" |
| " blocks required to reach the -profile-summary-cutoff-hot" |
| " percentile exceeds this count.")); |
| |
| // The next two options override the counts derived from summary computation and |
| // are useful for debugging purposes. |
| cl::opt<int> ProfileSummaryHotCount( |
| "profile-summary-hot-count", cl::ReallyHidden, cl::ZeroOrMore, |
| cl::desc("A fixed hot count that overrides the count derived from" |
| " profile-summary-cutoff-hot")); |
| |
| cl::opt<int> ProfileSummaryColdCount( |
| "profile-summary-cold-count", cl::ReallyHidden, cl::ZeroOrMore, |
| cl::desc("A fixed cold count that overrides the count derived from" |
| " profile-summary-cutoff-cold")); |
| |
| // A set of cutoff values. Each value, when divided by ProfileSummary::Scale |
| // (which is 1000000) is a desired percentile of total counts. |
| static const uint32_t DefaultCutoffsData[] = { |
| 10000, /* 1% */ |
| 100000, /* 10% */ |
| 200000, 300000, 400000, 500000, 600000, 700000, 800000, |
| 900000, 950000, 990000, 999000, 999900, 999990, 999999}; |
| const ArrayRef<uint32_t> ProfileSummaryBuilder::DefaultCutoffs = |
| DefaultCutoffsData; |
| |
| const ProfileSummaryEntry & |
| ProfileSummaryBuilder::getEntryForPercentile(const SummaryEntryVector &DS, |
| uint64_t Percentile) { |
| auto It = partition_point(DS, [=](const ProfileSummaryEntry &Entry) { |
| return Entry.Cutoff < Percentile; |
| }); |
| // The required percentile has to be <= one of the percentiles in the |
| // detailed summary. |
| if (It == DS.end()) |
| report_fatal_error("Desired percentile exceeds the maximum cutoff"); |
| return *It; |
| } |
| |
| void InstrProfSummaryBuilder::addRecord(const InstrProfRecord &R) { |
| // The first counter is not necessarily an entry count for IR |
| // instrumentation profiles. |
| // Eventually MaxFunctionCount will become obsolete and this can be |
| // removed. |
| addEntryCount(R.Counts[0]); |
| for (size_t I = 1, E = R.Counts.size(); I < E; ++I) |
| addInternalCount(R.Counts[I]); |
| } |
| |
| // To compute the detailed summary, we consider each line containing samples as |
| // equivalent to a block with a count in the instrumented profile. |
| void SampleProfileSummaryBuilder::addRecord( |
| const sampleprof::FunctionSamples &FS, bool isCallsiteSample) { |
| if (!isCallsiteSample) { |
| NumFunctions++; |
| if (FS.getHeadSamples() > MaxFunctionCount) |
| MaxFunctionCount = FS.getHeadSamples(); |
| } |
| for (const auto &I : FS.getBodySamples()) { |
| uint64_t Count = I.second.getSamples(); |
| addCount(Count); |
| } |
| for (const auto &I : FS.getCallsiteSamples()) |
| for (const auto &CS : I.second) |
| addRecord(CS.second, true); |
| } |
| |
| // The argument to this method is a vector of cutoff percentages and the return |
| // value is a vector of (Cutoff, MinCount, NumCounts) triplets. |
| void ProfileSummaryBuilder::computeDetailedSummary() { |
| if (DetailedSummaryCutoffs.empty()) |
| return; |
| llvm::sort(DetailedSummaryCutoffs); |
| auto Iter = CountFrequencies.begin(); |
| const auto End = CountFrequencies.end(); |
| |
| uint32_t CountsSeen = 0; |
| uint64_t CurrSum = 0, Count = 0; |
| |
| for (const uint32_t Cutoff : DetailedSummaryCutoffs) { |
| assert(Cutoff <= 999999); |
| APInt Temp(128, TotalCount); |
| APInt N(128, Cutoff); |
| APInt D(128, ProfileSummary::Scale); |
| Temp *= N; |
| Temp = Temp.sdiv(D); |
| uint64_t DesiredCount = Temp.getZExtValue(); |
| assert(DesiredCount <= TotalCount); |
| while (CurrSum < DesiredCount && Iter != End) { |
| Count = Iter->first; |
| uint32_t Freq = Iter->second; |
| CurrSum += (Count * Freq); |
| CountsSeen += Freq; |
| Iter++; |
| } |
| assert(CurrSum >= DesiredCount); |
| ProfileSummaryEntry PSE = {Cutoff, Count, CountsSeen}; |
| DetailedSummary.push_back(PSE); |
| } |
| } |
| |
| uint64_t |
| ProfileSummaryBuilder::getHotCountThreshold(const SummaryEntryVector &DS) { |
| auto &HotEntry = |
| ProfileSummaryBuilder::getEntryForPercentile(DS, ProfileSummaryCutoffHot); |
| uint64_t HotCountThreshold = HotEntry.MinCount; |
| if (ProfileSummaryHotCount.getNumOccurrences() > 0) |
| HotCountThreshold = ProfileSummaryHotCount; |
| return HotCountThreshold; |
| } |
| |
| uint64_t |
| ProfileSummaryBuilder::getColdCountThreshold(const SummaryEntryVector &DS) { |
| auto &ColdEntry = ProfileSummaryBuilder::getEntryForPercentile( |
| DS, ProfileSummaryCutoffCold); |
| uint64_t ColdCountThreshold = ColdEntry.MinCount; |
| if (ProfileSummaryColdCount.getNumOccurrences() > 0) |
| ColdCountThreshold = ProfileSummaryColdCount; |
| return ColdCountThreshold; |
| } |
| |
| std::unique_ptr<ProfileSummary> SampleProfileSummaryBuilder::getSummary() { |
| computeDetailedSummary(); |
| return std::make_unique<ProfileSummary>( |
| ProfileSummary::PSK_Sample, DetailedSummary, TotalCount, MaxCount, 0, |
| MaxFunctionCount, NumCounts, NumFunctions); |
| } |
| |
| std::unique_ptr<ProfileSummary> |
| SampleProfileSummaryBuilder::computeSummaryForProfiles( |
| const SampleProfileMap &Profiles) { |
| assert(NumFunctions == 0 && |
| "This can only be called on an empty summary builder"); |
| sampleprof::SampleProfileMap ContextLessProfiles; |
| const sampleprof::SampleProfileMap *ProfilesToUse = &Profiles; |
| // For CSSPGO, context-sensitive profile effectively split a function profile |
| // into many copies each representing the CFG profile of a particular calling |
| // context. That makes the count distribution looks more flat as we now have |
| // more function profiles each with lower counts, which in turn leads to lower |
| // hot thresholds. To compensate for that, by default we merge context |
| // profiles before computing profile summary. |
| if (UseContextLessSummary || (sampleprof::FunctionSamples::ProfileIsCS && |
| !UseContextLessSummary.getNumOccurrences())) { |
| for (const auto &I : Profiles) { |
| ContextLessProfiles[I.second.getName()].merge(I.second); |
| } |
| ProfilesToUse = &ContextLessProfiles; |
| } |
| |
| for (const auto &I : *ProfilesToUse) { |
| const sampleprof::FunctionSamples &Profile = I.second; |
| addRecord(Profile); |
| } |
| |
| return getSummary(); |
| } |
| |
| std::unique_ptr<ProfileSummary> InstrProfSummaryBuilder::getSummary() { |
| computeDetailedSummary(); |
| return std::make_unique<ProfileSummary>( |
| ProfileSummary::PSK_Instr, DetailedSummary, TotalCount, MaxCount, |
| MaxInternalBlockCount, MaxFunctionCount, NumCounts, NumFunctions); |
| } |
| |
| void InstrProfSummaryBuilder::addEntryCount(uint64_t Count) { |
| NumFunctions++; |
| |
| // Skip invalid count. |
| if (Count == (uint64_t)-1) |
| return; |
| |
| addCount(Count); |
| if (Count > MaxFunctionCount) |
| MaxFunctionCount = Count; |
| } |
| |
| void InstrProfSummaryBuilder::addInternalCount(uint64_t Count) { |
| // Skip invalid count. |
| if (Count == (uint64_t)-1) |
| return; |
| |
| addCount(Count); |
| if (Count > MaxInternalBlockCount) |
| MaxInternalBlockCount = Count; |
| } |