[BOLT][heatmap] Compute section utilization and partition score (#139193)
Heatmap groups samples into buckets of configurable size (`--block-size`
flag; the default is 64 bytes, the X86 cache line size). Buckets are
mapped to their containing sections; a bucket that covers multiple
sections is attributed to the first overlapping section. Buckets not
mapped to any section are reported as unmapped.
Heatmap reports **section hotness**, which is the percentage of samples
attributed to the section.
Define **section utilization** as the percentage of a section's buckets
that have non-zero samples, relative to the total number of buckets in
the section.
Also define section **partition score** as the product of section
hotness (where the total excludes samples from unmapped buckets) and
section utilization, ranging from 0 to 1 (higher is better).
The new metrics are intended for use with a **production profile**
collected from a BOLT-optimized binary. In this case, the partition
score of .text (hot text if function splitting is enabled) reflects the
representativeness of the **optimization profile** and the quality of
hot-cold splitting.
A partition score of 1 means that all samples fall into hot text and
all buckets (cache lines) in hot text are exercised, which is
equivalent to perfect hot-cold splitting.
Test Plan: updated heatmap-preagg.test
diff --git a/bolt/lib/Profile/Heatmap.cpp b/bolt/lib/Profile/Heatmap.cpp
index 5fc3e06..c86a4c9 100644
--- a/bolt/lib/Profile/Heatmap.cpp
+++ b/bolt/lib/Profile/Heatmap.cpp
@@ -297,6 +297,7 @@
void Heatmap::printSectionHotness(raw_ostream &OS) const {
uint64_t NumTotalCounts = 0;
StringMap<uint64_t> SectionHotness;
+ StringMap<uint64_t> BucketUtilization;
unsigned TextSectionIndex = 0;
if (TextSections.empty())
@@ -325,23 +326,29 @@
continue;
}
SectionHotness[TextSections[TextSectionIndex].Name] += KV.second;
+ ++BucketUtilization[TextSections[TextSectionIndex].Name];
}
assert(NumTotalCounts > 0 &&
"total number of heatmap buckets should be greater than 0");
- OS << "Section Name, Begin Address, End Address, Percentage Hotness\n";
- for (auto &TextSection : TextSections) {
- OS << TextSection.Name << ", 0x"
- << Twine::utohexstr(TextSection.BeginAddress) << ", 0x"
- << Twine::utohexstr(TextSection.EndAddress) << ", "
- << format("%.4f",
- 100.0 * SectionHotness[TextSection.Name] / NumTotalCounts)
- << "\n";
+ OS << "Section Name, Begin Address, End Address, Percentage Hotness, "
+ << "Utilization Pct, Partition Score\n";
+ const uint64_t MappedCounts = NumTotalCounts - UnmappedHotness;
+ for (const auto [Name, Begin, End] : TextSections) {
+ const float Hotness = 1. * SectionHotness[Name] / NumTotalCounts;
+ const float MappedHotness =
+ MappedCounts ? 1. * SectionHotness[Name] / MappedCounts : 0;
+ const uint64_t NumBuckets =
+ End / BucketSize + !!(End % BucketSize) - Begin / BucketSize;
+ const float Utilization = 1. * BucketUtilization[Name] / NumBuckets;
+ const float PartitionScore = MappedHotness * Utilization;
+ OS << formatv("{0}, {1:x}, {2:x}, {3:f4}, {4:f4}, {5:f4}\n", Name, Begin,
+ End, 100. * Hotness, 100. * Utilization, PartitionScore);
}
if (UnmappedHotness > 0)
- OS << "[unmapped], 0x0, 0x0, "
- << format("%.4f", 100.0 * UnmappedHotness / NumTotalCounts) << "\n";
+ OS << formatv("[unmapped], 0x0, 0x0, {0:f4}, 0, 0\n",
+ 100.0 * UnmappedHotness / NumTotalCounts);
}
} // namespace bolt
} // namespace llvm
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 520bd67..cf6782a 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -1453,7 +1453,7 @@
}
void RewriteInstance::registerFragments() {
- if (!BC->HasSplitFunctions)
+ if (!BC->HasSplitFunctions || opts::HeatmapMode)
return;
// Process fragments with ambiguous parents separately as they are typically a
diff --git a/bolt/test/X86/heatmap-preagg.test b/bolt/test/X86/heatmap-preagg.test
index 00d4d52..702dc80 100644
--- a/bolt/test/X86/heatmap-preagg.test
+++ b/bolt/test/X86/heatmap-preagg.test
@@ -17,17 +17,19 @@
CHECK-HEATMAP: PERF2BOLT: read 81 aggregated LBR entries
CHECK-HEATMAP: HEATMAP: invalid traces: 1
-CHECK-SEC-HOT: .init, 0x401000, 0x40101b, 16.8545
-CHECK-SEC-HOT-NEXT: .plt, 0x401020, 0x4010b0, 4.7583
-CHECK-SEC-HOT-NEXT: .text, 0x4010b0, 0x401c25, 78.3872
-CHECK-SEC-HOT-NEXT: .fini, 0x401c28, 0x401c35, 0.0000
+CHECK-SEC-HOT: Section Name, Begin Address, End Address, Percentage Hotness, Utilization Pct, Partition Score
+CHECK-SEC-HOT-NEXT: .init, 0x401000, 0x40101b, 16.8545, 100.0000, 0.1685
+CHECK-SEC-HOT-NEXT: .plt, 0x401020, 0x4010b0, 4.7583, 66.6667, 0.0317
+CHECK-SEC-HOT-NEXT: .text, 0x4010b0, 0x401c25, 78.3872, 85.1064, 0.6671
+CHECK-SEC-HOT-NEXT: .fini, 0x401c28, 0x401c35, 0.0000, 0.0000, 0.0000
CHECK-HEATMAP-BAT: PERF2BOLT: read 79 aggregated LBR entries
CHECK-HEATMAP-BAT: HEATMAP: invalid traces: 2
-CHECK-SEC-HOT-BAT: .init, 0x401000, 0x40101b, 17.2888
-CHECK-SEC-HOT-BAT-NEXT: .plt, 0x401020, 0x4010b0, 5.6132
-CHECK-SEC-HOT-BAT-NEXT: .bolt.org.text, 0x4010b0, 0x401c25, 38.3385
-CHECK-SEC-HOT-BAT-NEXT: .fini, 0x401c28, 0x401c35, 0.0000
-CHECK-SEC-HOT-BAT-NEXT: .text, 0x800000, 0x8002cc, 38.7595
-CHECK-SEC-HOT-BAT-NEXT: .text.cold, 0x800300, 0x800415, 0.0000
+CHECK-SEC-HOT-BAT: Section Name, Begin Address, End Address, Percentage Hotness, Utilization Pct, Partition Score
+CHECK-SEC-HOT-BAT-NEXT: .init, 0x401000, 0x40101b, 17.2888, 100.0000, 0.1729
+CHECK-SEC-HOT-BAT-NEXT: .plt, 0x401020, 0x4010b0, 5.6132, 66.6667, 0.0374
+CHECK-SEC-HOT-BAT-NEXT: .bolt.org.text, 0x4010b0, 0x401c25, 38.3385, 51.0638, 0.1958
+CHECK-SEC-HOT-BAT-NEXT: .fini, 0x401c28, 0x401c35, 0.0000, 0.0000, 0.0000
+CHECK-SEC-HOT-BAT-NEXT: .text, 0x800000, 0x8002cc, 38.7595, 91.6667, 0.3553
+CHECK-SEC-HOT-BAT-NEXT: .text.cold, 0x800300, 0x800415, 0.0000, 0.0000, 0.0000