[scudo][standalone] Skip irrelevant regions during release
With the 'new' way of releasing on 32-bit, we iterate through all the
regions in between `First` and `Last`, which includes regions that do not
belong to the size class we are working with. Processing those regions is
effectively wasted cycles.
With this change, we add a `SkipRegion` lambda to `releaseFreeMemoryToOS`
that will allow the release function to know when to skip a region.
For the 64-bit primary, since we are only working with 1 region, we never
skip.
Reviewed By: hctim
Differential Revision: https://reviews.llvm.org/D86399
GitOrigin-RevId: bd5ca4f0ed4adfa29150c18a621acb3e71d41450
diff --git a/primary32.h b/primary32.h
index e41b949..0a985fb 100644
--- a/primary32.h
+++ b/primary32.h
@@ -483,12 +483,15 @@
}
}
uptr TotalReleasedBytes = 0;
+ auto SkipRegion = [this, First, ClassId](uptr RegionIndex) {
+ return (PossibleRegions[First + RegionIndex] - 1U) != ClassId;
+ };
if (First && Last) {
const uptr Base = First * RegionSize;
const uptr NumberOfRegions = Last - First + 1U;
ReleaseRecorder Recorder(Base);
releaseFreeMemoryToOS(Sci->FreeList, Base, RegionSize, NumberOfRegions,
- BlockSize, &Recorder);
+ BlockSize, &Recorder, SkipRegion);
if (Recorder.getReleasedRangesCount() > 0) {
Sci->ReleaseInfo.PushedBlocksAtLastRelease = Sci->Stats.PushedBlocks;
Sci->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount();
diff --git a/primary64.h b/primary64.h
index ad92ae2..933b1ee 100644
--- a/primary64.h
+++ b/primary64.h
@@ -479,9 +479,11 @@
}
}
+ auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; };
ReleaseRecorder Recorder(Region->RegionBeg, &Region->Data);
releaseFreeMemoryToOS(Region->FreeList, Region->RegionBeg,
- Region->AllocatedUser, 1U, BlockSize, &Recorder);
+ Region->AllocatedUser, 1U, BlockSize, &Recorder,
+ SkipRegion);
if (Recorder.getReleasedRangesCount() > 0) {
Region->ReleaseInfo.PushedBlocksAtLastRelease =
diff --git a/release.h b/release.h
index 748e1c0..cd9e66d 100644
--- a/release.h
+++ b/release.h
@@ -156,6 +156,11 @@
CurrentPage++;
}
+ void skipPages(uptr N) { // Advance the tracker by N pages without examining them.
+ closeOpenedRange(); // Called before moving on; presumably ends any range still open so it does not extend over the skipped pages.
+ CurrentPage += N; // Move the page cursor past the N skipped pages.
+ }
+
void finish() { closeOpenedRange(); }
private:
@@ -174,11 +179,11 @@
uptr CurrentRangeStatePage = 0;
};
-template <class TransferBatchT, class ReleaseRecorderT>
+template <class TransferBatchT, class ReleaseRecorderT, typename SkipRegionT>
NOINLINE void
releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr Base,
uptr RegionSize, uptr NumberOfRegions, uptr BlockSize,
- ReleaseRecorderT *Recorder) {
+ ReleaseRecorderT *Recorder, SkipRegionT SkipRegion) {
const uptr PageSize = getPageSizeCached();
// Figure out the number of chunks per page and whether we can take a fast
@@ -283,10 +288,15 @@
FreePagesRangeTracker<ReleaseRecorderT> RangeTracker(Recorder);
if (SameBlockCountPerPage) {
// Fast path, every page has the same number of chunks affecting it.
- for (uptr I = 0; I < NumberOfRegions; I++)
+ for (uptr I = 0; I < NumberOfRegions; I++) {
+ if (SkipRegion(I)) {
+ RangeTracker.skipPages(PagesCount);
+ continue;
+ }
for (uptr J = 0; J < PagesCount; J++)
RangeTracker.processNextPage(Counters.get(I, J) ==
FullPagesBlockCountMax);
+ }
} else {
// Slow path, go through the pages keeping count how many chunks affect
// each page.
@@ -298,6 +308,10 @@
// up the number of chunks on the current page and checking on every step
// whether the page boundary was crossed.
for (uptr I = 0; I < NumberOfRegions; I++) {
+ if (SkipRegion(I)) {
+ RangeTracker.skipPages(PagesCount);
+ continue;
+ }
uptr PrevPageBoundary = 0;
uptr CurrentBoundary = 0;
for (uptr J = 0; J < PagesCount; J++) {
diff --git a/tests/release_test.cpp b/tests/release_test.cpp
index a693b97..9e991a7 100644
--- a/tests/release_test.cpp
+++ b/tests/release_test.cpp
@@ -190,9 +190,10 @@
}
// Release the memory.
+ auto SkipRegion = [](UNUSED scudo::uptr RegionIndex) { return false; };
ReleasedPagesRecorder Recorder;
releaseFreeMemoryToOS(FreeList, 0, MaxBlocks * BlockSize, 1U, BlockSize,
- &Recorder);
+ &Recorder, SkipRegion);
// Verify that there are no released pages touched by used chunks and all
// ranges of free chunks big enough to contain the entire memory pages had