[scudo] Release pages of larger blocks more frequently

Releasing pages for large blocks (size greater than a page) is faster
than for small blocks. Besides, larger blocks are expected to be used
less often than smaller blocks, which means we may hold several pages
used by large blocks and rarely get a chance to release them if there's
no explicit M_PURGE call. Therefore, relax the release-interval
condition for large blocks.
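
A minimal sketch of the relaxed condition, in hypothetical standalone
form (byPassReleaseInterval is an illustrative helper, not the actual
function; names otherwise mirror the diff below):

  // Assumes scudo's uptr integer type. BlockSize is the size-class
  // block size; PushedBytesDelta is the bytes pushed back to the
  // freelist since the last checkpoint. The constant 8 comes from
  // profiling: large size classes held around 16 unreleased pages,
  // and half of that is used as the threshold.
  static bool byPassReleaseInterval(uptr BlockSize, uptr PushedBytesDelta,
                                    uptr PageSize) {
    const bool IsLargeBlock = BlockSize > PageSize;
    return IsLargeBlock && PushedBytesDelta > 8 * PageSize;
  }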

This also fixes the handling of ForceAll, which is expected to always
attempt a page release.
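
In the same sketch form, the small-delta early return is now gated on
the release type, so a ForceAll request always reaches the page-release
pass:

  // ForceAll is exempt from the small-delta skip below.
  if (PushedBytesDelta < PageSize && ReleaseType != ReleaseToOS::ForceAll)
    return 0; // Too few new free bytes; skip unless forced.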

Differential Revision: https://reviews.llvm.org/D151290

GitOrigin-RevId: 5172beb4fa204ebbfda831cca60c39b160faf2a6
diff --git a/primary32.h b/primary32.h
index b3d6e53..6d306ef 100644
--- a/primary32.h
+++ b/primary32.h
@@ -131,6 +131,16 @@
     return CompactPtrGroupBase;
   }
 
+  ALWAYS_INLINE static bool isSmallBlock(uptr BlockSize) {
+    const uptr PageSize = getPageSizeCached();
+    return BlockSize < PageSize / 16U;
+  }
+
+  ALWAYS_INLINE static bool isLargeBlock(uptr BlockSize) {
+    const uptr PageSize = getPageSizeCached();
+    return BlockSize > PageSize;
+  }
+
   TransferBatch *popBatch(CacheT *C, uptr ClassId) {
     DCHECK_LT(ClassId, NumClasses);
     SizeClassInfo *Sci = getSizeClassInfo(ClassId);
@@ -740,12 +750,8 @@
     if (UNLIKELY(BytesInFreeList == 0))
       return 0;
 
-    bool MaySkip = false;
-
-    if (BytesInFreeList <= Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint) {
+    if (BytesInFreeList <= Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint)
       Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList;
-      MaySkip = true;
-    }
 
     // Always update `BytesInFreeListAtLastCheckpoint` with the smallest value
     // so that we won't underestimate the releasable pages. For example, the
@@ -765,34 +771,38 @@
     // (BytesInFreeListAtLastCheckpoint - BytesInFreeList).
     const uptr PushedBytesDelta =
         BytesInFreeList - Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint;
-    if (PushedBytesDelta < PageSize)
-      MaySkip = true;
+    if (PushedBytesDelta < PageSize && ReleaseType != ReleaseToOS::ForceAll)
+      return 0;
 
     const bool CheckDensity =
-        BlockSize < PageSize / 16U && ReleaseType != ReleaseToOS::ForceAll;
+        isSmallBlock(BlockSize) && ReleaseType != ReleaseToOS::ForceAll;
     // Releasing smaller blocks is expensive, so we want to make sure that a
     // significant amount of bytes are free, and that there has been a good
     // amount of batches pushed to the freelist before attempting to release.
-    if (CheckDensity) {
-      if (ReleaseType == ReleaseToOS::Normal &&
-          PushedBytesDelta < Sci->AllocatedUser / 16U) {
-        MaySkip = true;
-      }
-    }
-
-    if (MaySkip && ReleaseType != ReleaseToOS::ForceAll)
-      return 0;
+    if (CheckDensity && ReleaseType == ReleaseToOS::Normal)
+      if (PushedBytesDelta < Sci->AllocatedUser / 16U)
+        return 0;
 
     if (ReleaseType == ReleaseToOS::Normal) {
       const s32 IntervalMs = atomic_load_relaxed(&ReleaseToOsIntervalMs);
       if (IntervalMs < 0)
         return 0;
-      if (Sci->ReleaseInfo.LastReleaseAtNs +
-              static_cast<u64>(IntervalMs) * 1000000 >
-          getMonotonicTimeFast()) {
-        return 0; // Memory was returned recently.
+
+      // The constant 8 here is selected from profiling some apps; the number
+      // of unreleased pages in the large size classes is around 16 pages or
+      // more. Choosing half of that as the heuristic also avoids triggering
+      // a page release on every pushBlocks() attempt by large blocks.
+      const bool ByPassReleaseInterval =
+          isLargeBlock(BlockSize) && PushedBytesDelta > 8 * PageSize;
+      if (!ByPassReleaseInterval) {
+        if (Sci->ReleaseInfo.LastReleaseAtNs +
+                static_cast<u64>(IntervalMs) * 1000000 >
+            getMonotonicTimeFast()) {
+          // Memory was returned recently.
+          return 0;
+        }
       }
-    }
+    } // if (ReleaseType == ReleaseToOS::Normal)
 
     const uptr First = Sci->MinRegionIndex;
     const uptr Last = Sci->MaxRegionIndex;
@@ -835,7 +845,7 @@
         continue;
       }
       const uptr PushedBytesDelta = BytesInBG - BG.BytesInBGAtLastCheckpoint;
-      if (PushedBytesDelta < PageSize)
+      if (ReleaseType != ReleaseToOS::ForceAll && PushedBytesDelta < PageSize)
         continue;
 
       // Given the randomness property, we try to release the pages only if the
diff --git a/primary64.h b/primary64.h
index d3a1aea..2c76c5f 100644
--- a/primary64.h
+++ b/primary64.h
@@ -506,6 +506,11 @@
     return BlockSize < PageSize / 16U;
   }
 
+  ALWAYS_INLINE static bool isLargeBlock(uptr BlockSize) {
+    const uptr PageSize = getPageSizeCached();
+    return BlockSize > PageSize;
+  }
+
   // Push the blocks to their batch group. The layout will be like,
   //
   // FreeList - > BG -> BG -> BG
@@ -848,8 +853,6 @@
     if (UNLIKELY(BytesInFreeList == 0))
       return 0;
 
-    bool MaySkip = false;
-
     // Always update `BytesInFreeListAtLastCheckpoint` with the smallest value
     // so that we won't underestimate the releasable pages. For example, the
     // following is the region usage,
@@ -869,38 +872,43 @@
     if (BytesInFreeList <=
         Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint) {
       Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList;
-      MaySkip = true;
     }
 
     const uptr RegionPushedBytesDelta =
         BytesInFreeList - Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint;
-    if (RegionPushedBytesDelta < PageSize)
-      MaySkip = true;
+    if (RegionPushedBytesDelta < PageSize &&
+        ReleaseType != ReleaseToOS::ForceAll) {
+      return 0;
+    }
 
     const bool CheckDensity = isSmallBlock(BlockSize);
     // Releasing smaller blocks is expensive, so we want to make sure that a
     // significant amount of bytes are free, and that there has been a good
     // amount of batches pushed to the freelist before attempting to release.
-    if (CheckDensity) {
-      if (ReleaseType == ReleaseToOS::Normal &&
-          RegionPushedBytesDelta < Region->TryReleaseThreshold) {
-        MaySkip = true;
-      }
-    }
-
-    if (MaySkip && ReleaseType != ReleaseToOS::ForceAll)
-      return 0;
+    if (CheckDensity && ReleaseType == ReleaseToOS::Normal)
+      if (RegionPushedBytesDelta < Region->TryReleaseThreshold)
+        return 0;
 
     if (ReleaseType == ReleaseToOS::Normal) {
       const s32 IntervalMs = atomic_load_relaxed(&ReleaseToOsIntervalMs);
       if (IntervalMs < 0)
         return 0;
-      if (Region->ReleaseInfo.LastReleaseAtNs +
-              static_cast<u64>(IntervalMs) * 1000000 >
-          getMonotonicTimeFast()) {
-        return 0; // Memory was returned recently.
+
+      // The constant 8 here is selected from profiling some apps; the number
+      // of unreleased pages in the large size classes is around 16 pages or
+      // more. Choosing half of that as the heuristic also avoids triggering
+      // a page release on every pushBlocks() attempt by large blocks.
+      const bool ByPassReleaseInterval =
+          isLargeBlock(BlockSize) && RegionPushedBytesDelta > 8 * PageSize;
+      if (!ByPassReleaseInterval) {
+        if (Region->ReleaseInfo.LastReleaseAtNs +
+                static_cast<u64>(IntervalMs) * 1000000 >
+            getMonotonicTimeFast()) {
+          // Memory was returned recently.
+          return 0;
+        }
       }
-    }
+    } // if (ReleaseType == ReleaseToOS::Normal)
 
     const uptr GroupSize = (1U << GroupSizeLog);
     const uptr AllocatedUserEnd = Region->AllocatedUser + Region->RegionBeg;
diff --git a/tests/primary_test.cpp b/tests/primary_test.cpp
index 51a7038..fbb0cfe 100644
--- a/tests/primary_test.cpp
+++ b/tests/primary_test.cpp
@@ -322,7 +322,7 @@
   EXPECT_NE(P, nullptr);
   Cache.deallocate(ClassId, P);
   Cache.destroy(nullptr);
-  EXPECT_GT(Allocator->releaseToOS(scudo::ReleaseToOS::Force), 0U);
+  EXPECT_GT(Allocator->releaseToOS(scudo::ReleaseToOS::ForceAll), 0U);
 }
 
 SCUDO_TYPED_TEST(ScudoPrimaryTest, MemoryGroup) {