[scudo][standalone] Release smaller blocks less often

Summary:
Releasing smaller blocks is costly and only yields significant
results when there is a large percentage of free bytes for a given
size class (see numbers below).

This CL introduces a couple of additional checks for sizes lower
than 256. First we want to make sure that there is enough free bytes,
relatively to the amount of allocated bytes. We are looking at 8X% to
9X% (smaller blocks require higher percentage). We also want to make
sure there has been enough activity with the freelist to make it
worth the time, so we now check that the bytes pushed to the freelist
is at least 1/16th of the allocated bytes for those classes.

Additionally, we clear batches before destroying them now - this
could have prevented some releases to occur (class id 0 rarely
releases anyway).

Here are the numbers, for about 1M allocations in multiple threads:

Size: 16
85% freed -> 0% released
86% freed -> 0% released
87% freed -> 0% released
88% freed -> 0% released
89% freed -> 0% released
90% freed -> 0% released
91% freed -> 0% released
92% freed -> 0% released
93% freed -> 0% released
94% freed -> 0% released
95% freed -> 0% released
96% freed -> 0% released
97% freed -> 2% released
98% freed -> 7% released
99% freed -> 27% released
Size: 32
85% freed -> 0% released
86% freed -> 0% released
87% freed -> 0% released
88% freed -> 0% released
89% freed -> 0% released
90% freed -> 0% released
91% freed -> 0% released
92% freed -> 0% released
93% freed -> 0% released
94% freed -> 0% released
95% freed -> 1% released
96% freed -> 3% released
97% freed -> 7% released
98% freed -> 17% released
99% freed -> 41% released
Size: 48
85% freed -> 0% released
86% freed -> 0% released
87% freed -> 0% released
88% freed -> 0% released
89% freed -> 0% released
90% freed -> 0% released
91% freed -> 0% released
92% freed -> 0% released
93% freed -> 0% released
94% freed -> 1% released
95% freed -> 3% released
96% freed -> 7% released
97% freed -> 13% released
98% freed -> 27% released
99% freed -> 52% released
Size: 64
85% freed -> 0% released
86% freed -> 0% released
87% freed -> 0% released
88% freed -> 0% released
89% freed -> 0% released
90% freed -> 0% released
91% freed -> 0% released
92% freed -> 1% released
93% freed -> 2% released
94% freed -> 3% released
95% freed -> 6% released
96% freed -> 11% released
97% freed -> 20% released
98% freed -> 35% released
99% freed -> 59% released
Size: 80
85% freed -> 0% released
86% freed -> 0% released
87% freed -> 0% released
88% freed -> 0% released
89% freed -> 0% released
90% freed -> 1% released
91% freed -> 1% released
92% freed -> 2% released
93% freed -> 4% released
94% freed -> 6% released
95% freed -> 10% released
96% freed -> 17% released
97% freed -> 26% released
98% freed -> 41% released
99% freed -> 64% released
Size: 96
85% freed -> 0% released
86% freed -> 0% released
87% freed -> 0% released
88% freed -> 0% released
89% freed -> 1% released
90% freed -> 1% released
91% freed -> 3% released
92% freed -> 4% released
93% freed -> 6% released
94% freed -> 10% released
95% freed -> 14% released
96% freed -> 21% released
97% freed -> 31% released
98% freed -> 47% released
99% freed -> 68% released
Size: 112
85% freed -> 0% released
86% freed -> 1% released
87% freed -> 1% released
88% freed -> 2% released
89% freed -> 3% released
90% freed -> 4% released
91% freed -> 6% released
92% freed -> 8% released
93% freed -> 11% released
94% freed -> 16% released
95% freed -> 22% released
96% freed -> 30% released
97% freed -> 40% released
98% freed -> 55% released
99% freed -> 74% released
Size: 128
85% freed -> 0% released
86% freed -> 1% released
87% freed -> 1% released
88% freed -> 2% released
89% freed -> 3% released
90% freed -> 4% released
91% freed -> 6% released
92% freed -> 8% released
93% freed -> 11% released
94% freed -> 16% released
95% freed -> 22% released
96% freed -> 30% released
97% freed -> 40% released
98% freed -> 55% released
99% freed -> 74% released
Size: 144
85% freed -> 1% released
86% freed -> 2% released
87% freed -> 3% released
88% freed -> 4% released
89% freed -> 6% released
90% freed -> 7% released
91% freed -> 10% released
92% freed -> 13% released
93% freed -> 17% released
94% freed -> 22% released
95% freed -> 28% released
96% freed -> 37% released
97% freed -> 47% released
98% freed -> 61% released
99% freed -> 78% released
Size: 160
85% freed -> 1% released
86% freed -> 2% released
87% freed -> 3% released
88% freed -> 4% released
89% freed -> 5% released
90% freed -> 7% released
91% freed -> 10% released
92% freed -> 13% released
93% freed -> 17% released
94% freed -> 22% released
95% freed -> 28% released
96% freed -> 37% released
97% freed -> 47% released
98% freed -> 61% released
99% freed -> 78% released
Size: 176
85% freed -> 2% released
86% freed -> 3% released
87% freed -> 4% released
88% freed -> 6% released
89% freed -> 7% released
90% freed -> 9% released
91% freed -> 12% released
92% freed -> 15% released
93% freed -> 20% released
94% freed -> 25% released
95% freed -> 32% released
96% freed -> 40% released
97% freed -> 51% released
98% freed -> 64% released
99% freed -> 80% released
Size: 192
85% freed -> 4% released
86% freed -> 5% released
87% freed -> 6% released
88% freed -> 8% released
89% freed -> 10% released
90% freed -> 13% released
91% freed -> 16% released
92% freed -> 20% released
93% freed -> 24% released
94% freed -> 30% released
95% freed -> 37% released
96% freed -> 45% released
97% freed -> 55% released
98% freed -> 68% released
99% freed -> 82% released
Size: 224
85% freed -> 8% released
86% freed -> 10% released
87% freed -> 12% released
88% freed -> 14% released
89% freed -> 17% released
90% freed -> 20% released
91% freed -> 23% released
92% freed -> 28% released
93% freed -> 33% released
94% freed -> 39% released
95% freed -> 46% released
96% freed -> 53% released
97% freed -> 63% released
98% freed -> 73% released
99% freed -> 85% released
Size: 240
85% freed -> 8% released
86% freed -> 10% released
87% freed -> 12% released
88% freed -> 14% released
89% freed -> 17% released
90% freed -> 20% released
91% freed -> 23% released
92% freed -> 28% released
93% freed -> 33% released
94% freed -> 39% released
95% freed -> 46% released
96% freed -> 54% released
97% freed -> 63% released
98% freed -> 73% released
99% freed -> 85% released

Reviewers: cferris, pcc, hctim, eugenis

Subscribers: #sanitizers, llvm-commits

Tags: #sanitizers

Differential Revision: https://reviews.llvm.org/D82031

GitOrigin-RevId: 79de8f8441d12b715b6616dde4b8b15507b88324
diff --git a/local_cache.h b/local_cache.h
index a6425fc..089aeb9 100644
--- a/local_cache.h
+++ b/local_cache.h
@@ -159,6 +159,7 @@
     DCHECK_GT(B->getCount(), 0);
     C->Count = B->getCount();
     B->copyToArray(C->Chunks);
+    B->clear();
     destroyBatch(ClassId, B);
     return true;
   }
diff --git a/primary32.h b/primary32.h
index 29a2680..2ee0f6c 100644
--- a/primary32.h
+++ b/primary32.h
@@ -444,6 +444,18 @@
     if (BytesPushed < PageSize)
       return 0; // Nothing new to release.
 
+    // Releasing smaller blocks is expensive, so we want to make sure that a
+    // significant amount of bytes are free, and that there has been a good
+    // amount of batches pushed to the freelist before attempting to release.
+    if (BlockSize < PageSize / 16U) {
+      if (!Force && BytesPushed < Sci->AllocatedUser / 16U)
+        return 0;
+      // We want 8x% to 9x% free bytes (the larger the bock, the lower the %).
+      if ((BytesInFreeList * 100U) / Sci->AllocatedUser <
+          (100U - 1U - BlockSize / 16U))
+        return 0;
+    }
+
     if (!Force) {
       const s32 IntervalMs = getReleaseToOsIntervalMs();
       if (IntervalMs < 0)
diff --git a/primary64.h b/primary64.h
index d476788..01e674b 100644
--- a/primary64.h
+++ b/primary64.h
@@ -213,9 +213,7 @@
     return reinterpret_cast<const char *>(RegionInfoArray);
   }
 
-  static uptr getRegionInfoArraySize() {
-    return sizeof(RegionInfoArray);
-  }
+  static uptr getRegionInfoArraySize() { return sizeof(RegionInfoArray); }
 
   static BlockInfo findNearestBlock(const char *RegionInfoData, uptr Ptr) {
     const RegionInfo *RegionInfoArray =
@@ -458,6 +456,18 @@
     if (BytesPushed < PageSize)
       return 0; // Nothing new to release.
 
+    // Releasing smaller blocks is expensive, so we want to make sure that a
+    // significant amount of bytes are free, and that there has been a good
+    // amount of batches pushed to the freelist before attempting to release.
+    if (BlockSize < PageSize / 16U) {
+      if (!Force && BytesPushed < Region->AllocatedUser / 16U)
+        return 0;
+      // We want 8x% to 9x% free bytes (the larger the bock, the lower the %).
+      if ((BytesInFreeList * 100U) / Region->AllocatedUser <
+          (100U - 1U - BlockSize / 16U))
+        return 0;
+    }
+
     if (!Force) {
       const s32 IntervalMs = getReleaseToOsIntervalMs();
       if (IntervalMs < 0)
diff --git a/release.cpp b/release.cpp
index e144b35..5d7c6c5 100644
--- a/release.cpp
+++ b/release.cpp
@@ -11,6 +11,6 @@
 namespace scudo {
 
 HybridMutex PackedCounterArray::Mutex = {};
-uptr PackedCounterArray::StaticBuffer[1024];
+uptr PackedCounterArray::StaticBuffer[PackedCounterArray::StaticBufferCount];
 
 } // namespace scudo
diff --git a/release.h b/release.h
index 323bf9d..fd55ea2 100644
--- a/release.h
+++ b/release.h
@@ -69,7 +69,8 @@
     BufferSize = (roundUpTo(N, static_cast<uptr>(1U) << PackingRatioLog) >>
                   PackingRatioLog) *
                  sizeof(*Buffer);
-    if (BufferSize <= StaticBufferSize && Mutex.tryLock()) {
+    if (BufferSize <= (StaticBufferCount * sizeof(Buffer[0])) &&
+        Mutex.tryLock()) {
       Buffer = &StaticBuffer[0];
       memset(Buffer, 0, BufferSize);
     } else {
@@ -114,6 +115,8 @@
 
   uptr getBufferSize() const { return BufferSize; }
 
+  static const uptr StaticBufferCount = 1024U;
+
 private:
   const uptr N;
   uptr CounterSizeBitsLog;
@@ -125,8 +128,7 @@
   uptr *Buffer;
 
   static HybridMutex Mutex;
-  static const uptr StaticBufferSize = 1024U;
-  static uptr StaticBuffer[StaticBufferSize];
+  static uptr StaticBuffer[StaticBufferCount];
 };
 
 template <class ReleaseRecorderT> class FreePagesRangeTracker {