[scudo][standalone] Correct releaseToOS behavior

Summary:
There was an issue in `releaseToOSMaybe`: one of the criteria to
decide if we should proceed with the release was wrong. Namely:

```
const uptr N = Sci->Stats.PoppedBlocks - Sci->Stats.PushedBlocks;
if (N * BlockSize < PageSize)
  return; // No chance to release anything.
```

I meant to check if the amount of bytes in the free list was lower
than a page, but this actually checks if the amount of **in use** bytes
was lower than a page.

The correct code is:

```
const uptr BytesInFreeList =
  Region->AllocatedUser -
  (Region->Stats.PoppedBlocks - Region->Stats.PushedBlocks) * BlockSize;
if (BytesInFreeList < PageSize)
  return 0; // No chance to release anything.
```

Consequences of the bug:
- if a class size has less than a page worth of in-use bytes (allocated
  or in a cache), reclaiming would not occur, whatever the amount of
  blocks in the free list; in real world scenarios this is unlikely to
  happen and be impactful;
- if a class size had less than a page worth of free bytes (and enough
  in-use bytes, etc), then reclaiming would be attempted, with likely
  no result. This means the reclaiming was overzealous at times.

I didn't have a good way to test for this, so I changed the prototype
of the function to return the number of bytes released, allowing to
get the information needed. The test added fails with the initial
criteria.

Another issue is that `ReleaseToOsInterval` can actually be 0, meaning
we always try to release (side note: it's terrible for performances).
so change a `> 0` check to `>= 0`.

Additionally, decrease the `CanRelease` threshold to `PageSize / 32`.
I still have to make that configurable but I will do it at another time.

Finally, rename some variables in `printStats`: I feel like "available"
was too ambiguous, so change it to "total".

Reviewers: morehouse, hctim, eugenis, vitalybuka, cferris

Reviewed By: morehouse

Subscribers: delcypher, #sanitizers, llvm-commits

Tags: #llvm, #sanitizers

Differential Revision: https://reviews.llvm.org/D68471

git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@373930 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/scudo/standalone/primary32.h b/lib/scudo/standalone/primary32.h
index 79a11bf..a9fbb70 100644
--- a/lib/scudo/standalone/primary32.h
+++ b/lib/scudo/standalone/primary32.h
@@ -72,9 +72,9 @@
       SizeClassInfo *Sci = getSizeClassInfo(I);
       Sci->RandState = getRandomU32(&Seed);
       // See comment in the 64-bit primary about releasing smaller size classes.
-      Sci->CanRelease = (ReleaseToOsInterval > 0) &&
+      Sci->CanRelease = (ReleaseToOsInterval >= 0) &&
                         (I != SizeClassMap::BatchClassId) &&
-                        (getSizeByClassId(I) >= (PageSize / 16));
+                        (getSizeByClassId(I) >= (PageSize / 32));
     }
     ReleaseToOsIntervalMs = ReleaseToOsInterval;
   }
@@ -161,14 +161,16 @@
       printStats(I, 0);
   }
 
-  void releaseToOS() {
+  uptr releaseToOS() {
+    uptr TotalReleasedBytes = 0;
     for (uptr I = 0; I < NumClasses; I++) {
       if (I == SizeClassMap::BatchClassId)
         continue;
       SizeClassInfo *Sci = getSizeClassInfo(I);
       ScopedLock L(Sci->Mutex);
-      releaseToOSMaybe(Sci, I, /*Force=*/true);
+      TotalReleasedBytes += releaseToOSMaybe(Sci, I, /*Force=*/true);
     }
+    return TotalReleasedBytes;
   }
 
 private:
@@ -339,35 +341,38 @@
            AvailableChunks, Rss >> 10);
   }
 
-  NOINLINE void releaseToOSMaybe(SizeClassInfo *Sci, uptr ClassId,
+  NOINLINE uptr releaseToOSMaybe(SizeClassInfo *Sci, uptr ClassId,
                                  bool Force = false) {
     const uptr BlockSize = getSizeByClassId(ClassId);
     const uptr PageSize = getPageSizeCached();
 
     CHECK_GE(Sci->Stats.PoppedBlocks, Sci->Stats.PushedBlocks);
-    const uptr N = Sci->Stats.PoppedBlocks - Sci->Stats.PushedBlocks;
-    if (N * BlockSize < PageSize)
-      return; // No chance to release anything.
+    const uptr BytesInFreeList =
+        Sci->AllocatedUser -
+        (Sci->Stats.PoppedBlocks - Sci->Stats.PushedBlocks) * BlockSize;
+    if (BytesInFreeList < PageSize)
+      return 0; // No chance to release anything.
     if ((Sci->Stats.PushedBlocks - Sci->ReleaseInfo.PushedBlocksAtLastRelease) *
             BlockSize <
         PageSize) {
-      return; // Nothing new to release.
+      return 0; // Nothing new to release.
     }
 
     if (!Force) {
       const s32 IntervalMs = ReleaseToOsIntervalMs;
       if (IntervalMs < 0)
-        return;
+        return 0;
       if (Sci->ReleaseInfo.LastReleaseAtNs +
               static_cast<uptr>(IntervalMs) * 1000000ULL >
           getMonotonicTime()) {
-        return; // Memory was returned recently.
+        return 0; // Memory was returned recently.
       }
     }
 
     // TODO(kostyak): currently not ideal as we loop over all regions and
     // iterate multiple times over the same freelist if a ClassId spans multiple
     // regions. But it will have to do for now.
+    uptr TotalReleasedBytes = 0;
     for (uptr I = MinRegionIndex; I <= MaxRegionIndex; I++) {
       if (PossibleRegions[I] == ClassId) {
         ReleaseRecorder Recorder(I * RegionSize);
@@ -377,10 +382,12 @@
           Sci->ReleaseInfo.PushedBlocksAtLastRelease = Sci->Stats.PushedBlocks;
           Sci->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount();
           Sci->ReleaseInfo.LastReleasedBytes = Recorder.getReleasedBytes();
+          TotalReleasedBytes += Sci->ReleaseInfo.LastReleasedBytes;
         }
       }
     }
     Sci->ReleaseInfo.LastReleaseAtNs = getMonotonicTime();
+    return TotalReleasedBytes;
   }
 
   SizeClassInfo SizeClassInfoArray[NumClasses];
diff --git a/lib/scudo/standalone/primary64.h b/lib/scudo/standalone/primary64.h
index 96fd1e6..f56387b 100644
--- a/lib/scudo/standalone/primary64.h
+++ b/lib/scudo/standalone/primary64.h
@@ -79,9 +79,9 @@
       // memory accesses which ends up being fairly costly. The current lower
       // limit is mostly arbitrary and based on empirical observations.
       // TODO(kostyak): make the lower limit a runtime option
-      Region->CanRelease = (ReleaseToOsInterval > 0) &&
+      Region->CanRelease = (ReleaseToOsInterval >= 0) &&
                            (I != SizeClassMap::BatchClassId) &&
-                           (getSizeByClassId(I) >= (PageSize / 16));
+                           (getSizeByClassId(I) >= (PageSize / 32));
       Region->RandState = getRandomU32(&Seed);
     }
     ReleaseToOsIntervalMs = ReleaseToOsInterval;
@@ -167,14 +167,16 @@
       printStats(I, 0);
   }
 
-  void releaseToOS() {
+  uptr releaseToOS() {
+    uptr TotalReleasedBytes = 0;
     for (uptr I = 0; I < NumClasses; I++) {
       if (I == SizeClassMap::BatchClassId)
         continue;
       RegionInfo *Region = getRegionInfo(I);
       ScopedLock L(Region->Mutex);
-      releaseToOSMaybe(Region, I, /*Force=*/true);
+      TotalReleasedBytes += releaseToOSMaybe(Region, I, /*Force=*/true);
     }
+    return TotalReleasedBytes;
   }
 
 private:
@@ -259,7 +261,7 @@
     const uptr MappedUser = Region->MappedUser;
     const uptr TotalUserBytes = Region->AllocatedUser + MaxCount * Size;
     // Map more space for blocks, if necessary.
-    if (LIKELY(TotalUserBytes > MappedUser)) {
+    if (TotalUserBytes > MappedUser) {
       // Do the mmap for the user memory.
       const uptr UserMapSize =
           roundUpTo(TotalUserBytes - MappedUser, MapSizeIncrement);
@@ -325,43 +327,44 @@
     if (Region->MappedUser == 0)
       return;
     const uptr InUse = Region->Stats.PoppedBlocks - Region->Stats.PushedBlocks;
-    const uptr AvailableChunks =
-        Region->AllocatedUser / getSizeByClassId(ClassId);
+    const uptr TotalChunks = Region->AllocatedUser / getSizeByClassId(ClassId);
     Printf("%s %02zu (%6zu): mapped: %6zuK popped: %7zu pushed: %7zu inuse: "
-           "%6zu avail: %6zu rss: %6zuK releases: %6zu last released: %6zuK "
+           "%6zu total: %6zu rss: %6zuK releases: %6zu last released: %6zuK "
            "region: 0x%zx (0x%zx)\n",
            Region->Exhausted ? "F" : " ", ClassId, getSizeByClassId(ClassId),
            Region->MappedUser >> 10, Region->Stats.PoppedBlocks,
-           Region->Stats.PushedBlocks, InUse, AvailableChunks, Rss >> 10,
+           Region->Stats.PushedBlocks, InUse, TotalChunks, Rss >> 10,
            Region->ReleaseInfo.RangesReleased,
            Region->ReleaseInfo.LastReleasedBytes >> 10, Region->RegionBeg,
            getRegionBaseByClassId(ClassId));
   }
 
-  NOINLINE void releaseToOSMaybe(RegionInfo *Region, uptr ClassId,
+  NOINLINE uptr releaseToOSMaybe(RegionInfo *Region, uptr ClassId,
                                  bool Force = false) {
     const uptr BlockSize = getSizeByClassId(ClassId);
     const uptr PageSize = getPageSizeCached();
 
     CHECK_GE(Region->Stats.PoppedBlocks, Region->Stats.PushedBlocks);
-    const uptr N = Region->Stats.PoppedBlocks - Region->Stats.PushedBlocks;
-    if (N * BlockSize < PageSize)
-      return; // No chance to release anything.
+    const uptr BytesInFreeList =
+        Region->AllocatedUser -
+        (Region->Stats.PoppedBlocks - Region->Stats.PushedBlocks) * BlockSize;
+    if (BytesInFreeList < PageSize)
+      return 0; // No chance to release anything.
     if ((Region->Stats.PushedBlocks -
          Region->ReleaseInfo.PushedBlocksAtLastRelease) *
             BlockSize <
         PageSize) {
-      return; // Nothing new to release.
+      return 0; // Nothing new to release.
     }
 
     if (!Force) {
       const s32 IntervalMs = ReleaseToOsIntervalMs;
       if (IntervalMs < 0)
-        return;
+        return 0;
       if (Region->ReleaseInfo.LastReleaseAtNs +
               static_cast<uptr>(IntervalMs) * 1000000ULL >
           getMonotonicTime()) {
-        return; // Memory was returned recently.
+        return 0; // Memory was returned recently.
       }
     }
 
@@ -377,6 +380,7 @@
       Region->ReleaseInfo.LastReleasedBytes = Recorder.getReleasedBytes();
     }
     Region->ReleaseInfo.LastReleaseAtNs = getMonotonicTime();
+    return Recorder.getReleasedBytes();
   }
 };
 
diff --git a/lib/scudo/standalone/tests/primary_test.cpp b/lib/scudo/standalone/tests/primary_test.cpp
index 329a4c1..a6cfc6b 100644
--- a/lib/scudo/standalone/tests/primary_test.cpp
+++ b/lib/scudo/standalone/tests/primary_test.cpp
@@ -188,3 +188,32 @@
   testPrimaryThreaded<scudo::SizeClassAllocator32<SizeClassMap, 18U>>();
   testPrimaryThreaded<scudo::SizeClassAllocator64<SizeClassMap, 24U>>();
 }
+
+// Through a simple allocation that spans two pages, verify that releaseToOS
+// actually releases some bytes (at least one page worth). This is a regression
+// test for an error in how the release criteria were computed.
+template <typename Primary> static void testReleaseToOS() {
+  auto Deleter = [](Primary *P) {
+    P->unmapTestOnly();
+    delete P;
+  };
+  std::unique_ptr<Primary, decltype(Deleter)> Allocator(new Primary, Deleter);
+  Allocator->init(/*ReleaseToOsInterval=*/-1);
+  typename Primary::CacheT Cache;
+  Cache.init(nullptr, Allocator.get());
+  const scudo::uptr Size = scudo::getPageSizeCached() * 2;
+  EXPECT_TRUE(Primary::canAllocate(Size));
+  const scudo::uptr ClassId =
+      Primary::SizeClassMap::getClassIdBySize(Size);
+  void *P = Cache.allocate(ClassId);
+  EXPECT_NE(P, nullptr);
+  Cache.deallocate(ClassId, P);
+  Cache.destroy(nullptr);
+  EXPECT_GT(Allocator->releaseToOS(), 0U);
+}
+
+TEST(ScudoPrimaryTest, ReleaseToOS) {
+  using SizeClassMap = scudo::DefaultSizeClassMap;
+  testReleaseToOS<scudo::SizeClassAllocator32<SizeClassMap, 18U>>();
+  testReleaseToOS<scudo::SizeClassAllocator64<SizeClassMap, 24U>>();
+}