scudo: Support memory tagging in the secondary allocator.

This patch enhances the secondary allocator to detect buffer overflows
and, on hardware supporting memory tagging, use-after-free and buffer
underflows.

Use-after-free detection is implemented by setting memory page
protection to PROT_NONE on free. Because this must be done immediately
rather than after the memory has been quarantined, we no longer use the
combined allocator quarantine for secondary allocations. Instead, a
quarantine has been added to the secondary allocator cache.
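
In condensed form, the store() path in the secondary cache now does
roughly the following when memory tagging is in use (simplified from
the secondary.h changes below; the release-interval handling and the
cache-full paths are omitted):

    // Make the freed block inaccessible right away, then rotate it
    // through a small quarantine ring, so that a use-after-free faults
    // on the PROT_NONE pages instead of silently reusing the memory.
    setMemoryPermission(Entry.CommitBase, Entry.CommitSize, MAP_NOACCESS,
                        &Entry.Data);
    QuarantinePos = (QuarantinePos + 1) % Config::SecondaryCacheQuarantineSize;
    CachedBlock PrevEntry = Quarantine[QuarantinePos];
    Quarantine[QuarantinePos] = Entry;
    // PrevEntry, the oldest quarantined block, now becomes eligible for
    // the regular cache or for unmapping.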

Buffer overflow detection is implemented by right-aligning the
allocation within the writable pages, so that any overflow spills into
the guard page that follows the allocation, which has PROT_NONE page
protection. Because this requires the secondary allocator to place the
chunk header at the correct position, the responsibility for ensuring
chunk alignment has been moved to the secondary allocator.
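
Concretely, the user allocation is now placed at the highest suitably
aligned address within the committed region, with the block and chunk
headers immediately before it (condensed from the allocate() changes in
secondary.h below):

    // CommitBase..CommitBase+CommitSize are the writable pages; the page
    // that follows them is a PROT_NONE guard page. Right-align the
    // allocation so that an overflow runs into the guard page.
    const uptr AllocPos =
        roundDownTo(CommitBase + CommitSize - Size, Alignment);
    const uptr HeaderPos =
        AllocPos - Chunk::getHeaderSize() - LargeBlock::getHeaderSize();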

Buffer underflow detection has been implemented on hardware supporting
memory tagging by tagging the memory region between the start of the
mapping and the start of the allocation with a non-zero tag. Due to
the cost of pre-tagging secondary allocations and the memory bandwidth
cost of tagged accesses, the allocation itself uses a tag of 0 and
only the first four pages have memory tagging enabled.
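
At allocation time this is done roughly as follows (condensed from
secondary.h below; addFixedTag() and storeTags() are defined in
memtag.h):

    // Only the first MaxUnusedCachePages (4) pages of the committed
    // region are mapped with MAP_MEMTAG. Tag everything between the
    // commit base and the start of the user allocation with the non-zero
    // header tag, so that an underflowing access raises a tag mismatch.
    if (useMemoryTagging<Config>(Options))
      storeTags(LargeBlock::addHeaderTag<Config>(CommitBase),
                reinterpret_cast<uptr>(H + 1));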

This is a reland of commit 7a0da8894348, which was reverted in commit
9678b07e42ee. This reland includes the following changes:

- Fix the calculation of BlockSize, which led to incorrect statistics
  being returned by mallinfo().
- Add -Wno-pedantic to silence a GCC warning.
- Optionally add some slack at the end of secondary allocations to help
  work around buggy applications that read off the end of their
  allocation; see the sketch below.
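
The slack is a small change in the secondary's allocate() (shown in the
diff below): the requested size is padded before being rounded up to a
page, moving the guard page slightly further from the end of the user
allocation.

    if (Options.get(OptionBit::AddLargeAllocationSlack))
      Size += 1UL << SCUDO_MIN_ALIGNMENT_LOG;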

Differential Revision: https://reviews.llvm.org/D93731

GitOrigin-RevId: 3f71ce85897cc92190af6a66c5b2dcffc85212e2
diff --git a/allocator_config.h b/allocator_config.h
index 042a307..8e103f2 100644
--- a/allocator_config.h
+++ b/allocator_config.h
@@ -71,6 +71,7 @@
 
   typedef MapAllocatorCache<DefaultConfig> SecondaryCache;
   static const u32 SecondaryCacheEntriesArraySize = 32U;
+  static const u32 SecondaryCacheQuarantineSize = 0U;
   static const u32 SecondaryCacheDefaultMaxEntriesCount = 32U;
   static const uptr SecondaryCacheDefaultMaxEntrySize = 1UL << 19;
   static const s32 SecondaryCacheMinReleaseToOsIntervalMs = INT32_MIN;
@@ -98,6 +99,7 @@
 
   typedef MapAllocatorCache<AndroidConfig> SecondaryCache;
   static const u32 SecondaryCacheEntriesArraySize = 256U;
+  static const u32 SecondaryCacheQuarantineSize = 32U;
   static const u32 SecondaryCacheDefaultMaxEntriesCount = 32U;
   static const uptr SecondaryCacheDefaultMaxEntrySize = 2UL << 20;
   static const s32 SecondaryCacheMinReleaseToOsIntervalMs = 0;
@@ -126,6 +128,7 @@
 
   typedef MapAllocatorCache<AndroidSvelteConfig> SecondaryCache;
   static const u32 SecondaryCacheEntriesArraySize = 16U;
+  static const u32 SecondaryCacheQuarantineSize = 32U;
   static const u32 SecondaryCacheDefaultMaxEntriesCount = 4U;
   static const uptr SecondaryCacheDefaultMaxEntrySize = 1UL << 18;
   static const s32 SecondaryCacheMinReleaseToOsIntervalMs = 0;
diff --git a/combined.h b/combined.h
index a0bf04d..3b112d8 100644
--- a/combined.h
+++ b/combined.h
@@ -71,12 +71,10 @@
       NewHeader.State = Chunk::State::Available;
       Chunk::compareExchangeHeader(Allocator.Cookie, Ptr, &NewHeader, &Header);
 
+      if (allocatorSupportsMemoryTagging<Params>())
+        Ptr = untagPointer(Ptr);
       void *BlockBegin = Allocator::getBlockBegin(Ptr, &NewHeader);
-      const uptr ClassId = NewHeader.ClassId;
-      if (LIKELY(ClassId))
-        Cache.deallocate(ClassId, BlockBegin);
-      else
-        Allocator.Secondary.deallocate(BlockBegin);
+      Cache.deallocate(NewHeader.ClassId, BlockBegin);
     }
 
     // We take a shortcut when allocating a quarantine batch by working with the
@@ -238,11 +236,26 @@
     TSD->Cache.destroy(&Stats);
   }
 
-  ALWAYS_INLINE void *untagPointerMaybe(void *Ptr) {
-    if (allocatorSupportsMemoryTagging<Params>())
-      return reinterpret_cast<void *>(
-          untagPointer(reinterpret_cast<uptr>(Ptr)));
-    return Ptr;
+  ALWAYS_INLINE void *getHeaderTaggedPointer(void *Ptr) {
+    if (!allocatorSupportsMemoryTagging<Params>())
+      return Ptr;
+    auto UntaggedPtr = untagPointer(Ptr);
+    if (UntaggedPtr != Ptr)
+      return UntaggedPtr;
+    // Secondary, or pointer allocated while memory tagging is unsupported or
+    // disabled. The tag mismatch is okay in the latter case because tags will
+    // not be checked.
+    return addHeaderTag(Ptr);
+  }
+
+  ALWAYS_INLINE uptr addHeaderTag(uptr Ptr) {
+    if (!allocatorSupportsMemoryTagging<Params>())
+      return Ptr;
+    return addFixedTag(Ptr, 2);
+  }
+
+  ALWAYS_INLINE void *addHeaderTag(void *Ptr) {
+    return reinterpret_cast<void *>(addHeaderTag(reinterpret_cast<uptr>(Ptr)));
   }
 
   NOINLINE u32 collectStackTrace() {
@@ -339,7 +352,7 @@
         TSD->unlock();
     }
     if (UNLIKELY(ClassId == 0))
-      Block = Secondary.allocate(NeededSize, Alignment, &SecondaryBlockEnd,
+      Block = Secondary.allocate(Options, Size, Alignment, &SecondaryBlockEnd,
                                  FillContents);
 
     if (UNLIKELY(!Block)) {
@@ -435,12 +448,21 @@
           TaggedPtr = prepareTaggedChunk(Ptr, Size, OddEvenMask, BlockEnd);
         }
         storeAllocationStackMaybe(Options, Ptr);
-      } else if (UNLIKELY(FillContents != NoFill)) {
-        // This condition is not necessarily unlikely, but since memset is
-        // costly, we might as well mark it as such.
-        memset(Block, FillContents == ZeroFill ? 0 : PatternFillByte,
-               PrimaryT::getSizeByClassId(ClassId));
+      } else {
+        Block = addHeaderTag(Block);
+        Ptr = addHeaderTag(Ptr);
+        if (UNLIKELY(FillContents != NoFill)) {
+          // This condition is not necessarily unlikely, but since memset is
+          // costly, we might as well mark it as such.
+          memset(Block, FillContents == ZeroFill ? 0 : PatternFillByte,
+                 PrimaryT::getSizeByClassId(ClassId));
+        }
       }
+    } else {
+      Block = addHeaderTag(Block);
+      Ptr = addHeaderTag(Ptr);
+      if (UNLIKELY(useMemoryTagging<Params>(Options)))
+        storeTags(reinterpret_cast<uptr>(Block), reinterpret_cast<uptr>(Ptr));
     }
 
     Chunk::UnpackedHeader Header = {};
@@ -494,7 +516,7 @@
     if (UNLIKELY(!isAligned(reinterpret_cast<uptr>(Ptr), MinAlignment)))
       reportMisalignedPointer(AllocatorAction::Deallocating, Ptr);
 
-    Ptr = untagPointerMaybe(Ptr);
+    Ptr = getHeaderTaggedPointer(Ptr);
 
     Chunk::UnpackedHeader Header;
     Chunk::loadHeader(Cookie, Ptr, &Header);
@@ -533,7 +555,7 @@
     }
 
     void *OldTaggedPtr = OldPtr;
-    OldPtr = untagPointerMaybe(OldPtr);
+    OldPtr = getHeaderTaggedPointer(OldPtr);
 
     // The following cases are handled by the C wrappers.
     DCHECK_NE(OldPtr, nullptr);
@@ -569,7 +591,7 @@
                                   Chunk::Origin::Malloc);
     }
 
-    void *BlockBegin = getBlockBegin(OldPtr, &OldHeader);
+    void *BlockBegin = getBlockBegin(OldTaggedPtr, &OldHeader);
     uptr BlockEnd;
     uptr OldSize;
     const uptr ClassId = OldHeader.ClassId;
@@ -579,18 +601,19 @@
       OldSize = OldHeader.SizeOrUnusedBytes;
     } else {
       BlockEnd = SecondaryT::getBlockEnd(BlockBegin);
-      OldSize = BlockEnd -
-                (reinterpret_cast<uptr>(OldPtr) + OldHeader.SizeOrUnusedBytes);
+      OldSize = BlockEnd - (reinterpret_cast<uptr>(OldTaggedPtr) +
+                            OldHeader.SizeOrUnusedBytes);
     }
     // If the new chunk still fits in the previously allocated block (with a
     // reasonable delta), we just keep the old block, and update the chunk
     // header to reflect the size change.
-    if (reinterpret_cast<uptr>(OldPtr) + NewSize <= BlockEnd) {
+    if (reinterpret_cast<uptr>(OldTaggedPtr) + NewSize <= BlockEnd) {
       if (NewSize > OldSize || (OldSize - NewSize) < getPageSizeCached()) {
         Chunk::UnpackedHeader NewHeader = OldHeader;
         NewHeader.SizeOrUnusedBytes =
             (ClassId ? NewSize
-                     : BlockEnd - (reinterpret_cast<uptr>(OldPtr) + NewSize)) &
+                     : BlockEnd -
+                           (reinterpret_cast<uptr>(OldTaggedPtr) + NewSize)) &
             Chunk::SizeOrUnusedBytesMask;
         Chunk::compareExchangeHeader(Cookie, OldPtr, &NewHeader, &OldHeader);
         if (UNLIKELY(ClassId && useMemoryTagging<Params>(Options))) {
@@ -683,14 +706,30 @@
     initThreadMaybe();
     const uptr From = Base;
     const uptr To = Base + Size;
-    auto Lambda = [this, From, To, Callback, Arg](uptr Block) {
+    bool MayHaveTaggedPrimary = allocatorSupportsMemoryTagging<Params>() &&
+                                systemSupportsMemoryTagging();
+    auto Lambda = [this, From, To, MayHaveTaggedPrimary, Callback,
+                   Arg](uptr Block) {
       if (Block < From || Block >= To)
         return;
       uptr Chunk;
       Chunk::UnpackedHeader Header;
-      if (getChunkFromBlock(Block, &Chunk, &Header) &&
-          Header.State == Chunk::State::Allocated) {
+      if (MayHaveTaggedPrimary) {
+        // A chunk header can either have a zero tag (tagged primary) or the
+        // header tag (secondary, or untagged primary). We don't know which so
+        // try both.
+        ScopedDisableMemoryTagChecks x;
+        if (!getChunkFromBlock(Block, &Chunk, &Header) &&
+            !getChunkFromBlock(addHeaderTag(Block), &Chunk, &Header))
+          return;
+      } else {
+        if (!getChunkFromBlock(addHeaderTag(Block), &Chunk, &Header))
+          return;
+      }
+      if (Header.State == Chunk::State::Allocated) {
         uptr TaggedChunk = Chunk;
+        if (allocatorSupportsMemoryTagging<Params>())
+          TaggedChunk = untagPointer(TaggedChunk);
         if (useMemoryTagging<Params>(Primary.Options.load()))
           TaggedChunk = loadTag(Chunk);
         Callback(TaggedChunk, getSize(reinterpret_cast<void *>(Chunk), &Header),
@@ -751,7 +790,7 @@
       return GuardedAlloc.getSize(Ptr);
 #endif // GWP_ASAN_HOOKS
 
-    Ptr = untagPointerMaybe(const_cast<void *>(Ptr));
+    Ptr = getHeaderTaggedPointer(const_cast<void *>(Ptr));
     Chunk::UnpackedHeader Header;
     Chunk::loadHeader(Cookie, Ptr, &Header);
     // Getting the usable size of a chunk only makes sense if it's allocated.
@@ -776,7 +815,7 @@
 #endif // GWP_ASAN_HOOKS
     if (!Ptr || !isAligned(reinterpret_cast<uptr>(Ptr), MinAlignment))
       return false;
-    Ptr = untagPointerMaybe(const_cast<void *>(Ptr));
+    Ptr = getHeaderTaggedPointer(const_cast<void *>(Ptr));
     Chunk::UnpackedHeader Header;
     return Chunk::isValid(Cookie, Ptr, &Header) &&
            Header.State == Chunk::State::Allocated;
@@ -786,8 +825,17 @@
     return useMemoryTagging<Params>(Primary.Options.load());
   }
   void disableMemoryTagging() {
-    if (allocatorSupportsMemoryTagging<Params>())
+    // If we haven't been initialized yet, we need to initialize now in order to
+    // prevent a future call to initThreadMaybe() from enabling memory tagging
+    // based on feature detection. But don't call initThreadMaybe() because it
+    // may end up calling the allocator (via pthread_atfork, via the post-init
+    // callback), which may cause mappings to be created with memory tagging
+    // enabled.
+    TSDRegistry.initOnceMaybe(this);
+    if (allocatorSupportsMemoryTagging<Params>()) {
+      Secondary.disableMemoryTagging();
       Primary.Options.clear(OptionBit::UseMemoryTagging);
+    }
   }
 
   void setTrackAllocationStacks(bool Track) {
@@ -803,6 +851,14 @@
     Primary.Options.setFillContentsMode(FillContents);
   }
 
+  void setAddLargeAllocationSlack(bool AddSlack) {
+    initThreadMaybe();
+    if (AddSlack)
+      Primary.Options.set(OptionBit::AddLargeAllocationSlack);
+    else
+      Primary.Options.clear(OptionBit::AddLargeAllocationSlack);
+  }
+
   const char *getStackDepotAddress() const {
     return reinterpret_cast<const char *>(&Depot);
   }
@@ -1028,6 +1084,8 @@
     const uptr SizeOrUnusedBytes = Header->SizeOrUnusedBytes;
     if (LIKELY(Header->ClassId))
       return SizeOrUnusedBytes;
+    if (allocatorSupportsMemoryTagging<Params>())
+      Ptr = untagPointer(const_cast<void *>(Ptr));
     return SecondaryT::getBlockEnd(getBlockBegin(Ptr, Header)) -
            reinterpret_cast<uptr>(Ptr) - SizeOrUnusedBytes;
   }
@@ -1053,11 +1111,14 @@
     // If the quarantine is disabled, or the actual size of a chunk is 0 or
     // larger than the maximum allowed, we return a chunk directly to the
     // backend. This purposefully underflows for Size == 0.
-    const bool BypassQuarantine =
-        !Quarantine.getCacheSize() || ((Size - 1) >= QuarantineMaxChunkSize);
+    const bool BypassQuarantine = !Quarantine.getCacheSize() ||
+                                  ((Size - 1) >= QuarantineMaxChunkSize) ||
+                                  !NewHeader.ClassId;
     if (BypassQuarantine) {
       NewHeader.State = Chunk::State::Available;
       Chunk::compareExchangeHeader(Cookie, Ptr, &NewHeader, Header);
+      if (allocatorSupportsMemoryTagging<Params>())
+        Ptr = untagPointer(Ptr);
       void *BlockBegin = getBlockBegin(Ptr, &NewHeader);
       const uptr ClassId = NewHeader.ClassId;
       if (LIKELY(ClassId)) {
@@ -1067,7 +1128,10 @@
         if (UnlockRequired)
           TSD->unlock();
       } else {
-        Secondary.deallocate(BlockBegin);
+        if (UNLIKELY(useMemoryTagging<Params>(Options)))
+          storeTags(reinterpret_cast<uptr>(BlockBegin),
+                    reinterpret_cast<uptr>(Ptr));
+        Secondary.deallocate(Options, BlockBegin);
       }
     } else {
       NewHeader.State = Chunk::State::Quarantined;
diff --git a/common.h b/common.h
index 662b733..d9884df 100644
--- a/common.h
+++ b/common.h
@@ -165,6 +165,9 @@
 void unmap(void *Addr, uptr Size, uptr Flags = 0,
            MapPlatformData *Data = nullptr);
 
+void setMemoryPermission(uptr Addr, uptr Size, uptr Flags,
+                         MapPlatformData *Data = nullptr);
+
 void releasePagesToOS(uptr BaseAddress, uptr Offset, uptr Size,
                       MapPlatformData *Data = nullptr);
 
diff --git a/fuchsia.cpp b/fuchsia.cpp
index fa9d850..415a82e 100644
--- a/fuchsia.cpp
+++ b/fuchsia.cpp
@@ -134,6 +134,16 @@
   }
 }
 
+void setMemoryPermission(uptr Addr, uptr Size, uptr Flags,
+                         MapPlatformData *Data) {
+  const zx_vm_option_t Prot =
+      (Flags & MAP_NOACCESS) ? 0 : (ZX_VM_PERM_READ | ZX_VM_PERM_WRITE);
+  DCHECK(Data);
+  DCHECK_NE(Data->Vmar, ZX_HANDLE_INVALID);
+  if (_zx_vmar_protect(Data->Vmar, Prot, Addr, Size) != ZX_OK)
+    dieOnMapUnmapError();
+}
+
 void releasePagesToOS(UNUSED uptr BaseAddress, uptr Offset, uptr Size,
                       MapPlatformData *Data) {
   DCHECK(Data);
diff --git a/linux.cpp b/linux.cpp
index d246467..c2fdf21 100644
--- a/linux.cpp
+++ b/linux.cpp
@@ -50,14 +50,14 @@
     MmapProt = PROT_NONE;
   } else {
     MmapProt = PROT_READ | PROT_WRITE;
+  }
 #if defined(__aarch64__)
 #ifndef PROT_MTE
 #define PROT_MTE 0x20
 #endif
-    if (Flags & MAP_MEMTAG)
-      MmapProt |= PROT_MTE;
+  if (Flags & MAP_MEMTAG)
+    MmapProt |= PROT_MTE;
 #endif
-  }
   if (Addr) {
     // Currently no scenario for a noaccess mapping with a fixed address.
     DCHECK_EQ(Flags & MAP_NOACCESS, 0);
@@ -70,7 +70,7 @@
     return nullptr;
   }
 #if SCUDO_ANDROID
-  if (!(Flags & MAP_NOACCESS))
+  if (Name)
     prctl(ANDROID_PR_SET_VMA, ANDROID_PR_SET_VMA_ANON_NAME, P, Size, Name);
 #endif
   return P;
@@ -82,6 +82,13 @@
     dieOnMapUnmapError();
 }
 
+void setMemoryPermission(uptr Addr, uptr Size, uptr Flags,
+                         UNUSED MapPlatformData *Data) {
+  int Prot = (Flags & MAP_NOACCESS) ? PROT_NONE : (PROT_READ | PROT_WRITE);
+  if (mprotect(reinterpret_cast<void *>(Addr), Size, Prot) != 0)
+    dieOnMapUnmapError();
+}
+
 void releasePagesToOS(uptr BaseAddress, uptr Offset, uptr Size,
                       UNUSED MapPlatformData *Data) {
   void *Addr = reinterpret_cast<void *>(BaseAddress + Offset);
diff --git a/memtag.h b/memtag.h
index 07f08e1..c8f9281 100644
--- a/memtag.h
+++ b/memtag.h
@@ -23,7 +23,15 @@
 
 #if defined(__aarch64__) || defined(SCUDO_FUZZ)
 
+// We assume that Top-Byte Ignore is enabled if the architecture supports memory
+// tagging. Not all operating systems enable TBI, so we only claim architectural
+// support for memory tagging if the operating system enables TBI.
+#if SCUDO_LINUX
 inline constexpr bool archSupportsMemoryTagging() { return true; }
+#else
+inline constexpr bool archSupportsMemoryTagging() { return false; }
+#endif
+
 inline constexpr uptr archMemoryTagGranuleSize() { return 16; }
 
 inline uptr untagPointer(uptr Ptr) { return Ptr & ((1ULL << 56) - 1); }
@@ -120,6 +128,8 @@
   return TaggedPtr;
 }
 
+inline uptr addFixedTag(uptr Ptr, uptr Tag) { return Ptr | (Tag << 56); }
+
 inline uptr storeTags(uptr Begin, uptr End) {
   DCHECK(Begin % 16 == 0);
   if (Begin != End) {
@@ -245,6 +255,12 @@
   UNREACHABLE("memory tagging not supported");
 }
 
+inline uptr addFixedTag(uptr Ptr, uptr Tag) {
+  (void)Ptr;
+  (void)Tag;
+  UNREACHABLE("memory tagging not supported");
+}
+
 inline uptr storeTags(uptr Begin, uptr End) {
   (void)Begin;
   (void)End;
@@ -280,6 +296,10 @@
   *TaggedEnd = storeTags(*TaggedBegin, *TaggedBegin + Size);
 }
 
+inline void *untagPointer(void *Ptr) {
+  return reinterpret_cast<void *>(untagPointer(reinterpret_cast<uptr>(Ptr)));
+}
+
 template <typename Config>
 inline constexpr bool allocatorSupportsMemoryTagging() {
   return archSupportsMemoryTagging() && Config::MaySupportMemoryTagging;
diff --git a/options.h b/options.h
index 91301bf..d0277aa 100644
--- a/options.h
+++ b/options.h
@@ -24,6 +24,7 @@
   TrackAllocationStacks,
   UseOddEvenTags,
   UseMemoryTagging,
+  AddLargeAllocationSlack,
 };
 
 struct Options {
diff --git a/primary64.h b/primary64.h
index b41fc8b..7b930c9 100644
--- a/primary64.h
+++ b/primary64.h
@@ -60,7 +60,7 @@
   void initLinkerInitialized(s32 ReleaseToOsInterval) {
     // Reserve the space required for the Primary.
     PrimaryBase = reinterpret_cast<uptr>(
-        map(nullptr, PrimarySize, "scudo:primary", MAP_NOACCESS, &Data));
+        map(nullptr, PrimarySize, nullptr, MAP_NOACCESS, &Data));
 
     u32 Seed;
     const u64 Time = getMonotonicTime();
diff --git a/secondary.h b/secondary.h
index 0636401..366e0b1 100644
--- a/secondary.h
+++ b/secondary.h
@@ -28,7 +28,8 @@
 struct Header {
   LargeBlock::Header *Prev;
   LargeBlock::Header *Next;
-  uptr BlockEnd;
+  uptr CommitBase;
+  uptr CommitSize;
   uptr MapBase;
   uptr MapSize;
   [[no_unique_address]] MapPlatformData Data;
@@ -38,29 +39,42 @@
   return roundUpTo(sizeof(Header), 1U << SCUDO_MIN_ALIGNMENT_LOG);
 }
 
-static Header *getHeader(uptr Ptr) {
-  return reinterpret_cast<Header *>(Ptr - getHeaderSize());
+template <typename Config> static uptr addHeaderTag(uptr Ptr) {
+  if (allocatorSupportsMemoryTagging<Config>())
+    return addFixedTag(Ptr, 1);
+  return Ptr;
 }
 
-static Header *getHeader(const void *Ptr) {
-  return getHeader(reinterpret_cast<uptr>(Ptr));
+template <typename Config> static Header *getHeader(uptr Ptr) {
+  return reinterpret_cast<Header *>(addHeaderTag<Config>(Ptr) -
+                                    getHeaderSize());
+}
+
+template <typename Config> static Header *getHeader(const void *Ptr) {
+  return getHeader<Config>(reinterpret_cast<uptr>(Ptr));
 }
 
 } // namespace LargeBlock
 
+static void unmap(LargeBlock::Header *H) {
+  MapPlatformData Data = H->Data;
+  unmap(reinterpret_cast<void *>(H->MapBase), H->MapSize, UNMAP_ALL, &Data);
+}
+
 class MapAllocatorNoCache {
 public:
   void initLinkerInitialized(UNUSED s32 ReleaseToOsInterval) {}
   void init(UNUSED s32 ReleaseToOsInterval) {}
-  bool retrieve(UNUSED uptr Size, UNUSED LargeBlock::Header **H,
-                UNUSED bool *Zeroed) {
+  bool retrieve(UNUSED Options Options, UNUSED uptr Size, UNUSED uptr Alignment,
+                UNUSED LargeBlock::Header **H, UNUSED bool *Zeroed) {
     return false;
   }
-  bool store(UNUSED LargeBlock::Header *H) { return false; }
+  void store(UNUSED Options Options, UNUSED LargeBlock::Header *H) { unmap(H); }
   bool canCache(UNUSED uptr Size) { return false; }
   void disable() {}
   void enable() {}
   void releaseToOS() {}
+  void disableMemoryTagging() {}
   bool setOption(Option O, UNUSED sptr Value) {
     if (O == Option::ReleaseInterval || O == Option::MaxCacheEntriesCount ||
         O == Option::MaxCacheEntrySize)
@@ -70,6 +84,8 @@
   }
 };
 
+static const uptr MaxUnusedCachePages = 4U;
+
 template <typename Config> class MapAllocatorCache {
 public:
   // Ensure the default maximum specified fits the array.
@@ -89,67 +105,147 @@
     initLinkerInitialized(ReleaseToOsInterval);
   }
 
-  bool store(LargeBlock::Header *H) {
+  void store(Options Options, LargeBlock::Header *H) {
+    if (!canCache(H->CommitSize))
+      return unmap(H);
+
     bool EntryCached = false;
     bool EmptyCache = false;
+    const s32 Interval = atomic_load_relaxed(&ReleaseToOsIntervalMs);
     const u64 Time = getMonotonicTime();
     const u32 MaxCount = atomic_load_relaxed(&MaxEntriesCount);
-    {
+    CachedBlock Entry;
+    Entry.CommitBase = H->CommitBase;
+    Entry.CommitSize = H->CommitSize;
+    Entry.MapBase = H->MapBase;
+    Entry.MapSize = H->MapSize;
+    Entry.BlockBegin = reinterpret_cast<uptr>(H + 1);
+    Entry.Data = H->Data;
+    Entry.Time = Time;
+    if (useMemoryTagging<Config>(Options)) {
+      if (Interval == 0 && !SCUDO_FUCHSIA) {
+        // Release the memory and make it inaccessible at the same time by
+        // creating a new MAP_NOACCESS mapping on top of the existing mapping.
+        // Fuchsia does not support replacing mappings by creating a new mapping
+        // on top so we just do the two syscalls there.
+        Entry.Time = 0;
+        map(reinterpret_cast<void *>(Entry.CommitBase), Entry.CommitSize,
+            "scudo:secondary", MAP_RESIZABLE | MAP_NOACCESS | MAP_MEMTAG,
+            &Entry.Data);
+      } else {
+        setMemoryPermission(Entry.CommitBase, Entry.CommitSize, MAP_NOACCESS,
+                            &Entry.Data);
+      }
+    } else if (Interval == 0) {
+      releasePagesToOS(Entry.CommitBase, 0, Entry.CommitSize, &Entry.Data);
+      Entry.Time = 0;
+    }
+    do {
       ScopedLock L(Mutex);
+      if (useMemoryTagging<Config>(Options) && QuarantinePos == -1U) {
+        // If we get here then memory tagging was disabled in between when we
+        // read Options and when we locked Mutex. We can't insert our entry into
+        // the quarantine or the cache because the permissions would be wrong so
+        // just unmap it.
+        break;
+      }
+      if (Config::SecondaryCacheQuarantineSize &&
+          useMemoryTagging<Config>(Options)) {
+        QuarantinePos =
+            (QuarantinePos + 1) % Config::SecondaryCacheQuarantineSize;
+        if (!Quarantine[QuarantinePos].CommitBase) {
+          Quarantine[QuarantinePos] = Entry;
+          return;
+        }
+        CachedBlock PrevEntry = Quarantine[QuarantinePos];
+        Quarantine[QuarantinePos] = Entry;
+        if (OldestTime == 0)
+          OldestTime = Entry.Time;
+        Entry = PrevEntry;
+      }
       if (EntriesCount >= MaxCount) {
         if (IsFullEvents++ == 4U)
           EmptyCache = true;
       } else {
         for (u32 I = 0; I < MaxCount; I++) {
-          if (Entries[I].Block)
+          if (Entries[I].CommitBase)
             continue;
           if (I != 0)
             Entries[I] = Entries[0];
-          Entries[0].Block = reinterpret_cast<uptr>(H);
-          Entries[0].BlockEnd = H->BlockEnd;
-          Entries[0].MapBase = H->MapBase;
-          Entries[0].MapSize = H->MapSize;
-          Entries[0].Data = H->Data;
-          Entries[0].Time = Time;
+          Entries[0] = Entry;
           EntriesCount++;
+          if (OldestTime == 0)
+            OldestTime = Entry.Time;
           EntryCached = true;
           break;
         }
       }
-    }
-    s32 Interval;
+    } while (0);
     if (EmptyCache)
       empty();
-    else if ((Interval = atomic_load_relaxed(&ReleaseToOsIntervalMs)) >= 0)
+    else if (Interval >= 0)
       releaseOlderThan(Time - static_cast<u64>(Interval) * 1000000);
-    return EntryCached;
+    if (!EntryCached)
+      unmap(reinterpret_cast<void *>(Entry.MapBase), Entry.MapSize, UNMAP_ALL,
+            &Entry.Data);
   }
 
-  bool retrieve(uptr Size, LargeBlock::Header **H, bool *Zeroed) {
+  bool retrieve(Options Options, uptr Size, uptr Alignment,
+                LargeBlock::Header **H, bool *Zeroed) {
     const uptr PageSize = getPageSizeCached();
     const u32 MaxCount = atomic_load_relaxed(&MaxEntriesCount);
-    ScopedLock L(Mutex);
-    if (EntriesCount == 0)
-      return false;
-    for (u32 I = 0; I < MaxCount; I++) {
-      if (!Entries[I].Block)
-        continue;
-      const uptr BlockSize = Entries[I].BlockEnd - Entries[I].Block;
-      if (Size > BlockSize)
-        continue;
-      if (Size < BlockSize - PageSize * 4U)
-        continue;
-      *H = reinterpret_cast<LargeBlock::Header *>(Entries[I].Block);
-      *Zeroed = Entries[I].Time == 0;
-      Entries[I].Block = 0;
-      (*H)->BlockEnd = Entries[I].BlockEnd;
-      (*H)->MapBase = Entries[I].MapBase;
-      (*H)->MapSize = Entries[I].MapSize;
-      (*H)->Data = Entries[I].Data;
-      EntriesCount--;
-      return true;
+    bool Found = false;
+    CachedBlock Entry;
+    uptr HeaderPos;
+    {
+      ScopedLock L(Mutex);
+      if (EntriesCount == 0)
+        return false;
+      for (u32 I = 0; I < MaxCount; I++) {
+        const uptr CommitBase = Entries[I].CommitBase;
+        if (!CommitBase)
+          continue;
+        const uptr CommitSize = Entries[I].CommitSize;
+        const uptr AllocPos =
+            roundDownTo(CommitBase + CommitSize - Size, Alignment);
+        HeaderPos =
+            AllocPos - Chunk::getHeaderSize() - LargeBlock::getHeaderSize();
+        if (HeaderPos > CommitBase + CommitSize)
+          continue;
+        if (HeaderPos < CommitBase ||
+            AllocPos > CommitBase + PageSize * MaxUnusedCachePages)
+          continue;
+        Found = true;
+        Entry = Entries[I];
+        Entries[I].CommitBase = 0;
+        break;
+      }
     }
-    return false;
+    if (Found) {
+      *H = reinterpret_cast<LargeBlock::Header *>(
+          LargeBlock::addHeaderTag<Config>(HeaderPos));
+      *Zeroed = Entry.Time == 0;
+      if (useMemoryTagging<Config>(Options))
+        setMemoryPermission(Entry.CommitBase, Entry.CommitSize, 0, &Entry.Data);
+      uptr NewBlockBegin = reinterpret_cast<uptr>(*H + 1);
+      if (useMemoryTagging<Config>(Options)) {
+        if (*Zeroed)
+          storeTags(LargeBlock::addHeaderTag<Config>(Entry.CommitBase),
+                    NewBlockBegin);
+        else if (Entry.BlockBegin < NewBlockBegin)
+          storeTags(Entry.BlockBegin, NewBlockBegin);
+        else
+          storeTags(untagPointer(NewBlockBegin),
+                    untagPointer(Entry.BlockBegin));
+      }
+      (*H)->CommitBase = Entry.CommitBase;
+      (*H)->CommitSize = Entry.CommitSize;
+      (*H)->MapBase = Entry.MapBase;
+      (*H)->MapSize = Entry.MapSize;
+      (*H)->Data = Entry.Data;
+      EntriesCount--;
+    }
+    return Found;
   }
 
   bool canCache(uptr Size) {
@@ -181,6 +277,23 @@
 
   void releaseToOS() { releaseOlderThan(UINT64_MAX); }
 
+  void disableMemoryTagging() {
+    ScopedLock L(Mutex);
+    for (u32 I = 0; I != Config::SecondaryCacheQuarantineSize; ++I) {
+      if (Quarantine[I].CommitBase) {
+        unmap(reinterpret_cast<void *>(Quarantine[I].MapBase),
+              Quarantine[I].MapSize, UNMAP_ALL, &Quarantine[I].Data);
+        Quarantine[I].CommitBase = 0;
+      }
+    }
+    const u32 MaxCount = atomic_load_relaxed(&MaxEntriesCount);
+    for (u32 I = 0; I < MaxCount; I++)
+      if (Entries[I].CommitBase)
+        setMemoryPermission(Entries[I].CommitBase, Entries[I].CommitSize, 0,
+                            &Entries[I].Data);
+    QuarantinePos = -1U;
+  }
+
   void disable() { Mutex.lock(); }
 
   void enable() { Mutex.unlock(); }
@@ -196,12 +309,12 @@
     {
       ScopedLock L(Mutex);
       for (uptr I = 0; I < Config::SecondaryCacheEntriesArraySize; I++) {
-        if (!Entries[I].Block)
+        if (!Entries[I].CommitBase)
           continue;
         MapInfo[N].MapBase = reinterpret_cast<void *>(Entries[I].MapBase);
         MapInfo[N].MapSize = Entries[I].MapSize;
         MapInfo[N].Data = Entries[I].Data;
-        Entries[I].Block = 0;
+        Entries[I].CommitBase = 0;
         N++;
       }
       EntriesCount = 0;
@@ -212,37 +325,50 @@
             &MapInfo[I].Data);
   }
 
-  void releaseOlderThan(u64 Time) {
-    ScopedLock L(Mutex);
-    if (!EntriesCount)
-      return;
-    for (uptr I = 0; I < Config::SecondaryCacheEntriesArraySize; I++) {
-      if (!Entries[I].Block || !Entries[I].Time || Entries[I].Time > Time)
-        continue;
-      releasePagesToOS(Entries[I].Block, 0,
-                       Entries[I].BlockEnd - Entries[I].Block,
-                       &Entries[I].Data);
-      Entries[I].Time = 0;
-    }
-  }
-
   struct CachedBlock {
-    uptr Block;
-    uptr BlockEnd;
+    uptr CommitBase;
+    uptr CommitSize;
     uptr MapBase;
     uptr MapSize;
+    uptr BlockBegin;
     [[no_unique_address]] MapPlatformData Data;
     u64 Time;
   };
 
+  void releaseIfOlderThan(CachedBlock &Entry, u64 Time) {
+    if (!Entry.CommitBase || !Entry.Time)
+      return;
+    if (Entry.Time > Time) {
+      if (OldestTime == 0 || Entry.Time < OldestTime)
+        OldestTime = Entry.Time;
+      return;
+    }
+    releasePagesToOS(Entry.CommitBase, 0, Entry.CommitSize, &Entry.Data);
+    Entry.Time = 0;
+  }
+
+  void releaseOlderThan(u64 Time) {
+    ScopedLock L(Mutex);
+    if (!EntriesCount || OldestTime == 0 || OldestTime > Time)
+      return;
+    OldestTime = 0;
+    for (uptr I = 0; I < Config::SecondaryCacheQuarantineSize; I++)
+      releaseIfOlderThan(Quarantine[I], Time);
+    for (uptr I = 0; I < Config::SecondaryCacheEntriesArraySize; I++)
+      releaseIfOlderThan(Entries[I], Time);
+  }
+
   HybridMutex Mutex;
-  CachedBlock Entries[Config::SecondaryCacheEntriesArraySize];
   u32 EntriesCount;
+  u32 QuarantinePos;
   atomic_u32 MaxEntriesCount;
   atomic_uptr MaxEntrySize;
-  uptr LargestSize;
+  u64 OldestTime;
   u32 IsFullEvents;
   atomic_s32 ReleaseToOsIntervalMs;
+
+  CachedBlock Entries[Config::SecondaryCacheEntriesArraySize];
+  CachedBlock Quarantine[Config::SecondaryCacheQuarantineSize];
 };
 
 template <typename Config> class MapAllocator {
@@ -258,13 +384,15 @@
     initLinkerInitialized(S, ReleaseToOsInterval);
   }
 
-  void *allocate(uptr Size, uptr AlignmentHint = 0, uptr *BlockEnd = nullptr,
+  void *allocate(Options Options, uptr Size, uptr AlignmentHint = 0,
+                 uptr *BlockEnd = nullptr,
                  FillContentsMode FillContents = NoFill);
 
-  void deallocate(void *Ptr);
+  void deallocate(Options Options, void *Ptr);
 
   static uptr getBlockEnd(void *Ptr) {
-    return LargeBlock::getHeader(Ptr)->BlockEnd;
+    auto *B = LargeBlock::getHeader<Config>(Ptr);
+    return B->CommitBase + B->CommitSize;
   }
 
   static uptr getBlockSize(void *Ptr) {
@@ -284,8 +412,12 @@
   }
 
   template <typename F> void iterateOverBlocks(F Callback) const {
-    for (const auto &H : InUseBlocks)
-      Callback(reinterpret_cast<uptr>(&H) + LargeBlock::getHeaderSize());
+    for (const auto &H : InUseBlocks) {
+      uptr Ptr = reinterpret_cast<uptr>(&H) + LargeBlock::getHeaderSize();
+      if (allocatorSupportsMemoryTagging<Config>())
+        Ptr = untagPointer(Ptr);
+      Callback(Ptr);
+    }
   }
 
   uptr canCache(uptr Size) { return Cache.canCache(Size); }
@@ -294,6 +426,8 @@
 
   void releaseToOS() { Cache.releaseToOS(); }
 
+  void disableMemoryTagging() { Cache.disableMemoryTagging(); }
+
 private:
   typename Config::SecondaryCache Cache;
 
@@ -319,26 +453,36 @@
 // the committed memory will amount to something close to Size - AlignmentHint
 // (pending rounding and headers).
 template <typename Config>
-void *MapAllocator<Config>::allocate(uptr Size, uptr AlignmentHint,
-                                     uptr *BlockEnd,
+void *MapAllocator<Config>::allocate(Options Options, uptr Size, uptr Alignment,
+                                     uptr *BlockEndPtr,
                                      FillContentsMode FillContents) {
-  DCHECK_GE(Size, AlignmentHint);
+  if (Options.get(OptionBit::AddLargeAllocationSlack))
+    Size += 1UL << SCUDO_MIN_ALIGNMENT_LOG;
+  Alignment = Max(Alignment, 1UL << SCUDO_MIN_ALIGNMENT_LOG);
   const uptr PageSize = getPageSizeCached();
-  const uptr RoundedSize =
-      roundUpTo(Size + LargeBlock::getHeaderSize(), PageSize);
+  uptr RoundedSize =
+      roundUpTo(roundUpTo(Size, Alignment) + LargeBlock::getHeaderSize() +
+                    Chunk::getHeaderSize(),
+                PageSize);
+  if (Alignment > PageSize)
+    RoundedSize += Alignment - PageSize;
 
-  if (AlignmentHint < PageSize && Cache.canCache(RoundedSize)) {
+  if (Alignment < PageSize && Cache.canCache(RoundedSize)) {
     LargeBlock::Header *H;
     bool Zeroed;
-    if (Cache.retrieve(RoundedSize, &H, &Zeroed)) {
-      if (BlockEnd)
-        *BlockEnd = H->BlockEnd;
-      void *Ptr = reinterpret_cast<void *>(reinterpret_cast<uptr>(H) +
-                                           LargeBlock::getHeaderSize());
+    if (Cache.retrieve(Options, Size, Alignment, &H, &Zeroed)) {
+      const uptr BlockEnd = H->CommitBase + H->CommitSize;
+      if (BlockEndPtr)
+        *BlockEndPtr = BlockEnd;
+      uptr HInt = reinterpret_cast<uptr>(H);
+      if (allocatorSupportsMemoryTagging<Config>())
+        HInt = untagPointer(HInt);
+      const uptr PtrInt = HInt + LargeBlock::getHeaderSize();
+      void *Ptr = reinterpret_cast<void *>(PtrInt);
       if (FillContents && !Zeroed)
         memset(Ptr, FillContents == ZeroFill ? 0 : PatternFillByte,
-               H->BlockEnd - reinterpret_cast<uptr>(Ptr));
-      const uptr BlockSize = H->BlockEnd - reinterpret_cast<uptr>(H);
+               BlockEnd - PtrInt);
+      const uptr BlockSize = BlockEnd - HInt;
       {
         ScopedLock L(Mutex);
         InUseBlocks.push_back(H);
@@ -353,9 +497,8 @@
 
   MapPlatformData Data = {};
   const uptr MapSize = RoundedSize + 2 * PageSize;
-  uptr MapBase =
-      reinterpret_cast<uptr>(map(nullptr, MapSize, "scudo:secondary",
-                                 MAP_NOACCESS | MAP_ALLOWNOMEM, &Data));
+  uptr MapBase = reinterpret_cast<uptr>(
+      map(nullptr, MapSize, nullptr, MAP_NOACCESS | MAP_ALLOWNOMEM, &Data));
   if (UNLIKELY(!MapBase))
     return nullptr;
   uptr CommitBase = MapBase + PageSize;
@@ -363,11 +506,11 @@
 
   // In the unlikely event of alignments larger than a page, adjust the amount
   // of memory we want to commit, and trim the extra memory.
-  if (UNLIKELY(AlignmentHint >= PageSize)) {
+  if (UNLIKELY(Alignment >= PageSize)) {
     // For alignments greater than or equal to a page, the user pointer (e.g.
     // the pointer that is returned by the C or C++ allocation APIs) ends up
     // on a page boundary, and our headers will live in the preceding page.
-    CommitBase = roundUpTo(MapBase + PageSize + 1, AlignmentHint) - PageSize;
+    CommitBase = roundUpTo(MapBase + PageSize + 1, Alignment) - PageSize;
     const uptr NewMapBase = CommitBase - PageSize;
     DCHECK_GE(NewMapBase, MapBase);
     // We only trim the extra memory on 32-bit platforms: 64-bit platforms
@@ -376,9 +519,8 @@
       unmap(reinterpret_cast<void *>(MapBase), NewMapBase - MapBase, 0, &Data);
       MapBase = NewMapBase;
     }
-    const uptr NewMapEnd = CommitBase + PageSize +
-                           roundUpTo((Size - AlignmentHint), PageSize) +
-                           PageSize;
+    const uptr NewMapEnd =
+        CommitBase + PageSize + roundUpTo(Size, PageSize) + PageSize;
     DCHECK_LE(NewMapEnd, MapEnd);
     if (SCUDO_WORDSIZE == 32U && NewMapEnd != MapEnd) {
       unmap(reinterpret_cast<void *>(NewMapEnd), MapEnd - NewMapEnd, 0, &Data);
@@ -387,16 +529,34 @@
   }
 
   const uptr CommitSize = MapEnd - PageSize - CommitBase;
-  const uptr Ptr = reinterpret_cast<uptr>(
-      map(reinterpret_cast<void *>(CommitBase), CommitSize, "scudo:secondary",
-          MAP_RESIZABLE, &Data));
-  LargeBlock::Header *H = reinterpret_cast<LargeBlock::Header *>(Ptr);
+  const uptr AllocPos = roundDownTo(CommitBase + CommitSize - Size, Alignment);
+  const uptr MaxUnusedCacheBytes = MaxUnusedCachePages * getPageSizeCached();
+  if (useMemoryTagging<Config>(Options) && CommitSize > MaxUnusedCacheBytes) {
+    const uptr UntaggedPos = Max(AllocPos, CommitBase + MaxUnusedCacheBytes);
+    map(reinterpret_cast<void *>(CommitBase), UntaggedPos - CommitBase,
+        "scudo:secondary", MAP_RESIZABLE | MAP_MEMTAG, &Data);
+    map(reinterpret_cast<void *>(UntaggedPos),
+        CommitBase + CommitSize - UntaggedPos, "scudo:secondary", MAP_RESIZABLE,
+        &Data);
+  } else {
+    map(reinterpret_cast<void *>(CommitBase), CommitSize, "scudo:secondary",
+        MAP_RESIZABLE | (useMemoryTagging<Config>(Options) ? MAP_MEMTAG : 0),
+        &Data);
+  }
+  const uptr HeaderPos =
+      AllocPos - Chunk::getHeaderSize() - LargeBlock::getHeaderSize();
+  LargeBlock::Header *H = reinterpret_cast<LargeBlock::Header *>(
+      LargeBlock::addHeaderTag<Config>(HeaderPos));
+  if (useMemoryTagging<Config>(Options))
+    storeTags(LargeBlock::addHeaderTag<Config>(CommitBase),
+              reinterpret_cast<uptr>(H + 1));
   H->MapBase = MapBase;
   H->MapSize = MapEnd - MapBase;
-  H->BlockEnd = CommitBase + CommitSize;
+  H->CommitBase = CommitBase;
+  H->CommitSize = CommitSize;
   H->Data = Data;
-  if (BlockEnd)
-    *BlockEnd = CommitBase + CommitSize;
+  if (BlockEndPtr)
+    *BlockEndPtr = CommitBase + CommitSize;
   {
     ScopedLock L(Mutex);
     InUseBlocks.push_back(H);
@@ -407,13 +567,13 @@
     Stats.add(StatAllocated, CommitSize);
     Stats.add(StatMapped, H->MapSize);
   }
-  return reinterpret_cast<void *>(Ptr + LargeBlock::getHeaderSize());
+  return reinterpret_cast<void *>(HeaderPos + LargeBlock::getHeaderSize());
 }
 
-template <typename Config> void MapAllocator<Config>::deallocate(void *Ptr) {
-  LargeBlock::Header *H = LargeBlock::getHeader(Ptr);
-  const uptr Block = reinterpret_cast<uptr>(H);
-  const uptr CommitSize = H->BlockEnd - Block;
+template <typename Config>
+void MapAllocator<Config>::deallocate(Options Options, void *Ptr) {
+  LargeBlock::Header *H = LargeBlock::getHeader<Config>(Ptr);
+  const uptr CommitSize = H->CommitSize;
   {
     ScopedLock L(Mutex);
     InUseBlocks.remove(H);
@@ -422,12 +582,7 @@
     Stats.sub(StatAllocated, CommitSize);
     Stats.sub(StatMapped, H->MapSize);
   }
-  if (Cache.canCache(CommitSize) && Cache.store(H))
-    return;
-  void *Addr = reinterpret_cast<void *>(H->MapBase);
-  const uptr Size = H->MapSize;
-  MapPlatformData Data = H->Data;
-  unmap(Addr, Size, UNMAP_ALL, &Data);
+  Cache.store(Options, H);
 }
 
 template <typename Config>
diff --git a/tests/combined_test.cpp b/tests/combined_test.cpp
index e917159..a6c6d82 100644
--- a/tests/combined_test.cpp
+++ b/tests/combined_test.cpp
@@ -45,38 +45,36 @@
 }
 
 template <class AllocatorT>
-bool isTaggedAllocation(AllocatorT *Allocator, scudo::uptr Size,
-                        scudo::uptr Alignment) {
-  return Allocator->useMemoryTaggingTestOnly() &&
-         scudo::systemDetectsMemoryTagFaultsTestOnly() &&
-         isPrimaryAllocation<AllocatorT>(Size, Alignment);
-}
-
-template <class AllocatorT>
 void checkMemoryTaggingMaybe(AllocatorT *Allocator, void *P, scudo::uptr Size,
                              scudo::uptr Alignment) {
-  if (!isTaggedAllocation(Allocator, Size, Alignment))
-    return;
-
-  Size = scudo::roundUpTo(Size, scudo::archMemoryTagGranuleSize());
-  EXPECT_DEATH(
-      {
-        disableDebuggerdMaybe();
-        reinterpret_cast<char *>(P)[-1] = 0xaa;
-      },
-      "");
-  EXPECT_DEATH(
-      {
-        disableDebuggerdMaybe();
-        reinterpret_cast<char *>(P)[Size] = 0xaa;
-      },
-      "");
+  const scudo::uptr MinAlignment = 1UL << SCUDO_MIN_ALIGNMENT_LOG;
+  Size = scudo::roundUpTo(Size, MinAlignment);
+  if (Allocator->useMemoryTaggingTestOnly())
+    EXPECT_DEATH(
+        {
+          disableDebuggerdMaybe();
+          reinterpret_cast<char *>(P)[-1] = 0xaa;
+        },
+        "");
+  if (isPrimaryAllocation<AllocatorT>(Size, Alignment)
+          ? Allocator->useMemoryTaggingTestOnly()
+          : Alignment == MinAlignment) {
+    EXPECT_DEATH(
+        {
+          disableDebuggerdMaybe();
+          reinterpret_cast<char *>(P)[Size] = 0xaa;
+        },
+        "");
+  }
 }
 
 template <typename Config> struct TestAllocator : scudo::Allocator<Config> {
   TestAllocator() {
     this->reset();
     this->initThreadMaybe();
+    if (scudo::archSupportsMemoryTagging() &&
+        !scudo::systemDetectsMemoryTagFaultsTestOnly())
+      this->disableMemoryTagging();
   }
   ~TestAllocator() { this->unmapTestOnly(); }
 };
@@ -180,8 +178,8 @@
   bool Found = false;
   for (scudo::uptr I = 0; I < 1024U && !Found; I++) {
     void *P = Allocator->allocate(NeedleSize, Origin);
-    if (Allocator->untagPointerMaybe(P) ==
-        Allocator->untagPointerMaybe(NeedleP))
+    if (Allocator->getHeaderTaggedPointer(P) ==
+        Allocator->getHeaderTaggedPointer(NeedleP))
       Found = true;
     Allocator->deallocate(P, Origin);
   }
@@ -248,38 +246,30 @@
 
   Allocator->releaseToOS();
 
-  if (Allocator->useMemoryTaggingTestOnly() &&
-      scudo::systemDetectsMemoryTagFaultsTestOnly()) {
-    // Check that use-after-free is detected.
-    for (scudo::uptr SizeLog = 0U; SizeLog <= 20U; SizeLog++) {
-      const scudo::uptr Size = 1U << SizeLog;
-      if (!isTaggedAllocation(Allocator.get(), Size, 1))
-        continue;
-      // UAF detection is probabilistic, so we repeat the test up to 256 times
-      // if necessary. With 15 possible tags this means a 1 in 15^256 chance of
-      // a false positive.
-      EXPECT_DEATH(
-          {
-            disableDebuggerdMaybe();
-            for (unsigned I = 0; I != 256; ++I) {
-              void *P = Allocator->allocate(Size, Origin);
-              Allocator->deallocate(P, Origin);
-              reinterpret_cast<char *>(P)[0] = 0xaa;
-            }
-          },
-          "");
-      EXPECT_DEATH(
-          {
-            disableDebuggerdMaybe();
-            for (unsigned I = 0; I != 256; ++I) {
-              void *P = Allocator->allocate(Size, Origin);
-              Allocator->deallocate(P, Origin);
-              reinterpret_cast<char *>(P)[Size - 1] = 0xaa;
-            }
-          },
-          "");
-    }
+  // Check that use-after-free is detected.
+  for (scudo::uptr SizeLog = 0U; SizeLog <= 20U; SizeLog++) {
+    const scudo::uptr Size = 1U << SizeLog;
+    if (!Allocator->useMemoryTaggingTestOnly())
+      continue;
+    EXPECT_DEATH(
+        {
+          disableDebuggerdMaybe();
+          void *P = Allocator->allocate(Size, Origin);
+          Allocator->deallocate(P, Origin);
+          reinterpret_cast<char *>(P)[0] = 0xaa;
+        },
+        "");
+    EXPECT_DEATH(
+        {
+          disableDebuggerdMaybe();
+          void *P = Allocator->allocate(Size, Origin);
+          Allocator->deallocate(P, Origin);
+          reinterpret_cast<char *>(P)[Size - 1] = 0xaa;
+        },
+        "");
+  }
 
+  if (Allocator->useMemoryTaggingTestOnly()) {
     // Check that disabling memory tagging works correctly.
     void *P = Allocator->allocate(2048, Origin);
     EXPECT_DEATH(reinterpret_cast<char *>(P)[2048] = 0xaa, "");
@@ -289,7 +279,7 @@
     Allocator->deallocate(P, Origin);
 
     P = Allocator->allocate(2048, Origin);
-    EXPECT_EQ(Allocator->untagPointerMaybe(P), P);
+    EXPECT_EQ(scudo::untagPointer(P), P);
     reinterpret_cast<char *>(P)[2048] = 0xaa;
     Allocator->deallocate(P, Origin);
 
diff --git a/tests/secondary_test.cpp b/tests/secondary_test.cpp
index 846ec8f..a557042 100644
--- a/tests/secondary_test.cpp
+++ b/tests/secondary_test.cpp
@@ -27,29 +27,29 @@
   std::unique_ptr<SecondaryT> L(new SecondaryT);
   L->init(&S);
   const scudo::uptr Size = 1U << 16;
-  void *P = L->allocate(Size);
+  void *P = L->allocate(scudo::Options{}, Size);
   EXPECT_NE(P, nullptr);
   memset(P, 'A', Size);
   EXPECT_GE(SecondaryT::getBlockSize(P), Size);
-  L->deallocate(P);
+  L->deallocate(scudo::Options{}, P);
   // If the Secondary can't cache that pointer, it will be unmapped.
   if (!L->canCache(Size))
     EXPECT_DEATH(memset(P, 'A', Size), "");
 
   const scudo::uptr Align = 1U << 16;
-  P = L->allocate(Size + Align, Align);
+  P = L->allocate(scudo::Options{}, Size + Align, Align);
   EXPECT_NE(P, nullptr);
   void *AlignedP = reinterpret_cast<void *>(
       scudo::roundUpTo(reinterpret_cast<scudo::uptr>(P), Align));
   memset(AlignedP, 'A', Size);
-  L->deallocate(P);
+  L->deallocate(scudo::Options{}, P);
 
   std::vector<void *> V;
   for (scudo::uptr I = 0; I < 32U; I++)
-    V.push_back(L->allocate(Size));
+    V.push_back(L->allocate(scudo::Options{}, Size));
   std::shuffle(V.begin(), V.end(), std::mt19937(std::random_device()()));
   while (!V.empty()) {
-    L->deallocate(V.back());
+    L->deallocate(scudo::Options{}, V.back());
     V.pop_back();
   }
   scudo::ScopedString Str(1024);
@@ -59,11 +59,14 @@
 
 struct NoCacheConfig {
   typedef scudo::MapAllocatorNoCache SecondaryCache;
+  static const bool MaySupportMemoryTagging = false;
 };
 
 struct TestConfig {
   typedef scudo::MapAllocatorCache<TestConfig> SecondaryCache;
+  static const bool MaySupportMemoryTagging = false;
   static const scudo::u32 SecondaryCacheEntriesArraySize = 128U;
+  static const scudo::u32 SecondaryCacheQuarantineSize = 0U;
   static const scudo::u32 SecondaryCacheDefaultMaxEntriesCount = 64U;
   static const scudo::uptr SecondaryCacheDefaultMaxEntrySize = 1UL << 20;
   static const scudo::s32 SecondaryCacheMinReleaseToOsIntervalMs = INT32_MIN;
@@ -97,12 +100,12 @@
             scudo::roundUpTo((1U << SizeLog) + Delta, MinAlign);
         const scudo::uptr Size =
             HeaderSize + UserSize + (Align > MinAlign ? Align - HeaderSize : 0);
-        void *P = L->allocate(Size, Align);
+        void *P = L->allocate(scudo::Options{}, Size, Align);
         EXPECT_NE(P, nullptr);
         void *AlignedP = reinterpret_cast<void *>(
             scudo::roundUpTo(reinterpret_cast<scudo::uptr>(P), Align));
         memset(AlignedP, 0xff, UserSize);
-        L->deallocate(P);
+        L->deallocate(scudo::Options{}, P);
       }
     }
   }
@@ -117,7 +120,7 @@
   std::vector<void *> V;
   const scudo::uptr PageSize = scudo::getPageSizeCached();
   for (scudo::uptr I = 0; I < 32U; I++)
-    V.push_back(L->allocate((std::rand() % 16) * PageSize));
+    V.push_back(L->allocate(scudo::Options{}, (std::rand() % 16) * PageSize));
   auto Lambda = [V](scudo::uptr Block) {
     EXPECT_NE(std::find(V.begin(), V.end(), reinterpret_cast<void *>(Block)),
               V.end());
@@ -126,7 +129,7 @@
   L->iterateOverBlocks(Lambda);
   L->enable();
   while (!V.empty()) {
-    L->deallocate(V.back());
+    L->deallocate(scudo::Options{}, V.back());
     V.pop_back();
   }
   scudo::ScopedString Str(1024);
@@ -172,14 +175,14 @@
   for (scudo::uptr I = 0; I < 128U; I++) {
     // Deallocate 75% of the blocks.
     const bool Deallocate = (rand() & 3) != 0;
-    void *P = L->allocate((std::rand() % 16) * PageSize);
+    void *P = L->allocate(scudo::Options{}, (std::rand() % 16) * PageSize);
     if (Deallocate)
-      L->deallocate(P);
+      L->deallocate(scudo::Options{}, P);
     else
       V.push_back(P);
   }
   while (!V.empty()) {
-    L->deallocate(V.back());
+    L->deallocate(scudo::Options{}, V.back());
     V.pop_back();
   }
 }
diff --git a/tsd_exclusive.h b/tsd_exclusive.h
index 1704c8c..c9a9de1 100644
--- a/tsd_exclusive.h
+++ b/tsd_exclusive.h
@@ -36,6 +36,13 @@
     initLinkerInitialized(Instance);
   }
 
+  void initOnceMaybe(Allocator *Instance) {
+    ScopedLock L(Mutex);
+    if (LIKELY(Initialized))
+      return;
+    initLinkerInitialized(Instance); // Sets Initialized.
+  }
+
   void unmapTestOnly() {}
 
   ALWAYS_INLINE void initThreadMaybe(Allocator *Instance, bool MinimalInit) {
@@ -80,13 +87,6 @@
   bool getDisableMemInit() { return State.DisableMemInit; }
 
 private:
-  void initOnceMaybe(Allocator *Instance) {
-    ScopedLock L(Mutex);
-    if (LIKELY(Initialized))
-      return;
-    initLinkerInitialized(Instance); // Sets Initialized.
-  }
-
   // Using minimal initialization allows for global initialization while keeping
   // the thread specific structure untouched. The fallback structure will be
   // used instead.
diff --git a/tsd_shared.h b/tsd_shared.h
index 6a68b3e..b9b6f38 100644
--- a/tsd_shared.h
+++ b/tsd_shared.h
@@ -38,6 +38,13 @@
     initLinkerInitialized(Instance);
   }
 
+  void initOnceMaybe(Allocator *Instance) {
+    ScopedLock L(Mutex);
+    if (LIKELY(Initialized))
+      return;
+    initLinkerInitialized(Instance); // Sets Initialized.
+  }
+
   void unmapTestOnly() { setCurrentTSD(nullptr); }
 
   ALWAYS_INLINE void initThreadMaybe(Allocator *Instance,
@@ -139,13 +146,6 @@
     *getTlsPtr() |= B;
   }
 
-  void initOnceMaybe(Allocator *Instance) {
-    ScopedLock L(Mutex);
-    if (LIKELY(Initialized))
-      return;
-    initLinkerInitialized(Instance); // Sets Initialized.
-  }
-
   NOINLINE void initThread(Allocator *Instance) {
     initOnceMaybe(Instance);
     // Initial context assignment is done in a plain round-robin fashion.
diff --git a/wrappers_c.inc b/wrappers_c.inc
index 9d64003..43efb02 100644
--- a/wrappers_c.inc
+++ b/wrappers_c.inc
@@ -260,4 +260,12 @@
       pattern_fill_contents ? scudo::PatternOrZeroFill : scudo::NoFill);
 }
 
+// Sets whether scudo adds a small amount of slack at the end of large
+// allocations, before the guard page. This can be enabled to work around buggy
+// applications that read a few bytes past the end of their allocation.
+INTERFACE WEAK void
+SCUDO_PREFIX(malloc_set_add_large_allocation_slack)(int add_slack) {
+  SCUDO_ALLOCATOR.setAddLargeAllocationSlack(add_slack);
+}
+
 } // extern "C"
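
For reference, a hypothetical consumer of the new wrapper (assuming an
empty SCUDO_PREFIX, so that the exported symbol is named
malloc_set_add_large_allocation_slack; the actual name depends on the
build) could enable the workaround like this:

    // Hypothetical usage sketch.
    extern "C" void malloc_set_add_large_allocation_slack(int add_slack);

    int main() {
      malloc_set_add_large_allocation_slack(1); // pad large allocations
      // ... run a workload that reads slightly past its allocations ...
      malloc_set_add_large_allocation_slack(0); // restore strict behavior
    }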