[scudo][standalone] Implement TSD registry disabling

Summary:
In order to implement `malloc_{enable|disable}` we were only disabling
(or rather, locking) the Primary and the Secondary. That meant that
allocations could still be serviced from a thread's TSD as long as its
local cache didn't have to be refilled from the Primary.
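
To make the hole concrete, here is a sketch of the old scheme (purely
illustrative; the extern "C" declarations just mirror the C wrappers so
that the snippet stands alone):

  #include <stdlib.h>
  extern "C" void malloc_disable(void);
  extern "C" void malloc_enable(void);

  void OldSchemeExample() {
    malloc_disable();     // only locked the Primary and the Secondary
    void *P = malloc(16); // could still succeed, served straight from the
                          // calling thread's local cache
    malloc_enable();
    free(P);
  }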

This wasn't working out for Android tests, so this change implements
registry disabling (that is, locking) so that `getTSDAndLock` doesn't
return a TSD if the allocator is disabled. This also means that the
Primary no longer has to be disabled in this situation.
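
For context, the typical consumer of `malloc_{enable|disable}` looks
something like the following (a hedged sketch; `ForkWithHeapQuiesced` is a
hypothetical helper, not something added by this change):

  #include <unistd.h>
  extern "C" void malloc_disable(void);
  extern "C" void malloc_enable(void);

  // Quiesce the heap around fork() so the child doesn't inherit allocator
  // locks held by other threads.
  pid_t ForkWithHeapQuiesced() {
    malloc_disable();         // with this change, also locks the TSD registry
    const pid_t Pid = fork();
    malloc_enable();          // runs in both the parent and the child
    return Pid;
  }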

For the Shared Registry, we loop through all the TSDs and lock them.
For the Exclusive Registry, we add a `Disabled` boolean to the Registry
that forces `getTSDAndLock` to use the Fallback TSD instead of the
thread-local one. Disabling the Registry is then done by locking the
Fallback TSD and setting the boolean in question (I don't think this
needed an atomic variable, but I might be wrong).
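
As a rough mental model for the Exclusive case, the scheme boils down to the
following standalone sketch (std::mutex and std::atomic stand in for Scudo's
HybridMutex and atomic_u8, the per-thread caches are elided, and disable()
and enable() are assumed to run on the same thread, as they do for
malloc_disable()/malloc_enable(); the real code is in the tsd_exclusive.h
hunk below):

  #include <atomic>
  #include <mutex>

  class MiniExclusiveRegistry {
  public:
    // Lock the fallback first, then divert every thread towards it.
    void disable() {
      FallbackMutex.lock();
      Disabled.store(true, std::memory_order_release);
    }
    // Reverse order: clear the flag, then release the fallback so that
    // threads blocked in lockForAllocation() can make progress again.
    void enable() {
      Disabled.store(false, std::memory_order_release);
      FallbackMutex.unlock();
    }
    // Stand-in for getTSDAndLock(): while disabled, callers funnel into the
    // fallback lock and block there until enable() runs. A thread that read
    // the flag just before disable() completed can still slip through, hence
    // the "best-effort" wording in the updated TODO in combined.h.
    bool lockForAllocation() {
      if (!Disabled.load(std::memory_order_acquire))
        return false;       // fast path: thread-local TSD, no locking needed
      FallbackMutex.lock(); // blocks for the duration of the disabled span
      return true;          // caller must call unlockFallback() when done
    }
    void unlockFallback() { FallbackMutex.unlock(); }

  private:
    std::atomic<bool> Disabled{false};
    std::mutex FallbackMutex;
  };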

I clang-formatted the whole thing as usual, hence the couple of extra
whitespace-only changes in this CL.

Reviewers: cferris, pcc, hctim, morehouse, eugenis

Subscribers: jfb, #sanitizers, llvm-commits

Tags: #sanitizers, #llvm

Differential Revision: https://reviews.llvm.org/D71719

GitOrigin-RevId: 77e906ac78abda8bb2bdcc223e4b5c457f08bb04
diff --git a/combined.h b/combined.h
index 864436d..8a33f82 100644
--- a/combined.h
+++ b/combined.h
@@ -22,7 +22,7 @@
 #include "tsd.h"
 
 #ifdef GWP_ASAN_HOOKS
-# include "gwp_asan/guarded_pool_allocator.h"
+#include "gwp_asan/guarded_pool_allocator.h"
 // GWP-ASan is declared here in order to avoid indirect call overhead. It's also
 // instantiated outside of the Allocator class, as the allocator is only
 // zero-initialised. GWP-ASan requires constant initialisation, and the Scudo
@@ -414,19 +414,19 @@
     return NewPtr;
   }
 
-  // TODO(kostyak): while this locks the Primary & Secondary, it still allows
-  //                pointers to be fetched from the TSD. We ultimately want to
-  //                lock the registry as well. For now, it's good enough.
+  // TODO(kostyak): disable() is currently best-effort. There are some small
+  //                windows of time when an allocation could still succeed after
+  //                this function finishes. We will revisit that later.
   void disable() {
     initThreadMaybe();
-    Primary.disable();
+    TSDRegistry.disable();
     Secondary.disable();
   }
 
   void enable() {
     initThreadMaybe();
     Secondary.enable();
-    Primary.enable();
+    TSDRegistry.enable();
   }
 
   // The function returns the amount of bytes required to store the statistics,
diff --git a/flags.cpp b/flags.cpp
index 2540789..dd9f050 100644
--- a/flags.cpp
+++ b/flags.cpp
@@ -40,7 +40,7 @@
 
 #ifdef GWP_ASAN_HOOKS
 #define GWP_ASAN_OPTION(Type, Name, DefaultValue, Description)                 \
-  Parser->registerFlag("GWP_ASAN_"#Name, Description, FlagType::FT_##Type,     \
+  Parser->registerFlag("GWP_ASAN_" #Name, Description, FlagType::FT_##Type,    \
                        reinterpret_cast<void *>(&F->GWP_ASAN_##Name));
 #include "gwp_asan/options.inc"
 #undef GWP_ASAN_OPTION
diff --git a/tests/wrappers_c_test.cpp b/tests/wrappers_c_test.cpp
index 03f64c8..c3699f1 100644
--- a/tests/wrappers_c_test.cpp
+++ b/tests/wrappers_c_test.cpp
@@ -284,6 +284,21 @@
   free(P);
 }
 
+// We expect heap operations within a disable/enable scope to deadlock.
+TEST(ScudoWrappersCTest, MallocDisableDeadlock) {
+  EXPECT_DEATH(
+      {
+        void *P = malloc(Size);
+        EXPECT_NE(P, nullptr);
+        free(P);
+        malloc_disable();
+        alarm(1);
+        P = malloc(Size);
+        malloc_enable();
+      },
+      "");
+}
+
 #if !SCUDO_FUCHSIA
 TEST(ScudoWrappersCTest, MallocInfo) {
   char Buffer[64];
diff --git a/tsd_exclusive.h b/tsd_exclusive.h
index 971ae48..89b001a 100644
--- a/tsd_exclusive.h
+++ b/tsd_exclusive.h
@@ -48,7 +48,8 @@
   }
 
   ALWAYS_INLINE TSD<Allocator> *getTSDAndLock(bool *UnlockRequired) {
-    if (LIKELY(State == ThreadState::Initialized)) {
+    if (LIKELY(State == ThreadState::Initialized &&
+               !atomic_load(&Disabled, memory_order_acquire))) {
       *UnlockRequired = false;
       return &ThreadTSD;
     }
@@ -58,6 +59,18 @@
     return FallbackTSD;
   }
 
+  // To disable the exclusive TSD registry, we effectively lock the fallback TSD
+  // and force all threads to attempt to use it instead of their local one.
+  void disable() {
+    FallbackTSD->lock();
+    atomic_store(&Disabled, 1U, memory_order_release);
+  }
+
+  void enable() {
+    atomic_store(&Disabled, 0U, memory_order_release);
+    FallbackTSD->unlock();
+  }
+
 private:
   void initOnceMaybe(Allocator *Instance) {
     ScopedLock L(Mutex);
@@ -81,6 +94,7 @@
 
   pthread_key_t PThreadKey;
   bool Initialized;
+  atomic_u8 Disabled;
   TSD<Allocator> *FallbackTSD;
   HybridMutex Mutex;
   static THREADLOCAL ThreadState State;
diff --git a/tsd_shared.h b/tsd_shared.h
index 05b367e..3472950 100644
--- a/tsd_shared.h
+++ b/tsd_shared.h
@@ -72,6 +72,16 @@
     return getTSDAndLockSlow(TSD);
   }
 
+  void disable() {
+    for (u32 I = 0; I < NumberOfTSDs; I++)
+      TSDs[I].lock();
+  }
+
+  void enable() {
+    for (u32 I = 0; I < NumberOfTSDs; I++)
+      TSDs[I].unlock();
+  }
+
 private:
   ALWAYS_INLINE void setCurrentTSD(TSD<Allocator> *CurrentTSD) {
 #if _BIONIC