[TSan][Darwin] Avoid crashes due to interpreting non-zero shadow content as a pointer We would like to use TLS to store the ThreadState object (or at least a reference ot it), but on Darwin accessing TLS via __thread or manually by using pthread_key_* is problematic, because there are several places where interceptors are called when TLS is not accessible (early process startup, thread cleanup, ...). Previously, we used a "poor man's TLS" implementation, where we use the shadow memory of the pointer returned by pthread_self() to store a pointer to the ThreadState object. The problem with that was that certain operations can populate shadow bytes unbeknownst to TSan, and we later interpret these non-zero bytes as the pointer to our ThreadState object and crash on when dereferencing the pointer. This patch changes the storage location of our reference to the ThreadState object to "real" TLS. We make this work by artificially keeping this reference alive in the pthread_key destructor by resetting the key value with pthread_setspecific(). This change also fixes the issue were the ThreadState object is re-allocated after DestroyThreadState() because intercepted functions can still get called on the terminating thread after the THREAD_TERMINATE event. Radar-Id: rdar://problem/72010355 Reviewed By: dvyukov Differential Revision: https://reviews.llvm.org/D110236 GitOrigin-RevId: 858eb8fc11e2e683ef1cef956ac102bfc5f0a1a8

commit: 18edc4bfab45933788c203d216ca2c93151e2a96 [log] [tgz]
author: Julian Lettner <julian.lettner@apple.com> Tue Nov 30 12:12:14 2021 -0800
committer: Copybara-Service <copybara-worker@google.com> Tue Nov 30 14:56:43 2021 -0800
tree: 7fdfa013a9d8e128ba08c18b2318fc474edec99a
parent: 4858d527ce1dabf36be5b971a2f2da100d6b4037 [diff]
diff --git a/lib/tsan/rtl/tsan_platform_mac.cpp b/lib/tsan/rtl/tsan_platform_mac.cpp
index 10e0725..44b98d4 100644
--- a/lib/tsan/rtl/tsan_platform_mac.cpp
+++ b/lib/tsan/rtl/tsan_platform_mac.cpp

@@ -25,6 +25,7 @@
 #include "tsan_rtl.h"
 #include "tsan_flags.h"
 
+#include <limits.h>
 #include <mach/mach.h>
 #include <pthread.h>
 #include <signal.h>
@@ -45,70 +46,83 @@
 namespace __tsan {
 
 #if !SANITIZER_GO
-static void *SignalSafeGetOrAllocate(uptr *dst, uptr size) {
-  atomic_uintptr_t *a = (atomic_uintptr_t *)dst;
-  void *val = (void *)atomic_load_relaxed(a);
-  atomic_signal_fence(memory_order_acquire);  // Turns the previous load into
-                                              // acquire wrt signals.
-  if (UNLIKELY(val == nullptr)) {
-    val = (void *)internal_mmap(nullptr, size, PROT_READ | PROT_WRITE,
-                                MAP_PRIVATE | MAP_ANON, -1, 0);
-    CHECK(val);
-    void *cmp = nullptr;
-    if (!atomic_compare_exchange_strong(a, (uintptr_t *)&cmp, (uintptr_t)val,
-                                        memory_order_acq_rel)) {
-      internal_munmap(val, size);
-      val = cmp;
-    }
-  }
-  return val;
+static char main_thread_state[sizeof(ThreadState)] ALIGNED(
+    SANITIZER_CACHE_LINE_SIZE);
+static ThreadState *dead_thread_state;
+static pthread_key_t thread_state_key;
+
+// We rely on the following documented, but Darwin-specific behavior to keep the
+// reference to the ThreadState object alive in TLS:
+// pthread_key_create man page:
+//   If, after all the destructors have been called for all non-NULL values with
+//   associated destructors, there are still some non-NULL values with
+//   associated destructors, then the process is repeated.  If, after at least
+//   [PTHREAD_DESTRUCTOR_ITERATIONS] iterations of destructor calls for
+//   outstanding non-NULL values, there are still some non-NULL values with
+//   associated destructors, the implementation stops calling destructors.
+static_assert(PTHREAD_DESTRUCTOR_ITERATIONS == 4, "Small number of iterations");
+static void ThreadStateDestructor(void *thr) {
+  int res = pthread_setspecific(thread_state_key, thr);
+  CHECK_EQ(res, 0);
 }
 
-// On OS X, accessing TLVs via __thread or manually by using pthread_key_* is
-// problematic, because there are several places where interceptors are called
-// when TLVs are not accessible (early process startup, thread cleanup, ...).
-// The following provides a "poor man's TLV" implementation, where we use the
-// shadow memory of the pointer returned by pthread_self() to store a pointer to
-// the ThreadState object. The main thread's ThreadState is stored separately
-// in a static variable, because we need to access it even before the
-// shadow memory is set up.
-static uptr main_thread_identity = 0;
-ALIGNED(64) static char main_thread_state[sizeof(ThreadState)];
-static ThreadState *main_thread_state_loc = (ThreadState *)main_thread_state;
+static void InitializeThreadStateStorage() {
+  int res;
+  CHECK_EQ(thread_state_key, 0);
+  res = pthread_key_create(&thread_state_key, ThreadStateDestructor);
+  CHECK_EQ(res, 0);
+  res = pthread_setspecific(thread_state_key, main_thread_state);
+  CHECK_EQ(res, 0);
 
-// We cannot use pthread_self() before libpthread has been initialized.  Our
-// current heuristic for guarding this is checking `main_thread_identity` which
-// is only assigned in `__tsan::InitializePlatform`.
-static ThreadState **cur_thread_location() {
-  if (main_thread_identity == 0)
-    return &main_thread_state_loc;
-  uptr thread_identity = (uptr)pthread_self();
-  if (thread_identity == main_thread_identity)
-    return &main_thread_state_loc;
-  return (ThreadState **)MemToMeta(thread_identity);
+  auto dts = (ThreadState *)MmapOrDie(sizeof(ThreadState), "ThreadState");
+  dts->fast_state.SetIgnoreBit();
+  dts->ignore_interceptors = 1;
+  dts->is_dead = true;
+  const_cast<Tid &>(dts->tid) = kInvalidTid;
+  res = internal_mprotect(dts, sizeof(ThreadState), PROT_READ);  // immutable
+  CHECK_EQ(res, 0);
+  dead_thread_state = dts;
 }
 
 ThreadState *cur_thread() {
-  return (ThreadState *)SignalSafeGetOrAllocate(
-      (uptr *)cur_thread_location(), sizeof(ThreadState));
+  // Some interceptors get called before libpthread has been initialized and in
+  // these cases we must avoid calling any pthread APIs.
+  if (UNLIKELY(!thread_state_key)) {
+    return (ThreadState *)main_thread_state;
+  }
+
+  // We only reach this line after InitializeThreadStateStorage() ran, i.e,
+  // after TSan (and therefore libpthread) have been initialized.
+  ThreadState *thr = (ThreadState *)pthread_getspecific(thread_state_key);
+  if (UNLIKELY(!thr)) {
+    thr = (ThreadState *)MmapOrDie(sizeof(ThreadState), "ThreadState");
+    int res = pthread_setspecific(thread_state_key, thr);
+    CHECK_EQ(res, 0);
+  }
+  return thr;
 }
 
 void set_cur_thread(ThreadState *thr) {
-  *cur_thread_location() = thr;
+  int res = pthread_setspecific(thread_state_key, thr);
+  CHECK_EQ(res, 0);
 }
 
-// TODO(kuba.brecka): This is not async-signal-safe. In particular, we call
-// munmap first and then clear `fake_tls`; if we receive a signal in between,
-// handler will try to access the unmapped ThreadState.
 void cur_thread_finalize() {
-  ThreadState **thr_state_loc = cur_thread_location();
-  if (thr_state_loc == &main_thread_state_loc) {
+  ThreadState *thr = (ThreadState *)pthread_getspecific(thread_state_key);
+  CHECK(thr);
+  if (thr == (ThreadState *)main_thread_state) {
     // Calling dispatch_main() or xpc_main() actually invokes pthread_exit to
     // exit the main thread. Let's keep the main thread's ThreadState.
     return;
   }
-  internal_munmap(*thr_state_loc, sizeof(ThreadState));
-  *thr_state_loc = nullptr;
+  // Intercepted functions can still get called after cur_thread_finalize()
+  // (called from DestroyThreadState()), so put a fake thread state for "dead"
+  // threads.  An alternative solution would be to release the ThreadState
+  // object from THREAD_DESTROY (which is delivered later and on the parent
+  // thread) instead of THREAD_TERMINATE.
+  int res = pthread_setspecific(thread_state_key, dead_thread_state);
+  CHECK_EQ(res, 0);
+  UnmapOrDie(thr, sizeof(ThreadState));
 }
 #endif
 
@@ -215,11 +229,10 @@
       ThreadStart(thr, tid, GetTid(), ThreadType::Worker);
     }
   } else if (event == PTHREAD_INTROSPECTION_THREAD_TERMINATE) {
-    if (thread == pthread_self()) {
-      ThreadState *thr = cur_thread();
-      if (thr->tctx) {
-        DestroyThreadState();
-      }
+    CHECK_EQ(thread, pthread_self());
+    ThreadState *thr = cur_thread();
+    if (thr->tctx) {
+      DestroyThreadState();
     }
   }
 
@@ -246,8 +259,7 @@
 #if !SANITIZER_GO
   CheckAndProtect();
 
-  CHECK_EQ(main_thread_identity, 0);
-  main_thread_identity = (uptr)pthread_self();
+  InitializeThreadStateStorage();
 
   prev_pthread_introspection_hook =
       pthread_introspection_hook_install(&my_pthread_introspection_hook);
@@ -279,24 +291,11 @@
 extern "C" void __tsan_tls_initialization() {}
 
 void ImitateTlsWrite(ThreadState *thr, uptr tls_addr, uptr tls_size) {
-  // The pointer to the ThreadState object is stored in the shadow memory
-  // of the tls.
-  uptr tls_end = tls_addr + tls_size;
-  uptr thread_identity = (uptr)pthread_self();
   const uptr pc = StackTrace::GetNextInstructionPc(
       reinterpret_cast<uptr>(__tsan_tls_initialization));
-  if (thread_identity == main_thread_identity) {
-    MemoryRangeImitateWrite(thr, pc, tls_addr, tls_size);
-  } else {
-    uptr thr_state_start = thread_identity;
-    uptr thr_state_end = thr_state_start + sizeof(uptr);
-    CHECK_GE(thr_state_start, tls_addr);
-    CHECK_LE(thr_state_start, tls_addr + tls_size);
-    CHECK_GE(thr_state_end, tls_addr);
-    CHECK_LE(thr_state_end, tls_addr + tls_size);
-    MemoryRangeImitateWrite(thr, pc, tls_addr, thr_state_start - tls_addr);
-    MemoryRangeImitateWrite(thr, pc, thr_state_end, tls_end - thr_state_end);
-  }
+  // Unlike Linux, we only store a pointer to the ThreadState object in TLS;
+  // just mark the entire range as written to.
+  MemoryRangeImitateWrite(thr, pc, tls_addr, tls_size);
 }
 #endif
commit	18edc4bfab45933788c203d216ca2c93151e2a96	[log] [tgz]
author	Julian Lettner <julian.lettner@apple.com>	Tue Nov 30 12:12:14 2021 -0800
committer	Copybara-Service <copybara-worker@google.com>	Tue Nov 30 14:56:43 2021 -0800
tree	7fdfa013a9d8e128ba08c18b2318fc474edec99a
parent	4858d527ce1dabf36be5b971a2f2da100d6b4037 [diff]