tsan: fix false positives in dynamic libs with static tls

The added test demonstrates  loading a dynamic library with static TLS.
Such static TLS is a hack that allows a dynamic library to have faster TLS,
but it can be loaded only iff all threads happened to allocate some excess
of static TLS space for whatever reason. If it's not the case loading fails with:

dlopen: cannot load any more object with static TLS

We used to produce a false positive because dlopen will write into TLS
of all existing threads to initialize/zero TLS region for the loaded library.
And this appears to be racing with initialization of TLS in the thread
since we model a write into the whole static TLS region (we don't what part
of it is currently unused):

WARNING: ThreadSanitizer: data race (pid=2317365)
  Write of size 1 at 0x7f1fa9bfcdd7 by main thread:
    0 memset
    1 init_one_static_tls
    2 __pthread_init_static_tls
    [[ this is where main calls dlopen ]]
    3 main
  Previous write of size 8 at 0x7f1fa9bfcdd0 by thread T1:
    0 __tsan_tls_initialization

Fix this by ignoring accesses during dlopen.

Reviewed By: melver

Differential Revision: https://reviews.llvm.org/D114953

GitOrigin-RevId: 97b4e631173a5f2f1f79361f9fc56c8f85681f73
diff --git a/lib/asan/asan_interceptors.cpp b/lib/asan/asan_interceptors.cpp
index b289091..2ff314a 100644
--- a/lib/asan/asan_interceptors.cpp
+++ b/lib/asan/asan_interceptors.cpp
@@ -130,23 +130,24 @@
 #define COMMON_INTERCEPTOR_BLOCK_REAL(name) REAL(name)
 // Strict init-order checking is dlopen-hostile:
 // https://github.com/google/sanitizers/issues/178
-#define COMMON_INTERCEPTOR_ON_DLOPEN(filename, flag)                           \
-  do {                                                                         \
-    if (flags()->strict_init_order)                                            \
-      StopInitOrderChecking();                                                 \
-    CheckNoDeepBind(filename, flag);                                           \
-  } while (false)
-#define COMMON_INTERCEPTOR_ON_EXIT(ctx) OnExit()
-#define COMMON_INTERCEPTOR_LIBRARY_LOADED(filename, handle)
-#define COMMON_INTERCEPTOR_LIBRARY_UNLOADED()
-#define COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED (!asan_inited)
-#define COMMON_INTERCEPTOR_GET_TLS_RANGE(begin, end)                           \
-  if (AsanThread *t = GetCurrentThread()) {                                    \
-    *begin = t->tls_begin();                                                   \
-    *end = t->tls_end();                                                       \
-  } else {                                                                     \
-    *begin = *end = 0;                                                         \
-  }
+#  define COMMON_INTERCEPTOR_DLOPEN(filename, flag) \
+    ({                                              \
+      if (flags()->strict_init_order)               \
+        StopInitOrderChecking();                    \
+      CheckNoDeepBind(filename, flag);              \
+      REAL(dlopen)(filename, flag);                 \
+    })
+#  define COMMON_INTERCEPTOR_ON_EXIT(ctx) OnExit()
+#  define COMMON_INTERCEPTOR_LIBRARY_LOADED(filename, handle)
+#  define COMMON_INTERCEPTOR_LIBRARY_UNLOADED()
+#  define COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED (!asan_inited)
+#  define COMMON_INTERCEPTOR_GET_TLS_RANGE(begin, end) \
+    if (AsanThread *t = GetCurrentThread()) {          \
+      *begin = t->tls_begin();                         \
+      *end = t->tls_end();                             \
+    } else {                                           \
+      *begin = *end = 0;                               \
+    }
 
 #define COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, to, from, size) \
   do {                                                       \
diff --git a/lib/memprof/memprof_interceptors.cpp b/lib/memprof/memprof_interceptors.cpp
index 5575ae2..459ad03 100644
--- a/lib/memprof/memprof_interceptors.cpp
+++ b/lib/memprof/memprof_interceptors.cpp
@@ -93,10 +93,6 @@
   do {                                                                         \
   } while (false)
 #define COMMON_INTERCEPTOR_BLOCK_REAL(name) REAL(name)
-#define COMMON_INTERCEPTOR_ON_DLOPEN(filename, flag)                           \
-  do {                                                                         \
-    CheckNoDeepBind(filename, flag);                                           \
-  } while (false)
 #define COMMON_INTERCEPTOR_ON_EXIT(ctx) OnExit()
 #define COMMON_INTERCEPTOR_LIBRARY_LOADED(filename, handle)
 #define COMMON_INTERCEPTOR_LIBRARY_UNLOADED()
diff --git a/lib/sanitizer_common/sanitizer_common_interceptors.inc b/lib/sanitizer_common/sanitizer_common_interceptors.inc
index abb38cc..d219734 100644
--- a/lib/sanitizer_common/sanitizer_common_interceptors.inc
+++ b/lib/sanitizer_common/sanitizer_common_interceptors.inc
@@ -21,7 +21,7 @@
 //   COMMON_INTERCEPTOR_FD_RELEASE
 //   COMMON_INTERCEPTOR_FD_ACCESS
 //   COMMON_INTERCEPTOR_SET_THREAD_NAME
-//   COMMON_INTERCEPTOR_ON_DLOPEN
+//   COMMON_INTERCEPTOR_DLOPEN
 //   COMMON_INTERCEPTOR_ON_EXIT
 //   COMMON_INTERCEPTOR_MUTEX_PRE_LOCK
 //   COMMON_INTERCEPTOR_MUTEX_POST_LOCK
@@ -206,9 +206,9 @@
     COMMON_INTERCEPTOR_READ_RANGE((ctx), (s),                       \
       common_flags()->strict_string_checks ? (internal_strlen(s)) + 1 : (n) )
 
-#ifndef COMMON_INTERCEPTOR_ON_DLOPEN
-#define COMMON_INTERCEPTOR_ON_DLOPEN(filename, flag) \
-  CheckNoDeepBind(filename, flag);
+#ifndef COMMON_INTERCEPTOR_DLOPEN
+#define COMMON_INTERCEPTOR_DLOPEN(filename, flag) \
+  ({ CheckNoDeepBind(filename, flag); REAL(dlopen)(filename, flag); })
 #endif
 
 #ifndef COMMON_INTERCEPTOR_GET_TLS_RANGE
@@ -6380,8 +6380,7 @@
   void *ctx;
   COMMON_INTERCEPTOR_ENTER_NOIGNORE(ctx, dlopen, filename, flag);
   if (filename) COMMON_INTERCEPTOR_READ_STRING(ctx, filename, 0);
-  COMMON_INTERCEPTOR_ON_DLOPEN(filename, flag);
-  void *res = REAL(dlopen)(filename, flag);
+  void *res = COMMON_INTERCEPTOR_DLOPEN(filename, flag);
   Symbolizer::GetOrInit()->InvalidateModuleList();
   COMMON_INTERCEPTOR_LIBRARY_LOADED(filename, res);
   return res;
diff --git a/lib/tsan/rtl/tsan_interceptors_posix.cpp b/lib/tsan/rtl/tsan_interceptors_posix.cpp
index 73df011..cf3dc90 100644
--- a/lib/tsan/rtl/tsan_interceptors_posix.cpp
+++ b/lib/tsan/rtl/tsan_interceptors_posix.cpp
@@ -2377,6 +2377,15 @@
     if (fd >= 0) FdClose(thr, pc, fd);           \
   }
 
+#define COMMON_INTERCEPTOR_DLOPEN(filename, flag) \
+  ({                                              \
+    CheckNoDeepBind(filename, flag);              \
+    ThreadIgnoreBegin(thr, 0);                    \
+    void *res = REAL(dlopen)(filename, flag);     \
+    ThreadIgnoreEnd(thr);                         \
+    res;                                          \
+  })
+
 #define COMMON_INTERCEPTOR_LIBRARY_LOADED(filename, handle) \
   libignore()->OnLibraryLoaded(filename)
 
diff --git a/test/tsan/Linux/dlopen_static_tls.cpp b/test/tsan/Linux/dlopen_static_tls.cpp
new file mode 100644
index 0000000..d7543a2
--- /dev/null
+++ b/test/tsan/Linux/dlopen_static_tls.cpp
@@ -0,0 +1,78 @@
+// RUN: %clangxx_tsan -O1 %s -DBUILD_SO -fPIC -shared -o %t-so.so
+// RUN: %clangxx_tsan -O1 %s %link_libcxx_tsan -o %t && %run %t 2>&1 | FileCheck %s
+
+// A test for loading a dynamic library with static TLS.
+// Such static TLS is a hack that allows a dynamic library to have faster TLS,
+// but it can be loaded only iff all threads happened to allocate some excess
+// of static TLS space for whatever reason. If it's not the case loading fails with:
+// dlopen: cannot load any more object with static TLS
+// We used to produce a false positive because dlopen will write into TLS
+// of all existing threads to initialize/zero TLS region for the loaded library.
+// And this appears to be racing with initialization of TLS in the thread
+// since we model a write into the whole static TLS region (we don't know what part
+// of it is currently unused):
+// WARNING: ThreadSanitizer: data race (pid=2317365)
+//   Write of size 1 at 0x7f1fa9bfcdd7 by main thread:
+//     #0 memset
+//     #1 init_one_static_tls
+//     #2 __pthread_init_static_tls
+//     [[ this is where main calls dlopen ]]
+//     #3 main
+//   Previous write of size 8 at 0x7f1fa9bfcdd0 by thread T1:
+//     #0 __tsan_tls_initialization
+
+#ifdef BUILD_SO
+
+__attribute__((tls_model("initial-exec"))) __thread char x = 42;
+__attribute__((tls_model("initial-exec"))) __thread char y;
+
+extern "C" int sofunc() { return ++x + ++y; }
+
+#else // BUILD_SO
+
+#  include "../test.h"
+#  include <dlfcn.h>
+#  include <string>
+
+__thread int x[1023];
+
+void *lib;
+void (*func)();
+int ready;
+
+void *thread(void *arg) {
+  barrier_wait(&barrier);
+  if (__atomic_load_n(&ready, __ATOMIC_ACQUIRE))
+    func();
+  if (dlclose(lib)) {
+    printf("error in dlclose: %s\n", dlerror());
+    exit(1);
+  }
+  return 0;
+}
+
+int main(int argc, char *argv[]) {
+  barrier_init(&barrier, 2);
+  pthread_t th;
+  pthread_create(&th, 0, thread, 0);
+  lib = dlopen((std::string(argv[0]) + "-so.so").c_str(), RTLD_NOW);
+  if (lib == 0) {
+    printf("error in dlopen: %s\n", dlerror());
+    return 1;
+  }
+  func = (void (*)())dlsym(lib, "sofunc");
+  if (func == 0) {
+    printf("error in dlsym: %s\n", dlerror());
+    return 1;
+  }
+  __atomic_store_n(&ready, 1, __ATOMIC_RELEASE);
+  barrier_wait(&barrier);
+  func();
+  pthread_join(th, 0);
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+#endif // BUILD_SO
+
+// CHECK: DONE