tsan: speed up pthread_setname_np

pthread_setname_np does a linear search over all thread descriptors
to map a pthread_t to its thread descriptor. This has O(N^2) complexity
and becomes much worse in the new tsan runtime, which keeps every thread
that has ever existed in the thread registry.
Replace linear search with direct access if pthread_setname_np
is called for the current thread (a very common case).

Reviewed By: vitalybuka

Differential Revision: https://reviews.llvm.org/D113916

GitOrigin-RevId: 64b45399e5c547dc59ea3c72891fccdda7284eaa
diff --git a/lib/tsan/rtl/tsan_interceptors_posix.cpp b/lib/tsan/rtl/tsan_interceptors_posix.cpp
index 9b62b20..2f04cd2 100644
--- a/lib/tsan/rtl/tsan_interceptors_posix.cpp
+++ b/lib/tsan/rtl/tsan_interceptors_posix.cpp
@@ -90,6 +90,7 @@
 DECLARE_REAL(int, fflush, __sanitizer_FILE *fp)
 DECLARE_REAL_AND_INTERCEPTOR(void *, malloc, uptr size)
 DECLARE_REAL_AND_INTERCEPTOR(void, free, void *ptr)
+extern "C" int pthread_equal(void *t1, void *t2);
 extern "C" void *pthread_self();
 extern "C" void _exit(int status);
 #if !SANITIZER_NETBSD
@@ -2392,8 +2393,11 @@
 #define COMMON_INTERCEPTOR_SET_THREAD_NAME(ctx, name) \
   ThreadSetName(((TsanInterceptorContext *) ctx)->thr, name)
 
-#define COMMON_INTERCEPTOR_SET_PTHREAD_NAME(ctx, thread, name) \
-  __tsan::ctx->thread_registry.SetThreadNameByUserId(thread, name)
+#define COMMON_INTERCEPTOR_SET_PTHREAD_NAME(ctx, thread, name)                \
+  if (pthread_equal(pthread_self(), reinterpret_cast<void *>(thread)))        \
+    COMMON_INTERCEPTOR_SET_THREAD_NAME(ctx, name); /* target is the caller */ \
+  else /* some other thread: resolve it through the registry */               \
+    __tsan::ctx->thread_registry.SetThreadNameByUserId(thread, name)
 
 #define COMMON_INTERCEPTOR_BLOCK_REAL(name) BLOCK_REAL(name)
 
diff --git a/test/tsan/bench_threads.cpp b/test/tsan/bench_threads.cpp
new file mode 100644
index 0000000..1d0be21
--- /dev/null
+++ b/test/tsan/bench_threads.cpp
@@ -0,0 +1,45 @@
+// RUN: %clangxx_tsan %s -o %t
+// RUN: %run %t 2>&1 | FileCheck %s
+
+// bench.h needs pthread barriers which are not available on OS X
+// UNSUPPORTED: darwin
+
+#include "bench.h"
+
+void *nop_thread(void *arg) {  // Thread entry point; `arg` is not used.
+  pthread_setname_np(pthread_self(), "nop_thread");  // names the caller itself
+  return nullptr;
+}
+
+void thread(int tid) {  // Worker given to start_thread_group(); tid unused.
+  for (int i = 0; i < bench_niter; i++) {
+    pthread_t th;  // one short-lived thread per iteration
+    pthread_create(&th, nullptr, nop_thread, nullptr);
+    pthread_join(th, nullptr);  // joined before the next one is created
+  }
+}
+
+void bench() {
+  // Benchmark thread creation/joining in the presence of a large number
+  // of other threads (both already joined and created-but-not-yet-joined).
+  printf("starting transient threads...\n");
+  for (int i = 0; i < 200; i++) {  // 200 batches
+    const int kBatch = 100;  // threads per batch
+    pthread_t th[kBatch];
+    for (int j = 0; j < kBatch; j++)  // create the whole batch first...
+      pthread_create(&th[j], nullptr, nop_thread, nullptr);
+    for (int j = 0; j < kBatch; j++)  // ...then join all of it
+      pthread_join(th[j], nullptr);
+  }
+  printf("starting persistent threads...\n");
+  const int kLiveThreads = 2000;  // not joined until after the benchmark call
+  pthread_t th[kLiveThreads];
+  for (int j = 0; j < kLiveThreads; j++)
+    pthread_create(&th[j], nullptr, nop_thread, nullptr);
+  printf("starting benchmark threads...\n");
+  start_thread_group(bench_nthread, thread);  // presumably declared in bench.h
+  for (int j = 0; j < kLiveThreads; j++)  // reap the persistent threads
+    pthread_join(th[j], nullptr);
+}
+
+// CHECK: DONE