tsan: speed up pthread_setname_np
pthread_setname_np does a linear search over all thread descriptors
to map a pthread_t to its thread descriptor. When N threads each set
their name this has O(N^2) total complexity, and it becomes much worse
in the new tsan runtime, which keeps every thread that has ever existed
in the thread registry.
Replace the linear search with direct access when pthread_setname_np
is called for the current thread (a very common case).
Reviewed By: vitalybuka
Differential Revision: https://reviews.llvm.org/D113916
GitOrigin-RevId: 64b45399e5c547dc59ea3c72891fccdda7284eaa
diff --git a/lib/tsan/rtl/tsan_interceptors_posix.cpp b/lib/tsan/rtl/tsan_interceptors_posix.cpp
index 9b62b20..2f04cd2 100644
--- a/lib/tsan/rtl/tsan_interceptors_posix.cpp
+++ b/lib/tsan/rtl/tsan_interceptors_posix.cpp
@@ -90,6 +90,7 @@
DECLARE_REAL(int, fflush, __sanitizer_FILE *fp)
DECLARE_REAL_AND_INTERCEPTOR(void *, malloc, uptr size)
DECLARE_REAL_AND_INTERCEPTOR(void, free, void *ptr)
+extern "C" int pthread_equal(void *t1, void *t2);  // Used by COMMON_INTERCEPTOR_SET_PTHREAD_NAME.
extern "C" void *pthread_self();
extern "C" void _exit(int status);
#if !SANITIZER_NETBSD
@@ -2392,8 +2393,11 @@
#define COMMON_INTERCEPTOR_SET_THREAD_NAME(ctx, name) \
ThreadSetName(((TsanInterceptorContext *) ctx)->thr, name)
-#define COMMON_INTERCEPTOR_SET_PTHREAD_NAME(ctx, thread, name) \
- __tsan::ctx->thread_registry.SetThreadNameByUserId(thread, name)
+// Records `name` for `thread`. When `thread` is the calling thread (the
+// common case) the name is set directly through the interceptor context,
+// skipping the thread-registry search by user id; any other thread still
+// goes through SetThreadNameByUserId().
+// Wrapped in do { } while (false) so the expansion is one self-contained
+// statement that cannot pair up with a surrounding if/else.
+// `thread` is evaluated twice when it is not the calling thread.
+// NOTE: the `ctx` parameter is also substituted into `__tsan::ctx` below, so
+// the macro only expands as intended when the argument is spelled `ctx`.
+#define COMMON_INTERCEPTOR_SET_PTHREAD_NAME(ctx, thread, name) \
+  do { \
+    if (pthread_equal(pthread_self(), reinterpret_cast<void *>(thread))) { \
+      COMMON_INTERCEPTOR_SET_THREAD_NAME(ctx, name); \
+    } else { \
+      __tsan::ctx->thread_registry.SetThreadNameByUserId(thread, name); \
+    } \
+  } while (false)
#define COMMON_INTERCEPTOR_BLOCK_REAL(name) BLOCK_REAL(name)
diff --git a/test/tsan/bench_threads.cpp b/test/tsan/bench_threads.cpp
new file mode 100644
index 0000000..1d0be21
--- /dev/null
+++ b/test/tsan/bench_threads.cpp
@@ -0,0 +1,45 @@
+// RUN: %clangxx_tsan %s -o %t
+// RUN: %run %t 2>&1 | FileCheck %s
+
+// bench.h needs pthread barriers which are not available on OS X
+// UNSUPPORTED: darwin
+
+#include "bench.h"
+
+void *nop_thread(void *arg) {  // Thread entry point; `arg` is unused.
+ pthread_setname_np(pthread_self(), "nop_thread");  // Names the calling thread itself.
+ return nullptr;
+}
+
+void thread(int tid) {  // Handed to start_thread_group() by bench(); `tid` is unused.
+ for (int i = 0; i < bench_niter; i++) {  // bench_niter is not declared in this file (presumably bench.h).
+ pthread_t th;
+ pthread_create(&th, nullptr, nop_thread, nullptr);
+ pthread_join(th, nullptr);  // Joined immediately: one short-lived thread per iteration.
+ }
+}
+
+void bench() {  // Piles up joined and unjoined threads, then starts the benchmark workers.
+ // Benchmark thread creation/joining in the presence of a large number
+ // of threads (both created-but-not-yet-joined and already joined).
+ printf("starting transient threads...\n");
+ for (int i = 0; i < 200; i++) {  // 200 batches; each batch is fully joined before the next starts.
+ const int kBatch = 100;
+ pthread_t th[kBatch];
+ for (int j = 0; j < kBatch; j++)
+ pthread_create(&th[j], nullptr, nop_thread, nullptr);
+ for (int j = 0; j < kBatch; j++)
+ pthread_join(th[j], nullptr);
+ }
+ printf("starting persistent threads...\n");
+ const int kLiveThreads = 2000;  // Joined only at the end of bench(), after start_thread_group().
+ pthread_t th[kLiveThreads];
+ for (int j = 0; j < kLiveThreads; j++)
+ pthread_create(&th[j], nullptr, nop_thread, nullptr);
+ printf("starting benchmark threads...\n");
+ start_thread_group(bench_nthread, thread);  // Not defined in this file (presumably bench.h); thread() is the worker.
+ for (int j = 0; j < kLiveThreads; j++)
+ pthread_join(th[j], nullptr);
+}
+
+// CHECK: DONE