[lsan] Fix deadlock in dl_iterate_phdr.

Summary:
Do not grab the allocator lock before calling dl_iterate_phdr. This may
cause a lock order inversion with (valid) user code that uses malloc
inside a dl_iterate_phdr callback.

Reviewers: vitalybuka, hctim

Subscribers: jfb, #sanitizers, llvm-commits

Tags: #sanitizers, #llvm

Differential Revision: https://reviews.llvm.org/D67738

git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@372348 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/lsan/lsan_common.cpp b/lib/lsan/lsan_common.cpp
index 86bd120..9ff9f4c 100644
--- a/lib/lsan/lsan_common.cpp
+++ b/lib/lsan/lsan_common.cpp
@@ -570,11 +570,7 @@
   EnsureMainThreadIDIsCorrect();
   CheckForLeaksParam param;
   param.success = false;
-  LockThreadRegistry();
-  LockAllocator();
-  DoStopTheWorld(CheckForLeaksCallback, &param);
-  UnlockAllocator();
-  UnlockThreadRegistry();
+  LockStuffAndStopTheWorld(CheckForLeaksCallback, &param);
 
   if (!param.success) {
     Report("LeakSanitizer has encountered a fatal error.\n");
diff --git a/lib/lsan/lsan_common.h b/lib/lsan/lsan_common.h
index 58dc00f..d24abe3 100644
--- a/lib/lsan/lsan_common.h
+++ b/lib/lsan/lsan_common.h
@@ -129,8 +129,9 @@
 InternalMmapVector<RootRegion> const *GetRootRegions();
 void ScanRootRegion(Frontier *frontier, RootRegion const &region,
                     uptr region_begin, uptr region_end, bool is_readable);
-// Run stoptheworld while holding any platform-specific locks.
-void DoStopTheWorld(StopTheWorldCallback callback, void* argument);
+// Run stoptheworld while holding any platform-specific locks, as well as the
+// allocator and thread registry locks.
+void LockStuffAndStopTheWorld(StopTheWorldCallback callback, void* argument);
 
 void ScanRangeForPointers(uptr begin, uptr end,
                           Frontier *frontier,
diff --git a/lib/lsan/lsan_common_linux.cpp b/lib/lsan/lsan_common_linux.cpp
index 9ce27a9..ea1a4a2 100644
--- a/lib/lsan/lsan_common_linux.cpp
+++ b/lib/lsan/lsan_common_linux.cpp
@@ -115,10 +115,14 @@
   if (common_flags()->exitcode) Die();
 }
 
-static int DoStopTheWorldCallback(struct dl_phdr_info *info, size_t size,
-                                  void *data) {
+static int LockStuffAndStopTheWorldCallback(struct dl_phdr_info *info,
+                                            size_t size, void *data) {
+  LockThreadRegistry();
+  LockAllocator();
   DoStopTheWorldParam *param = reinterpret_cast<DoStopTheWorldParam *>(data);
   StopTheWorld(param->callback, param->argument);
+  UnlockAllocator();
+  UnlockThreadRegistry();
   return 1;
 }
 
@@ -130,9 +134,9 @@
 // while holding the libdl lock in the parent thread, we can safely reenter it
 // in the tracer. The solution is to run stoptheworld from a dl_iterate_phdr()
 // callback in the parent thread.
-void DoStopTheWorld(StopTheWorldCallback callback, void *argument) {
+void LockStuffAndStopTheWorld(StopTheWorldCallback callback, void *argument) {
   DoStopTheWorldParam param = {callback, argument};
-  dl_iterate_phdr(DoStopTheWorldCallback, &param);
+  dl_iterate_phdr(LockStuffAndStopTheWorldCallback, &param);
 }
 
 } // namespace __lsan
diff --git a/lib/lsan/lsan_common_mac.cpp b/lib/lsan/lsan_common_mac.cpp
index 5204a66..c1804e9 100644
--- a/lib/lsan/lsan_common_mac.cpp
+++ b/lib/lsan/lsan_common_mac.cpp
@@ -193,8 +193,12 @@
 // causes rare race conditions.
 void HandleLeaks() {}
 
-void DoStopTheWorld(StopTheWorldCallback callback, void *argument) {
+void LockStuffAndStopTheWorld(StopTheWorldCallback callback, void *argument) {
+  LockThreadRegistry();
+  LockAllocator();
   StopTheWorld(callback, argument);
+  UnlockAllocator();
+  UnlockThreadRegistry();
 }
 
 } // namespace __lsan
diff --git a/test/lsan/TestCases/Linux/libdl_deadlock.cpp b/test/lsan/TestCases/Linux/libdl_deadlock.cpp
new file mode 100644
index 0000000..7c1f82f
--- /dev/null
+++ b/test/lsan/TestCases/Linux/libdl_deadlock.cpp
@@ -0,0 +1,52 @@
+// Regression test for a deadlock in leak detection,
+// where lsan would call dl_iterate_phdr while holding the allocator lock.
+// RUN: %clangxx_lsan %s -o %t && %run %t
+
+#include <link.h>
+#include <mutex>
+#include <stdlib.h>
+#include <thread>
+#include <unistd.h>
+
+std::mutex in, out;
+
+int Callback(struct dl_phdr_info *info, size_t size, void *data) {
+  for (int step = 0; step < 50; ++step) {
+    void *p[1000];
+    for (int i = 0; i < 1000; ++i)
+      p[i] = malloc(10 * i);
+
+    if (step == 0)
+      in.unlock();
+
+    for (int i = 0; i < 1000; ++i)
+      free(p[i]);
+  }
+  out.unlock();
+  return 1; // just once
+}
+
+void Watchdog() {
+  // This is just a fail-safe to turn a deadlock (in case the bug reappears)
+  // into a (slow) test failure.
+  usleep(10000000);
+  if (!out.try_lock()) {
+    write(2, "DEADLOCK\n", 9);
+    exit(1);
+  }
+}
+
+int main() {
+  in.lock();
+  out.lock();
+
+  std::thread t([] { dl_iterate_phdr(Callback, nullptr); });
+  t.detach();
+
+  std::thread w(Watchdog);
+  w.detach();
+
+  // Wait for the malloc thread to preheat, then start leak detection (on exit)
+  in.lock();
+  return 0;
+}