[dfsan] Clean TLS after sigaction callbacks

DFSan uses TLS to pass metadata of arguments and return values. When an
instrumented function accesses the TLS, if a signal callback happens, and
the callback calls other instrumented functions with updating the same TLS,
the TLS is in an inconsistent state after the callback ends. This may cause
either under-tainting or over-tainting.

This fix follows MSan's workaround.
  https://github.com/llvm/llvm-project/commit/cb22c67a21e4b5e1ade65141117a70be318be072
It simply resets TLS at restore. This prevents from over-tainting. Although
under-tainting may still happen, a taint flow can be found eventually if we
run a DFSan-instrumented program multiple times. The alternative option is
saving the entire TLS. However the TLS storage takes 2k bytes, and signal calls
could be nested. So it does not seem worth.

This diff fixes sigaction. A following diff will be fixing signal.

Reviewed-by: morehouse

Differential Revision: https://reviews.llvm.org/D95642

GitOrigin-RevId: e1a4322f8136788228d915a7384c5679b39dfeed
diff --git a/lib/dfsan/dfsan.cpp b/lib/dfsan/dfsan.cpp
index c17bfe0..b53a755 100644
--- a/lib/dfsan/dfsan.cpp
+++ b/lib/dfsan/dfsan.cpp
@@ -456,6 +456,17 @@
   if (common_flags()->help) parser.PrintFlagDescriptions();
 }
 
+SANITIZER_INTERFACE_ATTRIBUTE
+void dfsan_clear_arg_tls(uptr offset, uptr size) {
+  internal_memset((void *)((uptr)__dfsan_arg_tls + offset), 0, size);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void dfsan_clear_thread_local_state() {
+  internal_memset(__dfsan_arg_tls, 0, sizeof(__dfsan_arg_tls));
+  internal_memset(__dfsan_retval_tls, 0, sizeof(__dfsan_retval_tls));
+}
+
 static void InitializePlatformEarly() {
   AvoidCVE_2016_2143();
 #ifdef DFSAN_RUNTIME_VMA
diff --git a/lib/dfsan/dfsan.h b/lib/dfsan/dfsan.h
index d662391..e03716f 100644
--- a/lib/dfsan/dfsan.h
+++ b/lib/dfsan/dfsan.h
@@ -35,6 +35,10 @@
 void dfsan_set_label(dfsan_label label, void *addr, uptr size);
 dfsan_label dfsan_read_label(const void *addr, uptr size);
 dfsan_label dfsan_union(dfsan_label l1, dfsan_label l2);
+// Zero out [offset, offset+size) from __dfsan_arg_tls.
+void dfsan_clear_arg_tls(uptr offset, uptr size);
+// Zero out the TLS storage.
+void dfsan_clear_thread_local_state();
 }  // extern "C"
 
 template <typename T>
diff --git a/lib/dfsan/dfsan_custom.cpp b/lib/dfsan/dfsan_custom.cpp
index 94901ce..eddb077 100644
--- a/lib/dfsan/dfsan_custom.cpp
+++ b/lib/dfsan/dfsan_custom.cpp
@@ -51,6 +51,30 @@
 #define DECLARE_WEAK_INTERCEPTOR_HOOK(f, ...) \
 SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE void f(__VA_ARGS__);
 
+// Async-safe, non-reentrant spin lock.
+class SignalSpinLocker {
+ public:
+  SignalSpinLocker() {
+    sigset_t all_set;
+    sigfillset(&all_set);
+    pthread_sigmask(SIG_SETMASK, &all_set, &saved_thread_mask_);
+    sigactions_mu.Lock();
+  }
+  ~SignalSpinLocker() {
+    sigactions_mu.Unlock();
+    pthread_sigmask(SIG_SETMASK, &saved_thread_mask_, nullptr);
+  }
+
+ private:
+  static StaticSpinMutex sigactions_mu;
+  sigset_t saved_thread_mask_;
+
+  SignalSpinLocker(const SignalSpinLocker &) = delete;
+  SignalSpinLocker &operator=(const SignalSpinLocker &) = delete;
+};
+
+StaticSpinMutex SignalSpinLocker::sigactions_mu;
+
 extern "C" {
 SANITIZER_INTERFACE_ATTRIBUTE int
 __dfsw_stat(const char *path, struct stat *buf, dfsan_label path_label,
@@ -812,12 +836,100 @@
   return ret;
 }
 
+// Clear DFSan runtime TLS state at the end of a scope.
+//
+// Implementation must be async-signal-safe and use small data size, because
+// instances of this class may live on the signal handler stack.
+//
+// DFSan uses TLS to pass metadata of arguments and return values. When an
+// instrumented function accesses the TLS, if a signal callback happens, and the
+// callback calls other instrumented functions with updating the same TLS, the
+// TLS is in an inconsistent state after the callback ends. This may cause
+// either under-tainting or over-tainting.
+//
+// The current implementation simply resets TLS at restore. This prevents from
+// over-tainting. Although under-tainting may still happen, a taint flow can be
+// found eventually if we run a DFSan-instrumented program multiple times. The
+// alternative option is saving the entire TLS. However the TLS storage takes
+// 2k bytes, and signal calls could be nested. So it does not seem worth.
+class ScopedClearThreadLocalState {
+ public:
+  ScopedClearThreadLocalState() {}
+  ~ScopedClearThreadLocalState() { dfsan_clear_thread_local_state(); }
+};
+
+// SignalSpinLocker::sigactions_mu guarantees atomicity of sigaction() calls.
+const int kMaxSignals = 1024;
+static atomic_uintptr_t sigactions[kMaxSignals];
+
+static void SignalHandler(int signo) {
+  ScopedClearThreadLocalState stlsb;
+
+  // Clear shadows for all inputs provided by system. This is why DFSan
+  // instrumentation generates a trampoline function to each function pointer,
+  // and uses the trampoline to clear shadows. However sigaction does not use
+  // a function pointer directly, so we have to do this manually.
+  dfsan_clear_arg_tls(0, sizeof(dfsan_label));
+
+  typedef void (*signal_cb)(int x);
+  signal_cb cb =
+      (signal_cb)atomic_load(&sigactions[signo], memory_order_relaxed);
+  cb(signo);
+}
+
+static void SignalAction(int signo, siginfo_t *si, void *uc) {
+  ScopedClearThreadLocalState stlsb;
+
+  // Clear shadows for all inputs provided by system. Similar to SignalHandler.
+  dfsan_clear_arg_tls(0, 3 * sizeof(dfsan_label));
+  dfsan_set_label(0, si, sizeof(*si));
+  dfsan_set_label(0, uc, sizeof(ucontext_t));
+
+  typedef void (*sigaction_cb)(int, siginfo_t *, void *);
+  sigaction_cb cb =
+      (sigaction_cb)atomic_load(&sigactions[signo], memory_order_relaxed);
+  cb(signo, si, uc);
+}
+
 SANITIZER_INTERFACE_ATTRIBUTE
 int __dfsw_sigaction(int signum, const struct sigaction *act,
                      struct sigaction *oldact, dfsan_label signum_label,
                      dfsan_label act_label, dfsan_label oldact_label,
                      dfsan_label *ret_label) {
-  int ret = sigaction(signum, act, oldact);
+  CHECK_LT(signum, kMaxSignals);
+  SignalSpinLocker lock;
+  uptr old_cb = atomic_load(&sigactions[signum], memory_order_relaxed);
+  struct sigaction new_act;
+  struct sigaction *pnew_act = act ? &new_act : nullptr;
+  if (act) {
+    internal_memcpy(pnew_act, act, sizeof(struct sigaction));
+    if (pnew_act->sa_flags & SA_SIGINFO) {
+      uptr cb = (uptr)(pnew_act->sa_sigaction);
+      if (cb != (uptr)SIG_IGN && cb != (uptr)SIG_DFL) {
+        atomic_store(&sigactions[signum], cb, memory_order_relaxed);
+        pnew_act->sa_sigaction = SignalAction;
+      }
+    } else {
+      uptr cb = (uptr)(pnew_act->sa_handler);
+      if (cb != (uptr)SIG_IGN && cb != (uptr)SIG_DFL) {
+        atomic_store(&sigactions[signum], cb, memory_order_relaxed);
+        pnew_act->sa_handler = SignalHandler;
+      }
+    }
+  }
+
+  int ret = sigaction(signum, pnew_act, oldact);
+
+  if (ret == 0 && oldact) {
+    if (oldact->sa_flags & SA_SIGINFO) {
+      if (oldact->sa_sigaction == SignalAction)
+        oldact->sa_sigaction = (decltype(oldact->sa_sigaction))old_cb;
+    } else {
+      if (oldact->sa_handler == SignalHandler)
+        oldact->sa_handler = (decltype(oldact->sa_handler))old_cb;
+    }
+  }
+
   if (oldact) {
     dfsan_set_label(0, oldact, sizeof(struct sigaction));
   }
diff --git a/test/dfsan/custom.cpp b/test/dfsan/custom.cpp
index 804904b..7c37406 100644
--- a/test/dfsan/custom.cpp
+++ b/test/dfsan/custom.cpp
@@ -812,12 +812,34 @@
   ASSERT_READ_ZERO_LABEL(&set, sizeof(set));
 }
 
+static void SignalHandler(int signo) {}
+
+static void SignalAction(int signo, siginfo_t *si, void *uc) {}
+
 void test_sigaction() {
-  struct sigaction oldact;
-  dfsan_set_label(j_label, &oldact, 1);
-  int ret = sigaction(SIGUSR1, NULL, &oldact);
+  struct sigaction newact_with_sigaction = {};
+  newact_with_sigaction.sa_flags = SA_SIGINFO;
+  newact_with_sigaction.sa_sigaction = SignalAction;
+
+  // Set sigaction to be SignalAction, save the last one into origin_act
+  struct sigaction origin_act;
+  dfsan_set_label(j_label, &origin_act, 1);
+  int ret = sigaction(SIGUSR1, &newact_with_sigaction, &origin_act);
   assert(ret == 0);
-  ASSERT_READ_ZERO_LABEL(&oldact, sizeof(oldact));
+  ASSERT_ZERO_LABEL(ret);
+  ASSERT_READ_ZERO_LABEL(&origin_act, sizeof(origin_act));
+
+  struct sigaction newact_with_sighandler = {};
+  newact_with_sighandler.sa_handler = SignalHandler;
+
+  // Set sigaction to be SignalHandler, check the last one is SignalAction
+  struct sigaction oldact;
+  assert(0 == sigaction(SIGUSR1, &newact_with_sighandler, &oldact));
+  assert(oldact.sa_sigaction == SignalAction);
+
+  // Restore sigaction to the orginal setting, check the last one is SignalHandler
+  assert(0 == sigaction(SIGUSR1, &origin_act, &oldact));
+  assert(oldact.sa_handler == SignalHandler);
 }
 
 void test_sigaltstack() {
diff --git a/test/dfsan/sigaction.c b/test/dfsan/sigaction.c
new file mode 100644
index 0000000..d9b7db2
--- /dev/null
+++ b/test/dfsan/sigaction.c
@@ -0,0 +1,49 @@
+// RUN: %clang_dfsan -DUSE_SIGNAL_ACTION -mllvm -dfsan-fast-16-labels=true %s -o %t && \
+// RUN:     %run %t
+// RUN: %clang_dfsan -mllvm -dfsan-fast-16-labels=true %s -o %t && %run %t
+
+#include <sanitizer/dfsan_interface.h>
+
+#include <assert.h>
+#include <signal.h>
+#include <string.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+volatile int x;
+volatile int z = 1;
+
+void SignalHandler(int signo) {
+  assert(dfsan_get_label(signo) == 0);
+  x = z;
+}
+
+void SignalAction(int signo, siginfo_t *si, void *uc) {
+  assert(dfsan_get_label(signo) == 0);
+  assert(dfsan_get_label(si) == 0);
+  assert(dfsan_get_label(uc) == 0);
+  assert(0 == dfsan_read_label(si, sizeof(*si)));
+  assert(0 == dfsan_read_label(uc, sizeof(ucontext_t)));
+  x = z;
+}
+
+int main(int argc, char *argv[]) {
+  dfsan_set_label(8, (void *)&z, sizeof(z));
+
+  struct sigaction sa = {};
+#ifdef USE_SIGNAL_ACTION
+  sa.sa_flags = SA_SIGINFO;
+  sa.sa_sigaction = SignalAction;
+#else
+  sa.sa_handler = SignalHandler;
+#endif
+  int r = sigaction(SIGHUP, &sa, NULL);
+  assert(dfsan_get_label(r) == 0);
+
+  kill(getpid(), SIGHUP);
+  signal(SIGHUP, SIG_DFL);
+
+  assert(x == 1);
+
+  return 0;
+}
diff --git a/test/dfsan/sigaction_stress_test.c b/test/dfsan/sigaction_stress_test.c
new file mode 100644
index 0000000..0748d20
--- /dev/null
+++ b/test/dfsan/sigaction_stress_test.c
@@ -0,0 +1,63 @@
+// RUN: %clangxx_dfsan -mllvm -dfsan-fast-16-labels=true -O0 %s -o %t && %run %t
+//
+// Test that the state of shadows from a sigaction handler are consistent.
+
+#include <signal.h>
+#include <stdarg.h>
+#include <sanitizer/dfsan_interface.h>
+#include <assert.h>
+#include <sys/time.h>
+#include <stdio.h>
+
+const int kSigCnt = 200;
+int x;
+
+__attribute__((noinline))
+int f(int a) {
+  return a;
+}
+
+__attribute__((noinline))
+void g() {
+  int r = f(x);
+  const dfsan_label r_label = dfsan_get_label(r);
+  assert(r_label == 8 || r_label == 0);
+  return;
+}
+
+int sigcnt;
+
+void SignalHandler(int signo) {
+  assert(signo == SIGPROF);
+  int a = 0;
+  dfsan_set_label(4, &a, sizeof(a));
+  (void)f(a);
+  ++sigcnt;
+}
+
+int main() {
+  struct sigaction psa = {};
+  psa.sa_handler = SignalHandler;
+  int r = sigaction(SIGPROF, &psa, NULL);
+
+  itimerval itv;
+  itv.it_interval.tv_sec = 0;
+  itv.it_interval.tv_usec = 100;
+  itv.it_value.tv_sec = 0;
+  itv.it_value.tv_usec = 100;
+  setitimer(ITIMER_PROF, &itv, NULL);
+
+  dfsan_set_label(8, &x, sizeof(x));
+  do {
+    g();
+  } while (sigcnt < kSigCnt);
+
+  itv.it_interval.tv_sec = 0;
+  itv.it_interval.tv_usec = 0;
+  itv.it_value.tv_sec = 0;
+  itv.it_value.tv_usec = 0;
+  setitimer(ITIMER_PROF, &itv, NULL);
+
+  signal(SIGPROF, SIG_DFL);
+  return 0;
+}