diff --git a/compiler-rt/lib/dfsan/dfsan.cpp b/compiler-rt/lib/dfsan/dfsan.cpp
index da01a63..5a9620a 100644
--- a/compiler-rt/lib/dfsan/dfsan.cpp
+++ b/compiler-rt/lib/dfsan/dfsan.cpp
@@ -708,6 +708,14 @@
   return data_label;
 }
 
+// Used when dfsan_get_origin is called while origin tracking is off: there is
+// nothing to look up, so this clears the return label and reports origin 0.
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin __dfsw_dfsan_get_origin(
+    long data, dfsan_label data_label, dfsan_label *ret_label) {
+  *ret_label = 0;
+  return 0;
+}
+
 extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin __dfso_dfsan_get_origin(
     long data, dfsan_label data_label, dfsan_label *ret_label,
     dfsan_origin data_origin, dfsan_origin *ret_origin) {
@@ -847,6 +855,7 @@
   dfsan_origin origin_id = o.raw_id();
   while (o.isChainedOrigin()) {
     StackTrace stack;
+    origin_id = o.raw_id();
     o = o.getNextChainedOrigin(&stack);
   }
   return origin_id;
diff --git a/compiler-rt/lib/dfsan/dfsan_custom.cpp b/compiler-rt/lib/dfsan/dfsan_custom.cpp
index 72c1065..ae0c46a 100644
--- a/compiler-rt/lib/dfsan/dfsan_custom.cpp
+++ b/compiler-rt/lib/dfsan/dfsan_custom.cpp
@@ -37,10 +37,12 @@
 #include <unistd.h>
 
 #include "dfsan/dfsan.h"
+#include "dfsan/dfsan_chained_origin_depot.h"
 #include "dfsan/dfsan_thread.h"
 #include "sanitizer_common/sanitizer_common.h"
 #include "sanitizer_common/sanitizer_internal_defs.h"
 #include "sanitizer_common/sanitizer_linux.h"
+#include "sanitizer_common/sanitizer_stackdepot.h"
 
 using namespace __dfsan;
 
@@ -310,6 +312,17 @@
   return ret;
 }
 
+SANITIZER_INTERFACE_ATTRIBUTE size_t __dfso_strlen(const char *s,
+                                                   dfsan_label s_label,
+                                                   dfsan_label *ret_label,
+                                                   dfsan_origin s_origin,
+                                                   dfsan_origin *ret_origin) {
+  size_t ret = __dfsw_strlen(s, s_label, ret_label);
+  if (!flags().strict_data_dependencies)
+    *ret_origin = dfsan_read_origin_of_first_taint(s, ret + 1);
+  return ret;
+}
+
 static void *dfsan_memmove(void *dest, const void *src, size_t n) {
   dfsan_label *sdest = shadow_for(dest);
   const dfsan_label *ssrc = shadow_for(src);
@@ -456,7 +469,8 @@
 static int dfsan_pthread_create(pthread_t *thread, const pthread_attr_t *attr,
                                 void *start_routine_trampoline,
                                 void *start_routine, void *arg,
-                                dfsan_label *ret_label) {
+                                dfsan_label *ret_label,
+                                bool track_origins = false) {
   pthread_attr_t myattr;
   if (!attr) {
     pthread_attr_init(&myattr);
@@ -466,8 +480,9 @@
   // Ensure that the thread stack is large enough to hold all TLS data.
   AdjustStackSize((void *)(const_cast<pthread_attr_t *>(attr)));
 
-  DFsanThread *t = DFsanThread::Create(start_routine_trampoline,
-                                       (thread_callback_t)start_routine, arg);
+  DFsanThread *t =
+      DFsanThread::Create(start_routine_trampoline,
+                          (thread_callback_t)start_routine, arg, track_origins);
   int res = pthread_create(thread, attr, DFsanThreadStartFunc, t);
 
   if (attr == &myattr)
@@ -487,6 +502,20 @@
                               start_routine, arg, ret_label);
 }
 
+SANITIZER_INTERFACE_ATTRIBUTE int __dfso_pthread_create(
+    pthread_t *thread, const pthread_attr_t *attr,
+    void *(*start_routine_trampoline)(void *, void *, dfsan_label,
+                                      dfsan_label *, dfsan_origin,
+                                      dfsan_origin *),
+    void *start_routine, void *arg, dfsan_label thread_label,
+    dfsan_label attr_label, dfsan_label start_routine_label,
+    dfsan_label arg_label, dfsan_label *ret_label, dfsan_origin thread_origin,
+    dfsan_origin attr_origin, dfsan_origin start_routine_origin,
+    dfsan_origin arg_origin, dfsan_origin *ret_origin) {
+  return dfsan_pthread_create(thread, attr, (void *)start_routine_trampoline,
+                              start_routine, arg, ret_label, true);
+}
+
 SANITIZER_INTERFACE_ATTRIBUTE int __dfsw_pthread_join(pthread_t thread,
                                                       void **retval,
                                                       dfsan_label thread_label,
@@ -499,6 +528,15 @@
   return ret;
 }
 
+SANITIZER_INTERFACE_ATTRIBUTE int __dfso_pthread_join(
+    pthread_t thread, void **retval, dfsan_label thread_label,
+    dfsan_label retval_label, dfsan_label *ret_label,
+    dfsan_origin thread_origin, dfsan_origin retval_origin,
+    dfsan_origin *ret_origin) {
+  return __dfsw_pthread_join(thread, retval, thread_label, retval_label,
+                             ret_label);
+}
+
 struct dl_iterate_phdr_info {
   int (*callback_trampoline)(void *callback, struct dl_phdr_info *info,
                              size_t size, void *data, dfsan_label info_label,
@@ -872,6 +910,13 @@
   return ret;
 }
 
+SANITIZER_INTERFACE_ATTRIBUTE
+int __dfso_sigemptyset(sigset_t *set, dfsan_label set_label,
+                       dfsan_label *ret_label, dfsan_origin set_origin,
+                       dfsan_origin *ret_origin) {
+  return __dfsw_sigemptyset(set, set_label, ret_label);  // Origins unused.
+}
+
 class SignalHandlerScope {
  public:
   SignalHandlerScope() {
@@ -988,11 +1033,18 @@
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-sighandler_t __dfsw_signal(int signum,
-                           void *(*handler_trampoline)(void *, int, dfsan_label,
-                                                       dfsan_label *),
-                           sighandler_t handler, dfsan_label signum_label,
-                           dfsan_label handler_label, dfsan_label *ret_label) {
+int __dfso_sigaction(int signum, const struct sigaction *act,
+                     struct sigaction *oldact, dfsan_label signum_label,
+                     dfsan_label act_label, dfsan_label oldact_label,
+                     dfsan_label *ret_label, dfsan_origin signum_origin,
+                     dfsan_origin act_origin, dfsan_origin oldact_origin,
+                     dfsan_origin *ret_origin) {
+  return __dfsw_sigaction(signum, act, oldact, signum_label, act_label,
+                          oldact_label, ret_label);
+}
+
+static sighandler_t dfsan_signal(int signum, sighandler_t handler,
+                                 dfsan_label *ret_label) {
   CHECK_LT(signum, kMaxSignals);
   SignalSpinLocker lock;
   uptr old_cb = atomic_load(&sigactions[signum], memory_order_relaxed);
@@ -1011,6 +1063,26 @@
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
+sighandler_t __dfsw_signal(int signum,
+                           void *(*handler_trampoline)(void *, int, dfsan_label,
+                                                       dfsan_label *),
+                           sighandler_t handler, dfsan_label signum_label,
+                           dfsan_label handler_label, dfsan_label *ret_label) {
+  return dfsan_signal(signum, handler, ret_label);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+sighandler_t __dfso_signal(
+    int signum,
+    void *(*handler_trampoline)(void *, int, dfsan_label, dfsan_label *,
+                                dfsan_origin, dfsan_origin *),
+    sighandler_t handler, dfsan_label signum_label, dfsan_label handler_label,
+    dfsan_label *ret_label, dfsan_origin signum_origin,
+    dfsan_origin handler_origin, dfsan_origin *ret_origin) {
+  return dfsan_signal(signum, handler, ret_label);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
 int __dfsw_sigaltstack(const stack_t *ss, stack_t *old_ss, dfsan_label ss_label,
                        dfsan_label old_ss_label, dfsan_label *ret_label) {
   int ret = sigaltstack(ss, old_ss);
@@ -1021,6 +1093,14 @@
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
+int __dfso_sigaltstack(const stack_t *ss, stack_t *old_ss, dfsan_label ss_label,
+                       dfsan_label old_ss_label, dfsan_label *ret_label,
+                       dfsan_origin ss_origin, dfsan_origin old_ss_origin,
+                       dfsan_origin *ret_origin) {
+  return __dfsw_sigaltstack(ss, old_ss, ss_label, old_ss_label, ret_label);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
 int __dfsw_gettimeofday(struct timeval *tv, struct timezone *tz,
                         dfsan_label tv_label, dfsan_label tz_label,
                         dfsan_label *ret_label) {
@@ -1203,6 +1283,11 @@
     int fd, const void *buf, ssize_t count,
     dfsan_label fd_label, dfsan_label buf_label, dfsan_label count_label);
 
+typedef void (*write_origin_trampoline_t)(
+    void *callback, int fd, const void *buf, ssize_t count,
+    dfsan_label fd_label, dfsan_label buf_label, dfsan_label count_label,
+    dfsan_origin fd_origin, dfsan_origin buf_origin, dfsan_origin count_origin);
+
 // Calls to dfsan_set_write_callback() set the values in this struct.
 // Calls to the custom version of write() read (and invoke) them.
 static struct {
@@ -1210,6 +1295,11 @@
   void *write_callback = nullptr;
 } write_callback_info;
 
+static struct {
+  write_origin_trampoline_t write_callback_trampoline = nullptr;
+  void *write_callback = nullptr;
+} write_origin_callback_info;
+
 SANITIZER_INTERFACE_ATTRIBUTE void
 __dfsw_dfsan_set_write_callback(
     write_trampoline_t write_callback_trampoline,
@@ -1220,6 +1310,15 @@
   write_callback_info.write_callback = write_callback;
 }
 
+SANITIZER_INTERFACE_ATTRIBUTE void __dfso_dfsan_set_write_callback(
+    write_origin_trampoline_t write_callback_trampoline, void *write_callback,
+    dfsan_label write_callback_label, dfsan_label *ret_label,
+    dfsan_origin write_callback_origin, dfsan_origin *ret_origin) {
+  write_origin_callback_info.write_callback_trampoline =
+      write_callback_trampoline;
+  write_origin_callback_info.write_callback = write_callback;
+}
+
 SANITIZER_INTERFACE_ATTRIBUTE int
 __dfsw_write(int fd, const void *buf, size_t count,
              dfsan_label fd_label, dfsan_label buf_label,
@@ -1234,6 +1333,21 @@
   *ret_label = 0;
   return write(fd, buf, count);
 }
+
+SANITIZER_INTERFACE_ATTRIBUTE int __dfso_write(
+    int fd, const void *buf, size_t count, dfsan_label fd_label,
+    dfsan_label buf_label, dfsan_label count_label, dfsan_label *ret_label,
+    dfsan_origin fd_origin, dfsan_origin buf_origin, dfsan_origin count_origin,
+    dfsan_origin *ret_origin) {
+  if (write_origin_callback_info.write_callback) {  // Callback is optional.
+    write_origin_callback_info.write_callback_trampoline(
+        write_origin_callback_info.write_callback, fd, buf, count, fd_label,
+        buf_label, count_label, fd_origin, buf_origin, count_origin);
+  }
+
+  *ret_label = 0;  // Result label is cleared; ret_origin is left unwritten.
+  return write(fd, buf, count);
+}
 } // namespace __dfsan
 
 // Type used to extract a dfsan_label with va_arg()
@@ -1491,6 +1605,31 @@
   return ret;
 }
 
+static void BeforeFork() {  // Lock the stack depot, then the origin depot.
+  StackDepotLockAll();
+  GetChainedOriginDepot()->LockAll();
+}
+
+static void AfterFork() {  // Unlock in the reverse order of BeforeFork().
+  GetChainedOriginDepot()->UnlockAll();
+  StackDepotUnlockAll();
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+pid_t __dfsw_fork(dfsan_label *ret_label) {
+  pid_t pid = fork();
+  *ret_label = 0;  // The returned pid carries a zero label.
+  return pid;
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+pid_t __dfso_fork(dfsan_label *ret_label, dfsan_origin *ret_origin) {
+  BeforeFork();  // Hold both depot locks across the fork() call.
+  pid_t pid = __dfsw_fork(ret_label);
+  AfterFork();  // Reached in the parent and in the child.
+  return pid;
+}
+
 // Default empty implementations (weak). Users should redefine them.
 SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_pc_guard, u32 *) {}
 SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_pc_guard_init, u32 *,
diff --git a/compiler-rt/lib/dfsan/dfsan_thread.cpp b/compiler-rt/lib/dfsan/dfsan_thread.cpp
index 7fd9c8e..aa1209a 100644
--- a/compiler-rt/lib/dfsan/dfsan_thread.cpp
+++ b/compiler-rt/lib/dfsan/dfsan_thread.cpp
@@ -7,13 +7,15 @@
 namespace __dfsan {
 
 DFsanThread *DFsanThread::Create(void *start_routine_trampoline,
-                                 thread_callback_t start_routine, void *arg) {
+                                 thread_callback_t start_routine, void *arg,
+                                 bool track_origins) {
   uptr PageSize = GetPageSizeCached();
   uptr size = RoundUpTo(sizeof(DFsanThread), PageSize);
   DFsanThread *thread = (DFsanThread *)MmapOrDie(size, __func__);
   thread->start_routine_trampoline_ = start_routine_trampoline;
   thread->start_routine_ = start_routine;
   thread->arg_ = arg;
+  thread->track_origins_ = track_origins;
   thread->destructor_iterations_ = GetPthreadDestructorIterations();
 
   return thread;
@@ -57,11 +59,19 @@
 
   typedef void *(*thread_callback_trampoline_t)(void *, void *, dfsan_label,
                                                 dfsan_label *);
+  typedef void *(*thread_callback_origin_trampoline_t)(
+      void *, void *, dfsan_label, dfsan_label *, dfsan_origin, dfsan_origin *);
 
   dfsan_label ret_label;
-  return ((thread_callback_trampoline_t)
+  if (!track_origins_)
+    return ((thread_callback_trampoline_t)
+                start_routine_trampoline_)((void *)start_routine_, arg_, 0,
+                                           &ret_label);
+
+  dfsan_origin ret_origin;
+  return ((thread_callback_origin_trampoline_t)
               start_routine_trampoline_)((void *)start_routine_, arg_, 0,
-                                         &ret_label);
+                                         &ret_label, 0, &ret_origin);
 }
 
 DFsanThread::StackBounds DFsanThread::GetStackBounds() const {
diff --git a/compiler-rt/lib/dfsan/dfsan_thread.h b/compiler-rt/lib/dfsan/dfsan_thread.h
index c28f1df..616bbc5 100644
--- a/compiler-rt/lib/dfsan/dfsan_thread.h
+++ b/compiler-rt/lib/dfsan/dfsan_thread.h
@@ -24,7 +24,8 @@
   // via mmap() and *must* be valid in zero-initialized state.
 
   static DFsanThread *Create(void *start_routine_trampoline,
-                             thread_callback_t start_routine, void *arg);
+                             thread_callback_t start_routine, void *arg,
+                             bool track_origins = false);
   static void TSDDtor(void *tsd);
   void Destroy();
 
@@ -54,6 +55,7 @@
   void *start_routine_trampoline_;
   thread_callback_t start_routine_;
   void *arg_;
+  bool track_origins_;
 
   StackBounds stack_;
 
diff --git a/compiler-rt/lib/dfsan/done_abilist.txt b/compiler-rt/lib/dfsan/done_abilist.txt
index 1c993aa..7b392fa 100644
--- a/compiler-rt/lib/dfsan/done_abilist.txt
+++ b/compiler-rt/lib/dfsan/done_abilist.txt
@@ -30,6 +30,8 @@
 fun:dfsan_flush=discard
 fun:dfsan_print_origin_trace=uninstrumented
 fun:dfsan_print_origin_trace=discard
+fun:dfsan_get_origin=uninstrumented
+fun:dfsan_get_origin=custom
 fun:dfsan_get_init_origin=uninstrumented
 fun:dfsan_get_init_origin=discard
 
@@ -270,6 +272,9 @@
 fun:asprintf=discard
 fun:qsort=discard
 
+# fork
+fun:fork=custom
+
 ###############################################################################
 # pthread
 ###############################################################################
diff --git a/compiler-rt/test/dfsan/atomic.cpp b/compiler-rt/test/dfsan/atomic.cpp
index 7d4dc04..459bf31 100644
--- a/compiler-rt/test/dfsan/atomic.cpp
+++ b/compiler-rt/test/dfsan/atomic.cpp
@@ -1,4 +1,7 @@
 // RUN: %clangxx_dfsan -mllvm -dfsan-fast-16-labels=true %s -fno-exceptions -o %t && %run %t
+// RUN: %clangxx_dfsan -DORIGIN_TRACKING -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -fno-exceptions -o %t && %run %t
+//
+// REQUIRES: x86_64-target-arch
 //
 // Use -fno-exceptions to turn off exceptions to avoid instrumenting
 // __cxa_begin_catch, std::terminate and __gxx_personality_v0.
@@ -14,31 +17,45 @@
 
 std::atomic<int> atomic_i{0};
 
+struct arg_struct {
+  size_t index;
+  dfsan_origin origin;
+};
+
 static void *ThreadFn(void *arg) {
-  if ((size_t)arg % 2) {
+  if (((arg_struct *)arg)->index % 2) {
     int i = 10;
     dfsan_set_label(8, (void *)&i, sizeof(i));
     atomic_i.store(i, std::memory_order_relaxed);
-
     return 0;
   }
   int j = atomic_i.load();
   assert(dfsan_get_label(j) == 0 || dfsan_get_label(j) == 2);
-
+#ifdef ORIGIN_TRACKING
+  if (dfsan_get_label(j) == 2)
+    assert(dfsan_get_init_origin(&j) == ((arg_struct *)arg)->origin);
+#endif
   return 0;
 }
 
 int main(void) {
   int i = 10;
   dfsan_set_label(2, (void *)&i, sizeof(i));
+#ifdef ORIGIN_TRACKING
+  dfsan_origin origin = dfsan_get_origin(i);
+#endif
   atomic_i.store(i, std::memory_order_relaxed);
   const int kNumThreads = 24;
   pthread_t t[kNumThreads];
+  arg_struct args[kNumThreads];
   for (int i = 0; i < kNumThreads; ++i) {
-    pthread_create(&t[i], 0, ThreadFn, (void *)i);
+    args[i].index = i;
+#ifdef ORIGIN_TRACKING
+    args[i].origin = origin;
+#endif
+    pthread_create(&t[i], 0, ThreadFn, (void *)(args + i));
   }
-  for (int i = 0; i < kNumThreads; ++i) {
+  for (int i = 0; i < kNumThreads; ++i)
     pthread_join(t[i], 0);
-  }
   return 0;
 }
diff --git a/compiler-rt/test/dfsan/custom.cpp b/compiler-rt/test/dfsan/custom.cpp
index 4676c91..b95d744 100644
--- a/compiler-rt/test/dfsan/custom.cpp
+++ b/compiler-rt/test/dfsan/custom.cpp
@@ -3,8 +3,12 @@
 // RUN: %clang_dfsan -DFAST_16_LABELS -mllvm -dfsan-fast-16-labels %s -o %t && DFSAN_OPTIONS="strict_data_dependencies=0" %run %t
 // RUN: %clang_dfsan -DSTRICT_DATA_DEPENDENCIES %s -o %t && %run %t
 // RUN: %clang_dfsan -DSTRICT_DATA_DEPENDENCIES -mllvm -dfsan-args-abi %s -o %t && %run %t
-
+// RUN: %clang_dfsan -DFAST_16_LABELS -DORIGIN_TRACKING -mllvm -dfsan-fast-16-labels -mllvm -dfsan-track-origins=1 -mllvm -dfsan-combine-pointer-labels-on-load=false -DSTRICT_DATA_DEPENDENCIES %s -o %t && %run %t
+// RUN: %clang_dfsan -DFAST_16_LABELS -DORIGIN_TRACKING -mllvm -dfsan-fast-16-labels -mllvm -dfsan-track-origins=1 -mllvm -dfsan-combine-pointer-labels-on-load=false %s -o %t && DFSAN_OPTIONS="strict_data_dependencies=0" %run %t
+//
 // Tests custom implementations of various glibc functions.
+//
+// REQUIRES: x86_64-target-arch
 
 #include <sanitizer/dfsan_interface.h>
 
@@ -35,6 +39,8 @@
 dfsan_label i_label = 0;
 dfsan_label j_label = 0;
 dfsan_label k_label = 0;
+dfsan_label m_label = 0;
+dfsan_label n_label = 0;
 dfsan_label i_j_label = 0;
 
 #define ASSERT_ZERO_LABEL(data) \
@@ -49,6 +55,102 @@
 #define ASSERT_READ_LABEL(ptr, size, label) \
   assert(label == dfsan_read_label(ptr, size))
 
+#ifdef ORIGIN_TRACKING
+#define ASSERT_ZERO_ORIGIN(data) \
+  assert(0 == dfsan_get_origin((long)(data)))
+#else
+#define ASSERT_ZERO_ORIGIN(data)
+#endif
+
+#ifdef ORIGIN_TRACKING
+#define ASSERT_ZERO_ORIGINS(ptr, size)                       \
+  for (int i = 0; i < size; ++i) {                           \
+    assert(0 == dfsan_get_origin((long)(((char *)ptr)[i]))); \
+  }
+#else
+#define ASSERT_ZERO_ORIGINS(ptr, size)
+#endif
+
+#ifdef ORIGIN_TRACKING
+#define ASSERT_ORIGIN(data, origin) \
+  assert(origin == dfsan_get_origin((long)(data)))
+#else
+#define ASSERT_ORIGIN(data, origin)
+#endif
+
+#ifdef ORIGIN_TRACKING  // Check the origin of each of the first `size` bytes.
+#define ASSERT_ORIGINS(ptr, size, origin)                         \
+  for (int i = 0; i < size; ++i) {                                \
+    assert(origin == dfsan_get_origin((long)(((char *)ptr)[i]))); \
+  }
+#else  // No origin tracking: expand to nothing so call sites still compile.
+#define ASSERT_ORIGINS(ptr, size, origin)
+#endif
+
+#ifdef ORIGIN_TRACKING
+#define ASSERT_INIT_ORIGIN(ptr, origin) \
+  assert(origin == dfsan_get_init_origin(ptr))
+#else
+#define ASSERT_INIT_ORIGIN(ptr, origin)
+#endif
+
+#ifdef ORIGIN_TRACKING
+#define ASSERT_INIT_ORIGIN_EQ_ORIGIN(ptr, data) \
+  assert(dfsan_get_origin((long)(data)) == dfsan_get_init_origin(ptr))
+#else
+#define ASSERT_INIT_ORIGIN_EQ_ORIGIN(ptr, data)
+#endif
+
+#ifdef ORIGIN_TRACKING
+#define ASSERT_INIT_ORIGINS(ptr, size, origin)                  \
+  for (int i = 0; i < size; ++i) {                              \
+    assert(origin == dfsan_get_init_origin(&((char *)ptr)[i])); \
+  }
+#else
+#define ASSERT_INIT_ORIGINS(ptr, size, origin)
+#endif
+
+#ifdef ORIGIN_TRACKING
+#define ASSERT_EQ_ORIGIN(data1, data2) \
+  assert(dfsan_get_origin((long)(data1)) == dfsan_get_origin((long)(data2)))
+#else
+#define ASSERT_EQ_ORIGIN(data1, data2)
+#endif
+
+#ifdef ORIGIN_TRACKING
+#define DEFINE_AND_SAVE_ORIGINS(val)    \
+  dfsan_origin val##_o[sizeof(val)];    \
+  for (int i = 0; i < sizeof(val); ++i) \
+    val##_o[i] = dfsan_get_origin((long)(((char *)(&val))[i]));
+#else
+#define DEFINE_AND_SAVE_ORIGINS(val)
+#endif
+
+#ifdef ORIGIN_TRACKING
+#define SAVE_ORIGINS(val)               \
+  for (int i = 0; i < sizeof(val); ++i) \
+    val##_o[i] = dfsan_get_origin((long)(((char *)(&val))[i]));
+#else
+#define SAVE_ORIGINS(val)
+#endif
+
+#ifdef ORIGIN_TRACKING
+#define ASSERT_SAVED_ORIGINS(val)       \
+  for (int i = 0; i < sizeof(val); ++i) \
+    ASSERT_ORIGIN(((char *)(&val))[i], val##_o[i]);
+#else
+#define ASSERT_SAVED_ORIGINS(val)
+#endif
+
+#ifdef ORIGIN_TRACKING
+#define ASSERT_SAVED_N_ORIGINS(val, n) \
+  for (int i = 0; i < n; ++i)          \
+    ASSERT_ORIGIN(val[i], val##_o[i]);
+#else
+#define ASSERT_SAVED_N_ORIGINS(val, n)
+#endif
+
+#if !defined(ORIGIN_TRACKING)
 void test_stat() {
   int i = 1;
   dfsan_set_label(i_label, &i, sizeof(i));
@@ -175,6 +277,7 @@
   }
   ASSERT_LABEL(dst[11], j_label);
 }
+#endif // !defined(ORIGIN_TRACKING)
 
 void test_strlen() {
   char str1[] = "str1";
@@ -186,9 +289,11 @@
   ASSERT_ZERO_LABEL(rv);
 #else
   ASSERT_LABEL(rv, i_label);
+  ASSERT_EQ_ORIGIN(rv, str1[3]);
 #endif
 }
 
+#if !defined(ORIGIN_TRACKING)
 void test_strdup() {
   char str1[] = "str1";
   dfsan_set_label(i_label, &str1[3], 1);
@@ -835,13 +940,17 @@
   assert(ret == 0);
   ASSERT_READ_ZERO_LABEL(&mask, sizeof(mask));
 }
+#endif // !defined(ORIGIN_TRACKING)
 
 void test_sigemptyset() {
   sigset_t set;
   dfsan_set_label(j_label, &set, 1);
+  DEFINE_AND_SAVE_ORIGINS(set)
   int ret = sigemptyset(&set);
   assert(ret == 0);
+  ASSERT_ZERO_LABEL(ret);
   ASSERT_READ_ZERO_LABEL(&set, sizeof(set));
+  ASSERT_SAVED_ORIGINS(set)
 }
 
 static void SignalHandler(int signo) {}
@@ -856,10 +965,12 @@
   // Set sigaction to be SignalAction, save the last one into origin_act
   struct sigaction origin_act;
   dfsan_set_label(j_label, &origin_act, 1);
+  DEFINE_AND_SAVE_ORIGINS(origin_act)
   int ret = sigaction(SIGUSR1, &newact_with_sigaction, &origin_act);
   assert(ret == 0);
   ASSERT_ZERO_LABEL(ret);
   ASSERT_READ_ZERO_LABEL(&origin_act, sizeof(origin_act));
+  ASSERT_SAVED_ORIGINS(origin_act)
 
   struct sigaction newact_with_sighandler = {};
   newact_with_sighandler.sa_handler = SignalHandler;
@@ -904,12 +1015,15 @@
 void test_sigaltstack() {
   stack_t old_altstack = {};
   dfsan_set_label(j_label, &old_altstack, sizeof(old_altstack));
+  DEFINE_AND_SAVE_ORIGINS(old_altstack)
   int ret = sigaltstack(NULL, &old_altstack);
   assert(ret == 0);
   ASSERT_ZERO_LABEL(ret);
   ASSERT_READ_ZERO_LABEL(&old_altstack, sizeof(old_altstack));
+  ASSERT_SAVED_ORIGINS(old_altstack)
 }
 
+#if !defined(ORIGIN_TRACKING)
 void test_gettimeofday() {
   struct timeval tv;
   struct timezone tz;
@@ -920,6 +1034,7 @@
   ASSERT_READ_ZERO_LABEL(&tv, sizeof(tv));
   ASSERT_READ_ZERO_LABEL(&tz, sizeof(tz));
 }
+#endif // !defined(ORIGIN_TRACKING)
 
 void *pthread_create_test_cb(void *p) {
   assert(p == (void *)1);
@@ -929,20 +1044,25 @@
 
 void test_pthread_create() {
   pthread_t pt;
-  pthread_create(&pt, 0, pthread_create_test_cb, (void *)1);
+  int create_ret = pthread_create(&pt, 0, pthread_create_test_cb, (void *)1);
+  assert(create_ret == 0);
+  ASSERT_ZERO_LABEL(create_ret);
   void *cbrv;
   dfsan_set_label(i_label, &cbrv, sizeof(cbrv));
-  int ret = pthread_join(pt, &cbrv);
-  assert(ret == 0);
+  DEFINE_AND_SAVE_ORIGINS(cbrv)
+  int joint_ret = pthread_join(pt, &cbrv);
+  assert(joint_ret == 0);
   assert(cbrv == (void *)2);
-  ASSERT_ZERO_LABEL(ret);
+  ASSERT_ZERO_LABEL(joint_ret);
   ASSERT_ZERO_LABEL(cbrv);
+  ASSERT_SAVED_ORIGINS(cbrv);
 }
 
 // Tested by test_pthread_create().  This empty function is here to appease the
 // check-wrappers script.
 void test_pthread_join() {}
 
+#if !defined(ORIGIN_TRACKING)
 int dl_iterate_phdr_test_cb(struct dl_phdr_info *info, size_t size,
                             void *data) {
   assert(data == (void *)3);
@@ -1165,6 +1285,7 @@
 
   close(sockfd);
 }
+#endif // !defined(ORIGIN_TRACKING)
 
 void test_write() {
   int fd = open("/dev/null", O_WRONLY);
@@ -1189,6 +1310,7 @@
   close(fd);
 }
 
+#if !defined(ORIGIN_TRACKING)
 template <class T>
 void test_sprintf_chunk(const char* expected, const char* format, T arg) {
   char buf[512];
@@ -1316,22 +1438,32 @@
   ASSERT_READ_LABEL(buf + 17, 2, 0);
   ASSERT_LABEL(r, 0);
 }
+#endif // !defined(ORIGIN_TRACKING)
+
+// Tested by a separate source file.  This empty function is here to appease the
+// check-wrappers script.
+void test_fork() {}
 
 int main(void) {
 #ifdef FAST_16_LABELS
   i_label = 1;
   j_label = 2;
   k_label = 4;
+  m_label = 8;
+  n_label = 16;
 #else
   i_label = dfsan_create_label("i", 0);
   j_label = dfsan_create_label("j", 0);
   k_label = dfsan_create_label("k", 0);
+  m_label = dfsan_create_label("m", 0);
+  n_label = dfsan_create_label("n", 0);
 #endif
   i_j_label = dfsan_union(i_label, j_label);
   assert(i_j_label != i_label);
   assert(i_j_label != j_label);
   assert(i_j_label != k_label);
 
+#if !defined(ORIGIN_TRACKING)
   test__dl_get_tls_static_info();
   test_bcmp();
   test_calloc();
@@ -1363,17 +1495,21 @@
   test_nanosleep();
   test_poll();
   test_pread();
+#endif // !defined(ORIGIN_TRACKING)
   test_pthread_create();
   test_pthread_join();
+#if !defined(ORIGIN_TRACKING)
   test_read();
   test_recvmmsg();
   test_recvmsg();
   test_sched_getaffinity();
   test_select();
+#endif // !defined(ORIGIN_TRACKING)
   test_sigaction();
   test_signal();
   test_sigaltstack();
   test_sigemptyset();
+#if !defined(ORIGIN_TRACKING)
   test_snprintf();
   test_socketpair();
   test_sprintf();
@@ -1384,7 +1520,9 @@
   test_strcat();
   test_strcpy();
   test_strdup();
+#endif // !defined(ORIGIN_TRACKING)
   test_strlen();
+#if !defined(ORIGIN_TRACKING)
   test_strncasecmp();
   test_strncmp();
   test_strncpy();
@@ -1397,5 +1535,7 @@
   test_strtoul();
   test_strtoull();
   test_time();
+#endif // !defined(ORIGIN_TRACKING)
   test_write();
+  test_fork();
 }
diff --git a/compiler-rt/test/dfsan/fork.cpp b/compiler-rt/test/dfsan/fork.cpp
new file mode 100644
index 0000000..cd9b641
--- /dev/null
+++ b/compiler-rt/test/dfsan/fork.cpp
@@ -0,0 +1,107 @@
+// Test that chained origins are fork-safe.
+// Run a number of threads that create new chained origins, then fork
+// and verify that origin reads do not deadlock in the child process.
+//
+// RUN: %clangxx_dfsan -mllvm -dfsan-fast-16-labels=true %s -o %t
+// RUN: %run %t 2>&1 | FileCheck %s
+//
+// RUN: %clangxx_dfsan -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t
+// RUN: DFSAN_OPTIONS=store_context_size=1000,origin_history_size=0,origin_history_per_stack_limit=0 %run %t 2>&1 | FileCheck %s
+//
+// REQUIRES: x86_64-target-arch
+
+#include <assert.h>
+#include <errno.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <sanitizer/dfsan_interface.h>
+
+int done;
+
+void copy_labels_thread2() {
+  volatile int x = 0;
+  volatile int v = 0;
+  dfsan_set_label(8, (void *)&x, sizeof(x));
+  while (true) {
+    v = x;
+    x = v;
+    if (__atomic_load_n(&done, __ATOMIC_RELAXED))
+      return;
+  }
+}
+
+void copy_labels_thread1(int level) {
+  if (!level)
+    copy_labels_thread2();
+  else
+    copy_labels_thread1(level - 1);
+}
+
+void *copy_labels_thread(void *id) {
+  copy_labels_thread1((long)id);
+  return 0;
+}
+
+// Run through stackdepot in the child process.
+// If any of the hash table cells are locked, this may deadlock.
+void child() {
+  volatile int x = 0;
+  volatile int v = 0;
+  dfsan_set_label(16, (void *)&x, sizeof(x));
+  for (int i = 0; i < 10000; ++i) {
+    v = x;
+    x = v;
+  }
+  write(2, "done\n", 5);
+}
+
+void test() {
+  const int kThreads = 10;
+  pthread_t t[kThreads];
+  for (int i = 0; i < kThreads; ++i)
+    pthread_create(&t[i], NULL, copy_labels_thread, (void *)(long)i);
+  usleep(100000);
+  pid_t pid = fork();
+  if (pid) {
+    // parent
+    __atomic_store_n(&done, 1, __ATOMIC_RELAXED);
+    pid_t p;
+    while ((p = wait(NULL)) == -1) {
+    }
+  } else {
+    // child
+    child();
+  }
+}
+
+int main() {
+  const int kChildren = 20;
+  for (int i = 0; i < kChildren; ++i) {
+    pid_t pid = fork();
+    assert(dfsan_get_label(pid) == 0);
+    if (pid) {
+      // parent
+    } else {
+      test();
+      exit(0);
+    }
+  }
+
+  for (int i = 0; i < kChildren; ++i) {
+    pid_t p;
+    while ((p = wait(NULL)) == -1) {
+    }
+  }
+
+  return 0;
+}
+
+// Expect 20 (== kChildren) "done" messages.
+// CHECK-COUNT-20: done
diff --git a/compiler-rt/test/dfsan/origin_with_sigactions.c b/compiler-rt/test/dfsan/origin_with_sigactions.c
new file mode 100644
index 0000000..3f7986d
--- /dev/null
+++ b/compiler-rt/test/dfsan/origin_with_sigactions.c
@@ -0,0 +1,79 @@
+// Check that stores in signal handlers are not recorded in origin history.
+//
+// Origin tracking uses ChainedOriginDepot that is not async signal safe, so we
+// do not track origins inside signal handlers.
+//
+// RUN: %clang_dfsan -gmlt -DUSE_SIGNAL_ACTION -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \
+// RUN:      %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+//
+// RUN: %clang_dfsan -gmlt -DUSE_SIGNAL_ACTION -mllvm -dfsan-instrument-with-call-threshold=0 -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \
+// RUN:     %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+//
+// RUN: %clang_dfsan -gmlt -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \
+// RUN:     %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+//
+// RUN: %clang_dfsan -gmlt -mllvm -dfsan-instrument-with-call-threshold=0 -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \
+// RUN:     %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+//
+// REQUIRES: x86_64-target-arch
+
+#include <sanitizer/dfsan_interface.h>
+
+#include <assert.h>
+#include <signal.h>
+#include <string.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+int x, y, u;
+
+void CopyXtoYtoU() {
+  y = x;
+  memcpy(&u, &y, sizeof(int));
+}
+
+void SignalHandler(int signo) {
+  CopyXtoYtoU();
+}
+
+void SignalAction(int signo, siginfo_t *si, void *uc) {
+  CopyXtoYtoU();
+}
+
+int main(int argc, char *argv[]) {
+  int z = 1;
+  dfsan_set_label(8, &z, sizeof(z));
+  x = z;
+
+  struct sigaction psa = {};
+#ifdef USE_SIGNAL_ACTION
+  psa.sa_flags = SA_SIGINFO;
+  psa.sa_sigaction = SignalAction;
+#else
+  psa.sa_flags = 0;
+  psa.sa_handler = SignalHandler;
+#endif
+  sigaction(SIGHUP, &psa, NULL);
+  kill(getpid(), SIGHUP);
+  signal(SIGHUP, SIG_DFL);
+
+  assert(x == 1);
+  assert(y == 1);
+  assert(u == 1);
+
+  dfsan_print_origin_trace(&u, NULL);
+  return 0;
+}
+
+// CHECK: Taint value 0x8 {{.*}} origin tracking ()
+// CHECK: Origin value: {{.*}}, Taint value was stored to memory at
+// CHECK-NOT: {{.*}} in dfs$CopyXtoYtoU {{.*}}origin_with_sigactions.c{{.*}}
+
+// CHECK: #0 {{.*}} in main {{.*}}origin_with_sigactions.c:[[@LINE-26]]
+
+// CHECK: Origin value: {{.*}}, Taint value was created at
+// CHECK: #0 {{.*}} in main {{.*}}origin_with_sigactions.c:[[@LINE-30]]
diff --git a/compiler-rt/test/dfsan/origin_with_signals.cpp b/compiler-rt/test/dfsan/origin_with_signals.cpp
new file mode 100644
index 0000000..d488556
--- /dev/null
+++ b/compiler-rt/test/dfsan/origin_with_signals.cpp
@@ -0,0 +1,50 @@
+// Check that stores in signal handlers are not recorded in origin history.
+//
+// Origin tracking uses ChainedOriginDepot, which is not async-signal-safe, so
+// we do not track origins inside signal handlers.
+//
+// RUN: %clangxx_dfsan -gmlt -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \
+// RUN:     %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+//
+// RUN: %clangxx_dfsan -gmlt -mllvm -dfsan-instrument-with-call-threshold=0 -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \
+// RUN:     %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+//
+// REQUIRES: x86_64-target-arch
+
+#include <sanitizer/dfsan_interface.h>
+
+#include <signal.h>
+#include <string.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+int x, y, u; // x is tainted in main; y and u are written only by SignalHandler.
+
+void SignalHandler(int signo) { // Installed for SIGHUP in main.
+  y = x; // Plain store of the tainted global.
+  memcpy(&u, &y, sizeof(int)); // Second copy, this time through memcpy.
+}
+
+int main(int argc, char *argv[]) {
+  int z = 0;
+  dfsan_set_label(8, &z, sizeof(z)); // Taint creation site matched by the last CHECK.
+  x = z; // The only store the origin trace is expected to report.
+
+  signal(SIGHUP, SignalHandler);
+  kill(getpid(), SIGHUP); // Send SIGHUP to this process so SignalHandler runs.
+  signal(SIGHUP, SIG_DFL);
+
+  dfsan_print_origin_trace(&u, nullptr); // Output is matched by the CHECK lines below.
+  return 0;
+}
+
+// CHECK: Taint value 0x8 {{.*}} origin tracking ()
+// CHECK: Origin value: {{.*}}, Taint value was stored to memory at
+// CHECK-NOT: {{.*}} in dfs$SignalHandler {{.*}}origin_with_signals.cpp{{.*}}
+
+// CHECK: #0 {{.*}} in main {{.*}}origin_with_signals.cpp:[[@LINE-14]]
+
+// CHECK: Origin value: {{.*}}, Taint value was created at
+// CHECK: #0 {{.*}} in main {{.*}}origin_with_signals.cpp:[[@LINE-18]]
diff --git a/compiler-rt/test/dfsan/pthread.c b/compiler-rt/test/dfsan/pthread.c
index 6824cb3..75bc472 100644
--- a/compiler-rt/test/dfsan/pthread.c
+++ b/compiler-rt/test/dfsan/pthread.c
@@ -1,29 +1,53 @@
 // RUN: %clang_dfsan -mllvm -dfsan-fast-16-labels=true %s -o %t && %run %t
+//
+// RUN: %clang_dfsan -gmlt -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \
+// RUN:     %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+//
+// RUN: %clang_dfsan -gmlt -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true -mllvm -dfsan-instrument-with-call-threshold=0 %s -o %t && \
+// RUN:     %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+//
+// REQUIRES: x86_64-target-arch
 
 #include <sanitizer/dfsan_interface.h>
 
 #include <assert.h>
 #include <pthread.h>
+#include <string.h>
 
-int volatile x;
-int __thread y;
+const int kNumThreads = 24; // File scope so it is visible outside main.
+int x = 0; // Labeled in main before any thread starts.
+int __thread y, z; // Per-thread copies written in ThreadFn.
 
 static void *ThreadFn(void *a) {
   y = x;
   assert(dfsan_get_label(y) == 8);
+  memcpy(&z, &y, sizeof(y)); // Second store; the trace should list both stores.
+  if ((size_t)a == 7) // a is the size_t index from main; only one thread prints.
+    dfsan_print_origin_trace(&z, NULL);
   return 0;
 }
 
 int main(void) {
   dfsan_set_label(8, &x, sizeof(x));
 
-  const int kNumThreads = 24;
   pthread_t t[kNumThreads];
-  for (size_t i = 0; i < kNumThreads; ++i) {
+  for (size_t i = 0; i < kNumThreads; ++i) // i is handed to ThreadFn as its argument.
     pthread_create(&t[i], 0, ThreadFn, (void *)i);
-  }
-  for (size_t i = 0; i < kNumThreads; ++i) {
+
+  for (size_t i = 0; i < kNumThreads; ++i) // Join every thread before returning.
     pthread_join(t[i], 0);
-  }
+
   return 0;
 }
+
+// CHECK: Taint value 0x8 {{.*}} origin tracking ()
+// CHECK: Origin value: {{.*}}, Taint value was stored to memory at
+// CHECK: #0 {{.*}} in dfs$ThreadFn {{.*}}pthread.c:[[@LINE-21]]
+
+// CHECK: Origin value: {{.*}}, Taint value was stored to memory at
+// CHECK: #0 {{.*}} in dfs$ThreadFn {{.*}}pthread.c:[[@LINE-26]]
+
+// CHECK: Origin value: {{.*}}, Taint value was created at
+// CHECK: #0 {{.*}} in main {{.*}}pthread.c:[[@LINE-20]]
diff --git a/compiler-rt/test/dfsan/sigaction_stress_test.c b/compiler-rt/test/dfsan/sigaction_stress_test.c
index 0748d20..edb45fa 100644
--- a/compiler-rt/test/dfsan/sigaction_stress_test.c
+++ b/compiler-rt/test/dfsan/sigaction_stress_test.c
@@ -1,6 +1,10 @@
-// RUN: %clangxx_dfsan -mllvm -dfsan-fast-16-labels=true -O0 %s -o %t && %run %t
+// RUN: %clangxx_dfsan -mllvm -dfsan-fast-16-labels=true %s -o %t && %run %t
+// RUN: %clangxx_dfsan -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && %run %t
+// RUN: %clangxx_dfsan -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true -mllvm -dfsan-instrument-with-call-threshold=0 %s -o %t && %run %t
 //
 // Test that the state of shadows from a sigaction handler are consistent.
+//
+// REQUIRES: x86_64-target-arch
 
 #include <signal.h>
 #include <stdarg.h>
@@ -10,7 +14,7 @@
 #include <stdio.h>
 
 const int kSigCnt = 200;
-int x;
+int x = 0;
 
 __attribute__((noinline))
 int f(int a) {
