Creating branches/google/testing and tags/google/testing/ from r317203

git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/branches/google/testing@317856 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/.gitignore b/.gitignore
index 2a7bdd6..f7d4697 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,4 @@
 clang_darwin
 multi_arch
 *.sw?
+*.pyc
diff --git a/CMakeLists.txt b/CMakeLists.txt
index cbde831..adb40f2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -7,30 +7,26 @@
 # An important constraint of the build is that it only produces libraries
 # based on the ability of the host toolchain to target various platforms.
 
+cmake_minimum_required(VERSION 3.4.3)
+
 # Check if compiler-rt is built as a standalone project.
 if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR COMPILER_RT_STANDALONE_BUILD)
   project(CompilerRT C CXX ASM)
   set(COMPILER_RT_STANDALONE_BUILD TRUE)
 endif()
 
-cmake_minimum_required(VERSION 3.4.3)
-# FIXME:
-# The OLD behavior (pre 3.2) for this policy is to not set the value of the 
-# CMAKE_EXE_LINKER_FLAGS variable in the generated test project. The NEW behavior
-# for this policy is to set the value of the CMAKE_EXE_LINKER_FLAGS variable 
-# in the test project to the same as it is in the calling project. The new 
-# behavior cause the compiler_rt test to fail during try_compile: see
-# projects/compiler-rt/cmake/Modules/CompilerRTUtils.cmake:121 such that
-# CAN_TARGET_${arch} is not set properly. This results in COMPILER_RT_SUPPORTED_ARCH
-# not being updated properly leading to poblems.
-cmake_policy(SET CMP0056 OLD)
-
 # Add path for custom compiler-rt modules.
 list(INSERT CMAKE_MODULE_PATH 0
   "${CMAKE_CURRENT_SOURCE_DIR}/cmake"
   "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules"
   )
 
+if(CMAKE_CONFIGURATION_TYPES)
+  set(CMAKE_CFG_RESOLVED_INTDIR "${CMAKE_CFG_INTDIR}/")
+else()
+  set(CMAKE_CFG_RESOLVED_INTDIR "")
+endif()
+
 include(base-config-ix)
 include(CompilerRTUtils)
 
@@ -40,6 +36,15 @@
 mark_as_advanced(COMPILER_RT_BUILD_SANITIZERS)
 option(COMPILER_RT_BUILD_XRAY "Build xray" ON)
 mark_as_advanced(COMPILER_RT_BUILD_XRAY)
+option(COMPILER_RT_BUILD_LIBFUZZER "Build libFuzzer" ON)
+mark_as_advanced(COMPILER_RT_BUILD_LIBFUZZER)
+option(COMPILER_RT_BUILD_PROFILE "Build profile runtime" ON)
+mark_as_advanced(COMPILER_RT_BUILD_PROFILE)
+option(COMPILER_RT_BUILD_XRAY_NO_PREINIT "Build xray with no preinit patching" OFF)
+mark_as_advanced(COMPILER_RT_BUILD_XRAY_NO_PREINIT)
+
+set(COMPILER_RT_BAREMETAL_BUILD OFF CACHE BOOLEAN
+  "Build for a bare-metal target.")
 
 if (COMPILER_RT_STANDALONE_BUILD)
   load_llvm_config()
@@ -59,12 +64,19 @@
     set(LIT_ARGS_DEFAULT "${LIT_ARGS_DEFAULT} --no-progress-bar")
   endif()
   set(LLVM_LIT_ARGS "${LIT_ARGS_DEFAULT}" CACHE STRING "Default options for lit")
+  set(LLVM_LIT_OUTPUT_DIR "${COMPILER_RT_EXEC_OUTPUT_DIR}")
 endif()
 
 construct_compiler_rt_default_triple()
-if ("${COMPILER_RT_DEFAULT_TARGET_ABI}" STREQUAL "androideabi")
+if ("${COMPILER_RT_DEFAULT_TARGET_ABI}" MATCHES "hf$")
+  if (${COMPILER_RT_DEFAULT_TARGET_ARCH} MATCHES "^arm")
+    set(COMPILER_RT_DEFAULT_TARGET_ARCH "armhf")
+  endif()
+endif()
+if ("${COMPILER_RT_DEFAULT_TARGET_ABI}" MATCHES "^android")
   set(ANDROID 1)
 endif()
+pythonize_bool(ANDROID)
 
 set(COMPILER_RT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
 set(COMPILER_RT_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
@@ -85,21 +97,63 @@
 # COMPILER_RT_DEBUG_PYBOOL is used by lit.common.configured.in.
 pythonize_bool(COMPILER_RT_DEBUG)
 
+include(HandleCompilerRT)
 include(config-ix)
 
-if(APPLE AND SANITIZER_MIN_OSX_VERSION VERSION_LESS "10.9")
+if(APPLE AND SANITIZER_MIN_OSX_VERSION AND SANITIZER_MIN_OSX_VERSION VERSION_LESS "10.9")
   # Mac OS X prior to 10.9 had problems with exporting symbols from
   # libc++/libc++abi.
-  set(use_cxxabi_default OFF)
-elseif(MSVC)
-  set(use_cxxabi_default OFF)
+  set(cxxabi_supported OFF)
 else()
-  set(use_cxxabi_default ON)
+  set(cxxabi_supported ON)
 endif()
 
-option(SANITIZER_CAN_USE_CXXABI "Sanitizers can use cxxabi" ${use_cxxabi_default})
+option(SANITIZER_ALLOW_CXXABI "Allow use of C++ ABI details in ubsan" ON)
+
+set(SANITIZE_CAN_USE_CXXABI OFF)
+if (cxxabi_supported AND SANITIZER_ALLOW_CXXABI)
+  set(SANITIZER_CAN_USE_CXXABI ON)
+endif()
 pythonize_bool(SANITIZER_CAN_USE_CXXABI)
 
+set(SANITIZER_CXX_ABI "default" CACHE STRING
+    "Specify C++ ABI library to use.")
+set(CXXABIS none default libcxxabi libstdc++ libc++)
+set_property(CACHE SANITIZER_CXX_ABI PROPERTY STRINGS ;${CXXABIS})
+
+if (SANITIZER_CXX_ABI STREQUAL "default")
+  if (HAVE_LIBCXXABI AND COMPILER_RT_DEFAULT_TARGET_ONLY)
+    set(SANITIZER_CXX_ABI_LIBNAME "libcxxabi")
+    set(SANITIZER_CXX_ABI_INTREE 1)
+  elseif (APPLE)
+    set(SANITIZER_CXX_ABI_LIBNAME "libcxxabi")
+    set(SANITIZER_CXX_ABI_SYSTEM 1)
+  else()
+    set(SANITIZER_CXX_ABI_LIBNAME "libstdc++")
+  endif()
+else()
+  set(SANITIZER_CXX_ABI_LIBNAME "${SANITIZER_CXX_ABI}")
+endif()
+
+if (SANITIZER_CXX_ABI_LIBNAME STREQUAL "libcxxabi")
+  if (SANITIZER_CXX_ABI_INTREE)
+    if (TARGET unwind_shared OR HAVE_LIBUNWIND)
+      list(APPEND SANITIZER_CXX_ABI_LIBRARY unwind_shared)
+    endif()
+    if (TARGET cxxabi_shared OR HAVE_LIBCXXABI)
+      list(APPEND SANITIZER_CXX_ABI_LIBRARY cxxabi_shared)
+    endif()
+  else()
+    list(APPEND SANITIZER_CXX_ABI_LIBRARY "c++abi")
+  endif()
+elseif (SANITIZER_CXX_ABI_LIBNAME STREQUAL "libc++")
+  list(APPEND SANITIZER_CXX_ABI_LIBRARY "c++")
+elseif (SANITIZER_CXX_ABI_LIBNAME STREQUAL "libstdc++")
+  append_list_if(COMPILER_RT_HAS_LIBSTDCXX stdc++ SANITIZER_CXX_ABI_LIBRARY)
+endif()
+
+option(SANITIZER_USE_COMPILER_RT "Use compiler-rt builtins instead of libgcc" OFF)
+
 #================================
 # Setup Compiler Flags
 #================================
@@ -134,7 +188,7 @@
 endif()
 append_list_if(COMPILER_RT_HAS_FNO_BUILTIN_FLAG -fno-builtin SANITIZER_COMMON_CFLAGS)
 append_list_if(COMPILER_RT_HAS_FNO_EXCEPTIONS_FLAG -fno-exceptions SANITIZER_COMMON_CFLAGS)
-if(NOT COMPILER_RT_DEBUG)
+if(NOT COMPILER_RT_DEBUG AND NOT APPLE)
   append_list_if(COMPILER_RT_HAS_FOMIT_FRAME_POINTER_FLAG -fomit-frame-pointer SANITIZER_COMMON_CFLAGS)
 endif()
 append_list_if(COMPILER_RT_HAS_FUNWIND_TABLES_FLAG -funwind-tables SANITIZER_COMMON_CFLAGS)
@@ -179,10 +233,16 @@
 
 append_list_if(COMPILER_RT_DEBUG -DSANITIZER_DEBUG=1 SANITIZER_COMMON_CFLAGS)
 
-# Build with optimization, unless we're in debug mode. If we're using MSVC,
+# If we're using MSVC,
 # always respect the optimization flags set by CMAKE_BUILD_TYPE instead.
-if(NOT COMPILER_RT_DEBUG AND NOT MSVC)
-  list(APPEND SANITIZER_COMMON_CFLAGS -O3)
+if (NOT MSVC)
+
+  # Build with optimization, unless we're in debug mode.
+  if(COMPILER_RT_DEBUG)
+    list(APPEND SANITIZER_COMMON_CFLAGS -O0)
+  else()
+    list(APPEND SANITIZER_COMMON_CFLAGS -O3)
+  endif()
 endif()
 
 # Determine if we should restrict stack frame sizes.
@@ -229,6 +289,28 @@
 append_list_if(COMPILER_RT_HAS_WD4722_FLAG /wd4722 SANITIZER_COMMON_CFLAGS)
 append_list_if(COMPILER_RT_HAS_WD4800_FLAG /wd4800 SANITIZER_COMMON_CFLAGS)
 
+# Set common link flags.
+append_list_if(COMPILER_RT_HAS_NODEFAULTLIBS_FLAG -nodefaultlibs SANITIZER_COMMON_LINK_FLAGS)
+
+if (SANITIZER_USE_COMPILER_RT)
+  list(APPEND SANITIZER_COMMON_LINK_FLAGS -rtlib=compiler-rt)
+  find_compiler_rt_library(builtins COMPILER_RT_BUILTINS_LIBRARY)
+  list(APPEND SANITIZER_COMMON_LINK_LIBS ${COMPILER_RT_BUILTINS_LIBRARY})
+else()
+  if (ANDROID)
+    append_list_if(COMPILER_RT_HAS_GCC_LIB gcc SANITIZER_COMMON_LINK_LIBS)
+  else()
+    append_list_if(COMPILER_RT_HAS_GCC_S_LIB gcc_s SANITIZER_COMMON_LINK_LIBS)
+  endif()
+endif()
+
+append_list_if(COMPILER_RT_HAS_LIBC c SANITIZER_COMMON_LINK_LIBS)
+
+if("${CMAKE_SYSTEM_NAME}" STREQUAL "Fuchsia")
+  list(APPEND SANITIZER_COMMON_LINK_FLAGS -Wl,-z,defs,-z,now,-z,relro)
+  list(APPEND SANITIZER_COMMON_LINK_LIBS zircon)
+endif()
+
 # Warnings to turn off for all libraries, not just sanitizers.
 append_string_if(COMPILER_RT_HAS_WUNUSED_PARAMETER_FLAG -Wno-unused-parameter CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
 
@@ -249,20 +331,41 @@
 if(EXISTS ${COMPILER_RT_LIBCXX_PATH}/)
   set(COMPILER_RT_HAS_LIBCXX_SOURCES TRUE)
 else()
-  set(COMPILER_RT_HAS_LIBCXX_SOURCES FALSE)
+  set(COMPILER_RT_LIBCXX_PATH ${LLVM_MAIN_SRC_DIR}/../libcxx)
+  if(EXISTS ${COMPILER_RT_LIBCXX_PATH}/)
+    set(COMPILER_RT_HAS_LIBCXX_SOURCES TRUE)
+  else()
+    set(COMPILER_RT_HAS_LIBCXX_SOURCES FALSE)
+  endif()
 endif()
 
 set(COMPILER_RT_LLD_PATH ${LLVM_MAIN_SRC_DIR}/tools/lld)
-if(EXISTS ${COMPILER_RT_LLD_PATH}/)
-  set(COMPILER_RT_HAS_LLD_SOURCES TRUE)
+if(EXISTS ${COMPILER_RT_LLD_PATH}/ AND LLVM_TOOL_LLD_BUILD)
+  set(COMPILER_RT_HAS_LLD TRUE)
 else()
-  set(COMPILER_RT_HAS_LLD_SOURCES FALSE)
+  set(COMPILER_RT_LLD_PATH ${LLVM_MAIN_SRC_DIR}/../lld)
+  if(EXISTS ${COMPILER_RT_LLD_PATH}/ AND LLVM_TOOL_LLD_BUILD)
+    set(COMPILER_RT_HAS_LLD TRUE)
+  else()
+    set(COMPILER_RT_HAS_LLD FALSE)
+  endif()
 endif()
-pythonize_bool(COMPILER_RT_HAS_LLD_SOURCES)
+pythonize_bool(COMPILER_RT_HAS_LLD)
 
 add_subdirectory(lib)
 
 if(COMPILER_RT_INCLUDE_TESTS)
   add_subdirectory(unittests)
   add_subdirectory(test)
+  if (COMPILER_RT_STANDALONE_BUILD)
+    # If we have a valid source tree, generate llvm-lit into the bin directory.
+    # The user can still choose to have the check targets *use* a different lit
+    # by specifying -DLLVM_EXTERNAL_LIT, but we generate it regardless.
+    if (EXISTS ${LLVM_MAIN_SRC_DIR}/utils/llvm-lit)
+      add_subdirectory(${LLVM_MAIN_SRC_DIR}/utils/llvm-lit ${CMAKE_CURRENT_BINARY_DIR}/llvm-lit)
+    elseif(NOT EXISTS ${LLVM_EXTERNAL_LIT})
+      message(WARNING "Could not find LLVM source directory and LLVM_EXTERNAL_LIT does not"
+                       "point to a valid file.  You will not be able to run tests.")
+    endif()
+  endif()
 endif()
diff --git a/cmake/Modules/AddCompilerRT.cmake b/cmake/Modules/AddCompilerRT.cmake
index d4533e6..700c31f 100644
--- a/cmake/Modules/AddCompilerRT.cmake
+++ b/cmake/Modules/AddCompilerRT.cmake
@@ -56,8 +56,15 @@
 
   foreach(libname ${libnames})
     add_library(${libname} OBJECT ${LIB_SOURCES})
+
+    # Strip out -msse3 if this isn't macOS.
+    set(target_flags ${LIB_CFLAGS})
+    if(APPLE AND NOT "${libname}" MATCHES ".*\.osx.*")
+      list(REMOVE_ITEM target_flags "-msse3")
+    endif()
+
     set_target_compile_flags(${libname}
-      ${CMAKE_CXX_FLAGS} ${extra_cflags_${libname}} ${LIB_CFLAGS})
+      ${CMAKE_CXX_FLAGS} ${extra_cflags_${libname}} ${target_flags})
     set_property(TARGET ${libname} APPEND PROPERTY
       COMPILE_DEFINITIONS ${LIB_DEFS})
     set_target_properties(${libname} PROPERTIES FOLDER "Compiler-RT Libraries")
@@ -86,6 +93,14 @@
   add_dependencies(compiler-rt ${name})
 endfunction()
 
+macro(set_output_name output name arch)
+  if(ANDROID AND ${arch} STREQUAL "i386")
+    set(${output} "${name}-i686${COMPILER_RT_OS_SUFFIX}")
+  else()
+    set(${output} "${name}-${arch}${COMPILER_RT_OS_SUFFIX}")
+  endif()
+endmacro()
+
 # Adds static or shared runtime for a list of architectures and operating
 # systems and puts it in the proper directory in the build and install trees.
 # add_compiler_rt_runtime(<name>
@@ -110,8 +125,21 @@
     "OS;ARCHS;SOURCES;CFLAGS;LINK_FLAGS;DEFS;LINK_LIBS;OBJECT_LIBS"
     ${ARGN})
   set(libnames)
+  # Until we support this some other way, build compiler-rt runtime without LTO
+  # to allow non-LTO projects to link with it.
+  if(COMPILER_RT_HAS_FNO_LTO_FLAG)
+    set(NO_LTO_FLAGS "-fno-lto")
+  else()
+    set(NO_LTO_FLAGS "")
+  endif()
+
   if(APPLE)
     foreach(os ${LIB_OS})
+      # Strip out -msse3 if this isn't macOS.
+      list(LENGTH LIB_CFLAGS HAS_EXTRA_CFLAGS)
+      if(HAS_EXTRA_CFLAGS AND NOT "${os}" MATCHES "^(osx)$")
+        list(REMOVE_ITEM LIB_CFLAGS "-msse3")
+      endif()
       if(type STREQUAL "STATIC")
         set(libname "${name}_${os}")
       else()
@@ -121,7 +149,7 @@
       list_intersect(LIB_ARCHS_${libname} DARWIN_${os}_ARCHS LIB_ARCHS)
       if(LIB_ARCHS_${libname})
         list(APPEND libnames ${libname})
-        set(extra_cflags_${libname} ${DARWIN_${os}_CFLAGS} ${LIB_CFLAGS})
+        set(extra_cflags_${libname} ${DARWIN_${os}_CFLAGS} ${NO_LTO_FLAGS} ${LIB_CFLAGS})
         set(output_name_${libname} ${libname}${COMPILER_RT_OS_SUFFIX})
         set(sources_${libname} ${LIB_SOURCES})
         format_object_libs(sources_${libname} ${os} ${LIB_OBJECT_LIBS})
@@ -135,21 +163,21 @@
       endif()
       if(type STREQUAL "STATIC")
         set(libname "${name}-${arch}")
-        set(output_name_${libname} ${libname}${COMPILER_RT_OS_SUFFIX})
+        set_output_name(output_name_${libname} ${name} ${arch})
       else()
         set(libname "${name}-dynamic-${arch}")
         set(extra_cflags_${libname} ${TARGET_${arch}_CFLAGS} ${LIB_CFLAGS})
         set(extra_link_flags_${libname} ${TARGET_${arch}_LINK_FLAGS} ${LIB_LINK_FLAGS})
         if(WIN32)
-          set(output_name_${libname} ${name}_dynamic-${arch}${COMPILER_RT_OS_SUFFIX})
+          set_output_name(output_name_${libname} ${name}_dynamic ${arch})
         else()
-          set(output_name_${libname} ${name}-${arch}${COMPILER_RT_OS_SUFFIX})
+          set_output_name(output_name_${libname} ${name} ${arch})
         endif()
       endif()
       set(sources_${libname} ${LIB_SOURCES})
       format_object_libs(sources_${libname} ${arch} ${LIB_OBJECT_LIBS})
       set(libnames ${libnames} ${libname})
-      set(extra_cflags_${libname} ${TARGET_${arch}_CFLAGS} ${LIB_CFLAGS})
+      set(extra_cflags_${libname} ${TARGET_${arch}_CFLAGS} ${NO_LTO_FLAGS} ${LIB_CFLAGS})
     endforeach()
   endif()
 
@@ -195,14 +223,22 @@
     set_target_properties(${libname} PROPERTIES
         OUTPUT_NAME ${output_name_${libname}})
     set_target_properties(${libname} PROPERTIES FOLDER "Compiler-RT Runtime")
+    if(LIB_LINK_LIBS)
+      target_link_libraries(${libname} ${LIB_LINK_LIBS})
+    endif()
     if(${type} STREQUAL "SHARED")
-      if(LIB_LINK_LIBS)
-        target_link_libraries(${libname} ${LIB_LINK_LIBS})
-      endif()
       if(WIN32 AND NOT CYGWIN AND NOT MINGW)
         set_target_properties(${libname} PROPERTIES IMPORT_PREFIX "")
         set_target_properties(${libname} PROPERTIES IMPORT_SUFFIX ".lib")
       endif()
+      if(APPLE)
+        # Ad-hoc sign the dylibs
+        add_custom_command(TARGET ${libname}
+          POST_BUILD  
+          COMMAND codesign --sign - $<TARGET_FILE:${libname}>
+          WORKING_DIRECTORY ${COMPILER_RT_LIBRARY_OUTPUT_DIR}
+        )
+      endif()
     endif()
     install(TARGETS ${libname}
       ARCHIVE DESTINATION ${COMPILER_RT_LIBRARY_INSTALL_DIR}
@@ -272,24 +308,65 @@
   list(APPEND COMPILER_RT_GTEST_CFLAGS -Wno-deprecated-declarations)
 endif()
 
+# Compile and register compiler-rt tests.
+# generate_compiler_rt_tests(<output object files> <test_suite> <test_name>
+#                           <test architecture>
+#                           KIND <custom prefix>
+#                           SUBDIR <subdirectory for testing binary>
+#                           SOURCES <sources to compile>
+#                           RUNTIME <tests runtime to link in>
+#                           CFLAGS <compile-time flags>
+#                           COMPILE_DEPS <compile-time dependencies>
+#                           DEPS <dependencies>
+#                           LINK_FLAGS <flags to use during linking>
+# )
+function(generate_compiler_rt_tests test_objects test_suite testname arch)
+  cmake_parse_arguments(TEST "" "KIND;RUNTIME;SUBDIR"
+    "SOURCES;COMPILE_DEPS;DEPS;CFLAGS;LINK_FLAGS" ${ARGN})
+
+  foreach(source ${TEST_SOURCES})
+    sanitizer_test_compile(
+      "${test_objects}" "${source}" "${arch}"
+      KIND ${TEST_KIND}
+      COMPILE_DEPS ${TEST_COMPILE_DEPS}
+      DEPS ${TEST_DEPS}
+      CFLAGS ${TEST_CFLAGS}
+      )
+  endforeach()
+
+  set(TEST_DEPS ${${test_objects}})
+
+  if(NOT "${TEST_RUNTIME}" STREQUAL "")
+    list(APPEND TEST_DEPS ${TEST_RUNTIME})
+    list(APPEND "${test_objects}" $<TARGET_FILE:${TEST_RUNTIME}>)
+  endif()
+
+  add_compiler_rt_test(${test_suite} "${testname}" "${arch}"
+    SUBDIR ${TEST_SUBDIR}
+    OBJECTS ${${test_objects}}
+    DEPS ${TEST_DEPS}
+    LINK_FLAGS ${TEST_LINK_FLAGS}
+    )
+  set("${test_objects}" "${${test_objects}}" PARENT_SCOPE)
+endfunction()
+
 # Link objects into a single executable with COMPILER_RT_TEST_COMPILER,
 # using specified link flags. Make executable a part of provided
 # test_suite.
-# add_compiler_rt_test(<test_suite> <test_name>
+# add_compiler_rt_test(<test_suite> <test_name> <arch>
 #                      SUBDIR <subdirectory for binary>
 #                      OBJECTS <object files>
 #                      DEPS <deps (e.g. runtime libs)>
 #                      LINK_FLAGS <link flags>)
-macro(add_compiler_rt_test test_suite test_name)
+function(add_compiler_rt_test test_suite test_name arch)
   cmake_parse_arguments(TEST "" "SUBDIR" "OBJECTS;DEPS;LINK_FLAGS" "" ${ARGN})
-  set(output_bin ${CMAKE_CURRENT_BINARY_DIR})
+  set(output_dir ${CMAKE_CURRENT_BINARY_DIR})
   if(TEST_SUBDIR)
-    set(output_bin "${output_bin}/${TEST_SUBDIR}")
+    set(output_dir "${output_dir}/${TEST_SUBDIR}")
   endif()
-  if(CMAKE_CONFIGURATION_TYPES)
-    set(output_bin "${output_bin}/${CMAKE_CFG_INTDIR}")
-  endif()
-  set(output_bin "${output_bin}/${test_name}")
+  set(output_dir "${output_dir}/${CMAKE_CFG_INTDIR}")
+  file(MAKE_DIRECTORY "${output_dir}")
+  set(output_bin "${output_dir}/${test_name}")
   if(MSVC)
     set(output_bin "${output_bin}.exe")
   endif()
@@ -298,6 +375,10 @@
   if(NOT COMPILER_RT_STANDALONE_BUILD)
     list(APPEND TEST_DEPS clang)
   endif()
+
+  get_target_flags_for_arch(${arch} TARGET_LINK_FLAGS)
+  list(APPEND TEST_LINK_FLAGS ${TARGET_LINK_FLAGS})
+
   # If we're not on MSVC, include the linker flags from CMAKE but override them
   # with the provided link flags. This ensures that flags which are required to
   # link programs at all are included, but the changes needed for the test
@@ -308,16 +389,18 @@
     set(TEST_LINK_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${TEST_LINK_FLAGS}")
     separate_arguments(TEST_LINK_FLAGS)
   endif()
-  add_custom_target(${test_name}
-    COMMAND ${COMPILER_RT_TEST_COMPILER} ${TEST_OBJECTS}
-            -o "${output_bin}"
+  add_custom_command(
+    OUTPUT "${output_bin}"
+    COMMAND ${COMPILER_RT_TEST_COMPILER} ${TEST_OBJECTS} -o "${output_bin}"
             ${TEST_LINK_FLAGS}
-    DEPENDS ${TEST_DEPS})
-  set_target_properties(${test_name} PROPERTIES FOLDER "Compiler-RT Tests")
+    DEPENDS ${TEST_DEPS}
+    )
+  add_custom_target(T${test_name} DEPENDS "${output_bin}")
+  set_target_properties(T${test_name} PROPERTIES FOLDER "Compiler-RT Tests")
 
   # Make the test suite depend on the binary.
-  add_dependencies(${test_suite} ${test_name})
-endmacro()
+  add_dependencies(${test_suite} T${test_name})
+endfunction()
 
 macro(add_compiler_rt_resource_file target_name file_name component)
   set(src_file "${CMAKE_CURRENT_SOURCE_DIR}/${file_name}")
diff --git a/cmake/Modules/CompilerRTCompile.cmake b/cmake/Modules/CompilerRTCompile.cmake
index 30663b6..556ee78 100644
--- a/cmake/Modules/CompilerRTCompile.cmake
+++ b/cmake/Modules/CompilerRTCompile.cmake
@@ -24,12 +24,44 @@
   set(${out_flags} "${clang_flags}" PARENT_SCOPE)
 endfunction()
 
+# Compile a sanitizer test with a freshly built clang
+# for a given architecture, adding the result to the object list.
+#  - obj_list: output list of objects, populated by path
+#              of a generated object file.
+#  - source:   source file of a test.
+#  - arch:     architecture to compile for.
+# sanitizer_test_compile(<obj_list> <source> <arch>
+#                        KIND <custom namespace>
+#                        COMPILE_DEPS <list of compile-time dependencies>
+#                        DEPS <list of dependencies>
+#                        CFLAGS <list of flags>
+# )
+function(sanitizer_test_compile obj_list source arch)
+  cmake_parse_arguments(TEST
+      "" "" "KIND;COMPILE_DEPS;DEPS;CFLAGS" ${ARGN})
+  get_filename_component(basename ${source} NAME)
+  set(output_obj
+    "${CMAKE_CFG_RESOLVED_INTDIR}${obj_list}.${basename}.${arch}${TEST_KIND}.o")
+
+  # Write out architecture-specific flags into TARGET_CFLAGS variable.
+  get_target_flags_for_arch(${arch} TARGET_CFLAGS)
+  set(COMPILE_DEPS ${TEST_COMPILE_DEPS})
+  if(NOT COMPILER_RT_STANDALONE_BUILD)
+    list(APPEND COMPILE_DEPS ${TEST_DEPS})
+  endif()
+  clang_compile(${output_obj} ${source}
+                CFLAGS ${TEST_CFLAGS} ${TARGET_CFLAGS}
+                DEPS ${COMPILE_DEPS})
+  list(APPEND ${obj_list} ${output_obj})
+  set("${obj_list}" "${${obj_list}}" PARENT_SCOPE)
+endfunction()
+
 # Compile a source into an object file with COMPILER_RT_TEST_COMPILER using
 # a provided compile flags and dependenices.
 # clang_compile(<object> <source>
 #               CFLAGS <list of compile flags>
 #               DEPS <list of dependencies>)
-macro(clang_compile object_file source)
+function(clang_compile object_file source)
   cmake_parse_arguments(SOURCE "" "" "CFLAGS;DEPS" ${ARGN})
   get_filename_component(source_rpath ${source} REALPATH)
   if(NOT COMPILER_RT_STANDALONE_BUILD)
@@ -39,6 +71,7 @@
     list(APPEND SOURCE_DEPS CompilerRTUnitTestCheckCxx)
   endif()
   string(REGEX MATCH "[.](cc|cpp)$" is_cxx ${source_rpath})
+  string(REGEX MATCH "[.](m|mm)$" is_objc ${source_rpath})
   if(is_cxx)
     string(REPLACE " " ";" global_flags "${CMAKE_CXX_FLAGS}")
   else()
@@ -52,6 +85,9 @@
   if (APPLE)
     set(global_flags ${OSX_SYSROOT_FLAG} ${global_flags})
   endif()
+  if (is_objc)
+    list(APPEND global_flags -ObjC)
+  endif()
 
   # Ignore unknown warnings. CMAKE_CXX_FLAGS may contain GCC-specific options
   # which are not supported by Clang.
@@ -64,7 +100,7 @@
             ${source_rpath}
     MAIN_DEPENDENCY ${source}
     DEPENDS ${SOURCE_DEPS})
-endmacro()
+endfunction()
 
 # On Darwin, there are no system-wide C++ headers and the just-built clang is
 # therefore not able to compile C++ files unless they are copied/symlinked into
@@ -100,7 +136,7 @@
       COMMAND bash -c "${CMD}"
       COMMENT "Checking that just-built clang can find C++ headers..."
       VERBATIM)
-    if (TARGET clang)
+    if (NOT COMPILER_RT_STANDALONE_BUILD)
       ADD_DEPENDENCIES(CompilerRTUnitTestCheckCxx clang)
     endif()
   endif()
diff --git a/cmake/Modules/CompilerRTDarwinUtils.cmake b/cmake/Modules/CompilerRTDarwinUtils.cmake
index 3c89381..a25540b 100644
--- a/cmake/Modules/CompilerRTDarwinUtils.cmake
+++ b/cmake/Modules/CompilerRTDarwinUtils.cmake
@@ -4,14 +4,23 @@
 # set the default Xcode to use. This function finds the SDKs that are present in
 # the current Xcode.
 function(find_darwin_sdk_dir var sdk_name)
-  # Let's first try the internal SDK, otherwise use the public SDK.
-  execute_process(
-    COMMAND xcodebuild -version -sdk ${sdk_name}.internal Path
-    RESULT_VARIABLE result_process
-    OUTPUT_VARIABLE var_internal
-    OUTPUT_STRIP_TRAILING_WHITESPACE
-    ERROR_FILE /dev/null
-  )
+  set(DARWIN_${sdk_name}_CACHED_SYSROOT "" CACHE STRING "Darwin SDK path for SDK ${sdk_name}.")
+  set(DARWIN_PREFER_PUBLIC_SDK OFF CACHE BOOL "Prefer Darwin public SDK, even when an internal SDK is present.")
+
+  if(DARWIN_${sdk_name}_CACHED_SYSROOT)
+    set(${var} ${DARWIN_${sdk_name}_CACHED_SYSROOT} PARENT_SCOPE)
+    return()
+  endif()
+  if(NOT DARWIN_PREFER_PUBLIC_SDK)
+    # Let's first try the internal SDK, otherwise use the public SDK.
+    execute_process(
+      COMMAND xcodebuild -version -sdk ${sdk_name}.internal Path
+      RESULT_VARIABLE result_process
+      OUTPUT_VARIABLE var_internal
+      OUTPUT_STRIP_TRAILING_WHITESPACE
+      ERROR_FILE /dev/null
+    )
+  endif()
   if((NOT result_process EQUAL 0) OR "" STREQUAL "${var_internal}")
     execute_process(
       COMMAND xcodebuild -version -sdk ${sdk_name} Path
@@ -26,6 +35,7 @@
   if(result_process EQUAL 0)
     set(${var} ${var_internal} PARENT_SCOPE)
   endif()
+  set(DARWIN_${sdk_name}_CACHED_SYSROOT ${var_internal} CACHE STRING "Darwin SDK path for SDK ${sdk_name}." FORCE)
 endfunction()
 
 # There isn't a clear mapping of what architectures are supported with a given
@@ -85,10 +95,12 @@
     if(TEST_COMPILE_ONLY)
       try_compile_only(CAN_TARGET_${os}_${arch} -v -arch ${arch} ${DARWIN_${os}_CFLAGS})
     else()
+      set(SAVED_CMAKE_EXE_LINKER_FLAGS ${CMAKE_EXE_LINKER_FLAGS})
+      set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${arch_linker_flags}")
       try_compile(CAN_TARGET_${os}_${arch} ${CMAKE_BINARY_DIR} ${SIMPLE_C}
                   COMPILE_DEFINITIONS "-v -arch ${arch}" ${DARWIN_${os}_CFLAGS}
-                  CMAKE_FLAGS "-DCMAKE_EXE_LINKER_FLAGS=${arch_linker_flags}"
                   OUTPUT_VARIABLE TEST_OUTPUT)
+      set(CMAKE_EXE_LINKER_FLAGS ${SAVED_CMAKE_EXE_LINKER_FLAGS})
     endif()
     if(${CAN_TARGET_${os}_${arch}})
       list(APPEND working_archs ${arch})
@@ -188,10 +200,23 @@
   if(DARWIN_${LIB_OS}_SYSROOT)
     set(sysroot_flag -isysroot ${DARWIN_${LIB_OS}_SYSROOT})
   endif()
+
+  # Make a copy of the compilation flags.
+  set(builtin_cflags ${LIB_CFLAGS})
+
+  # Strip out any inappropriate flags for the target.
+  if("${LIB_ARCH}" MATCHES "^(armv7|armv7k|armv7s)$")
+    set(builtin_cflags "")
+    foreach(cflag "${LIB_CFLAGS}")
+      string(REPLACE "-fomit-frame-pointer" "" cflag "${cflag}")
+      list(APPEND builtin_cflags ${cflag})
+    endforeach(cflag)
+  endif()
+
   set_target_compile_flags(${libname}
     ${sysroot_flag}
     ${DARWIN_${LIB_OS}_BUILTIN_MIN_VER_FLAG}
-    ${LIB_CFLAGS})
+    ${builtin_cflags})
   set_property(TARGET ${libname} APPEND PROPERTY
       COMPILE_DEFINITIONS ${LIB_DEFS})
   set_target_properties(${libname} PROPERTIES
@@ -231,35 +256,6 @@
   endif()
 endfunction()
 
-# Filter out generic versions of routines that are re-implemented in
-# architecture specific manner.  This prevents multiple definitions of the
-# same symbols, making the symbol selection non-deterministic.
-function(darwin_filter_builtin_sources output_var exclude_or_include excluded_list)
-  if(exclude_or_include STREQUAL "EXCLUDE")
-    set(filter_action GREATER)
-    set(filter_value -1)
-  elseif(exclude_or_include STREQUAL "INCLUDE")
-    set(filter_action LESS)
-    set(filter_value 0)
-  else()
-    message(FATAL_ERROR "darwin_filter_builtin_sources called without EXCLUDE|INCLUDE")
-  endif()
-
-  set(intermediate ${ARGN})
-  foreach (_file ${intermediate})
-    get_filename_component(_name_we ${_file} NAME_WE)
-    list(FIND ${excluded_list} ${_name_we} _found)
-    if(_found ${filter_action} ${filter_value})
-      list(REMOVE_ITEM intermediate ${_file})
-    elseif(${_file} MATCHES ".*/.*\\.S" OR ${_file} MATCHES ".*/.*\\.c")
-      get_filename_component(_name ${_file} NAME)
-      string(REPLACE ".S" ".c" _cname "${_name}")
-      list(REMOVE_ITEM intermediate ${_cname})
-    endif ()
-  endforeach ()
-  set(${output_var} ${intermediate} PARENT_SCOPE)
-endfunction()
-
 # Generates builtin libraries for all operating systems specified in ARGN. Each
 # OS library is constructed by lipo-ing together single-architecture libraries.
 macro(darwin_add_builtin_libraries)
@@ -282,7 +278,7 @@
                               ARCH ${arch}
                               MIN_VERSION ${DARWIN_${os}_BUILTIN_MIN_VER})
 
-      darwin_filter_builtin_sources(filtered_sources
+      filter_builtin_sources(filtered_sources
         EXCLUDE ${arch}_${os}_EXCLUDED_BUILTINS
         ${${arch}_SOURCES})
 
@@ -304,7 +300,7 @@
                               OS ${os}
                               ARCH ${arch})
 
-        darwin_filter_builtin_sources(filtered_sources
+        filter_builtin_sources(filtered_sources
           EXCLUDE ${arch}_${os}_EXCLUDED_BUILTINS
           ${${arch}_SOURCES})
 
@@ -399,7 +395,7 @@
     set(x86_64_FUNCTIONS ${common_FUNCTIONS})
 
     foreach(arch ${DARWIN_macho_embedded_ARCHS})
-      darwin_filter_builtin_sources(${arch}_filtered_sources
+      filter_builtin_sources(${arch}_filtered_sources
         INCLUDE ${arch}_FUNCTIONS
         ${${arch}_SOURCES})
       if(NOT ${arch}_filtered_sources)
diff --git a/cmake/Modules/CompilerRTUtils.cmake b/cmake/Modules/CompilerRTUtils.cmake
index ed946d8..63b2252 100644
--- a/cmake/Modules/CompilerRTUtils.cmake
+++ b/cmake/Modules/CompilerRTUtils.cmake
@@ -138,15 +138,16 @@
     elseif(TEST_COMPILE_ONLY)
       try_compile_only(CAN_TARGET_${arch} ${TARGET_${arch}_CFLAGS})
     else()
-      set(argstring "${CMAKE_EXE_LINKER_FLAGS} ${argstring}")
       set(FLAG_NO_EXCEPTIONS "")
       if(COMPILER_RT_HAS_FNO_EXCEPTIONS_FLAG)
         set(FLAG_NO_EXCEPTIONS " -fno-exceptions ")
       endif()
+      set(SAVED_CMAKE_EXE_LINKER_FLAGS ${CMAKE_EXE_LINKER_FLAGS})
+      set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${argstring}")
       try_compile(CAN_TARGET_${arch} ${CMAKE_BINARY_DIR} ${SIMPLE_SOURCE}
                   COMPILE_DEFINITIONS "${TARGET_${arch}_CFLAGS} ${FLAG_NO_EXCEPTIONS}"
-                  OUTPUT_VARIABLE TARGET_${arch}_OUTPUT
-                  CMAKE_FLAGS "-DCMAKE_EXE_LINKER_FLAGS:STRING=${argstring}")
+                  OUTPUT_VARIABLE TARGET_${arch}_OUTPUT)
+      set(CMAKE_EXE_LINKER_FLAGS ${SAVED_CMAKE_EXE_LINKER_FLAGS})
     endif()
   endif()
   if(${CAN_TARGET_${arch}})
@@ -162,7 +163,6 @@
   check_symbol_exists(__arm__ "" __ARM)
   check_symbol_exists(__aarch64__ "" __AARCH64)
   check_symbol_exists(__x86_64__ "" __X86_64)
-  check_symbol_exists(__i686__ "" __I686)
   check_symbol_exists(__i386__ "" __I386)
   check_symbol_exists(__mips__ "" __MIPS)
   check_symbol_exists(__mips64__ "" __MIPS64)
@@ -175,8 +175,6 @@
     add_default_target_arch(aarch64)
   elseif(__X86_64)
     add_default_target_arch(x86_64)
-  elseif(__I686)
-    add_default_target_arch(i686)
   elseif(__I386)
     add_default_target_arch(i386)
   elseif(__MIPS64) # must be checked before __MIPS
@@ -272,3 +270,32 @@
     set(COMPILER_RT_HAS_EXPLICIT_DEFAULT_TARGET_TRIPLE FALSE)
   endif()
 endmacro()
+
+# Filter out generic versions of routines that are re-implemented in
+# architecture specific manner.  This prevents multiple definitions of the
+# same symbols, making the symbol selection non-deterministic.
+function(filter_builtin_sources output_var exclude_or_include excluded_list)
+  if(exclude_or_include STREQUAL "EXCLUDE")
+    set(filter_action GREATER)
+    set(filter_value -1)
+  elseif(exclude_or_include STREQUAL "INCLUDE")
+    set(filter_action LESS)
+    set(filter_value 0)
+  else()
+    message(FATAL_ERROR "filter_builtin_sources called without EXCLUDE|INCLUDE")
+  endif()
+
+  set(intermediate ${ARGN})
+  foreach (_file ${intermediate})
+    get_filename_component(_name_we ${_file} NAME_WE)
+    list(FIND ${excluded_list} ${_name_we} _found)
+    if(_found ${filter_action} ${filter_value})
+      list(REMOVE_ITEM intermediate ${_file})
+    elseif(${_file} MATCHES ".*/.*\\.S" OR ${_file} MATCHES ".*/.*\\.c")
+      get_filename_component(_name ${_file} NAME)
+      string(REPLACE ".S" ".c" _cname "${_name}")
+      list(REMOVE_ITEM intermediate ${_cname})
+    endif ()
+  endforeach ()
+  set(${output_var} ${intermediate} PARENT_SCOPE)
+endfunction()
diff --git a/cmake/Modules/HandleCompilerRT.cmake b/cmake/Modules/HandleCompilerRT.cmake
new file mode 100644
index 0000000..cff5031
--- /dev/null
+++ b/cmake/Modules/HandleCompilerRT.cmake
@@ -0,0 +1,21 @@
+function(find_compiler_rt_library name dest)
+  set(dest "" PARENT_SCOPE)
+  set(CLANG_COMMAND ${CMAKE_CXX_COMPILER} ${SANITIZER_COMMON_CFLAGS}
+      "--rtlib=compiler-rt" "--print-libgcc-file-name")
+  if (CMAKE_CXX_COMPILER_ID MATCHES Clang AND CMAKE_CXX_COMPILER_TARGET)
+    list(APPEND CLANG_COMMAND "--target=${CMAKE_CXX_COMPILER_TARGET}")
+  endif()
+  execute_process(
+      COMMAND ${CLANG_COMMAND}
+      RESULT_VARIABLE HAD_ERROR
+      OUTPUT_VARIABLE LIBRARY_FILE
+  )
+  string(STRIP "${LIBRARY_FILE}" LIBRARY_FILE)
+  string(REPLACE "builtins" "${name}" LIBRARY_FILE "${LIBRARY_FILE}")
+  if (NOT HAD_ERROR AND EXISTS "${LIBRARY_FILE}")
+    message(STATUS "Found compiler-rt ${name} library: ${LIBRARY_FILE}")
+    set(${dest} "${LIBRARY_FILE}" PARENT_SCOPE)
+  else()
+    message(STATUS "Failed to find compiler-rt ${name} library")
+  endif()
+endfunction()
diff --git a/cmake/base-config-ix.cmake b/cmake/base-config-ix.cmake
index 6f9f151..5b21b70 100644
--- a/cmake/base-config-ix.cmake
+++ b/cmake/base-config-ix.cmake
@@ -4,6 +4,8 @@
 # runtime libraries.
 
 include(CheckIncludeFile)
+include(CheckCXXSourceCompiles)
+
 check_include_file(unwind.h HAVE_UNWIND_H)
 
 # Top level target used to build all compiler-rt libraries.
@@ -63,7 +65,9 @@
   set(COMPILER_RT_TEST_COMPILER_ID GNU)
 endif()
 
-string(TOLOWER ${CMAKE_SYSTEM_NAME} COMPILER_RT_OS_DIR)
+if(NOT DEFINED COMPILER_RT_OS_DIR)
+  string(TOLOWER ${CMAKE_SYSTEM_NAME} COMPILER_RT_OS_DIR)
+endif()
 set(COMPILER_RT_LIBRARY_OUTPUT_DIR
   ${COMPILER_RT_OUTPUT_DIR}/lib/${COMPILER_RT_OS_DIR})
 set(COMPILER_RT_LIBRARY_INSTALL_DIR
@@ -84,6 +88,7 @@
   option(COMPILER_RT_ENABLE_IOS "Enable building for iOS" On)
   option(COMPILER_RT_ENABLE_WATCHOS "Enable building for watchOS - Experimental" Off)
   option(COMPILER_RT_ENABLE_TVOS "Enable building for tvOS - Experimental" Off)
+
 else()
   option(COMPILER_RT_DEFAULT_TARGET_ONLY "Build builtins only for the default target" Off)
 endif()
@@ -134,10 +139,6 @@
     elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "i[2-6]86|x86|amd64")
       if(NOT MSVC)
         test_target_arch(x86_64 "" "-m64")
-        # FIXME: We build runtimes for both i686 and i386, as "clang -m32" may
-        # target different variant than "$CMAKE_C_COMPILER -m32". This part should
-        # be gone after we resolve PR14109.
-        test_target_arch(i686 __i686__ "-m32")
         test_target_arch(i386 __i386__ "-m32")
       else()
         if (CMAKE_SIZEOF_VOID_P EQUAL 4)
@@ -147,7 +148,13 @@
         endif()
       endif()
     elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "powerpc")
+      # Strip out -nodefaultlibs when calling TEST_BIG_ENDIAN. Configuration
+      # will fail with this option when building with a sanitizer.
+      cmake_push_check_state()
+      string(REPLACE "-nodefaultlibs" "" CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
       TEST_BIG_ENDIAN(HOST_IS_BIG_ENDIAN)
+      cmake_pop_check_state()
+
       if(HOST_IS_BIG_ENDIAN)
         test_target_arch(powerpc64 "" "-m64")
       else()
diff --git a/cmake/builtin-config-ix.cmake b/cmake/builtin-config-ix.cmake
index dc2ec16..162ead1 100644
--- a/cmake/builtin-config-ix.cmake
+++ b/cmake/builtin-config-ix.cmake
@@ -24,8 +24,8 @@
 
 
 set(ARM64 aarch64)
-set(ARM32 arm armhf armv6m)
-set(X86 i386 i686)
+set(ARM32 arm armhf armv6m armv7m armv7em armv7 armv7s armv7k)
+set(X86 i386)
 set(X86_64 x86_64)
 set(MIPS32 mips mipsel)
 set(MIPS64 mips64 mips64el)
diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake
index 26eb532..30b3081 100644
--- a/cmake/config-ix.cmake
+++ b/cmake/config-ix.cmake
@@ -1,4 +1,5 @@
 include(CMakePushCheckState)
+include(CheckCCompilerFlag)
 include(CheckCXXCompilerFlag)
 include(CheckLibraryExists)
 include(CheckSymbolExists)
@@ -11,6 +12,32 @@
   cmake_pop_check_state()
 endfunction()
 
+check_library_exists(c fopen "" COMPILER_RT_HAS_LIBC)
+if (NOT SANITIZER_USE_COMPILER_RT)
+  if (ANDROID)
+    check_library_exists(gcc __gcc_personality_v0 "" COMPILER_RT_HAS_GCC_LIB)
+  else()
+    check_library_exists(gcc_s __gcc_personality_v0 "" COMPILER_RT_HAS_GCC_S_LIB)
+  endif()
+endif()
+
+check_c_compiler_flag(-nodefaultlibs COMPILER_RT_HAS_NODEFAULTLIBS_FLAG)
+if (COMPILER_RT_HAS_NODEFAULTLIBS_FLAG)
+  set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -nodefaultlibs")
+  if (COMPILER_RT_HAS_LIBC)
+    list(APPEND CMAKE_REQUIRED_LIBRARIES c)
+  endif ()
+  if (SANITIZER_USE_COMPILER_RT)
+    list(APPEND CMAKE_REQUIRED_FLAGS -rtlib=compiler-rt)
+    find_compiler_rt_library(builtins COMPILER_RT_BUILTINS_LIBRARY)
+    list(APPEND CMAKE_REQUIRED_LIBRARIES "${COMPILER_RT_BUILTINS_LIBRARY}")
+  elseif (COMPILER_RT_HAS_GCC_S_LIB)
+    list(APPEND CMAKE_REQUIRED_LIBRARIES gcc_s)
+  elseif (COMPILER_RT_HAS_GCC_LIB)
+    list(APPEND CMAKE_REQUIRED_LIBRARIES gcc)
+  endif ()
+endif ()
+
 # CodeGen options.
 check_cxx_compiler_flag(-fPIC                COMPILER_RT_HAS_FPIC_FLAG)
 check_cxx_compiler_flag(-fPIE                COMPILER_RT_HAS_FPIE_FLAG)
@@ -73,11 +100,14 @@
 check_symbol_exists(__func__ "" COMPILER_RT_HAS_FUNC_SYMBOL)
 
 # Libraries.
-check_library_exists(c fopen "" COMPILER_RT_HAS_LIBC)
 check_library_exists(dl dlopen "" COMPILER_RT_HAS_LIBDL)
 check_library_exists(rt shm_open "" COMPILER_RT_HAS_LIBRT)
 check_library_exists(m pow "" COMPILER_RT_HAS_LIBM)
 check_library_exists(pthread pthread_create "" COMPILER_RT_HAS_LIBPTHREAD)
+if (ANDROID AND COMPILER_RT_HAS_LIBDL)
+  # Android's libstdc++ has a dependency on libdl.
+  list(APPEND CMAKE_REQUIRED_LIBRARIES dl)
+endif()
 check_library_exists(stdc++ __cxa_throw "" COMPILER_RT_HAS_LIBSTDCXX)
 
 # Linker flags.
@@ -144,7 +174,7 @@
 
 set(ARM64 aarch64)
 set(ARM32 arm armhf)
-set(X86 i386 i686)
+set(X86 i386)
 set(X86_64 x86_64)
 set(MIPS32 mips mipsel)
 set(MIPS64 mips64 mips64el)
@@ -164,14 +194,12 @@
 set(ALL_ASAN_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64}
     ${MIPS32} ${MIPS64} ${PPC64} ${S390X})
 set(ALL_DFSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64})
+set(ALL_FUZZER_SUPPORTED_ARCH x86_64)
 
-# Darwin does not support 32-bit thread-local storage on ios versions
-# below 9.0. Until the min ios version is bumped to 9.0, lsan will
-# not build for 32-bit darwin targets.
 if(APPLE)
-  set(ALL_LSAN_SUPPORTED_ARCH ${X86_64} ${ARM64})
-else()
   set(ALL_LSAN_SUPPORTED_ARCH ${X86} ${X86_64} ${MIPS64} ${ARM64})
+else()
+  set(ALL_LSAN_SUPPORTED_ARCH ${X86} ${X86_64} ${MIPS64} ${ARM64} ${ARM32} ${PPC64})
 endif()
 set(ALL_MSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64} ${PPC64})
 set(ALL_PROFILE_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${PPC64}
@@ -180,9 +208,9 @@
 set(ALL_UBSAN_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64}
     ${MIPS32} ${MIPS64} ${PPC64} ${S390X})
 set(ALL_SAFESTACK_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM64} ${MIPS32} ${MIPS64})
-set(ALL_CFI_SUPPORTED_ARCH ${X86} ${X86_64} ${MIPS64})
+set(ALL_CFI_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${MIPS64})
 set(ALL_ESAN_SUPPORTED_ARCH ${X86_64} ${MIPS64})
-set(ALL_SCUDO_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64})
+set(ALL_SCUDO_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${MIPS32} ${MIPS64})
 set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32} ${ARM64} ${MIPS32} ${MIPS64} powerpc64le)
 
 if(APPLE)
@@ -208,7 +236,7 @@
     list(APPEND DARWIN_EMBEDDED_PLATFORMS ios)
     set(DARWIN_ios_MIN_VER_FLAG -miphoneos-version-min)
     set(DARWIN_ios_SANITIZER_MIN_VER_FLAG
-      ${DARWIN_ios_MIN_VER_FLAG}=7.0)
+      ${DARWIN_ios_MIN_VER_FLAG}=8.0)
   endif()
   if(COMPILER_RT_ENABLE_WATCHOS)
     list(APPEND DARWIN_EMBEDDED_PLATFORMS watchos)
@@ -306,9 +334,7 @@
         if(DARWIN_${platform}sim_ARCHS)
           list(APPEND SANITIZER_COMMON_SUPPORTED_OS ${platform}sim)
           list(APPEND PROFILE_SUPPORTED_OS ${platform}sim)
-          if(DARWIN_${platform}_SYSROOT_INTERNAL)
-            list(APPEND TSAN_SUPPORTED_OS ${platform}sim)
-          endif()
+          list(APPEND TSAN_SUPPORTED_OS ${platform}sim)
         endif()
         foreach(arch ${DARWIN_${platform}sim_ARCHS})
           list(APPEND COMPILER_RT_SUPPORTED_ARCH ${arch})
@@ -333,6 +359,7 @@
         if(DARWIN_${platform}_ARCHS)
           list(APPEND SANITIZER_COMMON_SUPPORTED_OS ${platform})
           list(APPEND PROFILE_SUPPORTED_OS ${platform})
+          list(APPEND TSAN_SUPPORTED_OS ${platform})
         endif()
         foreach(arch ${DARWIN_${platform}_ARCHS})
           list(APPEND COMPILER_RT_SUPPORTED_ARCH ${arch})
@@ -387,7 +414,11 @@
     SANITIZER_COMMON_SUPPORTED_ARCH)
   list_intersect(XRAY_SUPPORTED_ARCH
     ALL_XRAY_SUPPORTED_ARCH
-		SANITIZER_COMMON_SUPPORTED_ARCH)
+    SANITIZER_COMMON_SUPPORTED_ARCH)
+  list_intersect(FUZZER_SUPPORTED_ARCH
+    ALL_FUZZER_SUPPORTED_ARCH
+    ALL_SANITIZER_COMMON_SUPPORTED_ARCH)
+
 else()
   # Architectures supported by compiler-rt libraries.
   filter_available_targets(SANITIZER_COMMON_SUPPORTED_ARCH
@@ -399,6 +430,7 @@
   filter_available_targets(UBSAN_COMMON_SUPPORTED_ARCH
     ${SANITIZER_COMMON_SUPPORTED_ARCH})
   filter_available_targets(ASAN_SUPPORTED_ARCH ${ALL_ASAN_SUPPORTED_ARCH})
+  filter_available_targets(FUZZER_SUPPORTED_ARCH ${ALL_FUZZER_SUPPORTED_ARCH})
   filter_available_targets(DFSAN_SUPPORTED_ARCH ${ALL_DFSAN_SUPPORTED_ARCH})
   filter_available_targets(LSAN_SUPPORTED_ARCH ${ALL_LSAN_SUPPORTED_ARCH})
   filter_available_targets(MSAN_SUPPORTED_ARCH ${ALL_MSAN_SUPPORTED_ARCH})
@@ -438,13 +470,13 @@
   set(OS_NAME "${CMAKE_SYSTEM_NAME}")
 endif()
 
-set(ALL_SANITIZERS asan;dfsan;msan;tsan;safestack;cfi;esan;scudo)
-set(COMPILER_RT_SANITIZERS_TO_BUILD ${ALL_SANITIZERS} CACHE STRING
+set(ALL_SANITIZERS asan;dfsan;msan;tsan;safestack;cfi;esan;scudo;ubsan_minimal)
+set(COMPILER_RT_SANITIZERS_TO_BUILD all CACHE STRING
     "sanitizers to build if supported on the target (all;${ALL_SANITIZERS})")
 list_replace(COMPILER_RT_SANITIZERS_TO_BUILD all "${ALL_SANITIZERS}")
 
 if (SANITIZER_COMMON_SUPPORTED_ARCH AND NOT LLVM_USE_SANITIZER AND
-    (OS_NAME MATCHES "Android|Darwin|Linux|FreeBSD" OR
+    (OS_NAME MATCHES "Android|Darwin|Linux|FreeBSD|NetBSD|Fuchsia" OR
     (OS_NAME MATCHES "Windows" AND (NOT MINGW AND NOT CYGWIN))))
   set(COMPILER_RT_HAS_SANITIZER_COMMON TRUE)
 else()
@@ -463,7 +495,7 @@
   set(COMPILER_RT_HAS_ASAN FALSE)
 endif()
 
-if (OS_NAME MATCHES "Linux|FreeBSD|Windows")
+if (OS_NAME MATCHES "Linux|FreeBSD|Windows|NetBSD")
   set(COMPILER_RT_ASAN_HAS_STATIC_RUNTIME TRUE)
 else()
   set(COMPILER_RT_ASAN_HAS_STATIC_RUNTIME FALSE)
@@ -479,19 +511,12 @@
 endif()
 
 if (COMPILER_RT_HAS_SANITIZER_COMMON AND LSAN_SUPPORTED_ARCH AND
-    OS_NAME MATCHES "Linux|FreeBSD")
+    OS_NAME MATCHES "Darwin|Linux|FreeBSD")
   set(COMPILER_RT_HAS_LSAN TRUE)
 else()
   set(COMPILER_RT_HAS_LSAN FALSE)
 endif()
 
-if(APPLE)
-  option(COMPILER_RT_ENABLE_LSAN_OSX "Enable building LSan for OS X - Experimental" Off)
-  if(COMPILER_RT_ENABLE_LSAN_OSX)
-    set(COMPILER_RT_HAS_LSAN TRUE)
-  endif()
-endif()
-
 if (COMPILER_RT_HAS_SANITIZER_COMMON AND MSAN_SUPPORTED_ARCH AND
     OS_NAME MATCHES "Linux")
   set(COMPILER_RT_HAS_MSAN TRUE)
@@ -514,21 +539,27 @@
 endif()
 
 if (COMPILER_RT_HAS_SANITIZER_COMMON AND UBSAN_SUPPORTED_ARCH AND
-    OS_NAME MATCHES "Darwin|Linux|FreeBSD|Windows|Android")
+    OS_NAME MATCHES "Darwin|Linux|FreeBSD|NetBSD|Windows|Android|Fuchsia")
   set(COMPILER_RT_HAS_UBSAN TRUE)
 else()
   set(COMPILER_RT_HAS_UBSAN FALSE)
 endif()
 
+if (COMPILER_RT_HAS_SANITIZER_COMMON AND UBSAN_SUPPORTED_ARCH AND
+    OS_NAME MATCHES "Linux|FreeBSD|NetBSD|Android|Darwin")
+  set(COMPILER_RT_HAS_UBSAN_MINIMAL TRUE)
+else()
+  set(COMPILER_RT_HAS_UBSAN_MINIMAL FALSE)
+endif()
+
 if (COMPILER_RT_HAS_SANITIZER_COMMON AND SAFESTACK_SUPPORTED_ARCH AND
-    OS_NAME MATCHES "Darwin|Linux|FreeBSD")
+    OS_NAME MATCHES "Darwin|Linux|FreeBSD|NetBSD")
   set(COMPILER_RT_HAS_SAFESTACK TRUE)
 else()
   set(COMPILER_RT_HAS_SAFESTACK FALSE)
 endif()
 
-if (COMPILER_RT_HAS_SANITIZER_COMMON AND CFI_SUPPORTED_ARCH AND
-    OS_NAME MATCHES "Linux")
+if (COMPILER_RT_HAS_SANITIZER_COMMON AND CFI_SUPPORTED_ARCH)
   set(COMPILER_RT_HAS_CFI TRUE)
 else()
   set(COMPILER_RT_HAS_CFI FALSE)
@@ -542,7 +573,7 @@
 endif()
 
 if (COMPILER_RT_HAS_SANITIZER_COMMON AND SCUDO_SUPPORTED_ARCH AND
-    OS_NAME MATCHES "Linux")
+    OS_NAME MATCHES "Linux|Android")
   set(COMPILER_RT_HAS_SCUDO TRUE)
 else()
   set(COMPILER_RT_HAS_SCUDO FALSE)
@@ -554,3 +585,10 @@
 else()
   set(COMPILER_RT_HAS_XRAY FALSE)
 endif()
+
+if (COMPILER_RT_HAS_SANITIZER_COMMON AND FUZZER_SUPPORTED_ARCH AND
+      OS_NAME MATCHES "Darwin|Linux|NetBSD")
+  set(COMPILER_RT_HAS_FUZZER TRUE)
+else()
+  set(COMPILER_RT_HAS_FUZZER FALSE)
+endif()
diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt
index 1f8b481..ec3bf40 100644
--- a/include/CMakeLists.txt
+++ b/include/CMakeLists.txt
@@ -1,17 +1,23 @@
-set(SANITIZER_HEADERS
-  sanitizer/allocator_interface.h
-  sanitizer/asan_interface.h
-  sanitizer/common_interface_defs.h
-  sanitizer/coverage_interface.h
-  sanitizer/dfsan_interface.h
-  sanitizer/esan_interface.h
-  sanitizer/linux_syscall_hooks.h
-  sanitizer/lsan_interface.h
-  sanitizer/msan_interface.h
-  sanitizer/tsan_interface_atomic.h)
+if (COMPILER_RT_BUILD_SANITIZERS)
+  set(SANITIZER_HEADERS
+    sanitizer/allocator_interface.h
+    sanitizer/asan_interface.h
+    sanitizer/common_interface_defs.h
+    sanitizer/coverage_interface.h
+    sanitizer/dfsan_interface.h
+    sanitizer/esan_interface.h
+    sanitizer/linux_syscall_hooks.h
+    sanitizer/lsan_interface.h
+    sanitizer/msan_interface.h
+    sanitizer/tsan_interface.h
+    sanitizer/tsan_interface_atomic.h)
+endif(COMPILER_RT_BUILD_SANITIZERS)
 
-set(XRAY_HEADERS
-  xray/xray_interface.h)
+if (COMPILER_RT_BUILD_XRAY)
+  set(XRAY_HEADERS
+    xray/xray_interface.h
+    xray/xray_log_interface.h)
+endif(COMPILER_RT_BUILD_XRAY)
 
 set(COMPILER_RT_HEADERS
   ${SANITIZER_HEADERS}
diff --git a/include/sanitizer/allocator_interface.h b/include/sanitizer/allocator_interface.h
index 5220631..2d35804 100644
--- a/include/sanitizer/allocator_interface.h
+++ b/include/sanitizer/allocator_interface.h
@@ -76,6 +76,13 @@
       void (*malloc_hook)(const volatile void *, size_t),
       void (*free_hook)(const volatile void *));
 
+  /* Drains allocator quarantines (calling thread's and global ones), returns
+     freed memory back to OS and releases other non-essential internal allocator
+     resources in attempt to reduce process RSS.
+     Currently available with ASan only.
+  */
+  void __sanitizer_purge_allocator();
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/include/sanitizer/asan_interface.h b/include/sanitizer/asan_interface.h
index 97ba0ce..e689a73 100644
--- a/include/sanitizer/asan_interface.h
+++ b/include/sanitizer/asan_interface.h
@@ -144,6 +144,10 @@
   void *__asan_addr_is_in_fake_stack(void *fake_stack, void *addr, void **beg,
                                      void **end);
 
+  // Performs cleanup before a [[noreturn]] function.  Must be called
+  // before things like _exit and execl to avoid false positives on stack.
+  void __asan_handle_no_return(void);
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/include/sanitizer/common_interface_defs.h b/include/sanitizer/common_interface_defs.h
index f9f9302..4a1de96 100644
--- a/include/sanitizer/common_interface_defs.h
+++ b/include/sanitizer/common_interface_defs.h
@@ -158,8 +158,10 @@
   // Prints stack traces for all live heap allocations ordered by total
   // allocation size until `top_percent` of total live heap is shown.
   // `top_percent` should be between 1 and 100.
+  // At most `max_number_of_contexts` contexts (stack traces) is printed.
   // Experimental feature currently available only with asan on Linux/x86_64.
-  void __sanitizer_print_memory_profile(size_t top_percent);
+  void __sanitizer_print_memory_profile(size_t top_percent,
+                                        size_t max_number_of_contexts);
 
   // Fiber annotation interface.
   // Before switching to a different stack, one must call
diff --git a/include/sanitizer/coverage_interface.h b/include/sanitizer/coverage_interface.h
index b44c5ac..081033c 100644
--- a/include/sanitizer/coverage_interface.h
+++ b/include/sanitizer/coverage_interface.h
@@ -19,51 +19,16 @@
 extern "C" {
 #endif
 
-  // Initialize coverage.
-  void __sanitizer_cov_init();
   // Record and dump coverage info.
   void __sanitizer_cov_dump();
 
-  //  Dump collected coverage info. Sorts pcs by module into individual
-  //  .sancov files.
+  // Clear collected coverage info.
+  void __sanitizer_cov_reset();
+
+  // Dump collected coverage info. Sorts pcs by module into individual .sancov
+  // files.
   void __sanitizer_dump_coverage(const uintptr_t *pcs, uintptr_t len);
 
-  // Open <name>.sancov.packed in the coverage directory and return the file
-  // descriptor. Returns -1 on failure, or if coverage dumping is disabled.
-  // This is intended for use by sandboxing code.
-  intptr_t __sanitizer_maybe_open_cov_file(const char *name);
-  // Get the number of unique covered blocks (or edges).
-  // This can be useful for coverage-directed in-process fuzzers.
-  uintptr_t __sanitizer_get_total_unique_coverage();
-  // Get the number of unique indirect caller-callee pairs.
-  uintptr_t __sanitizer_get_total_unique_caller_callee_pairs();
-
-  // Reset the basic-block (edge) coverage to the initial state.
-  // Useful for in-process fuzzing to start collecting coverage from scratch.
-  // Experimental, will likely not work for multi-threaded process.
-  void __sanitizer_reset_coverage();
-  // Set *data to the array of covered PCs and return the size of that array.
-  // Some of the entries in *data will be zero.
-  uintptr_t __sanitizer_get_coverage_guards(uintptr_t **data);
-
-  // The coverage instrumentation may optionally provide imprecise counters.
-  // Rather than exposing the counter values to the user we instead map
-  // the counters to a bitset.
-  // Every counter is associated with 8 bits in the bitset.
-  // We define 8 value ranges: 1, 2, 3, 4-7, 8-15, 16-31, 32-127, 128+
-  // The i-th bit is set to 1 if the counter value is in the i-th range.
-  // This counter-based coverage implementation is *not* thread-safe.
-
-  // Returns the number of registered coverage counters.
-  uintptr_t __sanitizer_get_number_of_counters();
-  // Updates the counter 'bitset', clears the counters and returns the number of
-  // new bits in 'bitset'.
-  // If 'bitset' is nullptr, only clears the counters.
-  // Otherwise 'bitset' should be at least
-  // __sanitizer_get_number_of_counters bytes long and 8-aligned.
-  uintptr_t
-  __sanitizer_update_counter_bitset_and_clear_counters(uint8_t *bitset);
-
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/include/sanitizer/lsan_interface.h b/include/sanitizer/lsan_interface.h
index 8fb8e75..e3e509f 100644
--- a/include/sanitizer/lsan_interface.h
+++ b/include/sanitizer/lsan_interface.h
@@ -64,8 +64,14 @@
   // for the program it is linked into (if the return value is non-zero). This
   // function must be defined as returning a constant value; any behavior beyond
   // that is unsupported.
+  // To avoid dead stripping, you may need to define this function with
+  // __attribute__((used))
   int __lsan_is_turned_off();
 
+  // This function may be optionally provided by user and should return
+  // a string containing LSan runtime options. See lsan_flags.inc for details.
+  const char *__lsan_default_options();
+
   // This function may be optionally provided by the user and should return
   // a string containing LSan suppressions.
   const char *__lsan_default_suppressions();
diff --git a/include/sanitizer/tsan_interface.h b/include/sanitizer/tsan_interface.h
new file mode 100644
index 0000000..5308850
--- /dev/null
+++ b/include/sanitizer/tsan_interface.h
@@ -0,0 +1,144 @@
+//===-- tsan_interface.h ----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// Public interface header for TSan.
+//===----------------------------------------------------------------------===//
+#ifndef SANITIZER_TSAN_INTERFACE_H
+#define SANITIZER_TSAN_INTERFACE_H
+
+#include <sanitizer/common_interface_defs.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// __tsan_release establishes a happens-before relation with a preceding
+// __tsan_acquire on the same address.
+void __tsan_acquire(void *addr);
+void __tsan_release(void *addr);
+
+// Annotations for custom mutexes.
+// The annotations allow to get better reports (with sets of locked mutexes),
+// detect more types of bugs (e.g. mutex misuses, races between lock/unlock and
+// destruction and potential deadlocks) and improve precision and performance
+// (by ignoring individual atomic operations in mutex code). However, the
+// downside is that annotated mutex code itself is not checked for correctness.
+
+// Mutex creation flags are passed to __tsan_mutex_create annotation.
+// If mutex has no constructor and __tsan_mutex_create is not called,
+// the flags may be passed to __tsan_mutex_pre_lock/__tsan_mutex_post_lock
+// annotations.
+
+// Mutex has static storage duration and no-op constructor and destructor.
+// This effectively makes tsan ignore destroy annotation.
+const unsigned __tsan_mutex_linker_init      = 1 << 0;
+// Mutex is write reentrant.
+const unsigned __tsan_mutex_write_reentrant  = 1 << 1;
+// Mutex is read reentrant.
+const unsigned __tsan_mutex_read_reentrant   = 1 << 2;
+// Mutex does not have static storage duration, and must not be used after
+// its destructor runs.  The opposite of __tsan_mutex_linker_init.
+// If this flag is passed to __tsan_mutex_destroy, then the destruction
+// is ignored unless this flag was previously set on the mutex.
+const unsigned __tsan_mutex_not_static       = 1 << 8;
+
+// Mutex operation flags:
+
+// Denotes read lock operation.
+const unsigned __tsan_mutex_read_lock        = 1 << 3;
+// Denotes try lock operation.
+const unsigned __tsan_mutex_try_lock         = 1 << 4;
+// Denotes that a try lock operation has failed to acquire the mutex.
+const unsigned __tsan_mutex_try_lock_failed  = 1 << 5;
+// Denotes that the lock operation acquires multiple recursion levels.
+// Number of levels is passed in recursion parameter.
+// This is useful for annotation of e.g. Java builtin monitors,
+// for which wait operation releases all recursive acquisitions of the mutex.
+const unsigned __tsan_mutex_recursive_lock   = 1 << 6;
+// Denotes that the unlock operation releases all recursion levels.
+// Number of released levels is returned and later must be passed to
+// the corresponding __tsan_mutex_post_lock annotation.
+const unsigned __tsan_mutex_recursive_unlock = 1 << 7;
+
+// Annotate creation of a mutex.
+// Supported flags: mutex creation flags.
+void __tsan_mutex_create(void *addr, unsigned flags);
+
+// Annotate destruction of a mutex.
+// Supported flags:
+//   - __tsan_mutex_linker_init
+//   - __tsan_mutex_not_static
+void __tsan_mutex_destroy(void *addr, unsigned flags);
+
+// Annotate start of lock operation.
+// Supported flags:
+//   - __tsan_mutex_read_lock
+//   - __tsan_mutex_try_lock
+//   - all mutex creation flags
+void __tsan_mutex_pre_lock(void *addr, unsigned flags);
+
+// Annotate end of lock operation.
+// Supported flags:
+//   - __tsan_mutex_read_lock (must match __tsan_mutex_pre_lock)
+//   - __tsan_mutex_try_lock (must match __tsan_mutex_pre_lock)
+//   - __tsan_mutex_try_lock_failed
+//   - __tsan_mutex_recursive_lock
+//   - all mutex creation flags
+void __tsan_mutex_post_lock(void *addr, unsigned flags, int recursion);
+
+// Annotate start of unlock operation.
+// Supported flags:
+//   - __tsan_mutex_read_lock
+//   - __tsan_mutex_recursive_unlock
+int __tsan_mutex_pre_unlock(void *addr, unsigned flags);
+
+// Annotate end of unlock operation.
+// Supported flags:
+//   - __tsan_mutex_read_lock (must match __tsan_mutex_pre_unlock)
+void __tsan_mutex_post_unlock(void *addr, unsigned flags);
+
+// Annotate start/end of notify/signal/broadcast operation.
+// Supported flags: none.
+void __tsan_mutex_pre_signal(void *addr, unsigned flags);
+void __tsan_mutex_post_signal(void *addr, unsigned flags);
+
+// Annotate start/end of a region of code where lock/unlock/signal operation
+// diverts to do something else unrelated to the mutex. This can be used to
+// annotate, for example, calls into cooperative scheduler or contention
+// profiling code.
+// These annotations must be called only from within
+// __tsan_mutex_pre/post_lock, __tsan_mutex_pre/post_unlock,
+// __tsan_mutex_pre/post_signal regions.
+// Supported flags: none.
+void __tsan_mutex_pre_divert(void *addr, unsigned flags);
+void __tsan_mutex_post_divert(void *addr, unsigned flags);
+
+// External race detection API.
+// Can be used by non-instrumented libraries to detect when their objects are
+// being used in an unsafe manner.
+//   - __tsan_external_read/__tsan_external_write annotates the logical reads
+//       and writes of the object at the specified address. 'caller_pc' should
+//       be the PC of the library user, which the library can obtain with e.g.
+//       `__builtin_return_address(0)`.
+//   - __tsan_external_register_tag registers a 'tag' with the specified name,
+//       which is later used in read/write annotations to denote the object type
+//   - __tsan_external_assign_tag can optionally mark a heap object with a tag
+void *__tsan_external_register_tag(const char *object_type);
+void __tsan_external_register_header(void *tag, const char *header);
+void __tsan_external_assign_tag(void *addr, void *tag);
+void __tsan_external_read(void *addr, void *caller_pc, void *tag);
+void __tsan_external_write(void *addr, void *caller_pc, void *tag);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // SANITIZER_TSAN_INTERFACE_H
diff --git a/include/xray/xray_interface.h b/include/xray/xray_interface.h
index 52a7e1d..d08039a 100644
--- a/include/xray/xray_interface.h
+++ b/include/xray/xray_interface.h
@@ -1,4 +1,4 @@
-//===-- xray_interface.h ----------------------------------------*- C++ -*-===//
+//===- xray_interface.h -----------------------------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,47 +11,69 @@
 //
 // APIs for controlling XRay functionality explicitly.
 //===----------------------------------------------------------------------===//
+
 #ifndef XRAY_XRAY_INTERFACE_H
 #define XRAY_XRAY_INTERFACE_H
 
+#include <cstddef>
 #include <cstdint>
 
 extern "C" {
 
-// Synchronize this with AsmPrinter::SledKind in LLVM.
+/// Synchronize this with AsmPrinter::SledKind in LLVM.
 enum XRayEntryType {
   ENTRY = 0,
   EXIT = 1,
   TAIL = 2,
   LOG_ARGS_ENTRY = 3,
+  CUSTOM_EVENT = 4,
 };
 
-// Provide a function to invoke for when instrumentation points are hit. This is
-// a user-visible control surface that overrides the default implementation. The
-// function provided should take the following arguments:
-//
-//   - function id: an identifier that indicates the id of a function; this id
-//                  is generated by xray; the mapping between the function id
-//                  and the actual function pointer is available through
-//                  __xray_table.
-//   - entry type: identifies what kind of instrumentation point was encountered
-//                 (function entry, function exit, etc.). See the enum
-//                 XRayEntryType for more details.
-//
-// The user handler must handle correctly spurious calls after this handler is
-// removed or replaced with another handler, because it would be too costly for
-// XRay runtime to avoid spurious calls.
-// To prevent circular calling, the handler function itself and all its
-// direct&indirect callees must not be instrumented with XRay, which can be
-// achieved by marking them all with: __attribute__((xray_never_instrument))
-//
-// Returns 1 on success, 0 on error.
+/// Provide a function to invoke for when instrumentation points are hit. This
+/// is a user-visible control surface that overrides the default implementation.
+/// The function provided should take the following arguments:
+///
+///   - function id: an identifier that indicates the id of a function; this id
+///                  is generated by xray; the mapping between the function id
+///                  and the actual function pointer is available through
+///                  __xray_table.
+///   - entry type: identifies what kind of instrumentation point was
+///                 encountered (function entry, function exit, etc.). See the
+///                 enum XRayEntryType for more details.
+///
+/// The user handler must handle correctly spurious calls after this handler is
+/// removed or replaced with another handler, because it would be too costly for
+/// XRay runtime to avoid spurious calls.
+/// To prevent circular calling, the handler function itself and all its
+/// direct&indirect callees must not be instrumented with XRay, which can be
+/// achieved by marking them all with: __attribute__((xray_never_instrument))
+///
+/// Returns 1 on success, 0 on error.
 extern int __xray_set_handler(void (*entry)(int32_t, XRayEntryType));
 
-// This removes whatever the currently provided handler is. Returns 1 on
-// success, 0 on error.
+/// This removes whatever the currently provided handler is. Returns 1 on
+/// success, 0 on error.
 extern int __xray_remove_handler();
 
+/// Use XRay to log the first argument of each (instrumented) function call.
+/// When this function exits, all threads will have observed the effect and
+/// start logging their subsequent affected function calls (if patched).
+///
+/// Returns 1 on success, 0 on error.
+extern int __xray_set_handler_arg1(void (*entry)(int32_t, XRayEntryType,
+                                                 uint64_t));
+
+/// Disables the XRay handler used to log first arguments of function calls.
+/// Returns 1 on success, 0 on error.
+extern int __xray_remove_handler_arg1();
+
+/// Provide a function to invoke when XRay encounters a custom event.
+extern int __xray_set_customevent_handler(void (*entry)(void*, std::size_t));
+
+/// This removes whatever the currently provided custom event handler is.
+/// Returns 1 on success, 0 on error.
+extern int __xray_remove_customevent_handler();
+
 enum XRayPatchingStatus {
   NOT_INITIALIZED = 0,
   SUCCESS = 1,
@@ -59,24 +81,39 @@
   FAILED = 3,
 };
 
-// This tells XRay to patch the instrumentation points. See XRayPatchingStatus
-// for possible result values.
+/// This tells XRay to patch the instrumentation points. See XRayPatchingStatus
+/// for possible result values.
 extern XRayPatchingStatus __xray_patch();
 
-// Reverses the effect of __xray_patch(). See XRayPatchingStatus for possible
-// result values.
+/// Reverses the effect of __xray_patch(). See XRayPatchingStatus for possible
+/// result values.
 extern XRayPatchingStatus __xray_unpatch();
 
-// Use XRay to log the first argument of each (instrumented) function call.
-// When this function exits, all threads will have observed the effect and
-// start logging their subsequent affected function calls (if patched).
-//
-// Returns 1 on success, 0 on error.
-extern int __xray_set_handler_arg1(void (*)(int32_t, XRayEntryType, uint64_t));
+/// This patches a specific function id. See XRayPatchingStatus for possible
+/// result values.
+extern XRayPatchingStatus __xray_patch_function(int32_t FuncId);
 
-// Disables the XRay handler used to log first arguments of function calls.
-// Returns 1 on success, 0 on error.
-extern int __xray_remove_handler_arg1();
-}
+/// This unpatches a specific function id. See XRayPatchingStatus for possible
+/// result values.
+extern XRayPatchingStatus __xray_unpatch_function(int32_t FuncId);
 
-#endif
+/// This function returns the address of the function provided a valid function
+/// id. We return 0 if we encounter any error, even if 0 may be a valid function
+/// address.
+extern uintptr_t __xray_function_address(int32_t FuncId);
+
+/// This function returns the maximum valid function id. Returns 0 if we
+/// encounter errors (when there are no instrumented functions, etc.).
+extern size_t __xray_max_function_id();
+
+/// Initialize the required XRay data structures. This is useful in cases where
+/// users want to control precisely when the XRay instrumentation data
+/// structures are initialized, for example when the XRay library is built with
+/// the XRAY_NO_PREINIT preprocessor definition.
+///
+/// Calling __xray_init() more than once is safe across multiple threads.
+extern void __xray_init();
+
+} // end extern "C"
+
+#endif // XRAY_XRAY_INTERFACE_H
diff --git a/include/xray/xray_log_interface.h b/include/xray/xray_log_interface.h
index f98b331..7be9a4a 100644
--- a/include/xray/xray_log_interface.h
+++ b/include/xray/xray_log_interface.h
@@ -10,7 +10,73 @@
 // This file is a part of XRay, a function call tracing system.
 //
 // APIs for installing a new logging implementation.
+//
 //===----------------------------------------------------------------------===//
+///
+/// XRay allows users to implement their own logging handlers and install them
+/// to replace the default runtime-controllable implementation that comes with
+/// compiler-rt/xray. The "flight data recorder" (FDR) mode implementation uses
+/// this API to install itself in an XRay-enabled binary. See
+/// compiler-rt/lib/xray_fdr_logging.{h,cc} for details of that implementation.
+///
+/// The high-level usage pattern for these APIs look like the following:
+///
+///   // Before we try initializing the log implementation, we must set it as
+///   // the log implementation. We provide the function pointers that define
+///   // the various initialization, finalization, and other pluggable hooks
+///   // that we need.
+///   __xray_set_log_impl({...});
+///
+///   // Once that's done, we can now initialize the implementation. Each
+///   // implementation has a chance to let users customize the implementation
+///   // with a struct that their implementation supports. Roughly this might
+///   // look like:
+///   MyImplementationOptions opts;
+///   opts.enable_feature = true;
+///   ...
+///   auto init_status = __xray_log_init(
+///       BufferSize, MaxBuffers, &opts, sizeof opts);
+///   if (init_status != XRayLogInitStatus::XRAY_LOG_INITIALIZED) {
+///     // deal with the error here, if there is one.
+///   }
+///
+///   // When the log implementation has had the chance to initialize, we can
+///   // now patch the sleds.
+///   auto patch_status = __xray_patch();
+///   if (patch_status != XRayPatchingStatus::SUCCESS) {
+///     // deal with the error here, if it is an error.
+///   }
+///
+///   // If we want to stop the implementation, we can then finalize it (before
+///   // optionally flushing the log).
+///   auto fin_status = __xray_log_finalize();
+///   if (fin_status != XRayLogInitStatus::XRAY_LOG_FINALIZED) {
+///     // deal with the error here, if it is an error.
+///   }
+///
+///   // We can optionally wait before flushing the log to give other threads a
+///   // chance to see that the implementation is already finalized. Also, at
+///   // this point we can optionally unpatch the sleds to reduce overheads at
+///   // runtime.
+///   auto unpatch_status = __xray_unpatch();
+///   if (unpatch_status != XRayPatchingStatus::SUCCESS) {
+//      // deal with the error here, if it is an error.
+//    }
+///
+///   // If there are logs or data to be flushed somewhere, we can do so only
+///   // after we've finalized the log. Some implementations may not actually
+///   // have anything to log (it might keep the data in memory, or periodically
+///   // be logging the data anyway).
+///   auto flush_status = __xray_log_flushLog();
+///   if (flush_status != XRayLogFlushStatus::XRAY_LOG_FLUSHED) {
+///     // deal with the error here, if it is an error.
+///   }
+///
+///
+/// NOTE: Before calling __xray_patch() again, consider re-initializing the
+/// implementation first. Some implementations might stay in an "off" state when
+/// they are finalized, while some might be in an invalid/unknown state.
+///
 #ifndef XRAY_XRAY_LOG_INTERFACE_H
 #define XRAY_XRAY_LOG_INTERFACE_H
 
@@ -19,33 +85,150 @@
 
 extern "C" {
 
+/// This enum defines the valid states in which the logging implementation can
+/// be at.
 enum XRayLogInitStatus {
+  /// The default state is uninitialized, and in case there were errors in the
+  /// initialization, the implementation MUST return XRAY_LOG_UNINITIALIZED.
   XRAY_LOG_UNINITIALIZED = 0,
+
+  /// Some implementations support multi-stage init (or asynchronous init), and
+  /// may return XRAY_LOG_INITIALIZING to signal callers of the API that
+  /// there's an ongoing initialization routine running. This allows
+  /// implementations to support concurrent threads attempting to initialize,
+  /// while only signalling success in one.
   XRAY_LOG_INITIALIZING = 1,
+
+  /// When an implementation is done initializing, it MUST return
+  /// XRAY_LOG_INITIALIZED. When users call `__xray_patch()`, they are
+  /// guaranteed that the implementation installed with
+  /// `__xray_set_log_impl(...)` has been initialized.
   XRAY_LOG_INITIALIZED = 2,
+
+  /// Some implementations might support multi-stage finalization (or
+  /// asynchronous finalization), and may return XRAY_LOG_FINALIZING to signal
+  /// callers of the API that there's an ongoing finalization routine running.
+  /// This allows implementations to support concurrent threads attempting to
+  /// finalize, while only signalling success/completion in one.
   XRAY_LOG_FINALIZING = 3,
+
+  /// When an implementation is done finalizing, it MUST return
+  /// XRAY_LOG_FINALIZED. It is up to the implementation to determine what the
+  /// semantics of a finalized implementation is. Some implementations might
+  /// allow re-initialization once the log is finalized, while some might always
+  /// be on (and that finalization is a no-op).
   XRAY_LOG_FINALIZED = 4,
 };
 
+/// This enum allows an implementation to signal log flushing operations via
+/// `__xray_log_flushLog()`, and the state of flushing the log.
 enum XRayLogFlushStatus {
   XRAY_LOG_NOT_FLUSHING = 0,
   XRAY_LOG_FLUSHING = 1,
   XRAY_LOG_FLUSHED = 2,
 };
 
+/// A valid XRay logging implementation MUST provide all of the function
+/// pointers in XRayLogImpl when being installed through `__xray_set_log_impl`.
+/// To be precise, ALL the functions pointers MUST NOT be nullptr.
 struct XRayLogImpl {
+  /// The log initialization routine provided by the implementation, always
+  /// provided with the following parameters:
+  ///
+  ///   - buffer size
+  ///   - maximum number of buffers
+  ///   - a pointer to an argument struct that the implementation MUST handle
+  ///   - the size of the argument struct
+  ///
+  /// See XRayLogInitStatus for details on what the implementation MUST return
+  /// when called.
+  ///
+  /// If the implementation needs to install handlers aside from the 0-argument
+  /// function call handler, it MUST do so in this initialization handler.
+  ///
+  /// See xray_interface.h for available handler installation routines.
   XRayLogInitStatus (*log_init)(size_t, size_t, void *, size_t);
+
+  /// The log finalization routine provided by the implementation.
+  ///
+  /// See XRayLogInitStatus for details on what the implementation MUST return
+  /// when called.
   XRayLogInitStatus (*log_finalize)();
+
+  /// The 0-argument function call handler. XRay logging implementations MUST
+  /// always have a handler for function entry and exit events. In case the
+  /// implementation wants to support arg1 (or other future extensions to XRay
+  /// logging) those MUST be installed by the installed 'log_init' handler.
+  ///
+  /// Because we didn't want to change the ABI of this struct, the arg1 handler
+  /// may be silently overwritten during initialization as well.
   void (*handle_arg0)(int32_t, XRayEntryType);
+
+  /// The log implementation provided routine for when __xray_log_flushLog() is
+  /// called.
+  ///
+  /// See XRayLogFlushStatus for details on what the implementation MUST return
+  /// when called.
   XRayLogFlushStatus (*flush_log)();
 };
 
+/// This function installs a new logging implementation that XRay will use. In
+/// case there are any nullptr members in Impl, XRay will *uninstall any
+/// existing implementations*. It does NOT patch the instrumentation sleds.
+///
+/// NOTE: This function does NOT attempt to finalize the currently installed
+/// implementation. Use with caution.
+///
+/// It is guaranteed safe to call this function in the following states:
+///
+///   - When the implementation is UNINITIALIZED.
+///   - When the implementation is FINALIZED.
+///   - When there is no current implementation installed.
+///
+/// It is logging implementation defined what happens when this function is
+/// called while in any other states.
 void __xray_set_log_impl(XRayLogImpl Impl);
+
+/// This function removes the currently installed implementation. It will also
+/// uninstall any handlers that have been previously installed. It does NOT
+/// unpatch the instrumentation sleds.
+///
+/// NOTE: This function does NOT attempt to finalize the currently installed
+/// implementation. Use with caution.
+///
+/// It is guaranteed safe to call this function in the following states:
+///
+///   - When the implementation is UNINITIALIZED.
+///   - When the implementation is FINALIZED.
+///   - When there is no current implementation installed.
+///
+/// It is logging implementation defined what happens when this function is
+/// called while in any other states.
+void __xray_remove_log_impl();
+
+/// Invokes the installed implementation initialization routine. See
+/// XRayLogInitStatus for what the return values mean.
 XRayLogInitStatus __xray_log_init(size_t BufferSize, size_t MaxBuffers,
                                   void *Args, size_t ArgsSize);
+
+/// Invokes the installed implementation finalization routine. See
+/// XRayLogInitStatus for what the return values mean.
 XRayLogInitStatus __xray_log_finalize();
+
+/// Invokes the install implementation log flushing routine. See
+/// XRayLogFlushStatus for what the return values mean.
 XRayLogFlushStatus __xray_log_flushLog();
 
 } // extern "C"
 
+namespace __xray {
+
+// Options used by the LLVM XRay FDR implementation.
+struct FDRLoggingOptions {
+  bool ReportErrors = false;
+  int Fd = -1;
+};
+
+} // namespace __xray
+
 #endif // XRAY_XRAY_LOG_INTERFACE_H
diff --git a/include/xray/xray_records.h b/include/xray/xray_records.h
index 71637d1..d4b7b4c 100644
--- a/include/xray/xray_records.h
+++ b/include/xray/xray_records.h
@@ -17,6 +17,8 @@
 #ifndef XRAY_XRAY_RECORDS_H
 #define XRAY_XRAY_RECORDS_H
 
+#include <cstdint>
+
 namespace __xray {
 
 enum FileTypes {
@@ -24,6 +26,14 @@
   FDR_LOG = 1,
 };
 
+// FDR mode use of the union field in the XRayFileHeader.
+struct alignas(16) FdrAdditionalHeaderData {
+  uint64_t ThreadBufferSize;
+};
+
+static_assert(sizeof(FdrAdditionalHeaderData) == 16,
+              "FdrAdditionalHeaderData != 16 bytes");
+
 // This data structure is used to describe the contents of the file. We use this
 // for versioning the supported XRay file formats.
 struct alignas(32) XRayFileHeader {
@@ -42,28 +52,38 @@
   // The frequency by which TSC increases per-second.
   alignas(8) uint64_t CycleFrequency = 0;
 
-  // The current civiltime timestamp, as retrived from 'clock_gettime'. This
-  // allows readers of the file to determine when the file was created or
-  // written down.
-  struct timespec TS;
+  union {
+    char FreeForm[16];
+    // The current civiltime timestamp, as retrived from 'clock_gettime'. This
+    // allows readers of the file to determine when the file was created or
+    // written down.
+    struct timespec TS;
+
+    struct FdrAdditionalHeaderData FdrData;
+  };
 } __attribute__((packed));
 
 static_assert(sizeof(XRayFileHeader) == 32, "XRayFileHeader != 32 bytes");
 
 enum RecordTypes {
   NORMAL = 0,
+  ARG_PAYLOAD = 1,
 };
 
 struct alignas(32) XRayRecord {
   // This is the type of the record being written. We use 16 bits to allow us to
   // treat this as a discriminant, and so that the first 4 bytes get packed
   // properly. See RecordTypes for more supported types.
-  uint16_t RecordType = 0;
+  uint16_t RecordType = RecordTypes::NORMAL;
 
   // The CPU where the thread is running. We assume number of CPUs <= 256.
   uint8_t CPU = 0;
 
-  // The type of the event. Usually either ENTER = 0 or EXIT = 1.
+  // The type of the event. One of the following:
+  //   ENTER = 0
+  //   EXIT = 1
+  //   TAIL_EXIT = 2
+  //   ENTER_ARG = 3
   uint8_t Type = 0;
 
   // The function ID for the record.
@@ -81,6 +101,32 @@
 
 static_assert(sizeof(XRayRecord) == 32, "XRayRecord != 32 bytes");
 
+struct alignas(32) XRayArgPayload {
+  // We use the same 16 bits as a discriminant for the records in the log here
+  // too, and so that the first 4 bytes are packed properly.
+  uint16_t RecordType = RecordTypes::ARG_PAYLOAD;
+
+  // Add a few bytes to pad.
+  uint8_t Padding[2] = {};
+
+  // The function ID for the record.
+  int32_t FuncId = 0;
+
+  // The thread ID for the currently running thread.
+  uint32_t TId = 0;
+
+  // Add more padding.
+  uint8_t Padding2[4] = {};
+
+  // The argument payload.
+  uint64_t Arg = 0;
+
+  // The rest of this record ought to be left as padding.
+  uint8_t TailPadding[8] = {};
+} __attribute__((packed));
+
+static_assert(sizeof(XRayArgPayload) == 32, "XRayArgPayload != 32 bytes");
+
 } // namespace __xray
 
 #endif // XRAY_XRAY_RECORDS_H
diff --git a/lib/BlocksRuntime/Block.h b/lib/BlocksRuntime/Block.h
index 55cdd01..e6cb142 100644
--- a/lib/BlocksRuntime/Block.h
+++ b/lib/BlocksRuntime/Block.h
@@ -27,7 +27,7 @@
 
 #if !defined(BLOCK_EXPORT)
 #   if defined(__cplusplus)
-#       define BLOCK_EXPORT extern "C" 
+#       define BLOCK_EXPORT extern "C"
 #   else
 #       define BLOCK_EXPORT extern
 #   endif
diff --git a/lib/BlocksRuntime/Block_private.h b/lib/BlocksRuntime/Block_private.h
index 8ae8218..91d8d8a 100644
--- a/lib/BlocksRuntime/Block_private.h
+++ b/lib/BlocksRuntime/Block_private.h
@@ -27,7 +27,7 @@
 
 #if !defined(BLOCK_EXPORT)
 #   if defined(__cplusplus)
-#       define BLOCK_EXPORT extern "C" 
+#       define BLOCK_EXPORT extern "C"
 #   else
 #       define BLOCK_EXPORT extern
 #   endif
diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
index 4ab1e93..b3731f6 100644
--- a/lib/CMakeLists.txt
+++ b/lib/CMakeLists.txt
@@ -21,18 +21,9 @@
   string(TOUPPER ${runtime} runtime_uppercase)
   if(COMPILER_RT_HAS_${runtime_uppercase})
     add_subdirectory(${runtime})
-    foreach(directory ${ARGN})
-      add_subdirectory(${directory})
-    endforeach()
-  endif()
-endfunction()
-
-function(compiler_rt_build_sanitizer sanitizer)
-  string(TOUPPER ${sanitizer} sanitizer_uppercase)
-  string(TOLOWER ${sanitizer} sanitizer_lowercase)
-  list(FIND COMPILER_RT_SANITIZERS_TO_BUILD ${sanitizer_lowercase} result)
-  if(NOT ${result} EQUAL -1)
-    compiler_rt_build_runtime(${sanitizer} ${ARGN})
+    if(${runtime} STREQUAL tsan)
+      add_subdirectory(tsan/dd)
+    endif()
   endif()
 endfunction()
 
@@ -45,18 +36,19 @@
     add_subdirectory(ubsan)
   endif()
 
-  compiler_rt_build_sanitizer(asan)
-  compiler_rt_build_sanitizer(dfsan)
-  compiler_rt_build_sanitizer(msan)
-  compiler_rt_build_sanitizer(tsan tsan/dd)
-  compiler_rt_build_sanitizer(safestack)
-  compiler_rt_build_sanitizer(cfi)
-  compiler_rt_build_sanitizer(esan)
-  compiler_rt_build_sanitizer(scudo)
+  foreach(sanitizer ${COMPILER_RT_SANITIZERS_TO_BUILD})
+    compiler_rt_build_runtime(${sanitizer})
+  endforeach()
+endif()
 
+if(COMPILER_RT_BUILD_PROFILE AND COMPILER_RT_HAS_PROFILE)
   compiler_rt_build_runtime(profile)
 endif()
 
 if(COMPILER_RT_BUILD_XRAY)
   compiler_rt_build_runtime(xray)
 endif()
+
+if(COMPILER_RT_BUILD_LIBFUZZER)
+  compiler_rt_build_runtime(fuzzer)
+endif()
diff --git a/lib/asan/CMakeLists.txt b/lib/asan/CMakeLists.txt
index e940cc7..1bd4490 100644
--- a/lib/asan/CMakeLists.txt
+++ b/lib/asan/CMakeLists.txt
@@ -8,9 +8,11 @@
   asan_errors.cc
   asan_fake_stack.cc
   asan_flags.cc
+  asan_fuchsia.cc
   asan_globals.cc
   asan_globals_win.cc
   asan_interceptors.cc
+  asan_interceptors_memintrinsics.cc
   asan_linux.cc
   asan_mac.cc
   asan_malloc_linux.cc
@@ -21,6 +23,7 @@
   asan_posix.cc
   asan_report.cc
   asan_rtl.cc
+  asan_shadow_setup.cc
   asan_stack.cc
   asan_stats.cc
   asan_suppressions.cc
@@ -37,13 +40,9 @@
 
 set(ASAN_CFLAGS ${SANITIZER_COMMON_CFLAGS})
 
-# FIXME(fjricci) - remove this once lsan for darwin is fully enabled
-if(APPLE AND COMPILER_RT_HAS_LSAN)
-  set(ASAN_CFLAGS ${ASAN_CFLAGS} -DCAN_SANITIZE_LEAKS_MAC=1)
-endif()
 append_rtti_flag(OFF ASAN_CFLAGS)
 
-set(ASAN_DYNAMIC_LINK_FLAGS)
+set(ASAN_DYNAMIC_LINK_FLAGS ${SANITIZER_COMMON_LINK_FLAGS})
 
 if(ANDROID)
 # On Android, -z global does not do what it is documented to do.
@@ -68,12 +67,12 @@
   -ftls-model=initial-exec ASAN_DYNAMIC_CFLAGS)
 append_list_if(MSVC /DEBUG ASAN_DYNAMIC_LINK_FLAGS)
 
-append_list_if(COMPILER_RT_HAS_LIBC c ASAN_DYNAMIC_LIBS)
+set(ASAN_DYNAMIC_LIBS ${SANITIZER_CXX_ABI_LIBRARY} ${SANITIZER_COMMON_LINK_LIBS})
+
 append_list_if(COMPILER_RT_HAS_LIBDL dl ASAN_DYNAMIC_LIBS)
 append_list_if(COMPILER_RT_HAS_LIBRT rt ASAN_DYNAMIC_LIBS)
 append_list_if(COMPILER_RT_HAS_LIBM m ASAN_DYNAMIC_LIBS)
 append_list_if(COMPILER_RT_HAS_LIBPTHREAD pthread ASAN_DYNAMIC_LIBS)
-append_list_if(COMPILER_RT_HAS_LIBSTDCXX stdc++ ASAN_DYNAMIC_LIBS)
 append_list_if(COMPILER_RT_HAS_LIBLOG log ASAN_DYNAMIC_LIBS)
 
 # Compile ASan sources into an object library.
@@ -168,15 +167,15 @@
     PARENT_TARGET asan)
 
   foreach(arch ${ASAN_SUPPORTED_ARCH})
-    if (UNIX AND NOT ${arch} MATCHES "i386|i686")
+    if (UNIX AND NOT ${arch} STREQUAL "i386")
       add_sanitizer_rt_version_list(clang_rt.asan-dynamic-${arch}
                                     LIBS clang_rt.asan-${arch} clang_rt.asan_cxx-${arch}
                                     EXTRA asan.syms.extra)
       set(VERSION_SCRIPT_FLAG
            -Wl,--version-script,${CMAKE_CURRENT_BINARY_DIR}/clang_rt.asan-dynamic-${arch}.vers)
-      set_source_files_properties(
+      set_property(SOURCE
         ${CMAKE_CURRENT_BINARY_DIR}/dummy.cc
-        PROPERTIES
+        APPEND PROPERTY
         OBJECT_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/clang_rt.asan-dynamic-${arch}.vers)
     else()
       set(VERSION_SCRIPT_FLAG)
@@ -202,10 +201,11 @@
       ARCHS ${arch}
       OBJECT_LIBS ${ASAN_COMMON_RUNTIME_OBJECT_LIBS}
               RTAsan_dynamic
-              # The only purpose of RTAsan_dynamic_version_script_dummy is to carry
-              # a dependency of the shared runtime on the version script. With CMake
-              # 3.1 or later it can be replaced with a straightforward
+              # The only purpose of RTAsan_dynamic_version_script_dummy is to
+              # carry a dependency of the shared runtime on the version script.
+              # Replacing it with a straightforward
               # add_dependencies(clang_rt.asan-dynamic-${arch} clang_rt.asan-dynamic-${arch}-version-list)
+              # generates an order-only dependency in ninja.
               RTAsan_dynamic_version_script_dummy
               RTUbsan_cxx
               ${ASAN_DYNAMIC_WEAK_INTERCEPTION}
@@ -216,7 +216,7 @@
       DEFS ${ASAN_DYNAMIC_DEFINITIONS}
       PARENT_TARGET asan)
 
-    if (UNIX AND NOT ${arch} MATCHES "i386|i686")
+    if (UNIX AND NOT ${arch} STREQUAL "i386")
       add_sanitizer_rt_symbols(clang_rt.asan_cxx
         ARCHS ${arch})
       add_dependencies(asan clang_rt.asan_cxx-${arch}-symbols)
diff --git a/lib/asan/asan_activation.cc b/lib/asan/asan_activation.cc
index 7e4e604..66eba9c 100644
--- a/lib/asan/asan_activation.cc
+++ b/lib/asan/asan_activation.cc
@@ -106,7 +106,6 @@
   // Deactivate the runtime.
   SetCanPoisonMemory(false);
   SetMallocContextSize(1);
-  ReInitializeCoverage(false, nullptr);
 
   AllocatorOptions disabled = asan_deactivated_flags.allocator_options;
   disabled.quarantine_size_mb = 0;
@@ -130,8 +129,6 @@
 
   SetCanPoisonMemory(asan_deactivated_flags.poison_heap);
   SetMallocContextSize(asan_deactivated_flags.malloc_context_size);
-  ReInitializeCoverage(asan_deactivated_flags.coverage,
-                       asan_deactivated_flags.coverage_dir);
   ReInitializeAllocator(asan_deactivated_flags.allocator_options);
 
   asan_is_deactivated = false;
diff --git a/lib/asan/asan_allocator.cc b/lib/asan/asan_allocator.cc
index 4be1f1c..bc9b896 100644
--- a/lib/asan/asan_allocator.cc
+++ b/lib/asan/asan_allocator.cc
@@ -21,7 +21,9 @@
 #include "asan_report.h"
 #include "asan_stack.h"
 #include "asan_thread.h"
+#include "sanitizer_common/sanitizer_allocator_checks.h"
 #include "sanitizer_common/sanitizer_allocator_interface.h"
+#include "sanitizer_common/sanitizer_errno.h"
 #include "sanitizer_common/sanitizer_flags.h"
 #include "sanitizer_common/sanitizer_internal_defs.h"
 #include "sanitizer_common/sanitizer_list.h"
@@ -82,7 +84,10 @@
   // This field is used for small sizes. For large sizes it is equal to
   // SizeClassMap::kMaxSize and the actual size is stored in the
   // SecondaryAllocator's metadata.
-  u32 user_requested_size;
+  u32 user_requested_size : 29;
+  // align < 8 -> 0
+  // else      -> log2(min(align, 512)) - 2
+  u32 user_requested_alignment_log : 3;
   u32 alloc_context_id;
 };
 
@@ -160,7 +165,11 @@
   }
 
   void *Allocate(uptr size) {
-    return get_allocator().Allocate(cache_, size, 1, false);
+    void *res = get_allocator().Allocate(cache_, size, 1);
+    // TODO(alekseys): Consider making quarantine OOM-friendly.
+    if (UNLIKELY(!res))
+      return DieOnFailure::OnOOM();
+    return res;
   }
 
   void Deallocate(void *p) {
@@ -235,6 +244,8 @@
   AllocatorCache fallback_allocator_cache;
   QuarantineCache fallback_quarantine_cache;
 
+  atomic_uint8_t rss_limit_exceeded;
+
   // ------------------- Options --------------------------
   atomic_uint16_t min_redzone;
   atomic_uint16_t max_redzone;
@@ -264,10 +275,19 @@
   }
 
   void Initialize(const AllocatorOptions &options) {
-    allocator.Init(options.may_return_null, options.release_to_os_interval_ms);
+    SetAllocatorMayReturnNull(options.may_return_null);
+    allocator.Init(options.release_to_os_interval_ms);
     SharedInitCode(options);
   }
 
+  bool RssLimitExceeded() {
+    return atomic_load(&rss_limit_exceeded, memory_order_relaxed);
+  }
+
+  void SetRssLimitExceeded(bool limit_exceeded) {
+    atomic_store(&rss_limit_exceeded, limit_exceeded, memory_order_relaxed);
+  }
+
   void RePoisonChunk(uptr chunk) {
     // This could be a user-facing chunk (with redzones), or some internal
     // housekeeping chunk, like TransferBatch. Start by assuming the former.
@@ -292,7 +312,7 @@
   }
 
   void ReInitialize(const AllocatorOptions &options) {
-    allocator.SetMayReturnNull(options.may_return_null);
+    SetAllocatorMayReturnNull(options.may_return_null);
     allocator.SetReleaseToOSIntervalMs(options.release_to_os_interval_ms);
     SharedInitCode(options);
 
@@ -313,7 +333,7 @@
     options->thread_local_quarantine_size_kb = quarantine.GetCacheSize() >> 10;
     options->min_redzone = atomic_load(&min_redzone, memory_order_acquire);
     options->max_redzone = atomic_load(&max_redzone, memory_order_acquire);
-    options->may_return_null = allocator.MayReturnNull();
+    options->may_return_null = AllocatorMayReturnNull();
     options->alloc_dealloc_mismatch =
         atomic_load(&alloc_dealloc_mismatch, memory_order_acquire);
     options->release_to_os_interval_ms = allocator.ReleaseToOSIntervalMs();
@@ -334,6 +354,20 @@
     return Min(Max(rz_log, RZSize2Log(min_rz)), RZSize2Log(max_rz));
   }
 
+  static uptr ComputeUserRequestedAlignmentLog(uptr user_requested_alignment) {
+    if (user_requested_alignment < 8)
+      return 0;
+    if (user_requested_alignment > 512)
+      user_requested_alignment = 512;
+    return Log2(user_requested_alignment) - 2;
+  }
+
+  static uptr ComputeUserAlignment(uptr user_requested_alignment_log) {
+    if (user_requested_alignment_log == 0)
+      return 0;
+    return 1LL << (user_requested_alignment_log + 2);
+  }
+
   // We have an address between two chunks, and we want to report just one.
   AsanChunk *ChooseChunk(uptr addr, AsanChunk *left_chunk,
                          AsanChunk *right_chunk) {
@@ -363,9 +397,13 @@
                  AllocType alloc_type, bool can_fill) {
     if (UNLIKELY(!asan_inited))
       AsanInitFromRtl();
+    if (RssLimitExceeded())
+      return AsanAllocator::FailureHandler::OnOOM();
     Flags &fl = *flags();
     CHECK(stack);
     const uptr min_alignment = SHADOW_GRANULARITY;
+    const uptr user_requested_alignment_log =
+        ComputeUserRequestedAlignmentLog(alignment);
     if (alignment < min_alignment)
       alignment = min_alignment;
     if (size == 0) {
@@ -395,24 +433,21 @@
     if (size > kMaxAllowedMallocSize || needed_size > kMaxAllowedMallocSize) {
       Report("WARNING: AddressSanitizer failed to allocate 0x%zx bytes\n",
              (void*)size);
-      return allocator.ReturnNullOrDieOnBadRequest();
+      return AsanAllocator::FailureHandler::OnBadRequest();
     }
 
     AsanThread *t = GetCurrentThread();
     void *allocated;
-    bool check_rss_limit = true;
     if (t) {
       AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage());
-      allocated =
-          allocator.Allocate(cache, needed_size, 8, false, check_rss_limit);
+      allocated = allocator.Allocate(cache, needed_size, 8);
     } else {
       SpinMutexLock l(&fallback_mutex);
       AllocatorCache *cache = &fallback_allocator_cache;
-      allocated =
-          allocator.Allocate(cache, needed_size, 8, false, check_rss_limit);
+      allocated = allocator.Allocate(cache, needed_size, 8);
     }
-
-    if (!allocated) return allocator.ReturnNullOrDieOnOOM();
+    if (!allocated)
+      return nullptr;
 
     if (*(u8 *)MEM_TO_SHADOW((uptr)allocated) == 0 && CanPoisonMemory()) {
       // Heap poisoning is enabled, but the allocator provides an unpoisoned
@@ -456,6 +491,7 @@
       meta[0] = size;
       meta[1] = chunk_beg;
     }
+    m->user_requested_alignment_log = user_requested_alignment_log;
 
     m->alloc_context_id = StackDepotPut(*stack);
 
@@ -514,8 +550,7 @@
 
   // Expects the chunk to already be marked as quarantined by using
   // AtomicallySetQuarantineFlagIfAllocated.
-  void QuarantineChunk(AsanChunk *m, void *ptr, BufferedStackTrace *stack,
-                       AllocType alloc_type) {
+  void QuarantineChunk(AsanChunk *m, void *ptr, BufferedStackTrace *stack) {
     CHECK_EQ(m->chunk_state, CHUNK_QUARANTINE);
     CHECK_GE(m->alloc_tid, 0);
     if (SANITIZER_WORDSIZE == 64)  // On 32-bits this resides in user area.
@@ -523,6 +558,18 @@
     AsanThread *t = GetCurrentThread();
     m->free_tid = t ? t->tid() : 0;
     m->free_context_id = StackDepotPut(*stack);
+
+    Flags &fl = *flags();
+    if (fl.max_free_fill_size > 0) {
+      // We have to skip the chunk header, it contains free_context_id.
+      uptr scribble_start = (uptr)m + kChunkHeaderSize + kChunkHeader2Size;
+      if (m->UsedSize() >= kChunkHeader2Size) {  // Skip Header2 in user area.
+        uptr size_to_fill = m->UsedSize() - kChunkHeader2Size;
+        size_to_fill = Min(size_to_fill, (uptr)fl.max_free_fill_size);
+        REAL(memset)((void *)scribble_start, fl.free_fill_byte, size_to_fill);
+      }
+    }
+
     // Poison the region.
     PoisonShadow(m->Beg(),
                  RoundUpTo(m->UsedSize(), SHADOW_GRANULARITY),
@@ -546,8 +593,8 @@
     }
   }
 
-  void Deallocate(void *ptr, uptr delete_size, BufferedStackTrace *stack,
-                  AllocType alloc_type) {
+  void Deallocate(void *ptr, uptr delete_size, uptr delete_alignment,
+                  BufferedStackTrace *stack, AllocType alloc_type) {
     uptr p = reinterpret_cast<uptr>(ptr);
     if (p == 0) return;
 
@@ -574,14 +621,17 @@
         ReportAllocTypeMismatch((uptr)ptr, stack, (AllocType)m->alloc_type,
                                 (AllocType)alloc_type);
       }
+    } else {
+      if (flags()->new_delete_type_mismatch &&
+          (alloc_type == FROM_NEW || alloc_type == FROM_NEW_BR) &&
+          ((delete_size && delete_size != m->UsedSize()) ||
+           ComputeUserRequestedAlignmentLog(delete_alignment) !=
+               m->user_requested_alignment_log)) {
+        ReportNewDeleteTypeMismatch(p, delete_size, delete_alignment, stack);
+      }
     }
 
-    if (delete_size && flags()->new_delete_type_mismatch &&
-        delete_size != m->UsedSize()) {
-      ReportNewDeleteSizeMismatch(p, delete_size, stack);
-    }
-
-    QuarantineChunk(m, ptr, stack, alloc_type);
+    QuarantineChunk(m, ptr, stack);
   }
 
   void *Reallocate(void *old_ptr, uptr new_size, BufferedStackTrace *stack) {
@@ -604,14 +654,14 @@
       // If realloc() races with free(), we may start copying freed memory.
       // However, we will report racy double-free later anyway.
       REAL(memcpy)(new_ptr, old_ptr, memcpy_size);
-      Deallocate(old_ptr, 0, stack, FROM_MALLOC);
+      Deallocate(old_ptr, 0, 0, stack, FROM_MALLOC);
     }
     return new_ptr;
   }
 
   void *Calloc(uptr nmemb, uptr size, BufferedStackTrace *stack) {
-    if (CallocShouldReturnNullDueToOverflow(size, nmemb))
-      return allocator.ReturnNullOrDieOnBadRequest();
+    if (CheckForCallocOverflow(size, nmemb))
+      return AsanAllocator::FailureHandler::OnBadRequest();
     void *ptr = Allocate(nmemb * size, 8, stack, FROM_MALLOC, false);
     // If the memory comes from the secondary allocator no need to clear it
     // as it comes directly from mmap.
@@ -689,6 +739,22 @@
     return AsanChunkView(m1);
   }
 
+  void Purge() {
+    AsanThread *t = GetCurrentThread();
+    if (t) {
+      AsanThreadLocalMallocStorage *ms = &t->malloc_storage();
+      quarantine.DrainAndRecycle(GetQuarantineCache(ms),
+                                 QuarantineCallback(GetAllocatorCache(ms)));
+    }
+    {
+      SpinMutexLock l(&fallback_mutex);
+      quarantine.DrainAndRecycle(&fallback_quarantine_cache,
+                                 QuarantineCallback(&fallback_allocator_cache));
+    }
+
+    allocator.ForceReleaseToOS();
+  }
+
   void PrintStats() {
     allocator.PrintStats();
     quarantine.PrintStats();
@@ -723,6 +789,9 @@
 uptr AsanChunkView::Beg() const { return chunk_->Beg(); }
 uptr AsanChunkView::End() const { return Beg() + UsedSize(); }
 uptr AsanChunkView::UsedSize() const { return chunk_->UsedSize(); }
+u32 AsanChunkView::UserRequestedAlignment() const {
+  return Allocator::ComputeUserAlignment(chunk_->user_requested_alignment_log);
+}
 uptr AsanChunkView::AllocTid() const { return chunk_->alloc_tid; }
 uptr AsanChunkView::FreeTid() const { return chunk_->free_tid; }
 AllocType AsanChunkView::GetAllocType() const {
@@ -774,55 +843,73 @@
   instance.PrintStats();
 }
 
-void *asan_memalign(uptr alignment, uptr size, BufferedStackTrace *stack,
-                    AllocType alloc_type) {
-  return instance.Allocate(size, alignment, stack, alloc_type, true);
-}
-
 void asan_free(void *ptr, BufferedStackTrace *stack, AllocType alloc_type) {
-  instance.Deallocate(ptr, 0, stack, alloc_type);
+  instance.Deallocate(ptr, 0, 0, stack, alloc_type);
 }
 
-void asan_sized_free(void *ptr, uptr size, BufferedStackTrace *stack,
-                     AllocType alloc_type) {
-  instance.Deallocate(ptr, size, stack, alloc_type);
+void asan_delete(void *ptr, uptr size, uptr alignment,
+                 BufferedStackTrace *stack, AllocType alloc_type) {
+  instance.Deallocate(ptr, size, alignment, stack, alloc_type);
 }
 
 void *asan_malloc(uptr size, BufferedStackTrace *stack) {
-  return instance.Allocate(size, 8, stack, FROM_MALLOC, true);
+  return SetErrnoOnNull(instance.Allocate(size, 8, stack, FROM_MALLOC, true));
 }
 
 void *asan_calloc(uptr nmemb, uptr size, BufferedStackTrace *stack) {
-  return instance.Calloc(nmemb, size, stack);
+  return SetErrnoOnNull(instance.Calloc(nmemb, size, stack));
 }
 
 void *asan_realloc(void *p, uptr size, BufferedStackTrace *stack) {
   if (!p)
-    return instance.Allocate(size, 8, stack, FROM_MALLOC, true);
+    return SetErrnoOnNull(instance.Allocate(size, 8, stack, FROM_MALLOC, true));
   if (size == 0) {
-    instance.Deallocate(p, 0, stack, FROM_MALLOC);
-    return nullptr;
+    if (flags()->allocator_frees_and_returns_null_on_realloc_zero) {
+      instance.Deallocate(p, 0, 0, stack, FROM_MALLOC);
+      return nullptr;
+    }
+    // Allocate a size of 1 if we shouldn't free() on Realloc to 0
+    size = 1;
   }
-  return instance.Reallocate(p, size, stack);
+  return SetErrnoOnNull(instance.Reallocate(p, size, stack));
 }
 
 void *asan_valloc(uptr size, BufferedStackTrace *stack) {
-  return instance.Allocate(size, GetPageSizeCached(), stack, FROM_MALLOC, true);
+  return SetErrnoOnNull(
+      instance.Allocate(size, GetPageSizeCached(), stack, FROM_MALLOC, true));
 }
 
 void *asan_pvalloc(uptr size, BufferedStackTrace *stack) {
   uptr PageSize = GetPageSizeCached();
-  size = RoundUpTo(size, PageSize);
-  if (size == 0) {
-    // pvalloc(0) should allocate one page.
-    size = PageSize;
+  if (UNLIKELY(CheckForPvallocOverflow(size, PageSize))) {
+    errno = errno_ENOMEM;
+    return AsanAllocator::FailureHandler::OnBadRequest();
   }
-  return instance.Allocate(size, PageSize, stack, FROM_MALLOC, true);
+  // pvalloc(0) should allocate one page.
+  size = size ? RoundUpTo(size, PageSize) : PageSize;
+  return SetErrnoOnNull(
+      instance.Allocate(size, PageSize, stack, FROM_MALLOC, true));
+}
+
+void *asan_memalign(uptr alignment, uptr size, BufferedStackTrace *stack,
+                    AllocType alloc_type) {
+  if (UNLIKELY(!IsPowerOfTwo(alignment))) {
+    errno = errno_EINVAL;
+    return AsanAllocator::FailureHandler::OnBadRequest();
+  }
+  return SetErrnoOnNull(
+      instance.Allocate(size, alignment, stack, alloc_type, true));
 }
 
 int asan_posix_memalign(void **memptr, uptr alignment, uptr size,
                         BufferedStackTrace *stack) {
+  if (UNLIKELY(!CheckPosixMemalignAlignment(alignment))) {
+    AsanAllocator::FailureHandler::OnBadRequest();
+    return errno_EINVAL;
+  }
   void *ptr = instance.Allocate(size, alignment, stack, FROM_MALLOC, true);
+  if (UNLIKELY(!ptr))
+    return errno_ENOMEM;
   CHECK(IsAligned((uptr)ptr, alignment));
   *memptr = ptr;
   return 0;
@@ -850,8 +937,8 @@
   instance.ForceUnlock();
 }
 
-void AsanSoftRssLimitExceededCallback(bool exceeded) {
-  instance.allocator.SetRssLimitIsExceeded(exceeded);
+void AsanSoftRssLimitExceededCallback(bool limit_exceeded) {
+  instance.SetRssLimitExceeded(limit_exceeded);
 }
 
 } // namespace __asan
@@ -966,6 +1053,10 @@
   return allocated_size;
 }
 
+void __sanitizer_purge_allocator() {
+  instance.Purge();
+}
+
 #if !SANITIZER_SUPPORTS_WEAK_HOOKS
 // Provide default (no-op) implementation of malloc hooks.
 SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_malloc_hook,
diff --git a/lib/asan/asan_allocator.h b/lib/asan/asan_allocator.h
index ee28ecf..26483db 100644
--- a/lib/asan/asan_allocator.h
+++ b/lib/asan/asan_allocator.h
@@ -58,6 +58,7 @@
   uptr Beg() const;            // First byte of user memory.
   uptr End() const;            // Last byte of user memory.
   uptr UsedSize() const;       // Size requested by the user.
+  u32 UserRequestedAlignment() const;  // Originally requested alignment.
   uptr AllocTid() const;
   uptr FreeTid() const;
   bool Eq(const AsanChunkView &c) const { return chunk_ == c.chunk_; }
@@ -119,7 +120,11 @@
 };
 
 #if SANITIZER_CAN_USE_ALLOCATOR64
-# if defined(__powerpc64__)
+# if SANITIZER_FUCHSIA
+const uptr kAllocatorSpace = ~(uptr)0;
+const uptr kAllocatorSize  =  0x40000000000ULL;  // 4T.
+typedef DefaultSizeClassMap SizeClassMap;
+# elif defined(__powerpc64__)
 const uptr kAllocatorSpace =  0xa0000000000ULL;
 const uptr kAllocatorSize  =  0x20000000000ULL;  // 2T.
 typedef DefaultSizeClassMap SizeClassMap;
@@ -161,10 +166,17 @@
 typedef TwoLevelByteMap<(kNumRegions >> 12), 1 << 12> ByteMap;
 # endif
 typedef CompactSizeClassMap SizeClassMap;
-typedef SizeClassAllocator32<0, SANITIZER_MMAP_RANGE_SIZE, 16,
-  SizeClassMap, kRegionSizeLog,
-  ByteMap,
-  AsanMapUnmapCallback> PrimaryAllocator;
+struct AP32 {
+  static const uptr kSpaceBeg = 0;
+  static const u64 kSpaceSize = SANITIZER_MMAP_RANGE_SIZE;
+  static const uptr kMetadataSize = 16;
+  typedef __asan::SizeClassMap SizeClassMap;
+  static const uptr kRegionSizeLog = __asan::kRegionSizeLog;
+  typedef __asan::ByteMap ByteMap;
+  typedef AsanMapUnmapCallback MapUnmapCallback;
+  static const uptr kFlags = 0;
+};
+typedef SizeClassAllocator32<AP32> PrimaryAllocator;
 #endif  // SANITIZER_CAN_USE_ALLOCATOR64
 
 static const uptr kNumberOfSizeClasses = SizeClassMap::kNumClasses;
@@ -186,8 +198,8 @@
 void *asan_memalign(uptr alignment, uptr size, BufferedStackTrace *stack,
                     AllocType alloc_type);
 void asan_free(void *ptr, BufferedStackTrace *stack, AllocType alloc_type);
-void asan_sized_free(void *ptr, uptr size, BufferedStackTrace *stack,
-                     AllocType alloc_type);
+void asan_delete(void *ptr, uptr size, uptr alignment,
+                 BufferedStackTrace *stack, AllocType alloc_type);
 
 void *asan_malloc(uptr size, BufferedStackTrace *stack);
 void *asan_calloc(uptr nmemb, uptr size, BufferedStackTrace *stack);
diff --git a/lib/asan/asan_descriptions.cc b/lib/asan/asan_descriptions.cc
index 0ecbe09..0a4fb82 100644
--- a/lib/asan/asan_descriptions.cc
+++ b/lib/asan/asan_descriptions.cc
@@ -122,6 +122,7 @@
   }
   descr->chunk_begin = chunk.Beg();
   descr->chunk_size = chunk.UsedSize();
+  descr->user_requested_alignment = chunk.UserRequestedAlignment();
   descr->alloc_type = chunk.GetAllocType();
 }
 
@@ -150,7 +151,7 @@
   str.append(" %zu-byte region [%p,%p)\n", descr.chunk_size,
              (void *)descr.chunk_begin,
              (void *)(descr.chunk_begin + descr.chunk_size));
-  str.append("%s", d.EndLocation());
+  str.append("%s", d.Default());
   Printf("%s", str.data());
 }
 
@@ -252,12 +253,15 @@
     str.append("%c", var.name_pos[i]);
   }
   str.append("'");
+  if (var.line > 0) {
+    str.append(" (line %d)", var.line);
+  }
   if (pos_descr) {
     Decorator d;
     // FIXME: we may want to also print the size of the access here,
     // but in case of accesses generated by memset it may be confusing.
     str.append("%s <== Memory access at offset %zd %s this variable%s\n",
-               d.Location(), addr, pos_descr, d.EndLocation());
+               d.Location(), addr, pos_descr, d.Default());
   } else {
     str.append("\n");
   }
@@ -292,7 +296,7 @@
              MaybeDemangleGlobalName(g.name));
   PrintGlobalLocation(&str, g);
   str.append("' (0x%zx) of size %zu\n", g.beg, g.size);
-  str.append("%s", d.EndLocation());
+  str.append("%s", d.Default());
   PrintGlobalNameIfASCII(&str, g);
   Printf("%s", str.data());
 }
@@ -340,10 +344,10 @@
          ThreadNameWithParenthesis(tid, tname, sizeof(tname)));
 
   if (!frame_descr) {
-    Printf("%s\n", d.EndLocation());
+    Printf("%s\n", d.Default());
     return;
   }
-  Printf(" at offset %zu in frame%s\n", offset, d.EndLocation());
+  Printf(" at offset %zu in frame%s\n", offset, d.Default());
 
   // Now we print the frame where the alloca has happened.
   // We print this frame as a stack trace with one element.
@@ -352,7 +356,7 @@
   // previously. That's unfortunate, but I have no better solution,
   // especially given that the alloca may be from entirely different place
   // (e.g. use-after-scope, or different thread's stack).
-  Printf("%s", d.EndLocation());
+  Printf("%s", d.Default());
   StackTrace alloca_stack(&frame_pc, 1);
   alloca_stack.Print();
 
@@ -402,18 +406,18 @@
     Printf("%sfreed by thread T%d%s here:%s\n", d.Allocation(),
            free_thread->tid,
            ThreadNameWithParenthesis(free_thread, tname, sizeof(tname)),
-           d.EndAllocation());
+           d.Default());
     StackTrace free_stack = GetStackTraceFromId(free_stack_id);
     free_stack.Print();
     Printf("%spreviously allocated by thread T%d%s here:%s\n", d.Allocation(),
            alloc_thread->tid,
            ThreadNameWithParenthesis(alloc_thread, tname, sizeof(tname)),
-           d.EndAllocation());
+           d.Default());
   } else {
     Printf("%sallocated by thread T%d%s here:%s\n", d.Allocation(),
            alloc_thread->tid,
            ThreadNameWithParenthesis(alloc_thread, tname, sizeof(tname)),
-           d.EndAllocation());
+           d.Default());
   }
   alloc_stack.Print();
   DescribeThread(GetCurrentThread());
diff --git a/lib/asan/asan_descriptions.h b/lib/asan/asan_descriptions.h
index 0ee677e..cd278ad 100644
--- a/lib/asan/asan_descriptions.h
+++ b/lib/asan/asan_descriptions.h
@@ -34,11 +34,8 @@
  public:
   Decorator() : SanitizerCommonDecorator() {}
   const char *Access() { return Blue(); }
-  const char *EndAccess() { return Default(); }
   const char *Location() { return Green(); }
-  const char *EndLocation() { return Default(); }
   const char *Allocation() { return Magenta(); }
-  const char *EndAllocation() { return Default(); }
 
   const char *ShadowByte(u8 byte) {
     switch (byte) {
@@ -72,9 +69,6 @@
         return Default();
     }
   }
-  const char *EndShadowByte() { return Default(); }
-  const char *MemoryByte() { return Magenta(); }
-  const char *EndMemoryByte() { return Default(); }
 };
 
 enum ShadowKind : u8 {
@@ -108,6 +102,7 @@
   sptr offset;
   uptr chunk_begin;
   uptr chunk_size;
+  u32 user_requested_alignment : 12;
   u32 access_type : 2;
   u32 alloc_type : 2;
 };
diff --git a/lib/asan/asan_errors.cc b/lib/asan/asan_errors.cc
index 57490ad..6413b98 100644
--- a/lib/asan/asan_errors.cc
+++ b/lib/asan/asan_errors.cc
@@ -22,83 +22,27 @@
 
 namespace __asan {
 
-void ErrorStackOverflow::Print() {
-  Decorator d;
-  Printf("%s", d.Warning());
-  Report(
-      "ERROR: AddressSanitizer: %s on address %p"
-      " (pc %p bp %p sp %p T%d)\n", scariness.GetDescription(),
-      (void *)addr, (void *)pc, (void *)bp, (void *)sp, tid);
-  Printf("%s", d.EndWarning());
-  scariness.Print();
-  BufferedStackTrace stack;
-  GetStackTraceWithPcBpAndContext(&stack, kStackTraceMax, pc, bp, context,
-                                  common_flags()->fast_unwind_on_fatal);
-  stack.Print();
-  ReportErrorSummary(scariness.GetDescription(), &stack);
-}
-
-static void MaybeDumpInstructionBytes(uptr pc) {
-  if (!flags()->dump_instruction_bytes || (pc < GetPageSizeCached())) return;
-  InternalScopedString str(1024);
-  str.append("First 16 instruction bytes at pc: ");
-  if (IsAccessibleMemoryRange(pc, 16)) {
-    for (int i = 0; i < 16; ++i) {
-      PrintMemoryByte(&str, "", ((u8 *)pc)[i], /*in_shadow*/ false, " ");
-    }
-    str.append("\n");
-  } else {
-    str.append("unaccessible\n");
-  }
-  Report("%s", str.data());
-}
-
-static void MaybeDumpRegisters(void *context) {
-  if (!flags()->dump_registers) return;
-  SignalContext::DumpAllRegisters(context);
-}
-
-static void MaybeReportNonExecRegion(uptr pc) {
-#if SANITIZER_FREEBSD || SANITIZER_LINUX
-  MemoryMappingLayout proc_maps(/*cache_enabled*/ true);
-  uptr start, end, protection;
-  while (proc_maps.Next(&start, &end, nullptr, nullptr, 0, &protection)) {
-    if (pc >= start && pc < end &&
-        !(protection & MemoryMappingLayout::kProtectionExecute))
-      Report("Hint: PC is at a non-executable region. Maybe a wild jump?\n");
-  }
+static void OnStackUnwind(const SignalContext &sig,
+                          const void *callback_context,
+                          BufferedStackTrace *stack) {
+  bool fast = common_flags()->fast_unwind_on_fatal;
+#if SANITIZER_FREEBSD || SANITIZER_NETBSD
+  // On FreeBSD the slow unwinding that leverages _Unwind_Backtrace()
+  // yields the call stack of the signal's handler and not of the code
+  // that raised the signal (as it does on Linux).
+  fast = true;
 #endif
+  // Tests and maybe some users expect that scariness is going to be printed
+  // just before the stack. As only asan has scariness score we have no
+  // corresponding code in the sanitizer_common and we use this callback to
+  // print it.
+  static_cast<const ScarinessScoreBase *>(callback_context)->Print();
+  GetStackTraceWithPcBpAndContext(stack, kStackTraceMax, sig.pc, sig.bp,
+                                  sig.context, fast);
 }
 
 void ErrorDeadlySignal::Print() {
-  Decorator d;
-  Printf("%s", d.Warning());
-  const char *description = __sanitizer::DescribeSignalOrException(signo);
-  Report(
-      "ERROR: AddressSanitizer: %s on unknown address %p (pc %p bp %p sp %p "
-      "T%d)\n",
-      description, (void *)addr, (void *)pc, (void *)bp, (void *)sp, tid);
-  Printf("%s", d.EndWarning());
-  if (pc < GetPageSizeCached()) Report("Hint: pc points to the zero page.\n");
-  if (is_memory_access) {
-    const char *access_type =
-        write_flag == SignalContext::WRITE
-            ? "WRITE"
-            : (write_flag == SignalContext::READ ? "READ" : "UNKNOWN");
-    Report("The signal is caused by a %s memory access.\n", access_type);
-    if (addr < GetPageSizeCached())
-      Report("Hint: address points to the zero page.\n");
-  }
-  MaybeReportNonExecRegion(pc);
-  scariness.Print();
-  BufferedStackTrace stack;
-  GetStackTraceWithPcBpAndContext(&stack, kStackTraceMax, pc, bp, context,
-                                  common_flags()->fast_unwind_on_fatal);
-  stack.Print();
-  MaybeDumpInstructionBytes(pc);
-  MaybeDumpRegisters(context);
-  Printf("AddressSanitizer can not provide additional info.\n");
-  ReportErrorSummary(description, &stack);
+  ReportDeadlySignal(signal, tid, &OnStackUnwind, &scariness);
 }
 
 void ErrorDoubleFree::Print() {
@@ -110,7 +54,7 @@
       "thread T%d%s:\n",
       scariness.GetDescription(), addr_description.addr, tid,
       ThreadNameWithParenthesis(tid, tname, sizeof(tname)));
-  Printf("%s", d.EndWarning());
+  Printf("%s", d.Default());
   scariness.Print();
   GET_STACK_TRACE_FATAL(second_free_stack->trace[0],
                         second_free_stack->top_frame_bp);
@@ -119,7 +63,7 @@
   ReportErrorSummary(scariness.GetDescription(), &stack);
 }
 
-void ErrorNewDeleteSizeMismatch::Print() {
+void ErrorNewDeleteTypeMismatch::Print() {
   Decorator d;
   Printf("%s", d.Warning());
   char tname[128];
@@ -128,11 +72,29 @@
       "T%d%s:\n",
       scariness.GetDescription(), addr_description.addr, tid,
       ThreadNameWithParenthesis(tid, tname, sizeof(tname)));
-  Printf("%s  object passed to delete has wrong type:\n", d.EndWarning());
-  Printf(
-      "  size of the allocated type:   %zd bytes;\n"
-      "  size of the deallocated type: %zd bytes.\n",
-      addr_description.chunk_access.chunk_size, delete_size);
+  Printf("%s  object passed to delete has wrong type:\n", d.Default());
+  if (delete_size != 0) {
+    Printf(
+        "  size of the allocated type:   %zd bytes;\n"
+        "  size of the deallocated type: %zd bytes.\n",
+        addr_description.chunk_access.chunk_size, delete_size);
+  }
+  const uptr user_alignment =
+      addr_description.chunk_access.user_requested_alignment;
+  if (delete_alignment != user_alignment) {
+    char user_alignment_str[32];
+    char delete_alignment_str[32];
+    internal_snprintf(user_alignment_str, sizeof(user_alignment_str),
+                      "%zd bytes", user_alignment);
+    internal_snprintf(delete_alignment_str, sizeof(delete_alignment_str),
+                      "%zd bytes", delete_alignment);
+    static const char *kDefaultAlignment = "default-aligned";
+    Printf(
+        "  alignment of the allocated type:   %s;\n"
+        "  alignment of the deallocated type: %s.\n",
+        user_alignment > 0 ? user_alignment_str : kDefaultAlignment,
+        delete_alignment > 0 ? delete_alignment_str : kDefaultAlignment);
+  }
   CHECK_GT(free_stack->size, 0);
   scariness.Print();
   GET_STACK_TRACE_FATAL(free_stack->trace[0], free_stack->top_frame_bp);
@@ -153,7 +115,7 @@
       "which was not malloc()-ed: %p in thread T%d%s\n",
       addr_description.Address(), tid,
       ThreadNameWithParenthesis(tid, tname, sizeof(tname)));
-  Printf("%s", d.EndWarning());
+  Printf("%s", d.Default());
   CHECK_GT(free_stack->size, 0);
   scariness.Print();
   GET_STACK_TRACE_FATAL(free_stack->trace[0], free_stack->top_frame_bp);
@@ -174,7 +136,7 @@
          scariness.GetDescription(),
          alloc_names[alloc_type], dealloc_names[dealloc_type],
          addr_description.addr);
-  Printf("%s", d.EndWarning());
+  Printf("%s", d.Default());
   CHECK_GT(dealloc_stack->size, 0);
   scariness.Print();
   GET_STACK_TRACE_FATAL(dealloc_stack->trace[0], dealloc_stack->top_frame_bp);
@@ -193,7 +155,7 @@
       "ERROR: AddressSanitizer: attempting to call malloc_usable_size() for "
       "pointer which is not owned: %p\n",
       addr_description.Address());
-  Printf("%s", d.EndWarning());
+  Printf("%s", d.Default());
   stack->Print();
   addr_description.Print();
   ReportErrorSummary(scariness.GetDescription(), stack);
@@ -206,7 +168,7 @@
       "ERROR: AddressSanitizer: attempting to call "
       "__sanitizer_get_allocated_size() for pointer which is not owned: %p\n",
       addr_description.Address());
-  Printf("%s", d.EndWarning());
+  Printf("%s", d.Default());
   stack->Print();
   addr_description.Print();
   ReportErrorSummary(scariness.GetDescription(), stack);
@@ -223,7 +185,7 @@
       bug_type, addr1_description.Address(),
       addr1_description.Address() + length1, addr2_description.Address(),
       addr2_description.Address() + length2);
-  Printf("%s", d.EndWarning());
+  Printf("%s", d.Default());
   scariness.Print();
   stack->Print();
   addr1_description.Print();
@@ -236,7 +198,7 @@
   Printf("%s", d.Warning());
   Report("ERROR: AddressSanitizer: %s: (size=%zd)\n",
          scariness.GetDescription(), size);
-  Printf("%s", d.EndWarning());
+  Printf("%s", d.Default());
   scariness.Print();
   stack->Print();
   addr_description.Print();
@@ -264,7 +226,7 @@
   Printf("%s", d.Warning());
   Report("ERROR: AddressSanitizer: %s (%p):\n", scariness.GetDescription(),
          global1.beg);
-  Printf("%s", d.EndWarning());
+  Printf("%s", d.Default());
   InternalScopedString g1_loc(256), g2_loc(256);
   PrintGlobalLocation(&g1_loc, global1);
   PrintGlobalLocation(&g2_loc, global2);
@@ -293,7 +255,7 @@
   Printf("%s", d.Warning());
   Report("ERROR: AddressSanitizer: %s: %p %p\n", scariness.GetDescription(),
          addr1_description.Address(), addr2_description.Address());
-  Printf("%s", d.EndWarning());
+  Printf("%s", d.Default());
   GET_STACK_TRACE_FATAL(pc, bp);
   stack.Print();
   addr1_description.Print();
@@ -478,9 +440,14 @@
   InternalScopedString str(4096 * 8);
   str.append("Shadow bytes around the buggy address:\n");
   for (int i = -5; i <= 5; i++) {
+    uptr row_shadow_addr = aligned_shadow + i * n_bytes_per_row;
+    // Skip rows that would be outside the shadow range. This can happen when
+    // the user address is near the bottom, top, or shadow gap of the address
+    // space.
+    if (!AddrIsInShadow(row_shadow_addr)) continue;
     const char *prefix = (i == 0) ? "=>" : "  ";
-    PrintShadowBytes(&str, prefix, (u8 *)(aligned_shadow + i * n_bytes_per_row),
-                     (u8 *)shadow_addr, n_bytes_per_row);
+    PrintShadowBytes(&str, prefix, (u8 *)row_shadow_addr, (u8 *)shadow_addr,
+                     n_bytes_per_row);
   }
   if (flags()->print_legend) PrintLegend(&str);
   Printf("%s", str.data());
@@ -492,13 +459,13 @@
   uptr addr = addr_description.Address();
   Report("ERROR: AddressSanitizer: %s on address %p at pc %p bp %p sp %p\n",
          bug_descr, (void *)addr, pc, bp, sp);
-  Printf("%s", d.EndWarning());
+  Printf("%s", d.Default());
 
   char tname[128];
   Printf("%s%s of size %zu at %p thread T%d%s%s\n", d.Access(),
          access_size ? (is_write ? "WRITE" : "READ") : "ACCESS", access_size,
          (void *)addr, tid,
-         ThreadNameWithParenthesis(tid, tname, sizeof(tname)), d.EndAccess());
+         ThreadNameWithParenthesis(tid, tname, sizeof(tname)), d.Default());
 
   scariness.Print();
   GET_STACK_TRACE_FATAL(pc, bp);
diff --git a/lib/asan/asan_errors.h b/lib/asan/asan_errors.h
index 9a12492..518ba0c 100644
--- a/lib/asan/asan_errors.h
+++ b/lib/asan/asan_errors.h
@@ -27,61 +27,28 @@
   u32 tid;
 };
 
-struct ErrorStackOverflow : ErrorBase {
-  uptr addr, pc, bp, sp;
-  // ErrorStackOverflow never owns the context.
-  void *context;
-  // VS2013 doesn't implement unrestricted unions, so we need a trivial default
-  // constructor
-  ErrorStackOverflow() = default;
-  ErrorStackOverflow(u32 tid, const SignalContext &sig)
-      : ErrorBase(tid),
-        addr(sig.addr),
-        pc(sig.pc),
-        bp(sig.bp),
-        sp(sig.sp),
-        context(sig.context) {
-    scariness.Clear();
-    scariness.Scare(10, "stack-overflow");
-  }
-  void Print();
-};
-
 struct ErrorDeadlySignal : ErrorBase {
-  uptr addr, pc, bp, sp;
-  // ErrorDeadlySignal never owns the context.
-  void *context;
-  int signo;
-  SignalContext::WriteFlag write_flag;
-  bool is_memory_access;
+  SignalContext signal;
   // VS2013 doesn't implement unrestricted unions, so we need a trivial default
   // constructor
   ErrorDeadlySignal() = default;
-  ErrorDeadlySignal(u32 tid, const SignalContext &sig, int signo_)
-      : ErrorBase(tid),
-        addr(sig.addr),
-        pc(sig.pc),
-        bp(sig.bp),
-        sp(sig.sp),
-        context(sig.context),
-        signo(signo_),
-        write_flag(sig.write_flag),
-        is_memory_access(sig.is_memory_access) {
+  ErrorDeadlySignal(u32 tid, const SignalContext &sig)
+      : ErrorBase(tid), signal(sig) {
     scariness.Clear();
-    if (is_memory_access) {
-      if (addr < GetPageSizeCached()) {
-        scariness.Scare(10, "null-deref");
-      } else if (addr == pc) {
-        scariness.Scare(60, "wild-jump");
-      } else if (write_flag == SignalContext::WRITE) {
-        scariness.Scare(30, "wild-addr-write");
-      } else if (write_flag == SignalContext::READ) {
-        scariness.Scare(20, "wild-addr-read");
-      } else {
-        scariness.Scare(25, "wild-addr");
-      }
-    } else {
+    if (signal.IsStackOverflow()) {
+      scariness.Scare(10, "stack-overflow");
+    } else if (!signal.is_memory_access) {
       scariness.Scare(10, "signal");
+    } else if (signal.addr < GetPageSizeCached()) {
+      scariness.Scare(10, "null-deref");
+    } else if (signal.addr == signal.pc) {
+      scariness.Scare(60, "wild-jump");
+    } else if (signal.write_flag == SignalContext::WRITE) {
+      scariness.Scare(30, "wild-addr-write");
+    } else if (signal.write_flag == SignalContext::READ) {
+      scariness.Scare(20, "wild-addr-read");
+    } else {
+      scariness.Scare(25, "wild-addr");
     }
   }
   void Print();
@@ -104,17 +71,19 @@
   void Print();
 };
 
-struct ErrorNewDeleteSizeMismatch : ErrorBase {
-  // ErrorNewDeleteSizeMismatch doesn't own the stack trace.
+struct ErrorNewDeleteTypeMismatch : ErrorBase {
+  // ErrorNewDeleteTypeMismatch doesn't own the stack trace.
   const BufferedStackTrace *free_stack;
   HeapAddressDescription addr_description;
   uptr delete_size;
+  uptr delete_alignment;
   // VS2013 doesn't implement unrestricted unions, so we need a trivial default
   // constructor
-  ErrorNewDeleteSizeMismatch() = default;
-  ErrorNewDeleteSizeMismatch(u32 tid, BufferedStackTrace *stack, uptr addr,
-                             uptr delete_size_)
-      : ErrorBase(tid), free_stack(stack), delete_size(delete_size_) {
+  ErrorNewDeleteTypeMismatch() = default;
+  ErrorNewDeleteTypeMismatch(u32 tid, BufferedStackTrace *stack, uptr addr,
+                             uptr delete_size_, uptr delete_alignment_)
+      : ErrorBase(tid), free_stack(stack), delete_size(delete_size_),
+        delete_alignment(delete_alignment_) {
     GetHeapAddressInformation(addr, 1, &addr_description);
     scariness.Clear();
     scariness.Scare(10, "new-delete-type-mismatch");
@@ -324,10 +293,9 @@
 
 // clang-format off
 #define ASAN_FOR_EACH_ERROR_KIND(macro)         \
-  macro(StackOverflow)                          \
   macro(DeadlySignal)                           \
   macro(DoubleFree)                             \
-  macro(NewDeleteSizeMismatch)                  \
+  macro(NewDeleteTypeMismatch)                  \
   macro(FreeNotMalloced)                        \
   macro(AllocTypeMismatch)                      \
   macro(MallocUsableSizeNotOwned)               \
diff --git a/lib/asan/asan_fake_stack.cc b/lib/asan/asan_fake_stack.cc
index 017b7d2..6a064aa 100644
--- a/lib/asan/asan_fake_stack.cc
+++ b/lib/asan/asan_fake_stack.cc
@@ -171,7 +171,7 @@
   }
 }
 
-#if SANITIZER_LINUX && !SANITIZER_ANDROID
+#if (SANITIZER_LINUX && !SANITIZER_ANDROID) || SANITIZER_FUCHSIA
 static THREADLOCAL FakeStack *fake_stack_tls;
 
 FakeStack *GetTLSFakeStack() {
@@ -183,7 +183,7 @@
 #else
 FakeStack *GetTLSFakeStack() { return 0; }
 void SetTLSFakeStack(FakeStack *fs) { }
-#endif  // SANITIZER_LINUX && !SANITIZER_ANDROID
+#endif  // (SANITIZER_LINUX && !SANITIZER_ANDROID) || SANITIZER_FUCHSIA
 
 static FakeStack *GetFakeStack() {
   AsanThread *t = GetCurrentThread();
diff --git a/lib/asan/asan_flags.cc b/lib/asan/asan_flags.cc
index 74c441a..d3efadc 100644
--- a/lib/asan/asan_flags.cc
+++ b/lib/asan/asan_flags.cc
@@ -61,7 +61,7 @@
   {
     CommonFlags cf;
     cf.CopyFrom(*common_flags());
-    cf.detect_leaks = CAN_SANITIZE_LEAKS;
+    cf.detect_leaks = cf.detect_leaks && CAN_SANITIZE_LEAKS;
     cf.external_symbolizer_path = GetEnv("ASAN_SYMBOLIZER_PATH");
     cf.malloc_context_size = kDefaultMallocContextSize;
     cf.intercept_tls_get_addr = true;
@@ -95,6 +95,18 @@
   RegisterCommonFlags(&ubsan_parser);
 #endif
 
+  if (SANITIZER_MAC) {
+    // Support macOS MallocScribble and MallocPreScribble:
+    // <https://developer.apple.com/library/content/documentation/Performance/
+    // Conceptual/ManagingMemory/Articles/MallocDebug.html>
+    if (GetEnv("MallocScribble")) {
+      f->max_free_fill_size = 0x1000;
+    }
+    if (GetEnv("MallocPreScribble")) {
+      f->malloc_fill_byte = 0xaa;
+    }
+  }
+
   // Override from ASan compile definition.
   const char *asan_compile_def = MaybeUseAsanDefaultOptionsCompileDefinition();
   asan_parser.ParseString(asan_compile_def);
@@ -106,6 +118,10 @@
   const char *ubsan_default_options = __ubsan::MaybeCallUbsanDefaultOptions();
   ubsan_parser.ParseString(ubsan_default_options);
 #endif
+#if CAN_SANITIZE_LEAKS
+  const char *lsan_default_options = __lsan::MaybeCallLsanDefaultOptions();
+  lsan_parser.ParseString(lsan_default_options);
+#endif
 
   // Override from command line.
   asan_parser.ParseString(GetEnv("ASAN_OPTIONS"));
@@ -182,6 +198,10 @@
     Report("WARNING: strchr* interceptors are enabled even though "
            "replace_str=0. Use intercept_strchr=0 to disable them.");
   }
+  if (!f->replace_str && common_flags()->intercept_strndup) {
+    Report("WARNING: strndup* interceptors are enabled even though "
+           "replace_str=0. Use intercept_strndup=0 to disable them.");
+  }
 }
 
 }  // namespace __asan
diff --git a/lib/asan/asan_flags.inc b/lib/asan/asan_flags.inc
index 4712efb..00071d3 100644
--- a/lib/asan/asan_flags.inc
+++ b/lib/asan/asan_flags.inc
@@ -63,8 +63,14 @@
     int, max_malloc_fill_size, 0x1000,  // By default, fill only the first 4K.
     "ASan allocator flag. max_malloc_fill_size is the maximal amount of "
     "bytes that will be filled with malloc_fill_byte on malloc.")
+ASAN_FLAG(
+    int, max_free_fill_size, 0,
+    "ASan allocator flag. max_free_fill_size is the maximal amount of "
+    "bytes that will be filled with free_fill_byte during free.")
 ASAN_FLAG(int, malloc_fill_byte, 0xbe,
           "Value used to fill the newly allocated memory.")
+ASAN_FLAG(int, free_fill_byte, 0x55,
+          "Value used to fill deallocated memory.")
 ASAN_FLAG(bool, allow_user_poisoning, true,
           "If set, user may manually mark memory regions as poisoned or "
           "unpoisoned.")
@@ -73,6 +79,10 @@
     "Number of seconds to sleep between printing an error report and "
     "terminating the program. Useful for debugging purposes (e.g. when one "
     "needs to attach gdb).")
+ASAN_FLAG(
+    int, sleep_after_init, 0,
+    "Number of seconds to sleep after AddressSanitizer is initialized. "
+    "Useful for debugging purposes (e.g. when one needs to attach gdb).")
 ASAN_FLAG(bool, check_malloc_usable_size, true,
           "Allows the users to work around the bug in Nvidia drivers prior to "
           "295.*.")
@@ -137,14 +147,16 @@
           "If >=2, detect violation of One-Definition-Rule (ODR); "
           "If ==1, detect ODR-violation only if the two variables "
           "have different sizes")
-ASAN_FLAG(bool, dump_instruction_bytes, false,
-          "If true, dump 16 bytes starting at the instruction that caused SEGV")
-ASAN_FLAG(bool, dump_registers, true,
-          "If true, dump values of CPU registers when SEGV happens. Only "
-          "available on OS X for now.")
 ASAN_FLAG(const char *, suppressions, "", "Suppressions file name.")
 ASAN_FLAG(bool, halt_on_error, true,
           "Crash the program after printing the first error report "
           "(WARNING: USE AT YOUR OWN RISK!)")
 ASAN_FLAG(bool, use_odr_indicator, false,
           "Use special ODR indicator symbol for ODR violation detection")
+ASAN_FLAG(bool, allocator_frees_and_returns_null_on_realloc_zero, true,
+          "realloc(p, 0) is equivalent to free(p) by default (Same as the "
+          "POSIX standard). If set to false, realloc(p, 0) will return a "
+          "pointer to an allocated space which can not be used.")
+ASAN_FLAG(bool, verify_asan_link_order, true,
+          "Check position of ASan runtime in library list (needs to be disabled"
+          " when other library has to be preloaded system-wide)")
diff --git a/lib/asan/asan_fuchsia.cc b/lib/asan/asan_fuchsia.cc
new file mode 100644
index 0000000..02fb8be
--- /dev/null
+++ b/lib/asan/asan_fuchsia.cc
@@ -0,0 +1,218 @@
+//===-- asan_fuchsia.cc --------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// Fuchsia-specific details.
+//===---------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_fuchsia.h"
+#if SANITIZER_FUCHSIA
+
+#include "asan_interceptors.h"
+#include "asan_internal.h"
+#include "asan_stack.h"
+#include "asan_thread.h"
+
+#include <limits.h>
+#include <zircon/sanitizer.h>
+#include <zircon/syscalls.h>
+#include <zircon/threads.h>
+
+namespace __asan {
+
+// The system already set up the shadow memory for us.
+// __sanitizer::GetMaxVirtualAddress has already been called by
+// AsanInitInternal->InitializeHighMemEnd (asan_rtl.cc).
+// Just do some additional sanity checks here.
+void InitializeShadowMemory() {
+  if (Verbosity()) PrintAddressSpaceLayout();
+
+  // Make sure SHADOW_OFFSET doesn't use __asan_shadow_memory_dynamic_address.
+  __asan_shadow_memory_dynamic_address = kDefaultShadowSentinel;
+  DCHECK(kLowShadowBeg != kDefaultShadowSentinel);
+  __asan_shadow_memory_dynamic_address = kLowShadowBeg;
+
+  CHECK_EQ(kShadowGapEnd, kHighShadowBeg - 1);
+  CHECK_EQ(kHighMemEnd, __sanitizer::ShadowBounds.memory_limit - 1);
+  CHECK_EQ(kHighMemBeg, __sanitizer::ShadowBounds.shadow_limit);
+  CHECK_EQ(kHighShadowBeg, __sanitizer::ShadowBounds.shadow_base);
+  CHECK_EQ(kShadowGapEnd, __sanitizer::ShadowBounds.shadow_base - 1);
+  CHECK_EQ(kLowShadowEnd, 0);
+  CHECK_EQ(kLowShadowBeg, 0);
+}
+
+void AsanApplyToGlobals(globals_op_fptr op, const void *needle) {
+  UNIMPLEMENTED();
+}
+
+void AsanCheckDynamicRTPrereqs() {}
+void AsanCheckIncompatibleRT() {}
+void InitializeAsanInterceptors() {}
+
+void *AsanDoesNotSupportStaticLinkage() { return nullptr; }
+
+void InitializePlatformExceptionHandlers() {}
+void AsanOnDeadlySignal(int signo, void *siginfo, void *context) {
+  UNIMPLEMENTED();
+}
+
+// We can use a plain thread_local variable for TSD.
+static thread_local void *per_thread;
+
+void *AsanTSDGet() { return per_thread; }
+
+void AsanTSDSet(void *tsd) { per_thread = tsd; }
+
+// There's no initialization needed, and the passed-in destructor
+// will never be called.  Instead, our own thread destruction hook
+// (below) will call AsanThread::TSDDtor directly.
+void AsanTSDInit(void (*destructor)(void *tsd)) {
+  DCHECK(destructor == &PlatformTSDDtor);
+}
+
+void PlatformTSDDtor(void *tsd) { UNREACHABLE(__func__); }
+
+static inline size_t AsanThreadMmapSize() {
+  return RoundUpTo(sizeof(AsanThread), PAGE_SIZE);
+}
+
+struct AsanThread::InitOptions {
+  uptr stack_bottom, stack_size;
+};
+
+// Shared setup between thread creation and startup for the initial thread.
+static AsanThread *CreateAsanThread(StackTrace *stack, u32 parent_tid,
+                                    uptr user_id, bool detached,
+                                    const char *name, uptr stack_bottom,
+                                    uptr stack_size) {
+  // In lieu of AsanThread::Create.
+  AsanThread *thread = (AsanThread *)MmapOrDie(AsanThreadMmapSize(), __func__);
+
+  AsanThreadContext::CreateThreadContextArgs args = {thread, stack};
+  u32 tid =
+      asanThreadRegistry().CreateThread(user_id, detached, parent_tid, &args);
+  asanThreadRegistry().SetThreadName(tid, name);
+
+  // On other systems, AsanThread::Init() is called from the new
+  // thread itself.  But on Fuchsia we already know the stack address
+  // range beforehand, so we can do most of the setup right now.
+  const AsanThread::InitOptions options = {stack_bottom, stack_size};
+  thread->Init(&options);
+
+  return thread;
+}
+
+// This gets the same arguments passed to Init by CreateAsanThread, above.
+// We're in the creator thread before the new thread is actually started,
+// but its stack address range is already known.  We don't bother tracking
+// the static TLS address range because the system itself already uses an
+// ASan-aware allocator for that.
+void AsanThread::SetThreadStackAndTls(const AsanThread::InitOptions *options) {
+  DCHECK_NE(GetCurrentThread(), this);
+  DCHECK_NE(GetCurrentThread(), nullptr);
+  CHECK_NE(options->stack_bottom, 0);
+  CHECK_NE(options->stack_size, 0);
+  stack_bottom_ = options->stack_bottom;
+  stack_top_ = options->stack_bottom + options->stack_size;
+}
+
+// Called by __asan::AsanInitInternal (asan_rtl.c).
+AsanThread *CreateMainThread() {
+  thrd_t self = thrd_current();
+  char name[ZX_MAX_NAME_LEN];
+  CHECK_NE(__sanitizer::MainThreadStackBase, 0);
+  CHECK_GT(__sanitizer::MainThreadStackSize, 0);
+  AsanThread *t = CreateAsanThread(
+      nullptr, 0, reinterpret_cast<uptr>(self), true,
+      _zx_object_get_property(thrd_get_zx_handle(self), ZX_PROP_NAME, name,
+                              sizeof(name)) == ZX_OK
+          ? name
+          : nullptr,
+      __sanitizer::MainThreadStackBase, __sanitizer::MainThreadStackSize);
+  SetCurrentThread(t);
+  return t;
+}
+
+// This is called before each thread creation is attempted.  So, in
+// its first call, the calling thread is the initial and sole thread.
+static void *BeforeThreadCreateHook(uptr user_id, bool detached,
+                                    const char *name, uptr stack_bottom,
+                                    uptr stack_size) {
+  EnsureMainThreadIDIsCorrect();
+  // Strict init-order checking is thread-hostile.
+  if (flags()->strict_init_order) StopInitOrderChecking();
+
+  GET_STACK_TRACE_THREAD;
+  u32 parent_tid = GetCurrentTidOrInvalid();
+
+  return CreateAsanThread(&stack, parent_tid, user_id, detached, name,
+                          stack_bottom, stack_size);
+}
+
+// This is called after creating a new thread (in the creating thread),
+// with the pointer returned by BeforeThreadCreateHook (above).
+static void ThreadCreateHook(void *hook, bool aborted) {
+  AsanThread *thread = static_cast<AsanThread *>(hook);
+  if (!aborted) {
+    // The thread was created successfully.
+    // ThreadStartHook is already running in the new thread.
+  } else {
+    // The thread wasn't created after all.
+    // Clean up everything we set up in BeforeThreadCreateHook.
+    asanThreadRegistry().FinishThread(thread->tid());
+    UnmapOrDie(thread, AsanThreadMmapSize());
+  }
+}
+
+// This is called in the newly-created thread before it runs anything else,
+// with the pointer returned by BeforeThreadCreateHook (above).
+// cf. asan_interceptors.cc:asan_thread_start
+static void ThreadStartHook(void *hook, uptr os_id) {
+  AsanThread *thread = static_cast<AsanThread *>(hook);
+  SetCurrentThread(thread);
+
+  // In lieu of AsanThread::ThreadStart.
+  asanThreadRegistry().StartThread(thread->tid(), os_id, /*workerthread*/ false,
+                                   nullptr);
+}
+
+// Each thread runs this just before it exits,
+// with the pointer returned by BeforeThreadCreateHook (above).
+// All per-thread destructors have already been called.
+static void ThreadExitHook(void *hook, uptr os_id) {
+  AsanThread::TSDDtor(per_thread);
+}
+
+}  // namespace __asan
+
+// These are declared (in extern "C") by <zircon/sanitizer.h>.
+// The system runtime will call our definitions directly.
+
+void *__sanitizer_before_thread_create_hook(thrd_t thread, bool detached,
+                                            const char *name, void *stack_base,
+                                            size_t stack_size) {
+  return __asan::BeforeThreadCreateHook(
+      reinterpret_cast<uptr>(thread), detached, name,
+      reinterpret_cast<uptr>(stack_base), stack_size);
+}
+
+void __sanitizer_thread_create_hook(void *hook, thrd_t thread, int error) {
+  __asan::ThreadCreateHook(hook, error != thrd_success);
+}
+
+void __sanitizer_thread_start_hook(void *hook, thrd_t self) {
+  __asan::ThreadStartHook(hook, reinterpret_cast<uptr>(self));
+}
+
+void __sanitizer_thread_exit_hook(void *hook, thrd_t self) {
+  __asan::ThreadExitHook(hook, reinterpret_cast<uptr>(self));
+}
+
+#endif  // SANITIZER_FUCHSIA
diff --git a/lib/asan/asan_globals.cc b/lib/asan/asan_globals.cc
index b723306..0db65d0 100644
--- a/lib/asan/asan_globals.cc
+++ b/lib/asan/asan_globals.cc
@@ -332,6 +332,26 @@
   *flag = 0;
 }
 
+void __asan_register_elf_globals(uptr *flag, void *start, void *stop) {
+  if (*flag) return;
+  if (!start) return;
+  CHECK_EQ(0, ((uptr)stop - (uptr)start) % sizeof(__asan_global));
+  __asan_global *globals_start = (__asan_global*)start;
+  __asan_global *globals_stop = (__asan_global*)stop;
+  __asan_register_globals(globals_start, globals_stop - globals_start);
+  *flag = 1;
+}
+
+void __asan_unregister_elf_globals(uptr *flag, void *start, void *stop) {
+  if (!*flag) return;
+  if (!start) return;
+  CHECK_EQ(0, ((uptr)stop - (uptr)start) % sizeof(__asan_global));
+  __asan_global *globals_start = (__asan_global*)start;
+  __asan_global *globals_stop = (__asan_global*)stop;
+  __asan_unregister_globals(globals_start, globals_stop - globals_start);
+  *flag = 0;
+}
+
 // Register an array of globals.
 void __asan_register_globals(__asan_global *globals, uptr n) {
   if (!flags()->report_globals) return;
@@ -364,6 +384,10 @@
     }
     RegisterGlobal(&globals[i]);
   }
+
+  // Poison the metadata. It should not be accessible to user code.
+  PoisonShadow(reinterpret_cast<uptr>(globals), n * sizeof(__asan_global),
+               kAsanGlobalRedzoneMagic);
 }
 
 // Unregister an array of globals.
@@ -379,6 +403,9 @@
     }
     UnregisterGlobal(&globals[i]);
   }
+
+  // Unpoison the metadata.
+  PoisonShadow(reinterpret_cast<uptr>(globals), n * sizeof(__asan_global), 0);
 }
 
 // This method runs immediately prior to dynamic initialization in each TU,
diff --git a/lib/asan/asan_interceptors.cc b/lib/asan/asan_interceptors.cc
index 6ee3266..cb7dcb3 100644
--- a/lib/asan/asan_interceptors.cc
+++ b/lib/asan/asan_interceptors.cc
@@ -24,6 +24,11 @@
 #include "lsan/lsan_common.h"
 #include "sanitizer_common/sanitizer_libc.h"
 
+// There is no general interception at all on Fuchsia.
+// Only the functions in asan_interceptors_memintrinsics.cc are
+// really defined to replace libc functions.
+#if !SANITIZER_FUCHSIA
+
 #if SANITIZER_POSIX
 #include "sanitizer_common/sanitizer_posix.h"
 #endif
@@ -36,101 +41,6 @@
 
 namespace __asan {
 
-// Return true if we can quickly decide that the region is unpoisoned.
-static inline bool QuickCheckForUnpoisonedRegion(uptr beg, uptr size) {
-  if (size == 0) return true;
-  if (size <= 32)
-    return !AddressIsPoisoned(beg) &&
-           !AddressIsPoisoned(beg + size - 1) &&
-           !AddressIsPoisoned(beg + size / 2);
-  return false;
-}
-
-struct AsanInterceptorContext {
-  const char *interceptor_name;
-};
-
-// We implement ACCESS_MEMORY_RANGE, ASAN_READ_RANGE,
-// and ASAN_WRITE_RANGE as macro instead of function so
-// that no extra frames are created, and stack trace contains
-// relevant information only.
-// We check all shadow bytes.
-#define ACCESS_MEMORY_RANGE(ctx, offset, size, isWrite) do {            \
-    uptr __offset = (uptr)(offset);                                     \
-    uptr __size = (uptr)(size);                                         \
-    uptr __bad = 0;                                                     \
-    if (__offset > __offset + __size) {                                 \
-      GET_STACK_TRACE_FATAL_HERE;                                       \
-      ReportStringFunctionSizeOverflow(__offset, __size, &stack);       \
-    }                                                                   \
-    if (!QuickCheckForUnpoisonedRegion(__offset, __size) &&             \
-        (__bad = __asan_region_is_poisoned(__offset, __size))) {        \
-      AsanInterceptorContext *_ctx = (AsanInterceptorContext *)ctx;     \
-      bool suppressed = false;                                          \
-      if (_ctx) {                                                       \
-        suppressed = IsInterceptorSuppressed(_ctx->interceptor_name);   \
-        if (!suppressed && HaveStackTraceBasedSuppressions()) {         \
-          GET_STACK_TRACE_FATAL_HERE;                                   \
-          suppressed = IsStackTraceSuppressed(&stack);                  \
-        }                                                               \
-      }                                                                 \
-      if (!suppressed) {                                                \
-        GET_CURRENT_PC_BP_SP;                                           \
-        ReportGenericError(pc, bp, sp, __bad, isWrite, __size, 0, false);\
-      }                                                                 \
-    }                                                                   \
-  } while (0)
-
-// memcpy is called during __asan_init() from the internals of printf(...).
-// We do not treat memcpy with to==from as a bug.
-// See http://llvm.org/bugs/show_bug.cgi?id=11763.
-#define ASAN_MEMCPY_IMPL(ctx, to, from, size)                           \
-  do {                                                                  \
-    if (UNLIKELY(!asan_inited)) return internal_memcpy(to, from, size); \
-    if (asan_init_is_running) {                                         \
-      return REAL(memcpy)(to, from, size);                              \
-    }                                                                   \
-    ENSURE_ASAN_INITED();                                               \
-    if (flags()->replace_intrin) {                                      \
-      if (to != from) {                                                 \
-        CHECK_RANGES_OVERLAP("memcpy", to, size, from, size);           \
-      }                                                                 \
-      ASAN_READ_RANGE(ctx, from, size);                                 \
-      ASAN_WRITE_RANGE(ctx, to, size);                                  \
-    }                                                                   \
-    return REAL(memcpy)(to, from, size);                                \
-  } while (0)
-
-// memset is called inside Printf.
-#define ASAN_MEMSET_IMPL(ctx, block, c, size)                           \
-  do {                                                                  \
-    if (UNLIKELY(!asan_inited)) return internal_memset(block, c, size); \
-    if (asan_init_is_running) {                                         \
-      return REAL(memset)(block, c, size);                              \
-    }                                                                   \
-    ENSURE_ASAN_INITED();                                               \
-    if (flags()->replace_intrin) {                                      \
-      ASAN_WRITE_RANGE(ctx, block, size);                               \
-    }                                                                   \
-    return REAL(memset)(block, c, size);                                \
-  } while (0)
-
-#define ASAN_MEMMOVE_IMPL(ctx, to, from, size)                           \
-  do {                                                                   \
-    if (UNLIKELY(!asan_inited)) return internal_memmove(to, from, size); \
-    ENSURE_ASAN_INITED();                                                \
-    if (flags()->replace_intrin) {                                       \
-      ASAN_READ_RANGE(ctx, from, size);                                  \
-      ASAN_WRITE_RANGE(ctx, to, size);                                   \
-    }                                                                    \
-    return internal_memmove(to, from, size);                             \
-  } while (0)
-
-#define ASAN_READ_RANGE(ctx, offset, size) \
-  ACCESS_MEMORY_RANGE(ctx, offset, size, false)
-#define ASAN_WRITE_RANGE(ctx, offset, size) \
-  ACCESS_MEMORY_RANGE(ctx, offset, size, true)
-
 #define ASAN_READ_STRING_OF_LEN(ctx, s, len, n)                 \
   ASAN_READ_RANGE((ctx), (s),                                   \
     common_flags()->strict_string_checks ? (len) + 1 : (n))
@@ -138,23 +48,6 @@
 #define ASAN_READ_STRING(ctx, s, n)                             \
   ASAN_READ_STRING_OF_LEN((ctx), (s), REAL(strlen)(s), (n))
 
-// Behavior of functions like "memcpy" or "strcpy" is undefined
-// if memory intervals overlap. We report error in this case.
-// Macro is used to avoid creation of new frames.
-static inline bool RangesOverlap(const char *offset1, uptr length1,
-                                 const char *offset2, uptr length2) {
-  return !((offset1 + length1 <= offset2) || (offset2 + length2 <= offset1));
-}
-#define CHECK_RANGES_OVERLAP(name, _offset1, length1, _offset2, length2) do { \
-  const char *offset1 = (const char*)_offset1; \
-  const char *offset2 = (const char*)_offset2; \
-  if (RangesOverlap(offset1, length1, offset2, length2)) { \
-    GET_STACK_TRACE_FATAL_HERE; \
-    ReportStringFunctionMemoryRangesOverlap(name, offset1, length1, \
-                                            offset2, length2, &stack); \
-  } \
-} while (0)
-
 static inline uptr MaybeRealStrnlen(const char *s, uptr maxlen) {
 #if SANITIZER_INTERCEPT_STRNLEN
   if (REAL(strnlen)) {
@@ -171,6 +64,10 @@
 }
 
 int OnExit() {
+  if (CAN_SANITIZE_LEAKS && common_flags()->detect_leaks &&
+      __lsan::HasReportedLeaks()) {
+    return common_flags()->exitcode;
+  }
   // FIXME: ask frontend whether we need to return failure.
   return 0;
 }
@@ -234,9 +131,8 @@
     CheckNoDeepBind(filename, flag);                                           \
   } while (false)
 #define COMMON_INTERCEPTOR_ON_EXIT(ctx) OnExit()
-#define COMMON_INTERCEPTOR_LIBRARY_LOADED(filename, handle) \
-  CoverageUpdateMapping()
-#define COMMON_INTERCEPTOR_LIBRARY_UNLOADED() CoverageUpdateMapping()
+#define COMMON_INTERCEPTOR_LIBRARY_LOADED(filename, handle)
+#define COMMON_INTERCEPTOR_LIBRARY_UNLOADED()
 #define COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED (!asan_inited)
 #define COMMON_INTERCEPTOR_GET_TLS_RANGE(begin, end)                           \
   if (AsanThread *t = GetCurrentThread()) {                                    \
@@ -265,6 +161,7 @@
   } while (false)
 
 #include "sanitizer_common/sanitizer_common_interceptors.inc"
+#include "sanitizer_common/sanitizer_signal_interceptors.inc"
 
 // Syscall interceptors don't have contexts, we don't support suppressions
 // for them.
@@ -346,48 +243,6 @@
 DEFINE_REAL_PTHREAD_FUNCTIONS
 #endif  // ASAN_INTERCEPT_PTHREAD_CREATE
 
-#if ASAN_INTERCEPT_SIGNAL_AND_SIGACTION
-
-#if SANITIZER_ANDROID
-INTERCEPTOR(void*, bsd_signal, int signum, void *handler) {
-  if (!IsHandledDeadlySignal(signum) ||
-      common_flags()->allow_user_segv_handler) {
-    return REAL(bsd_signal)(signum, handler);
-  }
-  return 0;
-}
-#endif
-
-INTERCEPTOR(void*, signal, int signum, void *handler) {
-  if (!IsHandledDeadlySignal(signum) ||
-      common_flags()->allow_user_segv_handler) {
-    return REAL(signal)(signum, handler);
-  }
-  return nullptr;
-}
-
-INTERCEPTOR(int, sigaction, int signum, const struct sigaction *act,
-                            struct sigaction *oldact) {
-  if (!IsHandledDeadlySignal(signum) ||
-      common_flags()->allow_user_segv_handler) {
-    return REAL(sigaction)(signum, act, oldact);
-  }
-  return 0;
-}
-
-namespace __sanitizer {
-int real_sigaction(int signum, const void *act, void *oldact) {
-  return REAL(sigaction)(signum, (const struct sigaction *)act,
-                         (struct sigaction *)oldact);
-}
-} // namespace __sanitizer
-
-#elif SANITIZER_POSIX
-// We need to have defined REAL(sigaction) on posix systems.
-DEFINE_REAL(int, sigaction, int signum, const struct sigaction *act,
-    struct sigaction *oldact)
-#endif  // ASAN_INTERCEPT_SIGNAL_AND_SIGACTION
-
 #if ASAN_INTERCEPT_SWAPCONTEXT
 static void ClearShadowMemoryForContextStack(uptr stack, uptr ssize) {
   // Align to page size.
@@ -424,6 +279,11 @@
 }
 #endif  // ASAN_INTERCEPT_SWAPCONTEXT
 
+#if SANITIZER_NETBSD
+#define longjmp __longjmp14
+#define siglongjmp __siglongjmp14
+#endif
+
 INTERCEPTOR(void, longjmp, void *env, int val) {
   __asan_handle_no_return();
   REAL(longjmp)(env, val);
@@ -436,6 +296,13 @@
 }
 #endif
 
+#if ASAN_INTERCEPT___LONGJMP_CHK
+INTERCEPTOR(void, __longjmp_chk, void *env, int val) {
+  __asan_handle_no_return();
+  REAL(__longjmp_chk)(env, val);
+}
+#endif
+
 #if ASAN_INTERCEPT_SIGLONGJMP
 INTERCEPTOR(void, siglongjmp, void *env, int val) {
   __asan_handle_no_return();
@@ -451,18 +318,6 @@
 }
 #endif
 
-void *__asan_memcpy(void *to, const void *from, uptr size) {
-  ASAN_MEMCPY_IMPL(nullptr, to, from, size);
-}
-
-void *__asan_memset(void *block, int c, uptr size) {
-  ASAN_MEMSET_IMPL(nullptr, block, c, size);
-}
-
-void *__asan_memmove(void *to, const void *from, uptr size) {
-  ASAN_MEMMOVE_IMPL(nullptr, to, from, size);
-}
-
 #if ASAN_INTERCEPT_INDEX
 # if ASAN_USE_ALIAS_ATTRIBUTE_FOR_INDEX
 INTERCEPTOR(char*, index, const char *string, int c)
@@ -572,17 +427,6 @@
 }
 #endif // ASAN_INTERCEPT___STRDUP
 
-INTERCEPTOR(SIZE_T, wcslen, const wchar_t *s) {
-  void *ctx;
-  ASAN_INTERCEPTOR_ENTER(ctx, wcslen);
-  SIZE_T length = internal_wcslen(s);
-  if (!asan_init_is_running) {
-    ENSURE_ASAN_INITED();
-    ASAN_READ_RANGE(ctx, s, (length + 1) * sizeof(wchar_t));
-  }
-  return length;
-}
-
 INTERCEPTOR(char*, strncpy, char *to, const char *from, uptr size) {
   void *ctx;
   ASAN_INTERCEPTOR_ENTER(ctx, strncpy);
@@ -699,9 +543,7 @@
 #if ASAN_INTERCEPT_FORK
 INTERCEPTOR(int, fork, void) {
   ENSURE_ASAN_INITED();
-  if (common_flags()->coverage) CovBeforeFork();
   int pid = REAL(fork)();
-  if (common_flags()->coverage) CovAfterFork(pid);
   return pid;
 }
 #endif  // ASAN_INTERCEPT_FORK
@@ -713,11 +555,11 @@
   CHECK(!was_called_once);
   was_called_once = true;
   InitializeCommonInterceptors();
+  InitializeSignalInterceptors();
 
   // Intercept str* functions.
   ASAN_INTERCEPT_FUNC(strcat);  // NOLINT
   ASAN_INTERCEPT_FUNC(strcpy);  // NOLINT
-  ASAN_INTERCEPT_FUNC(wcslen);
   ASAN_INTERCEPT_FUNC(strncat);
   ASAN_INTERCEPT_FUNC(strncpy);
   ASAN_INTERCEPT_FUNC(strdup);
@@ -736,21 +578,18 @@
   ASAN_INTERCEPT_FUNC(strtoll);
 #endif
 
-  // Intecept signal- and jump-related functions.
+  // Intecept jump-related functions.
   ASAN_INTERCEPT_FUNC(longjmp);
-#if ASAN_INTERCEPT_SIGNAL_AND_SIGACTION
-  ASAN_INTERCEPT_FUNC(sigaction);
-#if SANITIZER_ANDROID
-  ASAN_INTERCEPT_FUNC(bsd_signal);
-#endif
-  ASAN_INTERCEPT_FUNC(signal);
-#endif
+
 #if ASAN_INTERCEPT_SWAPCONTEXT
   ASAN_INTERCEPT_FUNC(swapcontext);
 #endif
 #if ASAN_INTERCEPT__LONGJMP
   ASAN_INTERCEPT_FUNC(_longjmp);
 #endif
+#if ASAN_INTERCEPT___LONGJMP_CHK
+  ASAN_INTERCEPT_FUNC(__longjmp_chk);
+#endif
 #if ASAN_INTERCEPT_SIGLONGJMP
   ASAN_INTERCEPT_FUNC(siglongjmp);
 #endif
@@ -785,3 +624,5 @@
 }
 
 } // namespace __asan
+
+#endif  // !SANITIZER_FUCHSIA
diff --git a/lib/asan/asan_interceptors.h b/lib/asan/asan_interceptors.h
index d747c31..7728b35 100644
--- a/lib/asan/asan_interceptors.h
+++ b/lib/asan/asan_interceptors.h
@@ -15,9 +15,30 @@
 #define ASAN_INTERCEPTORS_H
 
 #include "asan_internal.h"
+#include "asan_interceptors_memintrinsics.h"
 #include "interception/interception.h"
 #include "sanitizer_common/sanitizer_platform_interceptors.h"
 
+namespace __asan {
+
+void InitializeAsanInterceptors();
+void InitializePlatformInterceptors();
+
+#define ENSURE_ASAN_INITED()      \
+  do {                            \
+    CHECK(!asan_init_is_running); \
+    if (UNLIKELY(!asan_inited)) { \
+      AsanInitFromRtl();          \
+    }                             \
+  } while (0)
+
+}  // namespace __asan
+
+// There is no general interception at all on Fuchsia.
+// Only the functions in asan_interceptors_memintrinsics.h are
+// really defined to replace libc functions.
+#if !SANITIZER_FUCHSIA
+
 // Use macro to describe if specific function should be
 // intercepted on a given platform.
 #if !SANITIZER_WINDOWS
@@ -34,7 +55,7 @@
 # define ASAN_INTERCEPT_FORK 0
 #endif
 
-#if SANITIZER_FREEBSD || SANITIZER_LINUX
+#if SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_NETBSD
 # define ASAN_USE_ALIAS_ATTRIBUTE_FOR_INDEX 1
 #else
 # define ASAN_USE_ALIAS_ATTRIBUTE_FOR_INDEX 0
@@ -47,17 +68,17 @@
 #endif
 
 #if !SANITIZER_WINDOWS
-# define ASAN_INTERCEPT_SIGNAL_AND_SIGACTION 1
-#else
-# define ASAN_INTERCEPT_SIGNAL_AND_SIGACTION 0
-#endif
-
-#if !SANITIZER_WINDOWS
 # define ASAN_INTERCEPT_SIGLONGJMP 1
 #else
 # define ASAN_INTERCEPT_SIGLONGJMP 0
 #endif
 
+#if SANITIZER_LINUX && !SANITIZER_ANDROID
+# define ASAN_INTERCEPT___LONGJMP_CHK 1
+#else
+# define ASAN_INTERCEPT___LONGJMP_CHK 0
+#endif
+
 // Android bug: https://code.google.com/p/android/issues/detail?id=61799
 #if ASAN_HAS_EXCEPTIONS && !SANITIZER_WINDOWS && \
     !(SANITIZER_ANDROID && defined(__i386))
@@ -79,8 +100,6 @@
 #endif
 
 DECLARE_REAL(int, memcmp, const void *a1, const void *a2, uptr size)
-DECLARE_REAL(void*, memcpy, void *to, const void *from, uptr size)
-DECLARE_REAL(void*, memset, void *block, int c, uptr size)
 DECLARE_REAL(char*, strchr, const char *str, int c)
 DECLARE_REAL(SIZE_T, strlen, const char *s)
 DECLARE_REAL(char*, strncpy, char *to, const char *from, uptr size)
@@ -107,18 +126,6 @@
 #define ASAN_INTERCEPT_FUNC(name)
 #endif  // SANITIZER_MAC
 
-namespace __asan {
-
-void InitializeAsanInterceptors();
-void InitializePlatformInterceptors();
-
-#define ENSURE_ASAN_INITED() do { \
-  CHECK(!asan_init_is_running); \
-  if (UNLIKELY(!asan_inited)) { \
-    AsanInitFromRtl(); \
-  } \
-} while (0)
-
-}  // namespace __asan
+#endif  // !SANITIZER_FUCHSIA
 
 #endif  // ASAN_INTERCEPTORS_H
diff --git a/lib/asan/asan_interceptors_memintrinsics.cc b/lib/asan/asan_interceptors_memintrinsics.cc
new file mode 100644
index 0000000..c89cb01
--- /dev/null
+++ b/lib/asan/asan_interceptors_memintrinsics.cc
@@ -0,0 +1,44 @@
+//===-- asan_interceptors_memintrinsics.cc --------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// ASan versions of memcpy, memmove, and memset.
+//===---------------------------------------------------------------------===//
+
+#include "asan_interceptors_memintrinsics.h"
+#include "asan_report.h"
+#include "asan_stack.h"
+#include "asan_suppressions.h"
+
+using namespace __asan;  // NOLINT
+
+void *__asan_memcpy(void *to, const void *from, uptr size) {
+  ASAN_MEMCPY_IMPL(nullptr, to, from, size);
+}
+
+void *__asan_memset(void *block, int c, uptr size) {
+  ASAN_MEMSET_IMPL(nullptr, block, c, size);
+}
+
+void *__asan_memmove(void *to, const void *from, uptr size) {
+  ASAN_MEMMOVE_IMPL(nullptr, to, from, size);
+}
+
+#if SANITIZER_FUCHSIA
+
+// Fuchsia doesn't use sanitizer_common_interceptors.inc, but the only
+// things there it wants are these three.  Just define them as aliases
+// here rather than repeating the contents.
+
+decltype(memcpy) memcpy[[gnu::alias("__asan_memcpy")]];
+decltype(memmove) memmove[[gnu::alias("__asan_memmove")]];
+decltype(memset) memset[[gnu::alias("__asan_memset")]];
+
+#endif  // SANITIZER_FUCHSIA
diff --git a/lib/asan/asan_interceptors_memintrinsics.h b/lib/asan/asan_interceptors_memintrinsics.h
new file mode 100644
index 0000000..5a8339a
--- /dev/null
+++ b/lib/asan/asan_interceptors_memintrinsics.h
@@ -0,0 +1,148 @@
+//===-- asan_interceptors_memintrinsics.h -----------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// ASan-private header for asan_memintrin.cc
+//===---------------------------------------------------------------------===//
+#ifndef ASAN_MEMINTRIN_H
+#define ASAN_MEMINTRIN_H
+
+#include "asan_interface_internal.h"
+#include "asan_internal.h"
+#include "asan_mapping.h"
+#include "interception/interception.h"
+
+DECLARE_REAL(void*, memcpy, void *to, const void *from, uptr size)
+DECLARE_REAL(void*, memset, void *block, int c, uptr size)
+
+namespace __asan {
+
+// Return true if we can quickly decide that the region is unpoisoned.
+// We assume that a redzone is at least 16 bytes.
+static inline bool QuickCheckForUnpoisonedRegion(uptr beg, uptr size) {
+  if (size == 0) return true;
+  if (size <= 32)
+    return !AddressIsPoisoned(beg) &&
+           !AddressIsPoisoned(beg + size - 1) &&
+           !AddressIsPoisoned(beg + size / 2);
+  if (size <= 64)
+    return !AddressIsPoisoned(beg) &&
+           !AddressIsPoisoned(beg + size / 4) &&
+           !AddressIsPoisoned(beg + size - 1) &&
+           !AddressIsPoisoned(beg + 3 * size / 4) &&
+           !AddressIsPoisoned(beg + size / 2);
+  return false;
+}
+
+struct AsanInterceptorContext {
+  const char *interceptor_name;
+};
+
+// We implement ACCESS_MEMORY_RANGE, ASAN_READ_RANGE,
+// and ASAN_WRITE_RANGE as macro instead of function so
+// that no extra frames are created, and stack trace contains
+// relevant information only.
+// We check all shadow bytes.
+#define ACCESS_MEMORY_RANGE(ctx, offset, size, isWrite) do {            \
+    uptr __offset = (uptr)(offset);                                     \
+    uptr __size = (uptr)(size);                                         \
+    uptr __bad = 0;                                                     \
+    if (__offset > __offset + __size) {                                 \
+      GET_STACK_TRACE_FATAL_HERE;                                       \
+      ReportStringFunctionSizeOverflow(__offset, __size, &stack);       \
+    }                                                                   \
+    if (!QuickCheckForUnpoisonedRegion(__offset, __size) &&             \
+        (__bad = __asan_region_is_poisoned(__offset, __size))) {        \
+      AsanInterceptorContext *_ctx = (AsanInterceptorContext *)ctx;     \
+      bool suppressed = false;                                          \
+      if (_ctx) {                                                       \
+        suppressed = IsInterceptorSuppressed(_ctx->interceptor_name);   \
+        if (!suppressed && HaveStackTraceBasedSuppressions()) {         \
+          GET_STACK_TRACE_FATAL_HERE;                                   \
+          suppressed = IsStackTraceSuppressed(&stack);                  \
+        }                                                               \
+      }                                                                 \
+      if (!suppressed) {                                                \
+        GET_CURRENT_PC_BP_SP;                                           \
+        ReportGenericError(pc, bp, sp, __bad, isWrite, __size, 0, false);\
+      }                                                                 \
+    }                                                                   \
+  } while (0)
+
+// memcpy is called during __asan_init() from the internals of printf(...).
+// We do not treat memcpy with to==from as a bug.
+// See http://llvm.org/bugs/show_bug.cgi?id=11763.
+#define ASAN_MEMCPY_IMPL(ctx, to, from, size)                           \
+  do {                                                                  \
+    if (UNLIKELY(!asan_inited)) return internal_memcpy(to, from, size); \
+    if (asan_init_is_running) {                                         \
+      return REAL(memcpy)(to, from, size);                              \
+    }                                                                   \
+    ENSURE_ASAN_INITED();                                               \
+    if (flags()->replace_intrin) {                                      \
+      if (to != from) {                                                 \
+        CHECK_RANGES_OVERLAP("memcpy", to, size, from, size);           \
+      }                                                                 \
+      ASAN_READ_RANGE(ctx, from, size);                                 \
+      ASAN_WRITE_RANGE(ctx, to, size);                                  \
+    }                                                                   \
+    return REAL(memcpy)(to, from, size);                                \
+  } while (0)
+
+// memset is called inside Printf.
+#define ASAN_MEMSET_IMPL(ctx, block, c, size)                           \
+  do {                                                                  \
+    if (UNLIKELY(!asan_inited)) return internal_memset(block, c, size); \
+    if (asan_init_is_running) {                                         \
+      return REAL(memset)(block, c, size);                              \
+    }                                                                   \
+    ENSURE_ASAN_INITED();                                               \
+    if (flags()->replace_intrin) {                                      \
+      ASAN_WRITE_RANGE(ctx, block, size);                               \
+    }                                                                   \
+    return REAL(memset)(block, c, size);                                \
+  } while (0)
+
+#define ASAN_MEMMOVE_IMPL(ctx, to, from, size)                           \
+  do {                                                                   \
+    if (UNLIKELY(!asan_inited)) return internal_memmove(to, from, size); \
+    ENSURE_ASAN_INITED();                                                \
+    if (flags()->replace_intrin) {                                       \
+      ASAN_READ_RANGE(ctx, from, size);                                  \
+      ASAN_WRITE_RANGE(ctx, to, size);                                   \
+    }                                                                    \
+    return internal_memmove(to, from, size);                             \
+  } while (0)
+
+#define ASAN_READ_RANGE(ctx, offset, size) \
+  ACCESS_MEMORY_RANGE(ctx, offset, size, false)
+#define ASAN_WRITE_RANGE(ctx, offset, size) \
+  ACCESS_MEMORY_RANGE(ctx, offset, size, true)
+
+// Behavior of functions like "memcpy" or "strcpy" is undefined
+// if memory intervals overlap. We report error in this case.
+// Macro is used to avoid creation of new frames.
+static inline bool RangesOverlap(const char *offset1, uptr length1,
+                                 const char *offset2, uptr length2) {
+  return !((offset1 + length1 <= offset2) || (offset2 + length2 <= offset1));
+}
+#define CHECK_RANGES_OVERLAP(name, _offset1, length1, _offset2, length2) do { \
+  const char *offset1 = (const char*)_offset1; \
+  const char *offset2 = (const char*)_offset2; \
+  if (RangesOverlap(offset1, length1, offset2, length2)) { \
+    GET_STACK_TRACE_FATAL_HERE; \
+    ReportStringFunctionMemoryRangesOverlap(name, offset1, length1, \
+                                            offset2, length2, &stack); \
+  } \
+} while (0)
+
+}  // namespace __asan
+
+#endif  // ASAN_MEMINTRIN_H
diff --git a/lib/asan/asan_interface.inc b/lib/asan/asan_interface.inc
index 351be4d..e65f617 100644
--- a/lib/asan/asan_interface.inc
+++ b/lib/asan/asan_interface.inc
@@ -64,6 +64,7 @@
 INTERFACE_FUNCTION(__asan_print_accumulated_stats)
 INTERFACE_FUNCTION(__asan_region_is_poisoned)
 INTERFACE_FUNCTION(__asan_register_globals)
+INTERFACE_FUNCTION(__asan_register_elf_globals)
 INTERFACE_FUNCTION(__asan_register_image_globals)
 INTERFACE_FUNCTION(__asan_report_error)
 INTERFACE_FUNCTION(__asan_report_exp_load1)
@@ -149,6 +150,7 @@
 INTERFACE_FUNCTION(__asan_unpoison_memory_region)
 INTERFACE_FUNCTION(__asan_unpoison_stack_memory)
 INTERFACE_FUNCTION(__asan_unregister_globals)
+INTERFACE_FUNCTION(__asan_unregister_elf_globals)
 INTERFACE_FUNCTION(__asan_unregister_image_globals)
 INTERFACE_FUNCTION(__asan_version_mismatch_check_v8)
 INTERFACE_FUNCTION(__sanitizer_finish_switch_fiber)
diff --git a/lib/asan/asan_interface_internal.h b/lib/asan/asan_interface_internal.h
index b18c315..b974c0c 100644
--- a/lib/asan/asan_interface_internal.h
+++ b/lib/asan/asan_interface_internal.h
@@ -67,6 +67,11 @@
   SANITIZER_INTERFACE_ATTRIBUTE
   void __asan_unregister_image_globals(uptr *flag);
 
+  SANITIZER_INTERFACE_ATTRIBUTE
+  void __asan_register_elf_globals(uptr *flag, void *start, void *stop);
+  SANITIZER_INTERFACE_ATTRIBUTE
+  void __asan_unregister_elf_globals(uptr *flag, void *start, void *stop);
+
   // These two functions should be called by the instrumented code.
   // 'globals' is an array of structures describing 'n' globals.
   SANITIZER_INTERFACE_ATTRIBUTE
diff --git a/lib/asan/asan_internal.h b/lib/asan/asan_internal.h
index 3b70695..19133e5 100644
--- a/lib/asan/asan_internal.h
+++ b/lib/asan/asan_internal.h
@@ -69,16 +69,24 @@
 bool IsSystemHeapAddress(uptr addr);
 
 // asan_rtl.cc
+void PrintAddressSpaceLayout();
 void NORETURN ShowStatsAndAbort();
 
+// asan_shadow_setup.cc
+void InitializeShadowMemory();
+
 // asan_malloc_linux.cc / asan_malloc_mac.cc
 void ReplaceSystemMalloc();
 
 // asan_linux.cc / asan_mac.cc / asan_win.cc
+uptr FindDynamicShadowStart();
 void *AsanDoesNotSupportStaticLinkage();
 void AsanCheckDynamicRTPrereqs();
 void AsanCheckIncompatibleRT();
 
+// asan_thread.cc
+AsanThread *CreateMainThread();
+
 // Support function for __asan_(un)register_image_globals. Searches for the
 // loaded image containing `needle' and then enumerates all global metadata
 // structures declared in that image, applying `op' (e.g.,
diff --git a/lib/asan/asan_linux.cc b/lib/asan/asan_linux.cc
index 6d150de..a949a98 100644
--- a/lib/asan/asan_linux.cc
+++ b/lib/asan/asan_linux.cc
@@ -13,7 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "sanitizer_common/sanitizer_platform.h"
-#if SANITIZER_FREEBSD || SANITIZER_LINUX
+#if SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_NETBSD
 
 #include "asan_interceptors.h"
 #include "asan_internal.h"
@@ -42,6 +42,10 @@
 #if SANITIZER_ANDROID || SANITIZER_FREEBSD
 #include <ucontext.h>
 extern "C" void* _DYNAMIC;
+#elif SANITIZER_NETBSD
+#include <link_elf.h>
+#include <ucontext.h>
+extern Elf_Dyn _DYNAMIC;
 #else
 #include <sys/ucontext.h>
 #include <link.h>
@@ -77,6 +81,11 @@
   return &_DYNAMIC;  // defined in link.h
 }
 
+uptr FindDynamicShadowStart() {
+  UNREACHABLE("FindDynamicShadowStart is not available");
+  return 0;
+}
+
 void AsanApplyToGlobals(globals_op_fptr op, const void *needle) {
   UNIMPLEMENTED();
 }
@@ -96,6 +105,15 @@
   if (internal_strncmp(info->dlpi_name, "linux-", sizeof("linux-") - 1) == 0)
     return 0;
 
+#if SANITIZER_FREEBSD || SANITIZER_NETBSD
+  // Ignore first entry (the main program)
+  char **p = (char **)data;
+  if (!(*p)) {
+    *p = (char *)-1;
+    return 0;
+  }
+#endif
+
   *(const char **)data = info->dlpi_name;
   return 1;
 }
@@ -111,7 +129,7 @@
 }
 
 void AsanCheckDynamicRTPrereqs() {
-  if (!ASAN_DYNAMIC)
+  if (!ASAN_DYNAMIC || !flags()->verify_asan_link_order)
     return;
 
   // Ensure that dynamic RT is the first DSO in the list
@@ -140,9 +158,9 @@
       // system libraries, causing crashes later in ASan initialization.
       MemoryMappingLayout proc_maps(/*cache_enabled*/true);
       char filename[128];
-      while (proc_maps.Next(nullptr, nullptr, nullptr, filename,
-                            sizeof(filename), nullptr)) {
-        if (IsDynamicRTName(filename)) {
+      MemoryMappedSegment segment(filename, sizeof(filename));
+      while (proc_maps.Next(&segment)) {
+        if (IsDynamicRTName(segment.filename)) {
           Report("Your application is linked against "
                  "incompatible ASan runtimes.\n");
           Die();
@@ -174,4 +192,4 @@
 
 } // namespace __asan
 
-#endif // SANITIZER_FREEBSD || SANITIZER_LINUX
+#endif  // SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_NETBSD
diff --git a/lib/asan/asan_mac.cc b/lib/asan/asan_mac.cc
index 3c93b26..b7af1a5 100644
--- a/lib/asan/asan_mac.cc
+++ b/lib/asan/asan_mac.cc
@@ -55,6 +55,29 @@
   return 0;
 }
 
+uptr FindDynamicShadowStart() {
+  uptr granularity = GetMmapGranularity();
+  uptr alignment = 8 * granularity;
+  uptr left_padding = granularity;
+  uptr space_size = kHighShadowEnd + left_padding;
+
+  uptr largest_gap_found = 0;
+  uptr shadow_start = FindAvailableMemoryRange(space_size, alignment,
+                                               granularity, &largest_gap_found);
+  // If the shadow doesn't fit, restrict the address space to make it fit.
+  if (shadow_start == 0) {
+    uptr new_max_vm = RoundDownTo(largest_gap_found << SHADOW_SCALE, alignment);
+    RestrictMemoryToMaxAddress(new_max_vm);
+    kHighMemEnd = new_max_vm - 1;
+    space_size = kHighShadowEnd + left_padding;
+    shadow_start =
+        FindAvailableMemoryRange(space_size, alignment, granularity, nullptr);
+  }
+  CHECK_NE((uptr)0, shadow_start);
+  CHECK(IsAligned(shadow_start, alignment));
+  return shadow_start;
+}
+
 // No-op. Mac does not support static linkage anyway.
 void AsanCheckDynamicRTPrereqs() {}
 
diff --git a/lib/asan/asan_malloc_linux.cc b/lib/asan/asan_malloc_linux.cc
index 8c99d3b..118da9c 100644
--- a/lib/asan/asan_malloc_linux.cc
+++ b/lib/asan/asan_malloc_linux.cc
@@ -15,7 +15,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "sanitizer_common/sanitizer_platform.h"
-#if SANITIZER_FREEBSD || SANITIZER_LINUX
+#if SANITIZER_FREEBSD || SANITIZER_FUCHSIA || SANITIZER_LINUX || \
+    SANITIZER_NETBSD
 
 #include "sanitizer_common/sanitizer_tls_get_addr.h"
 #include "asan_allocator.h"
@@ -30,9 +31,9 @@
 static const uptr kDlsymAllocPoolSize = 1024;
 static uptr alloc_memory_for_dlsym[kDlsymAllocPoolSize];
 
-static bool IsInDlsymAllocPool(const void *ptr) {
+static INLINE bool IsInDlsymAllocPool(const void *ptr) {
   uptr off = (uptr)ptr - (uptr)alloc_memory_for_dlsym;
-  return off < sizeof(alloc_memory_for_dlsym);
+  return off < allocated_for_dlsym * sizeof(alloc_memory_for_dlsym[0]);
 }
 
 static void *AllocateFromLocalPool(uptr size_in_bytes) {
@@ -43,6 +44,26 @@
   return mem;
 }
 
+static INLINE bool MaybeInDlsym() {
+  // Fuchsia doesn't use dlsym-based interceptors.
+  return !SANITIZER_FUCHSIA && asan_init_is_running;
+}
+
+static void *ReallocFromLocalPool(void *ptr, uptr size) {
+  const uptr offset = (uptr)ptr - (uptr)alloc_memory_for_dlsym;
+  const uptr copy_size = Min(size, kDlsymAllocPoolSize - offset);
+  void *new_ptr;
+  if (UNLIKELY(MaybeInDlsym())) {
+    new_ptr = AllocateFromLocalPool(size);
+  } else {
+    ENSURE_ASAN_INITED();
+    GET_STACK_TRACE_MALLOC;
+    new_ptr = asan_malloc(size, &stack);
+  }
+  internal_memcpy(new_ptr, ptr, copy_size);
+  return new_ptr;
+}
+
 INTERCEPTOR(void, free, void *ptr) {
   GET_STACK_TRACE_FREE;
   if (UNLIKELY(IsInDlsymAllocPool(ptr)))
@@ -60,36 +81,30 @@
 #endif // SANITIZER_INTERCEPT_CFREE
 
 INTERCEPTOR(void*, malloc, uptr size) {
-  if (UNLIKELY(!asan_inited))
+  if (UNLIKELY(MaybeInDlsym()))
     // Hack: dlsym calls malloc before REAL(malloc) is retrieved from dlsym.
     return AllocateFromLocalPool(size);
+  ENSURE_ASAN_INITED();
   GET_STACK_TRACE_MALLOC;
   return asan_malloc(size, &stack);
 }
 
 INTERCEPTOR(void*, calloc, uptr nmemb, uptr size) {
-  if (UNLIKELY(!asan_inited))
+  if (UNLIKELY(MaybeInDlsym()))
     // Hack: dlsym calls calloc before REAL(calloc) is retrieved from dlsym.
     return AllocateFromLocalPool(nmemb * size);
+  ENSURE_ASAN_INITED();
   GET_STACK_TRACE_MALLOC;
   return asan_calloc(nmemb, size, &stack);
 }
 
 INTERCEPTOR(void*, realloc, void *ptr, uptr size) {
+  if (UNLIKELY(IsInDlsymAllocPool(ptr)))
+    return ReallocFromLocalPool(ptr, size);
+  if (UNLIKELY(MaybeInDlsym()))
+    return AllocateFromLocalPool(size);
+  ENSURE_ASAN_INITED();
   GET_STACK_TRACE_MALLOC;
-  if (UNLIKELY(IsInDlsymAllocPool(ptr))) {
-    uptr offset = (uptr)ptr - (uptr)alloc_memory_for_dlsym;
-    uptr copy_size = Min(size, kDlsymAllocPoolSize - offset);
-    void *new_ptr;
-    if (UNLIKELY(!asan_inited)) {
-      new_ptr = AllocateFromLocalPool(size);
-    } else {
-      copy_size = size;
-      new_ptr = asan_malloc(copy_size, &stack);
-    }
-    internal_memcpy(new_ptr, ptr, copy_size);
-    return new_ptr;
-  }
   return asan_realloc(ptr, size, &stack);
 }
 
@@ -220,4 +235,5 @@
 }  // namespace __asan
 #endif  // SANITIZER_ANDROID
 
-#endif  // SANITIZER_FREEBSD || SANITIZER_LINUX
+#endif  // SANITIZER_FREEBSD || SANITIZER_FUCHSIA || SANITIZER_LINUX ||
+        // SANITIZER_NETBSD
diff --git a/lib/asan/asan_malloc_win.cc b/lib/asan/asan_malloc_win.cc
index 5163c04..efa0582 100644
--- a/lib/asan/asan_malloc_win.cc
+++ b/lib/asan/asan_malloc_win.cc
@@ -100,7 +100,7 @@
 
 ALLOCATION_FUNCTION_ATTRIBUTE
 void *_realloc_dbg(void *ptr, size_t size, int) {
-  CHECK(!"_realloc_dbg should not exist!");
+  UNREACHABLE("_realloc_dbg should not exist!");
   return 0;
 }
 
diff --git a/lib/asan/asan_mapping.h b/lib/asan/asan_mapping.h
index d8e60a4..fcd9511 100644
--- a/lib/asan/asan_mapping.h
+++ b/lib/asan/asan_mapping.h
@@ -115,6 +115,13 @@
 // || `[0x40000000, 0x47ffffff]` || LowShadow  ||
 // || `[0x00000000, 0x3fffffff]` || LowMem     ||
 //
+// Shadow mapping on NetBSD/x86-64 with SHADOW_OFFSET == 0x400000000000:
+// || `[0x4feffffffe01, 0x7f7ffffff000]` || HighMem    ||
+// || `[0x49fdffffffc0, 0x4feffffffe00]` || HighShadow ||
+// || `[0x480000000000, 0x49fdffffffbf]` || ShadowGap  ||
+// || `[0x400000000000, 0x47ffffffffff]` || LowShadow  ||
+// || `[0x000000000000, 0x3fffffffffff]` || LowMem     ||
+//
 // Default Windows/i386 mapping:
 // (the exact location of HighShadow/HighMem may vary depending
 //  on WoW64, /LARGEADDRESSAWARE, etc).
@@ -140,12 +147,14 @@
 static const u64 kSystemZ_ShadowOffset64 = 1ULL << 52;
 static const u64 kFreeBSD_ShadowOffset32 = 1ULL << 30;  // 0x40000000
 static const u64 kFreeBSD_ShadowOffset64 = 1ULL << 46;  // 0x400000000000
+static const u64 kNetBSD_ShadowOffset64 = 1ULL << 46;  // 0x400000000000
 static const u64 kWindowsShadowOffset32 = 3ULL << 28;  // 0x30000000
 
 #define SHADOW_SCALE kDefaultShadowScale
 
-
-#if SANITIZER_WORDSIZE == 32
+#if SANITIZER_FUCHSIA
+#  define SHADOW_OFFSET (0)
+#elif SANITIZER_WORDSIZE == 32
 #  if SANITIZER_ANDROID
 #    define SHADOW_OFFSET (0)
 #  elif defined(__mips__)
@@ -178,6 +187,8 @@
 #    define SHADOW_OFFSET kSystemZ_ShadowOffset64
 #  elif SANITIZER_FREEBSD
 #    define SHADOW_OFFSET kFreeBSD_ShadowOffset64
+#  elif SANITIZER_NETBSD
+#    define SHADOW_OFFSET kNetBSD_ShadowOffset64
 #  elif SANITIZER_MAC
 #   define SHADOW_OFFSET kDefaultShadowOffset64
 #  elif defined(__mips64)
@@ -191,7 +202,6 @@
 
 #define SHADOW_GRANULARITY (1ULL << SHADOW_SCALE)
 #define MEM_TO_SHADOW(mem) (((mem) >> SHADOW_SCALE) + (SHADOW_OFFSET))
-#define SHADOW_TO_MEM(shadow) (((shadow) - SHADOW_OFFSET) << SHADOW_SCALE)
 
 #define kLowMemBeg      0
 #define kLowMemEnd      (SHADOW_OFFSET ? SHADOW_OFFSET - 1 : 0)
diff --git a/lib/asan/asan_memory_profile.cc b/lib/asan/asan_memory_profile.cc
index c2678b9..603284c 100644
--- a/lib/asan/asan_memory_profile.cc
+++ b/lib/asan/asan_memory_profile.cc
@@ -48,7 +48,7 @@
     }
   }
 
-  void Print(uptr top_percent) {
+  void Print(uptr top_percent, uptr max_number_of_contexts) {
     InternalSort(&allocations_, allocations_.size(),
                  [](const AllocationSite &a, const AllocationSite &b) {
                    return a.total_size > b.total_size;
@@ -57,12 +57,14 @@
     uptr total_shown = 0;
     Printf("Live Heap Allocations: %zd bytes in %zd chunks; quarantined: "
            "%zd bytes in %zd chunks; %zd other chunks; total chunks: %zd; "
-           "showing top %zd%%\n",
+           "showing top %zd%% (at most %zd unique contexts)\n",
            total_allocated_user_size_, total_allocated_count_,
            total_quarantined_user_size_, total_quarantined_count_,
            total_other_count_, total_allocated_count_ +
-           total_quarantined_count_ + total_other_count_, top_percent);
-    for (uptr i = 0; i < allocations_.size(); i++) {
+           total_quarantined_count_ + total_other_count_, top_percent,
+           max_number_of_contexts);
+    for (uptr i = 0; i < Min(allocations_.size(), max_number_of_contexts);
+         i++) {
       auto &a = allocations_[i];
       Printf("%zd byte(s) (%zd%%) in %zd allocation(s)\n", a.total_size,
              a.total_size * 100 / total_allocated_user_size_, a.count);
@@ -103,16 +105,26 @@
                             void *argument) {
   HeapProfile hp;
   __lsan::ForEachChunk(ChunkCallback, &hp);
-  hp.Print(reinterpret_cast<uptr>(argument));
+  uptr *Arg = reinterpret_cast<uptr*>(argument);
+  hp.Print(Arg[0], Arg[1]);
+
+  if (Verbosity())
+    __asan_print_accumulated_stats();
 }
 
 }  // namespace __asan
 
+#endif  // CAN_SANITIZE_LEAKS
+
 extern "C" {
 SANITIZER_INTERFACE_ATTRIBUTE
-void __sanitizer_print_memory_profile(uptr top_percent) {
-  __sanitizer::StopTheWorld(__asan::MemoryProfileCB, (void*)top_percent);
+void __sanitizer_print_memory_profile(uptr top_percent,
+                                      uptr max_number_of_contexts) {
+#if CAN_SANITIZE_LEAKS
+  uptr Arg[2];
+  Arg[0] = top_percent;
+  Arg[1] = max_number_of_contexts;
+  __sanitizer::StopTheWorld(__asan::MemoryProfileCB, Arg);
+#endif  // CAN_SANITIZE_LEAKS
 }
 }  // extern "C"
-
-#endif  // CAN_SANITIZE_LEAKS
diff --git a/lib/asan/asan_new_delete.cc b/lib/asan/asan_new_delete.cc
index 3283fb3..072f027 100644
--- a/lib/asan/asan_new_delete.cc
+++ b/lib/asan/asan_new_delete.cc
@@ -25,22 +25,26 @@
 // dllexport would normally do. We need to export them in order to make the
 // VS2015 dynamic CRT (MD) work.
 #if SANITIZER_WINDOWS
-# define CXX_OPERATOR_ATTRIBUTE
-# ifdef _WIN64
-#  pragma comment(linker, "/export:??2@YAPEAX_K@Z")   // operator new
-#  pragma comment(linker, "/export:??3@YAXPEAX@Z")    // operator delete
-#  pragma comment(linker, "/export:??3@YAXPEAX_K@Z")  // sized operator delete
-#  pragma comment(linker, "/export:??_U@YAPEAX_K@Z")  // operator new[]
-#  pragma comment(linker, "/export:??_V@YAXPEAX@Z")   // operator delete[]
-# else
-#  pragma comment(linker, "/export:??2@YAPAXI@Z")   // operator new
-#  pragma comment(linker, "/export:??3@YAXPAX@Z")   // operator delete
-#  pragma comment(linker, "/export:??3@YAXPAXI@Z")  // sized operator delete
-#  pragma comment(linker, "/export:??_U@YAPAXI@Z")  // operator new[]
-#  pragma comment(linker, "/export:??_V@YAXPAX@Z")  // operator delete[]
-# endif
+#define CXX_OPERATOR_ATTRIBUTE
+#define COMMENT_EXPORT(sym) __pragma(comment(linker, "/export:" sym))
+#ifdef _WIN64
+COMMENT_EXPORT("??2@YAPEAX_K@Z")                     // operator new
+COMMENT_EXPORT("??2@YAPEAX_KAEBUnothrow_t@std@@@Z")  // operator new nothrow
+COMMENT_EXPORT("??3@YAXPEAX@Z")                      // operator delete
+COMMENT_EXPORT("??3@YAXPEAX_K@Z")                    // sized operator delete
+COMMENT_EXPORT("??_U@YAPEAX_K@Z")                    // operator new[]
+COMMENT_EXPORT("??_V@YAXPEAX@Z")                     // operator delete[]
 #else
-# define CXX_OPERATOR_ATTRIBUTE INTERCEPTOR_ATTRIBUTE
+COMMENT_EXPORT("??2@YAPAXI@Z")                    // operator new
+COMMENT_EXPORT("??2@YAPAXIABUnothrow_t@std@@@Z")  // operator new nothrow
+COMMENT_EXPORT("??3@YAXPAX@Z")                    // operator delete
+COMMENT_EXPORT("??3@YAXPAXI@Z")                   // sized operator delete
+COMMENT_EXPORT("??_U@YAPAXI@Z")                   // operator new[]
+COMMENT_EXPORT("??_V@YAXPAX@Z")                   // operator delete[]
+#endif
+#undef COMMENT_EXPORT
+#else
+#define CXX_OPERATOR_ATTRIBUTE INTERCEPTOR_ATTRIBUTE
 #endif
 
 using namespace __asan;  // NOLINT
@@ -63,12 +67,17 @@
 enum class align_val_t: size_t {};
 }  // namespace std
 
-#define OPERATOR_NEW_BODY(type) \
+// TODO(alekseys): throw std::bad_alloc instead of dying on OOM.
+#define OPERATOR_NEW_BODY(type, nothrow) \
   GET_STACK_TRACE_MALLOC;\
-  return asan_memalign(0, size, &stack, type);
-#define OPERATOR_NEW_BODY_ALIGN(type) \
+  void *res = asan_memalign(0, size, &stack, type);\
+  if (!nothrow && UNLIKELY(!res)) DieOnFailure::OnOOM();\
+  return res;
+#define OPERATOR_NEW_BODY_ALIGN(type, nothrow) \
   GET_STACK_TRACE_MALLOC;\
-  return asan_memalign((uptr)align, size, &stack, type);
+  void *res = asan_memalign((uptr)align, size, &stack, type);\
+  if (!nothrow && UNLIKELY(!res)) DieOnFailure::OnOOM();\
+  return res;
 
 // On OS X it's not enough to just provide our own 'operator new' and
 // 'operator delete' implementations, because they're going to be in the
@@ -79,112 +88,106 @@
 // OS X we need to intercept them using their mangled names.
 #if !SANITIZER_MAC
 CXX_OPERATOR_ATTRIBUTE
-void *operator new(size_t size) { OPERATOR_NEW_BODY(FROM_NEW); }
+void *operator new(size_t size)
+{ OPERATOR_NEW_BODY(FROM_NEW, false /*nothrow*/); }
 CXX_OPERATOR_ATTRIBUTE
-void *operator new[](size_t size) { OPERATOR_NEW_BODY(FROM_NEW_BR); }
+void *operator new[](size_t size)
+{ OPERATOR_NEW_BODY(FROM_NEW_BR, false /*nothrow*/); }
 CXX_OPERATOR_ATTRIBUTE
 void *operator new(size_t size, std::nothrow_t const&)
-{ OPERATOR_NEW_BODY(FROM_NEW); }
+{ OPERATOR_NEW_BODY(FROM_NEW, true /*nothrow*/); }
 CXX_OPERATOR_ATTRIBUTE
 void *operator new[](size_t size, std::nothrow_t const&)
-{ OPERATOR_NEW_BODY(FROM_NEW_BR); }
+{ OPERATOR_NEW_BODY(FROM_NEW_BR, true /*nothrow*/); }
 CXX_OPERATOR_ATTRIBUTE
 void *operator new(size_t size, std::align_val_t align)
-{ OPERATOR_NEW_BODY_ALIGN(FROM_NEW); }
+{ OPERATOR_NEW_BODY_ALIGN(FROM_NEW, false /*nothrow*/); }
 CXX_OPERATOR_ATTRIBUTE
 void *operator new[](size_t size, std::align_val_t align)
-{ OPERATOR_NEW_BODY_ALIGN(FROM_NEW_BR); }
+{ OPERATOR_NEW_BODY_ALIGN(FROM_NEW_BR, false /*nothrow*/); }
 CXX_OPERATOR_ATTRIBUTE
 void *operator new(size_t size, std::align_val_t align, std::nothrow_t const&)
-{ OPERATOR_NEW_BODY_ALIGN(FROM_NEW); }
+{ OPERATOR_NEW_BODY_ALIGN(FROM_NEW, true /*nothrow*/); }
 CXX_OPERATOR_ATTRIBUTE
 void *operator new[](size_t size, std::align_val_t align, std::nothrow_t const&)
-{ OPERATOR_NEW_BODY_ALIGN(FROM_NEW_BR); }
+{ OPERATOR_NEW_BODY_ALIGN(FROM_NEW_BR, true /*nothrow*/); }
 
 #else  // SANITIZER_MAC
 INTERCEPTOR(void *, _Znwm, size_t size) {
-  OPERATOR_NEW_BODY(FROM_NEW);
+  OPERATOR_NEW_BODY(FROM_NEW, false /*nothrow*/);
 }
 INTERCEPTOR(void *, _Znam, size_t size) {
-  OPERATOR_NEW_BODY(FROM_NEW_BR);
+  OPERATOR_NEW_BODY(FROM_NEW_BR, false /*nothrow*/);
 }
 INTERCEPTOR(void *, _ZnwmRKSt9nothrow_t, size_t size, std::nothrow_t const&) {
-  OPERATOR_NEW_BODY(FROM_NEW);
+  OPERATOR_NEW_BODY(FROM_NEW, true /*nothrow*/);
 }
 INTERCEPTOR(void *, _ZnamRKSt9nothrow_t, size_t size, std::nothrow_t const&) {
-  OPERATOR_NEW_BODY(FROM_NEW_BR);
+  OPERATOR_NEW_BODY(FROM_NEW_BR, true /*nothrow*/);
 }
-#endif
+#endif  // !SANITIZER_MAC
 
 #define OPERATOR_DELETE_BODY(type) \
   GET_STACK_TRACE_FREE;\
-  asan_free(ptr, &stack, type);
+  asan_delete(ptr, 0, 0, &stack, type);
+
+#define OPERATOR_DELETE_BODY_SIZE(type) \
+  GET_STACK_TRACE_FREE;\
+  asan_delete(ptr, size, 0, &stack, type);
+
+#define OPERATOR_DELETE_BODY_ALIGN(type) \
+  GET_STACK_TRACE_FREE;\
+  asan_delete(ptr, 0, static_cast<uptr>(align), &stack, type);
+
+#define OPERATOR_DELETE_BODY_SIZE_ALIGN(type) \
+  GET_STACK_TRACE_FREE;\
+  asan_delete(ptr, size, static_cast<uptr>(align), &stack, type);
 
 #if !SANITIZER_MAC
 CXX_OPERATOR_ATTRIBUTE
-void operator delete(void *ptr) NOEXCEPT {
-  OPERATOR_DELETE_BODY(FROM_NEW);
-}
+void operator delete(void *ptr) NOEXCEPT
+{ OPERATOR_DELETE_BODY(FROM_NEW); }
 CXX_OPERATOR_ATTRIBUTE
-void operator delete[](void *ptr) NOEXCEPT {
-  OPERATOR_DELETE_BODY(FROM_NEW_BR);
-}
+void operator delete[](void *ptr) NOEXCEPT
+{ OPERATOR_DELETE_BODY(FROM_NEW_BR); }
 CXX_OPERATOR_ATTRIBUTE
-void operator delete(void *ptr, std::nothrow_t const&) {
-  OPERATOR_DELETE_BODY(FROM_NEW);
-}
+void operator delete(void *ptr, std::nothrow_t const&)
+{ OPERATOR_DELETE_BODY(FROM_NEW); }
 CXX_OPERATOR_ATTRIBUTE
-void operator delete[](void *ptr, std::nothrow_t const&) {
-  OPERATOR_DELETE_BODY(FROM_NEW_BR);
-}
+void operator delete[](void *ptr, std::nothrow_t const&)
+{ OPERATOR_DELETE_BODY(FROM_NEW_BR); }
 CXX_OPERATOR_ATTRIBUTE
-void operator delete(void *ptr, size_t size) NOEXCEPT {
-  GET_STACK_TRACE_FREE;
-  asan_sized_free(ptr, size, &stack, FROM_NEW);
-}
+void operator delete(void *ptr, size_t size) NOEXCEPT
+{ OPERATOR_DELETE_BODY_SIZE(FROM_NEW); }
 CXX_OPERATOR_ATTRIBUTE
-void operator delete[](void *ptr, size_t size) NOEXCEPT {
-  GET_STACK_TRACE_FREE;
-  asan_sized_free(ptr, size, &stack, FROM_NEW_BR);
-}
+void operator delete[](void *ptr, size_t size) NOEXCEPT
+{ OPERATOR_DELETE_BODY_SIZE(FROM_NEW_BR); }
 CXX_OPERATOR_ATTRIBUTE
-void operator delete(void *ptr, std::align_val_t) NOEXCEPT {
-  OPERATOR_DELETE_BODY(FROM_NEW);
-}
+void operator delete(void *ptr, std::align_val_t align) NOEXCEPT
+{ OPERATOR_DELETE_BODY_ALIGN(FROM_NEW); }
 CXX_OPERATOR_ATTRIBUTE
-void operator delete[](void *ptr, std::align_val_t) NOEXCEPT {
-  OPERATOR_DELETE_BODY(FROM_NEW_BR);
-}
+void operator delete[](void *ptr, std::align_val_t align) NOEXCEPT
+{ OPERATOR_DELETE_BODY_ALIGN(FROM_NEW_BR); }
 CXX_OPERATOR_ATTRIBUTE
-void operator delete(void *ptr, std::align_val_t, std::nothrow_t const&) {
-  OPERATOR_DELETE_BODY(FROM_NEW);
-}
+void operator delete(void *ptr, std::align_val_t align, std::nothrow_t const&)
+{ OPERATOR_DELETE_BODY_ALIGN(FROM_NEW); }
 CXX_OPERATOR_ATTRIBUTE
-void operator delete[](void *ptr, std::align_val_t, std::nothrow_t const&) {
-  OPERATOR_DELETE_BODY(FROM_NEW_BR);
-}
+void operator delete[](void *ptr, std::align_val_t align, std::nothrow_t const&)
+{ OPERATOR_DELETE_BODY_ALIGN(FROM_NEW_BR); }
 CXX_OPERATOR_ATTRIBUTE
-void operator delete(void *ptr, size_t size, std::align_val_t) NOEXCEPT {
-  GET_STACK_TRACE_FREE;
-  asan_sized_free(ptr, size, &stack, FROM_NEW);
-}
+void operator delete(void *ptr, size_t size, std::align_val_t align) NOEXCEPT
+{ OPERATOR_DELETE_BODY_SIZE_ALIGN(FROM_NEW); }
 CXX_OPERATOR_ATTRIBUTE
-void operator delete[](void *ptr, size_t size, std::align_val_t) NOEXCEPT {
-  GET_STACK_TRACE_FREE;
-  asan_sized_free(ptr, size, &stack, FROM_NEW_BR);
-}
+void operator delete[](void *ptr, size_t size, std::align_val_t align) NOEXCEPT
+{ OPERATOR_DELETE_BODY_SIZE_ALIGN(FROM_NEW_BR); }
 
 #else  // SANITIZER_MAC
-INTERCEPTOR(void, _ZdlPv, void *ptr) {
-  OPERATOR_DELETE_BODY(FROM_NEW);
-}
-INTERCEPTOR(void, _ZdaPv, void *ptr) {
-  OPERATOR_DELETE_BODY(FROM_NEW_BR);
-}
-INTERCEPTOR(void, _ZdlPvRKSt9nothrow_t, void *ptr, std::nothrow_t const&) {
-  OPERATOR_DELETE_BODY(FROM_NEW);
-}
-INTERCEPTOR(void, _ZdaPvRKSt9nothrow_t, void *ptr, std::nothrow_t const&) {
-  OPERATOR_DELETE_BODY(FROM_NEW_BR);
-}
-#endif
+INTERCEPTOR(void, _ZdlPv, void *ptr)
+{ OPERATOR_DELETE_BODY(FROM_NEW); }
+INTERCEPTOR(void, _ZdaPv, void *ptr)
+{ OPERATOR_DELETE_BODY(FROM_NEW_BR); }
+INTERCEPTOR(void, _ZdlPvRKSt9nothrow_t, void *ptr, std::nothrow_t const&)
+{ OPERATOR_DELETE_BODY(FROM_NEW); }
+INTERCEPTOR(void, _ZdaPvRKSt9nothrow_t, void *ptr, std::nothrow_t const&)
+{ OPERATOR_DELETE_BODY(FROM_NEW_BR); }
+#endif  // !SANITIZER_MAC
diff --git a/lib/asan/asan_poisoning.h b/lib/asan/asan_poisoning.h
index cc3281e..1e00070 100644
--- a/lib/asan/asan_poisoning.h
+++ b/lib/asan/asan_poisoning.h
@@ -46,8 +46,11 @@
   // for mapping shadow and zeroing out pages doesn't "just work", so we should
   // probably provide higher-level interface for these operations.
   // For now, just memset on Windows.
-  if (value ||
-      SANITIZER_WINDOWS == 1 ||
+  if (value || SANITIZER_WINDOWS == 1 ||
+      // TODO(mcgrathr): Fuchsia doesn't allow the shadow mapping to be
+      // changed at all.  It doesn't currently have an efficient means
+      // to zero a bunch of pages, but maybe we should add one.
+      SANITIZER_FUCHSIA == 1 ||
       shadow_end - shadow_beg < common_flags()->clear_shadow_mmap_threshold) {
     REAL(memset)((void*)shadow_beg, value, shadow_end - shadow_beg);
   } else {
diff --git a/lib/asan/asan_posix.cc b/lib/asan/asan_posix.cc
index 68fde91..38299e5 100644
--- a/lib/asan/asan_posix.cc
+++ b/lib/asan/asan_posix.cc
@@ -34,58 +34,9 @@
 namespace __asan {
 
 void AsanOnDeadlySignal(int signo, void *siginfo, void *context) {
-  ScopedDeadlySignal signal_scope(GetCurrentThread());
-  int code = (int)((siginfo_t*)siginfo)->si_code;
-  // Write the first message using fd=2, just in case.
-  // It may actually fail to write in case stderr is closed.
-  internal_write(2, "ASAN:DEADLYSIGNAL\n", 18);
-  SignalContext sig = SignalContext::Create(siginfo, context);
-
-  // Access at a reasonable offset above SP, or slightly below it (to account
-  // for x86_64 or PowerPC redzone, ARM push of multiple registers, etc) is
-  // probably a stack overflow.
-#ifdef __s390__
-  // On s390, the fault address in siginfo points to start of the page, not
-  // to the precise word that was accessed.  Mask off the low bits of sp to
-  // take it into account.
-  bool IsStackAccess = sig.addr >= (sig.sp & ~0xFFF) &&
-                       sig.addr < sig.sp + 0xFFFF;
-#else
-  bool IsStackAccess = sig.addr + 512 > sig.sp && sig.addr < sig.sp + 0xFFFF;
-#endif
-
-#if __powerpc__
-  // Large stack frames can be allocated with e.g.
-  //   lis r0,-10000
-  //   stdux r1,r1,r0 # store sp to [sp-10000] and update sp by -10000
-  // If the store faults then sp will not have been updated, so test above
-  // will not work, becase the fault address will be more than just "slightly"
-  // below sp.
-  if (!IsStackAccess && IsAccessibleMemoryRange(sig.pc, 4)) {
-    u32 inst = *(unsigned *)sig.pc;
-    u32 ra = (inst >> 16) & 0x1F;
-    u32 opcd = inst >> 26;
-    u32 xo = (inst >> 1) & 0x3FF;
-    // Check for store-with-update to sp. The instructions we accept are:
-    //   stbu rs,d(ra)          stbux rs,ra,rb
-    //   sthu rs,d(ra)          sthux rs,ra,rb
-    //   stwu rs,d(ra)          stwux rs,ra,rb
-    //   stdu rs,ds(ra)         stdux rs,ra,rb
-    // where ra is r1 (the stack pointer).
-    if (ra == 1 &&
-        (opcd == 39 || opcd == 45 || opcd == 37 || opcd == 62 ||
-         (opcd == 31 && (xo == 247 || xo == 439 || xo == 183 || xo == 181))))
-      IsStackAccess = true;
-  }
-#endif // __powerpc__
-
-  // We also check si_code to filter out SEGV caused by something else other
-  // then hitting the guard page or unmapped memory, like, for example,
-  // unaligned memory access.
-  if (IsStackAccess && (code == si_SEGV_MAPERR || code == si_SEGV_ACCERR))
-    ReportStackOverflow(sig);
-  else
-    ReportDeadlySignal(signo, sig);
+  StartReportDeadlySignal();
+  SignalContext sig(siginfo, context);
+  ReportDeadlySignal(sig);
 }
 
 // ---------------------- TSD ---------------- {{{1
diff --git a/lib/asan/asan_report.cc b/lib/asan/asan_report.cc
index cd44ba8..42fae9c 100644
--- a/lib/asan/asan_report.cc
+++ b/lib/asan/asan_report.cc
@@ -60,9 +60,8 @@
                      bool in_shadow, const char *after) {
   Decorator d;
   str->append("%s%s%x%x%s%s", before,
-              in_shadow ? d.ShadowByte(byte) : d.MemoryByte(),
-              byte >> 4, byte & 15,
-              in_shadow ? d.EndShadowByte() : d.EndMemoryByte(), after);
+              in_shadow ? d.ShadowByte(byte) : d.MemoryByte(), byte >> 4,
+              byte & 15, d.Default(), after);
 }
 
 static void PrintZoneForPointer(uptr ptr, uptr zone_ptr,
@@ -88,7 +87,8 @@
   char *p;
   // This string is created by the compiler and has the following form:
   // "n alloc_1 alloc_2 ... alloc_n"
-  // where alloc_i looks like "offset size len ObjectName".
+  // where alloc_i looks like "offset size len ObjectName"
+  // or                       "offset size len ObjectName:line".
   uptr n_objects = (uptr)internal_simple_strtoll(frame_descr, &p, 10);
   if (n_objects == 0)
     return false;
@@ -101,7 +101,14 @@
       return false;
     }
     p++;
-    StackVarDescr var = {beg, size, p, len};
+    char *colon_pos = internal_strchr(p, ':');
+    uptr line = 0;
+    uptr name_len = len;
+    if (colon_pos != nullptr && colon_pos < p + len) {
+      name_len = colon_pos - p;
+      line = (uptr)internal_simple_strtoll(colon_pos + 1, nullptr, 10);
+    }
+    StackVarDescr var = {beg, size, p, name_len, line};
     vars->push_back(var);
     p += len;
   }
@@ -115,53 +122,15 @@
 // immediately after printing error report.
 class ScopedInErrorReport {
  public:
-  explicit ScopedInErrorReport(bool fatal = false) {
-    halt_on_error_ = fatal || flags()->halt_on_error;
-
-    if (lock_.TryLock()) {
-      StartReporting();
-      return;
-    }
-
-    // ASan found two bugs in different threads simultaneously.
-
-    u32 current_tid = GetCurrentTidOrInvalid();
-    if (reporting_thread_tid_ == current_tid ||
-        reporting_thread_tid_ == kInvalidTid) {
-      // This is either asynch signal or nested error during error reporting.
-      // Fail simple to avoid deadlocks in Report().
-
-      // Can't use Report() here because of potential deadlocks
-      // in nested signal handlers.
-      const char msg[] = "AddressSanitizer: nested bug in the same thread, "
-                         "aborting.\n";
-      WriteToFile(kStderrFd, msg, sizeof(msg));
-
-      internal__exit(common_flags()->exitcode);
-    }
-
-    if (halt_on_error_) {
-      // Do not print more than one report, otherwise they will mix up.
-      // Error reporting functions shouldn't return at this situation, as
-      // they are effectively no-returns.
-
-      Report("AddressSanitizer: while reporting a bug found another one. "
-             "Ignoring.\n");
-
-      // Sleep long enough to make sure that the thread which started
-      // to print an error report will finish doing it.
-      SleepForSeconds(Max(100, flags()->sleep_before_dying + 1));
-
-      // If we're still not dead for some reason, use raw _exit() instead of
-      // Die() to bypass any additional checks.
-      internal__exit(common_flags()->exitcode);
-    } else {
-      // The other thread will eventually finish reporting
-      // so it's safe to wait
-      lock_.Lock();
-    }
-
-    StartReporting();
+  explicit ScopedInErrorReport(bool fatal = false)
+      : halt_on_error_(fatal || flags()->halt_on_error) {
+    // Make sure the registry and sanitizer report mutexes are locked while
+    // we're printing an error report.
+    // We can lock them only here to avoid self-deadlock in case of
+    // recursive reports.
+    asanThreadRegistry().Lock();
+    Printf(
+        "=================================================================\n");
   }
 
   ~ScopedInErrorReport() {
@@ -196,14 +165,19 @@
       error_report_callback(buffer_copy.data());
     }
 
+    if (halt_on_error_ && common_flags()->abort_on_error) {
+      // On Android the message is truncated to 512 characters.
+      // FIXME: implement "compact" error format, possibly without, or with
+      // highly compressed stack traces?
+      // FIXME: or just use the summary line as abort message?
+      SetAbortMessage(buffer_copy.data());
+    }
+
     // In halt_on_error = false mode, reset the current error object (before
     // unlocking).
     if (!halt_on_error_)
       internal_memset(&current_error_, 0, sizeof(current_error_));
 
-    CommonSanitizerReportMutex.Unlock();
-    reporting_thread_tid_ = kInvalidTid;
-    lock_.Unlock();
     if (halt_on_error_) {
       Report("ABORTING\n");
       Die();
@@ -221,39 +195,18 @@
   }
 
  private:
-  void StartReporting() {
-    // Make sure the registry and sanitizer report mutexes are locked while
-    // we're printing an error report.
-    // We can lock them only here to avoid self-deadlock in case of
-    // recursive reports.
-    asanThreadRegistry().Lock();
-    CommonSanitizerReportMutex.Lock();
-    reporting_thread_tid_ = GetCurrentTidOrInvalid();
-    Printf("===================================================="
-           "=============\n");
-  }
-
-  static StaticSpinMutex lock_;
-  static u32 reporting_thread_tid_;
+  ScopedErrorReportLock error_report_lock_;
   // Error currently being reported. This enables the destructor to interact
   // with the debugger and point it to an error description.
   static ErrorDescription current_error_;
   bool halt_on_error_;
 };
 
-StaticSpinMutex ScopedInErrorReport::lock_;
-u32 ScopedInErrorReport::reporting_thread_tid_ = kInvalidTid;
 ErrorDescription ScopedInErrorReport::current_error_;
 
-void ReportStackOverflow(const SignalContext &sig) {
+void ReportDeadlySignal(const SignalContext &sig) {
   ScopedInErrorReport in_report(/*fatal*/ true);
-  ErrorStackOverflow error(GetCurrentTidOrInvalid(), sig);
-  in_report.ReportError(error);
-}
-
-void ReportDeadlySignal(int signo, const SignalContext &sig) {
-  ScopedInErrorReport in_report(/*fatal*/ true);
-  ErrorDeadlySignal error(GetCurrentTidOrInvalid(), sig, signo);
+  ErrorDeadlySignal error(GetCurrentTidOrInvalid(), sig);
   in_report.ReportError(error);
 }
 
@@ -263,11 +216,12 @@
   in_report.ReportError(error);
 }
 
-void ReportNewDeleteSizeMismatch(uptr addr, uptr delete_size,
+void ReportNewDeleteTypeMismatch(uptr addr, uptr delete_size,
+                                 uptr delete_alignment,
                                  BufferedStackTrace *free_stack) {
   ScopedInErrorReport in_report;
-  ErrorNewDeleteSizeMismatch error(GetCurrentTidOrInvalid(), free_stack, addr,
-                                   delete_size);
+  ErrorNewDeleteTypeMismatch error(GetCurrentTidOrInvalid(), free_stack, addr,
+                                   delete_size, delete_alignment);
   in_report.ReportError(error);
 }
 
diff --git a/lib/asan/asan_report.h b/lib/asan/asan_report.h
index 5ebfda6..f2cdad4 100644
--- a/lib/asan/asan_report.h
+++ b/lib/asan/asan_report.h
@@ -23,6 +23,7 @@
   uptr size;
   const char *name_pos;
   uptr name_len;
+  uptr line;
 };
 
 // Returns the number of globals close to the provided address and copies
@@ -45,9 +46,9 @@
 // Different kinds of error reports.
 void ReportGenericError(uptr pc, uptr bp, uptr sp, uptr addr, bool is_write,
                         uptr access_size, u32 exp, bool fatal);
-void ReportStackOverflow(const SignalContext &sig);
-void ReportDeadlySignal(int signo, const SignalContext &sig);
-void ReportNewDeleteSizeMismatch(uptr addr, uptr delete_size,
+void ReportDeadlySignal(const SignalContext &sig);
+void ReportNewDeleteTypeMismatch(uptr addr, uptr delete_size,
+                                 uptr delete_alignment,
                                  BufferedStackTrace *free_stack);
 void ReportDoubleFree(uptr addr, BufferedStackTrace *free_stack);
 void ReportFreeNotMalloced(uptr addr, BufferedStackTrace *free_stack);
diff --git a/lib/asan/asan_rtl.cc b/lib/asan/asan_rtl.cc
index d9d7d7e..8608e4a 100644
--- a/lib/asan/asan_rtl.cc
+++ b/lib/asan/asan_rtl.cc
@@ -84,26 +84,6 @@
   Die();
 }
 
-// ---------------------- mmap -------------------- {{{1
-// Reserve memory range [beg, end].
-// We need to use inclusive range because end+1 may not be representable.
-void ReserveShadowMemoryRange(uptr beg, uptr end, const char *name) {
-  CHECK_EQ((beg % GetMmapGranularity()), 0);
-  CHECK_EQ(((end + 1) % GetMmapGranularity()), 0);
-  uptr size = end - beg + 1;
-  DecreaseTotalMmap(size);  // Don't count the shadow against mmap_limit_mb.
-  void *res = MmapFixedNoReserve(beg, size, name);
-  if (res != (void*)beg) {
-    Report("ReserveShadowMemoryRange failed while trying to map 0x%zx bytes. "
-           "Perhaps you're using ulimit -v\n", size);
-    Abort();
-  }
-  if (common_flags()->no_huge_pages_for_shadow)
-    NoHugePagesInRegion(beg, size);
-  if (common_flags()->use_madv_dontdump)
-    DontDumpShadowMemory(beg, size);
-}
-
 // --------------- LowLevelAllocateCallbac ---------- {{{1
 static void OnLowLevelAllocate(uptr ptr, uptr size) {
   PoisonShadow(ptr, size, kAsanInternalHeapMagic);
@@ -335,46 +315,7 @@
   CHECK_EQ((kHighMemBeg % GetMmapGranularity()), 0);
 }
 
-static void ProtectGap(uptr addr, uptr size) {
-  if (!flags()->protect_shadow_gap) {
-    // The shadow gap is unprotected, so there is a chance that someone
-    // is actually using this memory. Which means it needs a shadow...
-    uptr GapShadowBeg = RoundDownTo(MEM_TO_SHADOW(addr), GetPageSizeCached());
-    uptr GapShadowEnd =
-        RoundUpTo(MEM_TO_SHADOW(addr + size), GetPageSizeCached()) - 1;
-    if (Verbosity())
-      Printf("protect_shadow_gap=0:"
-             " not protecting shadow gap, allocating gap's shadow\n"
-             "|| `[%p, %p]` || ShadowGap's shadow ||\n", GapShadowBeg,
-             GapShadowEnd);
-    ReserveShadowMemoryRange(GapShadowBeg, GapShadowEnd,
-                             "unprotected gap shadow");
-    return;
-  }
-  void *res = MmapFixedNoAccess(addr, size, "shadow gap");
-  if (addr == (uptr)res)
-    return;
-  // A few pages at the start of the address space can not be protected.
-  // But we really want to protect as much as possible, to prevent this memory
-  // being returned as a result of a non-FIXED mmap().
-  if (addr == kZeroBaseShadowStart) {
-    uptr step = GetMmapGranularity();
-    while (size > step && addr < kZeroBaseMaxShadowStart) {
-      addr += step;
-      size -= step;
-      void *res = MmapFixedNoAccess(addr, size, "shadow gap");
-      if (addr == (uptr)res)
-        return;
-    }
-  }
-
-  Report("ERROR: Failed to protect the shadow gap. "
-         "ASan cannot proceed correctly. ABORTING.\n");
-  DumpProcessMap();
-  Die();
-}
-
-static void PrintAddressSpaceLayout() {
+void PrintAddressSpaceLayout() {
   Printf("|| `[%p, %p]` || HighMem    ||\n",
          (void*)kHighMemBeg, (void*)kHighMemEnd);
   Printf("|| `[%p, %p]` || HighShadow ||\n",
@@ -426,79 +367,6 @@
           kHighShadowBeg > kMidMemEnd);
 }
 
-static void InitializeShadowMemory() {
-  // Set the shadow memory address to uninitialized.
-  __asan_shadow_memory_dynamic_address = kDefaultShadowSentinel;
-
-  uptr shadow_start = kLowShadowBeg;
-  // Detect if a dynamic shadow address must used and find a available location
-  // when necessary. When dynamic address is used, the macro |kLowShadowBeg|
-  // expands to |__asan_shadow_memory_dynamic_address| which is
-  // |kDefaultShadowSentinel|.
-  if (shadow_start == kDefaultShadowSentinel) {
-    __asan_shadow_memory_dynamic_address = 0;
-    CHECK_EQ(0, kLowShadowBeg);
-
-    uptr granularity = GetMmapGranularity();
-    uptr alignment = 8 * granularity;
-    uptr left_padding = granularity;
-    uptr space_size = kHighShadowEnd + left_padding;
-
-    shadow_start = FindAvailableMemoryRange(space_size, alignment, granularity);
-    CHECK_NE((uptr)0, shadow_start);
-    CHECK(IsAligned(shadow_start, alignment));
-  }
-  // Update the shadow memory address (potentially) used by instrumentation.
-  __asan_shadow_memory_dynamic_address = shadow_start;
-
-  if (kLowShadowBeg)
-    shadow_start -= GetMmapGranularity();
-  bool full_shadow_is_available =
-      MemoryRangeIsAvailable(shadow_start, kHighShadowEnd);
-
-#if SANITIZER_LINUX && defined(__x86_64__) && defined(_LP64) &&                \
-    !ASAN_FIXED_MAPPING
-  if (!full_shadow_is_available) {
-    kMidMemBeg = kLowMemEnd < 0x3000000000ULL ? 0x3000000000ULL : 0;
-    kMidMemEnd = kLowMemEnd < 0x3000000000ULL ? 0x4fffffffffULL : 0;
-  }
-#endif
-
-  if (Verbosity()) PrintAddressSpaceLayout();
-
-  if (full_shadow_is_available) {
-    // mmap the low shadow plus at least one page at the left.
-    if (kLowShadowBeg)
-      ReserveShadowMemoryRange(shadow_start, kLowShadowEnd, "low shadow");
-    // mmap the high shadow.
-    ReserveShadowMemoryRange(kHighShadowBeg, kHighShadowEnd, "high shadow");
-    // protect the gap.
-    ProtectGap(kShadowGapBeg, kShadowGapEnd - kShadowGapBeg + 1);
-    CHECK_EQ(kShadowGapEnd, kHighShadowBeg - 1);
-  } else if (kMidMemBeg &&
-      MemoryRangeIsAvailable(shadow_start, kMidMemBeg - 1) &&
-      MemoryRangeIsAvailable(kMidMemEnd + 1, kHighShadowEnd)) {
-    CHECK(kLowShadowBeg != kLowShadowEnd);
-    // mmap the low shadow plus at least one page at the left.
-    ReserveShadowMemoryRange(shadow_start, kLowShadowEnd, "low shadow");
-    // mmap the mid shadow.
-    ReserveShadowMemoryRange(kMidShadowBeg, kMidShadowEnd, "mid shadow");
-    // mmap the high shadow.
-    ReserveShadowMemoryRange(kHighShadowBeg, kHighShadowEnd, "high shadow");
-    // protect the gaps.
-    ProtectGap(kShadowGapBeg, kShadowGapEnd - kShadowGapBeg + 1);
-    ProtectGap(kShadowGap2Beg, kShadowGap2End - kShadowGap2Beg + 1);
-    ProtectGap(kShadowGap3Beg, kShadowGap3End - kShadowGap3Beg + 1);
-  } else {
-    Report("Shadow memory range interleaves with an existing memory mapping. "
-           "ASan cannot proceed correctly. ABORTING.\n");
-    Report("ASan shadow was supposed to be located in the [%p-%p] range.\n",
-           shadow_start, kHighShadowEnd);
-    DumpProcessMap();
-    Die();
-  }
-}
-
 static void AsanInitInternal() {
   if (LIKELY(asan_inited)) return;
   SanitizerToolName = "AddressSanitizer";
@@ -583,20 +451,18 @@
   InitTlsSize();
 
   // Create main thread.
-  AsanThread *main_thread = AsanThread::Create(
-      /* start_routine */ nullptr, /* arg */ nullptr, /* parent_tid */ 0,
-      /* stack */ nullptr, /* detached */ true);
+  AsanThread *main_thread = CreateMainThread();
   CHECK_EQ(0, main_thread->tid());
-  SetCurrentThread(main_thread);
-  main_thread->ThreadStart(internal_getpid(),
-                           /* signal_thread_is_registered */ nullptr);
   force_interface_symbols();  // no-op.
   SanitizerInitializeUnwinder();
 
   if (CAN_SANITIZE_LEAKS) {
     __lsan::InitCommonLsan();
     if (common_flags()->detect_leaks && common_flags()->leak_check_at_exit) {
-      Atexit(__lsan::DoLeakCheck);
+      if (flags()->halt_on_error)
+        Atexit(__lsan::DoLeakCheck);
+      else
+        Atexit(__lsan::DoRecoverableLeakCheckVoid);
     }
   }
 
@@ -616,6 +482,11 @@
   }
 
   VReport(1, "AddressSanitizer Init done\n");
+
+  if (flags()->sleep_after_init) {
+    Report("Sleeping for %d second(s)\n", flags()->sleep_after_init);
+    SleepForSeconds(flags()->sleep_after_init);
+  }
 }
 
 // Initialize as requested from some part of ASan runtime library (interceptors,
@@ -655,6 +526,7 @@
     top = curr_thread->stack_top();
     bottom = ((uptr)&local_stack - PageSize) & ~(PageSize - 1);
   } else {
+    CHECK(!SANITIZER_FUCHSIA);
     // If we haven't seen this thread, try asking the OS for stack bounds.
     uptr tls_addr, tls_size, stack_size;
     GetThreadStackAndTls(/*main=*/false, &bottom, &stack_size, &tls_addr,
diff --git a/lib/asan/asan_scariness_score.h b/lib/asan/asan_scariness_score.h
index 7f15714..7f095dd 100644
--- a/lib/asan/asan_scariness_score.h
+++ b/lib/asan/asan_scariness_score.h
@@ -47,7 +47,7 @@
   };
   int GetScore() const { return score; }
   const char *GetDescription() const { return descr; }
-  void Print() {
+  void Print() const {
     if (score && flags()->print_scariness)
       Printf("SCARINESS: %d (%s)\n", score, descr);
   }
diff --git a/lib/asan/asan_shadow_setup.cc b/lib/asan/asan_shadow_setup.cc
new file mode 100644
index 0000000..08c0091
--- /dev/null
+++ b/lib/asan/asan_shadow_setup.cc
@@ -0,0 +1,161 @@
+//===-- asan_shadow_setup.cc ----------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// Set up the shadow memory.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_platform.h"
+
+// asan_fuchsia.cc has its own InitializeShadowMemory implementation.
+#if !SANITIZER_FUCHSIA
+
+#include "asan_internal.h"
+#include "asan_mapping.h"
+
+namespace __asan {
+
+// ---------------------- mmap -------------------- {{{1
+// Reserve memory range [beg, end].
+// We need to use inclusive range because end+1 may not be representable.
+void ReserveShadowMemoryRange(uptr beg, uptr end, const char *name) {
+  CHECK_EQ((beg % GetMmapGranularity()), 0);
+  CHECK_EQ(((end + 1) % GetMmapGranularity()), 0);
+  uptr size = end - beg + 1;
+  DecreaseTotalMmap(size);  // Don't count the shadow against mmap_limit_mb.
+  void *res = MmapFixedNoReserve(beg, size, name);
+  if (res != (void *)beg) {
+    Report(
+        "ReserveShadowMemoryRange failed while trying to map 0x%zx bytes. "
+        "Perhaps you're using ulimit -v\n",
+        size);
+    Abort();
+  }
+  if (common_flags()->no_huge_pages_for_shadow) NoHugePagesInRegion(beg, size);
+  if (common_flags()->use_madv_dontdump) DontDumpShadowMemory(beg, size);
+}
+
+static void ProtectGap(uptr addr, uptr size) {
+  if (!flags()->protect_shadow_gap) {
+    // The shadow gap is unprotected, so there is a chance that someone
+    // is actually using this memory. Which means it needs a shadow...
+    uptr GapShadowBeg = RoundDownTo(MEM_TO_SHADOW(addr), GetPageSizeCached());
+    uptr GapShadowEnd =
+        RoundUpTo(MEM_TO_SHADOW(addr + size), GetPageSizeCached()) - 1;
+    if (Verbosity())
+      Printf(
+          "protect_shadow_gap=0:"
+          " not protecting shadow gap, allocating gap's shadow\n"
+          "|| `[%p, %p]` || ShadowGap's shadow ||\n",
+          GapShadowBeg, GapShadowEnd);
+    ReserveShadowMemoryRange(GapShadowBeg, GapShadowEnd,
+                             "unprotected gap shadow");
+    return;
+  }
+  void *res = MmapFixedNoAccess(addr, size, "shadow gap");
+  if (addr == (uptr)res) return;
+  // A few pages at the start of the address space can not be protected.
+  // But we really want to protect as much as possible, to prevent this memory
+  // being returned as a result of a non-FIXED mmap().
+  if (addr == kZeroBaseShadowStart) {
+    uptr step = GetMmapGranularity();
+    while (size > step && addr < kZeroBaseMaxShadowStart) {
+      addr += step;
+      size -= step;
+      void *res = MmapFixedNoAccess(addr, size, "shadow gap");
+      if (addr == (uptr)res) return;
+    }
+  }
+
+  Report(
+      "ERROR: Failed to protect the shadow gap. "
+      "ASan cannot proceed correctly. ABORTING.\n");
+  DumpProcessMap();
+  Die();
+}
+
+static void MaybeReportLinuxPIEBug() {
+#if SANITIZER_LINUX && (defined(__x86_64__) || defined(__aarch64__))
+  Report("This might be related to ELF_ET_DYN_BASE change in Linux 4.12.\n");
+  Report(
+      "See https://github.com/google/sanitizers/issues/856 for possible "
+      "workarounds.\n");
+#endif
+}
+
+void InitializeShadowMemory() {
+  // Set the shadow memory address to uninitialized.
+  __asan_shadow_memory_dynamic_address = kDefaultShadowSentinel;
+
+  uptr shadow_start = kLowShadowBeg;
+  // Detect if a dynamic shadow address must used and find a available location
+  // when necessary. When dynamic address is used, the macro |kLowShadowBeg|
+  // expands to |__asan_shadow_memory_dynamic_address| which is
+  // |kDefaultShadowSentinel|.
+  if (shadow_start == kDefaultShadowSentinel) {
+    __asan_shadow_memory_dynamic_address = 0;
+    CHECK_EQ(0, kLowShadowBeg);
+    shadow_start = FindDynamicShadowStart();
+  }
+  // Update the shadow memory address (potentially) used by instrumentation.
+  __asan_shadow_memory_dynamic_address = shadow_start;
+
+  if (kLowShadowBeg) shadow_start -= GetMmapGranularity();
+  bool full_shadow_is_available =
+      MemoryRangeIsAvailable(shadow_start, kHighShadowEnd);
+
+#if SANITIZER_LINUX && defined(__x86_64__) && defined(_LP64) && \
+    !ASAN_FIXED_MAPPING
+  if (!full_shadow_is_available) {
+    kMidMemBeg = kLowMemEnd < 0x3000000000ULL ? 0x3000000000ULL : 0;
+    kMidMemEnd = kLowMemEnd < 0x3000000000ULL ? 0x4fffffffffULL : 0;
+  }
+#endif
+
+  if (Verbosity()) PrintAddressSpaceLayout();
+
+  if (full_shadow_is_available) {
+    // mmap the low shadow plus at least one page at the left.
+    if (kLowShadowBeg)
+      ReserveShadowMemoryRange(shadow_start, kLowShadowEnd, "low shadow");
+    // mmap the high shadow.
+    ReserveShadowMemoryRange(kHighShadowBeg, kHighShadowEnd, "high shadow");
+    // protect the gap.
+    ProtectGap(kShadowGapBeg, kShadowGapEnd - kShadowGapBeg + 1);
+    CHECK_EQ(kShadowGapEnd, kHighShadowBeg - 1);
+  } else if (kMidMemBeg &&
+             MemoryRangeIsAvailable(shadow_start, kMidMemBeg - 1) &&
+             MemoryRangeIsAvailable(kMidMemEnd + 1, kHighShadowEnd)) {
+    CHECK(kLowShadowBeg != kLowShadowEnd);
+    // mmap the low shadow plus at least one page at the left.
+    ReserveShadowMemoryRange(shadow_start, kLowShadowEnd, "low shadow");
+    // mmap the mid shadow.
+    ReserveShadowMemoryRange(kMidShadowBeg, kMidShadowEnd, "mid shadow");
+    // mmap the high shadow.
+    ReserveShadowMemoryRange(kHighShadowBeg, kHighShadowEnd, "high shadow");
+    // protect the gaps.
+    ProtectGap(kShadowGapBeg, kShadowGapEnd - kShadowGapBeg + 1);
+    ProtectGap(kShadowGap2Beg, kShadowGap2End - kShadowGap2Beg + 1);
+    ProtectGap(kShadowGap3Beg, kShadowGap3End - kShadowGap3Beg + 1);
+  } else {
+    Report(
+        "Shadow memory range interleaves with an existing memory mapping. "
+        "ASan cannot proceed correctly. ABORTING.\n");
+    Report("ASan shadow was supposed to be located in the [%p-%p] range.\n",
+           shadow_start, kHighShadowEnd);
+    MaybeReportLinuxPIEBug();
+    DumpProcessMap();
+    Die();
+  }
+}
+
+}  // namespace __asan
+
+#endif  // !SANITIZER_FUCHSIA
diff --git a/lib/asan/asan_stack.h b/lib/asan/asan_stack.h
index cc95e0f..9ee7f5c 100644
--- a/lib/asan/asan_stack.h
+++ b/lib/asan/asan_stack.h
@@ -41,10 +41,6 @@
   stack->size = 0;
   if (LIKELY(asan_inited)) {
     if ((t = GetCurrentThread()) && !t->isUnwinding()) {
-      // On FreeBSD the slow unwinding that leverages _Unwind_Backtrace()
-      // yields the call stack of the signal's handler and not of the code
-      // that raised the signal (as it does on Linux).
-      if (SANITIZER_FREEBSD && t->isInDeadlySignal()) fast = true;
       uptr stack_top = t->stack_top();
       uptr stack_bottom = t->stack_bottom();
       ScopedUnwinding unwind_scope(t);
diff --git a/lib/asan/asan_thread.cc b/lib/asan/asan_thread.cc
index 2f9fa81..c41d3ba 100644
--- a/lib/asan/asan_thread.cc
+++ b/lib/asan/asan_thread.cc
@@ -27,11 +27,6 @@
 
 // AsanThreadContext implementation.
 
-struct CreateThreadContextArgs {
-  AsanThread *thread;
-  StackTrace *stack;
-};
-
 void AsanThreadContext::OnCreated(void *arg) {
   CreateThreadContextArgs *args = static_cast<CreateThreadContextArgs*>(arg);
   if (args->stack)
@@ -88,7 +83,7 @@
   AsanThread *thread = (AsanThread*)MmapOrDie(size, __func__);
   thread->start_routine_ = start_routine;
   thread->arg_ = arg;
-  CreateThreadContextArgs args = { thread, stack };
+  AsanThreadContext::CreateThreadContextArgs args = {thread, stack};
   asanThreadRegistry().CreateThread(*reinterpret_cast<uptr *>(thread), detached,
                                     parent_tid, &args);
 
@@ -166,16 +161,19 @@
 }
 
 inline AsanThread::StackBounds AsanThread::GetStackBounds() const {
-  if (!atomic_load(&stack_switching_, memory_order_acquire))
-    return StackBounds{stack_bottom_, stack_top_};  // NOLINT
+  if (!atomic_load(&stack_switching_, memory_order_acquire)) {
+    // Make sure the stack bounds are fully initialized.
+    if (stack_bottom_ >= stack_top_) return {0, 0};
+    return {stack_bottom_, stack_top_};
+  }
   char local;
   const uptr cur_stack = (uptr)&local;
   // Note: need to check next stack first, because FinishSwitchFiber
   // may be in process of overwriting stack_top_/bottom_. But in such case
   // we are already on the next stack.
   if (cur_stack >= next_stack_bottom_ && cur_stack < next_stack_top_)
-    return StackBounds{next_stack_bottom_, next_stack_top_};  // NOLINT
-  return StackBounds{stack_bottom_, stack_top_};              // NOLINT
+    return {next_stack_bottom_, next_stack_top_};
+  return {stack_bottom_, stack_top_};
 }
 
 uptr AsanThread::stack_top() {
@@ -220,12 +218,12 @@
   return nullptr;
 }
 
-void AsanThread::Init() {
+void AsanThread::Init(const InitOptions *options) {
   next_stack_top_ = next_stack_bottom_ = 0;
   atomic_store(&stack_switching_, false, memory_order_release);
   fake_stack_ = nullptr;  // Will be initialized lazily if needed.
   CHECK_EQ(this->stack_size(), 0U);
-  SetThreadStackAndTls();
+  SetThreadStackAndTls(options);
   CHECK_GT(this->stack_size(), 0U);
   CHECK(AddrIsInMem(stack_bottom_));
   CHECK(AddrIsInMem(stack_top_ - 1));
@@ -236,8 +234,12 @@
           &local);
 }
 
+// Fuchsia doesn't use ThreadStart.
+// asan_fuchsia.c defines CreateMainThread and SetThreadStackAndTls.
+#if !SANITIZER_FUCHSIA
+
 thread_return_t AsanThread::ThreadStart(
-    uptr os_id, atomic_uintptr_t *signal_thread_is_registered) {
+    tid_t os_id, atomic_uintptr_t *signal_thread_is_registered) {
   Init();
   asanThreadRegistry().StartThread(tid(), os_id, /*workerthread*/ false,
                                    nullptr);
@@ -267,7 +269,21 @@
   return res;
 }
 
-void AsanThread::SetThreadStackAndTls() {
+AsanThread *CreateMainThread() {
+  AsanThread *main_thread = AsanThread::Create(
+      /* start_routine */ nullptr, /* arg */ nullptr, /* parent_tid */ 0,
+      /* stack */ nullptr, /* detached */ true);
+  SetCurrentThread(main_thread);
+  main_thread->ThreadStart(internal_getpid(),
+                           /* signal_thread_is_registered */ nullptr);
+  return main_thread;
+}
+
+// This implementation doesn't use the argument, which is just passed down
+// from the caller of Init (which see, above).  It's only there to support
+// OS-specific implementations that need more information passed through.
+void AsanThread::SetThreadStackAndTls(const InitOptions *options) {
+  DCHECK_EQ(options, nullptr);
   uptr tls_size = 0;
   uptr stack_size = 0;
   GetThreadStackAndTls(tid() == 0, const_cast<uptr *>(&stack_bottom_),
@@ -280,6 +296,8 @@
   CHECK(AddrIsInStack((uptr)&local));
 }
 
+#endif  // !SANITIZER_FUCHSIA
+
 void AsanThread::ClearShadowForThreadStackAndTLS() {
   PoisonShadow(stack_bottom_, stack_top_ - stack_bottom_, 0);
   if (tls_begin_ != tls_end_)
@@ -300,24 +318,27 @@
     return true;
   }
   uptr aligned_addr = addr & ~(SANITIZER_WORDSIZE/8 - 1);  // align addr.
+  uptr mem_ptr = RoundDownTo(aligned_addr, SHADOW_GRANULARITY);
   u8 *shadow_ptr = (u8*)MemToShadow(aligned_addr);
   u8 *shadow_bottom = (u8*)MemToShadow(bottom);
 
   while (shadow_ptr >= shadow_bottom &&
          *shadow_ptr != kAsanStackLeftRedzoneMagic) {
     shadow_ptr--;
+    mem_ptr -= SHADOW_GRANULARITY;
   }
 
   while (shadow_ptr >= shadow_bottom &&
          *shadow_ptr == kAsanStackLeftRedzoneMagic) {
     shadow_ptr--;
+    mem_ptr -= SHADOW_GRANULARITY;
   }
 
   if (shadow_ptr < shadow_bottom) {
     return false;
   }
 
-  uptr* ptr = (uptr*)SHADOW_TO_MEM((uptr)(shadow_ptr + 1));
+  uptr* ptr = (uptr*)(mem_ptr + SHADOW_GRANULARITY);
   CHECK(ptr[0] == kCurrentStackFrameMagic);
   access->offset = addr - (uptr)ptr;
   access->frame_pc = ptr[2];
@@ -392,7 +413,7 @@
     context->os_id = GetTid();
 }
 
-__asan::AsanThread *GetAsanThreadByOsIDLocked(uptr os_id) {
+__asan::AsanThread *GetAsanThreadByOsIDLocked(tid_t os_id) {
   __asan::AsanThreadContext *context = static_cast<__asan::AsanThreadContext *>(
       __asan::asanThreadRegistry().FindThreadContextByOsIDLocked(os_id));
   if (!context) return nullptr;
@@ -402,7 +423,7 @@
 
 // --- Implementation of LSan-specific functions --- {{{1
 namespace __lsan {
-bool GetThreadRangesLocked(uptr os_id, uptr *stack_begin, uptr *stack_end,
+bool GetThreadRangesLocked(tid_t os_id, uptr *stack_begin, uptr *stack_end,
                            uptr *tls_begin, uptr *tls_end, uptr *cache_begin,
                            uptr *cache_end, DTLS **dtls) {
   __asan::AsanThread *t = __asan::GetAsanThreadByOsIDLocked(os_id);
@@ -418,7 +439,7 @@
   return true;
 }
 
-void ForEachExtraStackRange(uptr os_id, RangeIteratorCallback callback,
+void ForEachExtraStackRange(tid_t os_id, RangeIteratorCallback callback,
                             void *arg) {
   __asan::AsanThread *t = __asan::GetAsanThreadByOsIDLocked(os_id);
   if (t && t->has_fake_stack())
diff --git a/lib/asan/asan_thread.h b/lib/asan/asan_thread.h
index f53dfb7..1cd283a 100644
--- a/lib/asan/asan_thread.h
+++ b/lib/asan/asan_thread.h
@@ -49,6 +49,11 @@
 
   void OnCreated(void *arg) override;
   void OnFinished() override;
+
+  struct CreateThreadContextArgs {
+    AsanThread *thread;
+    StackTrace *stack;
+  };
 };
 
 // AsanThreadContext objects are never freed, so we need many of them.
@@ -62,8 +67,10 @@
   static void TSDDtor(void *tsd);
   void Destroy();
 
-  void Init();  // Should be called from the thread itself.
-  thread_return_t ThreadStart(uptr os_id,
+  struct InitOptions;
+  void Init(const InitOptions *options = nullptr);
+
+  thread_return_t ThreadStart(tid_t os_id,
                               atomic_uintptr_t *signal_thread_is_registered);
 
   uptr stack_top();
@@ -118,17 +125,15 @@
   bool isUnwinding() const { return unwinding_; }
   void setUnwinding(bool b) { unwinding_ = b; }
 
-  // True if we are in a deadly signal handler.
-  bool isInDeadlySignal() const { return in_deadly_signal_; }
-  void setInDeadlySignal(bool b) { in_deadly_signal_ = b; }
-
   AsanThreadLocalMallocStorage &malloc_storage() { return malloc_storage_; }
   AsanStats &stats() { return stats_; }
 
  private:
   // NOTE: There is no AsanThread constructor. It is allocated
   // via mmap() and *must* be valid in zero-initialized state.
-  void SetThreadStackAndTls();
+
+  void SetThreadStackAndTls(const InitOptions *options);
+
   void ClearShadowForThreadStackAndTLS();
   FakeStack *AsyncSignalSafeLazyInitFakeStack();
 
@@ -158,7 +163,6 @@
   AsanThreadLocalMallocStorage malloc_storage_;
   AsanStats stats_;
   bool unwinding_;
-  bool in_deadly_signal_;
 };
 
 // ScopedUnwinding is a scope for stacktracing member of a context
@@ -173,20 +177,6 @@
   AsanThread *thread;
 };
 
-// ScopedDeadlySignal is a scope for handling deadly signals.
-class ScopedDeadlySignal {
- public:
-  explicit ScopedDeadlySignal(AsanThread *t) : thread(t) {
-    if (thread) thread->setInDeadlySignal(true);
-  }
-  ~ScopedDeadlySignal() {
-    if (thread) thread->setInDeadlySignal(false);
-  }
-
- private:
-  AsanThread *thread;
-};
-
 // Returns a single instance of registry.
 ThreadRegistry &asanThreadRegistry();
 
diff --git a/lib/asan/asan_win.cc b/lib/asan/asan_win.cc
index 4ab535c..68eedd1 100644
--- a/lib/asan/asan_win.cc
+++ b/lib/asan/asan_win.cc
@@ -57,8 +57,8 @@
 
   // FIXME: Handle EXCEPTION_STACK_OVERFLOW here.
 
-  SignalContext sig = SignalContext::Create(exception_record, context);
-  ReportDeadlySignal(exception_record->ExceptionCode, sig);
+  SignalContext sig(exception_record, context);
+  ReportDeadlySignal(sig);
   UNREACHABLE("returned from reporting deadly signal");
 }
 
@@ -80,7 +80,7 @@
 INTERCEPTOR_WINAPI(LPTOP_LEVEL_EXCEPTION_FILTER, SetUnhandledExceptionFilter,
     LPTOP_LEVEL_EXCEPTION_FILTER ExceptionFilter) {
   CHECK(REAL(SetUnhandledExceptionFilter));
-  if (ExceptionFilter == &SEHHandler || common_flags()->allow_user_segv_handler)
+  if (ExceptionFilter == &SEHHandler)
     return REAL(SetUnhandledExceptionFilter)(ExceptionFilter);
   // We record the user provided exception handler to be called for all the
   // exceptions unhandled by asan.
@@ -217,6 +217,18 @@
   return 0;
 }
 
+uptr FindDynamicShadowStart() {
+  uptr granularity = GetMmapGranularity();
+  uptr alignment = 8 * granularity;
+  uptr left_padding = granularity;
+  uptr space_size = kHighShadowEnd + left_padding;
+  uptr shadow_start =
+      FindAvailableMemoryRange(space_size, alignment, granularity, nullptr);
+  CHECK_NE((uptr)0, shadow_start);
+  CHECK(IsAligned(shadow_start, alignment));
+  return shadow_start;
+}
+
 void AsanCheckDynamicRTPrereqs() {}
 
 void AsanCheckIncompatibleRT() {}
diff --git a/lib/asan/asan_win_dll_thunk.cc b/lib/asan/asan_win_dll_thunk.cc
index 651886c..c67116c 100644
--- a/lib/asan/asan_win_dll_thunk.cc
+++ b/lib/asan/asan_win_dll_thunk.cc
@@ -82,8 +82,10 @@
 INTERCEPT_LIBRARY_FUNCTION(strrchr);
 INTERCEPT_LIBRARY_FUNCTION(strspn);
 INTERCEPT_LIBRARY_FUNCTION(strstr);
+INTERCEPT_LIBRARY_FUNCTION(strtok);
 INTERCEPT_LIBRARY_FUNCTION(strtol);
 INTERCEPT_LIBRARY_FUNCTION(wcslen);
+INTERCEPT_LIBRARY_FUNCTION(wcsnlen);
 
 #ifdef _WIN64
 INTERCEPT_LIBRARY_FUNCTION(__C_specific_handler);
diff --git a/lib/asan/scripts/asan_device_setup b/lib/asan/scripts/asan_device_setup
index fdfc46f..ec19899 100755
--- a/lib/asan/scripts/asan_device_setup
+++ b/lib/asan/scripts/asan_device_setup
@@ -52,7 +52,7 @@
     local STORAGE=`$ADB shell mount | grep /system | cut -d ' ' -f1`
     if [ "$STORAGE" != "" ]; then
       echo Remounting $STORAGE at /system
-      $ADB shell su -c "mount -o remount,rw $STORAGE /system"
+      $ADB shell su -c "mount -o rw,remount $STORAGE /system"
     else
       echo Failed to get storage device name for "/system" mount point
     fi
@@ -95,7 +95,7 @@
     local _ARCH=
     local _ARCH64=
     if [[ $_ABI == x86* ]]; then
-        _ARCH=i686
+        _ARCH=i386
     elif [[ $_ABI == armeabi* ]]; then
         _ARCH=arm
     elif [[ $_ABI == arm64-v8a* ]]; then
@@ -181,6 +181,17 @@
   ASAN_RT64="libclang_rt.asan-$ARCH64-android.so"
 fi
 
+RELEASE=$(adb_shell getprop ro.build.version.release)
+PRE_L=0
+if echo "$RELEASE" | grep '^4\.' >&/dev/null; then
+    PRE_L=1
+fi
+ANDROID_O=0
+if echo "$RELEASE" | grep '^8\.0\.' >&/dev/null; then
+    # 8.0.x is for Android O
+    ANDROID_O=1
+fi
+
 if [[ x$revert == xyes ]]; then
     echo '>> Uninstalling ASan'
 
@@ -202,6 +213,10 @@
       adb_shell ln -s /system/bin/app_process32 /system/bin/app_process
     fi
 
+    if [[ ANDROID_O -eq 1 ]]; then
+      adb_shell mv /system/etc/ld.config.txt.saved /system/etc/ld.config.txt
+    fi
+
     echo '>> Restarting shell'
     adb_shell stop
     adb_shell start
@@ -251,12 +266,6 @@
 TMPDIR="$TMPDIRBASE/new"
 mkdir "$TMPDIROLD"
 
-RELEASE=$(adb_shell getprop ro.build.version.release)
-PRE_L=0
-if echo "$RELEASE" | grep '^4\.' >&/dev/null; then
-    PRE_L=1
-fi
-
 if ! adb_shell ls -l /system/bin/app_process | grep -o '\->.*app_process' >&/dev/null; then
 
     if adb_pull /system/bin/app_process.real /dev/null >&/dev/null; then
@@ -327,11 +336,6 @@
 EOF
 }
 
-# On Android-L not allowing user segv handler breaks some applications.
-if [[ PRE_L -eq 0 ]]; then
-    ASAN_OPTIONS="$ASAN_OPTIONS,allow_user_segv_handler=1"
-fi
-
 if [[ x$extra_options != x ]] ; then
     ASAN_OPTIONS="$ASAN_OPTIONS,$extra_options"
 fi
@@ -415,15 +419,15 @@
       install "$TMPDIR/asanwrapper" /system/bin 755
       install "$TMPDIR/asanwrapper64" /system/bin 755
 
-      adb_shell ln -s $ASAN_RT /system/lib/$ASAN_RT_SYMLINK
-      adb_shell ln -s $ASAN_RT64 /system/lib64/$ASAN_RT_SYMLINK
+      adb_shell ln -sf $ASAN_RT /system/lib/$ASAN_RT_SYMLINK
+      adb_shell ln -sf $ASAN_RT64 /system/lib64/$ASAN_RT_SYMLINK
     else
       install "$TMPDIR/$ASAN_RT" /system/lib 644
       install "$TMPDIR/app_process32" /system/bin 755 $CTX
       install "$TMPDIR/app_process.wrap" /system/bin 755 $CTX
       install "$TMPDIR/asanwrapper" /system/bin 755 $CTX
 
-      adb_shell ln -s $ASAN_RT /system/lib/$ASAN_RT_SYMLINK
+      adb_shell ln -sf $ASAN_RT /system/lib/$ASAN_RT_SYMLINK
 
       adb_shell rm /system/bin/app_process
       adb_shell ln -s /system/bin/app_process.wrap /system/bin/app_process
@@ -432,6 +436,11 @@
     adb_shell cp /system/bin/sh /system/bin/sh-from-zygote
     adb_shell chcon $CTX /system/bin/sh-from-zygote
 
+    if [[ ANDROID_O -eq 1 ]]; then
+      # For Android O, the linker namespace is temporarily disabled.
+      adb_shell mv /system/etc/ld.config.txt /system/etc/ld.config.txt.saved
+    fi
+
     if [ $ENFORCING == 1 ]; then
         adb_shell setenforce 1
     fi
diff --git a/lib/asan/scripts/asan_symbolize.py b/lib/asan/scripts/asan_symbolize.py
index 1a56e44..cd5d89b 100755
--- a/lib/asan/scripts/asan_symbolize.py
+++ b/lib/asan/scripts/asan_symbolize.py
@@ -280,7 +280,7 @@
 def SystemSymbolizerFactory(system, addr, binary, arch):
   if system == 'Darwin':
     return DarwinSymbolizer(addr, binary, arch)
-  elif system == 'Linux' or system == 'FreeBSD':
+  elif system in ['Linux', 'FreeBSD', 'NetBSD']:
     return Addr2LineSymbolizer(binary)
 
 
@@ -370,7 +370,7 @@
       self.binary_name_filter = binary_name_filter
       self.dsym_hint_producer = dsym_hint_producer
       self.system = os.uname()[0]
-      if self.system not in ['Linux', 'Darwin', 'FreeBSD']:
+      if self.system not in ['Linux', 'Darwin', 'FreeBSD', 'NetBSD']:
         raise Exception('Unknown system')
       self.llvm_symbolizers = {}
       self.last_llvm_symbolizer = None
diff --git a/lib/asan/tests/CMakeLists.txt b/lib/asan/tests/CMakeLists.txt
index 8089d51..4073208 100644
--- a/lib/asan/tests/CMakeLists.txt
+++ b/lib/asan/tests/CMakeLists.txt
@@ -125,57 +125,6 @@
 # NDK r10 requires -latomic almost always.
 append_list_if(ANDROID atomic ASAN_UNITTEST_NOINST_LIBS)
 
-# Compile source for the given architecture, using compiler
-# options in ${ARGN}, and add it to the object list.
-macro(asan_compile obj_list source arch kind)
-  get_filename_component(basename ${source} NAME)
-  if(CMAKE_CONFIGURATION_TYPES)
-    set(output_obj "${CMAKE_CFG_INTDIR}/${obj_list}.${basename}.${arch}${kind}.o")
-  else()
-    set(output_obj "${obj_list}.${basename}.${arch}${kind}.o")
-  endif()
-  get_target_flags_for_arch(${arch} TARGET_CFLAGS)
-  set(COMPILE_DEPS ${ASAN_UNITTEST_HEADERS} ${ASAN_BLACKLIST_FILE})
-  if(NOT COMPILER_RT_STANDALONE_BUILD)
-    list(APPEND COMPILE_DEPS gtest asan)
-  endif()
-  clang_compile(${output_obj} ${source}
-                CFLAGS ${ARGN} ${TARGET_CFLAGS}
-                DEPS ${COMPILE_DEPS})
-  list(APPEND ${obj_list} ${output_obj})
-endmacro()
-
-# Link ASan unit test for a given architecture from a set
-# of objects in with given linker flags.
-macro(add_asan_test test_suite test_name arch kind)
-  cmake_parse_arguments(TEST "WITH_TEST_RUNTIME" "" "OBJECTS;LINK_FLAGS;SUBDIR" ${ARGN})
-  get_target_flags_for_arch(${arch} TARGET_LINK_FLAGS)
-  set(TEST_DEPS ${TEST_OBJECTS})
-  if(NOT COMPILER_RT_STANDALONE_BUILD)
-    list(APPEND TEST_DEPS asan)
-  endif()
-  if(TEST_WITH_TEST_RUNTIME)
-    list(APPEND TEST_DEPS ${ASAN_TEST_RUNTIME})
-    if(CMAKE_CONFIGURATION_TYPES)
-     set(configuration_path "${CMAKE_CFG_INTDIR}/")
-    else()
-     set(configuration_path "")
-    endif()
-    if(NOT MSVC)
-      set(asan_test_runtime_path ${configuration_path}lib${ASAN_TEST_RUNTIME}.a)
-    else()
-      set(asan_test_runtime_path ${configuration_path}${ASAN_TEST_RUNTIME}.lib)
-    endif()
-    list(APPEND TEST_OBJECTS ${asan_test_runtime_path})
-  endif()
-  add_compiler_rt_test(${test_suite} ${test_name}
-                       SUBDIR ${TEST_SUBDIR}
-                       OBJECTS ${TEST_OBJECTS}
-                       DEPS ${TEST_DEPS}
-                       LINK_FLAGS ${TEST_LINK_FLAGS}
-                                  ${TARGET_LINK_FLAGS})
-endmacro()
-
 # Main AddressSanitizer unit tests.
 add_custom_target(AsanUnitTests)
 set_target_properties(AsanUnitTests PROPERTIES FOLDER "Compiler-RT Tests")
@@ -206,131 +155,118 @@
   asan_str_test.cc
   asan_test_main.cc)
 if(APPLE)
-  list(APPEND ASAN_INST_TEST_SOURCES asan_mac_test.cc)
+  list(APPEND ASAN_INST_TEST_SOURCES asan_mac_test.cc asan_mac_test_helpers.mm)
 endif()
 
 set(ASAN_BENCHMARKS_SOURCES
   ${COMPILER_RT_GTEST_SOURCE}
   asan_benchmarks_test.cc)
 
-# Adds ASan unit tests and benchmarks for architecture.
-macro(add_asan_tests_for_arch_and_kind arch kind)
-  # Instrumented tests.
+function(add_asan_tests arch test_runtime)
+  cmake_parse_arguments(TEST "" "KIND" "CFLAGS" ${ARGN})
+
+  # Closure to keep the values.
+  function(generate_asan_tests test_objects test_suite testname)
+    generate_compiler_rt_tests(${test_objects} ${test_suite} ${testname} ${arch}
+      COMPILE_DEPS ${ASAN_UNITTEST_HEADERS} ${ASAN_BLACKLIST_FILE}
+      DEPS gtest asan
+      KIND ${TEST_KIND}
+      ${ARGN}
+      )
+    set("${test_objects}" "${${test_objects}}" PARENT_SCOPE)
+  endfunction()
+
   set(ASAN_INST_TEST_OBJECTS)
-  foreach(src ${ASAN_INST_TEST_SOURCES})
-    asan_compile(ASAN_INST_TEST_OBJECTS ${src} ${arch} ${kind}
-      ${ASAN_UNITTEST_INSTRUMENTED_CFLAGS} ${ARGN})
-  endforeach()
-  if (APPLE)
-    # Add Mac-specific helper.
-    asan_compile(ASAN_INST_TEST_OBJECTS asan_mac_test_helpers.mm ${arch} ${kind}
-                 ${ASAN_UNITTEST_INSTRUMENTED_CFLAGS} -ObjC ${ARGN})
-  endif()
+  generate_asan_tests(ASAN_INST_TEST_OBJECTS AsanUnitTests
+    "Asan-${arch}${TEST_KIND}-Test"
+    SUBDIR "default"
+    LINK_FLAGS ${ASAN_UNITTEST_INSTRUMENTED_LINK_FLAGS}
+    SOURCES ${ASAN_INST_TEST_SOURCES}
+    CFLAGS ${ASAN_UNITTEST_INSTRUMENTED_CFLAGS} ${TEST_CFLAGS})
 
-  if (MSVC)
-    # With the MSVC CRT, the choice between static and dynamic CRT is made at
-    # compile time with a macro. Simulate the effect of passing /MD to clang-cl.
-    set(ASAN_INST_DYNAMIC_TEST_OBJECTS)
-    foreach(src ${ASAN_INST_TEST_SOURCES})
-      asan_compile(ASAN_INST_DYNAMIC_TEST_OBJECTS ${src} ${arch} ${kind}
-        ${ASAN_UNITTEST_INSTRUMENTED_CFLAGS} -D_MT -D_DLL ${ARGN})
-    endforeach()
-    # Clang links the static CRT by default. Override that to use the dynamic
-    # CRT.
-    set(ASAN_DYNAMIC_UNITTEST_INSTRUMENTED_LINK_FLAGS
-      ${ASAN_DYNAMIC_UNITTEST_INSTRUMENTED_LINK_FLAGS}
-      -Wl,-nodefaultlib:libcmt,-defaultlib:msvcrt,-defaultlib:oldnames)
-  else()
-    set(ASAN_INST_DYNAMIC_TEST_OBJECTS ${ASAN_INST_TEST_OBJECTS})
-  endif()
-
-  # Create the 'default' folder where ASAN tests are produced.
-  if(CMAKE_CONFIGURATION_TYPES)
-    foreach(build_mode ${CMAKE_CONFIGURATION_TYPES})
-      file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/default/${build_mode}")
-    endforeach()
-  else()
-    file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/default")
-  endif()
-
-  add_asan_test(AsanUnitTests "Asan-${arch}${kind}-Test"
-                ${arch} ${kind} SUBDIR "default"
-                OBJECTS ${ASAN_INST_TEST_OBJECTS}
-                LINK_FLAGS ${ASAN_UNITTEST_INSTRUMENTED_LINK_FLAGS})
   if(COMPILER_RT_ASAN_HAS_STATIC_RUNTIME)
-    # Create the 'dynamic' folder where ASAN tests are produced.
-    if(CMAKE_CONFIGURATION_TYPES)
-      foreach(build_mode ${CMAKE_CONFIGURATION_TYPES})
-        file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/dynamic/${build_mode}")
-      endforeach()
+    set(dynamic_test_name "Asan-${arch}${TEST_KIND}-Dynamic-Test")
+    if(MSVC)
+
+      # With the MSVC CRT, the choice between static and dynamic CRT is made at
+      # compile time with a macro. Simulate the effect of passing /MD to clang-cl.
+      set(ASAN_DYNAMIC_TEST_OBJECTS)
+      generate_asan_tests(ASAN_DYNAMIC_TEST_OBJECTS
+        AsanDynamicUnitTests "${dynamic_test_name}"
+        SUBDIR "dynamic"
+        CFLAGS ${ASAN_UNITTEST_INSTRUMENTED_CFLAGS} -D_MT -D_DLL
+        SOURCES ${ASAN_INST_TEST_SOURCES}
+        LINK_FLAGS ${ASAN_DYNAMIC_UNITTEST_INSTRUMENTED_LINK_FLAGS}
+          -Wl,-nodefaultlib:libcmt,-defaultlib:msvcrt,-defaultlib:oldnames
+        )
     else()
-      file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/dynamic")
+
+      # Otherwise, reuse ASAN_INST_TEST_OBJECTS.
+      add_compiler_rt_test(AsanDynamicUnitTests "${dynamic_test_name}" "${arch}"
+        SUBDIR "dynamic"
+        OBJECTS ${ASAN_INST_TEST_OBJECTS}
+        DEPS asan ${ASAN_INST_TEST_OBJECTS}
+        LINK_FLAGS ${ASAN_DYNAMIC_UNITTEST_INSTRUMENTED_LINK_FLAGS}
+        )
     endif()
-
-    add_asan_test(AsanDynamicUnitTests "Asan-${arch}${kind}-Dynamic-Test"
-                  ${arch} ${kind} SUBDIR "dynamic"
-                  OBJECTS ${ASAN_INST_DYNAMIC_TEST_OBJECTS}
-                  LINK_FLAGS ${ASAN_DYNAMIC_UNITTEST_INSTRUMENTED_LINK_FLAGS})
   endif()
 
-  # Add static ASan runtime that will be linked with uninstrumented tests.
-  set(ASAN_TEST_RUNTIME RTAsanTest.${arch}${kind})
-  if(APPLE)
-    set(ASAN_TEST_RUNTIME_OBJECTS
-      $<TARGET_OBJECTS:RTAsan_dynamic.osx>
-      $<TARGET_OBJECTS:RTInterception.osx>
-      $<TARGET_OBJECTS:RTSanitizerCommon.osx>
-      $<TARGET_OBJECTS:RTSanitizerCommonLibc.osx>
-      $<TARGET_OBJECTS:RTLSanCommon.osx>
-      $<TARGET_OBJECTS:RTUbsan.osx>)
-  else()
-    set(ASAN_TEST_RUNTIME_OBJECTS
-      $<TARGET_OBJECTS:RTAsan.${arch}>
-      $<TARGET_OBJECTS:RTAsan_cxx.${arch}>
-      $<TARGET_OBJECTS:RTInterception.${arch}>
-      $<TARGET_OBJECTS:RTSanitizerCommon.${arch}>
-      $<TARGET_OBJECTS:RTSanitizerCommonLibc.${arch}>
-      $<TARGET_OBJECTS:RTLSanCommon.${arch}>
-      $<TARGET_OBJECTS:RTUbsan.${arch}>
-      $<TARGET_OBJECTS:RTUbsan_cxx.${arch}>)
-  endif()
-  add_library(${ASAN_TEST_RUNTIME} STATIC ${ASAN_TEST_RUNTIME_OBJECTS})
-  set_target_properties(${ASAN_TEST_RUNTIME} PROPERTIES
-    ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
-    FOLDER "Compiler-RT Runtime tests")
   # Uninstrumented tests.
   set(ASAN_NOINST_TEST_OBJECTS)
-  foreach(src ${ASAN_NOINST_TEST_SOURCES})
-    asan_compile(ASAN_NOINST_TEST_OBJECTS ${src} ${arch} ${kind}
-                 ${ASAN_UNITTEST_COMMON_CFLAGS} ${ARGN})
-  endforeach()
-  add_asan_test(AsanUnitTests "Asan-${arch}${kind}-Noinst-Test"
-                ${arch} ${kind} SUBDIR "default"
-                OBJECTS ${ASAN_NOINST_TEST_OBJECTS}
-                LINK_FLAGS ${ASAN_UNITTEST_NOINST_LINK_FLAGS}
-                WITH_TEST_RUNTIME)
+  generate_asan_tests(ASAN_NOINST_TEST_OBJECTS
+    AsanUnitTests "Asan-${arch}${TEST_KIND}-Noinst-Test"
+    SUBDIR "default"
+    CFLAGS ${ASAN_UNITTEST_COMMON_CFLAGS}
+    LINK_FLAGS ${ASAN_UNITTEST_NOINST_LINK_FLAGS}
+    SOURCES ${ASAN_NOINST_TEST_SOURCES}
+    RUNTIME ${test_runtime})
 
-  # Benchmarks.
-  set(ASAN_BENCHMARKS_OBJECTS)
-  foreach(src ${ASAN_BENCHMARKS_SOURCES})
-    asan_compile(ASAN_BENCHMARKS_OBJECTS ${src} ${arch} ${kind}
-                 ${ASAN_UNITTEST_INSTRUMENTED_CFLAGS} ${ARGN})
-  endforeach()
-  add_asan_test(AsanBenchmarks "Asan-${arch}${kind}-Benchmark"
-                ${arch} ${kind} SUBDIR "default"
-                OBJECTS ${ASAN_BENCHMARKS_OBJECTS}
-                LINK_FLAGS ${ASAN_UNITTEST_INSTRUMENTED_LINK_FLAGS})
-endmacro()
+  set(ASAN_BENCHMARK_OBJECTS)
+  generate_asan_tests(ASAN_BENCHMARK_OBJECTS
+    AsanBenchmarks "Asan-${arch}${TEST_KIND}-Benchmark"
+    SUBDIR "default"
+    CFLAGS ${ASAN_UNITTEST_INSTRUMENTED_CFLAGS}
+    SOURCES ${ASAN_BENCHMARKS_SOURCES}
+    LINK_FLAGS ${ASAN_UNITTEST_INSTRUMENTED_LINK_FLAGS})
+endfunction()
 
 if(COMPILER_RT_CAN_EXECUTE_TESTS AND NOT ANDROID)
   set(ASAN_TEST_ARCH ${ASAN_SUPPORTED_ARCH})
   if(APPLE)
     darwin_filter_host_archs(ASAN_SUPPORTED_ARCH ASAN_TEST_ARCH)
   endif()
+
   foreach(arch ${ASAN_TEST_ARCH})
-    add_asan_tests_for_arch_and_kind(${arch} "-inline")
-    add_asan_tests_for_arch_and_kind(${arch} "-with-calls"
-      -mllvm -asan-instrumentation-with-call-threshold=0)
+
+    # Add static ASan runtime that will be linked with uninstrumented tests.
+    set(ASAN_TEST_RUNTIME RTAsanTest.${arch})
+    if(APPLE)
+      set(ASAN_TEST_RUNTIME_OBJECTS
+        $<TARGET_OBJECTS:RTAsan_dynamic.osx>
+        $<TARGET_OBJECTS:RTInterception.osx>
+        $<TARGET_OBJECTS:RTSanitizerCommon.osx>
+        $<TARGET_OBJECTS:RTSanitizerCommonLibc.osx>
+        $<TARGET_OBJECTS:RTLSanCommon.osx>
+        $<TARGET_OBJECTS:RTUbsan.osx>)
+    else()
+      set(ASAN_TEST_RUNTIME_OBJECTS
+        $<TARGET_OBJECTS:RTAsan.${arch}>
+        $<TARGET_OBJECTS:RTAsan_cxx.${arch}>
+        $<TARGET_OBJECTS:RTInterception.${arch}>
+        $<TARGET_OBJECTS:RTSanitizerCommon.${arch}>
+        $<TARGET_OBJECTS:RTSanitizerCommonLibc.${arch}>
+        $<TARGET_OBJECTS:RTLSanCommon.${arch}>
+        $<TARGET_OBJECTS:RTUbsan.${arch}>
+        $<TARGET_OBJECTS:RTUbsan_cxx.${arch}>)
+    endif()
+    add_library(${ASAN_TEST_RUNTIME} STATIC ${ASAN_TEST_RUNTIME_OBJECTS})
+    set_target_properties(${ASAN_TEST_RUNTIME} PROPERTIES
+      ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+      FOLDER "Compiler-RT Runtime tests")
+
+    add_asan_tests(${arch} ${ASAN_TEST_RUNTIME} KIND "-inline")
+    add_asan_tests(${arch} ${ASAN_TEST_RUNTIME} KIND "-calls"
+      CFLAGS -mllvm -asan-instrumentation-with-call-threshold=0)
   endforeach()
 endif()
 
diff --git a/lib/asan/tests/asan_interface_test.cc b/lib/asan/tests/asan_interface_test.cc
index d13962b..0540ab5 100644
--- a/lib/asan/tests/asan_interface_test.cc
+++ b/lib/asan/tests/asan_interface_test.cc
@@ -102,9 +102,6 @@
   }
 }
 
-#ifndef __powerpc64__
-// FIXME: This has not reliably worked on powerpc since r279664.  Re-enable
-// this once the problem is tracked down and fixed.
 static const size_t kManyThreadsMallocSizes[] = {5, 1UL<<10, 1UL<<14, 357};
 static const size_t kManyThreadsIterations = 250;
 static const size_t kManyThreadsNumThreads =
@@ -138,7 +135,6 @@
   // so we can't check for equality here.
   EXPECT_LT(after_test, before_test + (1UL<<20));
 }
-#endif
 
 static void DoDoubleFree() {
   int *x = Ident(new int);
@@ -390,23 +386,6 @@
   free(array);
 }
 
-#if !defined(_WIN32)  // FIXME: This should really be a lit test.
-static void ErrorReportCallbackOneToZ(const char *report) {
-  int report_len = strlen(report);
-  ASSERT_EQ(6, write(2, "ABCDEF", 6));
-  ASSERT_EQ(report_len, write(2, report, report_len));
-  ASSERT_EQ(6, write(2, "ABCDEF", 6));
-  _exit(1);
-}
-
-TEST(AddressSanitizerInterface, SetErrorReportCallbackTest) {
-  __asan_set_error_report_callback(ErrorReportCallbackOneToZ);
-  EXPECT_DEATH(__asan_report_error((void *)GET_CALLER_PC(), 0, 0, 0, true, 1),
-               ASAN_PCRE_DOTALL "ABCDEF.*AddressSanitizer.*WRITE.*ABCDEF");
-  __asan_set_error_report_callback(NULL);
-}
-#endif
-
 TEST(AddressSanitizerInterface, GetOwnershipStressTest) {
   std::vector<char *> pointers;
   std::vector<size_t> sizes;
@@ -427,3 +406,11 @@
     free(pointers[i]);
 }
 
+TEST(AddressSanitizerInterface, HandleNoReturnTest) {
+  char array[40];
+  __asan_poison_memory_region(array, sizeof(array));
+  BAD_ACCESS(array, 20);
+  __asan_handle_no_return();
+  // It unpoisons the whole thread stack.
+  GOOD_ACCESS(array, 20);
+}
diff --git a/lib/asan/tests/asan_mac_test_helpers.mm b/lib/asan/tests/asan_mac_test_helpers.mm
index a7e4b9d..3f8fa26 100644
--- a/lib/asan/tests/asan_mac_test_helpers.mm
+++ b/lib/asan/tests/asan_mac_test_helpers.mm
@@ -237,4 +237,5 @@
       [[NSURL alloc] initWithString:@"Saved Application State"
                      relativeToURL:base];
   [u release];
+  [base release];
 }
diff --git a/lib/asan/tests/asan_noinst_test.cc b/lib/asan/tests/asan_noinst_test.cc
index b3a235e..65acb28 100644
--- a/lib/asan/tests/asan_noinst_test.cc
+++ b/lib/asan/tests/asan_noinst_test.cc
@@ -97,9 +97,6 @@
   MallocStress(ASAN_LOW_MEMORY ? 300000 : 1000000);
 }
 
-#ifndef __powerpc64__
-// FIXME: This has not reliably worked on powerpc since r279664.  Re-enable
-// this once the problem is tracked down and fixed.
 TEST(AddressSanitizer, ThreadedMallocStressTest) {
   const int kNumThreads = 4;
   const int kNumIterations = (ASAN_LOW_MEMORY) ? 10000 : 100000;
@@ -112,7 +109,6 @@
     PTHREAD_JOIN(t[i], 0);
   }
 }
-#endif
 
 static void PrintShadow(const char *tag, uptr ptr, size_t size) {
   fprintf(stderr, "%s shadow: %lx size % 3ld: ", tag, (long)ptr, (long)size);
@@ -210,10 +206,6 @@
   return NULL;
 }
 
-#ifndef __powerpc64__
-// FIXME: This has not reliably worked on powerpc since r279664.  Re-enable
-// this once the problem is tracked down and fixed.
-
 TEST(AddressSanitizer, ThreadedOneSizeMallocStressTest) {
   const int kNumThreads = 4;
   pthread_t t[kNumThreads];
@@ -224,7 +216,6 @@
     PTHREAD_JOIN(t[i], 0);
   }
 }
-#endif
 
 TEST(AddressSanitizer, ShadowRegionIsPoisonedTest) {
   using __asan::kHighMemEnd;
diff --git a/lib/asan/tests/asan_str_test.cc b/lib/asan/tests/asan_str_test.cc
index c790088..5cf4e05 100644
--- a/lib/asan/tests/asan_str_test.cc
+++ b/lib/asan/tests/asan_str_test.cc
@@ -95,6 +95,9 @@
   free(heap_string);
 }
 
+// 32-bit android libc++-based NDK toolchain links wcslen statically, disabling
+// the interceptor.
+#if !defined(__ANDROID__) || defined(__LP64__)
 TEST(AddressSanitizer, WcsLenTest) {
   EXPECT_EQ(0U, wcslen(Ident(L"")));
   size_t hello_len = 13;
@@ -106,6 +109,7 @@
   EXPECT_DEATH(Ident(wcslen(heap_string + 14)), RightOOBReadMessage(0));
   free(heap_string);
 }
+#endif
 
 #if SANITIZER_TEST_HAS_STRNLEN
 TEST(AddressSanitizer, StrNLenOOBTest) {
@@ -154,6 +158,33 @@
   free(str);
 }
 
+#if SANITIZER_TEST_HAS_STRNDUP
+TEST(AddressSanitizer, MAYBE_StrNDupOOBTest) {
+  size_t size = Ident(42);
+  char *str = MallocAndMemsetString(size);
+  char *new_str;
+  // Normal strndup calls.
+  str[size - 1] = '\0';
+  new_str = strndup(str, size - 13);
+  free(new_str);
+  new_str = strndup(str + size - 1, 13);
+  free(new_str);
+  // Argument points to not allocated memory.
+  EXPECT_DEATH(Ident(strndup(str - 1, 13)), LeftOOBReadMessage(1));
+  EXPECT_DEATH(Ident(strndup(str + size, 13)), RightOOBReadMessage(0));
+  // Overwrite the terminating '\0' and hit unallocated memory.
+  str[size - 1] = 'z';
+  EXPECT_DEATH(Ident(strndup(str, size + 13)), RightOOBReadMessage(0));
+  // Check handling of non 0 terminated strings.
+  Ident(new_str = strndup(str + size - 1, 0));
+  free(new_str);
+  Ident(new_str = strndup(str + size - 1, 1));
+  free(new_str);
+  EXPECT_DEATH(Ident(strndup(str + size - 1, 2)), RightOOBReadMessage(0));
+  free(str);
+}
+#endif // SANITIZER_TEST_HAS_STRNDUP
+
 TEST(AddressSanitizer, StrCpyOOBTest) {
   size_t to_size = Ident(30);
   size_t from_size = Ident(6);  // less than to_size
@@ -602,5 +633,3 @@
   RunStrtolOOBTest(&CallStrtol);
 }
 #endif
-
-
diff --git a/lib/asan/tests/asan_test.cc b/lib/asan/tests/asan_test.cc
index 7bc230a..ed00032 100644
--- a/lib/asan/tests/asan_test.cc
+++ b/lib/asan/tests/asan_test.cc
@@ -12,6 +12,19 @@
 //===----------------------------------------------------------------------===//
 #include "asan_test_utils.h"
 
+#include <errno.h>
+#include <stdarg.h>
+
+#ifdef _LIBCPP_GET_C_LOCALE
+#define SANITIZER_GET_C_LOCALE _LIBCPP_GET_C_LOCALE
+#else
+#if defined(__FreeBSD__)
+#define SANITIZER_GET_C_LOCALE 0
+#elif defined(__NetBSD__)
+#define SANITIZER_GET_C_LOCALE LC_C_LOCALE
+#endif
+#endif
+
 NOINLINE void *malloc_fff(size_t size) {
   void *res = malloc/**/(size); break_optimization(0); return res;}
 NOINLINE void *malloc_eee(size_t size) {
@@ -74,9 +87,11 @@
   delete c;
 
 #if SANITIZER_TEST_HAS_POSIX_MEMALIGN
-  int *pm;
-  int pm_res = posix_memalign((void**)&pm, kPageSize, kPageSize);
+  void *pm = 0;
+  // Valid allocation.
+  int pm_res = posix_memalign(&pm, kPageSize, kPageSize);
   EXPECT_EQ(0, pm_res);
+  EXPECT_NE(nullptr, pm);
   free(pm);
 #endif  // SANITIZER_TEST_HAS_POSIX_MEMALIGN
 
@@ -251,7 +266,8 @@
 namespace {
 
 const char kSEGVCrash[] = "AddressSanitizer: SEGV on unknown address";
-const char kOverriddenHandler[] = "ASan signal handler has been overridden\n";
+const char kOverriddenSigactionHandler[] = "Test sigaction handler\n";
+const char kOverriddenSignalHandler[] = "Test signal handler\n";
 
 TEST(AddressSanitizer, WildAddressTest) {
   char *c = (char*)0x123;
@@ -259,12 +275,12 @@
 }
 
 void my_sigaction_sighandler(int, siginfo_t*, void*) {
-  fprintf(stderr, kOverriddenHandler);
+  fprintf(stderr, kOverriddenSigactionHandler);
   exit(1);
 }
 
 void my_signal_sighandler(int signum) {
-  fprintf(stderr, kOverriddenHandler);
+  fprintf(stderr, kOverriddenSignalHandler);
   exit(1);
 }
 
@@ -273,16 +289,20 @@
   memset(&sigact, 0, sizeof(sigact));
   sigact.sa_sigaction = my_sigaction_sighandler;
   sigact.sa_flags = SA_SIGINFO;
-  // ASan should silently ignore sigaction()...
+  char *c = (char *)0x123;
+
+  EXPECT_DEATH(*c = 0, kSEGVCrash);
+
+  // ASan should allow to set sigaction()...
   EXPECT_EQ(0, sigaction(SIGSEGV, &sigact, 0));
 #ifdef __APPLE__
   EXPECT_EQ(0, sigaction(SIGBUS, &sigact, 0));
 #endif
-  char *c = (char*)0x123;
-  EXPECT_DEATH(*c = 0, kSEGVCrash);
+  EXPECT_DEATH(*c = 0, kOverriddenSigactionHandler);
+
   // ... and signal().
-  EXPECT_EQ(0, signal(SIGSEGV, my_signal_sighandler));
-  EXPECT_DEATH(*c = 0, kSEGVCrash);
+  EXPECT_NE(SIG_ERR, signal(SIGSEGV, my_signal_sighandler));
+  EXPECT_DEATH(*c = 0, kOverriddenSignalHandler);
 }
 }  // namespace
 #endif
@@ -337,8 +357,9 @@
   return 0;
 }
 
-#if !defined(__aarch64__)
+#if !defined(__aarch64__) && !defined(__powerpc64__)
 // FIXME: Infinite loop in AArch64 (PR24389).
+// FIXME: Also occasional hang on powerpc.  Maybe same problem as on AArch64?
 TEST(AddressSanitizer, ManyThreadsTest) {
   const size_t kNumThreads =
       (SANITIZER_WORDSIZE == 32 || ASAN_AVOID_EXPENSIVE_TESTS) ? 30 : 1000;
@@ -684,6 +705,7 @@
   return 0;
 }
 
+#if !defined(__thumb__)
 TEST(AddressSanitizer, ThreadStackReuseTest) {
   pthread_t t;
   PTHREAD_CREATE(&t, 0, ThreadStackReuseFunc1, 0);
@@ -691,6 +713,7 @@
   PTHREAD_CREATE(&t, 0, ThreadStackReuseFunc2, 0);
   PTHREAD_JOIN(t, 0);
 }
+#endif
 
 #if defined(__SSE2__)
 #include <emmintrin.h>
@@ -945,7 +968,7 @@
   char *addr = (char*)0x0000100000080000;
 # endif
 #endif
-  EXPECT_DEATH(*addr = 1, "AddressSanitizer: BUS on unknown");
+  EXPECT_DEATH(*addr = 1, "AddressSanitizer: (SEGV|BUS) on unknown");
 }
 #endif  // ASAN_NEEDS_SEGV
 
@@ -1090,6 +1113,11 @@
   }
 }
 
+// pthread_exit tries to perform unwinding stuff that leads to dlopen'ing
+// libgcc_s.so. dlopen in its turn calls malloc to store "libgcc_s.so" string
+// that confuses LSan on Thumb because it fails to understand that this
+// allocation happens in dynamic linker and should be ignored.
+#if !defined(__thumb__)
 static void *PthreadExit(void *a) {
   pthread_exit(0);
   return 0;
@@ -1102,6 +1130,7 @@
     PTHREAD_JOIN(t, 0);
   }
 }
+#endif
 
 // FIXME: Why does clang-cl define __EXCEPTIONS?
 #if defined(__EXCEPTIONS) && !defined(_WIN32)
@@ -1310,19 +1339,18 @@
 TEST(AddressSanitizer, snprintf_l) {
   char buff[5];
   // Check that snprintf_l() works fine with Asan.
-  int res = snprintf_l(buff, 5,
-                       _LIBCPP_GET_C_LOCALE, "%s", "snprintf_l()");
+  int res = snprintf_l(buff, 5, SANITIZER_GET_C_LOCALE, "%s", "snprintf_l()");
   EXPECT_EQ(12, res);
   // Check that vsnprintf_l() works fine with Asan.
-  res = vsnprintf_l_wrapper(buff, 5,
-                            _LIBCPP_GET_C_LOCALE, "%s", "vsnprintf_l()");
+  res = vsnprintf_l_wrapper(buff, 5, SANITIZER_GET_C_LOCALE, "%s",
+                            "vsnprintf_l()");
   EXPECT_EQ(13, res);
 
-  EXPECT_DEATH(snprintf_l(buff, 10,
-                          _LIBCPP_GET_C_LOCALE, "%s", "snprintf_l()"),
-                "AddressSanitizer: stack-buffer-overflow");
-  EXPECT_DEATH(vsnprintf_l_wrapper(buff, 10,
-                                  _LIBCPP_GET_C_LOCALE, "%s", "vsnprintf_l()"),
-                "AddressSanitizer: stack-buffer-overflow");
+  EXPECT_DEATH(
+      snprintf_l(buff, 10, SANITIZER_GET_C_LOCALE, "%s", "snprintf_l()"),
+      "AddressSanitizer: stack-buffer-overflow");
+  EXPECT_DEATH(vsnprintf_l_wrapper(buff, 10, SANITIZER_GET_C_LOCALE, "%s",
+                                   "vsnprintf_l()"),
+               "AddressSanitizer: stack-buffer-overflow");
 }
 #endif
diff --git a/lib/asan/tests/asan_test_main.cc b/lib/asan/tests/asan_test_main.cc
index 1071d44..0c1b93c 100644
--- a/lib/asan/tests/asan_test_main.cc
+++ b/lib/asan/tests/asan_test_main.cc
@@ -13,15 +13,23 @@
 #include "asan_test_utils.h"
 #include "sanitizer_common/sanitizer_platform.h"
 
-// Default ASAN_OPTIONS for the unit tests. Let's turn symbolication off to
-// speed up testing (unit tests don't use it anyway).
+// Default ASAN_OPTIONS for the unit tests.
 extern "C" const char* __asan_default_options() {
 #if SANITIZER_MAC
   // On Darwin, we default to `abort_on_error=1`, which would make tests run
-  // much slower. Let's override this and run lit tests with 'abort_on_error=0'.
-  // Also, make sure we do not overwhelm the syslog while testing.
+  // much slower. Let's override this and run lit tests with 'abort_on_error=0'
+  // and make sure we do not overwhelm the syslog while testing. Also, let's
+  // turn symbolization off to speed up testing, especially when not running
+  // with llvm-symbolizer but with atos.
   return "symbolize=false:abort_on_error=0:log_to_syslog=0";
+#elif SANITIZER_SUPPRESS_LEAK_ON_PTHREAD_EXIT
+  // On PowerPC and ARM Thumb, a couple tests involving pthread_exit fail due to
+  // leaks detected by LSan. Symbolized leak report is required to apply a
+  // suppression for this known problem.
+  return "";
 #else
+  // Let's turn symbolization off to speed up testing (more than 3 times speedup
+  // observed).
   return "symbolize=false";
 #endif
 }
diff --git a/lib/asan/tests/asan_test_utils.h b/lib/asan/tests/asan_test_utils.h
index f16d939..d7b6f82 100644
--- a/lib/asan/tests/asan_test_utils.h
+++ b/lib/asan/tests/asan_test_utils.h
@@ -30,11 +30,11 @@
 #include <stdint.h>
 #include <assert.h>
 #include <algorithm>
+#include <setjmp.h>
 
 #if !defined(_WIN32)
 # include <strings.h>
 # include <sys/mman.h>
-# include <setjmp.h>
 #endif
 
 #ifdef __linux__
@@ -45,7 +45,7 @@
 #include <unistd.h>
 #endif
 
-#if !defined(__APPLE__) && !defined(__FreeBSD__)
+#if !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(__NetBSD__)
 #include <malloc.h>
 #endif
 
diff --git a/lib/asan/weak_symbols.txt b/lib/asan/weak_symbols.txt
index ba7b027..fe680f8 100644
--- a/lib/asan/weak_symbols.txt
+++ b/lib/asan/weak_symbols.txt
@@ -1,3 +1,12 @@
 ___asan_default_options
 ___asan_default_suppressions
 ___asan_on_error
+___asan_set_shadow_00
+___asan_set_shadow_f1
+___asan_set_shadow_f2
+___asan_set_shadow_f3
+___asan_set_shadow_f4
+___asan_set_shadow_f5
+___asan_set_shadow_f6
+___asan_set_shadow_f7
+___asan_set_shadow_f8
diff --git a/lib/builtins/CMakeLists.txt b/lib/builtins/CMakeLists.txt
index ad9059c..6128abc 100644
--- a/lib/builtins/CMakeLists.txt
+++ b/lib/builtins/CMakeLists.txt
@@ -42,7 +42,8 @@
   ashlti3.c
   ashrdi3.c
   ashrti3.c
-  clear_cache.c
+  bswapdi2.c
+  bswapsi2.c
   clzdi2.c
   clzsi2.c
   clzti2.c
@@ -66,11 +67,10 @@
   divti3.c
   divtf3.c
   divxc3.c
-  enable_execute_stack.c
-  eprintf.c
   extendsfdf2.c
   extendhfsf2.c
   ffsdi2.c
+  ffssi2.c
   ffsti2.c
   fixdfdi.c
   fixdfsi.c
@@ -165,25 +165,44 @@
   umodsi3.c
   umodti3.c)
 
+set(GENERIC_TF_SOURCES
+  comparetf2.c
+  extenddftf2.c
+  extendsftf2.c
+  fixtfdi.c
+  fixtfsi.c
+  fixtfti.c
+  fixunstfdi.c
+  fixunstfsi.c
+  fixunstfti.c
+  floatditf.c
+  floatsitf.c
+  floattitf.c
+  floatunditf.c
+  floatunsitf.c
+  floatuntitf.c
+  multc3.c
+  trunctfdf2.c
+  trunctfsf2.c)
+
 option(COMPILER_RT_EXCLUDE_ATOMIC_BUILTIN
   "Skip the atomic builtin (this may be needed if system headers are unavailable)"
   Off)
 
+if(NOT FUCHSIA AND NOT COMPILER_RT_BAREMETAL_BUILD)
+  set(GENERIC_SOURCES
+    ${GENERIC_SOURCES}
+    emutls.c 
+    enable_execute_stack.c
+    eprintf.c)
+endif()
+
 if(COMPILER_RT_HAS_ATOMIC_KEYWORD AND NOT COMPILER_RT_EXCLUDE_ATOMIC_BUILTIN)
   set(GENERIC_SOURCES
     ${GENERIC_SOURCES}
     atomic.c)
 endif()
 
-set(MSVC_SOURCES
- divsc3.c
- divdc3.c
- divxc3.c
- mulsc3.c
- muldc3.c
- mulxc3.c)
-
-
 if(APPLE)
   set(GENERIC_SOURCES
     ${GENERIC_SOURCES}
@@ -195,18 +214,18 @@
     atomic_thread_fence.c)
 endif()
 
-if(NOT WIN32 OR MINGW)
-  set(GENERIC_SOURCES
-      ${GENERIC_SOURCES}
-      emutls.c)
-endif()
-
 if (HAVE_UNWIND_H)
   set(GENERIC_SOURCES
       ${GENERIC_SOURCES}
       gcc_personality_v0.c)
 endif ()
 
+if (NOT FUCHSIA)
+  set(GENERIC_SOURCES
+    ${GENERIC_SOURCES}
+    clear_cache.c)
+endif()
+
 if (NOT MSVC)
   set(x86_64_SOURCES
       x86_64/chkstk.S
@@ -216,8 +235,8 @@
       x86_64/floatdixf.c
       x86_64/floatundidf.S
       x86_64/floatundisf.S
-      x86_64/floatundixf.S
-      ${GENERIC_SOURCES})
+      x86_64/floatundixf.S)
+  filter_builtin_sources(x86_64_SOURCES EXCLUDE x86_64_SOURCES "${x86_64_SOURCES};${GENERIC_SOURCES}")
   set(x86_64h_SOURCES ${x86_64_SOURCES})
 
   if (WIN32)
@@ -243,8 +262,8 @@
       i386/moddi3.S
       i386/muldi3.S
       i386/udivdi3.S
-      i386/umoddi3.S
-      ${GENERIC_SOURCES})
+      i386/umoddi3.S)
+  filter_builtin_sources(i386_SOURCES EXCLUDE i386_SOURCES "${i386_SOURCES};${GENERIC_SOURCES}")
 
   if (WIN32)
     set(i386_SOURCES
@@ -252,9 +271,6 @@
         i386/chkstk.S
         i386/chkstk2.S)
   endif()
-
-  set(i686_SOURCES
-      ${i386_SOURCES})
 else () # MSVC
   # Use C versions of functions when building on MSVC
   # MSVC's assembler takes Intel syntax, not AT&T syntax.
@@ -263,10 +279,9 @@
       x86_64/floatdidf.c
       x86_64/floatdisf.c
       x86_64/floatdixf.c
-      ${MSVC_SOURCES})
+      ${GENERIC_SOURCES})
   set(x86_64h_SOURCES ${x86_64_SOURCES})
-  set(i386_SOURCES ${MSVC_SOURCES})
-  set(i686_SOURCES ${i386_SOURCES})
+  set(i386_SOURCES ${GENERIC_SOURCES})
 endif () # if (NOT MSVC)
 
 set(arm_SOURCES
@@ -300,8 +315,8 @@
   arm/sync_fetch_and_xor_8.S
   arm/udivmodsi4.S
   arm/udivsi3.S
-  arm/umodsi3.S
-  ${GENERIC_SOURCES})
+  arm/umodsi3.S)
+filter_builtin_sources(arm_SOURCES EXCLUDE arm_SOURCES "${arm_SOURCES};${GENERIC_SOURCES}")
 
 set(thumb1_SOURCES
   arm/divsi3.S
@@ -403,7 +418,8 @@
       udivmoddi4.c
       udivmodsi4.c
       udivsi3.c
-      umoddi3.c)
+      umoddi3.c
+      emutls.c)
 elseif(NOT WIN32)
   # TODO the EABI sources should only be added to EABI targets
   set(arm_SOURCES
@@ -417,24 +433,7 @@
 endif()
 
 set(aarch64_SOURCES
-  comparetf2.c
-  extenddftf2.c
-  extendsftf2.c
-  fixtfdi.c
-  fixtfsi.c
-  fixtfti.c
-  fixunstfdi.c
-  fixunstfsi.c
-  fixunstfti.c
-  floatditf.c
-  floatsitf.c
-  floattitf.c
-  floatunditf.c
-  floatunsitf.c
-  floatuntitf.c
-  multc3.c
-  trunctfdf2.c
-  trunctfsf2.c
+  ${GENERIC_TF_SOURCES}
   ${GENERIC_SOURCES})
 
 set(armhf_SOURCES ${arm_SOURCES})
@@ -450,8 +449,10 @@
 
 set(mips_SOURCES ${GENERIC_SOURCES})
 set(mipsel_SOURCES ${mips_SOURCES})
-set(mips64_SOURCES ${mips_SOURCES})
-set(mips64el_SOURCES ${mips_SOURCES})
+set(mips64_SOURCES ${GENERIC_TF_SOURCES}
+                   ${mips_SOURCES})
+set(mips64el_SOURCES ${GENERIC_TF_SOURCES}
+                     ${mips_SOURCES})
 
 set(wasm32_SOURCES ${GENERIC_SOURCES})
 set(wasm64_SOURCES ${GENERIC_SOURCES})
@@ -485,11 +486,20 @@
 
   foreach (arch ${BUILTIN_SUPPORTED_ARCH})
     if (CAN_TARGET_${arch})
+      # NOTE: some architectures (e.g. i386) have multiple names.  Ensure that
+      # we catch them all.
+      set(_arch ${arch})

+      if("${arch}" STREQUAL "armv6m")
+        set(_arch "arm|armv6m")
+      elseif("${arch}" MATCHES "^(armhf|armv7|armv7s|armv7k|armv7m|armv7em)$")
+        set(_arch "arm")
+      endif()
+
       # Filter out generic versions of routines that are re-implemented in
       # architecture specific manner.  This prevents multiple definitions of the
       # same symbols, making the symbol selection non-deterministic.
       foreach (_file ${${arch}_SOURCES})
-        if (${_file} MATCHES ${arch}/*)
+        if (${_file} MATCHES ${_arch}/*)
           get_filename_component(_name ${_file} NAME)
           string(REPLACE ".S" ".c" _cname "${_name}")
           list(REMOVE_ITEM ${arch}_SOURCES ${_cname})
@@ -499,7 +509,7 @@
       # Needed for clear_cache on debug mode, due to r7's usage in inline asm.
       # Release mode already sets it via -O2/3, Debug mode doesn't.
       if (${arch} STREQUAL "armhf")
-        list(APPEND BUILTIN_CFLAGS -fomit-frame-pointer)
+        list(APPEND BUILTIN_CFLAGS -fomit-frame-pointer -DCOMPILER_RT_ARMHF_TARGET)
       endif()
 
       add_compiler_rt_runtime(clang_rt.builtins
diff --git a/lib/builtins/README.txt b/lib/builtins/README.txt
index ad36e4e..e603dfa 100644
--- a/lib/builtins/README.txt
+++ b/lib/builtins/README.txt
@@ -45,6 +45,7 @@
 si_int __ctzdi2(di_int a);  // count trailing zeros
 si_int __ctzti2(ti_int a);  // count trailing zeros
 
+si_int __ffssi2(si_int a);  // find least significant 1 bit
 si_int __ffsdi2(di_int a);  // find least significant 1 bit
 si_int __ffsti2(ti_int a);  // find least significant 1 bit
 
@@ -56,8 +57,8 @@
 si_int __popcountdi2(di_int a);  // bit population
 si_int __popcountti2(ti_int a);  // bit population
 
-uint32_t __bswapsi2(uint32_t a);   // a byteswapped, arm only
-uint64_t __bswapdi2(uint64_t a);   // a byteswapped, arm only
+uint32_t __bswapsi2(uint32_t a);   // a byteswapped
+uint64_t __bswapdi2(uint64_t a);   // a byteswapped
 
 // Integral arithmetic
 
diff --git a/lib/builtins/adddf3.c b/lib/builtins/adddf3.c
index 8b7aae0..9a39013 100644
--- a/lib/builtins/adddf3.c
+++ b/lib/builtins/adddf3.c
@@ -15,8 +15,16 @@
 #define DOUBLE_PRECISION
 #include "fp_add_impl.inc"
 
-ARM_EABI_FNALIAS(dadd, adddf3)
-
 COMPILER_RT_ABI double __adddf3(double a, double b){
     return __addXf3__(a, b);
 }
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI double __aeabi_dadd(double a, double b) {
+  return __adddf3(a, b);
+}
+#else
+AEABI_RTABI double __aeabi_dadd(double a, double b) COMPILER_RT_ALIAS(__adddf3);
+#endif
+#endif
diff --git a/lib/builtins/addsf3.c b/lib/builtins/addsf3.c
index 0f5d6ea..c5c1a41 100644
--- a/lib/builtins/addsf3.c
+++ b/lib/builtins/addsf3.c
@@ -15,8 +15,16 @@
 #define SINGLE_PRECISION
 #include "fp_add_impl.inc"
 
-ARM_EABI_FNALIAS(fadd, addsf3)
-
 COMPILER_RT_ABI float __addsf3(float a, float b) {
     return __addXf3__(a, b);
 }
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI float __aeabi_fadd(float a, float b) {
+  return __addsf3(a, b);
+}
+#else
+AEABI_RTABI float __aeabi_fadd(float a, float b) COMPILER_RT_ALIAS(__addsf3);
+#endif
+#endif
diff --git a/lib/builtins/arm/aeabi_cdcmp.S b/lib/builtins/arm/aeabi_cdcmp.S
index b67814d..87dd03d 100644
--- a/lib/builtins/arm/aeabi_cdcmp.S
+++ b/lib/builtins/arm/aeabi_cdcmp.S
@@ -30,7 +30,7 @@
         push {r0-r3, lr}
         bl __aeabi_cdcmpeq_check_nan
         cmp r0, #1
-#if __ARM_ARCH_ISA_THUMB == 1
+#if defined(USE_THUMB_1)
         beq 1f
         // NaN has been ruled out, so __aeabi_cdcmple can't trap
         mov r0, sp
@@ -46,9 +46,17 @@
         pop {r0-r3, lr}
 
         // NaN has been ruled out, so __aeabi_cdcmple can't trap
+        // Use "it ne" + unconditional branch to guarantee a supported relocation if
+        // __aeabi_cdcmple is in a different section for some builds.
+        IT(ne)
         bne __aeabi_cdcmple
 
+#if defined(USE_THUMB_2)
+        mov ip, #APSR_C
+        msr APSR_nzcvq, ip
+#else
         msr CPSR_f, #APSR_C
+#endif
         JMP(lr)
 #endif
 END_COMPILERRT_FUNCTION(__aeabi_cdcmpeq)
@@ -73,7 +81,7 @@
 
         bl __aeabi_dcmplt
         cmp r0, #1
-#if __ARM_ARCH_ISA_THUMB == 1
+#if defined(USE_THUMB_1)
         bne 1f
         // Z = 0, C = 0
         movs r0, #1
@@ -95,17 +103,23 @@
         lsls r0, r0, #31
         pop {r0-r3, pc}
 #else
+        ITT(eq)
         moveq ip, #0
         beq 1f
 
         ldm sp, {r0-r3}
         bl __aeabi_dcmpeq
         cmp r0, #1
+        ITE(eq)
         moveq ip, #(APSR_C | APSR_Z)
         movne ip, #(APSR_C)
 
 1:
+#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
+        msr APSR_nzcvq, ip
+#else
         msr CPSR_f, ip
+#endif
         pop {r0-r3}
         POP_PC()
 #endif
diff --git a/lib/builtins/arm/aeabi_cdcmpeq_check_nan.c b/lib/builtins/arm/aeabi_cdcmpeq_check_nan.c
index 577f6b2..7578433 100644
--- a/lib/builtins/arm/aeabi_cdcmpeq_check_nan.c
+++ b/lib/builtins/arm/aeabi_cdcmpeq_check_nan.c
@@ -8,9 +8,9 @@
 //===----------------------------------------------------------------------===//
 
 #include <stdint.h>
+#include "../int_lib.h"
 
-__attribute__((pcs("aapcs")))
-__attribute__((visibility("hidden")))
+AEABI_RTABI __attribute__((visibility("hidden")))
 int __aeabi_cdcmpeq_check_nan(double a, double b) {
     return __builtin_isnan(a) || __builtin_isnan(b);
 }
diff --git a/lib/builtins/arm/aeabi_cfcmp.S b/lib/builtins/arm/aeabi_cfcmp.S
index e37aa3d..c5fee6b 100644
--- a/lib/builtins/arm/aeabi_cfcmp.S
+++ b/lib/builtins/arm/aeabi_cfcmp.S
@@ -30,7 +30,7 @@
         push {r0-r3, lr}
         bl __aeabi_cfcmpeq_check_nan
         cmp r0, #1
-#if __ARM_ARCH_ISA_THUMB == 1
+#if defined(USE_THUMB_1)
         beq 1f
         // NaN has been ruled out, so __aeabi_cfcmple can't trap
         mov r0, sp
@@ -46,9 +46,17 @@
         pop {r0-r3, lr}
 
         // NaN has been ruled out, so __aeabi_cfcmple can't trap
+        // Use "it ne" + unconditional branch to guarantee a supported relocation if
+        // __aeabi_cfcmple is in a different section for some builds.
+        IT(ne)
         bne __aeabi_cfcmple
 
+#if defined(USE_THUMB_2)
+        mov ip, #APSR_C
+        msr APSR_nzcvq, ip
+#else
         msr CPSR_f, #APSR_C
+#endif
         JMP(lr)
 #endif
 END_COMPILERRT_FUNCTION(__aeabi_cfcmpeq)
@@ -73,7 +81,7 @@
 
         bl __aeabi_fcmplt
         cmp r0, #1
-#if __ARM_ARCH_ISA_THUMB == 1
+#if defined(USE_THUMB_1)
         bne 1f
         // Z = 0, C = 0
         movs r0, #1
@@ -95,17 +103,23 @@
         lsls r0, r0, #31
         pop {r0-r3, pc}
 #else
+        ITT(eq)
         moveq ip, #0
         beq 1f
 
         ldm sp, {r0-r3}
         bl __aeabi_fcmpeq
         cmp r0, #1
+        ITE(eq)
         moveq ip, #(APSR_C | APSR_Z)
         movne ip, #(APSR_C)
 
 1:
+#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
+        msr APSR_nzcvq, ip
+#else
         msr CPSR_f, ip
+#endif
         pop {r0-r3}
         POP_PC()
 #endif
diff --git a/lib/builtins/arm/aeabi_cfcmpeq_check_nan.c b/lib/builtins/arm/aeabi_cfcmpeq_check_nan.c
index 992e31f..43dde9a 100644
--- a/lib/builtins/arm/aeabi_cfcmpeq_check_nan.c
+++ b/lib/builtins/arm/aeabi_cfcmpeq_check_nan.c
@@ -8,9 +8,9 @@
 //===----------------------------------------------------------------------===//
 
 #include <stdint.h>
+#include "../int_lib.h"
 
-__attribute__((pcs("aapcs")))
-__attribute__((visibility("hidden")))
+AEABI_RTABI __attribute__((visibility("hidden")))
 int __aeabi_cfcmpeq_check_nan(float a, float b) {
     return __builtin_isnan(a) || __builtin_isnan(b);
 }
diff --git a/lib/builtins/arm/aeabi_dcmp.S b/lib/builtins/arm/aeabi_dcmp.S
index 51539c0..9fa78b4 100644
--- a/lib/builtins/arm/aeabi_dcmp.S
+++ b/lib/builtins/arm/aeabi_dcmp.S
@@ -18,11 +18,20 @@
 //   }
 // }
 
+#if defined(COMPILER_RT_ARMHF_TARGET)
+#  define CONVERT_DCMP_ARGS_TO_DF2_ARGS                    \
+        vmov      d0, r0, r1                     SEPARATOR \
+        vmov      d1, r2, r3
+#else
+#  define CONVERT_DCMP_ARGS_TO_DF2_ARGS
+#endif
+
 #define DEFINE_AEABI_DCMP(cond)                            \
         .syntax unified                          SEPARATOR \
         .p2align 2                               SEPARATOR \
 DEFINE_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond)           \
         push      { r4, lr }                     SEPARATOR \
+        CONVERT_DCMP_ARGS_TO_DF2_ARGS            SEPARATOR \
         bl        SYMBOL_NAME(__ ## cond ## df2) SEPARATOR \
         cmp       r0, #0                         SEPARATOR \
         b ## cond 1f                             SEPARATOR \
diff --git a/lib/builtins/arm/aeabi_div0.c b/lib/builtins/arm/aeabi_div0.c
index ccc95fa..dc30313 100644
--- a/lib/builtins/arm/aeabi_div0.c
+++ b/lib/builtins/arm/aeabi_div0.c
@@ -26,16 +26,18 @@
  * line.
  */
 
+#include "../int_lib.h"
+
 /* provide an unused declaration to pacify pendantic compilation */
 extern unsigned char declaration;
 
 #if defined(__ARM_EABI__)
-int __attribute__((weak)) __attribute__((visibility("hidden")))
+AEABI_RTABI int __attribute__((weak)) __attribute__((visibility("hidden")))
 __aeabi_idiv0(int return_value) {
   return return_value;
 }
 
-long long __attribute__((weak)) __attribute__((visibility("hidden")))
+AEABI_RTABI long long __attribute__((weak)) __attribute__((visibility("hidden")))
 __aeabi_ldiv0(long long return_value) {
   return return_value;
 }
diff --git a/lib/builtins/arm/aeabi_drsub.c b/lib/builtins/arm/aeabi_drsub.c
index fc17d5a..1254886 100644
--- a/lib/builtins/arm/aeabi_drsub.c
+++ b/lib/builtins/arm/aeabi_drsub.c
@@ -10,10 +10,10 @@
 #define DOUBLE_PRECISION
 #include "../fp_lib.h"
 
-COMPILER_RT_ABI fp_t
+AEABI_RTABI fp_t
 __aeabi_dsub(fp_t, fp_t);
 
-COMPILER_RT_ABI fp_t
+AEABI_RTABI fp_t
 __aeabi_drsub(fp_t a, fp_t b) {
     return __aeabi_dsub(b, a);
 }
diff --git a/lib/builtins/arm/aeabi_fcmp.S b/lib/builtins/arm/aeabi_fcmp.S
index 8e7774b..ea5b96c 100644
--- a/lib/builtins/arm/aeabi_fcmp.S
+++ b/lib/builtins/arm/aeabi_fcmp.S
@@ -18,11 +18,20 @@
 //   }
 // }
 
+#if defined(COMPILER_RT_ARMHF_TARGET)
+#  define CONVERT_FCMP_ARGS_TO_SF2_ARGS                    \
+        vmov      s0, r0                         SEPARATOR \
+        vmov      s1, r1
+#else
+#  define CONVERT_FCMP_ARGS_TO_SF2_ARGS
+#endif
+
 #define DEFINE_AEABI_FCMP(cond)                            \
         .syntax unified                          SEPARATOR \
         .p2align 2                               SEPARATOR \
 DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond)           \
         push      { r4, lr }                     SEPARATOR \
+        CONVERT_FCMP_ARGS_TO_SF2_ARGS            SEPARATOR \
         bl        SYMBOL_NAME(__ ## cond ## sf2) SEPARATOR \
         cmp       r0, #0                         SEPARATOR \
         b ## cond 1f                             SEPARATOR \
diff --git a/lib/builtins/arm/aeabi_frsub.c b/lib/builtins/arm/aeabi_frsub.c
index 64258dc..34f2303 100644
--- a/lib/builtins/arm/aeabi_frsub.c
+++ b/lib/builtins/arm/aeabi_frsub.c
@@ -10,10 +10,10 @@
 #define SINGLE_PRECISION
 #include "../fp_lib.h"
 
-COMPILER_RT_ABI fp_t
+AEABI_RTABI fp_t
 __aeabi_fsub(fp_t, fp_t);
 
-COMPILER_RT_ABI fp_t
+AEABI_RTABI fp_t
 __aeabi_frsub(fp_t a, fp_t b) {
     return __aeabi_fsub(b, a);
 }
diff --git a/lib/builtins/arm/aeabi_idivmod.S b/lib/builtins/arm/aeabi_idivmod.S
index 0164b15..9c9c80a 100644
--- a/lib/builtins/arm/aeabi_idivmod.S
+++ b/lib/builtins/arm/aeabi_idivmod.S
@@ -20,16 +20,18 @@
 #endif
 
         .syntax unified
+        .text
+        DEFINE_CODE_STATE
         .p2align 2
 DEFINE_COMPILERRT_FUNCTION(__aeabi_idivmod)
-#if __ARM_ARCH_ISA_THUMB == 1
+#if defined(USE_THUMB_1)
         push    {r0, r1, lr}
         bl      SYMBOL_NAME(__divsi3)
         pop     {r1, r2, r3} // now r0 = quot, r1 = num, r2 = denom
         muls    r2, r0, r2   // r2 = quot * denom
         subs    r1, r1, r2
         JMP     (r3)
-#else
+#else  // defined(USE_THUMB_1)
         push    { lr }
         sub     sp, sp, #4
         mov     r2, sp
@@ -42,7 +44,7 @@
         ldr     r1, [sp]
         add     sp, sp, #4
         pop     { pc }
-#endif // __ARM_ARCH_ISA_THUMB == 1
+#endif //  defined(USE_THUMB_1)
 END_COMPILERRT_FUNCTION(__aeabi_idivmod)
 
 NO_EXEC_STACK_DIRECTIVE
diff --git a/lib/builtins/arm/aeabi_memset.S b/lib/builtins/arm/aeabi_memset.S
index 633f592..b8022d9 100644
--- a/lib/builtins/arm/aeabi_memset.S
+++ b/lib/builtins/arm/aeabi_memset.S
@@ -24,6 +24,7 @@
 DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset4, __aeabi_memset)
 DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset8, __aeabi_memset)
 
+        .p2align 2
 DEFINE_COMPILERRT_FUNCTION(__aeabi_memclr)
         mov     r2, r1
         movs    r1, #0
diff --git a/lib/builtins/arm/aeabi_uidivmod.S b/lib/builtins/arm/aeabi_uidivmod.S
index a627fc7..88a4a6d 100644
--- a/lib/builtins/arm/aeabi_uidivmod.S
+++ b/lib/builtins/arm/aeabi_uidivmod.S
@@ -21,9 +21,11 @@
 #endif
 
         .syntax unified
+        .text
+        DEFINE_CODE_STATE
         .p2align 2
 DEFINE_COMPILERRT_FUNCTION(__aeabi_uidivmod)
-#if __ARM_ARCH_ISA_THUMB == 1
+#if defined(USE_THUMB_1)
         cmp     r0, r1
         bcc     LOCAL_LABEL(case_denom_larger)
         push    {r0, r1, lr}
@@ -36,7 +38,7 @@
         movs    r1, r0
         movs    r0, #0
         JMP     (lr)
-#else
+#else // defined(USE_THUMB_1)
         push    { lr }
         sub     sp, sp, #4
         mov     r2, sp
diff --git a/lib/builtins/arm/bswapdi2.S b/lib/builtins/arm/bswapdi2.S
index fb226ce..e9db8ba 100644
--- a/lib/builtins/arm/bswapdi2.S
+++ b/lib/builtins/arm/bswapdi2.S
@@ -11,9 +11,7 @@
 
 	.syntax unified
 	.text
-#if __ARM_ARCH_ISA_THUMB == 2
-	.thumb
-#endif
+	DEFINE_CODE_STATE
 
 //
 // extern uint64_t __bswapdi2(uint64_t);
@@ -21,11 +19,7 @@
 // Reverse all the bytes in a 64-bit integer.
 //
 	.p2align 2
-#if __ARM_ARCH_ISA_THUMB == 2
-DEFINE_COMPILERRT_THUMB_FUNCTION(__bswapdi2)
-#else
 DEFINE_COMPILERRT_FUNCTION(__bswapdi2)
-#endif
 #if __ARM_ARCH < 6
     // before armv6 does not have "rev" instruction
     // r2 = rev(r0)
diff --git a/lib/builtins/arm/bswapsi2.S b/lib/builtins/arm/bswapsi2.S
index 553c3c2..1f6eed5 100644
--- a/lib/builtins/arm/bswapsi2.S
+++ b/lib/builtins/arm/bswapsi2.S
@@ -11,9 +11,7 @@
 
 	.syntax unified
 	.text
-#if __ARM_ARCH_ISA_THUMB == 2
-	.thumb
-#endif
+	DEFINE_CODE_STATE
 
 //
 // extern uint32_t __bswapsi2(uint32_t);
@@ -21,11 +19,7 @@
 // Reverse all the bytes in a 32-bit integer.
 //
 	.p2align 2
-#if __ARM_ARCH_ISA_THUMB == 2
-DEFINE_COMPILERRT_THUMB_FUNCTION(__bswapsi2)
-#else
 DEFINE_COMPILERRT_FUNCTION(__bswapsi2)
-#endif
 #if __ARM_ARCH < 6
     // before armv6 does not have "rev" instruction
  	eor	r1, r0, r0, ror #16
diff --git a/lib/builtins/arm/clzdi2.S b/lib/builtins/arm/clzdi2.S
index 6068c17..fc03b38 100644
--- a/lib/builtins/arm/clzdi2.S
+++ b/lib/builtins/arm/clzdi2.S
@@ -15,17 +15,10 @@
 
 	.syntax unified
 	.text
-#if __ARM_ARCH_ISA_THUMB == 2
-	.thumb
-#endif
-
+	DEFINE_CODE_STATE
 
 	.p2align	2
-#if __ARM_ARCH_ISA_THUMB == 2
-DEFINE_COMPILERRT_THUMB_FUNCTION(__clzdi2)
-#else
 DEFINE_COMPILERRT_FUNCTION(__clzdi2)
-#endif
 #ifdef __ARM_FEATURE_CLZ
 #ifdef __ARMEB__
 	cmp	r0, 0
diff --git a/lib/builtins/arm/clzsi2.S b/lib/builtins/arm/clzsi2.S
index c2ba3a8..f2ce59c 100644
--- a/lib/builtins/arm/clzsi2.S
+++ b/lib/builtins/arm/clzsi2.S
@@ -15,16 +15,10 @@
 
 	.syntax unified
 	.text
-#if __ARM_ARCH_ISA_THUMB == 2
-	.thumb
-#endif
+	DEFINE_CODE_STATE
 
 	.p2align	2
-#if __ARM_ARCH_ISA_THUMB == 2
-DEFINE_COMPILERRT_THUMB_FUNCTION(__clzsi2)
-#else
 DEFINE_COMPILERRT_FUNCTION(__clzsi2)
-#endif
 #ifdef __ARM_FEATURE_CLZ
 	clz	r0, r0
 	JMP(lr)
diff --git a/lib/builtins/arm/comparesf2.S b/lib/builtins/arm/comparesf2.S
index ef7091b..c6c4cc0 100644
--- a/lib/builtins/arm/comparesf2.S
+++ b/lib/builtins/arm/comparesf2.S
@@ -38,10 +38,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "../assembly.h"
-.syntax unified
-#if __ARM_ARCH_ISA_THUMB == 2
-.thumb
-#endif
+    .syntax unified
+    .text
+    DEFINE_CODE_STATE
 
 @ int __eqsf2(float a, float b)
 
@@ -53,7 +52,7 @@
 #endif
     // Make copies of a and b with the sign bit shifted off the top.  These will
     // be used to detect zeros and NaNs.
-#if __ARM_ARCH_ISA_THUMB == 1
+#if defined(USE_THUMB_1)
     push    {r6, lr}
     lsls    r2,         r0, #1
     lsls    r3,         r1, #1
@@ -67,7 +66,7 @@
     // flag if both a and b are zero (of either sign).  The shift of r3 doesn't
     // effect this at all, but it *does* make sure that the C flag is clear for
     // the subsequent operations.
-#if __ARM_ARCH_ISA_THUMB == 1
+#if defined(USE_THUMB_1)
     lsrs    r6,     r3, #1
     orrs    r6,     r2
 #else
@@ -75,7 +74,7 @@
 #endif
     // Next, we check if a and b have the same or different signs.  If they have
     // opposite signs, this eor will set the N flag.
-#if __ARM_ARCH_ISA_THUMB == 1
+#if defined(USE_THUMB_1)
     beq     1f
     movs    r6,     r0
     eors    r6,     r1
@@ -89,7 +88,7 @@
     // ignoring NaNs for now), this subtract will zero out r0.  If they have the
     // same sign, the flags are updated as they would be for a comparison of the
     // absolute values of a and b.
-#if __ARM_ARCH_ISA_THUMB == 1
+#if defined(USE_THUMB_1)
     bmi     1f
     subs    r0,     r2, r3
 1:
@@ -108,7 +107,7 @@
     // still clear from the shift argument in orrs; if a is positive and b
     // negative, this places 0 in r0; if a is negative and b positive, -1 is
     // placed in r0.
-#if __ARM_ARCH_ISA_THUMB == 1
+#if defined(USE_THUMB_1)
     bhs     1f
     // Here if a and b have the same sign and absA < absB, the result is thus
     // b < 0 ? 1 : -1. Same if a and b have the opposite sign (ignoring Nan).
@@ -127,7 +126,7 @@
     // the sign of b in r0.  Thus, if both are negative and a < b, -1 is placed
     // in r0, which is the desired result.  Conversely, if both are positive
     // and a > b, zero is placed in r0.
-#if __ARM_ARCH_ISA_THUMB == 1
+#if defined(USE_THUMB_1)
     bls     1f
     // Here both have the same sign and absA > absB.
     movs    r0,         #1
@@ -145,14 +144,14 @@
     // If a == b, then the Z flag is set, so we can get the correct final value
     // into r0 by simply or'ing with 1 if Z is clear.
     // For Thumb-1, r0 contains -1 if a < b, 0 if a > b and 0 if a == b.
-#if __ARM_ARCH_ISA_THUMB != 1
+#if !defined(USE_THUMB_1)
     it ne
     orrne   r0,     r0, #1
 #endif
 
     // Finally, we need to deal with NaNs.  If either argument is NaN, replace
     // the value in r0 with 1.
-#if __ARM_ARCH_ISA_THUMB == 1
+#if defined(USE_THUMB_1)
 LOCAL_LABEL(CHECK_NAN):
     movs    r6,         #0xff
     lsls    r6,         #24
@@ -189,7 +188,7 @@
     vmov r0, s0
     vmov r1, s1
 #endif
-#if __ARM_ARCH_ISA_THUMB == 1
+#if defined(USE_THUMB_1)
     push    {r6, lr}
     lsls    r2,        r0, #1
     lsls    r3,        r1, #1
@@ -255,6 +254,7 @@
 
     .p2align 2
 DEFINE_COMPILERRT_FUNCTION(__unordsf2)
+
 #if defined(COMPILER_RT_ARMHF_TARGET)
     vmov    r0,         s0
     vmov    r1,         s1
@@ -263,7 +263,7 @@
     lsls    r2,         r0, #1
     lsls    r3,         r1, #1
     movs    r0,         #0
-#if __ARM_ARCH_ISA_THUMB == 1
+#if defined(USE_THUMB_1)
     movs    r1,         #0xff
     lsls    r1,         #24
     cmp     r2,         r1
diff --git a/lib/builtins/arm/divmodsi4.S b/lib/builtins/arm/divmodsi4.S
index 999c310..8a027b7 100644
--- a/lib/builtins/arm/divmodsi4.S
+++ b/lib/builtins/arm/divmodsi4.S
@@ -23,20 +23,14 @@
 
 	.syntax unified
 	.text
-#if __ARM_ARCH_ISA_THUMB == 2
-	.thumb
-#endif
+  DEFINE_CODE_STATE
 
 @ int __divmodsi4(int divident, int divisor, int *remainder)
 @   Calculate the quotient and remainder of the (signed) division.  The return
 @   value is the quotient, the remainder is placed in the variable.
 
 	.p2align 3
-#if __ARM_ARCH_ISA_THUMB == 2
-DEFINE_COMPILERRT_THUMB_FUNCTION(__divmodsi4)
-#else
 DEFINE_COMPILERRT_FUNCTION(__divmodsi4)
-#endif
 #if __ARM_ARCH_EXT_IDIV__
 	tst     r1, r1
 	beq     LOCAL_LABEL(divzero)
diff --git a/lib/builtins/arm/divsi3.S b/lib/builtins/arm/divsi3.S
index f066f60..19757af 100644
--- a/lib/builtins/arm/divsi3.S
+++ b/lib/builtins/arm/divsi3.S
@@ -20,11 +20,9 @@
 #define CLEAR_FRAME_AND_RETURN \
     pop    {r4, r7, pc}
 
-	.syntax unified
-	.text
-#if __ARM_ARCH_ISA_THUMB == 2
-	.thumb
-#endif
+   .syntax unified
+   .text
+   DEFINE_CODE_STATE
 
 	.p2align 3
 // Ok, APCS and AAPCS agree on 32 bit args, so it's safe to use the same routine.
@@ -33,11 +31,7 @@
 @ int __divsi3(int divident, int divisor)
 @   Calculate and return the quotient of the (signed) division.
 
-#if __ARM_ARCH_ISA_THUMB == 2
-DEFINE_COMPILERRT_THUMB_FUNCTION(__divsi3)
-#else
 DEFINE_COMPILERRT_FUNCTION(__divsi3)
-#endif
 #if __ARM_ARCH_EXT_IDIV__
    tst     r1,r1
    beq     LOCAL_LABEL(divzero)
@@ -49,14 +43,14 @@
 #else
 ESTABLISH_FRAME
 //  Set aside the sign of the quotient.
-#  if __ARM_ARCH_ISA_THUMB == 1
+#  if defined(USE_THUMB_1)
     movs    r4,     r0
     eors    r4,     r1
 #  else
     eor     r4,     r0, r1
 #  endif
 //  Take absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31).
-#  if   __ARM_ARCH_ISA_THUMB == 1
+#  if defined(USE_THUMB_1)
     asrs    r2,     r0, #31
     asrs    r3,     r1, #31
     eors    r0,     r2
@@ -72,7 +66,7 @@
 //  abs(a) / abs(b)
     bl      SYMBOL_NAME(__udivsi3)
 //  Apply sign of quotient to result and return.
-#  if __ARM_ARCH_ISA_THUMB == 1
+#  if defined(USE_THUMB_1)
     asrs    r4,     #31
     eors    r0,     r4
     subs    r0,     r0, r4
diff --git a/lib/builtins/arm/eqdf2vfp.S b/lib/builtins/arm/eqdf2vfp.S
index 8fa0b2d..d507065 100644
--- a/lib/builtins/arm/eqdf2vfp.S
+++ b/lib/builtins/arm/eqdf2vfp.S
@@ -27,6 +27,7 @@
 	vcmp.f64 d6, d7		
 #endif
 	vmrs	apsr_nzcv, fpscr
+	ITE(eq)
 	moveq	r0, #1		// set result register to 1 if equal
 	movne	r0, #0
 	bx	lr
diff --git a/lib/builtins/arm/eqsf2vfp.S b/lib/builtins/arm/eqsf2vfp.S
index 3776bf4..fd72b2f 100644
--- a/lib/builtins/arm/eqsf2vfp.S
+++ b/lib/builtins/arm/eqsf2vfp.S
@@ -27,6 +27,7 @@
 	vcmp.f32 s14, s15
 #endif
 	vmrs	apsr_nzcv, fpscr
+	ITE(eq)
 	moveq	r0, #1      // set result register to 1 if equal
 	movne	r0, #0
 	bx	lr
diff --git a/lib/builtins/arm/gedf2vfp.S b/lib/builtins/arm/gedf2vfp.S
index 14899f0..364fc5b 100644
--- a/lib/builtins/arm/gedf2vfp.S
+++ b/lib/builtins/arm/gedf2vfp.S
@@ -27,6 +27,7 @@
 	vcmp.f64 d6, d7
 #endif
 	vmrs	apsr_nzcv, fpscr
+	ITE(ge)
 	movge	r0, #1      // set result register to 1 if greater than or equal
 	movlt	r0, #0
 	bx	lr
diff --git a/lib/builtins/arm/gesf2vfp.S b/lib/builtins/arm/gesf2vfp.S
index b49d04d..346c347 100644
--- a/lib/builtins/arm/gesf2vfp.S
+++ b/lib/builtins/arm/gesf2vfp.S
@@ -27,6 +27,7 @@
 	vcmp.f32 s14, s15
 #endif
 	vmrs	apsr_nzcv, fpscr
+	ITE(ge)
 	movge	r0, #1      // set result register to 1 if greater than or equal
 	movlt	r0, #0
 	bx	lr
diff --git a/lib/builtins/arm/gtdf2vfp.S b/lib/builtins/arm/gtdf2vfp.S
index 8166305..3389c3a 100644
--- a/lib/builtins/arm/gtdf2vfp.S
+++ b/lib/builtins/arm/gtdf2vfp.S
@@ -27,6 +27,7 @@
 	vcmp.f64 d6, d7
 #endif
 	vmrs	apsr_nzcv, fpscr
+	ITE(gt)
 	movgt	r0, #1		// set result register to 1 if equal
 	movle	r0, #0
 	bx	lr
diff --git a/lib/builtins/arm/gtsf2vfp.S b/lib/builtins/arm/gtsf2vfp.S
index d2d8a23..afdba8b 100644
--- a/lib/builtins/arm/gtsf2vfp.S
+++ b/lib/builtins/arm/gtsf2vfp.S
@@ -27,6 +27,7 @@
 	vcmp.f32 s14, s15
 #endif
 	vmrs	apsr_nzcv, fpscr
+	ITE(gt)
 	movgt	r0, #1		// set result register to 1 if equal
 	movle	r0, #0
 	bx	lr
diff --git a/lib/builtins/arm/ledf2vfp.S b/lib/builtins/arm/ledf2vfp.S
index a9dab77..4bbe4c8 100644
--- a/lib/builtins/arm/ledf2vfp.S
+++ b/lib/builtins/arm/ledf2vfp.S
@@ -27,6 +27,7 @@
 	vcmp.f64 d6, d7
 #endif
 	vmrs	apsr_nzcv, fpscr
+	ITE(ls)
 	movls	r0, #1		// set result register to 1 if equal
 	movhi	r0, #0
 	bx	lr
diff --git a/lib/builtins/arm/lesf2vfp.S b/lib/builtins/arm/lesf2vfp.S
index 7e127f4..51232bd 100644
--- a/lib/builtins/arm/lesf2vfp.S
+++ b/lib/builtins/arm/lesf2vfp.S
@@ -27,6 +27,7 @@
 	vcmp.f32 s14, s15
 #endif
 	vmrs	apsr_nzcv, fpscr
+	ITE(ls)
 	movls	r0, #1      // set result register to 1 if equal
 	movhi	r0, #0
 	bx	lr
diff --git a/lib/builtins/arm/ltdf2vfp.S b/lib/builtins/arm/ltdf2vfp.S
index 8b6f8e4..8e2928c 100644
--- a/lib/builtins/arm/ltdf2vfp.S
+++ b/lib/builtins/arm/ltdf2vfp.S
@@ -27,6 +27,7 @@
 	vcmp.f64 d6, d7
 #endif
 	vmrs	apsr_nzcv, fpscr
+	ITE(mi)
 	movmi	r0, #1		// set result register to 1 if equal
 	movpl	r0, #0
 	bx	lr
diff --git a/lib/builtins/arm/ltsf2vfp.S b/lib/builtins/arm/ltsf2vfp.S
index c4ff812..59c00c6 100644
--- a/lib/builtins/arm/ltsf2vfp.S
+++ b/lib/builtins/arm/ltsf2vfp.S
@@ -27,6 +27,7 @@
 	vcmp.f32 s14, s15
 #endif
 	vmrs	apsr_nzcv, fpscr
+	ITE(mi)
 	movmi	r0, #1      // set result register to 1 if equal
 	movpl	r0, #0
 	bx	lr
diff --git a/lib/builtins/arm/modsi3.S b/lib/builtins/arm/modsi3.S
index 1d302ed..be26383 100644
--- a/lib/builtins/arm/modsi3.S
+++ b/lib/builtins/arm/modsi3.S
@@ -22,19 +22,13 @@
 
 	.syntax unified
 	.text
-#if __ARM_ARCH_ISA_THUMB == 2
-	.thumb
-#endif
+	DEFINE_CODE_STATE
 
 @ int __modsi3(int divident, int divisor)
 @   Calculate and return the remainder of the (signed) division.
 
 	.p2align 3
-#if __ARM_ARCH_ISA_THUMB == 2
-DEFINE_COMPILERRT_THUMB_FUNCTION(__modsi3)
-#else
 DEFINE_COMPILERRT_FUNCTION(__modsi3)
-#endif
 #if __ARM_ARCH_EXT_IDIV__
 	tst     r1, r1
 	beq     LOCAL_LABEL(divzero)
diff --git a/lib/builtins/arm/nedf2vfp.S b/lib/builtins/arm/nedf2vfp.S
index 7d884e0..aef72eb 100644
--- a/lib/builtins/arm/nedf2vfp.S
+++ b/lib/builtins/arm/nedf2vfp.S
@@ -27,6 +27,7 @@
 	vcmp.f64 d6, d7		
 #endif
 	vmrs	apsr_nzcv, fpscr
+	ITE(ne)
 	movne	r0, #1		// set result register to 0 if unequal
 	moveq	r0, #0
 	bx	lr
diff --git a/lib/builtins/arm/nesf2vfp.S b/lib/builtins/arm/nesf2vfp.S
index 97c764f..50d60f4 100644
--- a/lib/builtins/arm/nesf2vfp.S
+++ b/lib/builtins/arm/nesf2vfp.S
@@ -27,6 +27,7 @@
 	vcmp.f32 s14, s15
 #endif
 	vmrs	apsr_nzcv, fpscr
+	ITE(ne)
 	movne	r0, #1      // set result register to 1 if unequal
 	moveq	r0, #0
 	bx	lr
diff --git a/lib/builtins/arm/udivmodsi4.S b/lib/builtins/arm/udivmodsi4.S
index 1ad8ee3..ee3950c 100644
--- a/lib/builtins/arm/udivmodsi4.S
+++ b/lib/builtins/arm/udivmodsi4.S
@@ -16,10 +16,7 @@
 
 	.syntax unified
 	.text
-
-#if __ARM_ARCH_ISA_THUMB == 2
-	.thumb
-#endif
+	DEFINE_CODE_STATE
 
 @ unsigned int __udivmodsi4(unsigned int divident, unsigned int divisor,
 @                           unsigned int *remainder)
@@ -27,11 +24,7 @@
 @   value is the quotient, the remainder is placed in the variable.
 
 	.p2align 2
-#if __ARM_ARCH_ISA_THUMB == 2
-DEFINE_COMPILERRT_THUMB_FUNCTION(__udivmodsi4)
-#else
 DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
-#endif
 #if __ARM_ARCH_EXT_IDIV__
 	tst     r1, r1
 	beq     LOCAL_LABEL(divby0)
@@ -67,7 +60,7 @@
 	clz	r3, r1
 	/* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
 	sub	r3, r3, ip
-#    if __ARM_ARCH_ISA_THUMB == 2
+#    if defined(USE_THUMB_2)
 	adr	ip, LOCAL_LABEL(div0block) + 1
 	sub	ip, ip, r3, lsl #1
 #    else
@@ -78,7 +71,7 @@
 	mov	r3, #0
 	bx	ip
 #  else
-#    if __ARM_ARCH_ISA_THUMB == 2
+#    if defined(USE_THUMB_2)
 #    error THUMB mode requires CLZ or UDIV
 #    endif
 	str	r4, [sp, #-8]!
diff --git a/lib/builtins/arm/udivsi3.S b/lib/builtins/arm/udivsi3.S
index fcc472b..6dea27d 100644
--- a/lib/builtins/arm/udivsi3.S
+++ b/lib/builtins/arm/udivsi3.S
@@ -17,9 +17,7 @@
 	.syntax unified
 	.text
 
-#if __ARM_ARCH_ISA_THUMB == 2
-	.thumb
-#endif
+DEFINE_CODE_STATE
 
 	.p2align 2
 DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3)
@@ -27,20 +25,25 @@
 @ unsigned int __udivsi3(unsigned int divident, unsigned int divisor)
 @   Calculate and return the quotient of the (unsigned) division.
 
-#if __ARM_ARCH_ISA_THUMB == 2
-DEFINE_COMPILERRT_THUMB_FUNCTION(__udivsi3)
-#else
 DEFINE_COMPILERRT_FUNCTION(__udivsi3)
-#endif
 #if __ARM_ARCH_EXT_IDIV__
 	tst     r1, r1
 	beq     LOCAL_LABEL(divby0)
 	udiv	r0, r0, r1
 	bx  	lr
-#else
+
+LOCAL_LABEL(divby0):
+	mov     r0, #0
+#  ifdef __ARM_EABI__
+	b       __aeabi_idiv0
+#  else
+	JMP(lr)
+#  endif
+
+#else /* ! __ARM_ARCH_EXT_IDIV__ */
 	cmp	r1, #1
 	bcc	LOCAL_LABEL(divby0)
-#if __ARM_ARCH_ISA_THUMB == 1
+#if defined(USE_THUMB_1)
 	bne LOCAL_LABEL(num_neq_denom)
 	JMP(lr)
 LOCAL_LABEL(num_neq_denom):
@@ -49,7 +52,7 @@
 	JMPc(lr, eq)
 #endif
 	cmp	r0, r1
-#if __ARM_ARCH_ISA_THUMB == 1
+#if defined(USE_THUMB_1)
 	bhs LOCAL_LABEL(num_ge_denom)
 	movs r0, #0
 	JMP(lr)
@@ -81,7 +84,7 @@
 	clz	r3, r1
 	/* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
 	sub	r3, r3, ip
-#    if __ARM_ARCH_ISA_THUMB == 2
+#    if defined(USE_THUMB_2)
 	adr	ip, LOCAL_LABEL(div0block) + 1
 	sub	ip, ip, r3, lsl #1
 #    else
@@ -92,17 +95,17 @@
 	mov	r3, #0
 	bx	ip
 #  else /* No CLZ Feature */
-#    if __ARM_ARCH_ISA_THUMB == 2
+#    if defined(USE_THUMB_2)
 #    error THUMB mode requires CLZ or UDIV
 #    endif
-#    if __ARM_ARCH_ISA_THUMB == 1
+#    if defined(USE_THUMB_1)
 #      define BLOCK_SIZE 10
 #    else
 #      define BLOCK_SIZE 12
 #    endif
 
 	mov	r2, r0
-#    if __ARM_ARCH_ISA_THUMB == 1
+#    if defined(USE_THUMB_1)
 	mov ip, r0
 	adr r0, LOCAL_LABEL(div0block)
 	adds r0, #1
@@ -111,7 +114,7 @@
 #    endif
 	lsrs	r3, r2, #16
 	cmp	r3, r1
-#    if __ARM_ARCH_ISA_THUMB == 1
+#    if defined(USE_THUMB_1)
 	blo LOCAL_LABEL(skip_16)
 	movs r2, r3
 	subs r0, r0, #(16 * BLOCK_SIZE)
@@ -123,7 +126,7 @@
 
 	lsrs	r3, r2, #8
 	cmp	r3, r1
-#    if __ARM_ARCH_ISA_THUMB == 1
+#    if defined(USE_THUMB_1)
 	blo LOCAL_LABEL(skip_8)
 	movs r2, r3
 	subs r0, r0, #(8 * BLOCK_SIZE)
@@ -135,7 +138,7 @@
 
 	lsrs	r3, r2, #4
 	cmp	r3, r1
-#    if __ARM_ARCH_ISA_THUMB == 1
+#    if defined(USE_THUMB_1)
 	blo LOCAL_LABEL(skip_4)
 	movs r2, r3
 	subs r0, r0, #(4 * BLOCK_SIZE)
@@ -147,7 +150,7 @@
 
 	lsrs	r3, r2, #2
 	cmp	r3, r1
-#    if __ARM_ARCH_ISA_THUMB == 1
+#    if defined(USE_THUMB_1)
 	blo LOCAL_LABEL(skip_2)
 	movs r2, r3
 	subs r0, r0, #(2 * BLOCK_SIZE)
@@ -158,7 +161,7 @@
 #    endif
 
 	/* Last block, no need to update r2 or r3. */
-#    if __ARM_ARCH_ISA_THUMB == 1
+#    if defined(USE_THUMB_1)
 	lsrs r3, r2, #1
 	cmp r3, r1
 	blo LOCAL_LABEL(skip_1)
@@ -186,12 +189,15 @@
 LOCAL_LABEL(divby0):
 	movs	r0, #0
 #      if defined(__ARM_EABI__)
+	push {r7, lr}
 	bl	__aeabi_idiv0 // due to relocation limit, can't use b.
-#      endif
+	pop  {r7, pc}
+#      else
 	JMP(lr)
+#      endif
 
 
-#if __ARM_ARCH_ISA_THUMB == 1
+#if defined(USE_THUMB_1)
 #define block(shift)                                                           \
 	lsls r2, r1, IMM shift;                                                      \
 	cmp r0, r2;                                                                  \
@@ -252,16 +258,6 @@
 	JMP(lr)
 #endif /* __ARM_ARCH_EXT_IDIV__ */
 
-#if __ARM_ARCH_EXT_IDIV__
-LOCAL_LABEL(divby0):
-        mov     r0, #0
-#  ifdef __ARM_EABI__
-        b       __aeabi_idiv0
-#  else
-        JMP(lr)
-#  endif
-#endif
-
 END_COMPILERRT_FUNCTION(__udivsi3)
 
 NO_EXEC_STACK_DIRECTIVE
diff --git a/lib/builtins/arm/umodsi3.S b/lib/builtins/arm/umodsi3.S
index 672487e..069fad3 100644
--- a/lib/builtins/arm/umodsi3.S
+++ b/lib/builtins/arm/umodsi3.S
@@ -16,19 +16,13 @@
 
 	.syntax unified
 	.text
-#if __ARM_ARCH_ISA_THUMB == 2
-	.thumb
-#endif
+	DEFINE_CODE_STATE
 
 @ unsigned int __umodsi3(unsigned int divident, unsigned int divisor)
 @   Calculate and return the remainder of the (unsigned) division.
 
 	.p2align 2
-#if __ARM_ARCH_ISA_THUMB == 2
-DEFINE_COMPILERRT_THUMB_FUNCTION(__umodsi3)
-#else
 DEFINE_COMPILERRT_FUNCTION(__umodsi3)
-#endif
 #if __ARM_ARCH_EXT_IDIV__
 	tst     r1, r1
 	beq     LOCAL_LABEL(divby0)
@@ -65,7 +59,7 @@
 	clz	r3, r1
 	/* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
 	sub	r3, r3, ip
-#    if __ARM_ARCH_ISA_THUMB == 2
+#    if defined(USE_THUMB_2)
 	adr	ip, LOCAL_LABEL(div0block) + 1
 	sub	ip, ip, r3, lsl #1
 #    else
@@ -74,7 +68,7 @@
 	sub	ip, ip, r3, lsl #3
 	bx	ip
 #  else
-#    if __ARM_ARCH_ISA_THUMB == 2
+#    if defined(USE_THUMB_2)
 #    error THUMB mode requires CLZ or UDIV
 #    endif
 	mov	r2, r0
diff --git a/lib/builtins/arm/unorddf2vfp.S b/lib/builtins/arm/unorddf2vfp.S
index 8556375..6625fa8 100644
--- a/lib/builtins/arm/unorddf2vfp.S
+++ b/lib/builtins/arm/unorddf2vfp.S
@@ -27,6 +27,7 @@
 	vcmp.f64 d6, d7
 #endif
 	vmrs	apsr_nzcv, fpscr
+	ITE(vs)
 	movvs	r0, #1      // set result register to 1 if "overflow" (any NaNs)
 	movvc	r0, #0
 	bx	lr
diff --git a/lib/builtins/arm/unordsf2vfp.S b/lib/builtins/arm/unordsf2vfp.S
index 2b16b49..0b5da2b 100644
--- a/lib/builtins/arm/unordsf2vfp.S
+++ b/lib/builtins/arm/unordsf2vfp.S
@@ -27,6 +27,7 @@
 	vcmp.f32 s14, s15
 #endif
 	vmrs	apsr_nzcv, fpscr
+	ITE(vs)
 	movvs	r0, #1      // set result register to 1 if "overflow" (any NaNs)
 	movvc	r0, #0
 	bx	lr
diff --git a/lib/builtins/ashldi3.c b/lib/builtins/ashldi3.c
index eb4698a..a5c1836 100644
--- a/lib/builtins/ashldi3.c
+++ b/lib/builtins/ashldi3.c
@@ -18,8 +18,6 @@
 
 /* Precondition:  0 <= b < bits_in_dword */
 
-ARM_EABI_FNALIAS(llsl, ashldi3)
-
 COMPILER_RT_ABI di_int
 __ashldi3(di_int a, si_int b)
 {
@@ -41,3 +39,7 @@
     }
     return result.all;
 }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI di_int __aeabi_llsl(di_int a, si_int b) COMPILER_RT_ALIAS(__ashldi3);
+#endif
diff --git a/lib/builtins/ashrdi3.c b/lib/builtins/ashrdi3.c
index 14c878b..8461996 100644
--- a/lib/builtins/ashrdi3.c
+++ b/lib/builtins/ashrdi3.c
@@ -18,8 +18,6 @@
 
 /* Precondition:  0 <= b < bits_in_dword */
 
-ARM_EABI_FNALIAS(lasr, ashrdi3)
-
 COMPILER_RT_ABI di_int
 __ashrdi3(di_int a, si_int b)
 {
@@ -42,3 +40,7 @@
     }
     return result.all;
 }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI di_int __aeabi_lasr(di_int a, si_int b) COMPILER_RT_ALIAS(__ashrdi3);
+#endif
diff --git a/lib/builtins/assembly.h b/lib/builtins/assembly.h
index 29d9f88..3f5e59b 100644
--- a/lib/builtins/assembly.h
+++ b/lib/builtins/assembly.h
@@ -44,7 +44,8 @@
 #endif
 #define CONST_SECTION .section .rodata
 
-#if defined(__GNU__) || defined(__ANDROID__) || defined(__FreeBSD__)
+#if defined(__GNU__) || defined(__FreeBSD__) || defined(__Fuchsia__) || \
+    defined(__linux__)
 #define NO_EXEC_STACK_DIRECTIVE .section .note.GNU-stack,"",%progbits
 #else
 #define NO_EXEC_STACK_DIRECTIVE
@@ -67,10 +68,42 @@
 #endif
 
 #if defined(__arm__)
+
+/*
+ * Determine actual [ARM][THUMB[1][2]] ISA using compiler predefined macros:
+ * - for '-mthumb -march=armv6' compiler defines '__thumb__'
+ * - for '-mthumb -march=armv7' compiler defines '__thumb__' and '__thumb2__'
+ */
+#if defined(__thumb2__) || defined(__thumb__)
+#define DEFINE_CODE_STATE .thumb SEPARATOR
+#define DECLARE_FUNC_ENCODING    .thumb_func SEPARATOR
+#if defined(__thumb2__)
+#define USE_THUMB_2
+#define IT(cond)  it cond
+#define ITT(cond) itt cond
+#define ITE(cond) ite cond
+#else
+#define USE_THUMB_1
+#define IT(cond)
+#define ITT(cond)
+#define ITE(cond)
+#endif // defined(__thumb__2)
+#else // !defined(__thumb2__) && !defined(__thumb__)
+#define DEFINE_CODE_STATE .arm SEPARATOR
+#define DECLARE_FUNC_ENCODING
+#define IT(cond)
+#define ITT(cond)
+#define ITE(cond)
+#endif
+
+#if defined(USE_THUMB_1) && defined(USE_THUMB_2)
+#error "USE_THUMB_1 and USE_THUMB_2 can't be defined together."
+#endif
+
 #if defined(__ARM_ARCH_4T__) || __ARM_ARCH >= 5
 #define ARM_HAS_BX
 #endif
-#if !defined(__ARM_FEATURE_CLZ) && __ARM_ARCH_ISA_THUMB != 1 &&                \
+#if !defined(__ARM_FEATURE_CLZ) && !defined(USE_THUMB_1) &&  \
     (__ARM_ARCH >= 6 || (__ARM_ARCH == 5 && !defined(__ARM_ARCH_5__)))
 #define __ARM_FEATURE_CLZ
 #endif
@@ -92,19 +125,14 @@
   JMP(ip)
 #endif
 
-#if __ARM_ARCH_ISA_THUMB == 2
-#define IT(cond)  it cond
-#define ITT(cond) itt cond
-#else
-#define IT(cond)
-#define ITT(cond)
-#endif
-
-#if __ARM_ARCH_ISA_THUMB == 2
+#if defined(USE_THUMB_2)
 #define WIDE(op) op.w
 #else
 #define WIDE(op) op
 #endif
+#else // !defined(__arm)
+#define DECLARE_FUNC_ENCODING
+#define DEFINE_CODE_STATE
 #endif
 
 #define GLUE2(a, b) a##b
@@ -119,13 +147,16 @@
 #endif
 
 #define DEFINE_COMPILERRT_FUNCTION(name)                                       \
+  DEFINE_CODE_STATE                                                            \
   FILE_LEVEL_DIRECTIVE SEPARATOR                                               \
   .globl SYMBOL_NAME(name) SEPARATOR                                           \
   SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR                                  \
   DECLARE_SYMBOL_VISIBILITY(name)                                              \
+  DECLARE_FUNC_ENCODING                                                        \
   SYMBOL_NAME(name):
 
 #define DEFINE_COMPILERRT_THUMB_FUNCTION(name)                                 \
+  DEFINE_CODE_STATE                                                            \
   FILE_LEVEL_DIRECTIVE SEPARATOR                                               \
   .globl SYMBOL_NAME(name) SEPARATOR                                           \
   SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR                                  \
@@ -134,16 +165,20 @@
   SYMBOL_NAME(name):
 
 #define DEFINE_COMPILERRT_PRIVATE_FUNCTION(name)                               \
+  DEFINE_CODE_STATE                                                            \
   FILE_LEVEL_DIRECTIVE SEPARATOR                                               \
   .globl SYMBOL_NAME(name) SEPARATOR                                           \
   SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR                                  \
   HIDDEN(SYMBOL_NAME(name)) SEPARATOR                                          \
+  DECLARE_FUNC_ENCODING                                                        \
   SYMBOL_NAME(name):
 
 #define DEFINE_COMPILERRT_PRIVATE_FUNCTION_UNMANGLED(name)                     \
+  DEFINE_CODE_STATE                                                            \
   .globl name SEPARATOR                                                        \
   SYMBOL_IS_FUNC(name) SEPARATOR                                               \
   HIDDEN(name) SEPARATOR                                                       \
+  DECLARE_FUNC_ENCODING                                                        \
   name:
 
 #define DEFINE_COMPILERRT_FUNCTION_ALIAS(name, target)                         \
diff --git a/lib/builtins/bswapdi2.c b/lib/builtins/bswapdi2.c
new file mode 100644
index 0000000..eb22000
--- /dev/null
+++ b/lib/builtins/bswapdi2.c
@@ -0,0 +1,27 @@
+/* ===-- bswapdi2.c - Implement __bswapdi2 ---------------------------------===
+ *
+ *               The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __bswapdi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+COMPILER_RT_ABI uint64_t __bswapdi2(uint64_t u) {
+  return (
+      (((u)&0xff00000000000000ULL) >> 56) |
+      (((u)&0x00ff000000000000ULL) >> 40) |
+      (((u)&0x0000ff0000000000ULL) >> 24) |
+      (((u)&0x000000ff00000000ULL) >> 8)  |
+      (((u)&0x00000000ff000000ULL) << 8)  |
+      (((u)&0x0000000000ff0000ULL) << 24) |
+      (((u)&0x000000000000ff00ULL) << 40) |
+      (((u)&0x00000000000000ffULL) << 56));
+}
diff --git a/lib/builtins/bswapsi2.c b/lib/builtins/bswapsi2.c
new file mode 100644
index 0000000..5d941e6
--- /dev/null
+++ b/lib/builtins/bswapsi2.c
@@ -0,0 +1,23 @@
+/* ===-- bswapsi2.c - Implement __bswapsi2 ---------------------------------===
+ *
+ *               The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __bswapsi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+COMPILER_RT_ABI uint32_t __bswapsi2(uint32_t u) {
+  return (
+      (((u)&0xff000000) >> 24) |
+      (((u)&0x00ff0000) >> 8)  |
+      (((u)&0x0000ff00) << 8)  |
+      (((u)&0x000000ff) << 24));
+}
diff --git a/lib/builtins/clear_cache.c b/lib/builtins/clear_cache.c
index a68f9fc..640cb75 100644
--- a/lib/builtins/clear_cache.c
+++ b/lib/builtins/clear_cache.c
@@ -9,6 +9,7 @@
  */
 
 #include "int_lib.h"
+#include <assert.h>
 #include <stddef.h>
 
 #if __APPLE__
@@ -23,7 +24,7 @@
 uintptr_t GetCurrentProcess(void);
 #endif
 
-#if (defined(__FreeBSD__) || defined(__Bitrig__)) && defined(__arm__)
+#if defined(__FreeBSD__) && defined(__arm__)
   #include <sys/types.h>
   #include <machine/sysarch.h>
 #endif
@@ -41,7 +42,7 @@
      * clear_mips_cache - Invalidates instruction cache for Mips.
      */
     static void clear_mips_cache(const void* Addr, size_t Size) {
-      asm volatile (
+      __asm__ volatile (
         ".set push\n"
         ".set noreorder\n"
         ".set noat\n"
@@ -90,13 +91,13 @@
  */
 
 void __clear_cache(void *start, void *end) {
-#if __i386__ || __x86_64__
+#if __i386__ || __x86_64__ || defined(_M_IX86) || defined(_M_X64)
 /*
  * Intel processors have a unified instruction and data cache
  * so there is nothing to do
  */
 #elif defined(__arm__) && !defined(__APPLE__)
-    #if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__Bitrig__)
+    #if defined(__FreeBSD__) || defined(__NetBSD__)
         struct arm_sync_icache_args arg;
 
         arg.addr = (uintptr_t)start;
@@ -121,9 +122,7 @@
                           : "=r"(start_reg)
                           : "r"(syscall_nr), "r"(start_reg), "r"(end_reg),
                             "r"(flags));
-         if (start_reg != 0) {
-             compilerrt_abort();
-         }
+         assert(start_reg == 0 && "Cache flush syscall failed.");
     #elif defined(_WIN32)
         FlushInstructionCache(GetCurrentProcess(), start, end - start);
     #else
@@ -165,6 +164,21 @@
   for (addr = xstart; addr < xend; addr += icache_line_size)
     __asm __volatile("ic ivau, %0" :: "r"(addr));
   __asm __volatile("isb sy");
+#elif defined (__powerpc64__)
+  const size_t line_size = 32;
+  const size_t len = (uintptr_t)end - (uintptr_t)start;
+
+  const uintptr_t mask = ~(line_size - 1);
+  const uintptr_t start_line = ((uintptr_t)start) & mask;
+  const uintptr_t end_line = ((uintptr_t)start + len + line_size - 1) & mask;
+
+  for (uintptr_t line = start_line; line < end_line; line += line_size)
+    __asm__ volatile("dcbf 0, %0" : : "r"(line));
+  __asm__ volatile("sync");
+
+  for (uintptr_t line = start_line; line < end_line; line += line_size)
+    __asm__ volatile("icbi 0, %0" : : "r"(line));
+  __asm__ volatile("isync");
 #else
     #if __APPLE__
         /* On Darwin, sys_icache_invalidate() provides this functionality */
@@ -174,4 +188,3 @@
     #endif
 #endif
 }
-
diff --git a/lib/builtins/comparedf2.c b/lib/builtins/comparedf2.c
index 9e29752..44e5d2b 100644
--- a/lib/builtins/comparedf2.c
+++ b/lib/builtins/comparedf2.c
@@ -113,8 +113,6 @@
     }
 }
 
-ARM_EABI_FNALIAS(dcmpun, unorddf2)
-
 COMPILER_RT_ABI int
 __unorddf2(fp_t a, fp_t b) {
     const rep_t aAbs = toRep(a) & absMask;
@@ -144,3 +142,12 @@
     return __gedf2(a, b);
 }
 
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI int __aeabi_dcmpun(fp_t a, fp_t b) {
+  return __unorddf2(a, b);
+}
+#else
+AEABI_RTABI int __aeabi_dcmpun(fp_t a, fp_t b) COMPILER_RT_ALIAS(__unorddf2);
+#endif
+#endif
diff --git a/lib/builtins/comparesf2.c b/lib/builtins/comparesf2.c
index 1fd5063..43cd6a6 100644
--- a/lib/builtins/comparesf2.c
+++ b/lib/builtins/comparesf2.c
@@ -113,8 +113,6 @@
     }
 }
 
-ARM_EABI_FNALIAS(fcmpun, unordsf2)
-
 COMPILER_RT_ABI int
 __unordsf2(fp_t a, fp_t b) {
     const rep_t aAbs = toRep(a) & absMask;
@@ -143,3 +141,13 @@
 __gtsf2(fp_t a, fp_t b) {
     return __gesf2(a, b);
 }
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI int __aeabi_fcmpun(fp_t a, fp_t b) {
+  return __unordsf2(a, b);
+}
+#else
+AEABI_RTABI int __aeabi_fcmpun(fp_t a, fp_t b) COMPILER_RT_ALIAS(__unordsf2);
+#endif
+#endif
diff --git a/lib/builtins/cpu_model.c b/lib/builtins/cpu_model.c
index 9a37370..2fd39e1 100644
--- a/lib/builtins/cpu_model.c
+++ b/lib/builtins/cpu_model.c
@@ -27,6 +27,10 @@
 #include <intrin.h>
 #endif
 
+#ifndef __has_attribute
+#define __has_attribute(attr) 0
+#endif
+
 enum VendorSignatures {
   SIG_INTEL = 0x756e6547 /* Genu */,
   SIG_AMD = 0x68747541 /* Auth */
@@ -40,29 +44,17 @@
 };
 
 enum ProcessorTypes {
-  INTEL_ATOM = 1,
+  INTEL_BONNELL = 1,
   INTEL_CORE2,
   INTEL_COREI7,
   AMDFAM10H,
   AMDFAM15H,
-  INTEL_i386,
-  INTEL_i486,
-  INTEL_PENTIUM,
-  INTEL_PENTIUM_PRO,
-  INTEL_PENTIUM_II,
-  INTEL_PENTIUM_III,
-  INTEL_PENTIUM_IV,
-  INTEL_PENTIUM_M,
-  INTEL_CORE_DUO,
-  INTEL_XEONPHI,
-  INTEL_X86_64,
-  INTEL_NOCONA,
-  INTEL_PRESCOTT,
-  AMD_i486,
-  AMDPENTIUM,
-  AMDATHLON,
-  AMDFAM14H,
-  AMDFAM16H,
+  INTEL_SILVERMONT,
+  INTEL_KNL,
+  AMD_BTVER1,
+  AMD_BTVER2,
+  AMDFAM17H,
+  INTEL_KNM,
   CPU_TYPE_MAX
 };
 
@@ -75,32 +67,14 @@
   AMDFAM10H_ISTANBUL,
   AMDFAM15H_BDVER1,
   AMDFAM15H_BDVER2,
-  INTEL_PENTIUM_MMX,
-  INTEL_CORE2_65,
-  INTEL_CORE2_45,
+  AMDFAM15H_BDVER3,
+  AMDFAM15H_BDVER4,
+  AMDFAM17H_ZNVER1,
   INTEL_COREI7_IVYBRIDGE,
   INTEL_COREI7_HASWELL,
   INTEL_COREI7_BROADWELL,
   INTEL_COREI7_SKYLAKE,
   INTEL_COREI7_SKYLAKE_AVX512,
-  INTEL_ATOM_BONNELL,
-  INTEL_ATOM_SILVERMONT,
-  INTEL_KNIGHTS_LANDING,
-  AMDPENTIUM_K6,
-  AMDPENTIUM_K62,
-  AMDPENTIUM_K63,
-  AMDPENTIUM_GEODE,
-  AMDATHLON_TBIRD,
-  AMDATHLON_MP,
-  AMDATHLON_XP,
-  AMDATHLON_K8SSE3,
-  AMDATHLON_OPTERON,
-  AMDATHLON_FX,
-  AMDATHLON_64,
-  AMD_BTVER1,
-  AMD_BTVER2,
-  AMDFAM15H_BDVER3,
-  AMDFAM15H_BDVER4,
   CPU_SUBTYPE_MAX
 };
 
@@ -116,11 +90,26 @@
   FEATURE_SSE4_2,
   FEATURE_AVX,
   FEATURE_AVX2,
-  FEATURE_AVX512,
-  FEATURE_AVX512SAVE,
-  FEATURE_MOVBE,
-  FEATURE_ADX,
-  FEATURE_EM64T
+  FEATURE_SSE4_A,
+  FEATURE_FMA4,
+  FEATURE_XOP,
+  FEATURE_FMA,
+  FEATURE_AVX512F,
+  FEATURE_BMI,
+  FEATURE_BMI2,
+  FEATURE_AES,
+  FEATURE_PCLMUL,
+  FEATURE_AVX512VL,
+  FEATURE_AVX512BW,
+  FEATURE_AVX512DQ,
+  FEATURE_AVX512CD,
+  FEATURE_AVX512ER,
+  FEATURE_AVX512PF,
+  FEATURE_AVX512VBMI,
+  FEATURE_AVX512IFMA,
+  FEATURE_AVX5124VNNIW,
+  FEATURE_AVX5124FMAPS,
+  FEATURE_AVX512VPOPCNTDQ
 };
 
 // The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
@@ -160,26 +149,27 @@
 
 /// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
 /// the specified arguments.  If we can't run cpuid on the host, return true.
-static void getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
+static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                                unsigned *rECX, unsigned *rEDX) {
 #if defined(__GNUC__) || defined(__clang__)
 #if defined(__x86_64__)
-  // gcc doesn't know cpuid would clobber ebx/rbx. Preseve it manually.
+  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
+  // FIXME: should we save this for Clang?
   __asm__("movq\t%%rbx, %%rsi\n\t"
           "cpuid\n\t"
           "xchgq\t%%rbx, %%rsi\n\t"
           : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
           : "a"(value));
+  return false;
 #elif defined(__i386__)
   __asm__("movl\t%%ebx, %%esi\n\t"
           "cpuid\n\t"
           "xchgl\t%%ebx, %%esi\n\t"
           : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
           : "a"(value));
-// pedantic #else returns to appease -Wunreachable-code (so we don't generate
-// postprocessed code that looks like "return true; return false;")
+  return false;
 #else
-  assert(0 && "This method is defined only for x86.");
+  return true;
 #endif
 #elif defined(_MSC_VER)
   // The MSVC intrinsic is portable across x86 and x64.
@@ -189,19 +179,20 @@
   *rEBX = registers[1];
   *rECX = registers[2];
   *rEDX = registers[3];
+  return false;
 #else
-  assert(0 && "This method is defined only for GNUC, Clang or MSVC.");
+  return true;
 #endif
 }
 
 /// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
 /// the 4 values in the specified arguments.  If we can't run cpuid on the host,
 /// return true.
-static void getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
+static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                  unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                  unsigned *rEDX) {
-#if defined(__x86_64__) || defined(_M_X64)
 #if defined(__GNUC__) || defined(__clang__)
+#if defined(__x86_64__)
   // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
   // FIXME: should we save this for Clang?
   __asm__("movq\t%%rbx, %%rsi\n\t"
@@ -209,6 +200,17 @@
           "xchgq\t%%rbx, %%rsi\n\t"
           : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
           : "a"(value), "c"(subleaf));
+  return false;
+#elif defined(__i386__)
+  __asm__("movl\t%%ebx, %%esi\n\t"
+          "cpuid\n\t"
+          "xchgl\t%%ebx, %%esi\n\t"
+          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
+          : "a"(value), "c"(subleaf));
+  return false;
+#else
+  return true;
+#endif
 #elif defined(_MSC_VER)
   int registers[4];
   __cpuidex(registers, value, subleaf);
@@ -216,35 +218,9 @@
   *rEBX = registers[1];
   *rECX = registers[2];
   *rEDX = registers[3];
+  return false;
 #else
-  assert(0 && "This method is defined only for GNUC, Clang or MSVC.");
-#endif
-#elif defined(__i386__) || defined(_M_IX86)
-#if defined(__GNUC__) || defined(__clang__)
-  __asm__("movl\t%%ebx, %%esi\n\t"
-          "cpuid\n\t"
-          "xchgl\t%%ebx, %%esi\n\t"
-          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
-          : "a"(value), "c"(subleaf));
-#elif defined(_MSC_VER)
-  __asm {
-      mov   eax,value
-      mov   ecx,subleaf
-      cpuid
-      mov   esi,rEAX
-      mov   dword ptr [esi],eax
-      mov   esi,rEBX
-      mov   dword ptr [esi],ebx
-      mov   esi,rECX
-      mov   dword ptr [esi],ecx
-      mov   esi,rEDX
-      mov   dword ptr [esi],edx
-  }
-#else
-  assert(0 && "This method is defined only for GNUC, Clang or MSVC.");
-#endif
-#else
-  assert(0 && "This method is defined only for x86.");
+  return true;
 #endif
 }
 
@@ -279,84 +255,15 @@
   }
 }
 
-static void getIntelProcessorTypeAndSubtype(unsigned int Family,
-                                            unsigned int Model,
-                                            unsigned int Brand_id,
-                                            unsigned int Features,
-                                            unsigned *Type, unsigned *Subtype) {
+static void
+getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
+                                unsigned Brand_id, unsigned Features,
+                                unsigned *Type, unsigned *Subtype) {
   if (Brand_id != 0)
     return;
   switch (Family) {
-  case 3:
-    *Type = INTEL_i386;
-    break;
-  case 4:
-    switch (Model) {
-    case 0: // Intel486 DX processors
-    case 1: // Intel486 DX processors
-    case 2: // Intel486 SX processors
-    case 3: // Intel487 processors, IntelDX2 OverDrive processors,
-            // IntelDX2 processors
-    case 4: // Intel486 SL processor
-    case 5: // IntelSX2 processors
-    case 7: // Write-Back Enhanced IntelDX2 processors
-    case 8: // IntelDX4 OverDrive processors, IntelDX4 processors
-    default:
-      *Type = INTEL_i486;
-      break;
-    }
-  case 5:
-    switch (Model) {
-    case 1: // Pentium OverDrive processor for Pentium processor (60, 66),
-            // Pentium processors (60, 66)
-    case 2: // Pentium OverDrive processor for Pentium processor (75, 90,
-            // 100, 120, 133), Pentium processors (75, 90, 100, 120, 133,
-            // 150, 166, 200)
-    case 3: // Pentium OverDrive processors for Intel486 processor-based
-            // systems
-      *Type = INTEL_PENTIUM;
-      break;
-    case 4: // Pentium OverDrive processor with MMX technology for Pentium
-            // processor (75, 90, 100, 120, 133), Pentium processor with
-            // MMX technology (166, 200)
-      *Type = INTEL_PENTIUM;
-      *Subtype = INTEL_PENTIUM_MMX;
-      break;
-    default:
-      *Type = INTEL_PENTIUM;
-      break;
-    }
   case 6:
     switch (Model) {
-    case 0x01: // Pentium Pro processor
-      *Type = INTEL_PENTIUM_PRO;
-      break;
-    case 0x03: // Intel Pentium II OverDrive processor, Pentium II processor,
-               // model 03
-    case 0x05: // Pentium II processor, model 05, Pentium II Xeon processor,
-               // model 05, and Intel Celeron processor, model 05
-    case 0x06: // Celeron processor, model 06
-      *Type = INTEL_PENTIUM_II;
-      break;
-    case 0x07: // Pentium III processor, model 07, and Pentium III Xeon
-               // processor, model 07
-    case 0x08: // Pentium III processor, model 08, Pentium III Xeon processor,
-               // model 08, and Celeron processor, model 08
-    case 0x0a: // Pentium III Xeon processor, model 0Ah
-    case 0x0b: // Pentium III processor, model 0Bh
-      *Type = INTEL_PENTIUM_III;
-      break;
-    case 0x09: // Intel Pentium M processor, Intel Celeron M processor model 09.
-    case 0x0d: // Intel Pentium M processor, Intel Celeron M processor, model
-               // 0Dh. All processors are manufactured using the 90 nm process.
-    case 0x15: // Intel EP80579 Integrated Processor and Intel EP80579
-               // Integrated Processor with Intel QuickAssist Technology
-      *Type = INTEL_PENTIUM_M;
-      break;
-    case 0x0e: // Intel Core Duo processor, Intel Core Solo processor, model
-               // 0Eh. All processors are manufactured using the 65 nm process.
-      *Type = INTEL_CORE_DUO;
-      break;   // yonah
     case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
                // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
                // mobile processor, Intel Core 2 Extreme processor, Intel
@@ -364,9 +271,6 @@
                // 0Fh. All processors are manufactured using the 65 nm process.
     case 0x16: // Intel Celeron processor model 16h. All processors are
                // manufactured using the 65 nm process
-      *Type = INTEL_CORE2; // "core2"
-      *Subtype = INTEL_CORE2_65;
-      break;
     case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
                // 17h. All processors are manufactured using the 45 nm process.
                //
@@ -374,14 +278,13 @@
     case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
                // the 45 nm process.
       *Type = INTEL_CORE2; // "penryn"
-      *Subtype = INTEL_CORE2_45;
       break;
     case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
                // processors are manufactured using the 45 nm process.
     case 0x1e: // Intel(R) Core(TM) i7 CPU         870  @ 2.93GHz.
                // As found in a Summer 2010 model iMac.
     case 0x1f:
-    case 0x2e:              // Nehalem EX
+    case 0x2e:             // Nehalem EX
       *Type = INTEL_COREI7; // "nehalem"
       *Subtype = INTEL_COREI7_NEHALEM;
       break;
@@ -399,7 +302,7 @@
       *Subtype = INTEL_COREI7_SANDYBRIDGE;
       break;
     case 0x3a:
-    case 0x3e:              // Ivy Bridge EP
+    case 0x3e:             // Ivy Bridge EP
       *Type = INTEL_COREI7; // "ivybridge"
       *Subtype = INTEL_COREI7_IVYBRIDGE;
       break;
@@ -423,22 +326,26 @@
       break;
 
     // Skylake:
-    case 0x4e:
-      *Type = INTEL_COREI7; // "skylake-avx512"
-      *Subtype = INTEL_COREI7_SKYLAKE_AVX512;
-      break;
-    case 0x5e:
+    case 0x4e: // Skylake mobile
+    case 0x5e: // Skylake desktop
+    case 0x8e: // Kaby Lake mobile
+    case 0x9e: // Kaby Lake desktop
       *Type = INTEL_COREI7; // "skylake"
       *Subtype = INTEL_COREI7_SKYLAKE;
       break;
 
+    // Skylake Xeon:
+    case 0x55:
+      *Type = INTEL_COREI7;
+      *Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512"
+      break;
+
     case 0x1c: // Most 45 nm Intel Atom processors
     case 0x26: // 45 nm Atom Lincroft
     case 0x27: // 32 nm Atom Medfield
     case 0x35: // 32 nm Atom Midview
     case 0x36: // 32 nm Atom Midview
-      *Type = INTEL_ATOM;
-      *Subtype = INTEL_ATOM_BONNELL;
+      *Type = INTEL_BONNELL;
       break; // "bonnell"
 
     // Atom Silvermont codes from the Intel software optimization guide.
@@ -448,185 +355,33 @@
     case 0x5a:
     case 0x5d:
     case 0x4c: // really airmont
-      *Type = INTEL_ATOM;
-      *Subtype = INTEL_ATOM_SILVERMONT;
+      *Type = INTEL_SILVERMONT;
       break; // "silvermont"
 
     case 0x57:
-      *Type = INTEL_XEONPHI; // knl
-      *Subtype = INTEL_KNIGHTS_LANDING;
+      *Type = INTEL_KNL; // knl
       break;
 
-    default: // Unknown family 6 CPU, try to guess.
-      if (Features & (1 << FEATURE_AVX512)) {
-        *Type = INTEL_XEONPHI; // knl
-        *Subtype = INTEL_KNIGHTS_LANDING;
-        break;
-      }
-      if (Features & (1 << FEATURE_ADX)) {
-        *Type = INTEL_COREI7;
-        *Subtype = INTEL_COREI7_BROADWELL;
-        break;
-      }
-      if (Features & (1 << FEATURE_AVX2)) {
-        *Type = INTEL_COREI7;
-        *Subtype = INTEL_COREI7_HASWELL;
-        break;
-      }
-      if (Features & (1 << FEATURE_AVX)) {
-        *Type = INTEL_COREI7;
-        *Subtype = INTEL_COREI7_SANDYBRIDGE;
-        break;
-      }
-      if (Features & (1 << FEATURE_SSE4_2)) {
-        if (Features & (1 << FEATURE_MOVBE)) {
-          *Type = INTEL_ATOM;
-          *Subtype = INTEL_ATOM_SILVERMONT;
-        } else {
-          *Type = INTEL_COREI7;
-          *Subtype = INTEL_COREI7_NEHALEM;
-        }
-        break;
-      }
-      if (Features & (1 << FEATURE_SSE4_1)) {
-        *Type = INTEL_CORE2; // "penryn"
-        *Subtype = INTEL_CORE2_45;
-        break;
-      }
-      if (Features & (1 << FEATURE_SSSE3)) {
-        if (Features & (1 << FEATURE_MOVBE)) {
-          *Type = INTEL_ATOM;
-          *Subtype = INTEL_ATOM_BONNELL; // "bonnell"
-        } else {
-          *Type = INTEL_CORE2; // "core2"
-          *Subtype = INTEL_CORE2_65;
-        }
-        break;
-      }
-      if (Features & (1 << FEATURE_EM64T)) {
-        *Type = INTEL_X86_64;
-        break; // x86-64
-      }
-      if (Features & (1 << FEATURE_SSE2)) {
-        *Type = INTEL_PENTIUM_M;
-        break;
-      }
-      if (Features & (1 << FEATURE_SSE)) {
-        *Type = INTEL_PENTIUM_III;
-        break;
-      }
-      if (Features & (1 << FEATURE_MMX)) {
-        *Type = INTEL_PENTIUM_II;
-        break;
-      }
-      *Type = INTEL_PENTIUM_PRO;
+    case 0x85:
+      *Type = INTEL_KNM; // knm
       break;
+
+    default: // Unknown family 6 CPU.
+      break;
+    break;
     }
-  case 15: {
-    switch (Model) {
-    case 0: // Pentium 4 processor, Intel Xeon processor. All processors are
-            // model 00h and manufactured using the 0.18 micron process.
-    case 1: // Pentium 4 processor, Intel Xeon processor, Intel Xeon
-            // processor MP, and Intel Celeron processor. All processors are
-            // model 01h and manufactured using the 0.18 micron process.
-    case 2: // Pentium 4 processor, Mobile Intel Pentium 4 processor - M,
-            // Intel Xeon processor, Intel Xeon processor MP, Intel Celeron
-            // processor, and Mobile Intel Celeron processor. All processors
-            // are model 02h and manufactured using the 0.13 micron process.
-      *Type =
-          ((Features & (1 << FEATURE_EM64T)) ? INTEL_X86_64 : INTEL_PENTIUM_IV);
-      break;
-
-    case 3: // Pentium 4 processor, Intel Xeon processor, Intel Celeron D
-            // processor. All processors are model 03h and manufactured using
-            // the 90 nm process.
-    case 4: // Pentium 4 processor, Pentium 4 processor Extreme Edition,
-            // Pentium D processor, Intel Xeon processor, Intel Xeon
-            // processor MP, Intel Celeron D processor. All processors are
-            // model 04h and manufactured using the 90 nm process.
-    case 6: // Pentium 4 processor, Pentium D processor, Pentium processor
-            // Extreme Edition, Intel Xeon processor, Intel Xeon processor
-            // MP, Intel Celeron D processor. All processors are model 06h
-            // and manufactured using the 65 nm process.
-      *Type =
-          ((Features & (1 << FEATURE_EM64T)) ? INTEL_NOCONA : INTEL_PRESCOTT);
-      break;
-
-    default:
-      *Type =
-          ((Features & (1 << FEATURE_EM64T)) ? INTEL_X86_64 : INTEL_PENTIUM_IV);
-      break;
-    }
-  }
   default:
-    break; /*"generic"*/
+    break; // Unknown.
   }
 }
 
-static void getAMDProcessorTypeAndSubtype(unsigned int Family,
-                                          unsigned int Model,
-                                          unsigned int Features, unsigned *Type,
+static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
+                                          unsigned Features, unsigned *Type,
                                           unsigned *Subtype) {
   // FIXME: this poorly matches the generated SubtargetFeatureKV table.  There
   // appears to be no way to generate the wide variety of AMD-specific targets
   // from the information returned from CPUID.
   switch (Family) {
-  case 4:
-    *Type = AMD_i486;
-  case 5:
-    *Type = AMDPENTIUM;
-    switch (Model) {
-    case 6:
-    case 7:
-      *Subtype = AMDPENTIUM_K6;
-      break; // "k6"
-    case 8:
-      *Subtype = AMDPENTIUM_K62;
-      break; // "k6-2"
-    case 9:
-    case 13:
-      *Subtype = AMDPENTIUM_K63;
-      break; // "k6-3"
-    case 10:
-      *Subtype = AMDPENTIUM_GEODE;
-      break; // "geode"
-    default:
-      break;
-    }
-  case 6:
-    *Type = AMDATHLON;
-    switch (Model) {
-    case 4:
-      *Subtype = AMDATHLON_TBIRD;
-      break; // "athlon-tbird"
-    case 6:
-    case 7:
-    case 8:
-      *Subtype = AMDATHLON_MP;
-      break; // "athlon-mp"
-    case 10:
-      *Subtype = AMDATHLON_XP;
-      break; // "athlon-xp"
-    default:
-      break;
-    }
-  case 15:
-    *Type = AMDATHLON;
-    if (Features & (1 << FEATURE_SSE3)) {
-      *Subtype = AMDATHLON_K8SSE3;
-      break; // "k8-sse3"
-    }
-    switch (Model) {
-    case 1:
-      *Subtype = AMDATHLON_OPTERON;
-      break; // "opteron"
-    case 5:
-      *Subtype = AMDATHLON_FX;
-      break; // "athlon-fx"; also opteron
-    default:
-      *Subtype = AMDATHLON_64;
-      break; // "athlon64"
-    }
   case 16:
     *Type = AMDFAM10H; // "amdfam10"
     switch (Model) {
@@ -639,23 +394,16 @@
     case 8:
       *Subtype = AMDFAM10H_ISTANBUL;
       break;
-    default:
-      break;
     }
+    break;
   case 20:
-    *Type = AMDFAM14H;
-    *Subtype = AMD_BTVER1;
+    *Type = AMD_BTVER1;
     break; // "btver1";
   case 21:
     *Type = AMDFAM15H;
-    if (!(Features &
-          (1 << FEATURE_AVX))) { // If no AVX support, provide a sane fallback.
-      *Subtype = AMD_BTVER1;
-      break; // "btver1"
-    }
-    if (Model >= 0x50 && Model <= 0x6f) {
+    if (Model >= 0x60 && Model <= 0x7f) {
       *Subtype = AMDFAM15H_BDVER4;
-      break; // "bdver4"; 50h-6Fh: Excavator
+      break; // "bdver4"; 60h-7Fh: Excavator
     }
     if (Model >= 0x30 && Model <= 0x3f) {
       *Subtype = AMDFAM15H_BDVER3;
@@ -671,31 +419,47 @@
     }
     break;
   case 22:
-    *Type = AMDFAM16H;
-    if (!(Features &
-          (1 << FEATURE_AVX))) { // If no AVX support provide a sane fallback.
-      *Subtype = AMD_BTVER1;
-      break; // "btver1";
-    }
-    *Subtype = AMD_BTVER2;
+    *Type = AMD_BTVER2;
     break; // "btver2"
+  case 23:
+    *Type = AMDFAM17H;
+    *Subtype = AMDFAM17H_ZNVER1;
+    break;
   default:
     break; // "generic"
   }
 }
 
-static unsigned getAvailableFeatures(unsigned int ECX, unsigned int EDX,
-                                     unsigned MaxLeaf) {
+static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
+                                 unsigned *FeaturesOut) {
   unsigned Features = 0;
-  unsigned int EAX, EBX;
-  Features |= (((EDX >> 23) & 1) << FEATURE_MMX);
-  Features |= (((EDX >> 25) & 1) << FEATURE_SSE);
-  Features |= (((EDX >> 26) & 1) << FEATURE_SSE2);
-  Features |= (((ECX >> 0) & 1) << FEATURE_SSE3);
-  Features |= (((ECX >> 9) & 1) << FEATURE_SSSE3);
-  Features |= (((ECX >> 19) & 1) << FEATURE_SSE4_1);
-  Features |= (((ECX >> 20) & 1) << FEATURE_SSE4_2);
-  Features |= (((ECX >> 22) & 1) << FEATURE_MOVBE);
+  unsigned EAX, EBX;
+
+  if ((EDX >> 15) & 1)
+    Features |= 1 << FEATURE_CMOV;
+  if ((EDX >> 23) & 1)
+    Features |= 1 << FEATURE_MMX;
+  if ((EDX >> 25) & 1)
+    Features |= 1 << FEATURE_SSE;
+  if ((EDX >> 26) & 1)
+    Features |= 1 << FEATURE_SSE2;
+
+  if ((ECX >> 0) & 1)
+    Features |= 1 << FEATURE_SSE3;
+  if ((ECX >> 1) & 1)
+    Features |= 1 << FEATURE_PCLMUL;
+  if ((ECX >> 9) & 1)
+    Features |= 1 << FEATURE_SSSE3;
+  if ((ECX >> 12) & 1)
+    Features |= 1 << FEATURE_FMA;
+  if ((ECX >> 19) & 1)
+    Features |= 1 << FEATURE_SSE4_1;
+  if ((ECX >> 20) & 1)
+    Features |= 1 << FEATURE_SSE4_2;
+  if ((ECX >> 23) & 1)
+    Features |= 1 << FEATURE_POPCNT;
+  if ((ECX >> 25) & 1)
+    Features |= 1 << FEATURE_AES;
 
   // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
   // indicates that the AVX registers will be saved and restored on context
@@ -704,30 +468,72 @@
   bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
                 ((EAX & 0x6) == 0x6);
   bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
-  bool HasLeaf7 = MaxLeaf >= 0x7;
-  getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
-  bool HasADX = HasLeaf7 && ((EBX >> 19) & 1);
-  bool HasAVX2 = HasAVX && HasLeaf7 && (EBX & 0x20);
-  bool HasAVX512 = HasLeaf7 && HasAVX512Save && ((EBX >> 16) & 1);
-  Features |= (HasAVX << FEATURE_AVX);
-  Features |= (HasAVX2 << FEATURE_AVX2);
-  Features |= (HasAVX512 << FEATURE_AVX512);
-  Features |= (HasAVX512Save << FEATURE_AVX512SAVE);
-  Features |= (HasADX << FEATURE_ADX);
 
-  getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
-  Features |= (((EDX >> 29) & 0x1) << FEATURE_EM64T);
-  return Features;
+  if (HasAVX)
+    Features |= 1 << FEATURE_AVX;
+
+  bool HasLeaf7 =
+      MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
+
+  if (HasLeaf7 && ((EBX >> 3) & 1))
+    Features |= 1 << FEATURE_BMI;
+  if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
+    Features |= 1 << FEATURE_AVX2;
+  if (HasLeaf7 && ((EBX >> 9) & 1))
+    Features |= 1 << FEATURE_BMI2;
+  if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
+    Features |= 1 << FEATURE_AVX512F;
+  if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
+    Features |= 1 << FEATURE_AVX512DQ;
+  if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
+    Features |= 1 << FEATURE_AVX512IFMA;
+  if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
+    Features |= 1 << FEATURE_AVX512PF;
+  if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
+    Features |= 1 << FEATURE_AVX512ER;
+  if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
+    Features |= 1 << FEATURE_AVX512CD;
+  if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
+    Features |= 1 << FEATURE_AVX512BW;
+  if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
+    Features |= 1 << FEATURE_AVX512VL;
+
+  if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
+    Features |= 1 << FEATURE_AVX512VBMI;
+  if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
+    Features |= 1 << FEATURE_AVX512VPOPCNTDQ;
+
+  if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
+    Features |= 1 << FEATURE_AVX5124VNNIW;
+  if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
+    Features |= 1 << FEATURE_AVX5124FMAPS;
+
+  unsigned MaxExtLevel;
+  getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
+
+  bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
+                     !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
+  if (HasExtLeaf1 && ((ECX >> 6) & 1))
+    Features |= 1 << FEATURE_SSE4_A;
+  if (HasExtLeaf1 && ((ECX >> 11) & 1))
+    Features |= 1 << FEATURE_XOP;
+  if (HasExtLeaf1 && ((ECX >> 16) & 1))
+    Features |= 1 << FEATURE_FMA4;
+
+  *FeaturesOut = Features;
 }
 
-#ifdef HAVE_INIT_PRIORITY
-#define CONSTRUCTOR_PRIORITY (101)
+#if defined(HAVE_INIT_PRIORITY)
+#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__ 101))
+#elif __has_attribute(__constructor__)
+#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__))
 #else
-#define CONSTRUCTOR_PRIORITY
+// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
+// this runs during initialization.
+#define CONSTRUCTOR_ATTRIBUTE
 #endif
 
-int __cpu_indicator_init(void)
-    __attribute__((constructor CONSTRUCTOR_PRIORITY));
+int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE;
 
 struct __processor_model {
   unsigned int __cpu_vendor;
@@ -742,13 +548,13 @@
    the priority set.  However, it still runs after ifunc initializers and
    needs to be called explicitly there.  */
 
-int __attribute__((constructor CONSTRUCTOR_PRIORITY))
+int CONSTRUCTOR_ATTRIBUTE
 __cpu_indicator_init(void) {
-  unsigned int EAX, EBX, ECX, EDX;
-  unsigned int MaxLeaf = 5;
-  unsigned int Vendor;
-  unsigned int Model, Family, Brand_id;
-  unsigned int Features = 0;
+  unsigned EAX, EBX, ECX, EDX;
+  unsigned MaxLeaf = 5;
+  unsigned Vendor;
+  unsigned Model, Family, Brand_id;
+  unsigned Features = 0;
 
   /* This function needs to run just once.  */
   if (__cpu_model.__cpu_vendor)
@@ -758,9 +564,7 @@
     return -1;
 
   /* Assume cpuid insn present. Run in level 0 to get vendor id. */
-  getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX);
-
-  if (MaxLeaf < 1) {
+  if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
     __cpu_model.__cpu_vendor = VENDOR_OTHER;
     return -1;
   }
@@ -769,7 +573,7 @@
   Brand_id = EBX & 0xff;
 
   /* Find available features. */
-  Features = getAvailableFeatures(ECX, EDX, MaxLeaf);
+  getAvailableFeatures(ECX, EDX, MaxLeaf, &Features);
   __cpu_model.__cpu_features[0] = Features;
 
   if (Vendor == SIG_INTEL) {
diff --git a/lib/builtins/divdf3.c b/lib/builtins/divdf3.c
index ab44c2b..04a4dc5 100644
--- a/lib/builtins/divdf3.c
+++ b/lib/builtins/divdf3.c
@@ -19,8 +19,6 @@
 #define DOUBLE_PRECISION
 #include "fp_lib.h"
 
-ARM_EABI_FNALIAS(ddiv, divdf3)
-
 COMPILER_RT_ABI fp_t
 __divdf3(fp_t a, fp_t b) {
     
@@ -183,3 +181,13 @@
         return result;
     }
 }
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI fp_t __aeabi_ddiv(fp_t a, fp_t b) {
+  return __divdf3(a, b);
+}
+#else
+AEABI_RTABI fp_t __aeabi_ddiv(fp_t a, fp_t b) COMPILER_RT_ALIAS(__divdf3);
+#endif
+#endif
diff --git a/lib/builtins/divsf3.c b/lib/builtins/divsf3.c
index de2e376..65294d7 100644
--- a/lib/builtins/divsf3.c
+++ b/lib/builtins/divsf3.c
@@ -19,8 +19,6 @@
 #define SINGLE_PRECISION
 #include "fp_lib.h"
 
-ARM_EABI_FNALIAS(fdiv, divsf3)
-
 COMPILER_RT_ABI fp_t
 __divsf3(fp_t a, fp_t b) {
     
@@ -167,3 +165,13 @@
         return fromRep(absResult | quotientSign);
     }
 }
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI fp_t __aeabi_fdiv(fp_t a, fp_t b) {
+  return __divsf3(a, b);
+}
+#else
+AEABI_RTABI fp_t __aeabi_fdiv(fp_t a, fp_t b) COMPILER_RT_ALIAS(__divsf3);
+#endif
+#endif
diff --git a/lib/builtins/divsi3.c b/lib/builtins/divsi3.c
index bab4aef..75aea00 100644
--- a/lib/builtins/divsi3.c
+++ b/lib/builtins/divsi3.c
@@ -16,8 +16,6 @@
 
 /* Returns: a / b */
 
-ARM_EABI_FNALIAS(idiv, divsi3)
-
 COMPILER_RT_ABI si_int
 __divsi3(si_int a, si_int b)
 {
@@ -35,3 +33,7 @@
      */
     return ((su_int)a/(su_int)b ^ s_a) - s_a;    /* negate if s_a == -1 */
 }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI si_int __aeabi_idiv(si_int a, si_int b) COMPILER_RT_ALIAS(__divsi3);
+#endif
diff --git a/lib/builtins/divtc3.c b/lib/builtins/divtc3.c
index 04693df..16e538b 100644
--- a/lib/builtins/divtc3.c
+++ b/lib/builtins/divtc3.c
@@ -17,7 +17,7 @@
 
 /* Returns: the quotient of (a + ib) / (c + id) */
 
-COMPILER_RT_ABI long double _Complex
+COMPILER_RT_ABI Lcomplex
 __divtc3(long double __a, long double __b, long double __c, long double __d)
 {
     int __ilogbw = 0;
@@ -29,31 +29,31 @@
         __d = crt_scalbnl(__d, -__ilogbw);
     }
     long double __denom = __c * __c + __d * __d;
-    long double _Complex z;
-    __real__ z = crt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw);
-    __imag__ z = crt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw);
-    if (crt_isnan(__real__ z) && crt_isnan(__imag__ z))
+    Lcomplex z;
+    COMPLEX_REAL(z) = crt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw);
+    COMPLEX_IMAGINARY(z) = crt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw);
+    if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z)))
     {
         if ((__denom == 0.0) && (!crt_isnan(__a) || !crt_isnan(__b)))
         {
-            __real__ z = crt_copysignl(CRT_INFINITY, __c) * __a;
-            __imag__ z = crt_copysignl(CRT_INFINITY, __c) * __b;
+            COMPLEX_REAL(z) = crt_copysignl(CRT_INFINITY, __c) * __a;
+            COMPLEX_IMAGINARY(z) = crt_copysignl(CRT_INFINITY, __c) * __b;
         }
         else if ((crt_isinf(__a) || crt_isinf(__b)) &&
                  crt_isfinite(__c) && crt_isfinite(__d))
         {
             __a = crt_copysignl(crt_isinf(__a) ? 1.0 : 0.0, __a);
             __b = crt_copysignl(crt_isinf(__b) ? 1.0 : 0.0, __b);
-            __real__ z = CRT_INFINITY * (__a * __c + __b * __d);
-            __imag__ z = CRT_INFINITY * (__b * __c - __a * __d);
+            COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d);
+            COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d);
         }
         else if (crt_isinf(__logbw) && __logbw > 0.0 &&
                  crt_isfinite(__a) && crt_isfinite(__b))
         {
             __c = crt_copysignl(crt_isinf(__c) ? 1.0 : 0.0, __c);
             __d = crt_copysignl(crt_isinf(__d) ? 1.0 : 0.0, __d);
-            __real__ z = 0.0 * (__a * __c + __b * __d);
-            __imag__ z = 0.0 * (__b * __c - __a * __d);
+            COMPLEX_REAL(z) = 0.0 * (__a * __c + __b * __d);
+            COMPLEX_IMAGINARY(z) = 0.0 * (__b * __c - __a * __d);
         }
     }
     return z;
diff --git a/lib/builtins/emutls.c b/lib/builtins/emutls.c
index eccbf53..5dd8dd1 100644
--- a/lib/builtins/emutls.c
+++ b/lib/builtins/emutls.c
@@ -7,7 +7,6 @@
  *
  * ===----------------------------------------------------------------------===
  */
-#include <pthread.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
@@ -15,6 +14,23 @@
 #include "int_lib.h"
 #include "int_util.h"
 
+typedef struct emutls_address_array {
+    uintptr_t size;  /* number of elements in the 'data' array */
+    void* data[];
+} emutls_address_array;
+
+static void emutls_shutdown(emutls_address_array *array);
+
+#ifndef _WIN32
+
+#include <pthread.h>
+
+static pthread_mutex_t emutls_mutex = PTHREAD_MUTEX_INITIALIZER;
+static pthread_key_t emutls_pthread_key;
+
+typedef unsigned int gcc_word __attribute__((mode(word)));
+typedef unsigned int gcc_pointer __attribute__((mode(pointer)));
+
 /* Default is not to use posix_memalign, so systems like Android
  * can use thread local data without heavier POSIX memory allocators.
  */
@@ -22,26 +38,6 @@
 #define EMUTLS_USE_POSIX_MEMALIGN 0
 #endif
 
-/* For every TLS variable xyz,
- * there is one __emutls_control variable named __emutls_v.xyz.
- * If xyz has non-zero initial value, __emutls_v.xyz's "value"
- * will point to __emutls_t.xyz, which has the initial value.
- */
-typedef unsigned int gcc_word __attribute__((mode(word)));
-typedef struct __emutls_control {
-    /* Must use gcc_word here, instead of size_t, to match GCC.  When
-       gcc_word is larger than size_t, the upper extra bits are all
-       zeros.  We can use variables of size_t to operate on size and
-       align.  */
-    gcc_word size;  /* size of the object in bytes */
-    gcc_word align;  /* alignment of the object in bytes */
-    union {
-        uintptr_t index;  /* data[index-1] is the object address */
-        void* address;  /* object address, when in single thread env */
-    } object;
-    void* value;  /* null or non-zero initial value for the object */
-} __emutls_control;
-
 static __inline void *emutls_memalign_alloc(size_t align, size_t size) {
     void *base;
 #if EMUTLS_USE_POSIX_MEMALIGN
@@ -50,7 +46,7 @@
 #else
     #define EXTRA_ALIGN_PTR_BYTES (align - 1 + sizeof(void*))
     char* object;
-    if ((object = malloc(EXTRA_ALIGN_PTR_BYTES + size)) == NULL)
+    if ((object = (char*)malloc(EXTRA_ALIGN_PTR_BYTES + size)) == NULL)
         abort();
     base = (void*)(((uintptr_t)(object + EXTRA_ALIGN_PTR_BYTES))
                     & ~(uintptr_t)(align - 1));
@@ -69,10 +65,205 @@
 #endif
 }
 
+static void emutls_key_destructor(void* ptr) {
+    emutls_shutdown((emutls_address_array*)ptr);
+    free(ptr);
+}
+
+static __inline void emutls_init(void) {
+    if (pthread_key_create(&emutls_pthread_key, emutls_key_destructor) != 0)
+        abort();
+}
+
+static __inline void emutls_init_once(void) {
+    static pthread_once_t once = PTHREAD_ONCE_INIT;
+    pthread_once(&once, emutls_init);
+}
+
+static __inline void emutls_lock() {
+    pthread_mutex_lock(&emutls_mutex);
+}
+
+static __inline void emutls_unlock() {
+    pthread_mutex_unlock(&emutls_mutex);
+}
+
+static __inline void emutls_setspecific(emutls_address_array *value) {
+    pthread_setspecific(emutls_pthread_key, (void*) value);
+}
+
+static __inline emutls_address_array* emutls_getspecific() {
+    return (emutls_address_array*) pthread_getspecific(emutls_pthread_key);
+}
+
+#else
+
+#include <windows.h>
+#include <malloc.h>
+#include <stdio.h>
+#include <assert.h>
+
+static LPCRITICAL_SECTION emutls_mutex;
+static DWORD emutls_tls_index = TLS_OUT_OF_INDEXES;
+
+typedef uintptr_t gcc_word;
+typedef void * gcc_pointer;
+
+static void win_error(DWORD last_err, const char *hint) {
+    char *buffer = NULL;
+    if (FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER |
+                       FORMAT_MESSAGE_FROM_SYSTEM |
+                       FORMAT_MESSAGE_MAX_WIDTH_MASK,
+                       NULL, last_err, 0, (LPSTR)&buffer, 1, NULL)) {
+        fprintf(stderr, "Windows error: %s\n", buffer);
+    } else {
+        fprintf(stderr, "Unkown Windows error: %s\n", hint);
+    }
+    LocalFree(buffer);
+}
+
+static __inline void win_abort(DWORD last_err, const char *hint) {
+    win_error(last_err, hint);
+    abort();
+}
+
+static __inline void *emutls_memalign_alloc(size_t align, size_t size) {
+    void *base = _aligned_malloc(size, align);
+    if (!base)
+        win_abort(GetLastError(), "_aligned_malloc");
+    return base;
+}
+
+static __inline void emutls_memalign_free(void *base) {
+    _aligned_free(base);
+}
+
+static void emutls_exit(void) {
+    if (emutls_mutex) {
+        DeleteCriticalSection(emutls_mutex);
+        _aligned_free(emutls_mutex);
+        emutls_mutex = NULL;
+    }
+    if (emutls_tls_index != TLS_OUT_OF_INDEXES) {
+        emutls_shutdown((emutls_address_array*)TlsGetValue(emutls_tls_index));
+        TlsFree(emutls_tls_index);
+        emutls_tls_index = TLS_OUT_OF_INDEXES;
+    }
+}
+
+#pragma warning (push)
+#pragma warning (disable : 4100)
+static BOOL CALLBACK emutls_init(PINIT_ONCE p0, PVOID p1, PVOID *p2) {
+    emutls_mutex = (LPCRITICAL_SECTION)_aligned_malloc(sizeof(CRITICAL_SECTION), 16);
+    if (!emutls_mutex) {
+        win_error(GetLastError(), "_aligned_malloc");
+        return FALSE;
+    }
+    InitializeCriticalSection(emutls_mutex);
+
+    emutls_tls_index = TlsAlloc();
+    if (emutls_tls_index == TLS_OUT_OF_INDEXES) {
+        emutls_exit();
+        win_error(GetLastError(), "TlsAlloc");
+        return FALSE;
+    }
+    atexit(&emutls_exit);
+    return TRUE;
+}
+
+static __inline void emutls_init_once(void) {
+    static INIT_ONCE once;
+    InitOnceExecuteOnce(&once, emutls_init, NULL, NULL);
+}
+
+static __inline void emutls_lock() {
+    EnterCriticalSection(emutls_mutex);
+}
+
+static __inline void emutls_unlock() {
+    LeaveCriticalSection(emutls_mutex);
+}
+
+static __inline void emutls_setspecific(emutls_address_array *value) {
+    if (TlsSetValue(emutls_tls_index, (LPVOID) value) == 0)
+        win_abort(GetLastError(), "TlsSetValue");
+}
+
+static __inline emutls_address_array* emutls_getspecific() {
+    LPVOID value = TlsGetValue(emutls_tls_index);
+    if (value == NULL) {
+        const DWORD err = GetLastError();
+        if (err != ERROR_SUCCESS)
+            win_abort(err, "TlsGetValue");
+    }
+    return (emutls_address_array*) value;
+}
+
+/* Provide atomic load/store functions for emutls_get_index if built with MSVC.
+ */
+#if !defined(__ATOMIC_RELEASE)
+#include <intrin.h>
+
+enum { __ATOMIC_ACQUIRE = 2, __ATOMIC_RELEASE = 3 };
+
+static __inline uintptr_t __atomic_load_n(void *ptr, unsigned type) {
+    assert(type == __ATOMIC_ACQUIRE);
+    // These return the previous value - but since we do an OR with 0,
+    // it's equivalent to a plain load.
+#ifdef _WIN64
+    return InterlockedOr64(ptr, 0);
+#else
+    return InterlockedOr(ptr, 0);
+#endif
+}
+
+static __inline void __atomic_store_n(void *ptr, uintptr_t val, unsigned type) {
+    assert(type == __ATOMIC_RELEASE);
+    InterlockedExchangePointer((void *volatile *)ptr, (void *)val);
+}
+
+#endif
+
+#pragma warning (pop)
+
+#endif
+
+static size_t emutls_num_object = 0;  /* number of allocated TLS objects */
+
+/* Free the allocated TLS data
+ */
+static void emutls_shutdown(emutls_address_array *array) {
+    if (array) {
+        uintptr_t i;
+        for (i = 0; i < array->size; ++i) {
+            if (array->data[i])
+                emutls_memalign_free(array->data[i]);
+        }
+    }
+}
+
+/* For every TLS variable xyz,
+ * there is one __emutls_control variable named __emutls_v.xyz.
+ * If xyz has non-zero initial value, __emutls_v.xyz's "value"
+ * will point to __emutls_t.xyz, which has the initial value.
+ */
+typedef struct __emutls_control {
+    /* Must use gcc_word here, instead of size_t, to match GCC.  When
+       gcc_word is larger than size_t, the upper extra bits are all
+       zeros.  We can use variables of size_t to operate on size and
+       align.  */
+    gcc_word size;  /* size of the object in bytes */
+    gcc_word align;  /* alignment of the object in bytes */
+    union {
+        uintptr_t index;  /* data[index-1] is the object address */
+        void* address;  /* object address, when in single thread env */
+    } object;
+    void* value;  /* null or non-zero initial value for the object */
+} __emutls_control;
+
 /* Emulated TLS objects are always allocated at run-time. */
 static __inline void *emutls_allocate_object(__emutls_control *control) {
     /* Use standard C types, check with gcc's emutls.o. */
-    typedef unsigned int gcc_pointer __attribute__((mode(pointer)));
     COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(gcc_pointer));
     COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(void*));
 
@@ -93,45 +284,19 @@
     return base;
 }
 
-static pthread_mutex_t emutls_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-static size_t emutls_num_object = 0;  /* number of allocated TLS objects */
-
-typedef struct emutls_address_array {
-    uintptr_t size;  /* number of elements in the 'data' array */
-    void* data[];
-} emutls_address_array;
-
-static pthread_key_t emutls_pthread_key;
-
-static void emutls_key_destructor(void* ptr) {
-    emutls_address_array* array = (emutls_address_array*)ptr;
-    uintptr_t i;
-    for (i = 0; i < array->size; ++i) {
-        if (array->data[i])
-            emutls_memalign_free(array->data[i]);
-    }
-    free(ptr);
-}
-
-static void emutls_init(void) {
-    if (pthread_key_create(&emutls_pthread_key, emutls_key_destructor) != 0)
-        abort();
-}
 
 /* Returns control->object.index; set index if not allocated yet. */
 static __inline uintptr_t emutls_get_index(__emutls_control *control) {
     uintptr_t index = __atomic_load_n(&control->object.index, __ATOMIC_ACQUIRE);
     if (!index) {
-        static pthread_once_t once = PTHREAD_ONCE_INIT;
-        pthread_once(&once, emutls_init);
-        pthread_mutex_lock(&emutls_mutex);
+        emutls_init_once();
+        emutls_lock();
         index = control->object.index;
         if (!index) {
             index = ++emutls_num_object;
             __atomic_store_n(&control->object.index, index, __ATOMIC_RELEASE);
         }
-        pthread_mutex_unlock(&emutls_mutex);
+        emutls_unlock();
     }
     return index;
 }
@@ -142,7 +307,7 @@
     if (array == NULL)
         abort();
     array->size = size;
-    pthread_setspecific(emutls_pthread_key, (void*)array);
+    emutls_setspecific(array);
 }
 
 /* Returns the new 'data' array size, number of elements,
@@ -156,22 +321,29 @@
     return ((index + 1 + 15) & ~((uintptr_t)15)) - 1;
 }
 
+/* Returns the size in bytes required for an emutls_address_array with
+ * N number of elements for data field.
+ */
+static __inline uintptr_t emutls_asize(uintptr_t N) {
+    return N * sizeof(void *) + sizeof(emutls_address_array);
+}
+
 /* Returns the thread local emutls_address_array.
  * Extends its size if necessary to hold address at index.
  */
 static __inline emutls_address_array *
 emutls_get_address_array(uintptr_t index) {
-    emutls_address_array* array = pthread_getspecific(emutls_pthread_key);
+    emutls_address_array* array = emutls_getspecific();
     if (array == NULL) {
         uintptr_t new_size = emutls_new_data_array_size(index);
-        array = malloc(new_size * sizeof(void *) + sizeof(emutls_address_array));
+        array = (emutls_address_array*) malloc(emutls_asize(new_size));
         if (array)
             memset(array->data, 0, new_size * sizeof(void*));
         emutls_check_array_set_size(array, new_size);
     } else if (index > array->size) {
         uintptr_t orig_size = array->size;
         uintptr_t new_size = emutls_new_data_array_size(index);
-        array = realloc(array, new_size * sizeof(void *) + sizeof(emutls_address_array));
+        array = (emutls_address_array*) realloc(array, emutls_asize(new_size));
         if (array)
             memset(array->data + orig_size, 0,
                    (new_size - orig_size) * sizeof(void*));
@@ -182,8 +354,8 @@
 
 void* __emutls_get_address(__emutls_control* control) {
     uintptr_t index = emutls_get_index(control);
-    emutls_address_array* array = emutls_get_address_array(index);
-    if (array->data[index - 1] == NULL)
-        array->data[index - 1] = emutls_allocate_object(control);
-    return array->data[index - 1];
+    emutls_address_array* array = emutls_get_address_array(index--);
+    if (array->data[index] == NULL)
+        array->data[index] = emutls_allocate_object(control);
+    return array->data[index];
 }
diff --git a/lib/builtins/enable_execute_stack.c b/lib/builtins/enable_execute_stack.c
index 0dc3482..327d460 100644
--- a/lib/builtins/enable_execute_stack.c
+++ b/lib/builtins/enable_execute_stack.c
@@ -22,7 +22,7 @@
 
 #ifdef _WIN32
 #define WIN32_LEAN_AND_MEAN
-#include <Windows.h>
+#include <windows.h>
 #else
 #ifndef __APPLE__
 #include <unistd.h>
diff --git a/lib/builtins/extendhfsf2.c b/lib/builtins/extendhfsf2.c
index 27115a4..d9c0db8 100644
--- a/lib/builtins/extendhfsf2.c
+++ b/lib/builtins/extendhfsf2.c
@@ -12,8 +12,6 @@
 #define DST_SINGLE
 #include "fp_extend_impl.inc"
 
-ARM_EABI_FNALIAS(h2f, extendhfsf2)
-
 // Use a forwarding definition and noinline to implement a poor man's alias,
 // as there isn't a good cross-platform way of defining one.
 COMPILER_RT_ABI NOINLINE float __extendhfsf2(uint16_t a) {
@@ -23,3 +21,13 @@
 COMPILER_RT_ABI float __gnu_h2f_ieee(uint16_t a) {
     return __extendhfsf2(a);
 }
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI float __aeabi_h2f(uint16_t a) {
+  return __extendhfsf2(a);
+}
+#else
+AEABI_RTABI float __aeabi_h2f(uint16_t a) COMPILER_RT_ALIAS(__extendhfsf2);
+#endif
+#endif
diff --git a/lib/builtins/extendsfdf2.c b/lib/builtins/extendsfdf2.c
index 7a267c2..3d84529 100644
--- a/lib/builtins/extendsfdf2.c
+++ b/lib/builtins/extendsfdf2.c
@@ -12,8 +12,16 @@
 #define DST_DOUBLE
 #include "fp_extend_impl.inc"
 
-ARM_EABI_FNALIAS(f2d, extendsfdf2)
-
 COMPILER_RT_ABI double __extendsfdf2(float a) {
     return __extendXfYf2__(a);
 }
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI double __aeabi_f2d(float a) {
+  return __extendsfdf2(a);
+}
+#else
+AEABI_RTABI double __aeabi_f2d(float a) COMPILER_RT_ALIAS(__extendsfdf2);
+#endif
+#endif
diff --git a/lib/builtins/ffssi2.c b/lib/builtins/ffssi2.c
new file mode 100644
index 0000000..e5180ef
--- /dev/null
+++ b/lib/builtins/ffssi2.c
@@ -0,0 +1,29 @@
+/* ===-- ffssi2.c - Implement __ffssi2 -------------------------------------===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __ffssi2 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#include "int_lib.h"
+
+/* Returns: the index of the least significant 1-bit in a, or
+ * the value zero if a is zero. The least significant bit is index one.
+ */
+
+COMPILER_RT_ABI si_int
+__ffssi2(si_int a)
+{
+    if (a == 0)
+    {
+        return 0;
+    }
+    return __builtin_ctz(a) + 1;
+}
diff --git a/lib/builtins/fixdfdi.c b/lib/builtins/fixdfdi.c
index 14283ef..54e312d 100644
--- a/lib/builtins/fixdfdi.c
+++ b/lib/builtins/fixdfdi.c
@@ -10,7 +10,6 @@
 
 #define DOUBLE_PRECISION
 #include "fp_lib.h"
-ARM_EABI_FNALIAS(d2lz, fixdfdi)
 
 #ifndef __SOFT_FP__
 /* Support for systems that have hardware floating-point; can set the invalid
@@ -44,3 +43,13 @@
 }
 
 #endif
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI di_int __aeabi_d2lz(fp_t a) {
+  return __fixdfdi(a);
+}
+#else
+AEABI_RTABI di_int __aeabi_d2lz(fp_t a) COMPILER_RT_ALIAS(__fixdfdi);
+#endif
+#endif
diff --git a/lib/builtins/fixdfsi.c b/lib/builtins/fixdfsi.c
index 704e65b..5b95881 100644
--- a/lib/builtins/fixdfsi.c
+++ b/lib/builtins/fixdfsi.c
@@ -14,9 +14,17 @@
 typedef su_int fixuint_t;
 #include "fp_fixint_impl.inc"
 
-ARM_EABI_FNALIAS(d2iz, fixdfsi)
-
 COMPILER_RT_ABI si_int
 __fixdfsi(fp_t a) {
     return __fixint(a);
 }
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI si_int __aeabi_d2iz(fp_t a) {
+  return __fixdfsi(a);
+}
+#else
+AEABI_RTABI si_int __aeabi_d2iz(fp_t a) COMPILER_RT_ALIAS(__fixdfsi);
+#endif
+#endif
diff --git a/lib/builtins/fixsfdi.c b/lib/builtins/fixsfdi.c
index fab47e2..32e87c6 100644
--- a/lib/builtins/fixsfdi.c
+++ b/lib/builtins/fixsfdi.c
@@ -11,8 +11,6 @@
 #define SINGLE_PRECISION
 #include "fp_lib.h"
 
-ARM_EABI_FNALIAS(f2lz, fixsfdi)
-
 #ifndef __SOFT_FP__
 /* Support for systems that have hardware floating-point; can set the invalid
  * flag as a side-effect of computation.
@@ -45,3 +43,13 @@
 }
 
 #endif
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI di_int __aeabi_f2lz(fp_t a) {
+  return __fixsfdi(a);
+}
+#else
+AEABI_RTABI di_int __aeabi_f2lz(fp_t a) COMPILER_RT_ALIAS(__fixsfdi);
+#endif
+#endif
diff --git a/lib/builtins/fixsfsi.c b/lib/builtins/fixsfsi.c
index f045536..e94e5f3 100644
--- a/lib/builtins/fixsfsi.c
+++ b/lib/builtins/fixsfsi.c
@@ -14,9 +14,17 @@
 typedef su_int fixuint_t;
 #include "fp_fixint_impl.inc"
 
-ARM_EABI_FNALIAS(f2iz, fixsfsi)
-
 COMPILER_RT_ABI si_int
 __fixsfsi(fp_t a) {
     return __fixint(a);
 }
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI si_int __aeabi_f2iz(fp_t a) {
+  return __fixsfsi(a);
+}
+#else
+AEABI_RTABI si_int __aeabi_f2iz(fp_t a) COMPILER_RT_ALIAS(__fixsfsi);
+#endif
+#endif
diff --git a/lib/builtins/fixunsdfdi.c b/lib/builtins/fixunsdfdi.c
index 4b0bc9e..bfe4dbb 100644
--- a/lib/builtins/fixunsdfdi.c
+++ b/lib/builtins/fixunsdfdi.c
@@ -11,8 +11,6 @@
 #define DOUBLE_PRECISION
 #include "fp_lib.h"
 
-ARM_EABI_FNALIAS(d2ulz, fixunsdfdi)
-
 #ifndef __SOFT_FP__
 /* Support for systems that have hardware floating-point; can set the invalid
  * flag as a side-effect of computation.
@@ -42,3 +40,13 @@
 }
 
 #endif
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI du_int __aeabi_d2ulz(fp_t a) {
+  return __fixunsdfdi(a);
+}
+#else
+AEABI_RTABI du_int __aeabi_d2ulz(fp_t a) COMPILER_RT_ALIAS(__fixunsdfdi);
+#endif
+#endif
diff --git a/lib/builtins/fixunsdfsi.c b/lib/builtins/fixunsdfsi.c
index 232d342..3c5355b 100644
--- a/lib/builtins/fixunsdfsi.c
+++ b/lib/builtins/fixunsdfsi.c
@@ -13,9 +13,17 @@
 typedef su_int fixuint_t;
 #include "fp_fixuint_impl.inc"
 
-ARM_EABI_FNALIAS(d2uiz, fixunsdfsi)
-
 COMPILER_RT_ABI su_int
 __fixunsdfsi(fp_t a) {
     return __fixuint(a);
 }
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI su_int __aeabi_d2uiz(fp_t a) {
+  return __fixunsdfsi(a);
+}
+#else
+AEABI_RTABI su_int __aeabi_d2uiz(fp_t a) COMPILER_RT_ALIAS(__fixunsdfsi);
+#endif
+#endif
diff --git a/lib/builtins/fixunssfdi.c b/lib/builtins/fixunssfdi.c
index f8ebab8..080a25b 100644
--- a/lib/builtins/fixunssfdi.c
+++ b/lib/builtins/fixunssfdi.c
@@ -11,8 +11,6 @@
 #define SINGLE_PRECISION
 #include "fp_lib.h"
 
-ARM_EABI_FNALIAS(f2ulz, fixunssfdi)
-
 #ifndef __SOFT_FP__
 /* Support for systems that have hardware floating-point; can set the invalid
  * flag as a side-effect of computation.
@@ -43,3 +41,13 @@
 }
 
 #endif
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI du_int __aeabi_f2ulz(fp_t a) {
+  return __fixunssfdi(a);
+}
+#else
+AEABI_RTABI du_int __aeabi_f2ulz(fp_t a) COMPILER_RT_ALIAS(__fixunssfdi);
+#endif
+#endif
diff --git a/lib/builtins/fixunssfsi.c b/lib/builtins/fixunssfsi.c
index cc2b05b..eca2916 100644
--- a/lib/builtins/fixunssfsi.c
+++ b/lib/builtins/fixunssfsi.c
@@ -17,9 +17,17 @@
 typedef su_int fixuint_t;
 #include "fp_fixuint_impl.inc"
 
-ARM_EABI_FNALIAS(f2uiz, fixunssfsi)
-
 COMPILER_RT_ABI su_int
 __fixunssfsi(fp_t a) {
     return __fixuint(a);
 }
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI su_int __aeabi_f2uiz(fp_t a) {
+  return __fixunssfsi(a);
+}
+#else
+AEABI_RTABI su_int __aeabi_f2uiz(fp_t a) COMPILER_RT_ALIAS(__fixunssfsi);
+#endif
+#endif
diff --git a/lib/builtins/floatdidf.c b/lib/builtins/floatdidf.c
index 2b023ad..36b856e 100644
--- a/lib/builtins/floatdidf.c
+++ b/lib/builtins/floatdidf.c
@@ -22,8 +22,6 @@
 
 /* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm */
 
-ARM_EABI_FNALIAS(l2d, floatdidf)
-
 #ifndef __SOFT_FP__
 /* Support for systems that have hardware floating-point; we'll set the inexact flag
  * as a side-effect of this computation.
@@ -105,3 +103,13 @@
     return fb.f;
 }
 #endif
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI double __aeabi_l2d(di_int a) {
+  return __floatdidf(a);
+}
+#else
+AEABI_RTABI double __aeabi_l2d(di_int a) COMPILER_RT_ALIAS(__floatdidf);
+#endif
+#endif
diff --git a/lib/builtins/floatdisf.c b/lib/builtins/floatdisf.c
index 3e47580..a2f09eb 100644
--- a/lib/builtins/floatdisf.c
+++ b/lib/builtins/floatdisf.c
@@ -22,8 +22,6 @@
 
 #include "int_lib.h"
 
-ARM_EABI_FNALIAS(l2f, floatdisf)
-
 COMPILER_RT_ABI float
 __floatdisf(di_int a)
 {
@@ -78,3 +76,13 @@
            ((su_int)a & 0x007FFFFF);   /* mantissa */
     return fb.f;
 }
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI float __aeabi_l2f(di_int a) {
+  return __floatdisf(a);
+}
+#else
+AEABI_RTABI float __aeabi_l2f(di_int a) COMPILER_RT_ALIAS(__floatdisf);
+#endif
+#endif
diff --git a/lib/builtins/floatsidf.c b/lib/builtins/floatsidf.c
index 1cf99b7..fe05112 100644
--- a/lib/builtins/floatsidf.c
+++ b/lib/builtins/floatsidf.c
@@ -18,8 +18,6 @@
 
 #include "int_lib.h"
 
-ARM_EABI_FNALIAS(i2d, floatsidf)
-
 COMPILER_RT_ABI fp_t
 __floatsidf(int a) {
     
@@ -51,3 +49,13 @@
     // Insert the sign bit and return
     return fromRep(result | sign);
 }
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI fp_t __aeabi_i2d(int a) {
+  return __floatsidf(a);
+}
+#else
+AEABI_RTABI fp_t __aeabi_i2d(int a) COMPILER_RT_ALIAS(__floatsidf);
+#endif
+#endif
diff --git a/lib/builtins/floatsisf.c b/lib/builtins/floatsisf.c
index 467dd1d..bf087ee 100644
--- a/lib/builtins/floatsisf.c
+++ b/lib/builtins/floatsisf.c
@@ -18,8 +18,6 @@
 
 #include "int_lib.h"
 
-ARM_EABI_FNALIAS(i2f, floatsisf)
-
 COMPILER_RT_ABI fp_t
 __floatsisf(int a) {
     
@@ -57,3 +55,13 @@
     // Insert the sign bit and return
     return fromRep(result | sign);
 }
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI fp_t __aeabi_i2f(int a) {
+  return __floatsisf(a);
+}
+#else
+AEABI_RTABI fp_t __aeabi_i2f(int a) COMPILER_RT_ALIAS(__floatsisf);
+#endif
+#endif
diff --git a/lib/builtins/floatundidf.c b/lib/builtins/floatundidf.c
index cfd3a7a..8bc2a09 100644
--- a/lib/builtins/floatundidf.c
+++ b/lib/builtins/floatundidf.c
@@ -22,8 +22,6 @@
 
 #include "int_lib.h"
 
-ARM_EABI_FNALIAS(ul2d, floatundidf)
-
 #ifndef __SOFT_FP__
 /* Support for systems that have hardware floating-point; we'll set the inexact flag
  * as a side-effect of this computation.
@@ -104,3 +102,13 @@
     return fb.f;
 }
 #endif
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI double __aeabi_ul2d(du_int a) {
+  return __floatundidf(a);
+}
+#else
+AEABI_RTABI double __aeabi_ul2d(du_int a) COMPILER_RT_ALIAS(__floatundidf);
+#endif
+#endif
diff --git a/lib/builtins/floatundisf.c b/lib/builtins/floatundisf.c
index 713a44a..844786e 100644
--- a/lib/builtins/floatundisf.c
+++ b/lib/builtins/floatundisf.c
@@ -22,8 +22,6 @@
 
 #include "int_lib.h"
 
-ARM_EABI_FNALIAS(ul2f, floatundisf)
-
 COMPILER_RT_ABI float
 __floatundisf(du_int a)
 {
@@ -75,3 +73,13 @@
            ((su_int)a & 0x007FFFFF);  /* mantissa */
     return fb.f;
 }
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI float __aeabi_ul2f(du_int a) {
+  return __floatundisf(a);
+}
+#else
+AEABI_RTABI float __aeabi_ul2f(du_int a) COMPILER_RT_ALIAS(__floatundisf);
+#endif
+#endif
diff --git a/lib/builtins/floatunsidf.c b/lib/builtins/floatunsidf.c
index 445e180..75cf6b9 100644
--- a/lib/builtins/floatunsidf.c
+++ b/lib/builtins/floatunsidf.c
@@ -18,8 +18,6 @@
 
 #include "int_lib.h"
 
-ARM_EABI_FNALIAS(ui2d, floatunsidf)
-
 COMPILER_RT_ABI fp_t
 __floatunsidf(unsigned int a) {
     
@@ -40,3 +38,13 @@
     result += (rep_t)(exponent + exponentBias) << significandBits;
     return fromRep(result);
 }
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI fp_t __aeabi_ui2d(unsigned int a) {
+  return __floatunsidf(a);
+}
+#else
+AEABI_RTABI fp_t __aeabi_ui2d(unsigned int a) COMPILER_RT_ALIAS(__floatunsidf);
+#endif
+#endif
diff --git a/lib/builtins/floatunsisf.c b/lib/builtins/floatunsisf.c
index ea6f161..29525cc 100644
--- a/lib/builtins/floatunsisf.c
+++ b/lib/builtins/floatunsisf.c
@@ -18,8 +18,6 @@
 
 #include "int_lib.h"
 
-ARM_EABI_FNALIAS(ui2f, floatunsisf)
-
 COMPILER_RT_ABI fp_t
 __floatunsisf(unsigned int a) {
     
@@ -48,3 +46,13 @@
     result += (rep_t)(exponent + exponentBias) << significandBits;
     return fromRep(result);
 }
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI fp_t __aeabi_ui2f(unsigned int a) {
+  return __floatunsisf(a);
+}
+#else
+AEABI_RTABI fp_t __aeabi_ui2f(unsigned int a) COMPILER_RT_ALIAS(__floatunsisf);
+#endif
+#endif
diff --git a/lib/builtins/int_endianness.h b/lib/builtins/int_endianness.h
index 7995ddb..e2586c5 100644
--- a/lib/builtins/int_endianness.h
+++ b/lib/builtins/int_endianness.h
@@ -61,7 +61,7 @@
 
 #endif /* *BSD */
 
-#if defined(__OpenBSD__) || defined(__Bitrig__)
+#if defined(__OpenBSD__)
 #include <machine/endian.h>
 
 #if _BYTE_ORDER == _BIG_ENDIAN
@@ -72,7 +72,7 @@
 #define _YUGA_BIG_ENDIAN    0
 #endif /* _BYTE_ORDER */
 
-#endif /* OpenBSD and Bitrig. */
+#endif /* OpenBSD */
 
 /* .. */
 
diff --git a/lib/builtins/int_lib.h b/lib/builtins/int_lib.h
index 39eee18..9d09e2d 100644
--- a/lib/builtins/int_lib.h
+++ b/lib/builtins/int_lib.h
@@ -22,22 +22,27 @@
 
 #if defined(__ELF__)
 #define FNALIAS(alias_name, original_name) \
-  void alias_name() __attribute__((alias(#original_name)))
+  void alias_name() __attribute__((__alias__(#original_name)))
+#define COMPILER_RT_ALIAS(aliasee) __attribute__((__alias__(#aliasee)))
 #else
 #define FNALIAS(alias, name) _Pragma("GCC error(\"alias unsupported on this file format\")")
+#define COMPILER_RT_ALIAS(aliasee) _Pragma("GCC error(\"alias unsupported on this file format\")")
 #endif
 
 /* ABI macro definitions */
 
 #if __ARM_EABI__
-# define ARM_EABI_FNALIAS(aeabi_name, name)         \
-  void __aeabi_##aeabi_name() __attribute__((alias("__" #name)));
-# define COMPILER_RT_ABI __attribute__((pcs("aapcs")))
+# ifdef COMPILER_RT_ARMHF_TARGET
+#   define COMPILER_RT_ABI
+# else
+#   define COMPILER_RT_ABI __attribute__((__pcs__("aapcs")))
+# endif
 #else
-# define ARM_EABI_FNALIAS(aeabi_name, name)
 # define COMPILER_RT_ABI
 #endif
 
+#define AEABI_RTABI __attribute__((__pcs__("aapcs")))
+
 #ifdef _MSC_VER
 #define ALWAYS_INLINE __forceinline
 #define NOINLINE __declspec(noinline)
diff --git a/lib/builtins/int_types.h b/lib/builtins/int_types.h
index 660385e..a92238c 100644
--- a/lib/builtins/int_types.h
+++ b/lib/builtins/int_types.h
@@ -60,9 +60,7 @@
     }s;
 } udwords;
 
-/* MIPS64 issue: PR 20098 */
-#if (defined(__LP64__) || defined(__wasm__)) && \
-    !(defined(__mips__) && defined(__clang__))
+#if (defined(__LP64__) || defined(__wasm__) || defined(__mips64))
 #define CRT_HAS_128BIT
 #endif
 
diff --git a/lib/builtins/int_util.c b/lib/builtins/int_util.c
index 420d1e2..de87410 100644
--- a/lib/builtins/int_util.c
+++ b/lib/builtins/int_util.c
@@ -45,6 +45,16 @@
   __assert_rtn(function, file, line, "libcompiler_rt abort");
 }
 
+#elif __Fuchsia__
+
+#ifndef _WIN32
+__attribute__((weak))
+__attribute__((visibility("hidden")))
+#endif
+void compilerrt_abort_impl(const char *file, int line, const char *function) {
+  __builtin_trap();
+}
+
 #else
 
 /* Get the system definition of abort() */
diff --git a/lib/builtins/lshrdi3.c b/lib/builtins/lshrdi3.c
index 6b1ea92..67b2a76 100644
--- a/lib/builtins/lshrdi3.c
+++ b/lib/builtins/lshrdi3.c
@@ -18,8 +18,6 @@
 
 /* Precondition:  0 <= b < bits_in_dword */
 
-ARM_EABI_FNALIAS(llsr, lshrdi3)
-
 COMPILER_RT_ABI di_int
 __lshrdi3(di_int a, si_int b)
 {
@@ -41,3 +39,7 @@
     }
     return result.all;
 }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI di_int __aeabi_llsr(di_int a, si_int b) COMPILER_RT_ALIAS(__lshrdi3);
+#endif
diff --git a/lib/builtins/muldf3.c b/lib/builtins/muldf3.c
index 1eb7338..1bb103e 100644
--- a/lib/builtins/muldf3.c
+++ b/lib/builtins/muldf3.c
@@ -15,8 +15,16 @@
 #define DOUBLE_PRECISION
 #include "fp_mul_impl.inc"
 
-ARM_EABI_FNALIAS(dmul, muldf3)
-
 COMPILER_RT_ABI fp_t __muldf3(fp_t a, fp_t b) {
     return __mulXf3__(a, b);
 }
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI fp_t __aeabi_dmul(fp_t a, fp_t b) {
+  return __muldf3(a, b);
+}
+#else
+AEABI_RTABI fp_t __aeabi_dmul(fp_t a, fp_t b) COMPILER_RT_ALIAS(__muldf3);
+#endif
+#endif
diff --git a/lib/builtins/muldi3.c b/lib/builtins/muldi3.c
index 2dae44c..a187315 100644
--- a/lib/builtins/muldi3.c
+++ b/lib/builtins/muldi3.c
@@ -40,8 +40,6 @@
 
 /* Returns: a * b */
 
-ARM_EABI_FNALIAS(lmul, muldi3)
-
 COMPILER_RT_ABI di_int
 __muldi3(di_int a, di_int b)
 {
@@ -54,3 +52,7 @@
     r.s.high += x.s.high * y.s.low + x.s.low * y.s.high;
     return r.all;
 }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI di_int __aeabi_lmul(di_int a, di_int b) COMPILER_RT_ALIAS(__muldi3);
+#endif
diff --git a/lib/builtins/mulsf3.c b/lib/builtins/mulsf3.c
index 478b3bc..1e2cf3e 100644
--- a/lib/builtins/mulsf3.c
+++ b/lib/builtins/mulsf3.c
@@ -15,8 +15,16 @@
 #define SINGLE_PRECISION
 #include "fp_mul_impl.inc"
 
-ARM_EABI_FNALIAS(fmul, mulsf3)
-
 COMPILER_RT_ABI fp_t __mulsf3(fp_t a, fp_t b) {
     return __mulXf3__(a, b);
 }
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI fp_t __aeabi_fmul(fp_t a, fp_t b) {
+  return __mulsf3(a, b);
+}
+#else
+AEABI_RTABI fp_t __aeabi_fmul(fp_t a, fp_t b) COMPILER_RT_ALIAS(__mulsf3);
+#endif
+#endif
diff --git a/lib/builtins/negdf2.c b/lib/builtins/negdf2.c
index d634b42..f0bfaad 100644
--- a/lib/builtins/negdf2.c
+++ b/lib/builtins/negdf2.c
@@ -14,9 +14,17 @@
 #define DOUBLE_PRECISION
 #include "fp_lib.h"
 
-ARM_EABI_FNALIAS(dneg, negdf2)
-
 COMPILER_RT_ABI fp_t
 __negdf2(fp_t a) {
     return fromRep(toRep(a) ^ signBit);
 }
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI fp_t __aeabi_dneg(fp_t a) {
+  return __negdf2(a);
+}
+#else
+AEABI_RTABI fp_t __aeabi_dneg(fp_t a) COMPILER_RT_ALIAS(__negdf2);
+#endif
+#endif
diff --git a/lib/builtins/negsf2.c b/lib/builtins/negsf2.c
index 29c17be..05c97d4 100644
--- a/lib/builtins/negsf2.c
+++ b/lib/builtins/negsf2.c
@@ -14,9 +14,17 @@
 #define SINGLE_PRECISION
 #include "fp_lib.h"
 
-ARM_EABI_FNALIAS(fneg, negsf2)
-
 COMPILER_RT_ABI fp_t
 __negsf2(fp_t a) {
     return fromRep(toRep(a) ^ signBit);
 }
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI fp_t __aeabi_fneg(fp_t a) {
+  return __negsf2(a);
+}
+#else
+AEABI_RTABI fp_t __aeabi_fneg(fp_t a) COMPILER_RT_ALIAS(__negsf2);
+#endif
+#endif
diff --git a/lib/builtins/os_version_check.c b/lib/builtins/os_version_check.c
index b36ae54..74ade2f 100644
--- a/lib/builtins/os_version_check.c
+++ b/lib/builtins/os_version_check.c
@@ -18,6 +18,7 @@
 #include <CoreFoundation/CoreFoundation.h>
 #include <dispatch/dispatch.h>
 #include <TargetConditionals.h>
+#include <dlfcn.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -30,6 +31,58 @@
 /* Find and parse the SystemVersion.plist file. */
 static void parseSystemVersionPList(void *Unused) {
   (void)Unused;
+  /* Load CoreFoundation dynamically */
+  const void *NullAllocator = dlsym(RTLD_DEFAULT, "kCFAllocatorNull");
+  if (!NullAllocator)
+    return;
+  const CFAllocatorRef kCFAllocatorNull =
+      *(const CFAllocatorRef *)NullAllocator;
+  typeof(CFDataCreateWithBytesNoCopy) *CFDataCreateWithBytesNoCopyFunc =
+      (typeof(CFDataCreateWithBytesNoCopy) *)dlsym(
+          RTLD_DEFAULT, "CFDataCreateWithBytesNoCopy");
+  if (!CFDataCreateWithBytesNoCopyFunc)
+    return;
+  typeof(CFPropertyListCreateWithData) *CFPropertyListCreateWithDataFunc =
+      (typeof(CFPropertyListCreateWithData) *)dlsym(
+          RTLD_DEFAULT, "CFPropertyListCreateWithData");
+  /* CFPropertyListCreateWithData was introduced only in macOS 10.6+, so it
+   * will be NULL on earlier OS versions. */
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wdeprecated-declarations"
+  typeof(CFPropertyListCreateFromXMLData) *CFPropertyListCreateFromXMLDataFunc =
+      (typeof(CFPropertyListCreateFromXMLData) *)dlsym(
+          RTLD_DEFAULT, "CFPropertyListCreateFromXMLData");
+#pragma clang diagnostic pop
+  /* CFPropertyListCreateFromXMLDataFunc is deprecated in macOS 10.10, so it
+   * might be NULL in future OS versions. */
+  if (!CFPropertyListCreateWithDataFunc && !CFPropertyListCreateFromXMLDataFunc)
+    return;
+  typeof(CFStringCreateWithCStringNoCopy) *CFStringCreateWithCStringNoCopyFunc =
+      (typeof(CFStringCreateWithCStringNoCopy) *)dlsym(
+          RTLD_DEFAULT, "CFStringCreateWithCStringNoCopy");
+  if (!CFStringCreateWithCStringNoCopyFunc)
+    return;
+  typeof(CFDictionaryGetValue) *CFDictionaryGetValueFunc =
+      (typeof(CFDictionaryGetValue) *)dlsym(RTLD_DEFAULT,
+                                            "CFDictionaryGetValue");
+  if (!CFDictionaryGetValueFunc)
+    return;
+  typeof(CFGetTypeID) *CFGetTypeIDFunc =
+      (typeof(CFGetTypeID) *)dlsym(RTLD_DEFAULT, "CFGetTypeID");
+  if (!CFGetTypeIDFunc)
+    return;
+  typeof(CFStringGetTypeID) *CFStringGetTypeIDFunc =
+      (typeof(CFStringGetTypeID) *)dlsym(RTLD_DEFAULT, "CFStringGetTypeID");
+  if (!CFStringGetTypeIDFunc)
+    return;
+  typeof(CFStringGetCString) *CFStringGetCStringFunc =
+      (typeof(CFStringGetCString) *)dlsym(RTLD_DEFAULT, "CFStringGetCString");
+  if (!CFStringGetCStringFunc)
+    return;
+  typeof(CFRelease) *CFReleaseFunc =
+      (typeof(CFRelease) *)dlsym(RTLD_DEFAULT, "CFRelease");
+  if (!CFReleaseFunc)
+    return;
 
   char *PListPath = "/System/Library/CoreServices/SystemVersion.plist";
 
@@ -67,40 +120,41 @@
 
   /* Get the file buffer into CF's format. We pass in a null allocator here *
    * because we free PListBuf ourselves */
-  FileContentsRef = CFDataCreateWithBytesNoCopy(
+  FileContentsRef = (*CFDataCreateWithBytesNoCopyFunc)(
       NULL, PListBuf, (CFIndex)NumRead, kCFAllocatorNull);
   if (!FileContentsRef)
     goto Fail;
 
-  if (&CFPropertyListCreateWithData)
-    PListRef = CFPropertyListCreateWithData(
+  if (CFPropertyListCreateWithDataFunc)
+    PListRef = (*CFPropertyListCreateWithDataFunc)(
         NULL, FileContentsRef, kCFPropertyListImmutable, NULL, NULL);
-  else {
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wdeprecated-declarations"
-    PListRef = CFPropertyListCreateFromXMLData(NULL, FileContentsRef,
-                                               kCFPropertyListImmutable, NULL);
-#pragma clang diagnostic pop
-  }
+  else
+    PListRef = (*CFPropertyListCreateFromXMLDataFunc)(
+        NULL, FileContentsRef, kCFPropertyListImmutable, NULL);
   if (!PListRef)
     goto Fail;
 
-  CFTypeRef OpaqueValue =
-      CFDictionaryGetValue(PListRef, CFSTR("ProductVersion"));
-  if (!OpaqueValue || CFGetTypeID(OpaqueValue) != CFStringGetTypeID())
+  CFStringRef ProductVersion = (*CFStringCreateWithCStringNoCopyFunc)(
+      NULL, "ProductVersion", kCFStringEncodingASCII, kCFAllocatorNull);
+  if (!ProductVersion)
+    goto Fail;
+  CFTypeRef OpaqueValue = (*CFDictionaryGetValueFunc)(PListRef, ProductVersion);
+  (*CFReleaseFunc)(ProductVersion);
+  if (!OpaqueValue ||
+      (*CFGetTypeIDFunc)(OpaqueValue) != (*CFStringGetTypeIDFunc)())
     goto Fail;
 
   char VersionStr[32];
-  if (!CFStringGetCString((CFStringRef)OpaqueValue, VersionStr,
-                          sizeof(VersionStr), kCFStringEncodingUTF8))
+  if (!(*CFStringGetCStringFunc)((CFStringRef)OpaqueValue, VersionStr,
+                                 sizeof(VersionStr), kCFStringEncodingUTF8))
     goto Fail;
   sscanf(VersionStr, "%d.%d.%d", &GlobalMajor, &GlobalMinor, &GlobalSubminor);
 
 Fail:
   if (PListRef)
-    CFRelease(PListRef);
+    (*CFReleaseFunc)(PListRef);
   if (FileContentsRef)
-    CFRelease(FileContentsRef);
+    (*CFReleaseFunc)(FileContentsRef);
   free(PListBuf);
   fclose(PropertyList);
 }
diff --git a/lib/builtins/subdf3.c b/lib/builtins/subdf3.c
index 7a79e5e..a892fa6 100644
--- a/lib/builtins/subdf3.c
+++ b/lib/builtins/subdf3.c
@@ -15,11 +15,18 @@
 #define DOUBLE_PRECISION
 #include "fp_lib.h"
 
-ARM_EABI_FNALIAS(dsub, subdf3)
-
 // Subtraction; flip the sign bit of b and add.
 COMPILER_RT_ABI fp_t
 __subdf3(fp_t a, fp_t b) {
     return __adddf3(a, fromRep(toRep(b) ^ signBit));
 }
 
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI fp_t __aeabi_dsub(fp_t a, fp_t b) {
+  return __subdf3(a, b);
+}
+#else
+AEABI_RTABI fp_t __aeabi_dsub(fp_t a, fp_t b) COMPILER_RT_ALIAS(__subdf3);
+#endif
+#endif
diff --git a/lib/builtins/subsf3.c b/lib/builtins/subsf3.c
index c3b8514..4b27861 100644
--- a/lib/builtins/subsf3.c
+++ b/lib/builtins/subsf3.c
@@ -15,11 +15,18 @@
 #define SINGLE_PRECISION
 #include "fp_lib.h"
 
-ARM_EABI_FNALIAS(fsub, subsf3)
-
 // Subtraction; flip the sign bit of b and add.
 COMPILER_RT_ABI fp_t
 __subsf3(fp_t a, fp_t b) {
     return __addsf3(a, fromRep(toRep(b) ^ signBit));
 }
 
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI fp_t __aeabi_fsub(fp_t a, fp_t b) {
+  return __subsf3(a, b);
+}
+#else
+AEABI_RTABI fp_t __aeabi_fsub(fp_t a, fp_t b) COMPILER_RT_ALIAS(__subsf3);
+#endif
+#endif
diff --git a/lib/builtins/truncdfhf2.c b/lib/builtins/truncdfhf2.c
index 17195cd..8354a41 100644
--- a/lib/builtins/truncdfhf2.c
+++ b/lib/builtins/truncdfhf2.c
@@ -11,8 +11,16 @@
 #define DST_HALF
 #include "fp_trunc_impl.inc"
 
-ARM_EABI_FNALIAS(d2h, truncdfhf2)
-
 COMPILER_RT_ABI uint16_t __truncdfhf2(double a) {
     return __truncXfYf2__(a);
 }
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI uint16_t __aeabi_d2h(double a) {
+  return __truncdfhf2(a);
+}
+#else
+AEABI_RTABI uint16_t __aeabi_d2h(double a) COMPILER_RT_ALIAS(__truncdfhf2);
+#endif
+#endif
diff --git a/lib/builtins/truncdfsf2.c b/lib/builtins/truncdfsf2.c
index 46ec11d..195d3e0 100644
--- a/lib/builtins/truncdfsf2.c
+++ b/lib/builtins/truncdfsf2.c
@@ -11,8 +11,16 @@
 #define DST_SINGLE
 #include "fp_trunc_impl.inc"
 
-ARM_EABI_FNALIAS(d2f, truncdfsf2)
-
 COMPILER_RT_ABI float __truncdfsf2(double a) {
     return __truncXfYf2__(a);
 }
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI float __aeabi_d2f(double a) {
+  return __truncdfsf2(a);
+}
+#else
+AEABI_RTABI float __aeabi_d2f(double a) COMPILER_RT_ALIAS(__truncdfsf2);
+#endif
+#endif
diff --git a/lib/builtins/truncsfhf2.c b/lib/builtins/truncsfhf2.c
index 9d61895..9c84ab4 100644
--- a/lib/builtins/truncsfhf2.c
+++ b/lib/builtins/truncsfhf2.c
@@ -11,8 +11,6 @@
 #define DST_HALF
 #include "fp_trunc_impl.inc"
 
-ARM_EABI_FNALIAS(f2h, truncsfhf2)
-
 // Use a forwarding definition and noinline to implement a poor man's alias,
 // as there isn't a good cross-platform way of defining one.
 COMPILER_RT_ABI NOINLINE uint16_t __truncsfhf2(float a) {
@@ -22,3 +20,13 @@
 COMPILER_RT_ABI uint16_t __gnu_f2h_ieee(float a) {
     return __truncsfhf2(a);
 }
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI uint16_t __aeabi_f2h(float a) {
+  return __truncsfhf2(a);
+}
+#else
+AEABI_RTABI uint16_t __aeabi_f2h(float a) COMPILER_RT_ALIAS(__truncsfhf2);
+#endif
+#endif
diff --git a/lib/builtins/udivsi3.c b/lib/builtins/udivsi3.c
index 5d0140c..bb720f8 100644
--- a/lib/builtins/udivsi3.c
+++ b/lib/builtins/udivsi3.c
@@ -18,8 +18,6 @@
 
 /* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */
 
-ARM_EABI_FNALIAS(uidiv, udivsi3)
-
 /* This function should not call __divsi3! */
 COMPILER_RT_ABI su_int
 __udivsi3(su_int n, su_int d)
@@ -64,3 +62,7 @@
     q = (q << 1) | carry;
     return q;
 }
+
+#if defined(__ARM_EABI__)
+AEABI_RTABI su_int __aeabi_uidiv(su_int n, su_int d) COMPILER_RT_ALIAS(__udivsi3);
+#endif
diff --git a/lib/builtins/x86_64/floatdidf.c b/lib/builtins/x86_64/floatdidf.c
index 388404e..dead0ed 100644
--- a/lib/builtins/x86_64/floatdidf.c
+++ b/lib/builtins/x86_64/floatdidf.c
@@ -4,7 +4,7 @@
 
 /* double __floatdidf(di_int a); */
 
-#ifdef __x86_64__
+#if defined(__x86_64__) || defined(_M_X64)
 
 #include "../int_lib.h"
 
diff --git a/lib/builtins/x86_64/floatdisf.c b/lib/builtins/x86_64/floatdisf.c
index 96c3728..99d5621 100644
--- a/lib/builtins/x86_64/floatdisf.c
+++ b/lib/builtins/x86_64/floatdisf.c
@@ -2,7 +2,7 @@
  * License. See LICENSE.TXT for details.
  */
 
-#ifdef __x86_64__
+#if defined(__x86_64__) || defined(_M_X64)
 
 #include "../int_lib.h"
 
diff --git a/lib/cfi/CMakeLists.txt b/lib/cfi/CMakeLists.txt
index 2064002..6c53144 100644
--- a/lib/cfi/CMakeLists.txt
+++ b/lib/cfi/CMakeLists.txt
@@ -1,37 +1,39 @@
 add_compiler_rt_component(cfi)
 
-set(CFI_SOURCES cfi.cc)
+if(OS_NAME MATCHES "Linux")
+  set(CFI_SOURCES cfi.cc)
 
-include_directories(..)
+  include_directories(..)
 
-set(CFI_CFLAGS
-  ${SANITIZER_COMMON_CFLAGS}
-)
+  set(CFI_CFLAGS
+    ${SANITIZER_COMMON_CFLAGS}
+  )
 
-set(CFI_DIAG_CFLAGS
-  -DCFI_ENABLE_DIAG=1
-)
+  set(CFI_DIAG_CFLAGS
+    -DCFI_ENABLE_DIAG=1
+  )
 
-foreach(arch ${CFI_SUPPORTED_ARCH})
-  add_compiler_rt_runtime(clang_rt.cfi
-    STATIC
-    ARCHS ${arch}
-    SOURCES ${CFI_SOURCES}
-    OBJECT_LIBS RTInterception
-                RTSanitizerCommon
-                RTSanitizerCommonLibc
-    CFLAGS ${CFI_CFLAGS}
-    PARENT_TARGET cfi)
-  add_compiler_rt_runtime(clang_rt.cfi_diag
-    STATIC
-    ARCHS ${arch}
-    SOURCES ${CFI_SOURCES}
-    OBJECT_LIBS RTInterception
-                RTSanitizerCommon
-                RTSanitizerCommonLibc
-		RTUbsan
-    CFLAGS ${CFI_CFLAGS} ${CFI_DIAG_CFLAGS}
-    PARENT_TARGET cfi)
-endforeach()
+  foreach(arch ${CFI_SUPPORTED_ARCH})
+    add_compiler_rt_runtime(clang_rt.cfi
+      STATIC
+      ARCHS ${arch}
+      SOURCES ${CFI_SOURCES}
+      OBJECT_LIBS RTInterception
+                  RTSanitizerCommon
+                  RTSanitizerCommonLibc
+      CFLAGS ${CFI_CFLAGS}
+      PARENT_TARGET cfi)
+    add_compiler_rt_runtime(clang_rt.cfi_diag
+      STATIC
+      ARCHS ${arch}
+      SOURCES ${CFI_SOURCES}
+      OBJECT_LIBS RTInterception
+                  RTSanitizerCommon
+                  RTSanitizerCommonLibc
+                  RTUbsan
+      CFLAGS ${CFI_CFLAGS} ${CFI_DIAG_CFLAGS}
+      PARENT_TARGET cfi)
+  endforeach()
+endif()
 
 add_compiler_rt_resource_file(cfi_blacklist cfi_blacklist.txt cfi)
diff --git a/lib/cfi/cfi.cc b/lib/cfi/cfi.cc
index d463ca8..f720230 100644
--- a/lib/cfi/cfi.cc
+++ b/lib/cfi/cfi.cc
@@ -188,12 +188,14 @@
     }
   }
   if (!dynamic) return 0;
-  uptr strtab = 0, symtab = 0;
+  uptr strtab = 0, symtab = 0, strsz = 0;
   for (const ElfW(Dyn) *p = dynamic; p->d_tag != PT_NULL; ++p) {
     if (p->d_tag == DT_SYMTAB)
       symtab = p->d_un.d_ptr;
     else if (p->d_tag == DT_STRTAB)
       strtab = p->d_un.d_ptr;
+    else if (p->d_tag == DT_STRSZ)
+      strsz = p->d_un.d_ptr;
   }
 
   if (symtab > strtab) {
@@ -209,7 +211,8 @@
     if (phdr->p_type == PT_LOAD) {
       uptr beg = info->dlpi_addr + phdr->p_vaddr;
       uptr end = beg + phdr->p_memsz;
-      if (strtab >= beg && strtab < end && symtab >= beg && symtab < end)
+      if (strtab >= beg && strtab + strsz < end && symtab >= beg &&
+          symtab < end)
         break;
     }
   }
@@ -222,9 +225,14 @@
 
   for (const ElfW(Sym) *p = (const ElfW(Sym) *)symtab; (ElfW(Addr))p < strtab;
        ++p) {
+    // There is no reliable way to find the end of the symbol table. In
+    // lld-produces files, there are other sections between symtab and strtab.
+    // Stop looking when the symbol name is not inside strtab.
+    if (p->st_name >= strsz) break;
     char *name = (char*)(strtab + p->st_name);
     if (strcmp(name, "__cfi_check") == 0) {
-      assert(p->st_info == ELF32_ST_INFO(STB_GLOBAL, STT_FUNC));
+      assert(p->st_info == ELF32_ST_INFO(STB_GLOBAL, STT_FUNC) ||
+             p->st_info == ELF32_ST_INFO(STB_WEAK, STT_FUNC));
       uptr addr = info->dlpi_addr + p->st_value;
       return addr;
     }
diff --git a/lib/cfi/cfi_blacklist.txt b/lib/cfi/cfi_blacklist.txt
index 1f0eeb3..d8c9d49 100644
--- a/lib/cfi/cfi_blacklist.txt
+++ b/lib/cfi/cfi_blacklist.txt
@@ -1,18 +1,4 @@
-# Standard library types.
-type:std::*
-
-# The stdext namespace contains Microsoft standard library extensions.
-type:stdext::*
-
-# Types with a uuid attribute, i.e. COM types.
-type:attr:uuid
-
-# STL allocators (T *allocator<T *>::allocate(size_type, const void*)).
-# The type signature mandates a cast from uninitialized void* to T*.
-# size_type can either be unsigned int (j) or unsigned long (m).
-fun:*8allocateEjPKv
-fun:*8allocateEmPKv
-
+[cfi-unrelated-cast]
 # std::get_temporary_buffer, likewise (libstdc++, libc++).
 fun:_ZSt20get_temporary_buffer*
 fun:_ZNSt3__120get_temporary_buffer*
@@ -24,3 +10,8 @@
 # Windows C++ stdlib headers that contain bad unrelated casts.
 src:*xmemory0
 src:*xstddef
+
+# std::_Sp_counted_ptr_inplace::_Sp_counted_ptr_inplace() (libstdc++).
+# This ctor is used by std::make_shared and needs to cast to uninitialized T*
+# in order to call std::allocator_traits<T>::construct.
+fun:_ZNSt23_Sp_counted_ptr_inplace*
diff --git a/lib/dfsan/dfsan.cc b/lib/dfsan/dfsan.cc
index 3aa99b7..9e360c9 100644
--- a/lib/dfsan/dfsan.cc
+++ b/lib/dfsan/dfsan.cc
@@ -21,6 +21,7 @@
 
 #include "sanitizer_common/sanitizer_atomic.h"
 #include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_file.h"
 #include "sanitizer_common/sanitizer_flags.h"
 #include "sanitizer_common/sanitizer_flag_parser.h"
 #include "sanitizer_common/sanitizer_libc.h"
diff --git a/lib/dfsan/done_abilist.txt b/lib/dfsan/done_abilist.txt
index a00dc54..a560cd7 100644
--- a/lib/dfsan/done_abilist.txt
+++ b/lib/dfsan/done_abilist.txt
@@ -285,24 +285,8 @@
 fun:__sanitizer_cov_module_init=discard
 fun:__sanitizer_cov_with_check=uninstrumented
 fun:__sanitizer_cov_with_check=discard
-fun:__sanitizer_cov_indir_call16=uninstrumented
-fun:__sanitizer_cov_indir_call16=discard
-fun:__sanitizer_cov_indir_call16=uninstrumented
-fun:__sanitizer_cov_indir_call16=discard
-fun:__sanitizer_reset_coverage=uninstrumented
-fun:__sanitizer_reset_coverage=discard
 fun:__sanitizer_set_death_callback=uninstrumented
 fun:__sanitizer_set_death_callback=discard
-fun:__sanitizer_get_coverage_guards=uninstrumented
-fun:__sanitizer_get_coverage_guards=discard
-fun:__sanitizer_get_number_of_counters=uninstrumented
-fun:__sanitizer_get_number_of_counters=discard
-fun:__sanitizer_update_counter_bitset_and_clear_counters=uninstrumented
-fun:__sanitizer_update_counter_bitset_and_clear_counters=discard
-fun:__sanitizer_get_total_unique_coverage=uninstrumented
-fun:__sanitizer_get_total_unique_coverage=discard
-fun:__sanitizer_get_total_unique_coverage=uninstrumented
-fun:__sanitizer_get_total_unique_coverage=discard
 fun:__sanitizer_update_counter_bitset_and_clear_counters=uninstrumented
 fun:__sanitizer_update_counter_bitset_and_clear_counters=discard
 
diff --git a/lib/esan/esan_interceptors.cpp b/lib/esan/esan_interceptors.cpp
index 9ae5482..62fa13c 100644
--- a/lib/esan/esan_interceptors.cpp
+++ b/lib/esan/esan_interceptors.cpp
@@ -31,6 +31,8 @@
 // Get the per-platform defines for what is possible to intercept
 #include "sanitizer_common/sanitizer_platform_interceptors.h"
 
+DECLARE_REAL_AND_INTERCEPTOR(void *, malloc, uptr)
+
 // TODO(bruening): tsan disables several interceptors (getpwent, etc.) claiming
 // that interception is a perf hit: should we do the same?
 
@@ -304,20 +306,6 @@
   return REAL(unlink)(path);
 }
 
-INTERCEPTOR(uptr, fread, void *ptr, uptr size, uptr nmemb, void *f) {
-  void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, fread, ptr, size, nmemb, f);
-  COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, size * nmemb);
-  return REAL(fread)(ptr, size, nmemb, f);
-}
-
-INTERCEPTOR(uptr, fwrite, const void *p, uptr size, uptr nmemb, void *f) {
-  void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, fwrite, p, size, nmemb, f);
-  COMMON_INTERCEPTOR_READ_RANGE(ctx, p, size * nmemb);
-  return REAL(fwrite)(p, size, nmemb, f);
-}
-
 INTERCEPTOR(int, puts, const char *s) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, puts, s);
diff --git a/lib/esan/esan_sideline_linux.cpp b/lib/esan/esan_sideline_linux.cpp
index d04f590..bc272df 100644
--- a/lib/esan/esan_sideline_linux.cpp
+++ b/lib/esan/esan_sideline_linux.cpp
@@ -70,7 +70,7 @@
 
   // Set up a signal handler on an alternate stack for safety.
   InternalScopedBuffer<char> StackMap(SigAltStackSize);
-  struct sigaltstack SigAltStack;
+  stack_t SigAltStack;
   SigAltStack.ss_sp = StackMap.data();
   SigAltStack.ss_size = SigAltStackSize;
   SigAltStack.ss_flags = 0;
diff --git a/lib/esan/working_set.cpp b/lib/esan/working_set.cpp
index f391119..e56902c 100644
--- a/lib/esan/working_set.cpp
+++ b/lib/esan/working_set.cpp
@@ -160,15 +160,16 @@
 static u32 computeWorkingSizeAndReset(u32 BitIdx) {
   u32 WorkingSetSize = 0;
   MemoryMappingLayout MemIter(true/*cache*/);
-  uptr Start, End, Prot;
-  while (MemIter.Next(&Start, &End, nullptr/*offs*/, nullptr/*file*/,
-                      0/*file size*/, &Prot)) {
-    VPrintf(4, "%s: considering %p-%p app=%d shadow=%d prot=%u\n",
-            __FUNCTION__, Start, End, Prot, isAppMem(Start),
-            isShadowMem(Start));
-    if (isShadowMem(Start) && (Prot & MemoryMappingLayout::kProtectionWrite)) {
-      VPrintf(3, "%s: walking %p-%p\n", __FUNCTION__, Start, End);
-      WorkingSetSize += countAndClearShadowValues(BitIdx, Start, End);
+  MemoryMappedSegment Segment;
+  while (MemIter.Next(&Segment)) {
+    VPrintf(4, "%s: considering %p-%p app=%d shadow=%d prot=%u\n", __FUNCTION__,
+            Segment.start, Segment.end, Segment.protection,
+            isAppMem(Segment.start), isShadowMem(Segment.start));
+    if (isShadowMem(Segment.start) && Segment.IsWritable()) {
+      VPrintf(3, "%s: walking %p-%p\n", __FUNCTION__, Segment.start,
+              Segment.end);
+      WorkingSetSize +=
+          countAndClearShadowValues(BitIdx, Segment.start, Segment.end);
     }
   }
   return WorkingSetSize;
diff --git a/lib/fuzzer/CMakeLists.txt b/lib/fuzzer/CMakeLists.txt
new file mode 100644
index 0000000..e442b64
--- /dev/null
+++ b/lib/fuzzer/CMakeLists.txt
@@ -0,0 +1,79 @@
+set(LIBFUZZER_SOURCES
+  FuzzerClangCounters.cpp
+  FuzzerCrossOver.cpp
+  FuzzerDriver.cpp
+  FuzzerExtFunctionsDlsym.cpp
+  FuzzerExtFunctionsDlsymWin.cpp
+  FuzzerExtFunctionsWeak.cpp
+  FuzzerExtraCounters.cpp
+  FuzzerIO.cpp
+  FuzzerIOPosix.cpp
+  FuzzerIOWindows.cpp
+  FuzzerLoop.cpp
+  FuzzerMerge.cpp
+  FuzzerMutate.cpp
+  FuzzerSHA1.cpp
+  FuzzerShmemPosix.cpp
+  FuzzerShmemWindows.cpp
+  FuzzerTracePC.cpp
+  FuzzerUtil.cpp
+  FuzzerUtilDarwin.cpp
+  FuzzerUtilLinux.cpp
+  FuzzerUtilPosix.cpp
+  FuzzerUtilWindows.cpp
+  )
+
+CHECK_CXX_SOURCE_COMPILES("
+  static thread_local int blah;
+  int main() {
+  return 0;
+  }
+  " HAS_THREAD_LOCAL)
+
+set(LIBFUZZER_CFLAGS ${SANITIZER_COMMON_CFLAGS})
+
+append_list_if(COMPILER_RT_HAS_OMIT_FRAME_POINTER_FLAG -fno-omit-frame-pointer LIBFUZZER_CFLAGS)
+
+if (CMAKE_CXX_FLAGS MATCHES "fsanitize-coverage")
+  list(APPEND LIBFUZZER_CFLAGS -fno-sanitize-coverage=trace-pc-guard,edge,trace-cmp,indirect-calls,8bit-counters)
+endif()
+
+if(NOT HAS_THREAD_LOCAL)
+  list(APPEND LIBFUZZER_CFLAGS -Dthread_local=__thread)
+endif()
+
+if(APPLE)
+  set(FUZZER_SUPPORTED_OS osx)
+endif()
+
+add_compiler_rt_object_libraries(RTfuzzer
+  OS ${FUZZER_SUPPORTED_OS}
+  ARCHS ${FUZZER_SUPPORTED_ARCH}
+  SOURCES ${LIBFUZZER_SOURCES}
+  CFLAGS ${LIBFUZZER_CFLAGS})
+
+add_compiler_rt_object_libraries(RTfuzzer_main
+  OS ${FUZZER_SUPPORTED_OS}
+  ARCHS ${FUZZER_SUPPORTED_ARCH}
+  SOURCES FuzzerMain.cpp
+  CFLAGS ${LIBFUZZER_CFLAGS})
+
+add_compiler_rt_runtime(clang_rt.fuzzer
+  STATIC
+  OS ${FUZZER_SUPPORTED_OS}
+  ARCHS ${FUZZER_SUPPORTED_ARCH}
+  OBJECT_LIBS RTfuzzer RTfuzzer_main
+  CFLAGS ${LIBFUZZER_CFLAGS}
+  PARENT_TARGET fuzzer)
+
+add_compiler_rt_runtime(clang_rt.fuzzer_no_main
+  STATIC
+  OS ${FUZZER_SUPPORTED_OS}
+  ARCHS ${FUZZER_SUPPORTED_ARCH}
+  OBJECT_LIBS RTfuzzer
+  CFLAGS ${LIBFUZZER_CFLAGS}
+  PARENT_TARGET fuzzer)
+
+if(COMPILER_RT_INCLUDE_TESTS)
+  add_subdirectory(tests)
+endif()
diff --git a/lib/fuzzer/FuzzerClangCounters.cpp b/lib/fuzzer/FuzzerClangCounters.cpp
new file mode 100644
index 0000000..f69e922
--- /dev/null
+++ b/lib/fuzzer/FuzzerClangCounters.cpp
@@ -0,0 +1,49 @@
+//===- FuzzerExtraCounters.cpp - Extra coverage counters ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Coverage counters from Clang's SourceBasedCodeCoverage.
+//===----------------------------------------------------------------------===//
+
+// Support for SourceBasedCodeCoverage is experimental:
+// * Works only for the main binary, not DSOs yet.
+// * Works only on Linux.
+// * Does not implement print_pcs/print_coverage yet.
+// * Is not fully evaluated for performance and sensitivity.
+//   We expect large performance drop due to 64-bit counters,
+//   and *maybe* better sensitivity due to more fine-grained counters.
+//   Preliminary comparison on a single benchmark (RE2) shows
+//   a bit worse sensitivity though.
+
+#include "FuzzerDefs.h"
+
+#if LIBFUZZER_LINUX
+__attribute__((weak)) extern uint64_t __start___llvm_prf_cnts;
+__attribute__((weak)) extern uint64_t __stop___llvm_prf_cnts;
+namespace fuzzer {
+uint64_t *ClangCountersBegin() { return &__start___llvm_prf_cnts; }
+uint64_t *ClangCountersEnd() { return &__stop___llvm_prf_cnts; }
+}  // namespace fuzzer
+#else
+// TODO: Implement on Mac (if the data shows it's worth it).
+//__attribute__((visibility("hidden")))
+//extern uint64_t CountersStart __asm("section$start$__DATA$__llvm_prf_cnts");
+//__attribute__((visibility("hidden")))
+//extern uint64_t CountersEnd __asm("section$end$__DATA$__llvm_prf_cnts");
+namespace fuzzer {
+uint64_t *ClangCountersBegin() { return nullptr; }
+uint64_t *ClangCountersEnd() { return  nullptr; }
+}  // namespace fuzzer
+#endif
+
+namespace fuzzer {
+ATTRIBUTE_NO_SANITIZE_ALL
+void ClearClangCounters() {  // hand-written memset, don't asan-ify.
+  for (auto P = ClangCountersBegin(); P < ClangCountersEnd(); P++)
+    *P = 0;
+}
+}
diff --git a/lib/fuzzer/FuzzerCorpus.h b/lib/fuzzer/FuzzerCorpus.h
new file mode 100644
index 0000000..2da9298
--- /dev/null
+++ b/lib/fuzzer/FuzzerCorpus.h
@@ -0,0 +1,302 @@
+//===- FuzzerCorpus.h - Internal header for the Fuzzer ----------*- C++ -* ===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// fuzzer::InputCorpus
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FUZZER_CORPUS
+#define LLVM_FUZZER_CORPUS
+
+#include "FuzzerDefs.h"
+#include "FuzzerIO.h"
+#include "FuzzerRandom.h"
+#include "FuzzerSHA1.h"
+#include "FuzzerTracePC.h"
+#include <algorithm>
+#include <numeric>
+#include <random>
+#include <unordered_set>
+
+namespace fuzzer {
+
+struct InputInfo {
+  Unit U;  // The actual input data.
+  uint8_t Sha1[kSHA1NumBytes];  // Checksum.
+  // Number of features that this input has and no smaller input has.
+  size_t NumFeatures = 0;
+  size_t Tmp = 0; // Used by ValidateFeatureSet.
+  // Stats.
+  size_t NumExecutedMutations = 0;
+  size_t NumSuccessfullMutations = 0;
+  bool MayDeleteFile = false;
+  bool Reduced = false;
+  Vector<uint32_t> UniqFeatureSet;
+  float FeatureFrequencyScore = 1.0;
+};
+
+class InputCorpus {
+  static const size_t kFeatureSetSize = 1 << 21;
+ public:
+  InputCorpus(const std::string &OutputCorpus) : OutputCorpus(OutputCorpus) {
+    memset(InputSizesPerFeature, 0, sizeof(InputSizesPerFeature));
+    memset(SmallestElementPerFeature, 0, sizeof(SmallestElementPerFeature));
+    memset(FeatureFrequency, 0, sizeof(FeatureFrequency));
+  }
+  ~InputCorpus() {
+    for (auto II : Inputs)
+      delete II;
+  }
+  size_t size() const { return Inputs.size(); }
+  size_t SizeInBytes() const {
+    size_t Res = 0;
+    for (auto II : Inputs)
+      Res += II->U.size();
+    return Res;
+  }
+  size_t NumActiveUnits() const {
+    size_t Res = 0;
+    for (auto II : Inputs)
+      Res += !II->U.empty();
+    return Res;
+  }
+  size_t MaxInputSize() const {
+    size_t Res = 0;
+    for (auto II : Inputs)
+        Res = std::max(Res, II->U.size());
+    return Res;
+  }
+  bool empty() const { return Inputs.empty(); }
+  const Unit &operator[] (size_t Idx) const { return Inputs[Idx]->U; }
+  void AddToCorpus(const Unit &U, size_t NumFeatures, bool MayDeleteFile,
+                   const Vector<uint32_t> &FeatureSet) {
+    assert(!U.empty());
+    if (FeatureDebug)
+      Printf("ADD_TO_CORPUS %zd NF %zd\n", Inputs.size(), NumFeatures);
+    Inputs.push_back(new InputInfo());
+    InputInfo &II = *Inputs.back();
+    II.U = U;
+    II.NumFeatures = NumFeatures;
+    II.MayDeleteFile = MayDeleteFile;
+    II.UniqFeatureSet = FeatureSet;
+    std::sort(II.UniqFeatureSet.begin(), II.UniqFeatureSet.end());
+    ComputeSHA1(U.data(), U.size(), II.Sha1);
+    Hashes.insert(Sha1ToString(II.Sha1));
+    UpdateCorpusDistribution();
+    PrintCorpus();
+    // ValidateFeatureSet();
+  }
+
+  // Debug-only
+  void PrintUnit(const Unit &U) {
+    if (!FeatureDebug) return;
+    for (uint8_t C : U) {
+      if (C != 'F' && C != 'U' && C != 'Z')
+        C = '.';
+      Printf("%c", C);
+    }
+  }
+
+  // Debug-only
+  void PrintFeatureSet(const Vector<uint32_t> &FeatureSet) {
+    if (!FeatureDebug) return;
+    Printf("{");
+    for (uint32_t Feature: FeatureSet)
+      Printf("%u,", Feature);
+    Printf("}");
+  }
+
+  // Debug-only
+  void PrintCorpus() {
+    if (!FeatureDebug) return;
+    Printf("======= CORPUS:\n");
+    int i = 0;
+    for (auto II : Inputs) {
+      if (std::find(II->U.begin(), II->U.end(), 'F') != II->U.end()) {
+        Printf("[%2d] ", i);
+        Printf("%s sz=%zd ", Sha1ToString(II->Sha1).c_str(), II->U.size());
+        PrintUnit(II->U);
+        Printf(" ");
+        PrintFeatureSet(II->UniqFeatureSet);
+        Printf("\n");
+      }
+      i++;
+    }
+  }
+
+  void Replace(InputInfo *II, const Unit &U) {
+    assert(II->U.size() > U.size());
+    Hashes.erase(Sha1ToString(II->Sha1));
+    DeleteFile(*II);
+    ComputeSHA1(U.data(), U.size(), II->Sha1);
+    Hashes.insert(Sha1ToString(II->Sha1));
+    II->U = U;
+    II->Reduced = true;
+    UpdateCorpusDistribution();
+  }
+
+  bool HasUnit(const Unit &U) { return Hashes.count(Hash(U)); }
+  bool HasUnit(const std::string &H) { return Hashes.count(H); }
+  InputInfo &ChooseUnitToMutate(Random &Rand) {
+    InputInfo &II = *Inputs[ChooseUnitIdxToMutate(Rand)];
+    assert(!II.U.empty());
+    return II;
+  };
+
+  // Returns an index of random unit from the corpus to mutate.
+  size_t ChooseUnitIdxToMutate(Random &Rand) {
+    size_t Idx = static_cast<size_t>(CorpusDistribution(Rand));
+    assert(Idx < Inputs.size());
+    return Idx;
+  }
+
+  void PrintStats() {
+    for (size_t i = 0; i < Inputs.size(); i++) {
+      const auto &II = *Inputs[i];
+      Printf("  [%zd %s]\tsz: %zd\truns: %zd\tsucc: %zd\n", i,
+             Sha1ToString(II.Sha1).c_str(), II.U.size(),
+             II.NumExecutedMutations, II.NumSuccessfullMutations);
+    }
+  }
+
+  void PrintFeatureSet() {
+    for (size_t i = 0; i < kFeatureSetSize; i++) {
+      if(size_t Sz = GetFeature(i))
+        Printf("[%zd: id %zd sz%zd] ", i, SmallestElementPerFeature[i], Sz);
+    }
+    Printf("\n\t");
+    for (size_t i = 0; i < Inputs.size(); i++)
+      if (size_t N = Inputs[i]->NumFeatures)
+        Printf(" %zd=>%zd ", i, N);
+    Printf("\n");
+  }
+
+  void DeleteFile(const InputInfo &II) {
+    if (!OutputCorpus.empty() && II.MayDeleteFile)
+      RemoveFile(DirPlusFile(OutputCorpus, Sha1ToString(II.Sha1)));
+  }
+
+  void DeleteInput(size_t Idx) {
+    InputInfo &II = *Inputs[Idx];
+    DeleteFile(II);
+    Unit().swap(II.U);
+    if (FeatureDebug)
+      Printf("EVICTED %zd\n", Idx);
+  }
+
+  bool AddFeature(size_t Idx, uint32_t NewSize, bool Shrink) {
+    assert(NewSize);
+    Idx = Idx % kFeatureSetSize;
+    uint32_t OldSize = GetFeature(Idx);
+    if (OldSize == 0 || (Shrink && OldSize > NewSize)) {
+      if (OldSize > 0) {
+        size_t OldIdx = SmallestElementPerFeature[Idx];
+        InputInfo &II = *Inputs[OldIdx];
+        assert(II.NumFeatures > 0);
+        II.NumFeatures--;
+        if (II.NumFeatures == 0)
+          DeleteInput(OldIdx);
+      } else {
+        NumAddedFeatures++;
+      }
+      NumUpdatedFeatures++;
+      if (FeatureDebug)
+        Printf("ADD FEATURE %zd sz %d\n", Idx, NewSize);
+      SmallestElementPerFeature[Idx] = Inputs.size();
+      InputSizesPerFeature[Idx] = NewSize;
+      return true;
+    }
+    return false;
+  }
+
+  void UpdateFeatureFrequency(size_t Idx) {
+    FeatureFrequency[Idx % kFeatureSetSize]++;
+  }
+  float GetFeatureFrequency(size_t Idx) const {
+    return FeatureFrequency[Idx % kFeatureSetSize];
+  }
+  void UpdateFeatureFrequencyScore(InputInfo *II) {
+    const float kMin = 0.01, kMax = 100.;
+    II->FeatureFrequencyScore = kMin;
+    for (auto Idx : II->UniqFeatureSet)
+      II->FeatureFrequencyScore += 1. / (GetFeatureFrequency(Idx) + 1.);
+    II->FeatureFrequencyScore = Min(II->FeatureFrequencyScore, kMax);
+  }
+
+  size_t NumFeatures() const { return NumAddedFeatures; }
+  size_t NumFeatureUpdates() const { return NumUpdatedFeatures; }
+
+private:
+
+  static const bool FeatureDebug = false;
+
+  size_t GetFeature(size_t Idx) const { return InputSizesPerFeature[Idx]; }
+
+  void ValidateFeatureSet() {
+    if (FeatureDebug)
+      PrintFeatureSet();
+    for (size_t Idx = 0; Idx < kFeatureSetSize; Idx++)
+      if (GetFeature(Idx))
+        Inputs[SmallestElementPerFeature[Idx]]->Tmp++;
+    for (auto II: Inputs) {
+      if (II->Tmp != II->NumFeatures)
+        Printf("ZZZ %zd %zd\n", II->Tmp, II->NumFeatures);
+      assert(II->Tmp == II->NumFeatures);
+      II->Tmp = 0;
+    }
+  }
+
+  // Updates the probability distribution for the units in the corpus.
+  // Must be called whenever the corpus or unit weights are changed.
+  //
+  // Hypothesis: units added to the corpus last are more interesting.
+  //
+  // Hypothesis: inputs with infrequent features are more interesting.
+  void UpdateCorpusDistribution() {
+    size_t N = Inputs.size();
+    assert(N);
+    Intervals.resize(N + 1);
+    Weights.resize(N);
+    std::iota(Intervals.begin(), Intervals.end(), 0);
+    for (size_t i = 0; i < N; i++)
+      Weights[i] = Inputs[i]->NumFeatures
+                       ? (i + 1) * Inputs[i]->FeatureFrequencyScore
+                       : 0.;
+    if (FeatureDebug) {
+      for (size_t i = 0; i < N; i++)
+        Printf("%zd ", Inputs[i]->NumFeatures);
+      Printf("NUM\n");
+      for (size_t i = 0; i < N; i++)
+        Printf("%f ", Inputs[i]->FeatureFrequencyScore);
+      Printf("SCORE\n");
+      for (size_t i = 0; i < N; i++)
+        Printf("%f ", Weights[i]);
+      Printf("Weights\n");
+    }
+    CorpusDistribution = std::piecewise_constant_distribution<double>(
+        Intervals.begin(), Intervals.end(), Weights.begin());
+  }
+  std::piecewise_constant_distribution<double> CorpusDistribution;
+
+  Vector<double> Intervals;
+  Vector<double> Weights;
+
+  std::unordered_set<std::string> Hashes;
+  Vector<InputInfo*> Inputs;
+
+  size_t NumAddedFeatures = 0;
+  size_t NumUpdatedFeatures = 0;
+  uint32_t InputSizesPerFeature[kFeatureSetSize];
+  uint32_t SmallestElementPerFeature[kFeatureSetSize];
+  float FeatureFrequency[kFeatureSetSize];
+
+  std::string OutputCorpus;
+};
+
+}  // namespace fuzzer
+
+#endif  // LLVM_FUZZER_CORPUS
diff --git a/lib/fuzzer/FuzzerCrossOver.cpp b/lib/fuzzer/FuzzerCrossOver.cpp
new file mode 100644
index 0000000..8b0fd7d
--- /dev/null
+++ b/lib/fuzzer/FuzzerCrossOver.cpp
@@ -0,0 +1,52 @@
+//===- FuzzerCrossOver.cpp - Cross over two test inputs -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Cross over test inputs.
+//===----------------------------------------------------------------------===//
+
+#include "FuzzerDefs.h"
+#include "FuzzerMutate.h"
+#include "FuzzerRandom.h"
+#include <cstring>
+
+namespace fuzzer {
+
+// Cross Data1 and Data2, store the result (up to MaxOutSize bytes) in Out.
+size_t MutationDispatcher::CrossOver(const uint8_t *Data1, size_t Size1,
+                                     const uint8_t *Data2, size_t Size2,
+                                     uint8_t *Out, size_t MaxOutSize) {
+  assert(Size1 || Size2);
+  MaxOutSize = Rand(MaxOutSize) + 1;
+  size_t OutPos = 0;
+  size_t Pos1 = 0;
+  size_t Pos2 = 0;
+  size_t *InPos = &Pos1;
+  size_t InSize = Size1;
+  const uint8_t *Data = Data1;
+  bool CurrentlyUsingFirstData = true;
+  while (OutPos < MaxOutSize && (Pos1 < Size1 || Pos2 < Size2)) {
+    // Merge a part of Data into Out.
+    size_t OutSizeLeft = MaxOutSize - OutPos;
+    if (*InPos < InSize) {
+      size_t InSizeLeft = InSize - *InPos;
+      size_t MaxExtraSize = std::min(OutSizeLeft, InSizeLeft);
+      size_t ExtraSize = Rand(MaxExtraSize) + 1;
+      memcpy(Out + OutPos, Data + *InPos, ExtraSize);
+      OutPos += ExtraSize;
+      (*InPos) += ExtraSize;
+    }
+    // Use the other input data on the next iteration.
+    InPos  = CurrentlyUsingFirstData ? &Pos2 : &Pos1;
+    InSize = CurrentlyUsingFirstData ? Size2 : Size1;
+    Data   = CurrentlyUsingFirstData ? Data2 : Data1;
+    CurrentlyUsingFirstData = !CurrentlyUsingFirstData;
+  }
+  return OutPos;
+}
+
+}  // namespace fuzzer
diff --git a/lib/fuzzer/FuzzerDefs.h b/lib/fuzzer/FuzzerDefs.h
new file mode 100644
index 0000000..e8c92ae
--- /dev/null
+++ b/lib/fuzzer/FuzzerDefs.h
@@ -0,0 +1,157 @@
+//===- FuzzerDefs.h - Internal header for the Fuzzer ------------*- C++ -* ===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Basic definitions.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FUZZER_DEFS_H
+#define LLVM_FUZZER_DEFS_H
+
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <string>
+#include <vector>
+#include <set>
+#include <memory>
+
+// Platform detection.
+#ifdef __linux__
+#define LIBFUZZER_APPLE 0
+#define LIBFUZZER_LINUX 1
+#define LIBFUZZER_NETBSD 0
+#define LIBFUZZER_WINDOWS 0
+#elif __APPLE__
+#define LIBFUZZER_APPLE 1
+#define LIBFUZZER_LINUX 0
+#define LIBFUZZER_NETBSD 0
+#define LIBFUZZER_WINDOWS 0
+#elif __NetBSD__
+#define LIBFUZZER_APPLE 0
+#define LIBFUZZER_LINUX 0
+#define LIBFUZZER_NETBSD 1
+#define LIBFUZZER_WINDOWS 0
+#elif _WIN32
+#define LIBFUZZER_APPLE 0
+#define LIBFUZZER_LINUX 0
+#define LIBFUZZER_NETBSD 0
+#define LIBFUZZER_WINDOWS 1
+#else
+#error "Support for your platform has not been implemented"
+#endif
+
+#ifndef __has_attribute
+#  define __has_attribute(x) 0
+#endif
+
+#define LIBFUZZER_POSIX (LIBFUZZER_APPLE || LIBFUZZER_LINUX || LIBFUZZER_NETBSD)
+
+#ifdef __x86_64
+#  if __has_attribute(target)
+#    define ATTRIBUTE_TARGET_POPCNT __attribute__((target("popcnt")))
+#  else
+#    define ATTRIBUTE_TARGET_POPCNT
+#  endif
+#else
+#  define ATTRIBUTE_TARGET_POPCNT
+#endif
+
+
+#ifdef __clang__  // avoid gcc warning.
+#  if __has_attribute(no_sanitize)
+#    define ATTRIBUTE_NO_SANITIZE_MEMORY __attribute__((no_sanitize("memory")))
+#  else
+#    define ATTRIBUTE_NO_SANITIZE_MEMORY
+#  endif
+#  define ALWAYS_INLINE __attribute__((always_inline))
+#else
+#  define ATTRIBUTE_NO_SANITIZE_MEMORY
+#  define ALWAYS_INLINE
+#endif // __clang__
+
+#define ATTRIBUTE_NO_SANITIZE_ADDRESS __attribute__((no_sanitize_address))
+
+#if defined(__has_feature)
+#  if __has_feature(address_sanitizer)
+#    define ATTRIBUTE_NO_SANITIZE_ALL ATTRIBUTE_NO_SANITIZE_ADDRESS
+#  elif __has_feature(memory_sanitizer)
+#    define ATTRIBUTE_NO_SANITIZE_ALL ATTRIBUTE_NO_SANITIZE_MEMORY
+#  else
+#    define ATTRIBUTE_NO_SANITIZE_ALL
+#  endif
+#else
+#  define ATTRIBUTE_NO_SANITIZE_ALL
+#endif
+
+#if LIBFUZZER_WINDOWS
+#define ATTRIBUTE_INTERFACE __declspec(dllexport)
+#else
+#define ATTRIBUTE_INTERFACE __attribute__((visibility("default")))
+#endif
+
+namespace fuzzer {
+
+template <class T> T Min(T a, T b) { return a < b ? a : b; }
+template <class T> T Max(T a, T b) { return a > b ? a : b; }
+
+class Random;
+class Dictionary;
+class DictionaryEntry;
+class MutationDispatcher;
+struct FuzzingOptions;
+class InputCorpus;
+struct InputInfo;
+struct ExternalFunctions;
+
+// Global interface to functions that may or may not be available.
+extern ExternalFunctions *EF;
+
+// We are using a custom allocator to give a different symbol name to STL
+// containers in order to avoid ODR violations.
+template<typename T>
+  class fuzzer_allocator: public std::allocator<T> {
+    public:
+      template<class Other>
+      struct rebind { typedef fuzzer_allocator<Other> other;  };
+  };
+
+template<typename T>
+using Vector = std::vector<T, fuzzer_allocator<T>>;
+
+template<typename T>
+using Set = std::set<T, std::less<T>, fuzzer_allocator<T>>;
+
+typedef Vector<uint8_t> Unit;
+typedef Vector<Unit> UnitVector;
+typedef int (*UserCallback)(const uint8_t *Data, size_t Size);
+
+int FuzzerDriver(int *argc, char ***argv, UserCallback Callback);
+
+struct ScopedDoingMyOwnMemOrStr {
+  ScopedDoingMyOwnMemOrStr() { DoingMyOwnMemOrStr++; }
+  ~ScopedDoingMyOwnMemOrStr() { DoingMyOwnMemOrStr--; }
+  static int DoingMyOwnMemOrStr;
+};
+
+inline uint8_t  Bswap(uint8_t x)  { return x; }
+inline uint16_t Bswap(uint16_t x) { return __builtin_bswap16(x); }
+inline uint32_t Bswap(uint32_t x) { return __builtin_bswap32(x); }
+inline uint64_t Bswap(uint64_t x) { return __builtin_bswap64(x); }
+
+uint8_t *ExtraCountersBegin();
+uint8_t *ExtraCountersEnd();
+void ClearExtraCounters();
+
+uint64_t *ClangCountersBegin();
+uint64_t *ClangCountersEnd();
+void ClearClangCounters();
+
+}  // namespace fuzzer
+
+#endif  // LLVM_FUZZER_DEFS_H
diff --git a/lib/fuzzer/FuzzerDictionary.h b/lib/fuzzer/FuzzerDictionary.h
new file mode 100644
index 0000000..daf7d00
--- /dev/null
+++ b/lib/fuzzer/FuzzerDictionary.h
@@ -0,0 +1,127 @@
+//===- FuzzerDictionary.h - Internal header for the Fuzzer ------*- C++ -* ===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// fuzzer::Dictionary
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FUZZER_DICTIONARY_H
+#define LLVM_FUZZER_DICTIONARY_H
+
+#include "FuzzerDefs.h"
+#include "FuzzerIO.h"
+#include "FuzzerUtil.h"
+#include <algorithm>
+#include <limits>
+
+namespace fuzzer {
+// A simple POD sized array of bytes.
+template <size_t kMaxSizeT> class FixedWord {
+public:
+  static const size_t kMaxSize = kMaxSizeT;
+  FixedWord() {}
+  FixedWord(const uint8_t *B, uint8_t S) { Set(B, S); }
+
+  void Set(const uint8_t *B, uint8_t S) {
+    assert(S <= kMaxSize);
+    memcpy(Data, B, S);
+    Size = S;
+  }
+
+  bool operator==(const FixedWord<kMaxSize> &w) const {
+    ScopedDoingMyOwnMemOrStr scoped_doing_my_own_mem_os_str;
+    return Size == w.Size && 0 == memcmp(Data, w.Data, Size);
+  }
+
+  bool operator<(const FixedWord<kMaxSize> &w) const {
+    ScopedDoingMyOwnMemOrStr scoped_doing_my_own_mem_os_str;
+    if (Size != w.Size)
+      return Size < w.Size;
+    return memcmp(Data, w.Data, Size) < 0;
+  }
+
+  static size_t GetMaxSize() { return kMaxSize; }
+  const uint8_t *data() const { return Data; }
+  uint8_t size() const { return Size; }
+
+private:
+  uint8_t Size = 0;
+  uint8_t Data[kMaxSize];
+};
+
+typedef FixedWord<64> Word;
+
+class DictionaryEntry {
+ public:
+  DictionaryEntry() {}
+  DictionaryEntry(Word W) : W(W) {}
+  DictionaryEntry(Word W, size_t PositionHint) : W(W), PositionHint(PositionHint) {}
+  const Word &GetW() const { return W; }
+
+  bool HasPositionHint() const { return PositionHint != std::numeric_limits<size_t>::max(); }
+  size_t GetPositionHint() const {
+    assert(HasPositionHint());
+    return PositionHint;
+  }
+  void IncUseCount() { UseCount++; }
+  void IncSuccessCount() { SuccessCount++; }
+  size_t GetUseCount() const { return UseCount; }
+  size_t GetSuccessCount() const {return SuccessCount; }
+
+  void Print(const char *PrintAfter = "\n") {
+    PrintASCII(W.data(), W.size());
+    if (HasPositionHint())
+      Printf("@%zd", GetPositionHint());
+    Printf("%s", PrintAfter);
+  }
+
+private:
+  Word W;
+  size_t PositionHint = std::numeric_limits<size_t>::max();
+  size_t UseCount = 0;
+  size_t SuccessCount = 0;
+};
+
+class Dictionary {
+ public:
+  static const size_t kMaxDictSize = 1 << 14;
+
+  bool ContainsWord(const Word &W) const {
+    return std::any_of(begin(), end(), [&](const DictionaryEntry &DE) {
+      return DE.GetW() == W;
+    });
+  }
+  const DictionaryEntry *begin() const { return &DE[0]; }
+  const DictionaryEntry *end() const { return begin() + Size; }
+  DictionaryEntry & operator[] (size_t Idx) {
+    assert(Idx < Size);
+    return DE[Idx];
+  }
+  void push_back(DictionaryEntry DE) {
+    if (Size < kMaxDictSize)
+      this->DE[Size++] = DE;
+  }
+  void clear() { Size = 0; }
+  bool empty() const { return Size == 0; }
+  size_t size() const { return Size; }
+
+private:
+  DictionaryEntry DE[kMaxDictSize];
+  size_t Size = 0;
+};
+
+// Parses one dictionary entry.
+// If successfull, write the enty to Unit and returns true,
+// otherwise returns false.
+bool ParseOneDictionaryEntry(const std::string &Str, Unit *U);
+// Parses the dictionary file, fills Units, returns true iff all lines
+// were parsed succesfully.
+bool ParseDictionaryFile(const std::string &Text, Vector<Unit> *Units);
+
+}  // namespace fuzzer
+
+#endif  // LLVM_FUZZER_DICTIONARY_H
diff --git a/lib/fuzzer/FuzzerDriver.cpp b/lib/fuzzer/FuzzerDriver.cpp
new file mode 100644
index 0000000..18c73ca
--- /dev/null
+++ b/lib/fuzzer/FuzzerDriver.cpp
@@ -0,0 +1,752 @@
+//===- FuzzerDriver.cpp - FuzzerDriver function and flags -----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// FuzzerDriver and flag parsing.
+//===----------------------------------------------------------------------===//
+
+#include "FuzzerCorpus.h"
+#include "FuzzerIO.h"
+#include "FuzzerInterface.h"
+#include "FuzzerInternal.h"
+#include "FuzzerMutate.h"
+#include "FuzzerRandom.h"
+#include "FuzzerShmem.h"
+#include "FuzzerTracePC.h"
+#include <algorithm>
+#include <atomic>
+#include <chrono>
+#include <cstdlib>
+#include <cstring>
+#include <mutex>
+#include <string>
+#include <thread>
+
+// This function should be present in the libFuzzer so that the client
+// binary can test for its existence.
+extern "C" __attribute__((used)) void __libfuzzer_is_present() {}
+
+namespace fuzzer {
+
+// Program arguments.
+struct FlagDescription {
+  const char *Name;
+  const char *Description;
+  int   Default;
+  int   *IntFlag;
+  const char **StrFlag;
+  unsigned int *UIntFlag;
+};
+
+struct {
+#define FUZZER_DEPRECATED_FLAG(Name)
+#define FUZZER_FLAG_INT(Name, Default, Description) int Name;
+#define FUZZER_FLAG_UNSIGNED(Name, Default, Description) unsigned int Name;
+#define FUZZER_FLAG_STRING(Name, Description) const char *Name;
+#include "FuzzerFlags.def"
+#undef FUZZER_DEPRECATED_FLAG
+#undef FUZZER_FLAG_INT
+#undef FUZZER_FLAG_UNSIGNED
+#undef FUZZER_FLAG_STRING
+} Flags;
+
+static const FlagDescription FlagDescriptions [] {
+#define FUZZER_DEPRECATED_FLAG(Name)                                           \
+  {#Name, "Deprecated; don't use", 0, nullptr, nullptr, nullptr},
+#define FUZZER_FLAG_INT(Name, Default, Description)                            \
+  {#Name, Description, Default, &Flags.Name, nullptr, nullptr},
+#define FUZZER_FLAG_UNSIGNED(Name, Default, Description)                       \
+  {#Name,   Description, static_cast<int>(Default),                            \
+   nullptr, nullptr, &Flags.Name},
+#define FUZZER_FLAG_STRING(Name, Description)                                  \
+  {#Name, Description, 0, nullptr, &Flags.Name, nullptr},
+#include "FuzzerFlags.def"
+#undef FUZZER_DEPRECATED_FLAG
+#undef FUZZER_FLAG_INT
+#undef FUZZER_FLAG_UNSIGNED
+#undef FUZZER_FLAG_STRING
+};
+
+static const size_t kNumFlags =
+    sizeof(FlagDescriptions) / sizeof(FlagDescriptions[0]);
+
+static Vector<std::string> *Inputs;
+static std::string *ProgName;
+
+static void PrintHelp() {
+  Printf("Usage:\n");
+  auto Prog = ProgName->c_str();
+  Printf("\nTo run fuzzing pass 0 or more directories.\n");
+  Printf("%s [-flag1=val1 [-flag2=val2 ...] ] [dir1 [dir2 ...] ]\n", Prog);
+
+  Printf("\nTo run individual tests without fuzzing pass 1 or more files:\n");
+  Printf("%s [-flag1=val1 [-flag2=val2 ...] ] file1 [file2 ...]\n", Prog);
+
+  Printf("\nFlags: (strictly in form -flag=value)\n");
+  size_t MaxFlagLen = 0;
+  for (size_t F = 0; F < kNumFlags; F++)
+    MaxFlagLen = std::max(strlen(FlagDescriptions[F].Name), MaxFlagLen);
+
+  for (size_t F = 0; F < kNumFlags; F++) {
+    const auto &D = FlagDescriptions[F];
+    if (strstr(D.Description, "internal flag") == D.Description) continue;
+    Printf(" %s", D.Name);
+    for (size_t i = 0, n = MaxFlagLen - strlen(D.Name); i < n; i++)
+      Printf(" ");
+    Printf("\t");
+    Printf("%d\t%s\n", D.Default, D.Description);
+  }
+  Printf("\nFlags starting with '--' will be ignored and "
+            "will be passed verbatim to subprocesses.\n");
+}
+
+static const char *FlagValue(const char *Param, const char *Name) {
+  size_t Len = strlen(Name);
+  if (Param[0] == '-' && strstr(Param + 1, Name) == Param + 1 &&
+      Param[Len + 1] == '=')
+      return &Param[Len + 2];
+  return nullptr;
+}
+
+// Avoid calling stol as it triggers a bug in clang/glibc build.
+static long MyStol(const char *Str) {
+  long Res = 0;
+  long Sign = 1;
+  if (*Str == '-') {
+    Str++;
+    Sign = -1;
+  }
+  for (size_t i = 0; Str[i]; i++) {
+    char Ch = Str[i];
+    if (Ch < '0' || Ch > '9')
+      return Res;
+    Res = Res * 10 + (Ch - '0');
+  }
+  return Res * Sign;
+}
+
+static bool ParseOneFlag(const char *Param) {
+  if (Param[0] != '-') return false;
+  if (Param[1] == '-') {
+    static bool PrintedWarning = false;
+    if (!PrintedWarning) {
+      PrintedWarning = true;
+      Printf("INFO: libFuzzer ignores flags that start with '--'\n");
+    }
+    for (size_t F = 0; F < kNumFlags; F++)
+      if (FlagValue(Param + 1, FlagDescriptions[F].Name))
+        Printf("WARNING: did you mean '%s' (single dash)?\n", Param + 1);
+    return true;
+  }
+  for (size_t F = 0; F < kNumFlags; F++) {
+    const char *Name = FlagDescriptions[F].Name;
+    const char *Str = FlagValue(Param, Name);
+    if (Str)  {
+      if (FlagDescriptions[F].IntFlag) {
+        int Val = MyStol(Str);
+        *FlagDescriptions[F].IntFlag = Val;
+        if (Flags.verbosity >= 2)
+          Printf("Flag: %s %d\n", Name, Val);
+        return true;
+      } else if (FlagDescriptions[F].UIntFlag) {
+        unsigned int Val = std::stoul(Str);
+        *FlagDescriptions[F].UIntFlag = Val;
+        if (Flags.verbosity >= 2)
+          Printf("Flag: %s %u\n", Name, Val);
+        return true;
+      } else if (FlagDescriptions[F].StrFlag) {
+        *FlagDescriptions[F].StrFlag = Str;
+        if (Flags.verbosity >= 2)
+          Printf("Flag: %s %s\n", Name, Str);
+        return true;
+      } else {  // Deprecated flag.
+        Printf("Flag: %s: deprecated, don't use\n", Name);
+        return true;
+      }
+    }
+  }
+  Printf("\n\nWARNING: unrecognized flag '%s'; "
+         "use -help=1 to list all flags\n\n", Param);
+  return true;
+}
+
+// We don't use any library to minimize dependencies.
+static void ParseFlags(const Vector<std::string> &Args) {
+  for (size_t F = 0; F < kNumFlags; F++) {
+    if (FlagDescriptions[F].IntFlag)
+      *FlagDescriptions[F].IntFlag = FlagDescriptions[F].Default;
+    if (FlagDescriptions[F].UIntFlag)
+      *FlagDescriptions[F].UIntFlag =
+          static_cast<unsigned int>(FlagDescriptions[F].Default);
+    if (FlagDescriptions[F].StrFlag)
+      *FlagDescriptions[F].StrFlag = nullptr;
+  }
+  Inputs = new Vector<std::string>;
+  for (size_t A = 1; A < Args.size(); A++) {
+    if (ParseOneFlag(Args[A].c_str())) {
+      if (Flags.ignore_remaining_args)
+        break;
+      continue;
+    }
+    Inputs->push_back(Args[A]);
+  }
+}
+
+static std::mutex Mu;
+
+static void PulseThread() {
+  while (true) {
+    SleepSeconds(600);
+    std::lock_guard<std::mutex> Lock(Mu);
+    Printf("pulse...\n");
+  }
+}
+
+static void WorkerThread(const std::string &Cmd, std::atomic<unsigned> *Counter,
+                         unsigned NumJobs, std::atomic<bool> *HasErrors) {
+  while (true) {
+    unsigned C = (*Counter)++;
+    if (C >= NumJobs) break;
+    std::string Log = "fuzz-" + std::to_string(C) + ".log";
+    std::string ToRun = Cmd + " > " + Log + " 2>&1\n";
+    if (Flags.verbosity)
+      Printf("%s", ToRun.c_str());
+    int ExitCode = ExecuteCommand(ToRun);
+    if (ExitCode != 0)
+      *HasErrors = true;
+    std::lock_guard<std::mutex> Lock(Mu);
+    Printf("================== Job %u exited with exit code %d ============\n",
+           C, ExitCode);
+    fuzzer::CopyFileToErr(Log);
+  }
+}
+
+std::string CloneArgsWithoutX(const Vector<std::string> &Args,
+                              const char *X1, const char *X2) {
+  std::string Cmd;
+  for (auto &S : Args) {
+    if (FlagValue(S.c_str(), X1) || FlagValue(S.c_str(), X2))
+      continue;
+    Cmd += S + " ";
+  }
+  return Cmd;
+}
+
+static int RunInMultipleProcesses(const Vector<std::string> &Args,
+                                  unsigned NumWorkers, unsigned NumJobs) {
+  std::atomic<unsigned> Counter(0);
+  std::atomic<bool> HasErrors(false);
+  std::string Cmd = CloneArgsWithoutX(Args, "jobs", "workers");
+  Vector<std::thread> V;
+  std::thread Pulse(PulseThread);
+  Pulse.detach();
+  for (unsigned i = 0; i < NumWorkers; i++)
+    V.push_back(std::thread(WorkerThread, Cmd, &Counter, NumJobs, &HasErrors));
+  for (auto &T : V)
+    T.join();
+  return HasErrors ? 1 : 0;
+}
+
+static void RssThread(Fuzzer *F, size_t RssLimitMb) {
+  while (true) {
+    SleepSeconds(1);
+    size_t Peak = GetPeakRSSMb();
+    if (Peak > RssLimitMb)
+      F->RssLimitCallback();
+  }
+}
+
+static void StartRssThread(Fuzzer *F, size_t RssLimitMb) {
+  if (!RssLimitMb) return;
+  std::thread T(RssThread, F, RssLimitMb);
+  T.detach();
+}
+
+int RunOneTest(Fuzzer *F, const char *InputFilePath, size_t MaxLen) {
+  Unit U = FileToVector(InputFilePath);
+  if (MaxLen && MaxLen < U.size())
+    U.resize(MaxLen);
+  F->ExecuteCallback(U.data(), U.size());
+  F->TryDetectingAMemoryLeak(U.data(), U.size(), true);
+  return 0;
+}
+
+static bool AllInputsAreFiles() {
+  if (Inputs->empty()) return false;
+  for (auto &Path : *Inputs)
+    if (!IsFile(Path))
+      return false;
+  return true;
+}
+
+static std::string GetDedupTokenFromFile(const std::string &Path) {
+  auto S = FileToString(Path);
+  auto Beg = S.find("DEDUP_TOKEN:");
+  if (Beg == std::string::npos)
+    return "";
+  auto End = S.find('\n', Beg);
+  if (End == std::string::npos)
+    return "";
+  return S.substr(Beg, End - Beg);
+}
+
+int CleanseCrashInput(const Vector<std::string> &Args,
+                       const FuzzingOptions &Options) {
+  if (Inputs->size() != 1 || !Flags.exact_artifact_path) {
+    Printf("ERROR: -cleanse_crash should be given one input file and"
+          " -exact_artifact_path\n");
+    exit(1);
+  }
+  std::string InputFilePath = Inputs->at(0);
+  std::string OutputFilePath = Flags.exact_artifact_path;
+  std::string BaseCmd =
+      CloneArgsWithoutX(Args, "cleanse_crash", "cleanse_crash");
+
+  auto InputPos = BaseCmd.find(" " + InputFilePath + " ");
+  assert(InputPos != std::string::npos);
+  BaseCmd.erase(InputPos, InputFilePath.size() + 1);
+
+  auto LogFilePath = DirPlusFile(
+      TmpDir(), "libFuzzerTemp." + std::to_string(GetPid()) + ".txt");
+  auto TmpFilePath = DirPlusFile(
+      TmpDir(), "libFuzzerTemp." + std::to_string(GetPid()) + ".repro");
+  auto LogFileRedirect = " > " + LogFilePath + " 2>&1 ";
+
+  auto Cmd = BaseCmd + " " + TmpFilePath + LogFileRedirect;
+
+  std::string CurrentFilePath = InputFilePath;
+  auto U = FileToVector(CurrentFilePath);
+  size_t Size = U.size();
+
+  const Vector<uint8_t> ReplacementBytes = {' ', 0xff};
+  for (int NumAttempts = 0; NumAttempts < 5; NumAttempts++) {
+    bool Changed = false;
+    for (size_t Idx = 0; Idx < Size; Idx++) {
+      Printf("CLEANSE[%d]: Trying to replace byte %zd of %zd\n", NumAttempts,
+             Idx, Size);
+      uint8_t OriginalByte = U[Idx];
+      if (ReplacementBytes.end() != std::find(ReplacementBytes.begin(),
+                                              ReplacementBytes.end(),
+                                              OriginalByte))
+        continue;
+      for (auto NewByte : ReplacementBytes) {
+        U[Idx] = NewByte;
+        WriteToFile(U, TmpFilePath);
+        auto ExitCode = ExecuteCommand(Cmd);
+        RemoveFile(TmpFilePath);
+        if (!ExitCode) {
+          U[Idx] = OriginalByte;
+        } else {
+          Changed = true;
+          Printf("CLEANSE: Replaced byte %zd with 0x%x\n", Idx, NewByte);
+          WriteToFile(U, OutputFilePath);
+          break;
+        }
+      }
+    }
+    if (!Changed) break;
+  }
+  RemoveFile(LogFilePath);
+  return 0;
+}
+
+int MinimizeCrashInput(const Vector<std::string> &Args,
+                       const FuzzingOptions &Options) {
+  if (Inputs->size() != 1) {
+    Printf("ERROR: -minimize_crash should be given one input file\n");
+    exit(1);
+  }
+  std::string InputFilePath = Inputs->at(0);
+  auto BaseCmd = SplitBefore(
+      "-ignore_remaining_args=1",
+      CloneArgsWithoutX(Args, "minimize_crash", "exact_artifact_path"));
+  auto InputPos = BaseCmd.first.find(" " + InputFilePath + " ");
+  assert(InputPos != std::string::npos);
+  BaseCmd.first.erase(InputPos, InputFilePath.size() + 1);
+  if (Flags.runs <= 0 && Flags.max_total_time == 0) {
+    Printf("INFO: you need to specify -runs=N or "
+           "-max_total_time=N with -minimize_crash=1\n"
+           "INFO: defaulting to -max_total_time=600\n");
+    BaseCmd.first += " -max_total_time=600";
+  }
+
+  auto LogFilePath = DirPlusFile(
+      TmpDir(), "libFuzzerTemp." + std::to_string(GetPid()) + ".txt");
+  auto LogFileRedirect = " > " + LogFilePath + " 2>&1 ";
+
+  std::string CurrentFilePath = InputFilePath;
+  while (true) {
+    Unit U = FileToVector(CurrentFilePath);
+    Printf("CRASH_MIN: minimizing crash input: '%s' (%zd bytes)\n",
+           CurrentFilePath.c_str(), U.size());
+
+    auto Cmd = BaseCmd.first + " " + CurrentFilePath + LogFileRedirect + " " +
+               BaseCmd.second;
+
+    Printf("CRASH_MIN: executing: %s\n", Cmd.c_str());
+    int ExitCode = ExecuteCommand(Cmd);
+    if (ExitCode == 0) {
+      Printf("ERROR: the input %s did not crash\n", CurrentFilePath.c_str());
+      exit(1);
+    }
+    Printf("CRASH_MIN: '%s' (%zd bytes) caused a crash. Will try to minimize "
+           "it further\n",
+           CurrentFilePath.c_str(), U.size());
+    auto DedupToken1 = GetDedupTokenFromFile(LogFilePath);
+    if (!DedupToken1.empty())
+      Printf("CRASH_MIN: DedupToken1: %s\n", DedupToken1.c_str());
+
+    std::string ArtifactPath =
+        Flags.exact_artifact_path
+            ? Flags.exact_artifact_path
+            : Options.ArtifactPrefix + "minimized-from-" + Hash(U);
+    Cmd += " -minimize_crash_internal_step=1 -exact_artifact_path=" +
+        ArtifactPath;
+    Printf("CRASH_MIN: executing: %s\n", Cmd.c_str());
+    ExitCode = ExecuteCommand(Cmd);
+    CopyFileToErr(LogFilePath);
+    if (ExitCode == 0) {
+      if (Flags.exact_artifact_path) {
+        CurrentFilePath = Flags.exact_artifact_path;
+        WriteToFile(U, CurrentFilePath);
+      }
+      Printf("CRASH_MIN: failed to minimize beyond %s (%d bytes), exiting\n",
+             CurrentFilePath.c_str(), U.size());
+      break;
+    }
+    auto DedupToken2 = GetDedupTokenFromFile(LogFilePath);
+    if (!DedupToken2.empty())
+      Printf("CRASH_MIN: DedupToken2: %s\n", DedupToken2.c_str());
+
+    if (DedupToken1 != DedupToken2) {
+      if (Flags.exact_artifact_path) {
+        CurrentFilePath = Flags.exact_artifact_path;
+        WriteToFile(U, CurrentFilePath);
+      }
+      Printf("CRASH_MIN: mismatch in dedup tokens"
+             " (looks like a different bug). Won't minimize further\n");
+      break;
+    }
+
+    CurrentFilePath = ArtifactPath;
+    Printf("*********************************\n");
+  }
+  RemoveFile(LogFilePath);
+  return 0;
+}
+
+int MinimizeCrashInputInternalStep(Fuzzer *F, InputCorpus *Corpus) {
+  assert(Inputs->size() == 1);
+  std::string InputFilePath = Inputs->at(0);
+  Unit U = FileToVector(InputFilePath);
+  Printf("INFO: Starting MinimizeCrashInputInternalStep: %zd\n", U.size());
+  if (U.size() < 2) {
+    Printf("INFO: The input is small enough, exiting\n");
+    exit(0);
+  }
+  F->SetMaxInputLen(U.size());
+  F->SetMaxMutationLen(U.size() - 1);
+  F->MinimizeCrashLoop(U);
+  Printf("INFO: Done MinimizeCrashInputInternalStep, no crashes found\n");
+  exit(0);
+  return 0;
+}
+
+int AnalyzeDictionary(Fuzzer *F, const Vector<Unit>& Dict,
+                      UnitVector& Corpus) {
+  Printf("Started dictionary minimization (up to %d tests)\n",
+         Dict.size() * Corpus.size() * 2);
+
+  // Scores and usage count for each dictionary unit.
+  Vector<int> Scores(Dict.size());
+  Vector<int> Usages(Dict.size());
+
+  Vector<size_t> InitialFeatures;
+  Vector<size_t> ModifiedFeatures;
+  for (auto &C : Corpus) {
+    // Get coverage for the testcase without modifications.
+    F->ExecuteCallback(C.data(), C.size());
+    InitialFeatures.clear();
+    TPC.CollectFeatures([&](size_t Feature) {
+      InitialFeatures.push_back(Feature);
+    });
+
+    for (size_t i = 0; i < Dict.size(); ++i) {
+      Vector<uint8_t> Data = C;
+      auto StartPos = std::search(Data.begin(), Data.end(),
+                                  Dict[i].begin(), Dict[i].end());
+      // Skip dictionary unit, if the testcase does not contain it.
+      if (StartPos == Data.end())
+        continue;
+
+      ++Usages[i];
+      while (StartPos != Data.end()) {
+        // Replace all occurrences of dictionary unit in the testcase.
+        auto EndPos = StartPos + Dict[i].size();
+        for (auto It = StartPos; It != EndPos; ++It)
+          *It ^= 0xFF;
+
+        StartPos = std::search(EndPos, Data.end(),
+                               Dict[i].begin(), Dict[i].end());
+      }
+
+      // Get coverage for testcase with masked occurrences of dictionary unit.
+      F->ExecuteCallback(Data.data(), Data.size());
+      ModifiedFeatures.clear();
+      TPC.CollectFeatures([&](size_t Feature) {
+        ModifiedFeatures.push_back(Feature);
+      });
+
+      if (InitialFeatures == ModifiedFeatures)
+        --Scores[i];
+      else
+        Scores[i] += 2;
+    }
+  }
+
+  Printf("###### Useless dictionary elements. ######\n");
+  for (size_t i = 0; i < Dict.size(); ++i) {
+    // Dictionary units with positive score are treated as useful ones.
+    if (Scores[i] > 0)
+       continue;
+
+    Printf("\"");
+    PrintASCII(Dict[i].data(), Dict[i].size(), "\"");
+    Printf(" # Score: %d, Used: %d\n", Scores[i], Usages[i]);
+  }
+  Printf("###### End of useless dictionary elements. ######\n");
+  return 0;
+}
+
+int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
+  using namespace fuzzer;
+  assert(argc && argv && "Argument pointers cannot be nullptr");
+  std::string Argv0((*argv)[0]);
+  EF = new ExternalFunctions();
+  if (EF->LLVMFuzzerInitialize)
+    EF->LLVMFuzzerInitialize(argc, argv);
+  const Vector<std::string> Args(*argv, *argv + *argc);
+  assert(!Args.empty());
+  ProgName = new std::string(Args[0]);
+  if (Argv0 != *ProgName) {
+    Printf("ERROR: argv[0] has been modified in LLVMFuzzerInitialize\n");
+    exit(1);
+  }
+  ParseFlags(Args);
+  if (Flags.help) {
+    PrintHelp();
+    return 0;
+  }
+
+  if (Flags.close_fd_mask & 2)
+    DupAndCloseStderr();
+  if (Flags.close_fd_mask & 1)
+    CloseStdout();
+
+  if (Flags.jobs > 0 && Flags.workers == 0) {
+    Flags.workers = std::min(NumberOfCpuCores() / 2, Flags.jobs);
+    if (Flags.workers > 1)
+      Printf("Running %u workers\n", Flags.workers);
+  }
+
+  if (Flags.workers > 0 && Flags.jobs > 0)
+    return RunInMultipleProcesses(Args, Flags.workers, Flags.jobs);
+
+  FuzzingOptions Options;
+  Options.Verbosity = Flags.verbosity;
+  Options.MaxLen = Flags.max_len;
+  Options.ExperimentalLenControl = Flags.experimental_len_control;
+  Options.UnitTimeoutSec = Flags.timeout;
+  Options.ErrorExitCode = Flags.error_exitcode;
+  Options.TimeoutExitCode = Flags.timeout_exitcode;
+  Options.MaxTotalTimeSec = Flags.max_total_time;
+  Options.DoCrossOver = Flags.cross_over;
+  Options.MutateDepth = Flags.mutate_depth;
+  Options.UseCounters = Flags.use_counters;
+  Options.UseIndirCalls = Flags.use_indir_calls;
+  Options.UseMemmem = Flags.use_memmem;
+  Options.UseCmp = Flags.use_cmp;
+  Options.UseValueProfile = Flags.use_value_profile;
+  Options.Shrink = Flags.shrink;
+  Options.ReduceInputs = Flags.reduce_inputs;
+  Options.ShuffleAtStartUp = Flags.shuffle;
+  Options.PreferSmall = Flags.prefer_small;
+  Options.ReloadIntervalSec = Flags.reload;
+  Options.OnlyASCII = Flags.only_ascii;
+  Options.DetectLeaks = Flags.detect_leaks;
+  Options.PurgeAllocatorIntervalSec = Flags.purge_allocator_interval;
+  Options.TraceMalloc = Flags.trace_malloc;
+  Options.RssLimitMb = Flags.rss_limit_mb;
+  if (Flags.runs >= 0)
+    Options.MaxNumberOfRuns = Flags.runs;
+  if (!Inputs->empty() && !Flags.minimize_crash_internal_step)
+    Options.OutputCorpus = (*Inputs)[0];
+  Options.ReportSlowUnits = Flags.report_slow_units;
+  if (Flags.artifact_prefix)
+    Options.ArtifactPrefix = Flags.artifact_prefix;
+  if (Flags.exact_artifact_path)
+    Options.ExactArtifactPath = Flags.exact_artifact_path;
+  Vector<Unit> Dictionary;
+  if (Flags.dict)
+    if (!ParseDictionaryFile(FileToString(Flags.dict), &Dictionary))
+      return 1;
+  if (Flags.verbosity > 0 && !Dictionary.empty())
+    Printf("Dictionary: %zd entries\n", Dictionary.size());
+  bool DoPlainRun = AllInputsAreFiles();
+  Options.SaveArtifacts =
+      !DoPlainRun || Flags.minimize_crash_internal_step;
+  Options.PrintNewCovPcs = Flags.print_pcs;
+  Options.PrintNewCovFuncs = Flags.print_funcs;
+  Options.PrintFinalStats = Flags.print_final_stats;
+  Options.PrintCorpusStats = Flags.print_corpus_stats;
+  Options.PrintCoverage = Flags.print_coverage;
+  Options.DumpCoverage = Flags.dump_coverage;
+  Options.UseClangCoverage = Flags.use_clang_coverage;
+  Options.UseFeatureFrequency = Flags.use_feature_frequency;
+  if (Flags.exit_on_src_pos)
+    Options.ExitOnSrcPos = Flags.exit_on_src_pos;
+  if (Flags.exit_on_item)
+    Options.ExitOnItem = Flags.exit_on_item;
+
+  unsigned Seed = Flags.seed;
+  // Initialize Seed.
+  if (Seed == 0)
+    Seed =
+        std::chrono::system_clock::now().time_since_epoch().count() + GetPid();
+  if (Flags.verbosity)
+    Printf("INFO: Seed: %u\n", Seed);
+
+  Random Rand(Seed);
+  auto *MD = new MutationDispatcher(Rand, Options);
+  auto *Corpus = new InputCorpus(Options.OutputCorpus);
+  auto *F = new Fuzzer(Callback, *Corpus, *MD, Options);
+
+  for (auto &U: Dictionary)
+    if (U.size() <= Word::GetMaxSize())
+      MD->AddWordToManualDictionary(Word(U.data(), U.size()));
+
+  StartRssThread(F, Flags.rss_limit_mb);
+
+  Options.HandleAbrt = Flags.handle_abrt;
+  Options.HandleBus = Flags.handle_bus;
+  Options.HandleFpe = Flags.handle_fpe;
+  Options.HandleIll = Flags.handle_ill;
+  Options.HandleInt = Flags.handle_int;
+  Options.HandleSegv = Flags.handle_segv;
+  Options.HandleTerm = Flags.handle_term;
+  Options.HandleXfsz = Flags.handle_xfsz;
+  SetSignalHandler(Options);
+
+  std::atexit(Fuzzer::StaticExitCallback);
+
+  if (Flags.minimize_crash)
+    return MinimizeCrashInput(Args, Options);
+
+  if (Flags.minimize_crash_internal_step)
+    return MinimizeCrashInputInternalStep(F, Corpus);
+
+  if (Flags.cleanse_crash)
+    return CleanseCrashInput(Args, Options);
+
+  if (auto Name = Flags.run_equivalence_server) {
+    SMR.Destroy(Name);
+    if (!SMR.Create(Name)) {
+       Printf("ERROR: can't create shared memory region\n");
+      return 1;
+    }
+    Printf("INFO: EQUIVALENCE SERVER UP\n");
+    while (true) {
+      SMR.WaitClient();
+      size_t Size = SMR.ReadByteArraySize();
+      SMR.WriteByteArray(nullptr, 0);
+      const Unit tmp(SMR.GetByteArray(), SMR.GetByteArray() + Size);
+      F->ExecuteCallback(tmp.data(), tmp.size());
+      SMR.PostServer();
+    }
+    return 0;
+  }
+
+  if (auto Name = Flags.use_equivalence_server) {
+    if (!SMR.Open(Name)) {
+      Printf("ERROR: can't open shared memory region\n");
+      return 1;
+    }
+    Printf("INFO: EQUIVALENCE CLIENT UP\n");
+  }
+
+  if (DoPlainRun) {
+    Options.SaveArtifacts = false;
+    int Runs = std::max(1, Flags.runs);
+    Printf("%s: Running %zd inputs %d time(s) each.\n", ProgName->c_str(),
+           Inputs->size(), Runs);
+    for (auto &Path : *Inputs) {
+      auto StartTime = system_clock::now();
+      Printf("Running: %s\n", Path.c_str());
+      for (int Iter = 0; Iter < Runs; Iter++)
+        RunOneTest(F, Path.c_str(), Options.MaxLen);
+      auto StopTime = system_clock::now();
+      auto MS = duration_cast<milliseconds>(StopTime - StartTime).count();
+      Printf("Executed %s in %zd ms\n", Path.c_str(), (long)MS);
+    }
+    Printf("***\n"
+           "*** NOTE: fuzzing was not performed, you have only\n"
+           "***       executed the target code on a fixed set of inputs.\n"
+           "***\n");
+    F->PrintFinalStats();
+    exit(0);
+  }
+
+  if (Flags.merge) {
+    const size_t kDefaultMaxMergeLen = 1 << 20;
+    if (Options.MaxLen == 0)
+      F->SetMaxInputLen(kDefaultMaxMergeLen);
+
+    if (Flags.merge_control_file)
+      F->CrashResistantMergeInternalStep(Flags.merge_control_file);
+    else
+      F->CrashResistantMerge(Args, *Inputs,
+                             Flags.load_coverage_summary,
+                             Flags.save_coverage_summary);
+    exit(0);
+  }
+
+
+  if (Flags.analyze_dict) {
+    size_t MaxLen = INT_MAX;  // Large max length.
+    UnitVector InitialCorpus;
+    for (auto &Inp : *Inputs) {
+      Printf("Loading corpus dir: %s\n", Inp.c_str());
+      ReadDirToVectorOfUnits(Inp.c_str(), &InitialCorpus, nullptr,
+                             MaxLen, /*ExitOnError=*/false);
+    }
+
+    if (Dictionary.empty() || Inputs->empty()) {
+      Printf("ERROR: can't analyze dict without dict and corpus provided\n");
+      return 1;
+    }
+    if (AnalyzeDictionary(F, Dictionary, InitialCorpus)) {
+      Printf("Dictionary analysis failed\n");
+      exit(1);
+    }
+    Printf("Dictionary analysis suceeded\n");
+    exit(0);
+  }
+
+  F->Loop(*Inputs);
+
+  if (Flags.verbosity)
+    Printf("Done %zd runs in %zd second(s)\n", F->getTotalNumberOfRuns(),
+           F->secondsSinceProcessStartUp());
+  F->PrintFinalStats();
+
+  exit(0);  // Don't let F destroy itself.
+}
+
+// Storage for global ExternalFunctions object.
+ExternalFunctions *EF = nullptr;
+
+}  // namespace fuzzer
diff --git a/lib/fuzzer/FuzzerExtFunctions.def b/lib/fuzzer/FuzzerExtFunctions.def
new file mode 100644
index 0000000..25a655b
--- /dev/null
+++ b/lib/fuzzer/FuzzerExtFunctions.def
@@ -0,0 +1,47 @@
+//===- FuzzerExtFunctions.def - External functions --------------*- C++ -* ===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This defines the external function pointers that
+// ``fuzzer::ExternalFunctions`` should contain and try to initialize.  The
+// EXT_FUNC macro must be defined at the point of inclusion. The signature of
+// the macro is:
+//
+// EXT_FUNC(<name>, <return_type>, <function_signature>, <warn_if_missing>)
+//===----------------------------------------------------------------------===//
+
+// Optional user functions
+EXT_FUNC(LLVMFuzzerInitialize, int, (int *argc, char ***argv), false);
+EXT_FUNC(LLVMFuzzerCustomMutator, size_t,
+         (uint8_t * Data, size_t Size, size_t MaxSize, unsigned int Seed),
+         false);
+EXT_FUNC(LLVMFuzzerCustomCrossOver, size_t,
+         (const uint8_t * Data1, size_t Size1,
+          const uint8_t * Data2, size_t Size2,
+          uint8_t * Out, size_t MaxOutSize, unsigned int Seed),
+         false);
+
+// Sanitizer functions
+EXT_FUNC(__lsan_enable, void, (), false);
+EXT_FUNC(__lsan_disable, void, (), false);
+EXT_FUNC(__lsan_do_recoverable_leak_check, int, (), false);
+EXT_FUNC(__sanitizer_install_malloc_and_free_hooks, int,
+         (void (*malloc_hook)(const volatile void *, size_t),
+          void (*free_hook)(const volatile void *)),
+         false);
+EXT_FUNC(__sanitizer_purge_allocator, void, (), false);
+EXT_FUNC(__sanitizer_print_memory_profile, int, (size_t, size_t), false);
+EXT_FUNC(__sanitizer_print_stack_trace, void, (), true);
+EXT_FUNC(__sanitizer_symbolize_pc, void,
+         (void *, const char *fmt, char *out_buf, size_t out_buf_size), false);
+EXT_FUNC(__sanitizer_get_module_and_offset_for_pc, int,
+         (void *pc, char *module_path,
+         size_t module_path_len,void **pc_offset), false);
+EXT_FUNC(__sanitizer_set_death_callback, void, (void (*)(void)), true);
+EXT_FUNC(__sanitizer_set_report_fd, void, (void*), false);
+EXT_FUNC(__sanitizer_dump_coverage, void, (const uintptr_t *, uintptr_t),
+         false);
diff --git a/lib/fuzzer/FuzzerExtFunctions.h b/lib/fuzzer/FuzzerExtFunctions.h
new file mode 100644
index 0000000..2672a38
--- /dev/null
+++ b/lib/fuzzer/FuzzerExtFunctions.h
@@ -0,0 +1,35 @@
+//===- FuzzerExtFunctions.h - Interface to external functions ---*- C++ -* ===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Defines an interface to (possibly optional) functions.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FUZZER_EXT_FUNCTIONS_H
+#define LLVM_FUZZER_EXT_FUNCTIONS_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+namespace fuzzer {
+
+struct ExternalFunctions {
+  // Initialize function pointers. Functions that are not available will be set
+  // to nullptr.  Do not call this constructor  before ``main()`` has been
+  // entered.
+  ExternalFunctions();
+
+#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN)                            \
+  RETURN_TYPE(*NAME) FUNC_SIG = nullptr
+
+#include "FuzzerExtFunctions.def"
+
+#undef EXT_FUNC
+};
+} // namespace fuzzer
+
+#endif
diff --git a/lib/fuzzer/FuzzerExtFunctionsDlsym.cpp b/lib/fuzzer/FuzzerExtFunctionsDlsym.cpp
new file mode 100644
index 0000000..06bddd5
--- /dev/null
+++ b/lib/fuzzer/FuzzerExtFunctionsDlsym.cpp
@@ -0,0 +1,52 @@
+//===- FuzzerExtFunctionsDlsym.cpp - Interface to external functions ------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Implementation for operating systems that support dlsym(). We only use it on
+// Apple platforms for now. We don't use this approach on Linux because it
+// requires that clients of LibFuzzer pass ``--export-dynamic`` to the linker.
+// That is a complication we don't wish to expose to clients right now.
+//===----------------------------------------------------------------------===//
+#include "FuzzerDefs.h"
+#if LIBFUZZER_APPLE
+
+#include "FuzzerExtFunctions.h"
+#include "FuzzerIO.h"
+#include <dlfcn.h>
+
+using namespace fuzzer;
+
+template <typename T>
+static T GetFnPtr(const char *FnName, bool WarnIfMissing) {
+  dlerror(); // Clear any previous errors.
+  void *Fn = dlsym(RTLD_DEFAULT, FnName);
+  if (Fn == nullptr) {
+    if (WarnIfMissing) {
+      const char *ErrorMsg = dlerror();
+      Printf("WARNING: Failed to find function \"%s\".", FnName);
+      if (ErrorMsg)
+        Printf(" Reason %s.", ErrorMsg);
+      Printf("\n");
+    }
+  }
+  return reinterpret_cast<T>(Fn);
+}
+
+namespace fuzzer {
+
+ExternalFunctions::ExternalFunctions() {
+#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN)                            \
+  this->NAME = GetFnPtr<decltype(ExternalFunctions::NAME)>(#NAME, WARN)
+
+#include "FuzzerExtFunctions.def"
+
+#undef EXT_FUNC
+}
+
+} // namespace fuzzer
+
+#endif // LIBFUZZER_APPLE
diff --git a/lib/fuzzer/FuzzerExtFunctionsDlsymWin.cpp b/lib/fuzzer/FuzzerExtFunctionsDlsymWin.cpp
new file mode 100644
index 0000000..321b3ec
--- /dev/null
+++ b/lib/fuzzer/FuzzerExtFunctionsDlsymWin.cpp
@@ -0,0 +1,62 @@
+//===- FuzzerExtFunctionsDlsymWin.cpp - Interface to external functions ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Implementation using dynamic loading for Windows.
+//===----------------------------------------------------------------------===//
+#include "FuzzerDefs.h"
+#if LIBFUZZER_WINDOWS
+
+#include "FuzzerExtFunctions.h"
+#include "FuzzerIO.h"
+#include "Windows.h"
+
+// This must be included after Windows.h.
+#include "Psapi.h"
+
+namespace fuzzer {
+
+ExternalFunctions::ExternalFunctions() {
+  HMODULE Modules[1024];
+  DWORD BytesNeeded;
+  HANDLE CurrentProcess = GetCurrentProcess();
+
+  if (!EnumProcessModules(CurrentProcess, Modules, sizeof(Modules),
+                          &BytesNeeded)) {
+    Printf("EnumProcessModules failed (error: %d).\n", GetLastError());
+    exit(1);
+  }
+
+  if (sizeof(Modules) < BytesNeeded) {
+    Printf("Error: the array is not big enough to hold all loaded modules.\n");
+    exit(1);
+  }
+
+  for (size_t i = 0; i < (BytesNeeded / sizeof(HMODULE)); i++)
+  {
+    FARPROC Fn;
+#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN)                            \
+    if (this->NAME == nullptr) {                                               \
+      Fn = GetProcAddress(Modules[i], #NAME);                                  \
+      if (Fn == nullptr)                                                       \
+         Fn = GetProcAddress(Modules[i], #NAME "__dll");                       \
+      this->NAME = (decltype(ExternalFunctions::NAME)) Fn;                     \
+    }
+#include "FuzzerExtFunctions.def"
+#undef EXT_FUNC
+  }
+
+#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN)                            \
+  if (this->NAME == nullptr && WARN)                                           \
+    Printf("WARNING: Failed to find function \"%s\".\n", #NAME);
+#include "FuzzerExtFunctions.def"
+#undef EXT_FUNC
+}
+
+} // namespace fuzzer
+
+#endif // LIBFUZZER_WINDOWS
diff --git a/lib/fuzzer/FuzzerExtFunctionsWeak.cpp b/lib/fuzzer/FuzzerExtFunctionsWeak.cpp
new file mode 100644
index 0000000..5056eb8
--- /dev/null
+++ b/lib/fuzzer/FuzzerExtFunctionsWeak.cpp
@@ -0,0 +1,54 @@
+//===- FuzzerExtFunctionsWeak.cpp - Interface to external functions -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Implementation for Linux. This relies on the linker's support for weak
+// symbols. We don't use this approach on Apple platforms because it requires
+// clients of LibFuzzer to pass ``-U _<symbol_name>`` to the linker to allow
+// weak symbols to be undefined. That is a complication we don't want to expose
+// to clients right now.
+//===----------------------------------------------------------------------===//
+#include "FuzzerDefs.h"
+#if LIBFUZZER_LINUX || LIBFUZZER_NETBSD
+
+#include "FuzzerExtFunctions.h"
+#include "FuzzerIO.h"
+
+extern "C" {
+// Declare these symbols as weak to allow them to be optionally defined.
+#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN)                            \
+  __attribute__((weak)) RETURN_TYPE NAME FUNC_SIG
+
+#include "FuzzerExtFunctions.def"
+
+#undef EXT_FUNC
+}
+
+using namespace fuzzer;
+
+static void CheckFnPtr(void *FnPtr, const char *FnName, bool WarnIfMissing) {
+  if (FnPtr == nullptr && WarnIfMissing) {
+    Printf("WARNING: Failed to find function \"%s\".\n", FnName);
+  }
+}
+
+namespace fuzzer {
+
+ExternalFunctions::ExternalFunctions() {
+#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN)                            \
+  this->NAME = ::NAME;                                                         \
+  CheckFnPtr(reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(::NAME)),    \
+             #NAME, WARN);
+
+#include "FuzzerExtFunctions.def"
+
+#undef EXT_FUNC
+}
+
+} // namespace fuzzer
+
+#endif // LIBFUZZER_LINUX || LIBFUZZER_NETBSD
diff --git a/lib/fuzzer/FuzzerExtFunctionsWeakAlias.cpp b/lib/fuzzer/FuzzerExtFunctionsWeakAlias.cpp
new file mode 100644
index 0000000..e10f7b4
--- /dev/null
+++ b/lib/fuzzer/FuzzerExtFunctionsWeakAlias.cpp
@@ -0,0 +1,56 @@
+//===- FuzzerExtFunctionsWeakAlias.cpp - Interface to external functions --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Implementation using weak aliases. Works for Windows.
+//===----------------------------------------------------------------------===//
+#include "FuzzerDefs.h"
+#if LIBFUZZER_WINDOWS
+
+#include "FuzzerExtFunctions.h"
+#include "FuzzerIO.h"
+
+using namespace fuzzer;
+
+extern "C" {
+// Declare these symbols as weak to allow them to be optionally defined.
+#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN)                            \
+  RETURN_TYPE NAME##Def FUNC_SIG {                                             \
+    Printf("ERROR: Function \"%s\" not defined.\n", #NAME);                    \
+    exit(1);                                                                   \
+  }                                                                            \
+  RETURN_TYPE NAME FUNC_SIG __attribute__((weak, alias(#NAME "Def")));
+
+#include "FuzzerExtFunctions.def"
+
+#undef EXT_FUNC
+}
+
+template <typename T>
+static T *GetFnPtr(T *Fun, T *FunDef, const char *FnName, bool WarnIfMissing) {
+  if (Fun == FunDef) {
+    if (WarnIfMissing)
+      Printf("WARNING: Failed to find function \"%s\".\n", FnName);
+    return nullptr;
+  }
+  return Fun;
+}
+
+namespace fuzzer {
+
+ExternalFunctions::ExternalFunctions() {
+#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN)                            \
+  this->NAME = GetFnPtr<decltype(::NAME)>(::NAME, ::NAME##Def, #NAME, WARN);
+
+#include "FuzzerExtFunctions.def"
+
+#undef EXT_FUNC
+}
+
+} // namespace fuzzer
+
+#endif // LIBFUZZER_WINDOWS
diff --git a/lib/fuzzer/FuzzerExtraCounters.cpp b/lib/fuzzer/FuzzerExtraCounters.cpp
new file mode 100644
index 0000000..0e7a776
--- /dev/null
+++ b/lib/fuzzer/FuzzerExtraCounters.cpp
@@ -0,0 +1,41 @@
+//===- FuzzerExtraCounters.cpp - Extra coverage counters ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Extra coverage counters defined by user code.
+//===----------------------------------------------------------------------===//
+
+#include "FuzzerDefs.h"
+
+#if LIBFUZZER_LINUX || LIBFUZZER_NETBSD
+__attribute__((weak)) extern uint8_t __start___libfuzzer_extra_counters;
+__attribute__((weak)) extern uint8_t __stop___libfuzzer_extra_counters;
+
+namespace fuzzer {
+uint8_t *ExtraCountersBegin() { return &__start___libfuzzer_extra_counters; }
+uint8_t *ExtraCountersEnd() { return &__stop___libfuzzer_extra_counters; }
+ATTRIBUTE_NO_SANITIZE_ALL
+void ClearExtraCounters() {  // hand-written memset, don't asan-ify.
+  uintptr_t *Beg = reinterpret_cast<uintptr_t*>(ExtraCountersBegin());
+  uintptr_t *End = reinterpret_cast<uintptr_t*>(ExtraCountersEnd());
+  for (; Beg < End; Beg++) {
+    *Beg = 0;
+    __asm__ __volatile__("" : : : "memory");
+  }
+}
+
+}  // namespace fuzzer
+
+#else
+// TODO: implement for other platforms.
+namespace fuzzer {
+uint8_t *ExtraCountersBegin() { return nullptr; }
+uint8_t *ExtraCountersEnd() { return nullptr; }
+void ClearExtraCounters() {}
+}  // namespace fuzzer
+
+#endif
diff --git a/lib/fuzzer/FuzzerFlags.def b/lib/fuzzer/FuzzerFlags.def
new file mode 100644
index 0000000..e4bca46
--- /dev/null
+++ b/lib/fuzzer/FuzzerFlags.def
@@ -0,0 +1,148 @@
+//===- FuzzerFlags.def - Run-time flags -------------------------*- C++ -* ===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Flags. FUZZER_FLAG_INT/FUZZER_FLAG_STRING macros should be defined at the
+// point of inclusion. We are not using any flag parsing library for better
+// portability and independence.
+//===----------------------------------------------------------------------===//
+FUZZER_FLAG_INT(verbosity, 1, "Verbosity level.")
+FUZZER_FLAG_UNSIGNED(seed, 0, "Random seed. If 0, seed is generated.")
+FUZZER_FLAG_INT(runs, -1,
+            "Number of individual test runs (-1 for infinite runs).")
+FUZZER_FLAG_INT(max_len, 0, "Maximum length of the test input. "
+    "If 0, libFuzzer tries to guess a good value based on the corpus "
+    "and reports it. ")
+FUZZER_FLAG_INT(experimental_len_control, 0, "experimental flag")
+FUZZER_FLAG_INT(cross_over, 1, "If 1, cross over inputs.")
+FUZZER_FLAG_INT(mutate_depth, 5,
+            "Apply this number of consecutive mutations to each input.")
+FUZZER_FLAG_INT(shuffle, 1, "Shuffle inputs at startup")
+FUZZER_FLAG_INT(prefer_small, 1,
+    "If 1, always prefer smaller inputs during the corpus shuffle.")
+FUZZER_FLAG_INT(
+    timeout, 1200,
+    "Timeout in seconds (if positive). "
+    "If one unit runs more than this number of seconds the process will abort.")
+FUZZER_FLAG_INT(error_exitcode, 77, "When libFuzzer itself reports a bug "
+  "this exit code will be used.")
+FUZZER_FLAG_INT(timeout_exitcode, 77, "When libFuzzer reports a timeout "
+  "this exit code will be used.")
+FUZZER_FLAG_INT(max_total_time, 0, "If positive, indicates the maximal total "
+                                   "time in seconds to run the fuzzer.")
+FUZZER_FLAG_INT(help, 0, "Print help.")
+FUZZER_FLAG_INT(merge, 0, "If 1, the 2-nd, 3-rd, etc corpora will be "
+  "merged into the 1-st corpus. Only interesting units will be taken. "
+  "This flag can be used to minimize a corpus.")
+FUZZER_FLAG_STRING(merge_control_file, "internal flag")
+FUZZER_FLAG_STRING(save_coverage_summary, "Experimental:"
+                   " save coverage summary to a given file."
+                   " Used with -merge=1")
+FUZZER_FLAG_STRING(load_coverage_summary, "Experimental:"
+                   " load coverage summary from a given file."
+                   " Treat this coverage as belonging to the first corpus. "
+                   " Used with -merge=1")
+FUZZER_FLAG_INT(minimize_crash, 0, "If 1, minimizes the provided"
+  " crash input. Use with -runs=N or -max_total_time=N to limit "
+  "the number attempts."
+  " Use with -exact_artifact_path to specify the output."
+  " Combine with ASAN_OPTIONS=dedup_token_length=3 (or similar) to ensure that"
+  " the minimized input triggers the same crash."
+  )
+FUZZER_FLAG_INT(cleanse_crash, 0, "If 1, tries to cleanse the provided"
+  " crash input to make it contain fewer original bytes."
+  " Use with -exact_artifact_path to specify the output."
+  )
+FUZZER_FLAG_INT(minimize_crash_internal_step, 0, "internal flag")
+FUZZER_FLAG_INT(use_counters, 1, "Use coverage counters")
+FUZZER_FLAG_INT(use_indir_calls, 1, "Use indirect caller-callee counters")
+FUZZER_FLAG_INT(use_memmem, 1,
+                "Use hints from intercepting memmem, strstr, etc")
+FUZZER_FLAG_INT(use_value_profile, 0,
+                "Experimental. Use value profile to guide fuzzing.")
+FUZZER_FLAG_INT(use_cmp, 1, "Use CMP traces to guide mutations")
+FUZZER_FLAG_INT(shrink, 0, "Experimental. Try to shrink corpus inputs.")
+FUZZER_FLAG_INT(reduce_inputs, 1,
+  "Try to reduce the size of inputs while preserving their full feature sets")
+FUZZER_FLAG_UNSIGNED(jobs, 0, "Number of jobs to run. If jobs >= 1 we spawn"
+                          " this number of jobs in separate worker processes"
+                          " with stdout/stderr redirected to fuzz-JOB.log.")
+FUZZER_FLAG_UNSIGNED(workers, 0,
+            "Number of simultaneous worker processes to run the jobs."
+            " If zero, \"min(jobs,NumberOfCpuCores()/2)\" is used.")
+FUZZER_FLAG_INT(reload, 1,
+                "Reload the main corpus every <N> seconds to get new units"
+                " discovered by other processes. If 0, disabled")
+FUZZER_FLAG_INT(report_slow_units, 10,
+    "Report slowest units if they run for more than this number of seconds.")
+FUZZER_FLAG_INT(only_ascii, 0,
+                "If 1, generate only ASCII (isprint+isspace) inputs.")
+FUZZER_FLAG_STRING(dict, "Experimental. Use the dictionary file.")
+FUZZER_FLAG_STRING(artifact_prefix, "Write fuzzing artifacts (crash, "
+                                    "timeout, or slow inputs) as "
+                                    "$(artifact_prefix)file")
+FUZZER_FLAG_STRING(exact_artifact_path,
+                   "Write the single artifact on failure (crash, timeout) "
+                   "as $(exact_artifact_path). This overrides -artifact_prefix "
+                   "and will not use checksum in the file name. Do not "
+                   "use the same path for several parallel processes.")
+FUZZER_FLAG_INT(print_pcs, 0, "If 1, print out newly covered PCs.")
+FUZZER_FLAG_INT(print_funcs, 2, "If >=1, print out at most this number of "
+                                "newly covered functions.")
+FUZZER_FLAG_INT(print_final_stats, 0, "If 1, print statistics at exit.")
+FUZZER_FLAG_INT(print_corpus_stats, 0,
+  "If 1, print statistics on corpus elements at exit.")
+FUZZER_FLAG_INT(print_coverage, 0, "If 1, print coverage information as text"
+                                   " at exit.")
+FUZZER_FLAG_INT(dump_coverage, 0, "Deprecated."
+                                  " If 1, dump coverage information as a"
+                                  " .sancov file at exit.")
+FUZZER_FLAG_INT(handle_segv, 1, "If 1, try to intercept SIGSEGV.")
+FUZZER_FLAG_INT(handle_bus, 1, "If 1, try to intercept SIGBUS.")
+FUZZER_FLAG_INT(handle_abrt, 1, "If 1, try to intercept SIGABRT.")
+FUZZER_FLAG_INT(handle_ill, 1, "If 1, try to intercept SIGILL.")
+FUZZER_FLAG_INT(handle_fpe, 1, "If 1, try to intercept SIGFPE.")
+FUZZER_FLAG_INT(handle_int, 1, "If 1, try to intercept SIGINT.")
+FUZZER_FLAG_INT(handle_term, 1, "If 1, try to intercept SIGTERM.")
+FUZZER_FLAG_INT(handle_xfsz, 1, "If 1, try to intercept SIGXFSZ.")
+FUZZER_FLAG_INT(close_fd_mask, 0, "If 1, close stdout at startup; "
+    "if 2, close stderr; if 3, close both. "
+    "Be careful, this will also close e.g. asan's stderr/stdout.")
+FUZZER_FLAG_INT(detect_leaks, 1, "If 1, and if LeakSanitizer is enabled "
+    "try to detect memory leaks during fuzzing (i.e. not only at shut down).")
+FUZZER_FLAG_INT(purge_allocator_interval, 1, "Purge allocator caches and "
+    "quarantines every <N> seconds. When rss_limit_mb is specified (>0), "
+    "purging starts when RSS exceeds 50% of rss_limit_mb. Pass "
+    "purge_allocator_interval=-1 to disable this functionality.")
+FUZZER_FLAG_INT(trace_malloc, 0, "If >= 1 will print all mallocs/frees. "
+    "If >= 2 will also print stack traces.")
+FUZZER_FLAG_INT(rss_limit_mb, 2048, "If non-zero, the fuzzer will exit upon"
+    "reaching this limit of RSS memory usage.")
+FUZZER_FLAG_STRING(exit_on_src_pos, "Exit if a newly found PC originates"
+    " from the given source location. Example: -exit_on_src_pos=foo.cc:123. "
+    "Used primarily for testing libFuzzer itself.")
+FUZZER_FLAG_STRING(exit_on_item, "Exit if an item with a given sha1 sum"
+    " was added to the corpus. "
+    "Used primarily for testing libFuzzer itself.")
+FUZZER_FLAG_INT(ignore_remaining_args, 0, "If 1, ignore all arguments passed "
+                "after this one. Useful for fuzzers that need to do their own "
+                "argument parsing.")
+
+FUZZER_FLAG_STRING(run_equivalence_server, "Experimental")
+FUZZER_FLAG_STRING(use_equivalence_server, "Experimental")
+FUZZER_FLAG_INT(analyze_dict, 0, "Experimental")
+FUZZER_FLAG_INT(use_clang_coverage, 0, "Experimental")
+FUZZER_FLAG_INT(use_feature_frequency, 0, "Experimental/internal")
+
+FUZZER_DEPRECATED_FLAG(exit_on_first)
+FUZZER_DEPRECATED_FLAG(save_minimized_corpus)
+FUZZER_DEPRECATED_FLAG(sync_command)
+FUZZER_DEPRECATED_FLAG(sync_timeout)
+FUZZER_DEPRECATED_FLAG(test_single_input)
+FUZZER_DEPRECATED_FLAG(drill)
+FUZZER_DEPRECATED_FLAG(truncate_units)
+FUZZER_DEPRECATED_FLAG(output_csv)
diff --git a/lib/fuzzer/FuzzerIO.cpp b/lib/fuzzer/FuzzerIO.cpp
new file mode 100644
index 0000000..dac5ec6
--- /dev/null
+++ b/lib/fuzzer/FuzzerIO.cpp
@@ -0,0 +1,129 @@
+//===- FuzzerIO.cpp - IO utils. -------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// IO functions.
+//===----------------------------------------------------------------------===//
+
+#include "FuzzerIO.h"
+#include "FuzzerDefs.h"
+#include "FuzzerExtFunctions.h"
+#include <algorithm>
+#include <cstdarg>
+#include <fstream>
+#include <iterator>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+namespace fuzzer {
+
+static FILE *OutputFile = stderr;
+
+long GetEpoch(const std::string &Path) {
+  struct stat St;
+  if (stat(Path.c_str(), &St))
+    return 0;  // Can't stat, be conservative.
+  return St.st_mtime;
+}
+
+Unit FileToVector(const std::string &Path, size_t MaxSize, bool ExitOnError) {
+  std::ifstream T(Path);
+  if (ExitOnError && !T) {
+    Printf("No such directory: %s; exiting\n", Path.c_str());
+    exit(1);
+  }
+
+  T.seekg(0, T.end);
+  auto EndPos = T.tellg();
+  if (EndPos < 0) return {};
+  size_t FileLen = EndPos;
+  if (MaxSize)
+    FileLen = std::min(FileLen, MaxSize);
+
+  T.seekg(0, T.beg);
+  Unit Res(FileLen);
+  T.read(reinterpret_cast<char *>(Res.data()), FileLen);
+  return Res;
+}
+
+std::string FileToString(const std::string &Path) {
+  std::ifstream T(Path);
+  return std::string((std::istreambuf_iterator<char>(T)),
+                     std::istreambuf_iterator<char>());
+}
+
+void CopyFileToErr(const std::string &Path) {
+  Printf("%s", FileToString(Path).c_str());
+}
+
+void WriteToFile(const Unit &U, const std::string &Path) {
+  // Use raw C interface because this function may be called from a sig handler.
+  FILE *Out = fopen(Path.c_str(), "w");
+  if (!Out) return;
+  fwrite(U.data(), sizeof(U[0]), U.size(), Out);
+  fclose(Out);
+}
+
+void ReadDirToVectorOfUnits(const char *Path, Vector<Unit> *V,
+                            long *Epoch, size_t MaxSize, bool ExitOnError) {
+  long E = Epoch ? *Epoch : 0;
+  Vector<std::string> Files;
+  ListFilesInDirRecursive(Path, Epoch, &Files, /*TopDir*/true);
+  size_t NumLoaded = 0;
+  for (size_t i = 0; i < Files.size(); i++) {
+    auto &X = Files[i];
+    if (Epoch && GetEpoch(X) < E) continue;
+    NumLoaded++;
+    if ((NumLoaded & (NumLoaded - 1)) == 0 && NumLoaded >= 1024)
+      Printf("Loaded %zd/%zd files from %s\n", NumLoaded, Files.size(), Path);
+    auto S = FileToVector(X, MaxSize, ExitOnError);
+    if (!S.empty())
+      V->push_back(S);
+  }
+}
+
+
+void GetSizedFilesFromDir(const std::string &Dir, Vector<SizedFile> *V) {
+  Vector<std::string> Files;
+  ListFilesInDirRecursive(Dir, 0, &Files, /*TopDir*/true);
+  for (auto &File : Files)
+    if (size_t Size = FileSize(File))
+      V->push_back({File, Size});
+}
+
+std::string DirPlusFile(const std::string &DirPath,
+                        const std::string &FileName) {
+  return DirPath + GetSeparator() + FileName;
+}
+
+void DupAndCloseStderr() {
+  int OutputFd = DuplicateFile(2);
+  if (OutputFd > 0) {
+    FILE *NewOutputFile = OpenFile(OutputFd, "w");
+    if (NewOutputFile) {
+      OutputFile = NewOutputFile;
+      if (EF->__sanitizer_set_report_fd)
+        EF->__sanitizer_set_report_fd(
+            reinterpret_cast<void *>(GetHandleFromFd(OutputFd)));
+      DiscardOutput(2);
+    }
+  }
+}
+
+void CloseStdout() {
+  DiscardOutput(1);
+}
+
+void Printf(const char *Fmt, ...) {
+  va_list ap;
+  va_start(ap, Fmt);
+  vfprintf(OutputFile, Fmt, ap);
+  va_end(ap);
+  fflush(OutputFile);
+}
+
+}  // namespace fuzzer
diff --git a/lib/fuzzer/FuzzerIO.h b/lib/fuzzer/FuzzerIO.h
new file mode 100644
index 0000000..ea9f0d5
--- /dev/null
+++ b/lib/fuzzer/FuzzerIO.h
@@ -0,0 +1,85 @@
+//===- FuzzerIO.h - Internal header for IO utils ----------------*- C++ -* ===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// IO interface.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FUZZER_IO_H
+#define LLVM_FUZZER_IO_H
+
+#include "FuzzerDefs.h"
+
+namespace fuzzer {
+
+long GetEpoch(const std::string &Path);
+
+Unit FileToVector(const std::string &Path, size_t MaxSize = 0,
+                  bool ExitOnError = true);
+
+std::string FileToString(const std::string &Path);
+
+void CopyFileToErr(const std::string &Path);
+
+void WriteToFile(const Unit &U, const std::string &Path);
+
+void ReadDirToVectorOfUnits(const char *Path, Vector<Unit> *V,
+                            long *Epoch, size_t MaxSize, bool ExitOnError);
+
+// Returns "Dir/FileName" or equivalent for the current OS.
+std::string DirPlusFile(const std::string &DirPath,
+                        const std::string &FileName);
+
+// Returns the name of the dir, similar to the 'dirname' utility.
+std::string DirName(const std::string &FileName);
+
+// Returns path to a TmpDir.
+std::string TmpDir();
+
+bool IsInterestingCoverageFile(const std::string &FileName);
+
+void DupAndCloseStderr();
+
+void CloseStdout();
+
+void Printf(const char *Fmt, ...);
+
+// Print using raw syscalls, useful when printing at early init stages.
+void RawPrint(const char *Str);
+
+// Platform specific functions:
+bool IsFile(const std::string &Path);
+size_t FileSize(const std::string &Path);
+
+void ListFilesInDirRecursive(const std::string &Dir, long *Epoch,
+                             Vector<std::string> *V, bool TopDir);
+
+struct SizedFile {
+  std::string File;
+  size_t Size;
+  bool operator<(const SizedFile &B) const { return Size < B.Size; }
+};
+
+void GetSizedFilesFromDir(const std::string &Dir, Vector<SizedFile> *V);
+
+char GetSeparator();
+
+FILE* OpenFile(int Fd, const char *Mode);
+
+int CloseFile(int Fd);
+
+int DuplicateFile(int Fd);
+
+void RemoveFile(const std::string &Path);
+
+void DiscardOutput(int Fd);
+
+intptr_t GetHandleFromFd(int fd);
+
+}  // namespace fuzzer
+
+#endif  // LLVM_FUZZER_IO_H
diff --git a/lib/fuzzer/FuzzerIOPosix.cpp b/lib/fuzzer/FuzzerIOPosix.cpp
new file mode 100644
index 0000000..2c452a7
--- /dev/null
+++ b/lib/fuzzer/FuzzerIOPosix.cpp
@@ -0,0 +1,130 @@
+//===- FuzzerIOPosix.cpp - IO utils for Posix. ----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// IO functions implementation using Posix API.
+//===----------------------------------------------------------------------===//
+#include "FuzzerDefs.h"
+#if LIBFUZZER_POSIX
+
+#include "FuzzerExtFunctions.h"
+#include "FuzzerIO.h"
+#include <cstdarg>
+#include <cstdio>
+#include <dirent.h>
+#include <fstream>
+#include <iterator>
+#include <libgen.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+namespace fuzzer {
+
+bool IsFile(const std::string &Path) {
+  struct stat St;
+  if (stat(Path.c_str(), &St))
+    return false;
+  return S_ISREG(St.st_mode);
+}
+
+size_t FileSize(const std::string &Path) {
+  struct stat St;
+  if (stat(Path.c_str(), &St))
+    return 0;
+  return St.st_size;
+}
+
+void ListFilesInDirRecursive(const std::string &Dir, long *Epoch,
+                             Vector<std::string> *V, bool TopDir) {
+  auto E = GetEpoch(Dir);
+  if (Epoch)
+    if (E && *Epoch >= E) return;
+
+  DIR *D = opendir(Dir.c_str());
+  if (!D) {
+    Printf("No such directory: %s; exiting\n", Dir.c_str());
+    exit(1);
+  }
+  while (auto E = readdir(D)) {
+    std::string Path = DirPlusFile(Dir, E->d_name);
+    if (E->d_type == DT_REG || E->d_type == DT_LNK)
+      V->push_back(Path);
+    else if (E->d_type == DT_DIR && *E->d_name != '.')
+      ListFilesInDirRecursive(Path, Epoch, V, false);
+  }
+  closedir(D);
+  if (Epoch && TopDir)
+    *Epoch = E;
+}
+
+char GetSeparator() {
+  return '/';
+}
+
+FILE* OpenFile(int Fd, const char* Mode) {
+  return fdopen(Fd, Mode);
+}
+
+int CloseFile(int fd) {
+  return close(fd);
+}
+
+int DuplicateFile(int Fd) {
+  return dup(Fd);
+}
+
+void RemoveFile(const std::string &Path) {
+  unlink(Path.c_str());
+}
+
+void DiscardOutput(int Fd) {
+  FILE* Temp = fopen("/dev/null", "w");
+  if (!Temp)
+    return;
+  dup2(fileno(Temp), Fd);
+  fclose(Temp);
+}
+
+intptr_t GetHandleFromFd(int fd) {
+  return static_cast<intptr_t>(fd);
+}
+
+std::string DirName(const std::string &FileName) {
+  char *Tmp = new char[FileName.size() + 1];
+  memcpy(Tmp, FileName.c_str(), FileName.size() + 1);
+  std::string Res = dirname(Tmp);
+  delete [] Tmp;
+  return Res;
+}
+
+std::string TmpDir() {
+  if (auto Env = getenv("TMPDIR"))
+    return Env;
+  return "/tmp";
+}
+
+bool IsInterestingCoverageFile(const std::string &FileName) {
+  if (FileName.find("compiler-rt/lib/") != std::string::npos)
+    return false; // sanitizer internal.
+  if (FileName.find("/usr/lib/") != std::string::npos)
+    return false;
+  if (FileName.find("/usr/include/") != std::string::npos)
+    return false;
+  if (FileName == "<null>")
+    return false;
+  return true;
+}
+
+
+void RawPrint(const char *Str) {
+  write(2, Str, strlen(Str));
+}
+
+}  // namespace fuzzer
+
+#endif // LIBFUZZER_POSIX
diff --git a/lib/fuzzer/FuzzerIOWindows.cpp b/lib/fuzzer/FuzzerIOWindows.cpp
new file mode 100644
index 0000000..7485364
--- /dev/null
+++ b/lib/fuzzer/FuzzerIOWindows.cpp
@@ -0,0 +1,323 @@
+//===- FuzzerIOWindows.cpp - IO utils for Windows. ------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// IO functions implementation for Windows.
+//===----------------------------------------------------------------------===//
+#include "FuzzerDefs.h"
+#if LIBFUZZER_WINDOWS
+
+#include "FuzzerExtFunctions.h"
+#include "FuzzerIO.h"
+#include <cstdarg>
+#include <cstdio>
+#include <fstream>
+#include <io.h>
+#include <iterator>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <windows.h>
+
+namespace fuzzer {
+
+static bool IsFile(const std::string &Path, const DWORD &FileAttributes) {
+
+  if (FileAttributes & FILE_ATTRIBUTE_NORMAL)
+    return true;
+
+  if (FileAttributes & FILE_ATTRIBUTE_DIRECTORY)
+    return false;
+
+  HANDLE FileHandle(
+      CreateFileA(Path.c_str(), 0, FILE_SHARE_READ, NULL, OPEN_EXISTING,
+                  FILE_FLAG_BACKUP_SEMANTICS, 0));
+
+  if (FileHandle == INVALID_HANDLE_VALUE) {
+    Printf("CreateFileA() failed for \"%s\" (Error code: %lu).\n", Path.c_str(),
+        GetLastError());
+    return false;
+  }
+
+  DWORD FileType = GetFileType(FileHandle);
+
+  if (FileType == FILE_TYPE_UNKNOWN) {
+    Printf("GetFileType() failed for \"%s\" (Error code: %lu).\n", Path.c_str(),
+        GetLastError());
+    CloseHandle(FileHandle);
+    return false;
+  }
+
+  if (FileType != FILE_TYPE_DISK) {
+    CloseHandle(FileHandle);
+    return false;
+  }
+
+  CloseHandle(FileHandle);
+  return true;
+}
+
+bool IsFile(const std::string &Path) {
+  DWORD Att = GetFileAttributesA(Path.c_str());
+
+  if (Att == INVALID_FILE_ATTRIBUTES) {
+    Printf("GetFileAttributesA() failed for \"%s\" (Error code: %lu).\n",
+        Path.c_str(), GetLastError());
+    return false;
+  }
+
+  return IsFile(Path, Att);
+}
+
+void ListFilesInDirRecursive(const std::string &Dir, long *Epoch,
+                             Vector<std::string> *V, bool TopDir) {
+  auto E = GetEpoch(Dir);
+  if (Epoch)
+    if (E && *Epoch >= E) return;
+
+  std::string Path(Dir);
+  assert(!Path.empty());
+  if (Path.back() != '\\')
+      Path.push_back('\\');
+  Path.push_back('*');
+
+  // Get the first directory entry.
+  WIN32_FIND_DATAA FindInfo;
+  HANDLE FindHandle(FindFirstFileA(Path.c_str(), &FindInfo));
+  if (FindHandle == INVALID_HANDLE_VALUE)
+  {
+    if (GetLastError() == ERROR_FILE_NOT_FOUND)
+      return;
+    Printf("No such directory: %s; exiting\n", Dir.c_str());
+    exit(1);
+  }
+
+  do {
+    std::string FileName = DirPlusFile(Dir, FindInfo.cFileName);
+
+    if (FindInfo.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
+      size_t FilenameLen = strlen(FindInfo.cFileName);
+      if ((FilenameLen == 1 && FindInfo.cFileName[0] == '.') ||
+          (FilenameLen == 2 && FindInfo.cFileName[0] == '.' &&
+                               FindInfo.cFileName[1] == '.'))
+        continue;
+
+      ListFilesInDirRecursive(FileName, Epoch, V, false);
+    }
+    else if (IsFile(FileName, FindInfo.dwFileAttributes))
+      V->push_back(FileName);
+  } while (FindNextFileA(FindHandle, &FindInfo));
+
+  DWORD LastError = GetLastError();
+  if (LastError != ERROR_NO_MORE_FILES)
+    Printf("FindNextFileA failed (Error code: %lu).\n", LastError);
+
+  FindClose(FindHandle);
+
+  if (Epoch && TopDir)
+    *Epoch = E;
+}
+
+char GetSeparator() {
+  return '\\';
+}
+
+FILE* OpenFile(int Fd, const char* Mode) {
+  return _fdopen(Fd, Mode);
+}
+
+int CloseFile(int Fd) {
+  return _close(Fd);
+}
+
+int DuplicateFile(int Fd) {
+  return _dup(Fd);
+}
+
+void RemoveFile(const std::string &Path) {
+  _unlink(Path.c_str());
+}
+
+void DiscardOutput(int Fd) {
+  FILE* Temp = fopen("nul", "w");
+  if (!Temp)
+    return;
+  _dup2(_fileno(Temp), Fd);
+  fclose(Temp);
+}
+
+intptr_t GetHandleFromFd(int fd) {
+  return _get_osfhandle(fd);
+}
+
+static bool IsSeparator(char C) {
+  return C == '\\' || C == '/';
+}
+
+// Parse disk designators, like "C:\". If Relative == true, also accepts: "C:".
+// Returns number of characters considered if successful.
+static size_t ParseDrive(const std::string &FileName, const size_t Offset,
+                         bool Relative = true) {
+  if (Offset + 1 >= FileName.size() || FileName[Offset + 1] != ':')
+    return 0;
+  if (Offset + 2 >= FileName.size() || !IsSeparator(FileName[Offset + 2])) {
+    if (!Relative) // Accept relative path?
+      return 0;
+    else
+      return 2;
+  }
+  return 3;
+}
+
+// Parse a file name, like: SomeFile.txt
+// Returns number of characters considered if successful.
+static size_t ParseFileName(const std::string &FileName, const size_t Offset) {
+  size_t Pos = Offset;
+  const size_t End = FileName.size();
+  for(; Pos < End && !IsSeparator(FileName[Pos]); ++Pos)
+    ;
+  return Pos - Offset;
+}
+
+// Parse a directory ending in separator, like: `SomeDir\`
+// Returns number of characters considered if successful.
+static size_t ParseDir(const std::string &FileName, const size_t Offset) {
+  size_t Pos = Offset;
+  const size_t End = FileName.size();
+  if (Pos >= End || IsSeparator(FileName[Pos]))
+    return 0;
+  for(; Pos < End && !IsSeparator(FileName[Pos]); ++Pos)
+    ;
+  if (Pos >= End)
+    return 0;
+  ++Pos; // Include separator.
+  return Pos - Offset;
+}
+
+// Parse a servername and share, like: `SomeServer\SomeShare\`
+// Returns number of characters considered if successful.
+static size_t ParseServerAndShare(const std::string &FileName,
+                                  const size_t Offset) {
+  size_t Pos = Offset, Res;
+  if (!(Res = ParseDir(FileName, Pos)))
+    return 0;
+  Pos += Res;
+  if (!(Res = ParseDir(FileName, Pos)))
+    return 0;
+  Pos += Res;
+  return Pos - Offset;
+}
+
+// Parse the given Ref string from the position Offset, to exactly match the given
+// string Patt.
+// Returns number of characters considered if successful.
+static size_t ParseCustomString(const std::string &Ref, size_t Offset,
+                                const char *Patt) {
+  size_t Len = strlen(Patt);
+  if (Offset + Len > Ref.size())
+    return 0;
+  return Ref.compare(Offset, Len, Patt) == 0 ? Len : 0;
+}
+
+// Parse a location, like:
+// \\?\UNC\Server\Share\  \\?\C:\  \\Server\Share\  \  C:\  C:
+// Returns number of characters considered if successful.
+static size_t ParseLocation(const std::string &FileName) {
+  size_t Pos = 0, Res;
+
+  if ((Res = ParseCustomString(FileName, Pos, R"(\\?\)"))) {
+    Pos += Res;
+    if ((Res = ParseCustomString(FileName, Pos, R"(UNC\)"))) {
+      Pos += Res;
+      if ((Res = ParseServerAndShare(FileName, Pos)))
+        return Pos + Res;
+      return 0;
+    }
+    if ((Res = ParseDrive(FileName, Pos, false)))
+      return Pos + Res;
+    return 0;
+  }
+
+  if (Pos < FileName.size() && IsSeparator(FileName[Pos])) {
+    ++Pos;
+    if (Pos < FileName.size() && IsSeparator(FileName[Pos])) {
+      ++Pos;
+      if ((Res = ParseServerAndShare(FileName, Pos)))
+        return Pos + Res;
+      return 0;
+    }
+    return Pos;
+  }
+
+  if ((Res = ParseDrive(FileName, Pos)))
+    return Pos + Res;
+
+  return Pos;
+}
+
+std::string DirName(const std::string &FileName) {
+  size_t LocationLen = ParseLocation(FileName);
+  size_t DirLen = 0, Res;
+  while ((Res = ParseDir(FileName, LocationLen + DirLen)))
+    DirLen += Res;
+  size_t FileLen = ParseFileName(FileName, LocationLen + DirLen);
+
+  if (LocationLen + DirLen + FileLen != FileName.size()) {
+    Printf("DirName() failed for \"%s\", invalid path.\n", FileName.c_str());
+    exit(1);
+  }
+
+  if (DirLen) {
+    --DirLen; // Remove trailing separator.
+    if (!FileLen) { // Path ended in separator.
+      assert(DirLen);
+      // Remove file name from Dir.
+      while (DirLen && !IsSeparator(FileName[LocationLen + DirLen - 1]))
+        --DirLen;
+      if (DirLen) // Remove trailing separator.
+        --DirLen;
+    }
+  }
+
+  if (!LocationLen) { // Relative path.
+    if (!DirLen)
+      return ".";
+    return std::string(".\\").append(FileName, 0, DirLen);
+  }
+
+  return FileName.substr(0, LocationLen + DirLen);
+}
+
+std::string TmpDir() {
+  std::string Tmp;
+  Tmp.resize(MAX_PATH + 1);
+  DWORD Size = GetTempPathA(Tmp.size(), &Tmp[0]);
+  if (Size == 0) {
+    Printf("Couldn't get Tmp path.\n");
+    exit(1);
+  }
+  Tmp.resize(Size);
+  return Tmp;
+}
+
+bool IsInterestingCoverageFile(const std::string &FileName) {
+  if (FileName.find("Program Files") != std::string::npos)
+    return false;
+  if (FileName.find("compiler-rt\\lib\\") != std::string::npos)
+    return false; // sanitizer internal.
+  if (FileName == "<null>")
+    return false;
+  return true;
+}
+
+void RawPrint(const char *Str) {
+  // Not tested, may or may not work. Fix if needed.
+  Printf("%s", Str);
+}
+
+}  // namespace fuzzer
+
+#endif // LIBFUZZER_WINDOWS
diff --git a/lib/fuzzer/FuzzerInterface.h b/lib/fuzzer/FuzzerInterface.h
new file mode 100644
index 0000000..c2c0a39
--- /dev/null
+++ b/lib/fuzzer/FuzzerInterface.h
@@ -0,0 +1,67 @@
+//===- FuzzerInterface.h - Interface header for the Fuzzer ------*- C++ -* ===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Define the interface between libFuzzer and the library being tested.
+//===----------------------------------------------------------------------===//
+
+// NOTE: the libFuzzer interface is thin and in the majority of cases
+// you should not include this file into your target. In 95% of cases
+// all you need is to define the following function in your file:
+// extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size);
+
+// WARNING: keep the interface in C.
+
+#ifndef LLVM_FUZZER_INTERFACE_H
+#define LLVM_FUZZER_INTERFACE_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+
+// Mandatory user-provided target function.
+// Executes the code under test with [Data, Data+Size) as the input.
+// libFuzzer will invoke this function *many* times with different inputs.
+// Must return 0.
+int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size);
+
+// Optional user-provided initialization function.
+// If provided, this function will be called by libFuzzer once at startup.
+// It may read and modify argc/argv.
+// Must return 0.
+int LLVMFuzzerInitialize(int *argc, char ***argv);
+
+// Optional user-provided custom mutator.
+// Mutates raw data in [Data, Data+Size) inplace.
+// Returns the new size, which is not greater than MaxSize.
+// Given the same Seed produces the same mutation.
+size_t LLVMFuzzerCustomMutator(uint8_t *Data, size_t Size, size_t MaxSize,
+                               unsigned int Seed);
+
+// Optional user-provided custom cross-over function.
+// Combines pieces of Data1 & Data2 together into Out.
+// Returns the new size, which is not greater than MaxOutSize.
+// Should produce the same mutation given the same Seed.
+size_t LLVMFuzzerCustomCrossOver(const uint8_t *Data1, size_t Size1,
+                                 const uint8_t *Data2, size_t Size2,
+                                 uint8_t *Out, size_t MaxOutSize,
+                                 unsigned int Seed);
+
+// Experimental, may go away in future.
+// libFuzzer-provided function to be used inside LLVMFuzzerCustomMutator.
+// Mutates raw data in [Data, Data+Size) inplace.
+// Returns the new size, which is not greater than MaxSize.
+size_t LLVMFuzzerMutate(uint8_t *Data, size_t Size, size_t MaxSize);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+
+#endif  // LLVM_FUZZER_INTERFACE_H
diff --git a/lib/fuzzer/FuzzerInternal.h b/lib/fuzzer/FuzzerInternal.h
new file mode 100644
index 0000000..97c1408
--- /dev/null
+++ b/lib/fuzzer/FuzzerInternal.h
@@ -0,0 +1,152 @@
+//===- FuzzerInternal.h - Internal header for the Fuzzer --------*- C++ -* ===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Define the main class fuzzer::Fuzzer and most functions.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FUZZER_INTERNAL_H
+#define LLVM_FUZZER_INTERNAL_H
+
+#include "FuzzerDefs.h"
+#include "FuzzerExtFunctions.h"
+#include "FuzzerInterface.h"
+#include "FuzzerOptions.h"
+#include "FuzzerSHA1.h"
+#include "FuzzerValueBitMap.h"
+#include <algorithm>
+#include <atomic>
+#include <chrono>
+#include <climits>
+#include <cstdlib>
+#include <string.h>
+
+namespace fuzzer {
+
+using namespace std::chrono;
+
+class Fuzzer {
+public:
+
+  Fuzzer(UserCallback CB, InputCorpus &Corpus, MutationDispatcher &MD,
+         FuzzingOptions Options);
+  ~Fuzzer();
+  void Loop(const Vector<std::string> &CorpusDirs);
+  void ReadAndExecuteSeedCorpora(const Vector<std::string> &CorpusDirs);
+  void MinimizeCrashLoop(const Unit &U);
+  void RereadOutputCorpus(size_t MaxSize);
+
+  size_t secondsSinceProcessStartUp() {
+    return duration_cast<seconds>(system_clock::now() - ProcessStartTime)
+        .count();
+  }
+
+  bool TimedOut() {
+    return Options.MaxTotalTimeSec > 0 &&
+           secondsSinceProcessStartUp() >
+               static_cast<size_t>(Options.MaxTotalTimeSec);
+  }
+
+  size_t execPerSec() {
+    size_t Seconds = secondsSinceProcessStartUp();
+    return Seconds ? TotalNumberOfRuns / Seconds : 0;
+  }
+
+  size_t getTotalNumberOfRuns() { return TotalNumberOfRuns; }
+
+  static void StaticAlarmCallback();
+  static void StaticCrashSignalCallback();
+  static void StaticExitCallback();
+  static void StaticInterruptCallback();
+  static void StaticFileSizeExceedCallback();
+
+  void ExecuteCallback(const uint8_t *Data, size_t Size);
+  bool RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile = false,
+              InputInfo *II = nullptr);
+
+  // Merge Corpora[1:] into Corpora[0].
+  void Merge(const Vector<std::string> &Corpora);
+  void CrashResistantMerge(const Vector<std::string> &Args,
+                           const Vector<std::string> &Corpora,
+                           const char *CoverageSummaryInputPathOrNull,
+                           const char *CoverageSummaryOutputPathOrNull);
+  void CrashResistantMergeInternalStep(const std::string &ControlFilePath);
+  MutationDispatcher &GetMD() { return MD; }
+  void PrintFinalStats();
+  void SetMaxInputLen(size_t MaxInputLen);
+  void SetMaxMutationLen(size_t MaxMutationLen);
+  void RssLimitCallback();
+
+  bool InFuzzingThread() const { return IsMyThread; }
+  size_t GetCurrentUnitInFuzzingThead(const uint8_t **Data) const;
+  void TryDetectingAMemoryLeak(const uint8_t *Data, size_t Size,
+                               bool DuringInitialCorpusExecution);
+
+  void HandleMalloc(size_t Size);
+  void AnnounceOutput(const uint8_t *Data, size_t Size);
+
+private:
+  void AlarmCallback();
+  void CrashCallback();
+  void ExitCallback();
+  void CrashOnOverwrittenData();
+  void InterruptCallback();
+  void MutateAndTestOne();
+  void PurgeAllocator();
+  void ReportNewCoverage(InputInfo *II, const Unit &U);
+  void PrintPulseAndReportSlowInput(const uint8_t *Data, size_t Size);
+  void WriteToOutputCorpus(const Unit &U);
+  void WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix);
+  void PrintStats(const char *Where, const char *End = "\n", size_t Units = 0);
+  void PrintStatusForNewUnit(const Unit &U, const char *Text);
+  void CheckExitOnSrcPosOrItem();
+
+  static void StaticDeathCallback();
+  void DumpCurrentUnit(const char *Prefix);
+  void DeathCallback();
+
+  void AllocateCurrentUnitData();
+  uint8_t *CurrentUnitData = nullptr;
+  std::atomic<size_t> CurrentUnitSize;
+  uint8_t BaseSha1[kSHA1NumBytes];  // Checksum of the base unit.
+  bool RunningCB = false;
+
+  size_t TotalNumberOfRuns = 0;
+  size_t NumberOfNewUnitsAdded = 0;
+
+  size_t LastCorpusUpdateRun = 0;
+  system_clock::time_point LastCorpusUpdateTime = system_clock::now();
+
+
+  bool HasMoreMallocsThanFrees = false;
+  size_t NumberOfLeakDetectionAttempts = 0;
+
+  system_clock::time_point LastAllocatorPurgeAttemptTime = system_clock::now();
+
+  UserCallback CB;
+  InputCorpus &Corpus;
+  MutationDispatcher &MD;
+  FuzzingOptions Options;
+
+  system_clock::time_point ProcessStartTime = system_clock::now();
+  system_clock::time_point UnitStartTime, UnitStopTime;
+  long TimeOfLongestUnitInSeconds = 0;
+  long EpochOfLastReadOfOutputCorpus = 0;
+
+  size_t MaxInputLen = 0;
+  size_t MaxMutationLen = 0;
+  size_t TmpMaxMutationLen = 0;
+
+  Vector<uint32_t> UniqFeatureSetTmp;
+
+  // Need to know our own thread.
+  static thread_local bool IsMyThread;
+};
+
+} // namespace fuzzer
+
+#endif // LLVM_FUZZER_INTERNAL_H
diff --git a/lib/fuzzer/FuzzerLoop.cpp b/lib/fuzzer/FuzzerLoop.cpp
new file mode 100644
index 0000000..f4771e1
--- /dev/null
+++ b/lib/fuzzer/FuzzerLoop.cpp
@@ -0,0 +1,817 @@
+//===- FuzzerLoop.cpp - Fuzzer's main loop --------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Fuzzer's main loop.
+//===----------------------------------------------------------------------===//
+
+#include "FuzzerCorpus.h"
+#include "FuzzerIO.h"
+#include "FuzzerInternal.h"
+#include "FuzzerMutate.h"
+#include "FuzzerRandom.h"
+#include "FuzzerShmem.h"
+#include "FuzzerTracePC.h"
+#include <algorithm>
+#include <cstring>
+#include <memory>
+#include <mutex>
+#include <set>
+
+#if defined(__has_include)
+#if __has_include(<sanitizer / lsan_interface.h>)
+#include <sanitizer/lsan_interface.h>
+#endif
+#endif
+
+#define NO_SANITIZE_MEMORY
+#if defined(__has_feature)
+#if __has_feature(memory_sanitizer)
+#undef NO_SANITIZE_MEMORY
+#define NO_SANITIZE_MEMORY __attribute__((no_sanitize_memory))
+#endif
+#endif
+
+namespace fuzzer {
+static const size_t kMaxUnitSizeToPrint = 256;
+
+thread_local bool Fuzzer::IsMyThread;
+
+SharedMemoryRegion SMR;
+
+// Only one Fuzzer per process.
+static Fuzzer *F;
+
+// Leak detection is expensive, so we first check if there were more mallocs
+// than frees (using the sanitizer malloc hooks) and only then try to call lsan.
+struct MallocFreeTracer {
+  void Start(int TraceLevel) {
+    this->TraceLevel = TraceLevel;
+    if (TraceLevel)
+      Printf("MallocFreeTracer: START\n");
+    Mallocs = 0;
+    Frees = 0;
+  }
+  // Returns true if there were more mallocs than frees.
+  bool Stop() {
+    if (TraceLevel)
+      Printf("MallocFreeTracer: STOP %zd %zd (%s)\n", Mallocs.load(),
+             Frees.load(), Mallocs == Frees ? "same" : "DIFFERENT");
+    bool Result = Mallocs > Frees;
+    Mallocs = 0;
+    Frees = 0;
+    TraceLevel = 0;
+    return Result;
+  }
+  std::atomic<size_t> Mallocs;
+  std::atomic<size_t> Frees;
+  int TraceLevel = 0;
+
+  std::recursive_mutex TraceMutex;
+  bool TraceDisabled = false;
+};
+
+static MallocFreeTracer AllocTracer;
+
+// Locks printing and avoids nested hooks triggered from mallocs/frees in
+// sanitizer.
+class TraceLock {
+public:
+  TraceLock() : Lock(AllocTracer.TraceMutex) {
+    AllocTracer.TraceDisabled = !AllocTracer.TraceDisabled;
+  }
+  ~TraceLock() { AllocTracer.TraceDisabled = !AllocTracer.TraceDisabled; }
+
+  bool IsDisabled() const {
+    // This is already inverted value.
+    return !AllocTracer.TraceDisabled;
+  }
+
+private:
+  std::lock_guard<std::recursive_mutex> Lock;
+};
+
+ATTRIBUTE_NO_SANITIZE_MEMORY
+void MallocHook(const volatile void *ptr, size_t size) {
+  size_t N = AllocTracer.Mallocs++;
+  F->HandleMalloc(size);
+  if (int TraceLevel = AllocTracer.TraceLevel) {
+    TraceLock Lock;
+    if (Lock.IsDisabled())
+      return;
+    Printf("MALLOC[%zd] %p %zd\n", N, ptr, size);
+    if (TraceLevel >= 2 && EF)
+      EF->__sanitizer_print_stack_trace();
+  }
+}
+
+ATTRIBUTE_NO_SANITIZE_MEMORY
+void FreeHook(const volatile void *ptr) {
+  size_t N = AllocTracer.Frees++;
+  if (int TraceLevel = AllocTracer.TraceLevel) {
+    TraceLock Lock;
+    if (Lock.IsDisabled())
+      return;
+    Printf("FREE[%zd]   %p\n", N, ptr);
+    if (TraceLevel >= 2 && EF)
+      EF->__sanitizer_print_stack_trace();
+  }
+}
+
+// Crash on a single malloc that exceeds the rss limit.
+void Fuzzer::HandleMalloc(size_t Size) {
+  if (!Options.RssLimitMb || (Size >> 20) < (size_t)Options.RssLimitMb)
+    return;
+  Printf("==%d== ERROR: libFuzzer: out-of-memory (malloc(%zd))\n", GetPid(),
+         Size);
+  Printf("   To change the out-of-memory limit use -rss_limit_mb=<N>\n\n");
+  if (EF->__sanitizer_print_stack_trace)
+    EF->__sanitizer_print_stack_trace();
+  DumpCurrentUnit("oom-");
+  Printf("SUMMARY: libFuzzer: out-of-memory\n");
+  PrintFinalStats();
+  _Exit(Options.ErrorExitCode); // Stop right now.
+}
+
+Fuzzer::Fuzzer(UserCallback CB, InputCorpus &Corpus, MutationDispatcher &MD,
+               FuzzingOptions Options)
+    : CB(CB), Corpus(Corpus), MD(MD), Options(Options) {
+  if (EF->__sanitizer_set_death_callback)
+    EF->__sanitizer_set_death_callback(StaticDeathCallback);
+  assert(!F);
+  F = this;
+  TPC.ResetMaps();
+  IsMyThread = true;
+  if (Options.DetectLeaks && EF->__sanitizer_install_malloc_and_free_hooks)
+    EF->__sanitizer_install_malloc_and_free_hooks(MallocHook, FreeHook);
+  TPC.SetUseCounters(Options.UseCounters);
+  TPC.SetUseValueProfile(Options.UseValueProfile);
+  TPC.SetUseClangCoverage(Options.UseClangCoverage);
+
+  if (Options.Verbosity)
+    TPC.PrintModuleInfo();
+  if (!Options.OutputCorpus.empty() && Options.ReloadIntervalSec)
+    EpochOfLastReadOfOutputCorpus = GetEpoch(Options.OutputCorpus);
+  MaxInputLen = MaxMutationLen = Options.MaxLen;
+  TmpMaxMutationLen = Max(size_t(4), Corpus.MaxInputSize());
+  AllocateCurrentUnitData();
+  CurrentUnitSize = 0;
+  memset(BaseSha1, 0, sizeof(BaseSha1));
+}
+
+Fuzzer::~Fuzzer() {}
+
+void Fuzzer::AllocateCurrentUnitData() {
+  if (CurrentUnitData || MaxInputLen == 0)
+    return;
+  CurrentUnitData = new uint8_t[MaxInputLen];
+}
+
+void Fuzzer::StaticDeathCallback() {
+  assert(F);
+  F->DeathCallback();
+}
+
+void Fuzzer::DumpCurrentUnit(const char *Prefix) {
+  if (!CurrentUnitData)
+    return; // Happens when running individual inputs.
+  MD.PrintMutationSequence();
+  Printf("; base unit: %s\n", Sha1ToString(BaseSha1).c_str());
+  size_t UnitSize = CurrentUnitSize;
+  if (UnitSize <= kMaxUnitSizeToPrint) {
+    PrintHexArray(CurrentUnitData, UnitSize, "\n");
+    PrintASCII(CurrentUnitData, UnitSize, "\n");
+  }
+  WriteUnitToFileWithPrefix({CurrentUnitData, CurrentUnitData + UnitSize},
+                            Prefix);
+}
+
+NO_SANITIZE_MEMORY
+void Fuzzer::DeathCallback() {
+  DumpCurrentUnit("crash-");
+  PrintFinalStats();
+}
+
+void Fuzzer::StaticAlarmCallback() {
+  assert(F);
+  F->AlarmCallback();
+}
+
+void Fuzzer::StaticCrashSignalCallback() {
+  assert(F);
+  F->CrashCallback();
+}
+
+void Fuzzer::StaticExitCallback() {
+  assert(F);
+  F->ExitCallback();
+}
+
+void Fuzzer::StaticInterruptCallback() {
+  assert(F);
+  F->InterruptCallback();
+}
+
+void Fuzzer::StaticFileSizeExceedCallback() {
+  Printf("==%lu== ERROR: libFuzzer: file size exceeded\n", GetPid());
+  exit(1);
+}
+
+void Fuzzer::CrashCallback() {
+  Printf("==%lu== ERROR: libFuzzer: deadly signal\n", GetPid());
+  if (EF->__sanitizer_print_stack_trace)
+    EF->__sanitizer_print_stack_trace();
+  Printf("NOTE: libFuzzer has rudimentary signal handlers.\n"
+         "      Combine libFuzzer with AddressSanitizer or similar for better "
+         "crash reports.\n");
+  Printf("SUMMARY: libFuzzer: deadly signal\n");
+  DumpCurrentUnit("crash-");
+  PrintFinalStats();
+  _Exit(Options.ErrorExitCode); // Stop right now.
+}
+
+void Fuzzer::ExitCallback() {
+  if (!RunningCB)
+    return; // This exit did not come from the user callback
+  Printf("==%lu== ERROR: libFuzzer: fuzz target exited\n", GetPid());
+  if (EF->__sanitizer_print_stack_trace)
+    EF->__sanitizer_print_stack_trace();
+  Printf("SUMMARY: libFuzzer: fuzz target exited\n");
+  DumpCurrentUnit("crash-");
+  PrintFinalStats();
+  _Exit(Options.ErrorExitCode);
+}
+
+void Fuzzer::InterruptCallback() {
+  Printf("==%lu== libFuzzer: run interrupted; exiting\n", GetPid());
+  PrintFinalStats();
+  _Exit(0); // Stop right now, don't perform any at-exit actions.
+}
+
+NO_SANITIZE_MEMORY
+void Fuzzer::AlarmCallback() {
+  assert(Options.UnitTimeoutSec > 0);
+  // In Windows Alarm callback is executed by a different thread.
+#if !LIBFUZZER_WINDOWS
+  if (!InFuzzingThread())
+    return;
+#endif
+  if (!RunningCB)
+    return; // We have not started running units yet.
+  size_t Seconds =
+      duration_cast<seconds>(system_clock::now() - UnitStartTime).count();
+  if (Seconds == 0)
+    return;
+  if (Options.Verbosity >= 2)
+    Printf("AlarmCallback %zd\n", Seconds);
+  if (Seconds >= (size_t)Options.UnitTimeoutSec) {
+    Printf("ALARM: working on the last Unit for %zd seconds\n", Seconds);
+    Printf("       and the timeout value is %d (use -timeout=N to change)\n",
+           Options.UnitTimeoutSec);
+    DumpCurrentUnit("timeout-");
+    Printf("==%lu== ERROR: libFuzzer: timeout after %d seconds\n", GetPid(),
+           Seconds);
+    if (EF->__sanitizer_print_stack_trace)
+      EF->__sanitizer_print_stack_trace();
+    Printf("SUMMARY: libFuzzer: timeout\n");
+    PrintFinalStats();
+    _Exit(Options.TimeoutExitCode); // Stop right now.
+  }
+}
+
+void Fuzzer::RssLimitCallback() {
+  Printf(
+      "==%lu== ERROR: libFuzzer: out-of-memory (used: %zdMb; limit: %zdMb)\n",
+      GetPid(), GetPeakRSSMb(), Options.RssLimitMb);
+  Printf("   To change the out-of-memory limit use -rss_limit_mb=<N>\n\n");
+  if (EF->__sanitizer_print_memory_profile)
+    EF->__sanitizer_print_memory_profile(95, 8);
+  DumpCurrentUnit("oom-");
+  Printf("SUMMARY: libFuzzer: out-of-memory\n");
+  PrintFinalStats();
+  _Exit(Options.ErrorExitCode); // Stop right now.
+}
+
+void Fuzzer::PrintStats(const char *Where, const char *End, size_t Units) {
+  size_t ExecPerSec = execPerSec();
+  if (!Options.Verbosity)
+    return;
+  Printf("#%zd\t%s", TotalNumberOfRuns, Where);
+  if (size_t N = TPC.GetTotalPCCoverage())
+    Printf(" cov: %zd", N);
+  if (size_t N = Corpus.NumFeatures())
+    Printf(" ft: %zd", N);
+  if (!Corpus.empty()) {
+    Printf(" corp: %zd", Corpus.NumActiveUnits());
+    if (size_t N = Corpus.SizeInBytes()) {
+      if (N < (1 << 14))
+        Printf("/%zdb", N);
+      else if (N < (1 << 24))
+        Printf("/%zdKb", N >> 10);
+      else
+        Printf("/%zdMb", N >> 20);
+    }
+  }
+  if (Units)
+    Printf(" units: %zd", Units);
+
+  Printf(" exec/s: %zd", ExecPerSec);
+  Printf(" rss: %zdMb", GetPeakRSSMb());
+  Printf("%s", End);
+}
+
+void Fuzzer::PrintFinalStats() {
+  if (Options.PrintCoverage)
+    TPC.PrintCoverage();
+  if (Options.DumpCoverage)
+    TPC.DumpCoverage();
+  if (Options.PrintCorpusStats)
+    Corpus.PrintStats();
+  if (!Options.PrintFinalStats)
+    return;
+  size_t ExecPerSec = execPerSec();
+  Printf("stat::number_of_executed_units: %zd\n", TotalNumberOfRuns);
+  Printf("stat::average_exec_per_sec:     %zd\n", ExecPerSec);
+  Printf("stat::new_units_added:          %zd\n", NumberOfNewUnitsAdded);
+  Printf("stat::slowest_unit_time_sec:    %zd\n", TimeOfLongestUnitInSeconds);
+  Printf("stat::peak_rss_mb:              %zd\n", GetPeakRSSMb());
+}
+
+void Fuzzer::SetMaxInputLen(size_t MaxInputLen) {
+  assert(this->MaxInputLen == 0); // Can only reset MaxInputLen from 0 to non-0.
+  assert(MaxInputLen);
+  this->MaxInputLen = MaxInputLen;
+  this->MaxMutationLen = MaxInputLen;
+  AllocateCurrentUnitData();
+  Printf("INFO: -max_len is not provided; "
+         "libFuzzer will not generate inputs larger than %zd bytes\n",
+         MaxInputLen);
+}
+
+void Fuzzer::SetMaxMutationLen(size_t MaxMutationLen) {
+  assert(MaxMutationLen && MaxMutationLen <= MaxInputLen);
+  this->MaxMutationLen = MaxMutationLen;
+}
+
+void Fuzzer::CheckExitOnSrcPosOrItem() {
+  if (!Options.ExitOnSrcPos.empty()) {
+    static auto *PCsSet = new Set<uintptr_t>;
+    auto HandlePC = [&](uintptr_t PC) {
+      if (!PCsSet->insert(PC).second)
+        return;
+      std::string Descr = DescribePC("%F %L", PC + 1);
+      if (Descr.find(Options.ExitOnSrcPos) != std::string::npos) {
+        Printf("INFO: found line matching '%s', exiting.\n",
+               Options.ExitOnSrcPos.c_str());
+        _Exit(0);
+      }
+    };
+    TPC.ForEachObservedPC(HandlePC);
+  }
+  if (!Options.ExitOnItem.empty()) {
+    if (Corpus.HasUnit(Options.ExitOnItem)) {
+      Printf("INFO: found item with checksum '%s', exiting.\n",
+             Options.ExitOnItem.c_str());
+      _Exit(0);
+    }
+  }
+}
+
+void Fuzzer::RereadOutputCorpus(size_t MaxSize) {
+  if (Options.OutputCorpus.empty() || !Options.ReloadIntervalSec)
+    return;
+  Vector<Unit> AdditionalCorpus;
+  ReadDirToVectorOfUnits(Options.OutputCorpus.c_str(), &AdditionalCorpus,
+                         &EpochOfLastReadOfOutputCorpus, MaxSize,
+                         /*ExitOnError*/ false);
+  if (Options.Verbosity >= 2)
+    Printf("Reload: read %zd new units.\n", AdditionalCorpus.size());
+  bool Reloaded = false;
+  for (auto &U : AdditionalCorpus) {
+    if (U.size() > MaxSize)
+      U.resize(MaxSize);
+    if (!Corpus.HasUnit(U)) {
+      if (RunOne(U.data(), U.size())) {
+        CheckExitOnSrcPosOrItem();
+        Reloaded = true;
+      }
+    }
+  }
+  if (Reloaded)
+    PrintStats("RELOAD");
+}
+
+void Fuzzer::PrintPulseAndReportSlowInput(const uint8_t *Data, size_t Size) {
+  auto TimeOfUnit =
+      duration_cast<seconds>(UnitStopTime - UnitStartTime).count();
+  if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1)) &&
+      secondsSinceProcessStartUp() >= 2)
+    PrintStats("pulse ");
+  if (TimeOfUnit > TimeOfLongestUnitInSeconds * 1.1 &&
+      TimeOfUnit >= Options.ReportSlowUnits) {
+    TimeOfLongestUnitInSeconds = TimeOfUnit;
+    Printf("Slowest unit: %zd s:\n", TimeOfLongestUnitInSeconds);
+    WriteUnitToFileWithPrefix({Data, Data + Size}, "slow-unit-");
+  }
+}
+
+bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile,
+                    InputInfo *II) {
+  if (!Size)
+    return false;
+
+  ExecuteCallback(Data, Size);
+
+  UniqFeatureSetTmp.clear();
+  size_t FoundUniqFeaturesOfII = 0;
+  size_t NumUpdatesBefore = Corpus.NumFeatureUpdates();
+  TPC.CollectFeatures([&](size_t Feature) {
+    Corpus.UpdateFeatureFrequency(Feature);
+    if (Corpus.AddFeature(Feature, Size, Options.Shrink))
+      UniqFeatureSetTmp.push_back(Feature);
+    if (Options.ReduceInputs && II)
+      if (std::binary_search(II->UniqFeatureSet.begin(),
+                             II->UniqFeatureSet.end(), Feature))
+        FoundUniqFeaturesOfII++;
+  });
+  PrintPulseAndReportSlowInput(Data, Size);
+  size_t NumNewFeatures = Corpus.NumFeatureUpdates() - NumUpdatesBefore;
+  if (NumNewFeatures) {
+    TPC.UpdateObservedPCs();
+    Corpus.AddToCorpus({Data, Data + Size}, NumNewFeatures, MayDeleteFile,
+                       UniqFeatureSetTmp);
+    return true;
+  }
+  if (II && FoundUniqFeaturesOfII &&
+      FoundUniqFeaturesOfII == II->UniqFeatureSet.size() &&
+      II->U.size() > Size) {
+    Corpus.Replace(II, {Data, Data + Size});
+    return true;
+  }
+  return false;
+}
+
+size_t Fuzzer::GetCurrentUnitInFuzzingThead(const uint8_t **Data) const {
+  assert(InFuzzingThread());
+  *Data = CurrentUnitData;
+  return CurrentUnitSize;
+}
+
+void Fuzzer::CrashOnOverwrittenData() {
+  Printf("==%d== ERROR: libFuzzer: fuzz target overwrites it's const input\n",
+         GetPid());
+  DumpCurrentUnit("crash-");
+  Printf("SUMMARY: libFuzzer: out-of-memory\n");
+  _Exit(Options.ErrorExitCode); // Stop right now.
+}
+
+// Compare two arrays, but not all bytes if the arrays are large.
+static bool LooseMemeq(const uint8_t *A, const uint8_t *B, size_t Size) {
+  const size_t Limit = 64;
+  if (Size <= 64)
+    return !memcmp(A, B, Size);
+  // Compare first and last Limit/2 bytes.
+  return !memcmp(A, B, Limit / 2) &&
+         !memcmp(A + Size - Limit / 2, B + Size - Limit / 2, Limit / 2);
+}
+
+void Fuzzer::ExecuteCallback(const uint8_t *Data, size_t Size) {
+  TPC.RecordInitialStack();
+  TotalNumberOfRuns++;
+  assert(InFuzzingThread());
+  if (SMR.IsClient())
+    SMR.WriteByteArray(Data, Size);
+  // We copy the contents of Unit into a separate heap buffer
+  // so that we reliably find buffer overflows in it.
+  uint8_t *DataCopy = new uint8_t[Size];
+  memcpy(DataCopy, Data, Size);
+  if (CurrentUnitData && CurrentUnitData != Data)
+    memcpy(CurrentUnitData, Data, Size);
+  CurrentUnitSize = Size;
+  AllocTracer.Start(Options.TraceMalloc);
+  UnitStartTime = system_clock::now();
+  TPC.ResetMaps();
+  RunningCB = true;
+  int Res = CB(DataCopy, Size);
+  RunningCB = false;
+  UnitStopTime = system_clock::now();
+  (void)Res;
+  assert(Res == 0);
+  HasMoreMallocsThanFrees = AllocTracer.Stop();
+  if (!LooseMemeq(DataCopy, Data, Size))
+    CrashOnOverwrittenData();
+  CurrentUnitSize = 0;
+  delete[] DataCopy;
+}
+
+void Fuzzer::WriteToOutputCorpus(const Unit &U) {
+  if (Options.OnlyASCII)
+    assert(IsASCII(U));
+  if (Options.OutputCorpus.empty())
+    return;
+  std::string Path = DirPlusFile(Options.OutputCorpus, Hash(U));
+  WriteToFile(U, Path);
+  if (Options.Verbosity >= 2)
+    Printf("Written %zd bytes to %s\n", U.size(), Path.c_str());
+}
+
+void Fuzzer::WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix) {
+  if (!Options.SaveArtifacts)
+    return;
+  std::string Path = Options.ArtifactPrefix + Prefix + Hash(U);
+  if (!Options.ExactArtifactPath.empty())
+    Path = Options.ExactArtifactPath; // Overrides ArtifactPrefix.
+  WriteToFile(U, Path);
+  Printf("artifact_prefix='%s'; Test unit written to %s\n",
+         Options.ArtifactPrefix.c_str(), Path.c_str());
+  if (U.size() <= kMaxUnitSizeToPrint)
+    Printf("Base64: %s\n", Base64(U).c_str());
+}
+
+void Fuzzer::PrintStatusForNewUnit(const Unit &U, const char *Text) {
+  if (!Options.PrintNEW)
+    return;
+  PrintStats(Text, "");
+  if (Options.Verbosity) {
+    Printf(" L: %zd/%zd ", U.size(), Corpus.MaxInputSize());
+    MD.PrintMutationSequence();
+    Printf("\n");
+  }
+}
+
+void Fuzzer::ReportNewCoverage(InputInfo *II, const Unit &U) {
+  II->NumSuccessfullMutations++;
+  MD.RecordSuccessfulMutationSequence();
+  PrintStatusForNewUnit(U, II->Reduced ? "REDUCE" : "NEW   ");
+  WriteToOutputCorpus(U);
+  NumberOfNewUnitsAdded++;
+  CheckExitOnSrcPosOrItem(); // Check only after the unit is saved to corpus.
+  LastCorpusUpdateRun = TotalNumberOfRuns;
+  LastCorpusUpdateTime = system_clock::now();
+}
+
+// Tries detecting a memory leak on the particular input that we have just
+// executed before calling this function.
+void Fuzzer::TryDetectingAMemoryLeak(const uint8_t *Data, size_t Size,
+                                     bool DuringInitialCorpusExecution) {
+  if (!HasMoreMallocsThanFrees)
+    return; // mallocs==frees, a leak is unlikely.
+  if (!Options.DetectLeaks)
+    return;
+  if (!DuringInitialCorpusExecution &&
+      TotalNumberOfRuns >= Options.MaxNumberOfRuns)
+    return;
+  if (!&(EF->__lsan_enable) || !&(EF->__lsan_disable) ||
+      !(EF->__lsan_do_recoverable_leak_check))
+    return; // No lsan.
+  // Run the target once again, but with lsan disabled so that if there is
+  // a real leak we do not report it twice.
+  EF->__lsan_disable();
+  ExecuteCallback(Data, Size);
+  EF->__lsan_enable();
+  if (!HasMoreMallocsThanFrees)
+    return; // a leak is unlikely.
+  if (NumberOfLeakDetectionAttempts++ > 1000) {
+    Options.DetectLeaks = false;
+    Printf("INFO: libFuzzer disabled leak detection after every mutation.\n"
+           "      Most likely the target function accumulates allocated\n"
+           "      memory in a global state w/o actually leaking it.\n"
+           "      You may try running this binary with -trace_malloc=[12]"
+           "      to get a trace of mallocs and frees.\n"
+           "      If LeakSanitizer is enabled in this process it will still\n"
+           "      run on the process shutdown.\n");
+    return;
+  }
+  // Now perform the actual lsan pass. This is expensive and we must ensure
+  // we don't call it too often.
+  if (EF->__lsan_do_recoverable_leak_check()) { // Leak is found, report it.
+    if (DuringInitialCorpusExecution)
+      Printf("\nINFO: a leak has been found in the initial corpus.\n\n");
+    Printf("INFO: to ignore leaks on libFuzzer side use -detect_leaks=0.\n\n");
+    CurrentUnitSize = Size;
+    DumpCurrentUnit("leak-");
+    PrintFinalStats();
+    _Exit(Options.ErrorExitCode); // not exit() to disable lsan further on.
+  }
+}
+
+void Fuzzer::MutateAndTestOne() {
+  MD.StartMutationSequence();
+
+  auto &II = Corpus.ChooseUnitToMutate(MD.GetRand());
+  if (Options.UseFeatureFrequency)
+    Corpus.UpdateFeatureFrequencyScore(&II);
+  const auto &U = II.U;
+  memcpy(BaseSha1, II.Sha1, sizeof(BaseSha1));
+  assert(CurrentUnitData);
+  size_t Size = U.size();
+  assert(Size <= MaxInputLen && "Oversized Unit");
+  memcpy(CurrentUnitData, U.data(), Size);
+
+  assert(MaxMutationLen > 0);
+
+  size_t CurrentMaxMutationLen =
+      Min(MaxMutationLen, Max(U.size(), TmpMaxMutationLen));
+  assert(CurrentMaxMutationLen > 0);
+
+  for (int i = 0; i < Options.MutateDepth; i++) {
+    if (TotalNumberOfRuns >= Options.MaxNumberOfRuns)
+      break;
+    size_t NewSize = 0;
+    NewSize = MD.Mutate(CurrentUnitData, Size, CurrentMaxMutationLen);
+    assert(NewSize > 0 && "Mutator returned empty unit");
+    assert(NewSize <= CurrentMaxMutationLen && "Mutator return oversized unit");
+    Size = NewSize;
+    II.NumExecutedMutations++;
+    if (RunOne(CurrentUnitData, Size, /*MayDeleteFile=*/true, &II))
+      ReportNewCoverage(&II, {CurrentUnitData, CurrentUnitData + Size});
+
+    TryDetectingAMemoryLeak(CurrentUnitData, Size,
+                            /*DuringInitialCorpusExecution*/ false);
+  }
+}
+
+void Fuzzer::PurgeAllocator() {
+  if (Options.PurgeAllocatorIntervalSec < 0 || !EF->__sanitizer_purge_allocator)
+    return;
+  if (duration_cast<seconds>(system_clock::now() -
+                             LastAllocatorPurgeAttemptTime)
+          .count() < Options.PurgeAllocatorIntervalSec)
+    return;
+
+  if (Options.RssLimitMb <= 0 ||
+      GetPeakRSSMb() > static_cast<size_t>(Options.RssLimitMb) / 2)
+    EF->__sanitizer_purge_allocator();
+
+  LastAllocatorPurgeAttemptTime = system_clock::now();
+}
+
+void Fuzzer::ReadAndExecuteSeedCorpora(const Vector<std::string> &CorpusDirs) {
+  const size_t kMaxSaneLen = 1 << 20;
+  const size_t kMinDefaultLen = 4096;
+  Vector<SizedFile> SizedFiles;
+  size_t MaxSize = 0;
+  size_t MinSize = -1;
+  size_t TotalSize = 0;
+  size_t LastNumFiles = 0;
+  for (auto &Dir : CorpusDirs) {
+    GetSizedFilesFromDir(Dir, &SizedFiles);
+    Printf("INFO: % 8zd files found in %s\n", SizedFiles.size() - LastNumFiles,
+           Dir.c_str());
+    LastNumFiles = SizedFiles.size();
+  }
+  for (auto &File : SizedFiles) {
+    MaxSize = Max(File.Size, MaxSize);
+    MinSize = Min(File.Size, MinSize);
+    TotalSize += File.Size;
+  }
+  if (Options.MaxLen == 0)
+    SetMaxInputLen(std::min(std::max(kMinDefaultLen, MaxSize), kMaxSaneLen));
+  assert(MaxInputLen > 0);
+
+  // Test the callback with empty input and never try it again.
+  uint8_t dummy = 0;
+  ExecuteCallback(&dummy, 0);
+
+  if (SizedFiles.empty()) {
+    Printf("INFO: A corpus is not provided, starting from an empty corpus\n");
+    Unit U({'\n'}); // Valid ASCII input.
+    RunOne(U.data(), U.size());
+  } else {
+    Printf("INFO: seed corpus: files: %zd min: %zdb max: %zdb total: %zdb"
+           " rss: %zdMb\n",
+           SizedFiles.size(), MinSize, MaxSize, TotalSize, GetPeakRSSMb());
+    if (Options.ShuffleAtStartUp)
+      std::shuffle(SizedFiles.begin(), SizedFiles.end(), MD.GetRand());
+
+    if (Options.PreferSmall) {
+      std::stable_sort(SizedFiles.begin(), SizedFiles.end());
+      assert(SizedFiles.front().Size <= SizedFiles.back().Size);
+    }
+
+    // Load and execute inputs one by one.
+    for (auto &SF : SizedFiles) {
+      auto U = FileToVector(SF.File, MaxInputLen, /*ExitOnError=*/false);
+      assert(U.size() <= MaxInputLen);
+      RunOne(U.data(), U.size());
+      CheckExitOnSrcPosOrItem();
+      TryDetectingAMemoryLeak(U.data(), U.size(),
+                              /*DuringInitialCorpusExecution*/ true);
+    }
+  }
+
+  PrintStats("INITED");
+  if (Corpus.empty()) {
+    Printf("ERROR: no interesting inputs were found. "
+           "Is the code instrumented for coverage? Exiting.\n");
+    exit(1);
+  }
+}
+
+void Fuzzer::Loop(const Vector<std::string> &CorpusDirs) {
+  ReadAndExecuteSeedCorpora(CorpusDirs);
+  TPC.SetPrintNewPCs(Options.PrintNewCovPcs);
+  TPC.SetPrintNewFuncs(Options.PrintNewCovFuncs);
+  system_clock::time_point LastCorpusReload = system_clock::now();
+  if (Options.DoCrossOver)
+    MD.SetCorpus(&Corpus);
+  while (true) {
+    auto Now = system_clock::now();
+    if (duration_cast<seconds>(Now - LastCorpusReload).count() >=
+        Options.ReloadIntervalSec) {
+      RereadOutputCorpus(MaxInputLen);
+      LastCorpusReload = system_clock::now();
+    }
+    if (TotalNumberOfRuns >= Options.MaxNumberOfRuns)
+      break;
+    if (TimedOut())
+      break;
+
+    // Update TmpMaxMutationLen
+    if (Options.ExperimentalLenControl) {
+      if (TmpMaxMutationLen < MaxMutationLen &&
+          (TotalNumberOfRuns - LastCorpusUpdateRun > 1000 &&
+           duration_cast<seconds>(Now - LastCorpusUpdateTime).count() >= 1)) {
+        LastCorpusUpdateRun = TotalNumberOfRuns;
+        LastCorpusUpdateTime = Now;
+        TmpMaxMutationLen =
+            Min(MaxMutationLen,
+                TmpMaxMutationLen + Max(size_t(4), TmpMaxMutationLen / 8));
+        if (TmpMaxMutationLen <= MaxMutationLen)
+          Printf("#%zd\tTEMP_MAX_LEN: %zd\n", TotalNumberOfRuns,
+                 TmpMaxMutationLen);
+      }
+    } else {
+      TmpMaxMutationLen = MaxMutationLen;
+    }
+
+    // Perform several mutations and runs.
+    MutateAndTestOne();
+
+    PurgeAllocator();
+  }
+
+  PrintStats("DONE  ", "\n");
+  MD.PrintRecommendedDictionary();
+}
+
+void Fuzzer::MinimizeCrashLoop(const Unit &U) {
+  if (U.size() <= 1)
+    return;
+  while (!TimedOut() && TotalNumberOfRuns < Options.MaxNumberOfRuns) {
+    MD.StartMutationSequence();
+    memcpy(CurrentUnitData, U.data(), U.size());
+    for (int i = 0; i < Options.MutateDepth; i++) {
+      size_t NewSize = MD.Mutate(CurrentUnitData, U.size(), MaxMutationLen);
+      assert(NewSize > 0 && NewSize <= MaxMutationLen);
+      ExecuteCallback(CurrentUnitData, NewSize);
+      PrintPulseAndReportSlowInput(CurrentUnitData, NewSize);
+      TryDetectingAMemoryLeak(CurrentUnitData, NewSize,
+                              /*DuringInitialCorpusExecution*/ false);
+    }
+  }
+}
+
+void Fuzzer::AnnounceOutput(const uint8_t *Data, size_t Size) {
+  if (SMR.IsServer()) {
+    SMR.WriteByteArray(Data, Size);
+  } else if (SMR.IsClient()) {
+    SMR.PostClient();
+    SMR.WaitServer();
+    size_t OtherSize = SMR.ReadByteArraySize();
+    uint8_t *OtherData = SMR.GetByteArray();
+    if (Size != OtherSize || memcmp(Data, OtherData, Size) != 0) {
+      size_t i = 0;
+      for (i = 0; i < Min(Size, OtherSize); i++)
+        if (Data[i] != OtherData[i])
+          break;
+      Printf("==%lu== ERROR: libFuzzer: equivalence-mismatch. Sizes: %zd %zd; "
+             "offset %zd\n",
+             GetPid(), Size, OtherSize, i);
+      DumpCurrentUnit("mismatch-");
+      Printf("SUMMARY: libFuzzer: equivalence-mismatch\n");
+      PrintFinalStats();
+      _Exit(Options.ErrorExitCode);
+    }
+  }
+}
+
+} // namespace fuzzer
+
+extern "C" {
+
+size_t LLVMFuzzerMutate(uint8_t *Data, size_t Size, size_t MaxSize) {
+  assert(fuzzer::F);
+  return fuzzer::F->GetMD().DefaultMutate(Data, Size, MaxSize);
+}
+
+// Experimental
+void LLVMFuzzerAnnounceOutput(const uint8_t *Data, size_t Size) {
+  assert(fuzzer::F);
+  fuzzer::F->AnnounceOutput(Data, Size);
+}
+} // extern "C"
diff --git a/lib/fuzzer/FuzzerMain.cpp b/lib/fuzzer/FuzzerMain.cpp
new file mode 100644
index 0000000..af86572
--- /dev/null
+++ b/lib/fuzzer/FuzzerMain.cpp
@@ -0,0 +1,21 @@
+//===- FuzzerMain.cpp - main() function and flags -------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// main() and flags.
+//===----------------------------------------------------------------------===//
+
+#include "FuzzerDefs.h"
+
+extern "C" {
+// This function should be defined by the user.
+int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size);
+}  // extern "C"
+
+int main(int argc, char **argv) {
+  return fuzzer::FuzzerDriver(&argc, &argv, LLVMFuzzerTestOneInput);
+}
diff --git a/lib/fuzzer/FuzzerMerge.cpp b/lib/fuzzer/FuzzerMerge.cpp
new file mode 100644
index 0000000..03cf00a
--- /dev/null
+++ b/lib/fuzzer/FuzzerMerge.cpp
@@ -0,0 +1,345 @@
+//===- FuzzerMerge.cpp - merging corpora ----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Merging corpora.
+//===----------------------------------------------------------------------===//
+
+#include "FuzzerMerge.h"
+#include "FuzzerIO.h"
+#include "FuzzerInternal.h"
+#include "FuzzerTracePC.h"
+#include "FuzzerUtil.h"
+
+#include <fstream>
+#include <iterator>
+#include <set>
+#include <sstream>
+
+namespace fuzzer {
+
+bool Merger::Parse(const std::string &Str, bool ParseCoverage) {
+  std::istringstream SS(Str);
+  return Parse(SS, ParseCoverage);
+}
+
+void Merger::ParseOrExit(std::istream &IS, bool ParseCoverage) {
+  if (!Parse(IS, ParseCoverage)) {
+    Printf("MERGE: failed to parse the control file (unexpected error)\n");
+    exit(1);
+  }
+}
+
+// The control file example:
+//
+// 3 # The number of inputs
+// 1 # The number of inputs in the first corpus, <= the previous number
+// file0
+// file1
+// file2  # One file name per line.
+// STARTED 0 123  # FileID, file size
+// DONE 0 1 4 6 8  # FileID COV1 COV2 ...
+// STARTED 1 456  # If DONE is missing, the input crashed while processing.
+// STARTED 2 567
+// DONE 2 8 9
+bool Merger::Parse(std::istream &IS, bool ParseCoverage) {
+  LastFailure.clear();
+  std::string Line;
+
+  // Parse NumFiles.
+  if (!std::getline(IS, Line, '\n')) return false;
+  std::istringstream L1(Line);
+  size_t NumFiles = 0;
+  L1 >> NumFiles;
+  if (NumFiles == 0 || NumFiles > 10000000) return false;
+
+  // Parse NumFilesInFirstCorpus.
+  if (!std::getline(IS, Line, '\n')) return false;
+  std::istringstream L2(Line);
+  NumFilesInFirstCorpus = NumFiles + 1;
+  L2 >> NumFilesInFirstCorpus;
+  if (NumFilesInFirstCorpus > NumFiles) return false;
+
+  // Parse file names.
+  Files.resize(NumFiles);
+  for (size_t i = 0; i < NumFiles; i++)
+    if (!std::getline(IS, Files[i].Name, '\n'))
+      return false;
+
+  // Parse STARTED and DONE lines.
+  size_t ExpectedStartMarker = 0;
+  const size_t kInvalidStartMarker = -1;
+  size_t LastSeenStartMarker = kInvalidStartMarker;
+  Vector<uint32_t> TmpFeatures;
+  while (std::getline(IS, Line, '\n')) {
+    std::istringstream ISS1(Line);
+    std::string Marker;
+    size_t N;
+    ISS1 >> Marker;
+    ISS1 >> N;
+    if (Marker == "STARTED") {
+      // STARTED FILE_ID FILE_SIZE
+      if (ExpectedStartMarker != N)
+        return false;
+      ISS1 >> Files[ExpectedStartMarker].Size;
+      LastSeenStartMarker = ExpectedStartMarker;
+      assert(ExpectedStartMarker < Files.size());
+      ExpectedStartMarker++;
+    } else if (Marker == "DONE") {
+      // DONE FILE_ID COV1 COV2 COV3 ...
+      size_t CurrentFileIdx = N;
+      if (CurrentFileIdx != LastSeenStartMarker)
+        return false;
+      LastSeenStartMarker = kInvalidStartMarker;
+      if (ParseCoverage) {
+        TmpFeatures.clear();  // use a vector from outer scope to avoid resizes.
+        while (ISS1 >> std::hex >> N)
+          TmpFeatures.push_back(N);
+        std::sort(TmpFeatures.begin(), TmpFeatures.end());
+        Files[CurrentFileIdx].Features = TmpFeatures;
+      }
+    } else {
+      return false;
+    }
+  }
+  if (LastSeenStartMarker != kInvalidStartMarker)
+    LastFailure = Files[LastSeenStartMarker].Name;
+
+  FirstNotProcessedFile = ExpectedStartMarker;
+  return true;
+}
+
+size_t Merger::ApproximateMemoryConsumption() const  {
+  size_t Res = 0;
+  for (const auto &F: Files)
+    Res += sizeof(F) + F.Features.size() * sizeof(F.Features[0]);
+  return Res;
+}
+
+// Decides which files need to be merged (add thost to NewFiles).
+// Returns the number of new features added.
+size_t Merger::Merge(const Set<uint32_t> &InitialFeatures,
+                     Vector<std::string> *NewFiles) {
+  NewFiles->clear();
+  assert(NumFilesInFirstCorpus <= Files.size());
+  Set<uint32_t> AllFeatures(InitialFeatures);
+
+  // What features are in the initial corpus?
+  for (size_t i = 0; i < NumFilesInFirstCorpus; i++) {
+    auto &Cur = Files[i].Features;
+    AllFeatures.insert(Cur.begin(), Cur.end());
+  }
+  size_t InitialNumFeatures = AllFeatures.size();
+
+  // Remove all features that we already know from all other inputs.
+  for (size_t i = NumFilesInFirstCorpus; i < Files.size(); i++) {
+    auto &Cur = Files[i].Features;
+    Vector<uint32_t> Tmp;
+    std::set_difference(Cur.begin(), Cur.end(), AllFeatures.begin(),
+                        AllFeatures.end(), std::inserter(Tmp, Tmp.begin()));
+    Cur.swap(Tmp);
+  }
+
+  // Sort. Give preference to
+  //   * smaller files
+  //   * files with more features.
+  std::sort(Files.begin() + NumFilesInFirstCorpus, Files.end(),
+            [&](const MergeFileInfo &a, const MergeFileInfo &b) -> bool {
+              if (a.Size != b.Size)
+                return a.Size < b.Size;
+              return a.Features.size() > b.Features.size();
+            });
+
+  // One greedy pass: add the file's features to AllFeatures.
+  // If new features were added, add this file to NewFiles.
+  for (size_t i = NumFilesInFirstCorpus; i < Files.size(); i++) {
+    auto &Cur = Files[i].Features;
+    // Printf("%s -> sz %zd ft %zd\n", Files[i].Name.c_str(),
+    //       Files[i].Size, Cur.size());
+    size_t OldSize = AllFeatures.size();
+    AllFeatures.insert(Cur.begin(), Cur.end());
+    if (AllFeatures.size() > OldSize)
+      NewFiles->push_back(Files[i].Name);
+  }
+  return AllFeatures.size() - InitialNumFeatures;
+}
+
+void Merger::PrintSummary(std::ostream &OS) {
+  for (auto &File : Files) {
+    OS << std::hex;
+    OS << File.Name << " size: " << File.Size << " features: ";
+    for (auto Feature : File.Features)
+      OS << " " << Feature;
+    OS << "\n";
+  }
+}
+
+Set<uint32_t> Merger::AllFeatures() const {
+  Set<uint32_t> S;
+  for (auto &File : Files)
+    S.insert(File.Features.begin(), File.Features.end());
+  return S;
+}
+
+Set<uint32_t> Merger::ParseSummary(std::istream &IS) {
+  std::string Line, Tmp;
+  Set<uint32_t> Res;
+  while (std::getline(IS, Line, '\n')) {
+    size_t N;
+    std::istringstream ISS1(Line);
+    ISS1 >> Tmp;  // Name
+    ISS1 >> Tmp;  // size:
+    assert(Tmp == "size:" && "Corrupt summary file");
+    ISS1 >> std::hex;
+    ISS1 >> N;    // File Size
+    ISS1 >> Tmp;  // features:
+    assert(Tmp == "features:" && "Corrupt summary file");
+    while (ISS1 >> std::hex >> N)
+      Res.insert(N);
+  }
+  return Res;
+}
+
+// Inner process. May crash if the target crashes.
+void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) {
+  Printf("MERGE-INNER: using the control file '%s'\n", CFPath.c_str());
+  Merger M;
+  std::ifstream IF(CFPath);
+  M.ParseOrExit(IF, false);
+  IF.close();
+  if (!M.LastFailure.empty())
+    Printf("MERGE-INNER: '%s' caused a failure at the previous merge step\n",
+           M.LastFailure.c_str());
+
+  Printf("MERGE-INNER: %zd total files;"
+         " %zd processed earlier; will process %zd files now\n",
+         M.Files.size(), M.FirstNotProcessedFile,
+         M.Files.size() - M.FirstNotProcessedFile);
+
+  std::ofstream OF(CFPath, std::ofstream::out | std::ofstream::app);
+  Set<size_t> AllFeatures;
+  for (size_t i = M.FirstNotProcessedFile; i < M.Files.size(); i++) {
+    auto U = FileToVector(M.Files[i].Name);
+    if (U.size() > MaxInputLen) {
+      U.resize(MaxInputLen);
+      U.shrink_to_fit();
+    }
+    std::ostringstream StartedLine;
+    // Write the pre-run marker.
+    OF << "STARTED " << std::dec << i << " " << U.size() << "\n";
+    OF.flush();  // Flush is important since ExecuteCommand may crash.
+    // Run.
+    TPC.ResetMaps();
+    ExecuteCallback(U.data(), U.size());
+    // Collect coverage. We are iterating over the files in this order:
+    // * First, files in the initial corpus ordered by size, smallest first.
+    // * Then, all other files, smallest first.
+    // So it makes no sense to record all features for all files, instead we
+    // only record features that were not seen before.
+    Set<size_t> UniqFeatures;
+    TPC.CollectFeatures([&](size_t Feature) {
+      if (AllFeatures.insert(Feature).second)
+        UniqFeatures.insert(Feature);
+    });
+    // Show stats.
+    if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1)))
+      PrintStats("pulse ");
+    // Write the post-run marker and the coverage.
+    OF << "DONE " << i;
+    for (size_t F : UniqFeatures)
+      OF << " " << std::hex << F;
+    OF << "\n";
+  }
+}
+
+// Outer process. Does not call the target code and thus sohuld not fail.
+void Fuzzer::CrashResistantMerge(const Vector<std::string> &Args,
+                                 const Vector<std::string> &Corpora,
+                                 const char *CoverageSummaryInputPathOrNull,
+                                 const char *CoverageSummaryOutputPathOrNull) {
+  if (Corpora.size() <= 1) {
+    Printf("Merge requires two or more corpus dirs\n");
+    return;
+  }
+  Vector<SizedFile> AllFiles;
+  GetSizedFilesFromDir(Corpora[0], &AllFiles);
+  size_t NumFilesInFirstCorpus = AllFiles.size();
+  std::sort(AllFiles.begin(), AllFiles.end());
+  for (size_t i = 1; i < Corpora.size(); i++)
+    GetSizedFilesFromDir(Corpora[i], &AllFiles);
+  std::sort(AllFiles.begin() + NumFilesInFirstCorpus, AllFiles.end());
+  Printf("MERGE-OUTER: %zd files, %zd in the initial corpus\n",
+         AllFiles.size(), NumFilesInFirstCorpus);
+  auto CFPath = DirPlusFile(TmpDir(),
+                       "libFuzzerTemp." + std::to_string(GetPid()) + ".txt");
+  // Write the control file.
+  RemoveFile(CFPath);
+  std::ofstream ControlFile(CFPath);
+  ControlFile << AllFiles.size() << "\n";
+  ControlFile << NumFilesInFirstCorpus << "\n";
+  for (auto &SF: AllFiles)
+    ControlFile << SF.File << "\n";
+  if (!ControlFile) {
+    Printf("MERGE-OUTER: failed to write to the control file: %s\n",
+           CFPath.c_str());
+    exit(1);
+  }
+  ControlFile.close();
+
+  // Execute the inner process untill it passes.
+  // Every inner process should execute at least one input.
+  auto BaseCmd = SplitBefore("-ignore_remaining_args=1",
+                             CloneArgsWithoutX(Args, "keep-all-flags"));
+  bool Success = false;
+  for (size_t i = 1; i <= AllFiles.size(); i++) {
+    Printf("MERGE-OUTER: attempt %zd\n", i);
+    auto ExitCode = ExecuteCommand(BaseCmd.first + " -merge_control_file=" +
+                                   CFPath + " " + BaseCmd.second);
+    if (!ExitCode) {
+      Printf("MERGE-OUTER: succesfull in %zd attempt(s)\n", i);
+      Success = true;
+      break;
+    }
+  }
+  if (!Success) {
+    Printf("MERGE-OUTER: zero succesfull attempts, exiting\n");
+    exit(1);
+  }
+  // Read the control file and do the merge.
+  Merger M;
+  std::ifstream IF(CFPath);
+  IF.seekg(0, IF.end);
+  Printf("MERGE-OUTER: the control file has %zd bytes\n", (size_t)IF.tellg());
+  IF.seekg(0, IF.beg);
+  M.ParseOrExit(IF, true);
+  IF.close();
+  Printf("MERGE-OUTER: consumed %zdMb (%zdMb rss) to parse the control file\n",
+         M.ApproximateMemoryConsumption() >> 20, GetPeakRSSMb());
+  if (CoverageSummaryOutputPathOrNull) {
+    Printf("MERGE-OUTER: writing coverage summary for %zd files to %s\n",
+           M.Files.size(), CoverageSummaryOutputPathOrNull);
+    std::ofstream SummaryOut(CoverageSummaryOutputPathOrNull);
+    M.PrintSummary(SummaryOut);
+  }
+  Vector<std::string> NewFiles;
+  Set<uint32_t> InitialFeatures;
+  if (CoverageSummaryInputPathOrNull) {
+    std::ifstream SummaryIn(CoverageSummaryInputPathOrNull);
+    InitialFeatures = M.ParseSummary(SummaryIn);
+    Printf("MERGE-OUTER: coverage summary loaded from %s, %zd features found\n",
+           CoverageSummaryInputPathOrNull, InitialFeatures.size());
+  }
+  size_t NumNewFeatures = M.Merge(InitialFeatures, &NewFiles);
+  Printf("MERGE-OUTER: %zd new files with %zd new features added\n",
+         NewFiles.size(), NumNewFeatures);
+  for (auto &F: NewFiles)
+    WriteToOutputCorpus(FileToVector(F));
+  // We are done, delete the control file.
+  RemoveFile(CFPath);
+}
+
+} // namespace fuzzer
diff --git a/lib/fuzzer/FuzzerMerge.h b/lib/fuzzer/FuzzerMerge.h
new file mode 100644
index 0000000..e54885a
--- /dev/null
+++ b/lib/fuzzer/FuzzerMerge.h
@@ -0,0 +1,80 @@
+//===- FuzzerMerge.h - merging corpa ----------------------------*- C++ -* ===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Merging Corpora.
+//
+// The task:
+//   Take the existing corpus (possibly empty) and merge new inputs into
+//   it so that only inputs with new coverage ('features') are added.
+//   The process should tolerate the crashes, OOMs, leaks, etc.
+//
+// Algorithm:
+//   The outter process collects the set of files and writes their names
+//   into a temporary "control" file, then repeatedly launches the inner
+//   process until all inputs are processed.
+//   The outer process does not actually execute the target code.
+//
+//   The inner process reads the control file and sees a) list of all the inputs
+//   and b) the last processed input. Then it starts processing the inputs one
+//   by one. Before processing every input it writes one line to control file:
+//   STARTED INPUT_ID INPUT_SIZE
+//   After processing an input it write another line:
+//   DONE INPUT_ID Feature1 Feature2 Feature3 ...
+//   If a crash happens while processing an input the last line in the control
+//   file will be "STARTED INPUT_ID" and so the next process will know
+//   where to resume.
+//
+//   Once all inputs are processed by the innner process(es) the outer process
+//   reads the control files and does the merge based entirely on the contents
+//   of control file.
+//   It uses a single pass greedy algorithm choosing first the smallest inputs
+//   within the same size the inputs that have more new features.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FUZZER_MERGE_H
+#define LLVM_FUZZER_MERGE_H
+
+#include "FuzzerDefs.h"
+
+#include <istream>
+#include <ostream>
+#include <set>
+#include <vector>
+
+namespace fuzzer {
+
+struct MergeFileInfo {
+  std::string Name;
+  size_t Size = 0;
+  Vector<uint32_t> Features;
+};
+
+struct Merger {
+  Vector<MergeFileInfo> Files;
+  size_t NumFilesInFirstCorpus = 0;
+  size_t FirstNotProcessedFile = 0;
+  std::string LastFailure;
+
+  bool Parse(std::istream &IS, bool ParseCoverage);
+  bool Parse(const std::string &Str, bool ParseCoverage);
+  void ParseOrExit(std::istream &IS, bool ParseCoverage);
+  void PrintSummary(std::ostream &OS);
+  Set<uint32_t> ParseSummary(std::istream &IS);
+  size_t Merge(const Set<uint32_t> &InitialFeatures,
+               Vector<std::string> *NewFiles);
+  size_t Merge(Vector<std::string> *NewFiles) {
+    return Merge(Set<uint32_t>{}, NewFiles);
+  }
+  size_t ApproximateMemoryConsumption() const;
+  Set<uint32_t> AllFeatures() const;
+};
+
+}  // namespace fuzzer
+
+#endif  // LLVM_FUZZER_MERGE_H
diff --git a/lib/fuzzer/FuzzerMutate.cpp b/lib/fuzzer/FuzzerMutate.cpp
new file mode 100644
index 0000000..9ee5299
--- /dev/null
+++ b/lib/fuzzer/FuzzerMutate.cpp
@@ -0,0 +1,533 @@
+//===- FuzzerMutate.cpp - Mutate a test input -----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Mutate a test input.
+//===----------------------------------------------------------------------===//
+
+#include "FuzzerMutate.h"
+#include "FuzzerCorpus.h"
+#include "FuzzerDefs.h"
+#include "FuzzerExtFunctions.h"
+#include "FuzzerIO.h"
+#include "FuzzerOptions.h"
+
+namespace fuzzer {
+
+const size_t Dictionary::kMaxDictSize;
+
+static void PrintASCII(const Word &W, const char *PrintAfter) {
+  PrintASCII(W.data(), W.size(), PrintAfter);
+}
+
+MutationDispatcher::MutationDispatcher(Random &Rand,
+                                       const FuzzingOptions &Options)
+    : Rand(Rand), Options(Options) {
+  DefaultMutators.insert(
+      DefaultMutators.begin(),
+      {
+          {&MutationDispatcher::Mutate_EraseBytes, "EraseBytes"},
+          {&MutationDispatcher::Mutate_InsertByte, "InsertByte"},
+          {&MutationDispatcher::Mutate_InsertRepeatedBytes,
+           "InsertRepeatedBytes"},
+          {&MutationDispatcher::Mutate_ChangeByte, "ChangeByte"},
+          {&MutationDispatcher::Mutate_ChangeBit, "ChangeBit"},
+          {&MutationDispatcher::Mutate_ShuffleBytes, "ShuffleBytes"},
+          {&MutationDispatcher::Mutate_ChangeASCIIInteger, "ChangeASCIIInt"},
+          {&MutationDispatcher::Mutate_ChangeBinaryInteger, "ChangeBinInt"},
+          {&MutationDispatcher::Mutate_CopyPart, "CopyPart"},
+          {&MutationDispatcher::Mutate_CrossOver, "CrossOver"},
+          {&MutationDispatcher::Mutate_AddWordFromManualDictionary,
+           "ManualDict"},
+          {&MutationDispatcher::Mutate_AddWordFromPersistentAutoDictionary,
+           "PersAutoDict"},
+      });
+  if(Options.UseCmp)
+    DefaultMutators.push_back(
+        {&MutationDispatcher::Mutate_AddWordFromTORC, "CMP"});
+
+  if (EF->LLVMFuzzerCustomMutator)
+    Mutators.push_back({&MutationDispatcher::Mutate_Custom, "Custom"});
+  else
+    Mutators = DefaultMutators;
+
+  if (EF->LLVMFuzzerCustomCrossOver)
+    Mutators.push_back(
+        {&MutationDispatcher::Mutate_CustomCrossOver, "CustomCrossOver"});
+}
+
+static char RandCh(Random &Rand) {
+  if (Rand.RandBool()) return Rand(256);
+  const char *Special = "!*'();:@&=+$,/?%#[]012Az-`~.\xff\x00";
+  return Special[Rand(sizeof(Special) - 1)];
+}
+
+size_t MutationDispatcher::Mutate_Custom(uint8_t *Data, size_t Size,
+                                         size_t MaxSize) {
+  return EF->LLVMFuzzerCustomMutator(Data, Size, MaxSize, Rand.Rand());
+}
+
+size_t MutationDispatcher::Mutate_CustomCrossOver(uint8_t *Data, size_t Size,
+                                                  size_t MaxSize) {
+  if (!Corpus || Corpus->size() < 2 || Size == 0)
+    return 0;
+  size_t Idx = Rand(Corpus->size());
+  const Unit &Other = (*Corpus)[Idx];
+  if (Other.empty())
+    return 0;
+  CustomCrossOverInPlaceHere.resize(MaxSize);
+  auto &U = CustomCrossOverInPlaceHere;
+  size_t NewSize = EF->LLVMFuzzerCustomCrossOver(
+      Data, Size, Other.data(), Other.size(), U.data(), U.size(), Rand.Rand());
+  if (!NewSize)
+    return 0;
+  assert(NewSize <= MaxSize && "CustomCrossOver returned overisized unit");
+  memcpy(Data, U.data(), NewSize);
+  return NewSize;
+}
+
+size_t MutationDispatcher::Mutate_ShuffleBytes(uint8_t *Data, size_t Size,
+                                               size_t MaxSize) {
+  if (Size > MaxSize || Size == 0) return 0;
+  size_t ShuffleAmount =
+      Rand(std::min(Size, (size_t)8)) + 1; // [1,8] and <= Size.
+  size_t ShuffleStart = Rand(Size - ShuffleAmount);
+  assert(ShuffleStart + ShuffleAmount <= Size);
+  std::shuffle(Data + ShuffleStart, Data + ShuffleStart + ShuffleAmount, Rand);
+  return Size;
+}
+
+size_t MutationDispatcher::Mutate_EraseBytes(uint8_t *Data, size_t Size,
+                                             size_t MaxSize) {
+  if (Size <= 1) return 0;
+  size_t N = Rand(Size / 2) + 1;
+  assert(N < Size);
+  size_t Idx = Rand(Size - N + 1);
+  // Erase Data[Idx:Idx+N].
+  memmove(Data + Idx, Data + Idx + N, Size - Idx - N);
+  // Printf("Erase: %zd %zd => %zd; Idx %zd\n", N, Size, Size - N, Idx);
+  return Size - N;
+}
+
+size_t MutationDispatcher::Mutate_InsertByte(uint8_t *Data, size_t Size,
+                                             size_t MaxSize) {
+  if (Size >= MaxSize) return 0;
+  size_t Idx = Rand(Size + 1);
+  // Insert new value at Data[Idx].
+  memmove(Data + Idx + 1, Data + Idx, Size - Idx);
+  Data[Idx] = RandCh(Rand);
+  return Size + 1;
+}
+
+size_t MutationDispatcher::Mutate_InsertRepeatedBytes(uint8_t *Data,
+                                                      size_t Size,
+                                                      size_t MaxSize) {
+  const size_t kMinBytesToInsert = 3;
+  if (Size + kMinBytesToInsert >= MaxSize) return 0;
+  size_t MaxBytesToInsert = std::min(MaxSize - Size, (size_t)128);
+  size_t N = Rand(MaxBytesToInsert - kMinBytesToInsert + 1) + kMinBytesToInsert;
+  assert(Size + N <= MaxSize && N);
+  size_t Idx = Rand(Size + 1);
+  // Insert new values at Data[Idx].
+  memmove(Data + Idx + N, Data + Idx, Size - Idx);
+  // Give preference to 0x00 and 0xff.
+  uint8_t Byte = Rand.RandBool() ? Rand(256) : (Rand.RandBool() ? 0 : 255);
+  for (size_t i = 0; i < N; i++)
+    Data[Idx + i] = Byte;
+  return Size + N;
+}
+
+size_t MutationDispatcher::Mutate_ChangeByte(uint8_t *Data, size_t Size,
+                                             size_t MaxSize) {
+  if (Size > MaxSize) return 0;
+  size_t Idx = Rand(Size);
+  Data[Idx] = RandCh(Rand);
+  return Size;
+}
+
+size_t MutationDispatcher::Mutate_ChangeBit(uint8_t *Data, size_t Size,
+                                            size_t MaxSize) {
+  if (Size > MaxSize) return 0;
+  size_t Idx = Rand(Size);
+  Data[Idx] ^= 1 << Rand(8);
+  return Size;
+}
+
+size_t MutationDispatcher::Mutate_AddWordFromManualDictionary(uint8_t *Data,
+                                                              size_t Size,
+                                                              size_t MaxSize) {
+  return AddWordFromDictionary(ManualDictionary, Data, Size, MaxSize);
+}
+
+size_t MutationDispatcher::ApplyDictionaryEntry(uint8_t *Data, size_t Size,
+                                                size_t MaxSize,
+                                                DictionaryEntry &DE) {
+  const Word &W = DE.GetW();
+  bool UsePositionHint = DE.HasPositionHint() &&
+                         DE.GetPositionHint() + W.size() < Size &&
+                         Rand.RandBool();
+  if (Rand.RandBool()) {  // Insert W.
+    if (Size + W.size() > MaxSize) return 0;
+    size_t Idx = UsePositionHint ? DE.GetPositionHint() : Rand(Size + 1);
+    memmove(Data + Idx + W.size(), Data + Idx, Size - Idx);
+    memcpy(Data + Idx, W.data(), W.size());
+    Size += W.size();
+  } else {  // Overwrite some bytes with W.
+    if (W.size() > Size) return 0;
+    size_t Idx = UsePositionHint ? DE.GetPositionHint() : Rand(Size - W.size());
+    memcpy(Data + Idx, W.data(), W.size());
+  }
+  return Size;
+}
+
+// Somewhere in the past we have observed a comparison instructions
+// with arguments Arg1 Arg2. This function tries to guess a dictionary
+// entry that will satisfy that comparison.
+// It first tries to find one of the arguments (possibly swapped) in the
+// input and if it succeeds it creates a DE with a position hint.
+// Otherwise it creates a DE with one of the arguments w/o a position hint.
+DictionaryEntry MutationDispatcher::MakeDictionaryEntryFromCMP(
+    const void *Arg1, const void *Arg2,
+    const void *Arg1Mutation, const void *Arg2Mutation,
+    size_t ArgSize, const uint8_t *Data,
+    size_t Size) {
+  ScopedDoingMyOwnMemOrStr scoped_doing_my_own_mem_os_str;
+  bool HandleFirst = Rand.RandBool();
+  const void *ExistingBytes, *DesiredBytes;
+  Word W;
+  const uint8_t *End = Data + Size;
+  for (int Arg = 0; Arg < 2; Arg++) {
+    ExistingBytes = HandleFirst ? Arg1 : Arg2;
+    DesiredBytes = HandleFirst ? Arg2Mutation : Arg1Mutation;
+    HandleFirst = !HandleFirst;
+    W.Set(reinterpret_cast<const uint8_t*>(DesiredBytes), ArgSize);
+    const size_t kMaxNumPositions = 8;
+    size_t Positions[kMaxNumPositions];
+    size_t NumPositions = 0;
+    for (const uint8_t *Cur = Data;
+         Cur < End && NumPositions < kMaxNumPositions; Cur++) {
+      Cur =
+          (const uint8_t *)SearchMemory(Cur, End - Cur, ExistingBytes, ArgSize);
+      if (!Cur) break;
+      Positions[NumPositions++] = Cur - Data;
+    }
+    if (!NumPositions) continue;
+    return DictionaryEntry(W, Positions[Rand(NumPositions)]);
+  }
+  DictionaryEntry DE(W);
+  return DE;
+}
+
+
+template <class T>
+DictionaryEntry MutationDispatcher::MakeDictionaryEntryFromCMP(
+    T Arg1, T Arg2, const uint8_t *Data, size_t Size) {
+  if (Rand.RandBool()) Arg1 = Bswap(Arg1);
+  if (Rand.RandBool()) Arg2 = Bswap(Arg2);
+  T Arg1Mutation = Arg1 + Rand(-1, 1);
+  T Arg2Mutation = Arg2 + Rand(-1, 1);
+  return MakeDictionaryEntryFromCMP(&Arg1, &Arg2, &Arg1Mutation, &Arg2Mutation,
+                                    sizeof(Arg1), Data, Size);
+}
+
+DictionaryEntry MutationDispatcher::MakeDictionaryEntryFromCMP(
+    const Word &Arg1, const Word &Arg2, const uint8_t *Data, size_t Size) {
+  return MakeDictionaryEntryFromCMP(Arg1.data(), Arg2.data(), Arg1.data(),
+                                    Arg2.data(), Arg1.size(), Data, Size);
+}
+
+size_t MutationDispatcher::Mutate_AddWordFromTORC(
+    uint8_t *Data, size_t Size, size_t MaxSize) {
+  Word W;
+  DictionaryEntry DE;
+  switch (Rand(4)) {
+  case 0: {
+    auto X = TPC.TORC8.Get(Rand.Rand());
+    DE = MakeDictionaryEntryFromCMP(X.A, X.B, Data, Size);
+  } break;
+  case 1: {
+    auto X = TPC.TORC4.Get(Rand.Rand());
+    if ((X.A >> 16) == 0 && (X.B >> 16) == 0 && Rand.RandBool())
+      DE = MakeDictionaryEntryFromCMP((uint16_t)X.A, (uint16_t)X.B, Data, Size);
+    else
+      DE = MakeDictionaryEntryFromCMP(X.A, X.B, Data, Size);
+  } break;
+  case 2: {
+    auto X = TPC.TORCW.Get(Rand.Rand());
+    DE = MakeDictionaryEntryFromCMP(X.A, X.B, Data, Size);
+  } break;
+  case 3: if (Options.UseMemmem) {
+    auto X = TPC.MMT.Get(Rand.Rand());
+    DE = DictionaryEntry(X);
+  } break;
+  default:
+    assert(0);
+  }
+  if (!DE.GetW().size()) return 0;
+  Size = ApplyDictionaryEntry(Data, Size, MaxSize, DE);
+  if (!Size) return 0;
+  DictionaryEntry &DERef =
+      CmpDictionaryEntriesDeque[CmpDictionaryEntriesDequeIdx++ %
+                                kCmpDictionaryEntriesDequeSize];
+  DERef = DE;
+  CurrentDictionaryEntrySequence.push_back(&DERef);
+  return Size;
+}
+
+size_t MutationDispatcher::Mutate_AddWordFromPersistentAutoDictionary(
+    uint8_t *Data, size_t Size, size_t MaxSize) {
+  return AddWordFromDictionary(PersistentAutoDictionary, Data, Size, MaxSize);
+}
+
+size_t MutationDispatcher::AddWordFromDictionary(Dictionary &D, uint8_t *Data,
+                                                 size_t Size, size_t MaxSize) {
+  if (Size > MaxSize) return 0;
+  if (D.empty()) return 0;
+  DictionaryEntry &DE = D[Rand(D.size())];
+  Size = ApplyDictionaryEntry(Data, Size, MaxSize, DE);
+  if (!Size) return 0;
+  DE.IncUseCount();
+  CurrentDictionaryEntrySequence.push_back(&DE);
+  return Size;
+}
+
+// Overwrites part of To[0,ToSize) with a part of From[0,FromSize).
+// Returns ToSize.
+size_t MutationDispatcher::CopyPartOf(const uint8_t *From, size_t FromSize,
+                                      uint8_t *To, size_t ToSize) {
+  // Copy From[FromBeg, FromBeg + CopySize) into To[ToBeg, ToBeg + CopySize).
+  size_t ToBeg = Rand(ToSize);
+  size_t CopySize = Rand(ToSize - ToBeg) + 1;
+  assert(ToBeg + CopySize <= ToSize);
+  CopySize = std::min(CopySize, FromSize);
+  size_t FromBeg = Rand(FromSize - CopySize + 1);
+  assert(FromBeg + CopySize <= FromSize);
+  memmove(To + ToBeg, From + FromBeg, CopySize);
+  return ToSize;
+}
+
+// Inserts part of From[0,ToSize) into To.
+// Returns new size of To on success or 0 on failure.
+size_t MutationDispatcher::InsertPartOf(const uint8_t *From, size_t FromSize,
+                                        uint8_t *To, size_t ToSize,
+                                        size_t MaxToSize) {
+  if (ToSize >= MaxToSize) return 0;
+  size_t AvailableSpace = MaxToSize - ToSize;
+  size_t MaxCopySize = std::min(AvailableSpace, FromSize);
+  size_t CopySize = Rand(MaxCopySize) + 1;
+  size_t FromBeg = Rand(FromSize - CopySize + 1);
+  assert(FromBeg + CopySize <= FromSize);
+  size_t ToInsertPos = Rand(ToSize + 1);
+  assert(ToInsertPos + CopySize <= MaxToSize);
+  size_t TailSize = ToSize - ToInsertPos;
+  if (To == From) {
+    MutateInPlaceHere.resize(MaxToSize);
+    memcpy(MutateInPlaceHere.data(), From + FromBeg, CopySize);
+    memmove(To + ToInsertPos + CopySize, To + ToInsertPos, TailSize);
+    memmove(To + ToInsertPos, MutateInPlaceHere.data(), CopySize);
+  } else {
+    memmove(To + ToInsertPos + CopySize, To + ToInsertPos, TailSize);
+    memmove(To + ToInsertPos, From + FromBeg, CopySize);
+  }
+  return ToSize + CopySize;
+}
+
+size_t MutationDispatcher::Mutate_CopyPart(uint8_t *Data, size_t Size,
+                                           size_t MaxSize) {
+  if (Size > MaxSize || Size == 0) return 0;
+  if (Rand.RandBool())
+    return CopyPartOf(Data, Size, Data, Size);
+  else
+    return InsertPartOf(Data, Size, Data, Size, MaxSize);
+}
+
+size_t MutationDispatcher::Mutate_ChangeASCIIInteger(uint8_t *Data, size_t Size,
+                                                     size_t MaxSize) {
+  if (Size > MaxSize) return 0;
+  size_t B = Rand(Size);
+  while (B < Size && !isdigit(Data[B])) B++;
+  if (B == Size) return 0;
+  size_t E = B;
+  while (E < Size && isdigit(Data[E])) E++;
+  assert(B < E);
+  // now we have digits in [B, E).
+  // strtol and friends don't accept non-zero-teminated data, parse it manually.
+  uint64_t Val = Data[B] - '0';
+  for (size_t i = B + 1; i < E; i++)
+    Val = Val * 10 + Data[i] - '0';
+
+  // Mutate the integer value.
+  switch(Rand(5)) {
+    case 0: Val++; break;
+    case 1: Val--; break;
+    case 2: Val /= 2; break;
+    case 3: Val *= 2; break;
+    case 4: Val = Rand(Val * Val); break;
+    default: assert(0);
+  }
+  // Just replace the bytes with the new ones, don't bother moving bytes.
+  for (size_t i = B; i < E; i++) {
+    size_t Idx = E + B - i - 1;
+    assert(Idx >= B && Idx < E);
+    Data[Idx] = (Val % 10) + '0';
+    Val /= 10;
+  }
+  return Size;
+}
+
+template<class T>
+size_t ChangeBinaryInteger(uint8_t *Data, size_t Size, Random &Rand) {
+  if (Size < sizeof(T)) return 0;
+  size_t Off = Rand(Size - sizeof(T) + 1);
+  assert(Off + sizeof(T) <= Size);
+  T Val;
+  if (Off < 64 && !Rand(4)) {
+    Val = Size;
+    if (Rand.RandBool())
+      Val = Bswap(Val);
+  } else {
+    memcpy(&Val, Data + Off, sizeof(Val));
+    T Add = Rand(21);
+    Add -= 10;
+    if (Rand.RandBool())
+      Val = Bswap(T(Bswap(Val) + Add)); // Add assuming different endiannes.
+    else
+      Val = Val + Add;               // Add assuming current endiannes.
+    if (Add == 0 || Rand.RandBool()) // Maybe negate.
+      Val = -Val;
+  }
+  memcpy(Data + Off, &Val, sizeof(Val));
+  return Size;
+}
+
+size_t MutationDispatcher::Mutate_ChangeBinaryInteger(uint8_t *Data,
+                                                      size_t Size,
+                                                      size_t MaxSize) {
+  if (Size > MaxSize) return 0;
+  switch (Rand(4)) {
+    case 3: return ChangeBinaryInteger<uint64_t>(Data, Size, Rand);
+    case 2: return ChangeBinaryInteger<uint32_t>(Data, Size, Rand);
+    case 1: return ChangeBinaryInteger<uint16_t>(Data, Size, Rand);
+    case 0: return ChangeBinaryInteger<uint8_t>(Data, Size, Rand);
+    default: assert(0);
+  }
+  return 0;
+}
+
+size_t MutationDispatcher::Mutate_CrossOver(uint8_t *Data, size_t Size,
+                                            size_t MaxSize) {
+  if (Size > MaxSize) return 0;
+  if (!Corpus || Corpus->size() < 2 || Size == 0) return 0;
+  size_t Idx = Rand(Corpus->size());
+  const Unit &O = (*Corpus)[Idx];
+  if (O.empty()) return 0;
+  MutateInPlaceHere.resize(MaxSize);
+  auto &U = MutateInPlaceHere;
+  size_t NewSize = 0;
+  switch(Rand(3)) {
+    case 0:
+      NewSize = CrossOver(Data, Size, O.data(), O.size(), U.data(), U.size());
+      break;
+    case 1:
+      NewSize = InsertPartOf(O.data(), O.size(), U.data(), U.size(), MaxSize);
+      if (!NewSize)
+        NewSize = CopyPartOf(O.data(), O.size(), U.data(), U.size());
+      break;
+    case 2:
+      NewSize = CopyPartOf(O.data(), O.size(), U.data(), U.size());
+      break;
+    default: assert(0);
+  }
+  assert(NewSize > 0 && "CrossOver returned empty unit");
+  assert(NewSize <= MaxSize && "CrossOver returned overisized unit");
+  memcpy(Data, U.data(), NewSize);
+  return NewSize;
+}
+
+void MutationDispatcher::StartMutationSequence() {
+  CurrentMutatorSequence.clear();
+  CurrentDictionaryEntrySequence.clear();
+}
+
+// Copy successful dictionary entries to PersistentAutoDictionary.
+void MutationDispatcher::RecordSuccessfulMutationSequence() {
+  for (auto DE : CurrentDictionaryEntrySequence) {
+    // PersistentAutoDictionary.AddWithSuccessCountOne(DE);
+    DE->IncSuccessCount();
+    assert(DE->GetW().size());
+    // Linear search is fine here as this happens seldom.
+    if (!PersistentAutoDictionary.ContainsWord(DE->GetW()))
+      PersistentAutoDictionary.push_back({DE->GetW(), 1});
+  }
+}
+
+void MutationDispatcher::PrintRecommendedDictionary() {
+  Vector<DictionaryEntry> V;
+  for (auto &DE : PersistentAutoDictionary)
+    if (!ManualDictionary.ContainsWord(DE.GetW()))
+      V.push_back(DE);
+  if (V.empty()) return;
+  Printf("###### Recommended dictionary. ######\n");
+  for (auto &DE: V) {
+    assert(DE.GetW().size());
+    Printf("\"");
+    PrintASCII(DE.GetW(), "\"");
+    Printf(" # Uses: %zd\n", DE.GetUseCount());
+  }
+  Printf("###### End of recommended dictionary. ######\n");
+}
+
+void MutationDispatcher::PrintMutationSequence() {
+  Printf("MS: %zd ", CurrentMutatorSequence.size());
+  for (auto M : CurrentMutatorSequence)
+    Printf("%s-", M.Name);
+  if (!CurrentDictionaryEntrySequence.empty()) {
+    Printf(" DE: ");
+    for (auto DE : CurrentDictionaryEntrySequence) {
+      Printf("\"");
+      PrintASCII(DE->GetW(), "\"-");
+    }
+  }
+}
+
+size_t MutationDispatcher::Mutate(uint8_t *Data, size_t Size, size_t MaxSize) {
+  return MutateImpl(Data, Size, MaxSize, Mutators);
+}
+
+size_t MutationDispatcher::DefaultMutate(uint8_t *Data, size_t Size,
+                                         size_t MaxSize) {
+  return MutateImpl(Data, Size, MaxSize, DefaultMutators);
+}
+
+// Mutates Data in place, returns new size.
+size_t MutationDispatcher::MutateImpl(uint8_t *Data, size_t Size,
+                                      size_t MaxSize,
+                                      Vector<Mutator> &Mutators) {
+  assert(MaxSize > 0);
+  // Some mutations may fail (e.g. can't insert more bytes if Size == MaxSize),
+  // in which case they will return 0.
+  // Try several times before returning un-mutated data.
+  for (int Iter = 0; Iter < 100; Iter++) {
+    auto M = Mutators[Rand(Mutators.size())];
+    size_t NewSize = (this->*(M.Fn))(Data, Size, MaxSize);
+    if (NewSize && NewSize <= MaxSize) {
+      if (Options.OnlyASCII)
+        ToASCII(Data, NewSize);
+      CurrentMutatorSequence.push_back(M);
+      return NewSize;
+    }
+  }
+  *Data = ' ';
+  return 1;   // Fallback, should not happen frequently.
+}
+
+void MutationDispatcher::AddWordToManualDictionary(const Word &W) {
+  ManualDictionary.push_back(
+      {W, std::numeric_limits<size_t>::max()});
+}
+
+}  // namespace fuzzer
diff --git a/lib/fuzzer/FuzzerMutate.h b/lib/fuzzer/FuzzerMutate.h
new file mode 100644
index 0000000..4aa58af
--- /dev/null
+++ b/lib/fuzzer/FuzzerMutate.h
@@ -0,0 +1,150 @@
+//===- FuzzerMutate.h - Internal header for the Fuzzer ----------*- C++ -* ===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// fuzzer::MutationDispatcher
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FUZZER_MUTATE_H
+#define LLVM_FUZZER_MUTATE_H
+
+#include "FuzzerDefs.h"
+#include "FuzzerDictionary.h"
+#include "FuzzerOptions.h"
+#include "FuzzerRandom.h"
+
+namespace fuzzer {
+
+class MutationDispatcher {
+public:
+  MutationDispatcher(Random &Rand, const FuzzingOptions &Options);
+  ~MutationDispatcher() {}
+  /// Indicate that we are about to start a new sequence of mutations.
+  void StartMutationSequence();
+  /// Print the current sequence of mutations.
+  void PrintMutationSequence();
+  /// Indicate that the current sequence of mutations was successfull.
+  void RecordSuccessfulMutationSequence();
+  /// Mutates data by invoking user-provided mutator.
+  size_t Mutate_Custom(uint8_t *Data, size_t Size, size_t MaxSize);
+  /// Mutates data by invoking user-provided crossover.
+  size_t Mutate_CustomCrossOver(uint8_t *Data, size_t Size, size_t MaxSize);
+  /// Mutates data by shuffling bytes.
+  size_t Mutate_ShuffleBytes(uint8_t *Data, size_t Size, size_t MaxSize);
+  /// Mutates data by erasing bytes.
+  size_t Mutate_EraseBytes(uint8_t *Data, size_t Size, size_t MaxSize);
+  /// Mutates data by inserting a byte.
+  size_t Mutate_InsertByte(uint8_t *Data, size_t Size, size_t MaxSize);
+  /// Mutates data by inserting several repeated bytes.
+  size_t Mutate_InsertRepeatedBytes(uint8_t *Data, size_t Size, size_t MaxSize);
+  /// Mutates data by chanding one byte.
+  size_t Mutate_ChangeByte(uint8_t *Data, size_t Size, size_t MaxSize);
+  /// Mutates data by chanding one bit.
+  size_t Mutate_ChangeBit(uint8_t *Data, size_t Size, size_t MaxSize);
+  /// Mutates data by copying/inserting a part of data into a different place.
+  size_t Mutate_CopyPart(uint8_t *Data, size_t Size, size_t MaxSize);
+
+  /// Mutates data by adding a word from the manual dictionary.
+  size_t Mutate_AddWordFromManualDictionary(uint8_t *Data, size_t Size,
+                                            size_t MaxSize);
+
+  /// Mutates data by adding a word from the TORC.
+  size_t Mutate_AddWordFromTORC(uint8_t *Data, size_t Size, size_t MaxSize);
+
+  /// Mutates data by adding a word from the persistent automatic dictionary.
+  size_t Mutate_AddWordFromPersistentAutoDictionary(uint8_t *Data, size_t Size,
+                                                    size_t MaxSize);
+
+  /// Tries to find an ASCII integer in Data, changes it to another ASCII int.
+  size_t Mutate_ChangeASCIIInteger(uint8_t *Data, size_t Size, size_t MaxSize);
+  /// Change a 1-, 2-, 4-, or 8-byte integer in interesting ways.
+  size_t Mutate_ChangeBinaryInteger(uint8_t *Data, size_t Size, size_t MaxSize);
+
+  /// CrossOver Data with some other element of the corpus.
+  size_t Mutate_CrossOver(uint8_t *Data, size_t Size, size_t MaxSize);
+
+  /// Applies one of the configured mutations.
+  /// Returns the new size of data which could be up to MaxSize.
+  size_t Mutate(uint8_t *Data, size_t Size, size_t MaxSize);
+  /// Applies one of the default mutations. Provided as a service
+  /// to mutation authors.
+  size_t DefaultMutate(uint8_t *Data, size_t Size, size_t MaxSize);
+
+  /// Creates a cross-over of two pieces of Data, returns its size.
+  size_t CrossOver(const uint8_t *Data1, size_t Size1, const uint8_t *Data2,
+                   size_t Size2, uint8_t *Out, size_t MaxOutSize);
+
+  void AddWordToManualDictionary(const Word &W);
+
+  void PrintRecommendedDictionary();
+
+  void SetCorpus(const InputCorpus *Corpus) { this->Corpus = Corpus; }
+
+  Random &GetRand() { return Rand; }
+
+private:
+
+  struct Mutator {
+    size_t (MutationDispatcher::*Fn)(uint8_t *Data, size_t Size, size_t Max);
+    const char *Name;
+  };
+
+  size_t AddWordFromDictionary(Dictionary &D, uint8_t *Data, size_t Size,
+                               size_t MaxSize);
+  size_t MutateImpl(uint8_t *Data, size_t Size, size_t MaxSize,
+                    Vector<Mutator> &Mutators);
+
+  size_t InsertPartOf(const uint8_t *From, size_t FromSize, uint8_t *To,
+                      size_t ToSize, size_t MaxToSize);
+  size_t CopyPartOf(const uint8_t *From, size_t FromSize, uint8_t *To,
+                    size_t ToSize);
+  size_t ApplyDictionaryEntry(uint8_t *Data, size_t Size, size_t MaxSize,
+                              DictionaryEntry &DE);
+
+  template <class T>
+  DictionaryEntry MakeDictionaryEntryFromCMP(T Arg1, T Arg2,
+                                             const uint8_t *Data, size_t Size);
+  DictionaryEntry MakeDictionaryEntryFromCMP(const Word &Arg1, const Word &Arg2,
+                                             const uint8_t *Data, size_t Size);
+  DictionaryEntry MakeDictionaryEntryFromCMP(const void *Arg1, const void *Arg2,
+                                             const void *Arg1Mutation,
+                                             const void *Arg2Mutation,
+                                             size_t ArgSize,
+                                             const uint8_t *Data, size_t Size);
+
+  Random &Rand;
+  const FuzzingOptions Options;
+
+  // Dictionary provided by the user via -dict=DICT_FILE.
+  Dictionary ManualDictionary;
+  // Temporary dictionary modified by the fuzzer itself,
+  // recreated periodically.
+  Dictionary TempAutoDictionary;
+  // Persistent dictionary modified by the fuzzer, consists of
+  // entries that led to successfull discoveries in the past mutations.
+  Dictionary PersistentAutoDictionary;
+
+  Vector<Mutator> CurrentMutatorSequence;
+  Vector<DictionaryEntry *> CurrentDictionaryEntrySequence;
+
+  static const size_t kCmpDictionaryEntriesDequeSize = 16;
+  DictionaryEntry CmpDictionaryEntriesDeque[kCmpDictionaryEntriesDequeSize];
+  size_t CmpDictionaryEntriesDequeIdx = 0;
+
+  const InputCorpus *Corpus = nullptr;
+  Vector<uint8_t> MutateInPlaceHere;
+  // CustomCrossOver needs its own buffer as a custom implementation may call
+  // LLVMFuzzerMutate, which in turn may resize MutateInPlaceHere.
+  Vector<uint8_t> CustomCrossOverInPlaceHere;
+
+  Vector<Mutator> Mutators;
+  Vector<Mutator> DefaultMutators;
+};
+
+}  // namespace fuzzer
+
+#endif  // LLVM_FUZZER_MUTATE_H
diff --git a/lib/fuzzer/FuzzerOptions.h b/lib/fuzzer/FuzzerOptions.h
new file mode 100644
index 0000000..73953e1
--- /dev/null
+++ b/lib/fuzzer/FuzzerOptions.h
@@ -0,0 +1,72 @@
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// fuzzer::FuzzingOptions
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FUZZER_OPTIONS_H
+#define LLVM_FUZZER_OPTIONS_H
+
+#include "FuzzerDefs.h"
+
+namespace fuzzer {
+
+struct FuzzingOptions {
+  int Verbosity = 1;
+  size_t MaxLen = 0;
+  bool ExperimentalLenControl = false;
+  int UnitTimeoutSec = 300;
+  int TimeoutExitCode = 77;
+  int ErrorExitCode = 77;
+  int MaxTotalTimeSec = 0;
+  int RssLimitMb = 0;
+  bool DoCrossOver = true;
+  int MutateDepth = 5;
+  bool UseCounters = false;
+  bool UseIndirCalls = true;
+  bool UseMemmem = true;
+  bool UseCmp = false;
+  bool UseValueProfile = false;
+  bool Shrink = false;
+  bool ReduceInputs = false;
+  int ReloadIntervalSec = 1;
+  bool ShuffleAtStartUp = true;
+  bool PreferSmall = true;
+  size_t MaxNumberOfRuns = -1L;
+  int ReportSlowUnits = 10;
+  bool OnlyASCII = false;
+  std::string OutputCorpus;
+  std::string ArtifactPrefix = "./";
+  std::string ExactArtifactPath;
+  std::string ExitOnSrcPos;
+  std::string ExitOnItem;
+  bool SaveArtifacts = true;
+  bool PrintNEW = true; // Print a status line when new units are found;
+  bool PrintNewCovPcs = false;
+  int PrintNewCovFuncs = 0;
+  bool PrintFinalStats = false;
+  bool PrintCorpusStats = false;
+  bool PrintCoverage = false;
+  bool DumpCoverage = false;
+  bool UseClangCoverage = false;
+  bool DetectLeaks = true;
+  int PurgeAllocatorIntervalSec = 1;
+  int UseFeatureFrequency = false;
+  int  TraceMalloc = 0;
+  bool HandleAbrt = false;
+  bool HandleBus = false;
+  bool HandleFpe = false;
+  bool HandleIll = false;
+  bool HandleInt = false;
+  bool HandleSegv = false;
+  bool HandleTerm = false;
+  bool HandleXfsz = false;
+};
+
+}  // namespace fuzzer
+
+#endif  // LLVM_FUZZER_OPTIONS_H
diff --git a/lib/fuzzer/FuzzerRandom.h b/lib/fuzzer/FuzzerRandom.h
new file mode 100644
index 0000000..8a1aa3e
--- /dev/null
+++ b/lib/fuzzer/FuzzerRandom.h
@@ -0,0 +1,34 @@
+//===- FuzzerRandom.h - Internal header for the Fuzzer ----------*- C++ -* ===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// fuzzer::Random
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FUZZER_RANDOM_H
+#define LLVM_FUZZER_RANDOM_H
+
+#include <random>
+
+namespace fuzzer {
+class Random : public std::mt19937 {
+ public:
+  Random(unsigned int seed) : std::mt19937(seed) {}
+  result_type operator()() { return this->std::mt19937::operator()(); }
+  size_t Rand() { return this->operator()(); }
+  size_t RandBool() { return Rand() % 2; }
+  size_t operator()(size_t n) { return n ? Rand() % n : 0; }
+  intptr_t operator()(intptr_t From, intptr_t To) {
+    assert(From < To);
+    intptr_t RangeSize = To - From + 1;
+    return operator()(RangeSize) + From;
+  }
+};
+
+}  // namespace fuzzer
+
+#endif  // LLVM_FUZZER_RANDOM_H
diff --git a/lib/fuzzer/FuzzerSHA1.cpp b/lib/fuzzer/FuzzerSHA1.cpp
new file mode 100644
index 0000000..d2f8e81
--- /dev/null
+++ b/lib/fuzzer/FuzzerSHA1.cpp
@@ -0,0 +1,222 @@
+//===- FuzzerSHA1.h - Private copy of the SHA1 implementation ---*- C++ -* ===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This code is taken from public domain
+// (http://oauth.googlecode.com/svn/code/c/liboauth/src/sha1.c)
+// and modified by adding anonymous namespace, adding an interface
+// function fuzzer::ComputeSHA1() and removing unnecessary code.
+//
+// lib/Fuzzer can not use SHA1 implementation from openssl because
+// openssl may not be available and because we may be fuzzing openssl itself.
+// For the same reason we do not want to depend on SHA1 from LLVM tree.
+//===----------------------------------------------------------------------===//
+
+#include "FuzzerSHA1.h"
+#include "FuzzerDefs.h"
+
+/* This code is public-domain - it is based on libcrypt
+ * placed in the public domain by Wei Dai and other contributors.
+ */
+
+#include <iomanip>
+#include <sstream>
+#include <stdint.h>
+#include <string.h>
+
+namespace {  // Added for LibFuzzer
+
+#ifdef __BIG_ENDIAN__
+# define SHA_BIG_ENDIAN
+#elif defined __LITTLE_ENDIAN__
+/* override */
+#elif defined __BYTE_ORDER
+# if __BYTE_ORDER__ ==  __ORDER_BIG_ENDIAN__
+# define SHA_BIG_ENDIAN
+# endif
+#else // ! defined __LITTLE_ENDIAN__
+# include <endian.h> // machine/endian.h
+# if __BYTE_ORDER__ ==  __ORDER_BIG_ENDIAN__
+#  define SHA_BIG_ENDIAN
+# endif
+#endif
+
+
+/* header */
+
+#define HASH_LENGTH 20
+#define BLOCK_LENGTH 64
+
+typedef struct sha1nfo {
+	uint32_t buffer[BLOCK_LENGTH/4];
+	uint32_t state[HASH_LENGTH/4];
+	uint32_t byteCount;
+	uint8_t bufferOffset;
+	uint8_t keyBuffer[BLOCK_LENGTH];
+	uint8_t innerHash[HASH_LENGTH];
+} sha1nfo;
+
+/* public API - prototypes - TODO: doxygen*/
+
+/**
+ */
+void sha1_init(sha1nfo *s);
+/**
+ */
+void sha1_writebyte(sha1nfo *s, uint8_t data);
+/**
+ */
+void sha1_write(sha1nfo *s, const char *data, size_t len);
+/**
+ */
+uint8_t* sha1_result(sha1nfo *s);
+
+
+/* code */
+#define SHA1_K0  0x5a827999
+#define SHA1_K20 0x6ed9eba1
+#define SHA1_K40 0x8f1bbcdc
+#define SHA1_K60 0xca62c1d6
+
+void sha1_init(sha1nfo *s) {
+	s->state[0] = 0x67452301;
+	s->state[1] = 0xefcdab89;
+	s->state[2] = 0x98badcfe;
+	s->state[3] = 0x10325476;
+	s->state[4] = 0xc3d2e1f0;
+	s->byteCount = 0;
+	s->bufferOffset = 0;
+}
+
+uint32_t sha1_rol32(uint32_t number, uint8_t bits) {
+	return ((number << bits) | (number >> (32-bits)));
+}
+
+void sha1_hashBlock(sha1nfo *s) {
+	uint8_t i;
+	uint32_t a,b,c,d,e,t;
+
+	a=s->state[0];
+	b=s->state[1];
+	c=s->state[2];
+	d=s->state[3];
+	e=s->state[4];
+	for (i=0; i<80; i++) {
+		if (i>=16) {
+			t = s->buffer[(i+13)&15] ^ s->buffer[(i+8)&15] ^ s->buffer[(i+2)&15] ^ s->buffer[i&15];
+			s->buffer[i&15] = sha1_rol32(t,1);
+		}
+		if (i<20) {
+			t = (d ^ (b & (c ^ d))) + SHA1_K0;
+		} else if (i<40) {
+			t = (b ^ c ^ d) + SHA1_K20;
+		} else if (i<60) {
+			t = ((b & c) | (d & (b | c))) + SHA1_K40;
+		} else {
+			t = (b ^ c ^ d) + SHA1_K60;
+		}
+		t+=sha1_rol32(a,5) + e + s->buffer[i&15];
+		e=d;
+		d=c;
+		c=sha1_rol32(b,30);
+		b=a;
+		a=t;
+	}
+	s->state[0] += a;
+	s->state[1] += b;
+	s->state[2] += c;
+	s->state[3] += d;
+	s->state[4] += e;
+}
+
+void sha1_addUncounted(sha1nfo *s, uint8_t data) {
+	uint8_t * const b = (uint8_t*) s->buffer;
+#ifdef SHA_BIG_ENDIAN
+	b[s->bufferOffset] = data;
+#else
+	b[s->bufferOffset ^ 3] = data;
+#endif
+	s->bufferOffset++;
+	if (s->bufferOffset == BLOCK_LENGTH) {
+		sha1_hashBlock(s);
+		s->bufferOffset = 0;
+	}
+}
+
+void sha1_writebyte(sha1nfo *s, uint8_t data) {
+	++s->byteCount;
+	sha1_addUncounted(s, data);
+}
+
+void sha1_write(sha1nfo *s, const char *data, size_t len) {
+	for (;len--;) sha1_writebyte(s, (uint8_t) *data++);
+}
+
+void sha1_pad(sha1nfo *s) {
+	// Implement SHA-1 padding (fips180-2 §5.1.1)
+
+	// Pad with 0x80 followed by 0x00 until the end of the block
+	sha1_addUncounted(s, 0x80);
+	while (s->bufferOffset != 56) sha1_addUncounted(s, 0x00);
+
+	// Append length in the last 8 bytes
+	sha1_addUncounted(s, 0); // We're only using 32 bit lengths
+	sha1_addUncounted(s, 0); // But SHA-1 supports 64 bit lengths
+	sha1_addUncounted(s, 0); // So zero pad the top bits
+	sha1_addUncounted(s, s->byteCount >> 29); // Shifting to multiply by 8
+	sha1_addUncounted(s, s->byteCount >> 21); // as SHA-1 supports bitstreams as well as
+	sha1_addUncounted(s, s->byteCount >> 13); // byte.
+	sha1_addUncounted(s, s->byteCount >> 5);
+	sha1_addUncounted(s, s->byteCount << 3);
+}
+
+uint8_t* sha1_result(sha1nfo *s) {
+	// Pad to complete the last block
+	sha1_pad(s);
+
+#ifndef SHA_BIG_ENDIAN
+	// Swap byte order back
+	int i;
+	for (i=0; i<5; i++) {
+		s->state[i]=
+			  (((s->state[i])<<24)& 0xff000000)
+			| (((s->state[i])<<8) & 0x00ff0000)
+			| (((s->state[i])>>8) & 0x0000ff00)
+			| (((s->state[i])>>24)& 0x000000ff);
+	}
+#endif
+
+	// Return pointer to hash (20 characters)
+	return (uint8_t*) s->state;
+}
+
+}  // namespace; Added for LibFuzzer
+
+namespace fuzzer {
+
+// The rest is added for LibFuzzer
+void ComputeSHA1(const uint8_t *Data, size_t Len, uint8_t *Out) {
+  sha1nfo s;
+  sha1_init(&s);
+  sha1_write(&s, (const char*)Data, Len);
+  memcpy(Out, sha1_result(&s), HASH_LENGTH);
+}
+
+std::string Sha1ToString(const uint8_t Sha1[kSHA1NumBytes]) {
+  std::stringstream SS;
+  for (int i = 0; i < kSHA1NumBytes; i++)
+    SS << std::hex << std::setfill('0') << std::setw(2) << (unsigned)Sha1[i];
+  return SS.str();
+}
+
+std::string Hash(const Unit &U) {
+  uint8_t Hash[kSHA1NumBytes];
+  ComputeSHA1(U.data(), U.size(), Hash);
+  return Sha1ToString(Hash);
+}
+
+}
diff --git a/lib/fuzzer/FuzzerSHA1.h b/lib/fuzzer/FuzzerSHA1.h
new file mode 100644
index 0000000..3b5e6e8
--- /dev/null
+++ b/lib/fuzzer/FuzzerSHA1.h
@@ -0,0 +1,33 @@
+//===- FuzzerSHA1.h - Internal header for the SHA1 utils --------*- C++ -* ===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// SHA1 utils.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FUZZER_SHA1_H
+#define LLVM_FUZZER_SHA1_H
+
+#include "FuzzerDefs.h"
+#include <cstddef>
+#include <stdint.h>
+
+namespace fuzzer {
+
+// Private copy of SHA1 implementation.
+static const int kSHA1NumBytes = 20;
+
+// Computes SHA1 hash of 'Len' bytes in 'Data', writes kSHA1NumBytes to 'Out'.
+void ComputeSHA1(const uint8_t *Data, size_t Len, uint8_t *Out);
+
+std::string Sha1ToString(const uint8_t Sha1[kSHA1NumBytes]);
+
+std::string Hash(const Unit &U);
+
+}  // namespace fuzzer
+
+#endif  // LLVM_FUZZER_SHA1_H
diff --git a/lib/fuzzer/FuzzerShmem.h b/lib/fuzzer/FuzzerShmem.h
new file mode 100644
index 0000000..53568e0
--- /dev/null
+++ b/lib/fuzzer/FuzzerShmem.h
@@ -0,0 +1,69 @@
+//===- FuzzerShmem.h - shared memory interface ------------------*- C++ -* ===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// SharedMemoryRegion
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FUZZER_SHMEM_H
+#define LLVM_FUZZER_SHMEM_H
+
+#include <algorithm>
+#include <cstring>
+#include <string>
+
+#include "FuzzerDefs.h"
+
+namespace fuzzer {
+
+class SharedMemoryRegion {
+ public:
+  bool Create(const char *Name);
+  bool Open(const char *Name);
+  bool Destroy(const char *Name);
+  uint8_t *GetData() { return Data; }
+  void PostServer() {Post(0);}
+  void WaitServer() {Wait(0);}
+  void PostClient() {Post(1);}
+  void WaitClient() {Wait(1);}
+
+  size_t WriteByteArray(const uint8_t *Bytes, size_t N) {
+    assert(N <= kShmemSize - sizeof(N));
+    memcpy(GetData(), &N, sizeof(N));
+    memcpy(GetData() + sizeof(N), Bytes, N);
+    assert(N == ReadByteArraySize());
+    return N;
+  }
+  size_t ReadByteArraySize() {
+    size_t Res;
+    memcpy(&Res, GetData(), sizeof(Res));
+    return Res;
+  }
+  uint8_t *GetByteArray() { return GetData() + sizeof(size_t); }
+
+  bool IsServer() const { return Data && IAmServer; }
+  bool IsClient() const { return Data && !IAmServer; }
+
+private:
+
+  static const size_t kShmemSize = 1 << 22;
+  bool IAmServer;
+  std::string Path(const char *Name);
+  std::string SemName(const char *Name, int Idx);
+  void Post(int Idx);
+  void Wait(int Idx);
+
+  bool Map(int fd);
+  uint8_t *Data = nullptr;
+  void *Semaphore[2];
+};
+
+extern SharedMemoryRegion SMR;
+
+}  // namespace fuzzer
+
+#endif  // LLVM_FUZZER_SHMEM_H
diff --git a/lib/fuzzer/FuzzerShmemPosix.cpp b/lib/fuzzer/FuzzerShmemPosix.cpp
new file mode 100644
index 0000000..50cdcfb
--- /dev/null
+++ b/lib/fuzzer/FuzzerShmemPosix.cpp
@@ -0,0 +1,103 @@
+//===- FuzzerShmemPosix.cpp - Posix shared memory ---------------*- C++ -* ===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// SharedMemoryRegion
+//===----------------------------------------------------------------------===//
+#include "FuzzerDefs.h"
+#if LIBFUZZER_POSIX
+
+#include "FuzzerIO.h"
+#include "FuzzerShmem.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <semaphore.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+namespace fuzzer {
+
+std::string SharedMemoryRegion::Path(const char *Name) {
+  return DirPlusFile(TmpDir(), Name);
+}
+
+std::string SharedMemoryRegion::SemName(const char *Name, int Idx) {
+  std::string Res(Name);
+  return Res + (char)('0' + Idx);
+}
+
+bool SharedMemoryRegion::Map(int fd) {
+  Data =
+      (uint8_t *)mmap(0, kShmemSize, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0);
+  if (Data == (uint8_t*)-1)
+    return false;
+  return true;
+}
+
+bool SharedMemoryRegion::Create(const char *Name) {
+  int fd = open(Path(Name).c_str(), O_CREAT | O_RDWR, 0777);
+  if (fd < 0) return false;
+  if (ftruncate(fd, kShmemSize) < 0) return false;
+  if (!Map(fd))
+    return false;
+  for (int i = 0; i < 2; i++) {
+    sem_unlink(SemName(Name, i).c_str());
+    Semaphore[i] = sem_open(SemName(Name, i).c_str(), O_CREAT, 0644, 0);
+    if (Semaphore[i] == (void *)-1)
+      return false;
+  }
+  IAmServer = true;
+  return true;
+}
+
+bool SharedMemoryRegion::Open(const char *Name) {
+  int fd = open(Path(Name).c_str(), O_RDWR);
+  if (fd < 0) return false;
+  struct stat stat_res;
+  if (0 != fstat(fd, &stat_res))
+    return false;
+  assert(stat_res.st_size == kShmemSize);
+  if (!Map(fd))
+    return false;
+  for (int i = 0; i < 2; i++) {
+    Semaphore[i] = sem_open(SemName(Name, i).c_str(), 0);
+    if (Semaphore[i] == (void *)-1)
+      return false;
+  }
+  IAmServer = false;
+  return true;
+}
+
+bool SharedMemoryRegion::Destroy(const char *Name) {
+  return 0 == unlink(Path(Name).c_str());
+}
+
+void SharedMemoryRegion::Post(int Idx) {
+  assert(Idx == 0 || Idx == 1);
+  sem_post((sem_t*)Semaphore[Idx]);
+}
+
+void SharedMemoryRegion::Wait(int Idx) {
+  assert(Idx == 0 || Idx == 1);
+  for (int i = 0; i < 10 && sem_wait((sem_t*)Semaphore[Idx]); i++) {
+    // sem_wait may fail if interrupted by a signal.
+    sleep(i);
+    if (i)
+      Printf("%s: sem_wait[%d] failed %s\n", i < 9 ? "WARNING" : "ERROR", i,
+             strerror(errno));
+    if (i == 9) abort();
+  }
+}
+
+}  // namespace fuzzer
+
+#endif  // LIBFUZZER_POSIX
diff --git a/lib/fuzzer/FuzzerShmemWindows.cpp b/lib/fuzzer/FuzzerShmemWindows.cpp
new file mode 100644
index 0000000..d330ebf
--- /dev/null
+++ b/lib/fuzzer/FuzzerShmemWindows.cpp
@@ -0,0 +1,64 @@
+//===- FuzzerShmemWindows.cpp - Posix shared memory -------------*- C++ -* ===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// SharedMemoryRegion
+//===----------------------------------------------------------------------===//
+#include "FuzzerDefs.h"
+#if LIBFUZZER_WINDOWS
+
+#include "FuzzerIO.h"
+#include "FuzzerShmem.h"
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+namespace fuzzer {
+
+std::string SharedMemoryRegion::Path(const char *Name) {
+  return DirPlusFile(TmpDir(), Name);
+}
+
+std::string SharedMemoryRegion::SemName(const char *Name, int Idx) {
+  std::string Res(Name);
+  return Res + (char)('0' + Idx);
+}
+
+bool SharedMemoryRegion::Map(int fd) {
+  assert(0 && "UNIMPLEMENTED");
+  return false;
+}
+
+bool SharedMemoryRegion::Create(const char *Name) {
+  assert(0 && "UNIMPLEMENTED");
+  return false;
+}
+
+bool SharedMemoryRegion::Open(const char *Name) {
+  assert(0 && "UNIMPLEMENTED");
+  return false;
+}
+
+bool SharedMemoryRegion::Destroy(const char *Name) {
+  assert(0 && "UNIMPLEMENTED");
+  return false;
+}
+
+void SharedMemoryRegion::Post(int Idx) {
+  assert(0 && "UNIMPLEMENTED");
+}
+
+void SharedMemoryRegion::Wait(int Idx) {
+  Semaphore[1] = nullptr;
+  assert(0 && "UNIMPLEMENTED");
+}
+
+}  // namespace fuzzer
+
+#endif  // LIBFUZZER_WINDOWS
diff --git a/lib/fuzzer/FuzzerTracePC.cpp b/lib/fuzzer/FuzzerTracePC.cpp
new file mode 100644
index 0000000..5e9f9f2
--- /dev/null
+++ b/lib/fuzzer/FuzzerTracePC.cpp
@@ -0,0 +1,602 @@
+//===- FuzzerTracePC.cpp - PC tracing--------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Trace PCs.
+// This module implements __sanitizer_cov_trace_pc_guard[_init],
+// the callback required for -fsanitize-coverage=trace-pc-guard instrumentation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "FuzzerTracePC.h"
+#include "FuzzerCorpus.h"
+#include "FuzzerDefs.h"
+#include "FuzzerDictionary.h"
+#include "FuzzerExtFunctions.h"
+#include "FuzzerIO.h"
+#include "FuzzerUtil.h"
+#include "FuzzerValueBitMap.h"
+#include <set>
+
+// The coverage counters and PCs.
+// These are declared as global variables named "__sancov_*" to simplify
+// experiments with inlined instrumentation.
+alignas(64) ATTRIBUTE_INTERFACE
+uint8_t __sancov_trace_pc_guard_8bit_counters[fuzzer::TracePC::kNumPCs];
+
+ATTRIBUTE_INTERFACE
+uintptr_t __sancov_trace_pc_pcs[fuzzer::TracePC::kNumPCs];
+
+// Used by -fsanitize-coverage=stack-depth to track stack depth
+ATTRIBUTE_INTERFACE __attribute__((tls_model("initial-exec")))
+thread_local uintptr_t __sancov_lowest_stack;
+
+namespace fuzzer {
+
+TracePC TPC;
+
+int ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr;
+
+uint8_t *TracePC::Counters() const {
+  return __sancov_trace_pc_guard_8bit_counters;
+}
+
+uintptr_t *TracePC::PCs() const {
+  return __sancov_trace_pc_pcs;
+}
+
+size_t TracePC::GetTotalPCCoverage() {
+  if (ObservedPCs.size())
+    return ObservedPCs.size();
+  size_t Res = 0;
+  for (size_t i = 1, N = GetNumPCs(); i < N; i++)
+    if (PCs()[i])
+      Res++;
+  return Res;
+}
+
+
+void TracePC::HandleInline8bitCountersInit(uint8_t *Start, uint8_t *Stop) {
+  if (Start == Stop) return;
+  if (NumModulesWithInline8bitCounters &&
+      ModuleCounters[NumModulesWithInline8bitCounters-1].Start == Start) return;
+  assert(NumModulesWithInline8bitCounters <
+         sizeof(ModuleCounters) / sizeof(ModuleCounters[0]));
+  ModuleCounters[NumModulesWithInline8bitCounters++] = {Start, Stop};
+  NumInline8bitCounters += Stop - Start;
+}
+
+void TracePC::HandlePCsInit(const uintptr_t *Start, const uintptr_t *Stop) {
+  const PCTableEntry *B = reinterpret_cast<const PCTableEntry *>(Start);
+  const PCTableEntry *E = reinterpret_cast<const PCTableEntry *>(Stop);
+  if (NumPCTables && ModulePCTable[NumPCTables - 1].Start == B) return;
+  assert(NumPCTables < sizeof(ModulePCTable) / sizeof(ModulePCTable[0]));
+  ModulePCTable[NumPCTables++] = {B, E};
+  NumPCsInPCTables += E - B;
+}
+
+void TracePC::HandleInit(uint32_t *Start, uint32_t *Stop) {
+  if (Start == Stop || *Start) return;
+  assert(NumModules < sizeof(Modules) / sizeof(Modules[0]));
+  for (uint32_t *P = Start; P < Stop; P++) {
+    NumGuards++;
+    if (NumGuards == kNumPCs) {
+      RawPrint(
+          "WARNING: The binary has too many instrumented PCs.\n"
+          "         You may want to reduce the size of the binary\n"
+          "         for more efficient fuzzing and precise coverage data\n");
+    }
+    *P = NumGuards % kNumPCs;
+  }
+  Modules[NumModules].Start = Start;
+  Modules[NumModules].Stop = Stop;
+  NumModules++;
+}
+
+void TracePC::PrintModuleInfo() {
+  if (NumGuards) {
+    Printf("INFO: Loaded %zd modules   (%zd guards): ", NumModules, NumGuards);
+    for (size_t i = 0; i < NumModules; i++)
+      Printf("%zd [%p, %p), ", Modules[i].Stop - Modules[i].Start,
+             Modules[i].Start, Modules[i].Stop);
+    Printf("\n");
+  }
+  if (NumModulesWithInline8bitCounters) {
+    Printf("INFO: Loaded %zd modules   (%zd inline 8-bit counters): ",
+           NumModulesWithInline8bitCounters, NumInline8bitCounters);
+    for (size_t i = 0; i < NumModulesWithInline8bitCounters; i++)
+      Printf("%zd [%p, %p), ", ModuleCounters[i].Stop - ModuleCounters[i].Start,
+             ModuleCounters[i].Start, ModuleCounters[i].Stop);
+    Printf("\n");
+  }
+  if (NumPCTables) {
+    Printf("INFO: Loaded %zd PC tables (%zd PCs): ", NumPCTables,
+           NumPCsInPCTables);
+    for (size_t i = 0; i < NumPCTables; i++) {
+      Printf("%zd [%p,%p), ", ModulePCTable[i].Stop - ModulePCTable[i].Start,
+             ModulePCTable[i].Start, ModulePCTable[i].Stop);
+    }
+    Printf("\n");
+
+    if ((NumGuards && NumGuards != NumPCsInPCTables) ||
+        (NumInline8bitCounters && NumInline8bitCounters != NumPCsInPCTables)) {
+      Printf("ERROR: The size of coverage PC tables does not match the\n"
+             "number of instrumented PCs. This might be a compiler bug,\n"
+             "please contact the libFuzzer developers.\n"
+             "Also check https://bugs.llvm.org/show_bug.cgi?id=34636\n"
+             "for possible workarounds (tl;dr: don't use the old GNU ld)\n");
+      _Exit(1);
+    }
+  }
+  if (size_t NumClangCounters = ClangCountersEnd() - ClangCountersBegin())
+    Printf("INFO: %zd Clang Coverage Counters\n", NumClangCounters);
+}
+
+ATTRIBUTE_NO_SANITIZE_ALL
+void TracePC::HandleCallerCallee(uintptr_t Caller, uintptr_t Callee) {
+  const uintptr_t kBits = 12;
+  const uintptr_t kMask = (1 << kBits) - 1;
+  uintptr_t Idx = (Caller & kMask) | ((Callee & kMask) << kBits);
+  ValueProfileMap.AddValueModPrime(Idx);
+}
+
+void TracePC::UpdateObservedPCs() {
+  Vector<uintptr_t> CoveredFuncs;
+  auto ObservePC = [&](uintptr_t PC) {
+    if (ObservedPCs.insert(PC).second && DoPrintNewPCs)
+      PrintPC("\tNEW_PC: %p %F %L\n", "\tNEW_PC: %p\n", PC + 1);
+  };
+
+  auto Observe = [&](const PCTableEntry &TE) {
+    if (TE.PCFlags & 1)
+      if (ObservedFuncs.insert(TE.PC).second && NumPrintNewFuncs)
+        CoveredFuncs.push_back(TE.PC);
+    ObservePC(TE.PC);
+  };
+
+  if (NumPCsInPCTables) {
+    if (NumInline8bitCounters == NumPCsInPCTables) {
+      for (size_t i = 0; i < NumModulesWithInline8bitCounters; i++) {
+        uint8_t *Beg = ModuleCounters[i].Start;
+        size_t Size = ModuleCounters[i].Stop - Beg;
+        assert(Size ==
+               (size_t)(ModulePCTable[i].Stop - ModulePCTable[i].Start));
+        for (size_t j = 0; j < Size; j++)
+          if (Beg[j])
+            Observe(ModulePCTable[i].Start[j]);
+      }
+    } else if (NumGuards == NumPCsInPCTables) {
+      size_t GuardIdx = 1;
+      for (size_t i = 0; i < NumModules; i++) {
+        uint32_t *Beg = Modules[i].Start;
+        size_t Size = Modules[i].Stop - Beg;
+        assert(Size ==
+               (size_t)(ModulePCTable[i].Stop - ModulePCTable[i].Start));
+        for (size_t j = 0; j < Size; j++, GuardIdx++)
+          if (Counters()[GuardIdx])
+            Observe(ModulePCTable[i].Start[j]);
+      }
+    }
+  }
+  if (size_t NumClangCounters =
+      ClangCountersEnd() - ClangCountersBegin()) {
+    auto P = ClangCountersBegin();
+    for (size_t Idx = 0; Idx < NumClangCounters; Idx++)
+      if (P[Idx])
+        ObservePC((uintptr_t)Idx);
+  }
+
+  for (size_t i = 0, N = Min(CoveredFuncs.size(), NumPrintNewFuncs); i < N; i++) {
+    Printf("\tNEW_FUNC[%zd/%zd]: ", i, CoveredFuncs.size());
+    PrintPC("%p %F %L\n", "%p\n", CoveredFuncs[i] + 1);
+  }
+}
+
+inline ALWAYS_INLINE uintptr_t GetPreviousInstructionPc(uintptr_t PC) {
+  // TODO: this implementation is x86 only.
+  // see sanitizer_common GetPreviousInstructionPc for full implementation.
+  return PC - 1;
+}
+
+inline ALWAYS_INLINE uintptr_t GetNextInstructionPc(uintptr_t PC) {
+  // TODO: this implementation is x86 only.
+  // see sanitizer_common GetPreviousInstructionPc for full implementation.
+  return PC + 1;
+}
+
+static std::string GetModuleName(uintptr_t PC) {
+  char ModulePathRaw[4096] = "";  // What's PATH_MAX in portable C++?
+  void *OffsetRaw = nullptr;
+  if (!EF->__sanitizer_get_module_and_offset_for_pc(
+      reinterpret_cast<void *>(PC), ModulePathRaw,
+      sizeof(ModulePathRaw), &OffsetRaw))
+    return "";
+  return ModulePathRaw;
+}
+
+void TracePC::PrintCoverage() {
+  if (!EF->__sanitizer_symbolize_pc ||
+      !EF->__sanitizer_get_module_and_offset_for_pc) {
+    Printf("INFO: __sanitizer_symbolize_pc or "
+           "__sanitizer_get_module_and_offset_for_pc is not available,"
+           " not printing coverage\n");
+    return;
+  }
+  Printf("COVERAGE:\n");
+  std::string LastFunctionName = "";
+  std::string LastFileStr = "";
+  Set<size_t> UncoveredLines;
+  Set<size_t> CoveredLines;
+
+  auto FunctionEndCallback = [&](const std::string &CurrentFunc,
+                                 const std::string &CurrentFile) {
+    if (LastFunctionName != CurrentFunc) {
+      if (CoveredLines.empty() && !UncoveredLines.empty()) {
+        Printf("UNCOVERED_FUNC: %s\n", LastFunctionName.c_str());
+      } else {
+        for (auto Line : UncoveredLines) {
+          if (!CoveredLines.count(Line))
+            Printf("UNCOVERED_LINE: %s %s:%zd\n", LastFunctionName.c_str(),
+                   LastFileStr.c_str(), Line);
+        }
+      }
+
+      UncoveredLines.clear();
+      CoveredLines.clear();
+      LastFunctionName = CurrentFunc;
+      LastFileStr = CurrentFile;
+    }
+  };
+
+  for (size_t i = 0; i < NumPCTables; i++) {
+    auto &M = ModulePCTable[i];
+    assert(M.Start < M.Stop);
+    auto ModuleName = GetModuleName(M.Start->PC);
+    for (auto Ptr = M.Start; Ptr < M.Stop; Ptr++) {
+      auto PC = Ptr->PC;
+      auto VisualizePC = GetNextInstructionPc(PC);
+      bool IsObserved = ObservedPCs.count(PC);
+      std::string FileStr = DescribePC("%s", VisualizePC);
+      if (!IsInterestingCoverageFile(FileStr)) continue;
+      std::string FunctionStr = DescribePC("%F", VisualizePC);
+      FunctionEndCallback(FunctionStr, FileStr);
+      std::string LineStr = DescribePC("%l", VisualizePC);
+      size_t Line = std::stoul(LineStr);
+      if (IsObserved && CoveredLines.insert(Line).second)
+        Printf("COVERED: %s %s:%zd\n", FunctionStr.c_str(), FileStr.c_str(),
+               Line);
+      else
+        UncoveredLines.insert(Line);
+    }
+  }
+  FunctionEndCallback("", "");
+}
+
+void TracePC::DumpCoverage() {
+  if (EF->__sanitizer_dump_coverage) {
+    Vector<uintptr_t> PCsCopy(GetNumPCs());
+    for (size_t i = 0; i < GetNumPCs(); i++)
+      PCsCopy[i] = PCs()[i] ? GetPreviousInstructionPc(PCs()[i]) : 0;
+    EF->__sanitizer_dump_coverage(PCsCopy.data(), PCsCopy.size());
+  }
+}
+
+// Value profile.
+// We keep track of various values that affect control flow.
+// These values are inserted into a bit-set-based hash map.
+// Every new bit in the map is treated as a new coverage.
+//
+// For memcmp/strcmp/etc the interesting value is the length of the common
+// prefix of the parameters.
+// For cmp instructions the interesting value is a XOR of the parameters.
+// The interesting value is mixed up with the PC and is then added to the map.
+
+ATTRIBUTE_NO_SANITIZE_ALL
+void TracePC::AddValueForMemcmp(void *caller_pc, const void *s1, const void *s2,
+                                size_t n, bool StopAtZero) {
+  if (!n) return;
+  size_t Len = std::min(n, Word::GetMaxSize());
+  const uint8_t *A1 = reinterpret_cast<const uint8_t *>(s1);
+  const uint8_t *A2 = reinterpret_cast<const uint8_t *>(s2);
+  uint8_t B1[Word::kMaxSize];
+  uint8_t B2[Word::kMaxSize];
+  // Copy the data into locals in this non-msan-instrumented function
+  // to avoid msan complaining further.
+  size_t Hash = 0;  // Compute some simple hash of both strings.
+  for (size_t i = 0; i < Len; i++) {
+    B1[i] = A1[i];
+    B2[i] = A2[i];
+    size_t T = B1[i];
+    Hash ^= (T << 8) | B2[i];
+  }
+  size_t I = 0;
+  for (; I < Len; I++)
+    if (B1[I] != B2[I] || (StopAtZero && B1[I] == 0))
+      break;
+  size_t PC = reinterpret_cast<size_t>(caller_pc);
+  size_t Idx = (PC & 4095) | (I << 12);
+  ValueProfileMap.AddValue(Idx);
+  TORCW.Insert(Idx ^ Hash, Word(B1, Len), Word(B2, Len));
+}
+
+template <class T>
+ATTRIBUTE_TARGET_POPCNT ALWAYS_INLINE
+ATTRIBUTE_NO_SANITIZE_ALL
+void TracePC::HandleCmp(uintptr_t PC, T Arg1, T Arg2) {
+  uint64_t ArgXor = Arg1 ^ Arg2;
+  uint64_t ArgDistance = __builtin_popcountll(ArgXor) + 1; // [1,65]
+  uintptr_t Idx = ((PC & 4095) + 1) * ArgDistance;
+  if (sizeof(T) == 4)
+      TORC4.Insert(ArgXor, Arg1, Arg2);
+  else if (sizeof(T) == 8)
+      TORC8.Insert(ArgXor, Arg1, Arg2);
+  ValueProfileMap.AddValue(Idx);
+}
+
+static size_t InternalStrnlen(const char *S, size_t MaxLen) {
+  size_t Len = 0;
+  for (; Len < MaxLen && S[Len]; Len++) {}
+  return Len;
+}
+
+// Finds min of (strlen(S1), strlen(S2)).
+// Needed bacause one of these strings may actually be non-zero terminated.
+static size_t InternalStrnlen2(const char *S1, const char *S2) {
+  size_t Len = 0;
+  for (; S1[Len] && S2[Len]; Len++)  {}
+  return Len;
+}
+
+void TracePC::ClearInlineCounters() {
+  for (size_t i = 0; i < NumModulesWithInline8bitCounters; i++) {
+    uint8_t *Beg = ModuleCounters[i].Start;
+    size_t Size = ModuleCounters[i].Stop - Beg;
+    memset(Beg, 0, Size);
+  }
+}
+
+ATTRIBUTE_NO_SANITIZE_ALL
+void TracePC::RecordInitialStack() {
+  int stack;
+  __sancov_lowest_stack = InitialStack = reinterpret_cast<uintptr_t>(&stack);
+}
+
+uintptr_t TracePC::GetMaxStackOffset() const {
+  return InitialStack - __sancov_lowest_stack;  // Stack grows down
+}
+
+} // namespace fuzzer
+
+extern "C" {
+ATTRIBUTE_INTERFACE
+ATTRIBUTE_NO_SANITIZE_ALL
+void __sanitizer_cov_trace_pc_guard(uint32_t *Guard) {
+  uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
+  uint32_t Idx = *Guard;
+  __sancov_trace_pc_pcs[Idx] = PC;
+  __sancov_trace_pc_guard_8bit_counters[Idx]++;
+}
+
+// Best-effort support for -fsanitize-coverage=trace-pc, which is available
+// in both Clang and GCC.
+ATTRIBUTE_INTERFACE
+ATTRIBUTE_NO_SANITIZE_ALL
+void __sanitizer_cov_trace_pc() {
+  uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
+  uintptr_t Idx = PC & (((uintptr_t)1 << fuzzer::TracePC::kTracePcBits) - 1);
+  __sancov_trace_pc_pcs[Idx] = PC;
+  __sancov_trace_pc_guard_8bit_counters[Idx]++;
+}
+
+ATTRIBUTE_INTERFACE
+void __sanitizer_cov_trace_pc_guard_init(uint32_t *Start, uint32_t *Stop) {
+  fuzzer::TPC.HandleInit(Start, Stop);
+}
+
+ATTRIBUTE_INTERFACE
+void __sanitizer_cov_8bit_counters_init(uint8_t *Start, uint8_t *Stop) {
+  fuzzer::TPC.HandleInline8bitCountersInit(Start, Stop);
+}
+
+ATTRIBUTE_INTERFACE
+void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg,
+                              const uintptr_t *pcs_end) {
+  fuzzer::TPC.HandlePCsInit(pcs_beg, pcs_end);
+}
+
+ATTRIBUTE_INTERFACE
+ATTRIBUTE_NO_SANITIZE_ALL
+void __sanitizer_cov_trace_pc_indir(uintptr_t Callee) {
+  uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
+  fuzzer::TPC.HandleCallerCallee(PC, Callee);
+}
+
+ATTRIBUTE_INTERFACE
+ATTRIBUTE_NO_SANITIZE_ALL
+ATTRIBUTE_TARGET_POPCNT
+void __sanitizer_cov_trace_cmp8(uint64_t Arg1, uint64_t Arg2) {
+  uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
+  fuzzer::TPC.HandleCmp(PC, Arg1, Arg2);
+}
+
+ATTRIBUTE_INTERFACE
+ATTRIBUTE_NO_SANITIZE_ALL
+ATTRIBUTE_TARGET_POPCNT
+// Now the __sanitizer_cov_trace_const_cmp[1248] callbacks just mimic
+// the behaviour of __sanitizer_cov_trace_cmp[1248] ones. This, however,
+// should be changed later to make full use of instrumentation.
+void __sanitizer_cov_trace_const_cmp8(uint64_t Arg1, uint64_t Arg2) {
+  uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
+  fuzzer::TPC.HandleCmp(PC, Arg1, Arg2);
+}
+
+ATTRIBUTE_INTERFACE
+ATTRIBUTE_NO_SANITIZE_ALL
+ATTRIBUTE_TARGET_POPCNT
+void __sanitizer_cov_trace_cmp4(uint32_t Arg1, uint32_t Arg2) {
+  uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
+  fuzzer::TPC.HandleCmp(PC, Arg1, Arg2);
+}
+
+ATTRIBUTE_INTERFACE
+ATTRIBUTE_NO_SANITIZE_ALL
+ATTRIBUTE_TARGET_POPCNT
+void __sanitizer_cov_trace_const_cmp4(uint32_t Arg1, uint32_t Arg2) {
+  uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
+  fuzzer::TPC.HandleCmp(PC, Arg1, Arg2);
+}
+
+ATTRIBUTE_INTERFACE
+ATTRIBUTE_NO_SANITIZE_ALL
+ATTRIBUTE_TARGET_POPCNT
+void __sanitizer_cov_trace_cmp2(uint16_t Arg1, uint16_t Arg2) {
+  uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
+  fuzzer::TPC.HandleCmp(PC, Arg1, Arg2);
+}
+
+ATTRIBUTE_INTERFACE
+ATTRIBUTE_NO_SANITIZE_ALL
+ATTRIBUTE_TARGET_POPCNT
+void __sanitizer_cov_trace_const_cmp2(uint16_t Arg1, uint16_t Arg2) {
+  uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
+  fuzzer::TPC.HandleCmp(PC, Arg1, Arg2);
+}
+
+ATTRIBUTE_INTERFACE
+ATTRIBUTE_NO_SANITIZE_ALL
+ATTRIBUTE_TARGET_POPCNT
+void __sanitizer_cov_trace_cmp1(uint8_t Arg1, uint8_t Arg2) {
+  uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
+  fuzzer::TPC.HandleCmp(PC, Arg1, Arg2);
+}
+
+ATTRIBUTE_INTERFACE
+ATTRIBUTE_NO_SANITIZE_ALL
+ATTRIBUTE_TARGET_POPCNT
+void __sanitizer_cov_trace_const_cmp1(uint8_t Arg1, uint8_t Arg2) {
+  uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
+  fuzzer::TPC.HandleCmp(PC, Arg1, Arg2);
+}
+
+ATTRIBUTE_INTERFACE
+ATTRIBUTE_NO_SANITIZE_ALL
+ATTRIBUTE_TARGET_POPCNT
+void __sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases) {
+  uint64_t N = Cases[0];
+  uint64_t ValSizeInBits = Cases[1];
+  uint64_t *Vals = Cases + 2;
+  // Skip the most common and the most boring case.
+  if (Vals[N - 1]  < 256 && Val < 256)
+    return;
+  uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
+  size_t i;
+  uint64_t Token = 0;
+  for (i = 0; i < N; i++) {
+    Token = Val ^ Vals[i];
+    if (Val < Vals[i])
+      break;
+  }
+
+  if (ValSizeInBits == 16)
+    fuzzer::TPC.HandleCmp(PC + i, static_cast<uint16_t>(Token), (uint16_t)(0));
+  else if (ValSizeInBits == 32)
+    fuzzer::TPC.HandleCmp(PC + i, static_cast<uint32_t>(Token), (uint32_t)(0));
+  else
+    fuzzer::TPC.HandleCmp(PC + i, Token, (uint64_t)(0));
+}
+
+ATTRIBUTE_INTERFACE
+ATTRIBUTE_NO_SANITIZE_ALL
+ATTRIBUTE_TARGET_POPCNT
+void __sanitizer_cov_trace_div4(uint32_t Val) {
+  uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
+  fuzzer::TPC.HandleCmp(PC, Val, (uint32_t)0);
+}
+
+ATTRIBUTE_INTERFACE
+ATTRIBUTE_NO_SANITIZE_ALL
+ATTRIBUTE_TARGET_POPCNT
+void __sanitizer_cov_trace_div8(uint64_t Val) {
+  uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
+  fuzzer::TPC.HandleCmp(PC, Val, (uint64_t)0);
+}
+
+ATTRIBUTE_INTERFACE
+ATTRIBUTE_NO_SANITIZE_ALL
+ATTRIBUTE_TARGET_POPCNT
+void __sanitizer_cov_trace_gep(uintptr_t Idx) {
+  uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
+  fuzzer::TPC.HandleCmp(PC, Idx, (uintptr_t)0);
+}
+
+ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY
+void __sanitizer_weak_hook_memcmp(void *caller_pc, const void *s1,
+                                  const void *s2, size_t n, int result) {
+  if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return;
+  if (result == 0) return;  // No reason to mutate.
+  if (n <= 1) return;  // Not interesting.
+  fuzzer::TPC.AddValueForMemcmp(caller_pc, s1, s2, n, /*StopAtZero*/false);
+}
+
+ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY
+void __sanitizer_weak_hook_strncmp(void *caller_pc, const char *s1,
+                                   const char *s2, size_t n, int result) {
+  if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return;
+  if (result == 0) return;  // No reason to mutate.
+  size_t Len1 = fuzzer::InternalStrnlen(s1, n);
+  size_t Len2 = fuzzer::InternalStrnlen(s2, n);
+  n = std::min(n, Len1);
+  n = std::min(n, Len2);
+  if (n <= 1) return;  // Not interesting.
+  fuzzer::TPC.AddValueForMemcmp(caller_pc, s1, s2, n, /*StopAtZero*/true);
+}
+
+ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY
+void __sanitizer_weak_hook_strcmp(void *caller_pc, const char *s1,
+                                   const char *s2, int result) {
+  if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return;
+  if (result == 0) return;  // No reason to mutate.
+  size_t N = fuzzer::InternalStrnlen2(s1, s2);
+  if (N <= 1) return;  // Not interesting.
+  fuzzer::TPC.AddValueForMemcmp(caller_pc, s1, s2, N, /*StopAtZero*/true);
+}
+
+ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY
+void __sanitizer_weak_hook_strncasecmp(void *called_pc, const char *s1,
+                                       const char *s2, size_t n, int result) {
+  if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return;
+  return __sanitizer_weak_hook_strncmp(called_pc, s1, s2, n, result);
+}
+
+ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY
+void __sanitizer_weak_hook_strcasecmp(void *called_pc, const char *s1,
+                                      const char *s2, int result) {
+  if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return;
+  return __sanitizer_weak_hook_strcmp(called_pc, s1, s2, result);
+}
+
+ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY
+void __sanitizer_weak_hook_strstr(void *called_pc, const char *s1,
+                                  const char *s2, char *result) {
+  if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return;
+  fuzzer::TPC.MMT.Add(reinterpret_cast<const uint8_t *>(s2), strlen(s2));
+}
+
+ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY
+void __sanitizer_weak_hook_strcasestr(void *called_pc, const char *s1,
+                                      const char *s2, char *result) {
+  if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return;
+  fuzzer::TPC.MMT.Add(reinterpret_cast<const uint8_t *>(s2), strlen(s2));
+}
+
+ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY
+void __sanitizer_weak_hook_memmem(void *called_pc, const void *s1, size_t len1,
+                                  const void *s2, size_t len2, void *result) {
+  if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return;
+  fuzzer::TPC.MMT.Add(reinterpret_cast<const uint8_t *>(s2), len2);
+}
+}  // extern "C"
diff --git a/lib/fuzzer/FuzzerTracePC.h b/lib/fuzzer/FuzzerTracePC.h
new file mode 100644
index 0000000..b3c9b98
--- /dev/null
+++ b/lib/fuzzer/FuzzerTracePC.h
@@ -0,0 +1,267 @@
+//===- FuzzerTracePC.h - Internal header for the Fuzzer ---------*- C++ -* ===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// fuzzer::TracePC
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FUZZER_TRACE_PC
+#define LLVM_FUZZER_TRACE_PC
+
+#include "FuzzerDefs.h"
+#include "FuzzerDictionary.h"
+#include "FuzzerValueBitMap.h"
+
+#include <set>
+
+namespace fuzzer {
+
+// TableOfRecentCompares (TORC) remembers the most recently performed
+// comparisons of type T.
+// We record the arguments of CMP instructions in this table unconditionally
+// because it seems cheaper this way than to compute some expensive
+// conditions inside __sanitizer_cov_trace_cmp*.
+// After the unit has been executed we may decide to use the contents of
+// this table to populate a Dictionary.
+template<class T, size_t kSizeT>
+struct TableOfRecentCompares {
+  static const size_t kSize = kSizeT;
+  struct Pair {
+    T A, B;
+  };
+  ATTRIBUTE_NO_SANITIZE_ALL
+  void Insert(size_t Idx, const T &Arg1, const T &Arg2) {
+    Idx = Idx % kSize;
+    Table[Idx].A = Arg1;
+    Table[Idx].B = Arg2;
+  }
+
+  Pair Get(size_t I) { return Table[I % kSize]; }
+
+  Pair Table[kSize];
+};
+
+template <size_t kSizeT>
+struct MemMemTable {
+  static const size_t kSize = kSizeT;
+  Word MemMemWords[kSize];
+  Word EmptyWord;
+
+  void Add(const uint8_t *Data, size_t Size) {
+    if (Size <= 2) return;
+    Size = std::min(Size, Word::GetMaxSize());
+    size_t Idx = SimpleFastHash(Data, Size) % kSize;
+    MemMemWords[Idx].Set(Data, Size);
+  }
+  const Word &Get(size_t Idx) {
+    for (size_t i = 0; i < kSize; i++) {
+      const Word &W = MemMemWords[(Idx + i) % kSize];
+      if (W.size()) return W;
+    }
+    EmptyWord.Set(nullptr, 0);
+    return EmptyWord;
+  }
+};
+
+class TracePC {
+ public:
+  static const size_t kNumPCs = 1 << 21;
+  // How many bits of PC are used from __sanitizer_cov_trace_pc.
+  static const size_t kTracePcBits = 18;
+
+  void HandleInit(uint32_t *Start, uint32_t *Stop);
+  void HandleInline8bitCountersInit(uint8_t *Start, uint8_t *Stop);
+  void HandlePCsInit(const uintptr_t *Start, const uintptr_t *Stop);
+  void HandleCallerCallee(uintptr_t Caller, uintptr_t Callee);
+  template <class T> void HandleCmp(uintptr_t PC, T Arg1, T Arg2);
+  size_t GetTotalPCCoverage();
+  void SetUseCounters(bool UC) { UseCounters = UC; }
+  void SetUseClangCoverage(bool UCC) { UseClangCoverage = UCC; }
+  void SetUseValueProfile(bool VP) { UseValueProfile = VP; }
+  void SetPrintNewPCs(bool P) { DoPrintNewPCs = P; }
+  void SetPrintNewFuncs(size_t P) { NumPrintNewFuncs = P; }
+  void UpdateObservedPCs();
+  template <class Callback> void CollectFeatures(Callback CB) const;
+
+  void ResetMaps() {
+    ValueProfileMap.Reset();
+    if (NumModules)
+      memset(Counters(), 0, GetNumPCs());
+    ClearExtraCounters();
+    ClearInlineCounters();
+    if (UseClangCoverage)
+      ClearClangCounters();
+  }
+
+  void ClearInlineCounters();
+
+  void UpdateFeatureSet(size_t CurrentElementIdx, size_t CurrentElementSize);
+  void PrintFeatureSet();
+
+  void PrintModuleInfo();
+
+  void PrintCoverage();
+  void DumpCoverage();
+
+  void AddValueForMemcmp(void *caller_pc, const void *s1, const void *s2,
+                         size_t n, bool StopAtZero);
+
+  TableOfRecentCompares<uint32_t, 32> TORC4;
+  TableOfRecentCompares<uint64_t, 32> TORC8;
+  TableOfRecentCompares<Word, 32> TORCW;
+  MemMemTable<1024> MMT;
+
+  size_t GetNumPCs() const {
+    return NumGuards == 0 ? (1 << kTracePcBits) : Min(kNumPCs, NumGuards + 1);
+  }
+  uintptr_t GetPC(size_t Idx) {
+    assert(Idx < GetNumPCs());
+    return PCs()[Idx];
+  }
+
+  void RecordInitialStack();
+  uintptr_t GetMaxStackOffset() const;
+
+  template<class CallBack>
+  void ForEachObservedPC(CallBack CB) {
+    for (auto PC : ObservedPCs)
+      CB(PC);
+  }
+
+private:
+  bool UseCounters = false;
+  bool UseValueProfile = false;
+  bool UseClangCoverage = false;
+  bool DoPrintNewPCs = false;
+  size_t NumPrintNewFuncs = 0;
+
+  struct Module {
+    uint32_t *Start, *Stop;
+  };
+
+  Module Modules[4096];
+  size_t NumModules;  // linker-initialized.
+  size_t NumGuards;  // linker-initialized.
+
+  struct { uint8_t *Start, *Stop; } ModuleCounters[4096];
+  size_t NumModulesWithInline8bitCounters;  // linker-initialized.
+  size_t NumInline8bitCounters;
+
+  struct PCTableEntry {
+    uintptr_t PC, PCFlags;
+  };
+
+  struct { const PCTableEntry *Start, *Stop; } ModulePCTable[4096];
+  size_t NumPCTables;
+  size_t NumPCsInPCTables;
+
+  uint8_t *Counters() const;
+  uintptr_t *PCs() const;
+
+  Set<uintptr_t> ObservedPCs;
+  Set<uintptr_t> ObservedFuncs;
+
+  ValueBitMap ValueProfileMap;
+  uintptr_t InitialStack;
+};
+
+template <class Callback>
+// void Callback(size_t FirstFeature, size_t Idx, uint8_t Value);
+ATTRIBUTE_NO_SANITIZE_ALL
+void ForEachNonZeroByte(const uint8_t *Begin, const uint8_t *End,
+                        size_t FirstFeature, Callback Handle8bitCounter) {
+  typedef uintptr_t LargeType;
+  const size_t Step = sizeof(LargeType) / sizeof(uint8_t);
+  const size_t StepMask = Step - 1;
+  auto P = Begin;
+  // Iterate by 1 byte until either the alignment boundary or the end.
+  for (; reinterpret_cast<uintptr_t>(P) & StepMask && P < End; P++)
+    if (uint8_t V = *P)
+      Handle8bitCounter(FirstFeature, P - Begin, V);
+
+  // Iterate by Step bytes at a time.
+  for (; P < End; P += Step)
+    if (LargeType Bundle = *reinterpret_cast<const LargeType *>(P))
+      for (size_t I = 0; I < Step; I++, Bundle >>= 8)
+        if (uint8_t V = Bundle & 0xff)
+          Handle8bitCounter(FirstFeature, P - Begin + I, V);
+
+  // Iterate by 1 byte until the end.
+  for (; P < End; P++)
+    if (uint8_t V = *P)
+      Handle8bitCounter(FirstFeature, P - Begin, V);
+}
+
+// Given a non-zero Counters returns a number in [0,7].
+template<class T>
+unsigned CounterToFeature(T Counter) {
+    assert(Counter);
+    unsigned Bit = 0;
+    /**/ if (Counter >= 128) Bit = 7;
+    else if (Counter >= 32) Bit = 6;
+    else if (Counter >= 16) Bit = 5;
+    else if (Counter >= 8) Bit = 4;
+    else if (Counter >= 4) Bit = 3;
+    else if (Counter >= 3) Bit = 2;
+    else if (Counter >= 2) Bit = 1;
+    return Bit;
+}
+
+template <class Callback>  // void Callback(size_t Feature)
+ATTRIBUTE_NO_SANITIZE_ADDRESS
+__attribute__((noinline))
+void TracePC::CollectFeatures(Callback HandleFeature) const {
+  uint8_t *Counters = this->Counters();
+  size_t N = GetNumPCs();
+  auto Handle8bitCounter = [&](size_t FirstFeature,
+                               size_t Idx, uint8_t Counter) {
+    HandleFeature(FirstFeature + Idx * 8 + CounterToFeature(Counter));
+  };
+
+  size_t FirstFeature = 0;
+
+  if (!NumInline8bitCounters) {
+    ForEachNonZeroByte(Counters, Counters + N, FirstFeature, Handle8bitCounter);
+    FirstFeature += N * 8;
+  }
+
+  if (NumInline8bitCounters) {
+    for (size_t i = 0; i < NumModulesWithInline8bitCounters; i++) {
+      ForEachNonZeroByte(ModuleCounters[i].Start, ModuleCounters[i].Stop,
+                         FirstFeature, Handle8bitCounter);
+      FirstFeature += 8 * (ModuleCounters[i].Stop - ModuleCounters[i].Start);
+    }
+  }
+
+  if (size_t NumClangCounters = ClangCountersEnd() - ClangCountersBegin()) {
+    auto P = ClangCountersBegin();
+    for (size_t Idx = 0; Idx < NumClangCounters; Idx++)
+      if (auto Cnt = P[Idx])
+        HandleFeature(FirstFeature + Idx * 8 + CounterToFeature(Cnt));
+    FirstFeature += NumClangCounters;
+  }
+
+  ForEachNonZeroByte(ExtraCountersBegin(), ExtraCountersEnd(), FirstFeature,
+                     Handle8bitCounter);
+  FirstFeature += (ExtraCountersEnd() - ExtraCountersBegin()) * 8;
+
+  if (UseValueProfile) {
+    ValueProfileMap.ForEach([&](size_t Idx) {
+      HandleFeature(FirstFeature + Idx);
+    });
+    FirstFeature += ValueProfileMap.SizeInBits();
+  }
+
+  if (auto MaxStackOffset = GetMaxStackOffset())
+    HandleFeature(FirstFeature + MaxStackOffset);
+}
+
+extern TracePC TPC;
+
+}  // namespace fuzzer
+
+#endif  // LLVM_FUZZER_TRACE_PC
diff --git a/lib/fuzzer/FuzzerUtil.cpp b/lib/fuzzer/FuzzerUtil.cpp
new file mode 100644
index 0000000..65f0e17
--- /dev/null
+++ b/lib/fuzzer/FuzzerUtil.cpp
@@ -0,0 +1,215 @@
+//===- FuzzerUtil.cpp - Misc utils ----------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Misc utils.
+//===----------------------------------------------------------------------===//
+
+#include "FuzzerUtil.h"
+#include "FuzzerIO.h"
+#include "FuzzerInternal.h"
+#include <cassert>
+#include <chrono>
+#include <cstring>
+#include <errno.h>
+#include <signal.h>
+#include <sstream>
+#include <stdio.h>
+#include <sys/types.h>
+#include <thread>
+
+namespace fuzzer {
+
+void PrintHexArray(const uint8_t *Data, size_t Size,
+                   const char *PrintAfter) {
+  for (size_t i = 0; i < Size; i++)
+    Printf("0x%x,", (unsigned)Data[i]);
+  Printf("%s", PrintAfter);
+}
+
+void Print(const Unit &v, const char *PrintAfter) {
+  PrintHexArray(v.data(), v.size(), PrintAfter);
+}
+
+void PrintASCIIByte(uint8_t Byte) {
+  if (Byte == '\\')
+    Printf("\\\\");
+  else if (Byte == '"')
+    Printf("\\\"");
+  else if (Byte >= 32 && Byte < 127)
+    Printf("%c", Byte);
+  else
+    Printf("\\x%02x", Byte);
+}
+
+void PrintASCII(const uint8_t *Data, size_t Size, const char *PrintAfter) {
+  for (size_t i = 0; i < Size; i++)
+    PrintASCIIByte(Data[i]);
+  Printf("%s", PrintAfter);
+}
+
+void PrintASCII(const Unit &U, const char *PrintAfter) {
+  PrintASCII(U.data(), U.size(), PrintAfter);
+}
+
+bool ToASCII(uint8_t *Data, size_t Size) {
+  bool Changed = false;
+  for (size_t i = 0; i < Size; i++) {
+    uint8_t &X = Data[i];
+    auto NewX = X;
+    NewX &= 127;
+    if (!isspace(NewX) && !isprint(NewX))
+      NewX = ' ';
+    Changed |= NewX != X;
+    X = NewX;
+  }
+  return Changed;
+}
+
+bool IsASCII(const Unit &U) { return IsASCII(U.data(), U.size()); }
+
+bool IsASCII(const uint8_t *Data, size_t Size) {
+  for (size_t i = 0; i < Size; i++)
+    if (!(isprint(Data[i]) || isspace(Data[i]))) return false;
+  return true;
+}
+
+bool ParseOneDictionaryEntry(const std::string &Str, Unit *U) {
+  U->clear();
+  if (Str.empty()) return false;
+  size_t L = 0, R = Str.size() - 1;  // We are parsing the range [L,R].
+  // Skip spaces from both sides.
+  while (L < R && isspace(Str[L])) L++;
+  while (R > L && isspace(Str[R])) R--;
+  if (R - L < 2) return false;
+  // Check the closing "
+  if (Str[R] != '"') return false;
+  R--;
+  // Find the opening "
+  while (L < R && Str[L] != '"') L++;
+  if (L >= R) return false;
+  assert(Str[L] == '\"');
+  L++;
+  assert(L <= R);
+  for (size_t Pos = L; Pos <= R; Pos++) {
+    uint8_t V = (uint8_t)Str[Pos];
+    if (!isprint(V) && !isspace(V)) return false;
+    if (V =='\\') {
+      // Handle '\\'
+      if (Pos + 1 <= R && (Str[Pos + 1] == '\\' || Str[Pos + 1] == '"')) {
+        U->push_back(Str[Pos + 1]);
+        Pos++;
+        continue;
+      }
+      // Handle '\xAB'
+      if (Pos + 3 <= R && Str[Pos + 1] == 'x'
+           && isxdigit(Str[Pos + 2]) && isxdigit(Str[Pos + 3])) {
+        char Hex[] = "0xAA";
+        Hex[2] = Str[Pos + 2];
+        Hex[3] = Str[Pos + 3];
+        U->push_back(strtol(Hex, nullptr, 16));
+        Pos += 3;
+        continue;
+      }
+      return false;  // Invalid escape.
+    } else {
+      // Any other character.
+      U->push_back(V);
+    }
+  }
+  return true;
+}
+
+bool ParseDictionaryFile(const std::string &Text, Vector<Unit> *Units) {
+  if (Text.empty()) {
+    Printf("ParseDictionaryFile: file does not exist or is empty\n");
+    return false;
+  }
+  std::istringstream ISS(Text);
+  Units->clear();
+  Unit U;
+  int LineNo = 0;
+  std::string S;
+  while (std::getline(ISS, S, '\n')) {
+    LineNo++;
+    size_t Pos = 0;
+    while (Pos < S.size() && isspace(S[Pos])) Pos++;  // Skip spaces.
+    if (Pos == S.size()) continue;  // Empty line.
+    if (S[Pos] == '#') continue;  // Comment line.
+    if (ParseOneDictionaryEntry(S, &U)) {
+      Units->push_back(U);
+    } else {
+      Printf("ParseDictionaryFile: error in line %d\n\t\t%s\n", LineNo,
+             S.c_str());
+      return false;
+    }
+  }
+  return true;
+}
+
+std::string Base64(const Unit &U) {
+  static const char Table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                              "abcdefghijklmnopqrstuvwxyz"
+                              "0123456789+/";
+  std::string Res;
+  size_t i;
+  for (i = 0; i + 2 < U.size(); i += 3) {
+    uint32_t x = (U[i] << 16) + (U[i + 1] << 8) + U[i + 2];
+    Res += Table[(x >> 18) & 63];
+    Res += Table[(x >> 12) & 63];
+    Res += Table[(x >> 6) & 63];
+    Res += Table[x & 63];
+  }
+  if (i + 1 == U.size()) {
+    uint32_t x = (U[i] << 16);
+    Res += Table[(x >> 18) & 63];
+    Res += Table[(x >> 12) & 63];
+    Res += "==";
+  } else if (i + 2 == U.size()) {
+    uint32_t x = (U[i] << 16) + (U[i + 1] << 8);
+    Res += Table[(x >> 18) & 63];
+    Res += Table[(x >> 12) & 63];
+    Res += Table[(x >> 6) & 63];
+    Res += "=";
+  }
+  return Res;
+}
+
+std::string DescribePC(const char *SymbolizedFMT, uintptr_t PC) {
+  if (!EF->__sanitizer_symbolize_pc) return "<can not symbolize>";
+  char PcDescr[1024];
+  EF->__sanitizer_symbolize_pc(reinterpret_cast<void*>(PC),
+                               SymbolizedFMT, PcDescr, sizeof(PcDescr));
+  PcDescr[sizeof(PcDescr) - 1] = 0;  // Just in case.
+  return PcDescr;
+}
+
+void PrintPC(const char *SymbolizedFMT, const char *FallbackFMT, uintptr_t PC) {
+  if (EF->__sanitizer_symbolize_pc)
+    Printf("%s", DescribePC(SymbolizedFMT, PC).c_str());
+  else
+    Printf(FallbackFMT, PC);
+}
+
+unsigned NumberOfCpuCores() {
+  unsigned N = std::thread::hardware_concurrency();
+  if (!N) {
+    Printf("WARNING: std::thread::hardware_concurrency not well defined for "
+           "your platform. Assuming CPU count of 1.\n");
+    N = 1;
+  }
+  return N;
+}
+
+size_t SimpleFastHash(const uint8_t *Data, size_t Size) {
+  size_t Res = 0;
+  for (size_t i = 0; i < Size; i++)
+    Res = Res * 11 + Data[i];
+  return Res;
+}
+
+}  // namespace fuzzer
diff --git a/lib/fuzzer/FuzzerUtil.h b/lib/fuzzer/FuzzerUtil.h
new file mode 100644
index 0000000..6cebd7c
--- /dev/null
+++ b/lib/fuzzer/FuzzerUtil.h
@@ -0,0 +1,84 @@
+//===- FuzzerUtil.h - Internal header for the Fuzzer Utils ------*- C++ -* ===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Util functions.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FUZZER_UTIL_H
+#define LLVM_FUZZER_UTIL_H
+
+#include "FuzzerDefs.h"
+
+namespace fuzzer {
+
+void PrintHexArray(const Unit &U, const char *PrintAfter = "");
+
+void PrintHexArray(const uint8_t *Data, size_t Size,
+                   const char *PrintAfter = "");
+
+void PrintASCII(const uint8_t *Data, size_t Size, const char *PrintAfter = "");
+
+void PrintASCII(const Unit &U, const char *PrintAfter = "");
+
+// Changes U to contain only ASCII (isprint+isspace) characters.
+// Returns true iff U has been changed.
+bool ToASCII(uint8_t *Data, size_t Size);
+
+bool IsASCII(const Unit &U);
+
+bool IsASCII(const uint8_t *Data, size_t Size);
+
+std::string Base64(const Unit &U);
+
+void PrintPC(const char *SymbolizedFMT, const char *FallbackFMT, uintptr_t PC);
+
+std::string DescribePC(const char *SymbolizedFMT, uintptr_t PC);
+
+unsigned NumberOfCpuCores();
+
+// Platform specific functions.
+void SetSignalHandler(const FuzzingOptions& Options);
+
+void SleepSeconds(int Seconds);
+
+unsigned long GetPid();
+
+size_t GetPeakRSSMb();
+
+int ExecuteCommand(const std::string &Command);
+
+FILE *OpenProcessPipe(const char *Command, const char *Mode);
+
+const void *SearchMemory(const void *haystack, size_t haystacklen,
+                         const void *needle, size_t needlelen);
+
+std::string CloneArgsWithoutX(const Vector<std::string> &Args,
+                              const char *X1, const char *X2);
+
+inline std::string CloneArgsWithoutX(const Vector<std::string> &Args,
+                                     const char *X) {
+  return CloneArgsWithoutX(Args, X, X);
+}
+
+inline std::pair<std::string, std::string> SplitBefore(std::string X,
+                                                       std::string S) {
+  auto Pos = S.find(X);
+  if (Pos == std::string::npos)
+    return std::make_pair(S, "");
+  return std::make_pair(S.substr(0, Pos), S.substr(Pos));
+}
+
+std::string DisassembleCmd(const std::string &FileName);
+
+std::string SearchRegexCmd(const std::string &Regex);
+
+size_t SimpleFastHash(const uint8_t *Data, size_t Size);
+
+}  // namespace fuzzer
+
+#endif  // LLVM_FUZZER_UTIL_H
diff --git a/lib/fuzzer/FuzzerUtilDarwin.cpp b/lib/fuzzer/FuzzerUtilDarwin.cpp
new file mode 100644
index 0000000..2df4872
--- /dev/null
+++ b/lib/fuzzer/FuzzerUtilDarwin.cpp
@@ -0,0 +1,161 @@
+//===- FuzzerUtilDarwin.cpp - Misc utils ----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Misc utils for Darwin.
+//===----------------------------------------------------------------------===//
+#include "FuzzerDefs.h"
+#if LIBFUZZER_APPLE
+
+#include "FuzzerIO.h"
+#include <mutex>
+#include <signal.h>
+#include <spawn.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/wait.h>
+
+// There is no header for this on macOS so declare here
+extern "C" char **environ;
+
+namespace fuzzer {
+
+static std::mutex SignalMutex;
+// Global variables used to keep track of how signal handling should be
+// restored. They should **not** be accessed without holding `SignalMutex`.
+static int ActiveThreadCount = 0;
+static struct sigaction OldSigIntAction;
+static struct sigaction OldSigQuitAction;
+static sigset_t OldBlockedSignalsSet;
+
+// This is a reimplementation of Libc's `system()`. On Darwin the Libc
+// implementation contains a mutex which prevents it from being used
+// concurrently. This implementation **can** be used concurrently. It sets the
+// signal handlers when the first thread enters and restores them when the last
+// thread finishes execution of the function and ensures this is not racey by
+// using a mutex.
+int ExecuteCommand(const std::string &Command) {
+  posix_spawnattr_t SpawnAttributes;
+  if (posix_spawnattr_init(&SpawnAttributes))
+    return -1;
+  // Block and ignore signals of the current process when the first thread
+  // enters.
+  {
+    std::lock_guard<std::mutex> Lock(SignalMutex);
+    if (ActiveThreadCount == 0) {
+      static struct sigaction IgnoreSignalAction;
+      sigset_t BlockedSignalsSet;
+      memset(&IgnoreSignalAction, 0, sizeof(IgnoreSignalAction));
+      IgnoreSignalAction.sa_handler = SIG_IGN;
+
+      if (sigaction(SIGINT, &IgnoreSignalAction, &OldSigIntAction) == -1) {
+        Printf("Failed to ignore SIGINT\n");
+        (void)posix_spawnattr_destroy(&SpawnAttributes);
+        return -1;
+      }
+      if (sigaction(SIGQUIT, &IgnoreSignalAction, &OldSigQuitAction) == -1) {
+        Printf("Failed to ignore SIGQUIT\n");
+        // Try our best to restore the signal handlers.
+        (void)sigaction(SIGINT, &OldSigIntAction, NULL);
+        (void)posix_spawnattr_destroy(&SpawnAttributes);
+        return -1;
+      }
+
+      (void)sigemptyset(&BlockedSignalsSet);
+      (void)sigaddset(&BlockedSignalsSet, SIGCHLD);
+      if (sigprocmask(SIG_BLOCK, &BlockedSignalsSet, &OldBlockedSignalsSet) ==
+          -1) {
+        Printf("Failed to block SIGCHLD\n");
+        // Try our best to restore the signal handlers.
+        (void)sigaction(SIGQUIT, &OldSigQuitAction, NULL);
+        (void)sigaction(SIGINT, &OldSigIntAction, NULL);
+        (void)posix_spawnattr_destroy(&SpawnAttributes);
+        return -1;
+      }
+    }
+    ++ActiveThreadCount;
+  }
+
+  // NOTE: Do not introduce any new `return` statements past this
+  // point. It is important that `ActiveThreadCount` always be decremented
+  // when leaving this function.
+
+  // Make sure the child process uses the default handlers for the
+  // following signals rather than inheriting what the parent has.
+  sigset_t DefaultSigSet;
+  (void)sigemptyset(&DefaultSigSet);
+  (void)sigaddset(&DefaultSigSet, SIGQUIT);
+  (void)sigaddset(&DefaultSigSet, SIGINT);
+  (void)posix_spawnattr_setsigdefault(&SpawnAttributes, &DefaultSigSet);
+  // Make sure the child process doesn't block SIGCHLD
+  (void)posix_spawnattr_setsigmask(&SpawnAttributes, &OldBlockedSignalsSet);
+  short SpawnFlags = POSIX_SPAWN_SETSIGDEF | POSIX_SPAWN_SETSIGMASK;
+  (void)posix_spawnattr_setflags(&SpawnAttributes, SpawnFlags);
+
+  pid_t Pid;
+  char **Environ = environ; // Read from global
+  const char *CommandCStr = Command.c_str();
+  char *const Argv[] = {
+    strdup("sh"),
+    strdup("-c"),
+    strdup(CommandCStr),
+    NULL
+  };
+  int ErrorCode = 0, ProcessStatus = 0;
+  // FIXME: We probably shouldn't hardcode the shell path.
+  ErrorCode = posix_spawn(&Pid, "/bin/sh", NULL, &SpawnAttributes,
+                          Argv, Environ);
+  (void)posix_spawnattr_destroy(&SpawnAttributes);
+  if (!ErrorCode) {
+    pid_t SavedPid = Pid;
+    do {
+      // Repeat until call completes uninterrupted.
+      Pid = waitpid(SavedPid, &ProcessStatus, /*options=*/0);
+    } while (Pid == -1 && errno == EINTR);
+    if (Pid == -1) {
+      // Fail for some other reason.
+      ProcessStatus = -1;
+    }
+  } else if (ErrorCode == ENOMEM || ErrorCode == EAGAIN) {
+    // Fork failure.
+    ProcessStatus = -1;
+  } else {
+    // Shell execution failure.
+    ProcessStatus = W_EXITCODE(127, 0);
+  }
+  for (unsigned i = 0, n = sizeof(Argv) / sizeof(Argv[0]); i < n; ++i)
+    free(Argv[i]);
+
+  // Restore the signal handlers of the current process when the last thread
+  // using this function finishes.
+  {
+    std::lock_guard<std::mutex> Lock(SignalMutex);
+    --ActiveThreadCount;
+    if (ActiveThreadCount == 0) {
+      bool FailedRestore = false;
+      if (sigaction(SIGINT, &OldSigIntAction, NULL) == -1) {
+        Printf("Failed to restore SIGINT handling\n");
+        FailedRestore = true;
+      }
+      if (sigaction(SIGQUIT, &OldSigQuitAction, NULL) == -1) {
+        Printf("Failed to restore SIGQUIT handling\n");
+        FailedRestore = true;
+      }
+      if (sigprocmask(SIG_BLOCK, &OldBlockedSignalsSet, NULL) == -1) {
+        Printf("Failed to unblock SIGCHLD\n");
+        FailedRestore = true;
+      }
+      if (FailedRestore)
+        ProcessStatus = -1;
+    }
+  }
+  return ProcessStatus;
+}
+
+} // namespace fuzzer
+
+#endif // LIBFUZZER_APPLE
diff --git a/lib/fuzzer/FuzzerUtilLinux.cpp b/lib/fuzzer/FuzzerUtilLinux.cpp
new file mode 100644
index 0000000..69d46b5
--- /dev/null
+++ b/lib/fuzzer/FuzzerUtilLinux.cpp
@@ -0,0 +1,24 @@
+//===- FuzzerUtilLinux.cpp - Misc utils for Linux. ------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Misc utils for Linux.
+//===----------------------------------------------------------------------===//
+#include "FuzzerDefs.h"
+#if LIBFUZZER_LINUX || LIBFUZZER_NETBSD
+
+#include <stdlib.h>
+
+namespace fuzzer {
+
+int ExecuteCommand(const std::string &Command) {
+  return system(Command.c_str());
+}
+
+} // namespace fuzzer
+
+#endif // LIBFUZZER_LINUX || LIBFUZZER_NETBSD
diff --git a/lib/fuzzer/FuzzerUtilPosix.cpp b/lib/fuzzer/FuzzerUtilPosix.cpp
new file mode 100644
index 0000000..24c5ccc
--- /dev/null
+++ b/lib/fuzzer/FuzzerUtilPosix.cpp
@@ -0,0 +1,143 @@
+//===- FuzzerUtilPosix.cpp - Misc utils for Posix. ------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Misc utils implementation using Posix API.
+//===----------------------------------------------------------------------===//
+#include "FuzzerDefs.h"
+#if LIBFUZZER_POSIX
+#include "FuzzerIO.h"
+#include "FuzzerInternal.h"
+#include <cassert>
+#include <chrono>
+#include <cstring>
+#include <errno.h>
+#include <iomanip>
+#include <signal.h>
+#include <stdio.h>
+#include <sys/resource.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <thread>
+#include <unistd.h>
+
+namespace fuzzer {
+
+static void AlarmHandler(int, siginfo_t *, void *) {
+  Fuzzer::StaticAlarmCallback();
+}
+
+static void CrashHandler(int, siginfo_t *, void *) {
+  Fuzzer::StaticCrashSignalCallback();
+}
+
+static void InterruptHandler(int, siginfo_t *, void *) {
+  Fuzzer::StaticInterruptCallback();
+}
+
+static void FileSizeExceedHandler(int, siginfo_t *, void *) {
+  Fuzzer::StaticFileSizeExceedCallback();
+}
+
+static void SetSigaction(int signum,
+                         void (*callback)(int, siginfo_t *, void *)) {
+  struct sigaction sigact = {};
+  if (sigaction(signum, nullptr, &sigact)) {
+    Printf("libFuzzer: sigaction failed with %d\n", errno);
+    exit(1);
+  }
+  if (sigact.sa_flags & SA_SIGINFO) {
+    if (sigact.sa_sigaction)
+      return;
+  } else {
+    if (sigact.sa_handler != SIG_DFL && sigact.sa_handler != SIG_IGN &&
+        sigact.sa_handler != SIG_ERR)
+      return;
+  }
+
+  sigact = {};
+  sigact.sa_sigaction = callback;
+  if (sigaction(signum, &sigact, 0)) {
+    Printf("libFuzzer: sigaction failed with %d\n", errno);
+    exit(1);
+  }
+}
+
+void SetTimer(int Seconds) {
+  struct itimerval T {
+    {Seconds, 0}, { Seconds, 0 }
+  };
+  if (setitimer(ITIMER_REAL, &T, nullptr)) {
+    Printf("libFuzzer: setitimer failed with %d\n", errno);
+    exit(1);
+  }
+  SetSigaction(SIGALRM, AlarmHandler);
+}
+
+void SetSignalHandler(const FuzzingOptions& Options) {
+  if (Options.UnitTimeoutSec > 0)
+    SetTimer(Options.UnitTimeoutSec / 2 + 1);
+  if (Options.HandleInt)
+    SetSigaction(SIGINT, InterruptHandler);
+  if (Options.HandleTerm)
+    SetSigaction(SIGTERM, InterruptHandler);
+  if (Options.HandleSegv)
+    SetSigaction(SIGSEGV, CrashHandler);
+  if (Options.HandleBus)
+    SetSigaction(SIGBUS, CrashHandler);
+  if (Options.HandleAbrt)
+    SetSigaction(SIGABRT, CrashHandler);
+  if (Options.HandleIll)
+    SetSigaction(SIGILL, CrashHandler);
+  if (Options.HandleFpe)
+    SetSigaction(SIGFPE, CrashHandler);
+  if (Options.HandleXfsz)
+    SetSigaction(SIGXFSZ, FileSizeExceedHandler);
+}
+
+void SleepSeconds(int Seconds) {
+  sleep(Seconds); // Use C API to avoid coverage from instrumented libc++.
+}
+
+unsigned long GetPid() { return (unsigned long)getpid(); }
+
+size_t GetPeakRSSMb() {
+  struct rusage usage;
+  if (getrusage(RUSAGE_SELF, &usage))
+    return 0;
+  if (LIBFUZZER_LINUX) {
+    // ru_maxrss is in KiB
+    return usage.ru_maxrss >> 10;
+  } else if (LIBFUZZER_APPLE) {
+    // ru_maxrss is in bytes
+    return usage.ru_maxrss >> 20;
+  }
+  assert(0 && "GetPeakRSSMb() is not implemented for your platform");
+  return 0;
+}
+
+FILE *OpenProcessPipe(const char *Command, const char *Mode) {
+  return popen(Command, Mode);
+}
+
+const void *SearchMemory(const void *Data, size_t DataLen, const void *Patt,
+                         size_t PattLen) {
+  return memmem(Data, DataLen, Patt, PattLen);
+}
+
+std::string DisassembleCmd(const std::string &FileName) {
+  return "objdump -d " + FileName;
+}
+
+std::string SearchRegexCmd(const std::string &Regex) {
+  return "grep '" + Regex + "'";
+}
+
+}  // namespace fuzzer
+
+#endif // LIBFUZZER_POSIX
diff --git a/lib/fuzzer/FuzzerUtilWindows.cpp b/lib/fuzzer/FuzzerUtilWindows.cpp
new file mode 100644
index 0000000..e8bdd2e
--- /dev/null
+++ b/lib/fuzzer/FuzzerUtilWindows.cpp
@@ -0,0 +1,192 @@
+//===- FuzzerUtilWindows.cpp - Misc utils for Windows. --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Misc utils implementation for Windows.
+//===----------------------------------------------------------------------===//
+#include "FuzzerDefs.h"
+#if LIBFUZZER_WINDOWS
+#include "FuzzerIO.h"
+#include "FuzzerInternal.h"
+#include <cassert>
+#include <chrono>
+#include <cstring>
+#include <errno.h>
+#include <iomanip>
+#include <signal.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <windows.h>
+
+// This must be included after windows.h.
+#include <Psapi.h>
+
+namespace fuzzer {
+
+static const FuzzingOptions* HandlerOpt = nullptr;
+
+static LONG CALLBACK ExceptionHandler(PEXCEPTION_POINTERS ExceptionInfo) {
+  switch (ExceptionInfo->ExceptionRecord->ExceptionCode) {
+    case EXCEPTION_ACCESS_VIOLATION:
+    case EXCEPTION_ARRAY_BOUNDS_EXCEEDED:
+    case EXCEPTION_STACK_OVERFLOW:
+      if (HandlerOpt->HandleSegv)
+        Fuzzer::StaticCrashSignalCallback();
+      break;
+    case EXCEPTION_DATATYPE_MISALIGNMENT:
+    case EXCEPTION_IN_PAGE_ERROR:
+      if (HandlerOpt->HandleBus)
+        Fuzzer::StaticCrashSignalCallback();
+      break;
+    case EXCEPTION_ILLEGAL_INSTRUCTION:
+    case EXCEPTION_PRIV_INSTRUCTION:
+      if (HandlerOpt->HandleIll)
+        Fuzzer::StaticCrashSignalCallback();
+      break;
+    case EXCEPTION_FLT_DENORMAL_OPERAND:
+    case EXCEPTION_FLT_DIVIDE_BY_ZERO:
+    case EXCEPTION_FLT_INEXACT_RESULT:
+    case EXCEPTION_FLT_INVALID_OPERATION:
+    case EXCEPTION_FLT_OVERFLOW:
+    case EXCEPTION_FLT_STACK_CHECK:
+    case EXCEPTION_FLT_UNDERFLOW:
+    case EXCEPTION_INT_DIVIDE_BY_ZERO:
+    case EXCEPTION_INT_OVERFLOW:
+      if (HandlerOpt->HandleFpe)
+        Fuzzer::StaticCrashSignalCallback();
+      break;
+    // TODO: handle (Options.HandleXfsz)
+  }
+  return EXCEPTION_CONTINUE_SEARCH;
+}
+
+BOOL WINAPI CtrlHandler(DWORD dwCtrlType) {
+  switch (dwCtrlType) {
+    case CTRL_C_EVENT:
+      if (HandlerOpt->HandleInt)
+        Fuzzer::StaticInterruptCallback();
+      return TRUE;
+    case CTRL_BREAK_EVENT:
+      if (HandlerOpt->HandleTerm)
+        Fuzzer::StaticInterruptCallback();
+      return TRUE;
+  }
+  return FALSE;
+}
+
+void CALLBACK AlarmHandler(PVOID, BOOLEAN) {
+  Fuzzer::StaticAlarmCallback();
+}
+
+class TimerQ {
+  HANDLE TimerQueue;
+ public:
+  TimerQ() : TimerQueue(NULL) {};
+  ~TimerQ() {
+    if (TimerQueue)
+      DeleteTimerQueueEx(TimerQueue, NULL);
+  };
+  void SetTimer(int Seconds) {
+    if (!TimerQueue) {
+      TimerQueue = CreateTimerQueue();
+      if (!TimerQueue) {
+        Printf("libFuzzer: CreateTimerQueue failed.\n");
+        exit(1);
+      }
+    }
+    HANDLE Timer;
+    if (!CreateTimerQueueTimer(&Timer, TimerQueue, AlarmHandler, NULL,
+        Seconds*1000, Seconds*1000, 0)) {
+      Printf("libFuzzer: CreateTimerQueueTimer failed.\n");
+      exit(1);
+    }
+  };
+};
+
+static TimerQ Timer;
+
+static void CrashHandler(int) { Fuzzer::StaticCrashSignalCallback(); }
+
+void SetSignalHandler(const FuzzingOptions& Options) {
+  HandlerOpt = &Options;
+
+  if (Options.UnitTimeoutSec > 0)
+    Timer.SetTimer(Options.UnitTimeoutSec / 2 + 1);
+
+  if (Options.HandleInt || Options.HandleTerm)
+    if (!SetConsoleCtrlHandler(CtrlHandler, TRUE)) {
+      DWORD LastError = GetLastError();
+      Printf("libFuzzer: SetConsoleCtrlHandler failed (Error code: %lu).\n",
+        LastError);
+      exit(1);
+    }
+
+  if (Options.HandleSegv || Options.HandleBus || Options.HandleIll ||
+      Options.HandleFpe)
+    SetUnhandledExceptionFilter(ExceptionHandler);
+
+  if (Options.HandleAbrt)
+    if (SIG_ERR == signal(SIGABRT, CrashHandler)) {
+      Printf("libFuzzer: signal failed with %d\n", errno);
+      exit(1);
+    }
+}
+
+void SleepSeconds(int Seconds) { Sleep(Seconds * 1000); }
+
+unsigned long GetPid() { return GetCurrentProcessId(); }
+
+size_t GetPeakRSSMb() {
+  PROCESS_MEMORY_COUNTERS info;
+  if (!GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)))
+    return 0;
+  return info.PeakWorkingSetSize >> 20;
+}
+
+FILE *OpenProcessPipe(const char *Command, const char *Mode) {
+  return _popen(Command, Mode);
+}
+
+int ExecuteCommand(const std::string &Command) {
+  return system(Command.c_str());
+}
+
+const void *SearchMemory(const void *Data, size_t DataLen, const void *Patt,
+                         size_t PattLen) {
+  // TODO: make this implementation more efficient.
+  const char *Cdata = (const char *)Data;
+  const char *Cpatt = (const char *)Patt;
+
+  if (!Data || !Patt || DataLen == 0 || PattLen == 0 || DataLen < PattLen)
+    return NULL;
+
+  if (PattLen == 1)
+    return memchr(Data, *Cpatt, DataLen);
+
+  const char *End = Cdata + DataLen - PattLen + 1;
+
+  for (const char *It = Cdata; It < End; ++It)
+    if (It[0] == Cpatt[0] && memcmp(It, Cpatt, PattLen) == 0)
+      return It;
+
+  return NULL;
+}
+
+std::string DisassembleCmd(const std::string &FileName) {
+  if (ExecuteCommand("dumpbin /summary > nul") == 0)
+    return "dumpbin /disasm " + FileName;
+  Printf("libFuzzer: couldn't find tool to disassemble (dumpbin)\n");
+  exit(1);
+}
+
+std::string SearchRegexCmd(const std::string &Regex) {
+  return "findstr /r \"" + Regex + "\"";
+}
+
+} // namespace fuzzer
+
+#endif // LIBFUZZER_WINDOWS
diff --git a/lib/fuzzer/FuzzerValueBitMap.h b/lib/fuzzer/FuzzerValueBitMap.h
new file mode 100644
index 0000000..13d7cbd
--- /dev/null
+++ b/lib/fuzzer/FuzzerValueBitMap.h
@@ -0,0 +1,73 @@
+//===- FuzzerValueBitMap.h - INTERNAL - Bit map -----------------*- C++ -* ===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// ValueBitMap.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FUZZER_VALUE_BIT_MAP_H
+#define LLVM_FUZZER_VALUE_BIT_MAP_H
+
+#include "FuzzerDefs.h"
+
+namespace fuzzer {
+
+// A bit map containing kMapSizeInWords bits.
+struct ValueBitMap {
+  static const size_t kMapSizeInBits = 1 << 16;
+  static const size_t kMapPrimeMod = 65371;  // Largest Prime < kMapSizeInBits;
+  static const size_t kBitsInWord = (sizeof(uintptr_t) * 8);
+  static const size_t kMapSizeInWords = kMapSizeInBits / kBitsInWord;
+ public:
+
+  // Clears all bits.
+  void Reset() { memset(Map, 0, sizeof(Map)); }
+
+  // Computes a hash function of Value and sets the corresponding bit.
+  // Returns true if the bit was changed from 0 to 1.
+  ATTRIBUTE_NO_SANITIZE_ALL
+  inline bool AddValue(uintptr_t Value) {
+    uintptr_t Idx = Value % kMapSizeInBits;
+    uintptr_t WordIdx = Idx / kBitsInWord;
+    uintptr_t BitIdx = Idx % kBitsInWord;
+    uintptr_t Old = Map[WordIdx];
+    uintptr_t New = Old | (1UL << BitIdx);
+    Map[WordIdx] = New;
+    return New != Old;
+  }
+
+  ATTRIBUTE_NO_SANITIZE_ALL
+  inline bool AddValueModPrime(uintptr_t Value) {
+    return AddValue(Value % kMapPrimeMod);
+  }
+
+  inline bool Get(uintptr_t Idx) {
+    assert(Idx < kMapSizeInBits);
+    uintptr_t WordIdx = Idx / kBitsInWord;
+    uintptr_t BitIdx = Idx % kBitsInWord;
+    return Map[WordIdx] & (1UL << BitIdx);
+  }
+
+  size_t SizeInBits() const { return kMapSizeInBits; }
+
+  template <class Callback>
+  ATTRIBUTE_NO_SANITIZE_ALL
+  void ForEach(Callback CB) const {
+    for (size_t i = 0; i < kMapSizeInWords; i++)
+      if (uintptr_t M = Map[i])
+        for (size_t j = 0; j < sizeof(M) * 8; j++)
+          if (M & ((uintptr_t)1 << j))
+            CB(i * sizeof(M) * 8 + j);
+  }
+
+ private:
+  uintptr_t Map[kMapSizeInWords] __attribute__((aligned(512)));
+};
+
+}  // namespace fuzzer
+
+#endif  // LLVM_FUZZER_VALUE_BIT_MAP_H
diff --git a/lib/fuzzer/README.txt b/lib/fuzzer/README.txt
new file mode 100644
index 0000000..3eee01c
--- /dev/null
+++ b/lib/fuzzer/README.txt
@@ -0,0 +1 @@
+See http://llvm.org/docs/LibFuzzer.html
diff --git a/lib/fuzzer/afl/afl_driver.cpp b/lib/fuzzer/afl/afl_driver.cpp
new file mode 100644
index 0000000..f102479
--- /dev/null
+++ b/lib/fuzzer/afl/afl_driver.cpp
@@ -0,0 +1,341 @@
+//===- afl_driver.cpp - a glue between AFL and libFuzzer --------*- C++ -* ===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===//
+
+/* This file allows to fuzz libFuzzer-style target functions
+ (LLVMFuzzerTestOneInput) with AFL using AFL's persistent (in-process) mode.
+
+Usage:
+################################################################################
+cat << EOF > test_fuzzer.cc
+#include <stddef.h>
+#include <stdint.h>
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+  if (size > 0 && data[0] == 'H')
+    if (size > 1 && data[1] == 'I')
+       if (size > 2 && data[2] == '!')
+       __builtin_trap();
+  return 0;
+}
+EOF
+# Build your target with -fsanitize-coverage=trace-pc-guard using fresh clang.
+clang -g -fsanitize-coverage=trace-pc-guard test_fuzzer.cc -c
+# Build afl-llvm-rt.o.c from the AFL distribution.
+clang -c -w $AFL_HOME/llvm_mode/afl-llvm-rt.o.c
+# Build this file, link it with afl-llvm-rt.o.o and the target code.
+clang++ afl_driver.cpp test_fuzzer.o afl-llvm-rt.o.o
+# Run AFL:
+rm -rf IN OUT; mkdir IN OUT; echo z > IN/z;
+$AFL_HOME/afl-fuzz -i IN -o OUT ./a.out
+################################################################################
+Environment Variables:
+There are a few environment variables that can be set to use features that
+afl-fuzz doesn't have.
+
+AFL_DRIVER_STDERR_DUPLICATE_FILENAME: Setting this *appends* stderr to the file
+specified. If the file does not exist, it is created. This is useful for getting
+stack traces (when using ASAN for example) or original error messages on hard to
+reproduce bugs.
+
+AFL_DRIVER_EXTRA_STATS_FILENAME: Setting this causes afl_driver to write extra
+statistics to the file specified. Currently these are peak_rss_mb
+(the peak amount of virtual memory used in MB) and slowest_unit_time_secs. If
+the file does not exist it is created. If the file does exist then
+afl_driver assumes it was restarted by afl-fuzz and will try to read old
+statistics from the file. If that fails then the process will quit.
+
+*/
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#include <fstream>
+#include <iostream>
+#include <vector>
+
+// Platform detection. Copied from FuzzerInternal.h
+#ifdef __linux__
+#define LIBFUZZER_LINUX 1
+#define LIBFUZZER_APPLE 0
+#define LIBFUZZER_NETBSD 0
+#elif __APPLE__
+#define LIBFUZZER_LINUX 0
+#define LIBFUZZER_APPLE 1
+#define LIBFUZZER_NETBSD 0
+#elif __NetBSD__
+#define LIBFUZZER_LINUX 0
+#define LIBFUZZER_APPLE 0
+#define LIBFUZZER_NETBSD 1
+#else
+#error "Support for your platform has not been implemented"
+#endif
+
+// Used to avoid repeating error checking boilerplate. If cond is false, a
+// fatal error has occured in the program. In this event print error_message
+// to stderr and abort(). Otherwise do nothing. Note that setting
+// AFL_DRIVER_STDERR_DUPLICATE_FILENAME may cause error_message to be appended
+// to the file as well, if the error occurs after the duplication is performed.
+#define CHECK_ERROR(cond, error_message)                                       \
+  if (!(cond)) {                                                               \
+    fprintf(stderr, (error_message));                                          \
+    abort();                                                                   \
+  }
+
+// libFuzzer interface is thin, so we don't include any libFuzzer headers.
+extern "C" {
+int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size);
+__attribute__((weak)) int LLVMFuzzerInitialize(int *argc, char ***argv);
+}
+
+// Notify AFL about persistent mode.
+static volatile char AFL_PERSISTENT[] = "##SIG_AFL_PERSISTENT##";
+extern "C" int __afl_persistent_loop(unsigned int);
+static volatile char suppress_warning2 = AFL_PERSISTENT[0];
+
+// Notify AFL about deferred forkserver.
+static volatile char AFL_DEFER_FORKSVR[] = "##SIG_AFL_DEFER_FORKSRV##";
+extern "C" void  __afl_manual_init();
+static volatile char suppress_warning1 = AFL_DEFER_FORKSVR[0];
+
+// Input buffer.
+static const size_t kMaxAflInputSize = 1 << 20;
+static uint8_t AflInputBuf[kMaxAflInputSize];
+
+// Variables we need for writing to the extra stats file.
+static FILE *extra_stats_file = NULL;
+static uint32_t previous_peak_rss = 0;
+static time_t slowest_unit_time_secs = 0;
+static const int kNumExtraStats = 2;
+static const char *kExtraStatsFormatString = "peak_rss_mb            : %u\n"
+                                             "slowest_unit_time_sec  : %u\n";
+
+// Copied from FuzzerUtil.cpp.
+size_t GetPeakRSSMb() {
+  struct rusage usage;
+  if (getrusage(RUSAGE_SELF, &usage))
+    return 0;
+  if (LIBFUZZER_LINUX || LIBFUZZER_NETBSD) {
+    // ru_maxrss is in KiB
+    return usage.ru_maxrss >> 10;
+  } else if (LIBFUZZER_APPLE) {
+    // ru_maxrss is in bytes
+    return usage.ru_maxrss >> 20;
+  }
+  assert(0 && "GetPeakRSSMb() is not implemented for your platform");
+  return 0;
+}
+
+// Based on SetSigaction in FuzzerUtil.cpp
+static void SetSigaction(int signum,
+                         void (*callback)(int, siginfo_t *, void *)) {
+  struct sigaction sigact;
+  memset(&sigact, 0, sizeof(sigact));
+  sigact.sa_sigaction = callback;
+  if (sigaction(signum, &sigact, 0)) {
+    fprintf(stderr, "libFuzzer: sigaction failed with %d\n", errno);
+    exit(1);
+  }
+}
+
+// Write extra stats to the file specified by the user. If none is specified
+// this function will never be called.
+static void write_extra_stats() {
+  uint32_t peak_rss = GetPeakRSSMb();
+
+  if (peak_rss < previous_peak_rss)
+    peak_rss = previous_peak_rss;
+
+  int chars_printed = fprintf(extra_stats_file, kExtraStatsFormatString,
+                              peak_rss, slowest_unit_time_secs);
+
+  CHECK_ERROR(chars_printed != 0, "Failed to write extra_stats_file");
+
+  CHECK_ERROR(fclose(extra_stats_file) == 0,
+              "Failed to close extra_stats_file");
+}
+
+// Call write_extra_stats before we exit.
+static void crash_handler(int, siginfo_t *, void *) {
+  // Make sure we don't try calling write_extra_stats again if we crashed while
+  // trying to call it.
+  static bool first_crash = true;
+  CHECK_ERROR(first_crash,
+              "Crashed in crash signal handler. This is a bug in the fuzzer.");
+
+  first_crash = false;
+  write_extra_stats();
+}
+
+// If the user has specified an extra_stats_file through the environment
+// variable AFL_DRIVER_EXTRA_STATS_FILENAME, then perform necessary set up
+// to write stats to it on exit. If no file is specified, do nothing. Otherwise
+// install signal and exit handlers to write to the file when the process exits.
+// Then if the file doesn't exist create it and set extra stats to 0. But if it
+// does exist then read the initial values of the extra stats from the file
+// and check that the file is writable.
+static void maybe_initialize_extra_stats() {
+  // If AFL_DRIVER_EXTRA_STATS_FILENAME isn't set then we have nothing to do.
+  char *extra_stats_filename = getenv("AFL_DRIVER_EXTRA_STATS_FILENAME");
+  if (!extra_stats_filename)
+    return;
+
+  // Open the file and find the previous peak_rss_mb value.
+  // This is necessary because the fuzzing process is restarted after N
+  // iterations are completed. So we may need to get this value from a previous
+  // process to be accurate.
+  extra_stats_file = fopen(extra_stats_filename, "r");
+
+  // If extra_stats_file already exists: read old stats from it.
+  if (extra_stats_file) {
+    int matches = fscanf(extra_stats_file, kExtraStatsFormatString,
+                         &previous_peak_rss, &slowest_unit_time_secs);
+
+    // Make sure we have read a real extra stats file and that we have used it
+    // to set slowest_unit_time_secs and previous_peak_rss.
+    CHECK_ERROR(matches == kNumExtraStats, "Extra stats file is corrupt");
+
+    CHECK_ERROR(fclose(extra_stats_file) == 0, "Failed to close file");
+
+    // Now open the file for writing.
+    extra_stats_file = fopen(extra_stats_filename, "w");
+    CHECK_ERROR(extra_stats_file,
+                "Failed to open extra stats file for writing");
+  } else {
+    // Looks like this is the first time in a fuzzing job this is being called.
+    extra_stats_file = fopen(extra_stats_filename, "w+");
+    CHECK_ERROR(extra_stats_file, "failed to create extra stats file");
+  }
+
+  // Make sure that crash_handler gets called on any kind of fatal error.
+  int crash_signals[] = {SIGSEGV, SIGBUS, SIGABRT, SIGILL, SIGFPE,  SIGINT,
+                         SIGTERM};
+
+  const size_t num_signals = sizeof(crash_signals) / sizeof(crash_signals[0]);
+
+  for (size_t idx = 0; idx < num_signals; idx++)
+    SetSigaction(crash_signals[idx], crash_handler);
+
+  // Make sure it gets called on other kinds of exits.
+  atexit(write_extra_stats);
+}
+
+// If the user asks us to duplicate stderr, then do it.
+static void maybe_duplicate_stderr() {
+  char* stderr_duplicate_filename =
+      getenv("AFL_DRIVER_STDERR_DUPLICATE_FILENAME");
+
+  if (!stderr_duplicate_filename)
+    return;
+
+  FILE* stderr_duplicate_stream =
+      freopen(stderr_duplicate_filename, "a+", stderr);
+
+  if (!stderr_duplicate_stream) {
+    fprintf(
+        stderr,
+        "Failed to duplicate stderr to AFL_DRIVER_STDERR_DUPLICATE_FILENAME");
+    abort();
+  }
+}
+
+// Define LLVMFuzzerMutate to avoid link failures for targets that use it
+// with libFuzzer's LLVMFuzzerCustomMutator.
+extern "C" size_t LLVMFuzzerMutate(uint8_t *Data, size_t Size, size_t MaxSize) {
+  assert(false && "LLVMFuzzerMutate should not be called from afl_driver");
+  return 0;
+}
+
+// Execute any files provided as parameters.
+int ExecuteFilesOnyByOne(int argc, char **argv) {
+  for (int i = 1; i < argc; i++) {
+    std::ifstream in(argv[i]);
+    in.seekg(0, in.end);
+    size_t length = in.tellg();
+    in.seekg (0, in.beg);
+    std::cout << "Reading " << length << " bytes from " << argv[i] << std::endl;
+    // Allocate exactly length bytes so that we reliably catch buffer overflows.
+    std::vector<char> bytes(length);
+    in.read(bytes.data(), bytes.size());
+    assert(in);
+    LLVMFuzzerTestOneInput(reinterpret_cast<const uint8_t *>(bytes.data()),
+                           bytes.size());
+    std::cout << "Execution successfull" << std::endl;
+  }
+  return 0;
+}
+
+int main(int argc, char **argv) {
+  fprintf(stderr,
+      "======================= INFO =========================\n"
+      "This binary is built for AFL-fuzz.\n"
+      "To run the target function on individual input(s) execute this:\n"
+      "  %s < INPUT_FILE\n"
+      "or\n"
+      "  %s INPUT_FILE1 [INPUT_FILE2 ... ]\n"
+      "To fuzz with afl-fuzz execute this:\n"
+      "  afl-fuzz [afl-flags] %s [-N]\n"
+      "afl-fuzz will run N iterations before "
+      "re-spawning the process (default: 1000)\n"
+      "======================================================\n",
+          argv[0], argv[0], argv[0]);
+  if (LLVMFuzzerInitialize)
+    LLVMFuzzerInitialize(&argc, &argv);
+  // Do any other expensive one-time initialization here.
+
+  maybe_duplicate_stderr();
+  maybe_initialize_extra_stats();
+
+  __afl_manual_init();
+
+  int N = 1000;
+  if (argc == 2 && argv[1][0] == '-')
+      N = atoi(argv[1] + 1);
+  else if(argc == 2 && (N = atoi(argv[1])) > 0)
+      fprintf(stderr, "WARNING: using the deprecated call style `%s %d`\n",
+              argv[0], N);
+  else if (argc > 1)
+    return ExecuteFilesOnyByOne(argc, argv);
+
+  assert(N > 0);
+  time_t unit_time_secs;
+  int num_runs = 0;
+  while (__afl_persistent_loop(N)) {
+    ssize_t n_read = read(0, AflInputBuf, kMaxAflInputSize);
+    if (n_read > 0) {
+      // Copy AflInputBuf into a separate buffer to let asan find buffer
+      // overflows. Don't use unique_ptr/etc to avoid extra dependencies.
+      uint8_t *copy = new uint8_t[n_read];
+      memcpy(copy, AflInputBuf, n_read);
+
+      struct timeval unit_start_time;
+      CHECK_ERROR(gettimeofday(&unit_start_time, NULL) == 0,
+                  "Calling gettimeofday failed");
+
+      num_runs++;
+      LLVMFuzzerTestOneInput(copy, n_read);
+
+      struct timeval unit_stop_time;
+      CHECK_ERROR(gettimeofday(&unit_stop_time, NULL) == 0,
+                  "Calling gettimeofday failed");
+
+      // Update slowest_unit_time_secs if we see a new max.
+      unit_time_secs = unit_stop_time.tv_sec - unit_start_time.tv_sec;
+      if (slowest_unit_time_secs < unit_time_secs)
+        slowest_unit_time_secs = unit_time_secs;
+
+      delete[] copy;
+    }
+  }
+  fprintf(stderr, "%s: successfully executed %d input(s)\n", argv[0], num_runs);
+}
diff --git a/lib/fuzzer/build.sh b/lib/fuzzer/build.sh
new file mode 100755
index 0000000..4556af5
--- /dev/null
+++ b/lib/fuzzer/build.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+LIBFUZZER_SRC_DIR=$(dirname $0)
+CXX="${CXX:-clang}"
+for f in $LIBFUZZER_SRC_DIR/*.cpp; do
+  $CXX -g -O2 -fno-omit-frame-pointer -std=c++11 $f -c &
+done
+wait
+rm -f libFuzzer.a
+ar ru libFuzzer.a Fuzzer*.o
+rm -f Fuzzer*.o
+
diff --git a/lib/fuzzer/scripts/unbalanced_allocs.py b/lib/fuzzer/scripts/unbalanced_allocs.py
new file mode 100755
index 0000000..a4ce187
--- /dev/null
+++ b/lib/fuzzer/scripts/unbalanced_allocs.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python
+#===- lib/fuzzer/scripts/unbalanced_allocs.py ------------------------------===#
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+#
+# Post-process -trace_malloc=2 output and printout only allocations and frees
+# unbalanced inside of fuzzer runs.
+# Usage:
+#   my_fuzzer -trace_malloc=2 -runs=10 2>&1 | unbalanced_allocs.py -skip=5
+#
+#===------------------------------------------------------------------------===#
+
+import argparse
+import sys
+
+_skip = 0
+
+def PrintStack(line, stack):
+  global _skip
+  if _skip > 0:
+    return
+  print 'Unbalanced ' + line.rstrip();
+  for l in stack:
+    print l.rstrip()
+
+def ProcessStack(line, f):
+  stack = []
+  while line and line.startswith('    #'):
+    stack += [line]
+    line = f.readline()
+  return line, stack
+
+def ProcessFree(line, f, allocs):
+  if not line.startswith('FREE['):
+    return f.readline()
+
+  addr = int(line.split()[1], 16)
+  next_line, stack = ProcessStack(f.readline(), f)
+  if addr in allocs:
+    del allocs[addr]
+  else:
+    PrintStack(line, stack)
+  return next_line
+
+def ProcessMalloc(line, f, allocs):
+  if not line.startswith('MALLOC['):
+    return ProcessFree(line, f, allocs)
+
+  addr = int(line.split()[1], 16)
+  assert not addr in allocs
+
+  next_line, stack = ProcessStack(f.readline(), f)
+  allocs[addr] = (line, stack)
+  return next_line
+
+def ProcessRun(line, f):
+  if not line.startswith('MallocFreeTracer: START'):
+    return ProcessMalloc(line, f, {})
+
+  allocs = {}
+  print line.rstrip()
+  line = f.readline()
+  while line:
+    if line.startswith('MallocFreeTracer: STOP'):
+      global _skip
+      _skip = _skip - 1
+      for _, (l, s) in allocs.iteritems():
+        PrintStack(l, s)
+      print line.rstrip()
+      return f.readline()
+    line = ProcessMalloc(line, f, allocs)
+  return line
+
+def ProcessFile(f):
+  line = f.readline()
+  while line:
+    line = ProcessRun(line, f);
+
+def main(argv):
+  parser = argparse.ArgumentParser()
+  parser.add_argument('--skip', default=0, help='number of runs to ignore')
+  args = parser.parse_args()
+  global _skip
+  _skip = int(args.skip) + 1
+  ProcessFile(sys.stdin)
+
+if __name__ == '__main__':
+  main(sys.argv)
diff --git a/lib/fuzzer/standalone/StandaloneFuzzTargetMain.c b/lib/fuzzer/standalone/StandaloneFuzzTargetMain.c
new file mode 100644
index 0000000..0d76ea4
--- /dev/null
+++ b/lib/fuzzer/standalone/StandaloneFuzzTargetMain.c
@@ -0,0 +1,41 @@
+/*===- StandaloneFuzzTargetMain.c - standalone main() for fuzz targets. ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This main() function can be linked to a fuzz target (i.e. a library
+// that exports LLVMFuzzerTestOneInput() and possibly LLVMFuzzerInitialize())
+// instead of libFuzzer. This main() function will not perform any fuzzing
+// but will simply feed all input files one by one to the fuzz target.
+//
+// Use this file to provide reproducers for bugs when linking against libFuzzer
+// or other fuzzing engine is undesirable.
+//===----------------------------------------------------------------------===*/
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+extern int LLVMFuzzerTestOneInput(const unsigned char *data, size_t size);
+__attribute__((weak)) extern int LLVMFuzzerInitialize(int *argc, char ***argv);
+int main(int argc, char **argv) {
+  fprintf(stderr, "StandaloneFuzzTargetMain: running %d inputs\n", argc - 1);
+  if (LLVMFuzzerInitialize)
+    LLVMFuzzerInitialize(&argc, &argv);
+  for (int i = 1; i < argc; i++) {
+    fprintf(stderr, "Running: %s\n", argv[i]);
+    FILE *f = fopen(argv[i], "r");
+    assert(f);
+    fseek(f, 0, SEEK_END);
+    size_t len = ftell(f);
+    fseek(f, 0, SEEK_SET);
+    unsigned char *buf = (unsigned char*)malloc(len);
+    size_t n_read = fread(buf, 1, len, f);
+    assert(n_read == len);
+    LLVMFuzzerTestOneInput(buf, len);
+    free(buf);
+    fprintf(stderr, "Done:    %s: (%zd bytes)\n", argv[i], n_read);
+  }
+}
diff --git a/lib/fuzzer/tests/CMakeLists.txt b/lib/fuzzer/tests/CMakeLists.txt
new file mode 100644
index 0000000..dac8773
--- /dev/null
+++ b/lib/fuzzer/tests/CMakeLists.txt
@@ -0,0 +1,46 @@
+set(LIBFUZZER_UNITTEST_CFLAGS
+  ${COMPILER_RT_UNITTEST_CFLAGS}
+  ${COMPILER_RT_GTEST_CFLAGS}
+  -I${COMPILER_RT_SOURCE_DIR}/lib/fuzzer
+  -fno-rtti
+  -Werror
+  -O2)
+
+add_custom_target(FuzzerUnitTests)
+set_target_properties(FuzzerUnitTests PROPERTIES FOLDER "Compiler-RT Tests")
+
+set(LIBFUZZER_UNITTEST_LINK_FLAGS ${COMPILER_RT_UNITTEST_LINK_FLAGS})
+list(APPEND LIBFUZZER_UNITTEST_LINK_FLAGS --driver-mode=g++)
+
+if(APPLE)
+  list(APPEND LIBFUZZER_UNITTEST_LINK_FLAGS -lc++)
+else()
+  list(APPEND LIBFUZZER_UNITTEST_LINK_FLAGS -lstdc++ -lpthread)
+endif()
+
+foreach(arch ${FUZZER_SUPPORTED_ARCH})
+  set(LIBFUZZER_TEST_RUNTIME RTFuzzerTest.${arch})
+  if(APPLE)
+    set(LIBFUZZER_TEST_RUNTIME_OBJECTS
+      $<TARGET_OBJECTS:RTfuzzer.osx>)
+  else()
+    set(LIBFUZZER_TEST_RUNTIME_OBJECTS
+      $<TARGET_OBJECTS:RTfuzzer.${arch}>)
+  endif()
+  add_library(${LIBFUZZER_TEST_RUNTIME} STATIC
+    ${LIBFUZZER_TEST_RUNTIME_OBJECTS})
+  set_target_properties(${LIBFUZZER_TEST_RUNTIME} PROPERTIES
+    ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+    FOLDER "Compiler-RT Runtime tests")
+
+  set(FuzzerTestObjects)
+  generate_compiler_rt_tests(FuzzerTestObjects
+    FuzzerUnitTests "Fuzzer-${arch}-Test" ${arch}
+    SOURCES FuzzerUnittest.cpp ${COMPILER_RT_GTEST_SOURCE}
+    RUNTIME ${LIBFUZZER_TEST_RUNTIME}
+    DEPS gtest
+    CFLAGS ${LIBFUZZER_UNITTEST_CFLAGS}
+    LINK_FLAGS ${LIBFUZZER_UNITTEST_LINK_FLAGS})
+  set_target_properties(FuzzerUnitTests PROPERTIES
+    RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
+endforeach()
diff --git a/lib/fuzzer/tests/FuzzerUnittest.cpp b/lib/fuzzer/tests/FuzzerUnittest.cpp
new file mode 100644
index 0000000..f3e8226
--- /dev/null
+++ b/lib/fuzzer/tests/FuzzerUnittest.cpp
@@ -0,0 +1,772 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Avoid ODR violations (LibFuzzer is built without ASan and this test is built
+// with ASan) involving C++ standard library types when using libcxx.
+#define _LIBCPP_HAS_NO_ASAN
+
+// Do not attempt to use LLVM ostream from gtest.
+#define GTEST_NO_LLVM_RAW_OSTREAM 1
+
+#include "FuzzerCorpus.h"
+#include "FuzzerDictionary.h"
+#include "FuzzerInternal.h"
+#include "FuzzerMerge.h"
+#include "FuzzerMutate.h"
+#include "FuzzerRandom.h"
+#include "FuzzerTracePC.h"
+#include "gtest/gtest.h"
+#include <memory>
+#include <set>
+#include <sstream>
+
+using namespace fuzzer;
+
+// For now, have LLVMFuzzerTestOneInput just to make it link.
+// Later we may want to make unittests that actually call LLVMFuzzerTestOneInput.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  abort();
+}
+
+TEST(Fuzzer, CrossOver) {
+  std::unique_ptr<ExternalFunctions> t(new ExternalFunctions());
+  fuzzer::EF = t.get();
+  Random Rand(0);
+  MutationDispatcher MD(Rand, {});
+  Unit A({0, 1, 2}), B({5, 6, 7});
+  Unit C;
+  Unit Expected[] = {
+       { 0 },
+       { 0, 1 },
+       { 0, 5 },
+       { 0, 1, 2 },
+       { 0, 1, 5 },
+       { 0, 5, 1 },
+       { 0, 5, 6 },
+       { 0, 1, 2, 5 },
+       { 0, 1, 5, 2 },
+       { 0, 1, 5, 6 },
+       { 0, 5, 1, 2 },
+       { 0, 5, 1, 6 },
+       { 0, 5, 6, 1 },
+       { 0, 5, 6, 7 },
+       { 0, 1, 2, 5, 6 },
+       { 0, 1, 5, 2, 6 },
+       { 0, 1, 5, 6, 2 },
+       { 0, 1, 5, 6, 7 },
+       { 0, 5, 1, 2, 6 },
+       { 0, 5, 1, 6, 2 },
+       { 0, 5, 1, 6, 7 },
+       { 0, 5, 6, 1, 2 },
+       { 0, 5, 6, 1, 7 },
+       { 0, 5, 6, 7, 1 },
+       { 0, 1, 2, 5, 6, 7 },
+       { 0, 1, 5, 2, 6, 7 },
+       { 0, 1, 5, 6, 2, 7 },
+       { 0, 1, 5, 6, 7, 2 },
+       { 0, 5, 1, 2, 6, 7 },
+       { 0, 5, 1, 6, 2, 7 },
+       { 0, 5, 1, 6, 7, 2 },
+       { 0, 5, 6, 1, 2, 7 },
+       { 0, 5, 6, 1, 7, 2 },
+       { 0, 5, 6, 7, 1, 2 }
+  };
+  for (size_t Len = 1; Len < 8; Len++) {
+    Set<Unit> FoundUnits, ExpectedUnitsWitThisLength;
+    for (int Iter = 0; Iter < 3000; Iter++) {
+      C.resize(Len);
+      size_t NewSize = MD.CrossOver(A.data(), A.size(), B.data(), B.size(),
+                                    C.data(), C.size());
+      C.resize(NewSize);
+      FoundUnits.insert(C);
+    }
+    for (const Unit &U : Expected)
+      if (U.size() <= Len)
+        ExpectedUnitsWitThisLength.insert(U);
+    EXPECT_EQ(ExpectedUnitsWitThisLength, FoundUnits);
+  }
+}
+
+TEST(Fuzzer, Hash) {
+  uint8_t A[] = {'a', 'b', 'c'};
+  fuzzer::Unit U(A, A + sizeof(A));
+  EXPECT_EQ("a9993e364706816aba3e25717850c26c9cd0d89d", fuzzer::Hash(U));
+  U.push_back('d');
+  EXPECT_EQ("81fe8bfe87576c3ecb22426f8e57847382917acf", fuzzer::Hash(U));
+}
+
+typedef size_t (MutationDispatcher::*Mutator)(uint8_t *Data, size_t Size,
+                                              size_t MaxSize);
+
+void TestEraseBytes(Mutator M, int NumIter) {
+  std::unique_ptr<ExternalFunctions> t(new ExternalFunctions());
+  fuzzer::EF = t.get();
+  uint8_t REM0[8] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77};
+  uint8_t REM1[8] = {0x00, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77};
+  uint8_t REM2[8] = {0x00, 0x11, 0x33, 0x44, 0x55, 0x66, 0x77};
+  uint8_t REM3[8] = {0x00, 0x11, 0x22, 0x44, 0x55, 0x66, 0x77};
+  uint8_t REM4[8] = {0x00, 0x11, 0x22, 0x33, 0x55, 0x66, 0x77};
+  uint8_t REM5[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x66, 0x77};
+  uint8_t REM6[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x77};
+  uint8_t REM7[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66};
+
+  uint8_t REM8[6] = {0x22, 0x33, 0x44, 0x55, 0x66, 0x77};
+  uint8_t REM9[6] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55};
+  uint8_t REM10[6] = {0x00, 0x11, 0x22, 0x55, 0x66, 0x77};
+
+  uint8_t REM11[5] = {0x33, 0x44, 0x55, 0x66, 0x77};
+  uint8_t REM12[5] = {0x00, 0x11, 0x22, 0x33, 0x44};
+  uint8_t REM13[5] = {0x00, 0x44, 0x55, 0x66, 0x77};
+
+
+  Random Rand(0);
+  MutationDispatcher MD(Rand, {});
+  int FoundMask = 0;
+  for (int i = 0; i < NumIter; i++) {
+    uint8_t T[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77};
+    size_t NewSize = (MD.*M)(T, sizeof(T), sizeof(T));
+    if (NewSize == 7 && !memcmp(REM0, T, 7)) FoundMask |= 1 << 0;
+    if (NewSize == 7 && !memcmp(REM1, T, 7)) FoundMask |= 1 << 1;
+    if (NewSize == 7 && !memcmp(REM2, T, 7)) FoundMask |= 1 << 2;
+    if (NewSize == 7 && !memcmp(REM3, T, 7)) FoundMask |= 1 << 3;
+    if (NewSize == 7 && !memcmp(REM4, T, 7)) FoundMask |= 1 << 4;
+    if (NewSize == 7 && !memcmp(REM5, T, 7)) FoundMask |= 1 << 5;
+    if (NewSize == 7 && !memcmp(REM6, T, 7)) FoundMask |= 1 << 6;
+    if (NewSize == 7 && !memcmp(REM7, T, 7)) FoundMask |= 1 << 7;
+
+    if (NewSize == 6 && !memcmp(REM8, T, 6)) FoundMask |= 1 << 8;
+    if (NewSize == 6 && !memcmp(REM9, T, 6)) FoundMask |= 1 << 9;
+    if (NewSize == 6 && !memcmp(REM10, T, 6)) FoundMask |= 1 << 10;
+
+    if (NewSize == 5 && !memcmp(REM11, T, 5)) FoundMask |= 1 << 11;
+    if (NewSize == 5 && !memcmp(REM12, T, 5)) FoundMask |= 1 << 12;
+    if (NewSize == 5 && !memcmp(REM13, T, 5)) FoundMask |= 1 << 13;
+  }
+  EXPECT_EQ(FoundMask, (1 << 14) - 1);
+}
+
+TEST(FuzzerMutate, EraseBytes1) {
+  TestEraseBytes(&MutationDispatcher::Mutate_EraseBytes, 200);
+}
+TEST(FuzzerMutate, EraseBytes2) {
+  TestEraseBytes(&MutationDispatcher::Mutate, 2000);
+}
+
+void TestInsertByte(Mutator M, int NumIter) {
+  std::unique_ptr<ExternalFunctions> t(new ExternalFunctions());
+  fuzzer::EF = t.get();
+  Random Rand(0);
+  MutationDispatcher MD(Rand, {});
+  int FoundMask = 0;
+  uint8_t INS0[8] = {0xF1, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66};
+  uint8_t INS1[8] = {0x00, 0xF2, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66};
+  uint8_t INS2[8] = {0x00, 0x11, 0xF3, 0x22, 0x33, 0x44, 0x55, 0x66};
+  uint8_t INS3[8] = {0x00, 0x11, 0x22, 0xF4, 0x33, 0x44, 0x55, 0x66};
+  uint8_t INS4[8] = {0x00, 0x11, 0x22, 0x33, 0xF5, 0x44, 0x55, 0x66};
+  uint8_t INS5[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0xF6, 0x55, 0x66};
+  uint8_t INS6[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0xF7, 0x66};
+  uint8_t INS7[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0xF8};
+  for (int i = 0; i < NumIter; i++) {
+    uint8_t T[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66};
+    size_t NewSize = (MD.*M)(T, 7, 8);
+    if (NewSize == 8 && !memcmp(INS0, T, 8)) FoundMask |= 1 << 0;
+    if (NewSize == 8 && !memcmp(INS1, T, 8)) FoundMask |= 1 << 1;
+    if (NewSize == 8 && !memcmp(INS2, T, 8)) FoundMask |= 1 << 2;
+    if (NewSize == 8 && !memcmp(INS3, T, 8)) FoundMask |= 1 << 3;
+    if (NewSize == 8 && !memcmp(INS4, T, 8)) FoundMask |= 1 << 4;
+    if (NewSize == 8 && !memcmp(INS5, T, 8)) FoundMask |= 1 << 5;
+    if (NewSize == 8 && !memcmp(INS6, T, 8)) FoundMask |= 1 << 6;
+    if (NewSize == 8 && !memcmp(INS7, T, 8)) FoundMask |= 1 << 7;
+  }
+  EXPECT_EQ(FoundMask, 255);
+}
+
+TEST(FuzzerMutate, InsertByte1) {
+  TestInsertByte(&MutationDispatcher::Mutate_InsertByte, 1 << 15);
+}
+TEST(FuzzerMutate, InsertByte2) {
+  TestInsertByte(&MutationDispatcher::Mutate, 1 << 17);
+}
+
+void TestInsertRepeatedBytes(Mutator M, int NumIter) {
+  std::unique_ptr<ExternalFunctions> t(new ExternalFunctions());
+  fuzzer::EF = t.get();
+  Random Rand(0);
+  MutationDispatcher MD(Rand, {});
+  int FoundMask = 0;
+  uint8_t INS0[7] = {0x00, 0x11, 0x22, 0x33, 'a', 'a', 'a'};
+  uint8_t INS1[7] = {0x00, 0x11, 0x22, 'a', 'a', 'a', 0x33};
+  uint8_t INS2[7] = {0x00, 0x11, 'a', 'a', 'a', 0x22, 0x33};
+  uint8_t INS3[7] = {0x00, 'a', 'a', 'a', 0x11, 0x22, 0x33};
+  uint8_t INS4[7] = {'a', 'a', 'a', 0x00, 0x11, 0x22, 0x33};
+
+  uint8_t INS5[8] = {0x00, 0x11, 0x22, 0x33, 'b', 'b', 'b', 'b'};
+  uint8_t INS6[8] = {0x00, 0x11, 0x22, 'b', 'b', 'b', 'b', 0x33};
+  uint8_t INS7[8] = {0x00, 0x11, 'b', 'b', 'b', 'b', 0x22, 0x33};
+  uint8_t INS8[8] = {0x00, 'b', 'b', 'b', 'b', 0x11, 0x22, 0x33};
+  uint8_t INS9[8] = {'b', 'b', 'b', 'b', 0x00, 0x11, 0x22, 0x33};
+
+  for (int i = 0; i < NumIter; i++) {
+    uint8_t T[8] = {0x00, 0x11, 0x22, 0x33};
+    size_t NewSize = (MD.*M)(T, 4, 8);
+    if (NewSize == 7 && !memcmp(INS0, T, 7)) FoundMask |= 1 << 0;
+    if (NewSize == 7 && !memcmp(INS1, T, 7)) FoundMask |= 1 << 1;
+    if (NewSize == 7 && !memcmp(INS2, T, 7)) FoundMask |= 1 << 2;
+    if (NewSize == 7 && !memcmp(INS3, T, 7)) FoundMask |= 1 << 3;
+    if (NewSize == 7 && !memcmp(INS4, T, 7)) FoundMask |= 1 << 4;
+
+    if (NewSize == 8 && !memcmp(INS5, T, 8)) FoundMask |= 1 << 5;
+    if (NewSize == 8 && !memcmp(INS6, T, 8)) FoundMask |= 1 << 6;
+    if (NewSize == 8 && !memcmp(INS7, T, 8)) FoundMask |= 1 << 7;
+    if (NewSize == 8 && !memcmp(INS8, T, 8)) FoundMask |= 1 << 8;
+    if (NewSize == 8 && !memcmp(INS9, T, 8)) FoundMask |= 1 << 9;
+
+  }
+  EXPECT_EQ(FoundMask, (1 << 10) - 1);
+}
+
+TEST(FuzzerMutate, InsertRepeatedBytes1) {
+  TestInsertRepeatedBytes(&MutationDispatcher::Mutate_InsertRepeatedBytes, 10000);
+}
+TEST(FuzzerMutate, InsertRepeatedBytes2) {
+  TestInsertRepeatedBytes(&MutationDispatcher::Mutate, 300000);
+}
+
+void TestChangeByte(Mutator M, int NumIter) {
+  std::unique_ptr<ExternalFunctions> t(new ExternalFunctions());
+  fuzzer::EF = t.get();
+  Random Rand(0);
+  MutationDispatcher MD(Rand, {});
+  int FoundMask = 0;
+  uint8_t CH0[8] = {0xF0, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77};
+  uint8_t CH1[8] = {0x00, 0xF1, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77};
+  uint8_t CH2[8] = {0x00, 0x11, 0xF2, 0x33, 0x44, 0x55, 0x66, 0x77};
+  uint8_t CH3[8] = {0x00, 0x11, 0x22, 0xF3, 0x44, 0x55, 0x66, 0x77};
+  uint8_t CH4[8] = {0x00, 0x11, 0x22, 0x33, 0xF4, 0x55, 0x66, 0x77};
+  uint8_t CH5[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0xF5, 0x66, 0x77};
+  uint8_t CH6[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0xF5, 0x77};
+  uint8_t CH7[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0xF7};
+  for (int i = 0; i < NumIter; i++) {
+    uint8_t T[9] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77};
+    size_t NewSize = (MD.*M)(T, 8, 9);
+    if (NewSize == 8 && !memcmp(CH0, T, 8)) FoundMask |= 1 << 0;
+    if (NewSize == 8 && !memcmp(CH1, T, 8)) FoundMask |= 1 << 1;
+    if (NewSize == 8 && !memcmp(CH2, T, 8)) FoundMask |= 1 << 2;
+    if (NewSize == 8 && !memcmp(CH3, T, 8)) FoundMask |= 1 << 3;
+    if (NewSize == 8 && !memcmp(CH4, T, 8)) FoundMask |= 1 << 4;
+    if (NewSize == 8 && !memcmp(CH5, T, 8)) FoundMask |= 1 << 5;
+    if (NewSize == 8 && !memcmp(CH6, T, 8)) FoundMask |= 1 << 6;
+    if (NewSize == 8 && !memcmp(CH7, T, 8)) FoundMask |= 1 << 7;
+  }
+  EXPECT_EQ(FoundMask, 255);
+}
+
+TEST(FuzzerMutate, ChangeByte1) {
+  TestChangeByte(&MutationDispatcher::Mutate_ChangeByte, 1 << 15);
+}
+TEST(FuzzerMutate, ChangeByte2) {
+  TestChangeByte(&MutationDispatcher::Mutate, 1 << 17);
+}
+
+void TestChangeBit(Mutator M, int NumIter) {
+  std::unique_ptr<ExternalFunctions> t(new ExternalFunctions());
+  fuzzer::EF = t.get();
+  Random Rand(0);
+  MutationDispatcher MD(Rand, {});
+  int FoundMask = 0;
+  uint8_t CH0[8] = {0x01, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77};
+  uint8_t CH1[8] = {0x00, 0x13, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77};
+  uint8_t CH2[8] = {0x00, 0x11, 0x02, 0x33, 0x44, 0x55, 0x66, 0x77};
+  uint8_t CH3[8] = {0x00, 0x11, 0x22, 0x37, 0x44, 0x55, 0x66, 0x77};
+  uint8_t CH4[8] = {0x00, 0x11, 0x22, 0x33, 0x54, 0x55, 0x66, 0x77};
+  uint8_t CH5[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x54, 0x66, 0x77};
+  uint8_t CH6[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x76, 0x77};
+  uint8_t CH7[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0xF7};
+  for (int i = 0; i < NumIter; i++) {
+    uint8_t T[9] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77};
+    size_t NewSize = (MD.*M)(T, 8, 9);
+    if (NewSize == 8 && !memcmp(CH0, T, 8)) FoundMask |= 1 << 0;
+    if (NewSize == 8 && !memcmp(CH1, T, 8)) FoundMask |= 1 << 1;
+    if (NewSize == 8 && !memcmp(CH2, T, 8)) FoundMask |= 1 << 2;
+    if (NewSize == 8 && !memcmp(CH3, T, 8)) FoundMask |= 1 << 3;
+    if (NewSize == 8 && !memcmp(CH4, T, 8)) FoundMask |= 1 << 4;
+    if (NewSize == 8 && !memcmp(CH5, T, 8)) FoundMask |= 1 << 5;
+    if (NewSize == 8 && !memcmp(CH6, T, 8)) FoundMask |= 1 << 6;
+    if (NewSize == 8 && !memcmp(CH7, T, 8)) FoundMask |= 1 << 7;
+  }
+  EXPECT_EQ(FoundMask, 255);
+}
+
+TEST(FuzzerMutate, ChangeBit1) {
+  TestChangeBit(&MutationDispatcher::Mutate_ChangeBit, 1 << 16);
+}
+TEST(FuzzerMutate, ChangeBit2) {
+  TestChangeBit(&MutationDispatcher::Mutate, 1 << 18);
+}
+
+void TestShuffleBytes(Mutator M, int NumIter) {
+  std::unique_ptr<ExternalFunctions> t(new ExternalFunctions());
+  fuzzer::EF = t.get();
+  Random Rand(0);
+  MutationDispatcher MD(Rand, {});
+  int FoundMask = 0;
+  uint8_t CH0[7] = {0x00, 0x22, 0x11, 0x33, 0x44, 0x55, 0x66};
+  uint8_t CH1[7] = {0x11, 0x00, 0x33, 0x22, 0x44, 0x55, 0x66};
+  uint8_t CH2[7] = {0x00, 0x33, 0x11, 0x22, 0x44, 0x55, 0x66};
+  uint8_t CH3[7] = {0x00, 0x11, 0x22, 0x44, 0x55, 0x66, 0x33};
+  uint8_t CH4[7] = {0x00, 0x11, 0x22, 0x33, 0x55, 0x44, 0x66};
+  for (int i = 0; i < NumIter; i++) {
+    uint8_t T[7] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66};
+    size_t NewSize = (MD.*M)(T, 7, 7);
+    if (NewSize == 7 && !memcmp(CH0, T, 7)) FoundMask |= 1 << 0;
+    if (NewSize == 7 && !memcmp(CH1, T, 7)) FoundMask |= 1 << 1;
+    if (NewSize == 7 && !memcmp(CH2, T, 7)) FoundMask |= 1 << 2;
+    if (NewSize == 7 && !memcmp(CH3, T, 7)) FoundMask |= 1 << 3;
+    if (NewSize == 7 && !memcmp(CH4, T, 7)) FoundMask |= 1 << 4;
+  }
+  EXPECT_EQ(FoundMask, 31);
+}
+
+TEST(FuzzerMutate, ShuffleBytes1) {
+  TestShuffleBytes(&MutationDispatcher::Mutate_ShuffleBytes, 1 << 16);
+}
+TEST(FuzzerMutate, ShuffleBytes2) {
+  TestShuffleBytes(&MutationDispatcher::Mutate, 1 << 20);
+}
+
+void TestCopyPart(Mutator M, int NumIter) {
+  std::unique_ptr<ExternalFunctions> t(new ExternalFunctions());
+  fuzzer::EF = t.get();
+  Random Rand(0);
+  MutationDispatcher MD(Rand, {});
+  int FoundMask = 0;
+  uint8_t CH0[7] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x00, 0x11};
+  uint8_t CH1[7] = {0x55, 0x66, 0x22, 0x33, 0x44, 0x55, 0x66};
+  uint8_t CH2[7] = {0x00, 0x55, 0x66, 0x33, 0x44, 0x55, 0x66};
+  uint8_t CH3[7] = {0x00, 0x11, 0x22, 0x00, 0x11, 0x22, 0x66};
+  uint8_t CH4[7] = {0x00, 0x11, 0x11, 0x22, 0x33, 0x55, 0x66};
+
+  for (int i = 0; i < NumIter; i++) {
+    uint8_t T[7] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66};
+    size_t NewSize = (MD.*M)(T, 7, 7);
+    if (NewSize == 7 && !memcmp(CH0, T, 7)) FoundMask |= 1 << 0;
+    if (NewSize == 7 && !memcmp(CH1, T, 7)) FoundMask |= 1 << 1;
+    if (NewSize == 7 && !memcmp(CH2, T, 7)) FoundMask |= 1 << 2;
+    if (NewSize == 7 && !memcmp(CH3, T, 7)) FoundMask |= 1 << 3;
+    if (NewSize == 7 && !memcmp(CH4, T, 7)) FoundMask |= 1 << 4;
+  }
+
+  uint8_t CH5[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x00, 0x11, 0x22};
+  uint8_t CH6[8] = {0x22, 0x33, 0x44, 0x00, 0x11, 0x22, 0x33, 0x44};
+  uint8_t CH7[8] = {0x00, 0x11, 0x22, 0x00, 0x11, 0x22, 0x33, 0x44};
+  uint8_t CH8[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x22, 0x33, 0x44};
+  uint8_t CH9[8] = {0x00, 0x11, 0x22, 0x22, 0x33, 0x44, 0x33, 0x44};
+
+  for (int i = 0; i < NumIter; i++) {
+    uint8_t T[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77};
+    size_t NewSize = (MD.*M)(T, 5, 8);
+    if (NewSize == 8 && !memcmp(CH5, T, 8)) FoundMask |= 1 << 5;
+    if (NewSize == 8 && !memcmp(CH6, T, 8)) FoundMask |= 1 << 6;
+    if (NewSize == 8 && !memcmp(CH7, T, 8)) FoundMask |= 1 << 7;
+    if (NewSize == 8 && !memcmp(CH8, T, 8)) FoundMask |= 1 << 8;
+    if (NewSize == 8 && !memcmp(CH9, T, 8)) FoundMask |= 1 << 9;
+  }
+
+  EXPECT_EQ(FoundMask, 1023);
+}
+
+TEST(FuzzerMutate, CopyPart1) {
+  TestCopyPart(&MutationDispatcher::Mutate_CopyPart, 1 << 10);
+}
+TEST(FuzzerMutate, CopyPart2) {
+  TestCopyPart(&MutationDispatcher::Mutate, 1 << 13);
+}
+
+void TestAddWordFromDictionary(Mutator M, int NumIter) {
+  std::unique_ptr<ExternalFunctions> t(new ExternalFunctions());
+  fuzzer::EF = t.get();
+  Random Rand(0);
+  MutationDispatcher MD(Rand, {});
+  uint8_t Word1[4] = {0xAA, 0xBB, 0xCC, 0xDD};
+  uint8_t Word2[3] = {0xFF, 0xEE, 0xEF};
+  MD.AddWordToManualDictionary(Word(Word1, sizeof(Word1)));
+  MD.AddWordToManualDictionary(Word(Word2, sizeof(Word2)));
+  int FoundMask = 0;
+  uint8_t CH0[7] = {0x00, 0x11, 0x22, 0xAA, 0xBB, 0xCC, 0xDD};
+  uint8_t CH1[7] = {0x00, 0x11, 0xAA, 0xBB, 0xCC, 0xDD, 0x22};
+  uint8_t CH2[7] = {0x00, 0xAA, 0xBB, 0xCC, 0xDD, 0x11, 0x22};
+  uint8_t CH3[7] = {0xAA, 0xBB, 0xCC, 0xDD, 0x00, 0x11, 0x22};
+  uint8_t CH4[6] = {0x00, 0x11, 0x22, 0xFF, 0xEE, 0xEF};
+  uint8_t CH5[6] = {0x00, 0x11, 0xFF, 0xEE, 0xEF, 0x22};
+  uint8_t CH6[6] = {0x00, 0xFF, 0xEE, 0xEF, 0x11, 0x22};
+  uint8_t CH7[6] = {0xFF, 0xEE, 0xEF, 0x00, 0x11, 0x22};
+  for (int i = 0; i < NumIter; i++) {
+    uint8_t T[7] = {0x00, 0x11, 0x22};
+    size_t NewSize = (MD.*M)(T, 3, 7);
+    if (NewSize == 7 && !memcmp(CH0, T, 7)) FoundMask |= 1 << 0;
+    if (NewSize == 7 && !memcmp(CH1, T, 7)) FoundMask |= 1 << 1;
+    if (NewSize == 7 && !memcmp(CH2, T, 7)) FoundMask |= 1 << 2;
+    if (NewSize == 7 && !memcmp(CH3, T, 7)) FoundMask |= 1 << 3;
+    if (NewSize == 6 && !memcmp(CH4, T, 6)) FoundMask |= 1 << 4;
+    if (NewSize == 6 && !memcmp(CH5, T, 6)) FoundMask |= 1 << 5;
+    if (NewSize == 6 && !memcmp(CH6, T, 6)) FoundMask |= 1 << 6;
+    if (NewSize == 6 && !memcmp(CH7, T, 6)) FoundMask |= 1 << 7;
+  }
+  EXPECT_EQ(FoundMask, 255);
+}
+
+TEST(FuzzerMutate, AddWordFromDictionary1) {
+  TestAddWordFromDictionary(
+      &MutationDispatcher::Mutate_AddWordFromManualDictionary, 1 << 15);
+}
+
+TEST(FuzzerMutate, AddWordFromDictionary2) {
+  TestAddWordFromDictionary(&MutationDispatcher::Mutate, 1 << 15);
+}
+
+void TestChangeASCIIInteger(Mutator M, int NumIter) {
+  std::unique_ptr<ExternalFunctions> t(new ExternalFunctions());
+  fuzzer::EF = t.get();
+  Random Rand(0);
+  MutationDispatcher MD(Rand, {});
+
+  uint8_t CH0[8] = {'1', '2', '3', '4', '5', '6', '7', '7'};
+  uint8_t CH1[8] = {'1', '2', '3', '4', '5', '6', '7', '9'};
+  uint8_t CH2[8] = {'2', '4', '6', '9', '1', '3', '5', '6'};
+  uint8_t CH3[8] = {'0', '6', '1', '7', '2', '8', '3', '9'};
+  int FoundMask = 0;
+  for (int i = 0; i < NumIter; i++) {
+    uint8_t T[8] = {'1', '2', '3', '4', '5', '6', '7', '8'};
+    size_t NewSize = (MD.*M)(T, 8, 8);
+    /**/ if (NewSize == 8 && !memcmp(CH0, T, 8)) FoundMask |= 1 << 0;
+    else if (NewSize == 8 && !memcmp(CH1, T, 8)) FoundMask |= 1 << 1;
+    else if (NewSize == 8 && !memcmp(CH2, T, 8)) FoundMask |= 1 << 2;
+    else if (NewSize == 8 && !memcmp(CH3, T, 8)) FoundMask |= 1 << 3;
+    else if (NewSize == 8)                       FoundMask |= 1 << 4;
+  }
+  EXPECT_EQ(FoundMask, 31);
+}
+
+TEST(FuzzerMutate, ChangeASCIIInteger1) {
+  TestChangeASCIIInteger(&MutationDispatcher::Mutate_ChangeASCIIInteger,
+                         1 << 15);
+}
+
+TEST(FuzzerMutate, ChangeASCIIInteger2) {
+  TestChangeASCIIInteger(&MutationDispatcher::Mutate, 1 << 15);
+}
+
+void TestChangeBinaryInteger(Mutator M, int NumIter) {
+  std::unique_ptr<ExternalFunctions> t(new ExternalFunctions());
+  fuzzer::EF = t.get();
+  Random Rand(0);
+  MutationDispatcher MD(Rand, {});
+
+  uint8_t CH0[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x79};
+  uint8_t CH1[8] = {0x00, 0x11, 0x22, 0x31, 0x44, 0x55, 0x66, 0x77};
+  uint8_t CH2[8] = {0xff, 0x10, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77};
+  uint8_t CH3[8] = {0x00, 0x11, 0x2a, 0x33, 0x44, 0x55, 0x66, 0x77};
+  uint8_t CH4[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x4f, 0x66, 0x77};
+  uint8_t CH5[8] = {0xff, 0xee, 0xdd, 0xcc, 0xbb, 0xaa, 0x99, 0x88};
+  uint8_t CH6[8] = {0x00, 0x11, 0x22, 0x00, 0x00, 0x00, 0x08, 0x77}; // Size
+  uint8_t CH7[8] = {0x00, 0x08, 0x00, 0x33, 0x44, 0x55, 0x66, 0x77}; // Sw(Size)
+
+  int FoundMask = 0;
+  for (int i = 0; i < NumIter; i++) {
+    uint8_t T[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77};
+    size_t NewSize = (MD.*M)(T, 8, 8);
+    /**/ if (NewSize == 8 && !memcmp(CH0, T, 8)) FoundMask |= 1 << 0;
+    else if (NewSize == 8 && !memcmp(CH1, T, 8)) FoundMask |= 1 << 1;
+    else if (NewSize == 8 && !memcmp(CH2, T, 8)) FoundMask |= 1 << 2;
+    else if (NewSize == 8 && !memcmp(CH3, T, 8)) FoundMask |= 1 << 3;
+    else if (NewSize == 8 && !memcmp(CH4, T, 8)) FoundMask |= 1 << 4;
+    else if (NewSize == 8 && !memcmp(CH5, T, 8)) FoundMask |= 1 << 5;
+    else if (NewSize == 8 && !memcmp(CH6, T, 8)) FoundMask |= 1 << 6;
+    else if (NewSize == 8 && !memcmp(CH7, T, 8)) FoundMask |= 1 << 7;
+  }
+  EXPECT_EQ(FoundMask, 255);
+}
+
+TEST(FuzzerMutate, ChangeBinaryInteger1) {
+  TestChangeBinaryInteger(&MutationDispatcher::Mutate_ChangeBinaryInteger,
+                         1 << 12);
+}
+
+TEST(FuzzerMutate, ChangeBinaryInteger2) {
+  TestChangeBinaryInteger(&MutationDispatcher::Mutate, 1 << 15);
+}
+
+
+TEST(FuzzerDictionary, ParseOneDictionaryEntry) {
+  Unit U;
+  EXPECT_FALSE(ParseOneDictionaryEntry("", &U));
+  EXPECT_FALSE(ParseOneDictionaryEntry(" ", &U));
+  EXPECT_FALSE(ParseOneDictionaryEntry("\t  ", &U));
+  EXPECT_FALSE(ParseOneDictionaryEntry("  \" ", &U));
+  EXPECT_FALSE(ParseOneDictionaryEntry("  zz\" ", &U));
+  EXPECT_FALSE(ParseOneDictionaryEntry("  \"zz ", &U));
+  EXPECT_FALSE(ParseOneDictionaryEntry("  \"\" ", &U));
+  EXPECT_TRUE(ParseOneDictionaryEntry("\"a\"", &U));
+  EXPECT_EQ(U, Unit({'a'}));
+  EXPECT_TRUE(ParseOneDictionaryEntry("\"abc\"", &U));
+  EXPECT_EQ(U, Unit({'a', 'b', 'c'}));
+  EXPECT_TRUE(ParseOneDictionaryEntry("abc=\"abc\"", &U));
+  EXPECT_EQ(U, Unit({'a', 'b', 'c'}));
+  EXPECT_FALSE(ParseOneDictionaryEntry("\"\\\"", &U));
+  EXPECT_TRUE(ParseOneDictionaryEntry("\"\\\\\"", &U));
+  EXPECT_EQ(U, Unit({'\\'}));
+  EXPECT_TRUE(ParseOneDictionaryEntry("\"\\xAB\"", &U));
+  EXPECT_EQ(U, Unit({0xAB}));
+  EXPECT_TRUE(ParseOneDictionaryEntry("\"\\xABz\\xDE\"", &U));
+  EXPECT_EQ(U, Unit({0xAB, 'z', 0xDE}));
+  EXPECT_TRUE(ParseOneDictionaryEntry("\"#\"", &U));
+  EXPECT_EQ(U, Unit({'#'}));
+  EXPECT_TRUE(ParseOneDictionaryEntry("\"\\\"\"", &U));
+  EXPECT_EQ(U, Unit({'"'}));
+}
+
+TEST(FuzzerDictionary, ParseDictionaryFile) {
+  Vector<Unit> Units;
+  EXPECT_FALSE(ParseDictionaryFile("zzz\n", &Units));
+  EXPECT_FALSE(ParseDictionaryFile("", &Units));
+  EXPECT_TRUE(ParseDictionaryFile("\n", &Units));
+  EXPECT_EQ(Units.size(), 0U);
+  EXPECT_TRUE(ParseDictionaryFile("#zzzz a b c d\n", &Units));
+  EXPECT_EQ(Units.size(), 0U);
+  EXPECT_TRUE(ParseDictionaryFile(" #zzzz\n", &Units));
+  EXPECT_EQ(Units.size(), 0U);
+  EXPECT_TRUE(ParseDictionaryFile("  #zzzz\n", &Units));
+  EXPECT_EQ(Units.size(), 0U);
+  EXPECT_TRUE(ParseDictionaryFile("  #zzzz\naaa=\"aa\"", &Units));
+  EXPECT_EQ(Units, Vector<Unit>({Unit({'a', 'a'})}));
+  EXPECT_TRUE(
+      ParseDictionaryFile("  #zzzz\naaa=\"aa\"\n\nabc=\"abc\"", &Units));
+  EXPECT_EQ(Units,
+            Vector<Unit>({Unit({'a', 'a'}), Unit({'a', 'b', 'c'})}));
+}
+
+TEST(FuzzerUtil, Base64) {
+  EXPECT_EQ("", Base64({}));
+  EXPECT_EQ("YQ==", Base64({'a'}));
+  EXPECT_EQ("eA==", Base64({'x'}));
+  EXPECT_EQ("YWI=", Base64({'a', 'b'}));
+  EXPECT_EQ("eHk=", Base64({'x', 'y'}));
+  EXPECT_EQ("YWJj", Base64({'a', 'b', 'c'}));
+  EXPECT_EQ("eHl6", Base64({'x', 'y', 'z'}));
+  EXPECT_EQ("YWJjeA==", Base64({'a', 'b', 'c', 'x'}));
+  EXPECT_EQ("YWJjeHk=", Base64({'a', 'b', 'c', 'x', 'y'}));
+  EXPECT_EQ("YWJjeHl6", Base64({'a', 'b', 'c', 'x', 'y', 'z'}));
+}
+
+TEST(Corpus, Distribution) {
+  Random Rand(0);
+  std::unique_ptr<InputCorpus> C(new InputCorpus(""));
+  size_t N = 10;
+  size_t TriesPerUnit = 1<<16;
+  for (size_t i = 0; i < N; i++)
+    C->AddToCorpus(Unit{ static_cast<uint8_t>(i) }, 1, false, {});
+
+  Vector<size_t> Hist(N);
+  for (size_t i = 0; i < N * TriesPerUnit; i++) {
+    Hist[C->ChooseUnitIdxToMutate(Rand)]++;
+  }
+  for (size_t i = 0; i < N; i++) {
+    // A weak sanity check that every unit gets invoked.
+    EXPECT_GT(Hist[i], TriesPerUnit / N / 3);
+  }
+}
+
+TEST(Merge, Bad) {
+  const char *kInvalidInputs[] = {
+    "",
+    "x",
+    "3\nx",
+    "2\n3",
+    "2\n2",
+    "2\n2\nA\n",
+    "2\n2\nA\nB\nC\n",
+    "0\n0\n",
+    "1\n1\nA\nDONE 0",
+    "1\n1\nA\nSTARTED 1",
+  };
+  Merger M;
+  for (auto S : kInvalidInputs) {
+    // fprintf(stderr, "TESTING:\n%s\n", S);
+    EXPECT_FALSE(M.Parse(S, false));
+  }
+}
+
+void EQ(const Vector<uint32_t> &A, const Vector<uint32_t> &B) {
+  EXPECT_EQ(A, B);
+}
+
+void EQ(const Vector<std::string> &A, const Vector<std::string> &B) {
+  Set<std::string> a(A.begin(), A.end());
+  Set<std::string> b(B.begin(), B.end());
+  EXPECT_EQ(a, b);
+}
+
+static void Merge(const std::string &Input,
+                  const Vector<std::string> Result,
+                  size_t NumNewFeatures) {
+  Merger M;
+  Vector<std::string> NewFiles;
+  EXPECT_TRUE(M.Parse(Input, true));
+  std::stringstream SS;
+  M.PrintSummary(SS);
+  EXPECT_EQ(NumNewFeatures, M.Merge(&NewFiles));
+  EXPECT_EQ(M.AllFeatures(), M.ParseSummary(SS));
+  EQ(NewFiles, Result);
+}
+
+TEST(Merge, Good) {
+  Merger M;
+
+  EXPECT_TRUE(M.Parse("1\n0\nAA\n", false));
+  EXPECT_EQ(M.Files.size(), 1U);
+  EXPECT_EQ(M.NumFilesInFirstCorpus, 0U);
+  EXPECT_EQ(M.Files[0].Name, "AA");
+  EXPECT_TRUE(M.LastFailure.empty());
+  EXPECT_EQ(M.FirstNotProcessedFile, 0U);
+
+  EXPECT_TRUE(M.Parse("2\n1\nAA\nBB\nSTARTED 0 42\n", false));
+  EXPECT_EQ(M.Files.size(), 2U);
+  EXPECT_EQ(M.NumFilesInFirstCorpus, 1U);
+  EXPECT_EQ(M.Files[0].Name, "AA");
+  EXPECT_EQ(M.Files[1].Name, "BB");
+  EXPECT_EQ(M.LastFailure, "AA");
+  EXPECT_EQ(M.FirstNotProcessedFile, 1U);
+
+  EXPECT_TRUE(M.Parse("3\n1\nAA\nBB\nC\n"
+                        "STARTED 0 1000\n"
+                        "DONE 0 1 2 3\n"
+                        "STARTED 1 1001\n"
+                        "DONE 1 4 5 6 \n"
+                        "STARTED 2 1002\n"
+                        "", true));
+  EXPECT_EQ(M.Files.size(), 3U);
+  EXPECT_EQ(M.NumFilesInFirstCorpus, 1U);
+  EXPECT_EQ(M.Files[0].Name, "AA");
+  EXPECT_EQ(M.Files[0].Size, 1000U);
+  EXPECT_EQ(M.Files[1].Name, "BB");
+  EXPECT_EQ(M.Files[1].Size, 1001U);
+  EXPECT_EQ(M.Files[2].Name, "C");
+  EXPECT_EQ(M.Files[2].Size, 1002U);
+  EXPECT_EQ(M.LastFailure, "C");
+  EXPECT_EQ(M.FirstNotProcessedFile, 3U);
+  EQ(M.Files[0].Features, {1, 2, 3});
+  EQ(M.Files[1].Features, {4, 5, 6});
+
+
+  Vector<std::string> NewFiles;
+
+  EXPECT_TRUE(M.Parse("3\n2\nAA\nBB\nC\n"
+                        "STARTED 0 1000\nDONE 0 1 2 3\n"
+                        "STARTED 1 1001\nDONE 1 4 5 6 \n"
+                        "STARTED 2 1002\nDONE 2 6 1 3 \n"
+                        "", true));
+  EXPECT_EQ(M.Files.size(), 3U);
+  EXPECT_EQ(M.NumFilesInFirstCorpus, 2U);
+  EXPECT_TRUE(M.LastFailure.empty());
+  EXPECT_EQ(M.FirstNotProcessedFile, 3U);
+  EQ(M.Files[0].Features, {1, 2, 3});
+  EQ(M.Files[1].Features, {4, 5, 6});
+  EQ(M.Files[2].Features, {1, 3, 6});
+  EXPECT_EQ(0U, M.Merge(&NewFiles));
+  EQ(NewFiles, {});
+
+  EXPECT_TRUE(M.Parse("3\n1\nA\nB\nC\n"
+                        "STARTED 0 1000\nDONE 0 1 2 3\n"
+                        "STARTED 1 1001\nDONE 1 4 5 6 \n"
+                        "STARTED 2 1002\nDONE 2 6 1 3\n"
+                        "", true));
+  EQ(M.Files[0].Features, {1, 2, 3});
+  EQ(M.Files[1].Features, {4, 5, 6});
+  EQ(M.Files[2].Features, {1, 3, 6});
+  EXPECT_EQ(3U, M.Merge(&NewFiles));
+  EQ(NewFiles, {"B"});
+
+  // Same as the above, but with InitialFeatures.
+  EXPECT_TRUE(M.Parse("2\n0\nB\nC\n"
+                        "STARTED 0 1001\nDONE 0 4 5 6 \n"
+                        "STARTED 1 1002\nDONE 1 6 1 3\n"
+                        "", true));
+  EQ(M.Files[0].Features, {4, 5, 6});
+  EQ(M.Files[1].Features, {1, 3, 6});
+  Set<uint32_t> InitialFeatures;
+  InitialFeatures.insert(1);
+  InitialFeatures.insert(2);
+  InitialFeatures.insert(3);
+  EXPECT_EQ(3U, M.Merge(InitialFeatures, &NewFiles));
+  EQ(NewFiles, {"B"});
+}
+
+TEST(Merge, Merge) {
+
+  Merge("3\n1\nA\nB\nC\n"
+        "STARTED 0 1000\nDONE 0 1 2 3\n"
+        "STARTED 1 1001\nDONE 1 4 5 6 \n"
+        "STARTED 2 1002\nDONE 2 6 1 3 \n",
+        {"B"}, 3);
+
+  Merge("3\n0\nA\nB\nC\n"
+        "STARTED 0 2000\nDONE 0 1 2 3\n"
+        "STARTED 1 1001\nDONE 1 4 5 6 \n"
+        "STARTED 2 1002\nDONE 2 6 1 3 \n",
+        {"A", "B", "C"}, 6);
+
+  Merge("4\n0\nA\nB\nC\nD\n"
+        "STARTED 0 2000\nDONE 0 1 2 3\n"
+        "STARTED 1 1101\nDONE 1 4 5 6 \n"
+        "STARTED 2 1102\nDONE 2 6 1 3 100 \n"
+        "STARTED 3 1000\nDONE 3 1  \n",
+        {"A", "B", "C", "D"}, 7);
+
+  Merge("4\n1\nA\nB\nC\nD\n"
+        "STARTED 0 2000\nDONE 0 4 5 6 7 8\n"
+        "STARTED 1 1100\nDONE 1 1 2 3 \n"
+        "STARTED 2 1100\nDONE 2 2 3 \n"
+        "STARTED 3 1000\nDONE 3 1  \n",
+        {"B", "D"}, 3);
+}
+
+TEST(Fuzzer, ForEachNonZeroByte) {
+  const size_t N = 64;
+  alignas(64) uint8_t Ar[N + 8] = {
+    0, 0, 0, 0, 0, 0, 0, 0,
+    1, 2, 0, 0, 0, 0, 0, 0,
+    0, 0, 3, 0, 4, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 5, 0, 6, 0, 0,
+    0, 0, 0, 0, 0, 0, 7, 0,
+    0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 8,
+    9, 9, 9, 9, 9, 9, 9, 9,
+  };
+  typedef Vector<std::pair<size_t, uint8_t> > Vec;
+  Vec Res, Expected;
+  auto CB = [&](size_t FirstFeature, size_t Idx, uint8_t V) {
+    Res.push_back({FirstFeature + Idx, V});
+  };
+  ForEachNonZeroByte(Ar, Ar + N, 100, CB);
+  Expected = {{108, 1}, {109, 2}, {118, 3}, {120, 4},
+              {135, 5}, {137, 6}, {146, 7}, {163, 8}};
+  EXPECT_EQ(Res, Expected);
+
+  Res.clear();
+  ForEachNonZeroByte(Ar + 9, Ar + N, 109, CB);
+  Expected = {          {109, 2}, {118, 3}, {120, 4},
+              {135, 5}, {137, 6}, {146, 7}, {163, 8}};
+  EXPECT_EQ(Res, Expected);
+
+  Res.clear();
+  ForEachNonZeroByte(Ar + 9, Ar + N - 9, 109, CB);
+  Expected = {          {109, 2}, {118, 3}, {120, 4},
+              {135, 5}, {137, 6}, {146, 7}};
+  EXPECT_EQ(Res, Expected);
+}
+
+int main(int argc, char **argv) {
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/lib/interception/interception.h b/lib/interception/interception.h
index d79fa67..9534984 100644
--- a/lib/interception/interception.h
+++ b/lib/interception/interception.h
@@ -15,8 +15,8 @@
 #ifndef INTERCEPTION_H
 #define INTERCEPTION_H
 
-#if !defined(__linux__) && !defined(__FreeBSD__) && \
-  !defined(__APPLE__) && !defined(_WIN32)
+#if !defined(__linux__) && !defined(__FreeBSD__) && !defined(__APPLE__) && \
+    !defined(__NetBSD__) && !defined(_WIN32) && !defined(__Fuchsia__)
 # error "Interception doesn't work on this operating system."
 #endif
 
@@ -129,7 +129,7 @@
     extern "C" ret_type func(__VA_ARGS__);
 # define DECLARE_WRAPPER_WINAPI(ret_type, func, ...) \
     extern "C" __declspec(dllimport) ret_type __stdcall func(__VA_ARGS__);
-#elif defined(__FreeBSD__)
+#elif defined(__FreeBSD__) || defined(__NetBSD__)
 # define WRAP(x) __interceptor_ ## x
 # define WRAPPER_NAME(x) "__interceptor_" #x
 # define INTERCEPTOR_ATTRIBUTE __attribute__((visibility("default")))
@@ -139,7 +139,7 @@
 # define DECLARE_WRAPPER(ret_type, func, ...) \
      extern "C" ret_type func(__VA_ARGS__) \
      __attribute__((alias("__interceptor_" #func), visibility("default")));
-#else
+#elif !defined(__Fuchsia__)
 # define WRAP(x) __interceptor_ ## x
 # define WRAPPER_NAME(x) "__interceptor_" #x
 # define INTERCEPTOR_ATTRIBUTE __attribute__((visibility("default")))
@@ -148,7 +148,15 @@
     __attribute__((weak, alias("__interceptor_" #func), visibility("default")));
 #endif
 
-#if !defined(__APPLE__)
+#if defined(__Fuchsia__)
+// There is no general interception at all on Fuchsia.
+// Sanitizer runtimes just define functions directly to preempt them,
+// and have bespoke ways to access the underlying libc functions.
+# include <zircon/sanitizer.h>
+# define INTERCEPTOR_ATTRIBUTE __attribute__((visibility("default")))
+# define REAL(x) __unsanitized_##x
+# define DECLARE_REAL(ret_type, func, ...)
+#elif !defined(__APPLE__)
 # define PTR_TO_REAL(x) real_##x
 # define REAL(x) __interception::PTR_TO_REAL(x)
 # define FUNC_TYPE(x) x##_f
@@ -166,15 +174,19 @@
 # define ASSIGN_REAL(x, y)
 #endif  // __APPLE__
 
+#if !defined(__Fuchsia__)
 #define DECLARE_REAL_AND_INTERCEPTOR(ret_type, func, ...) \
   DECLARE_REAL(ret_type, func, __VA_ARGS__) \
   extern "C" ret_type WRAP(func)(__VA_ARGS__);
+#else
+#define DECLARE_REAL_AND_INTERCEPTOR(ret_type, func, ...)
+#endif
 
 // Generally, you don't need to use DEFINE_REAL by itself, as INTERCEPTOR
 // macros does its job. In exceptional cases you may need to call REAL(foo)
 // without defining INTERCEPTOR(..., foo, ...). For example, if you override
 // foo with an interceptor for other function.
-#if !defined(__APPLE__)
+#if !defined(__APPLE__) && !defined(__Fuchsia__)
 # define DEFINE_REAL(ret_type, func, ...) \
     typedef ret_type (*FUNC_TYPE(func))(__VA_ARGS__); \
     namespace __interception { \
@@ -184,7 +196,18 @@
 # define DEFINE_REAL(ret_type, func, ...)
 #endif
 
-#if !defined(__APPLE__)
+#if defined(__Fuchsia__)
+
+// We need to define the __interceptor_func name just to get
+// sanitizer_common/scripts/gen_dynamic_list.py to export func.
+// But we don't need to export __interceptor_func to get that.
+#define INTERCEPTOR(ret_type, func, ...)                                \
+  extern "C"[[ gnu::alias(#func), gnu::visibility("hidden") ]] ret_type \
+      __interceptor_##func(__VA_ARGS__);                                \
+  extern "C" INTERCEPTOR_ATTRIBUTE ret_type func(__VA_ARGS__)
+
+#elif !defined(__APPLE__)
+
 #define INTERCEPTOR(ret_type, func, ...) \
   DEFINE_REAL(ret_type, func, __VA_ARGS__) \
   DECLARE_WRAPPER(ret_type, func, __VA_ARGS__) \
@@ -241,7 +264,7 @@
 
 #define INCLUDED_FROM_INTERCEPTION_LIB
 
-#if defined(__linux__) || defined(__FreeBSD__)
+#if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__)
 # include "interception_linux.h"
 # define INTERCEPT_FUNCTION(func) INTERCEPT_FUNCTION_LINUX_OR_FREEBSD(func)
 # define INTERCEPT_FUNCTION_VER(func, symver) \
@@ -251,7 +274,7 @@
 # define INTERCEPT_FUNCTION(func) INTERCEPT_FUNCTION_MAC(func)
 # define INTERCEPT_FUNCTION_VER(func, symver) \
     INTERCEPT_FUNCTION_VER_MAC(func, symver)
-#else  // defined(_WIN32)
+#elif defined(_WIN32)
 # include "interception_win.h"
 # define INTERCEPT_FUNCTION(func) INTERCEPT_FUNCTION_WIN(func)
 # define INTERCEPT_FUNCTION_VER(func, symver) \
diff --git a/lib/interception/interception_linux.cc b/lib/interception/interception_linux.cc
index 6e908ac..2378042 100644
--- a/lib/interception/interception_linux.cc
+++ b/lib/interception/interception_linux.cc
@@ -12,14 +12,22 @@
 // Linux-specific interception methods.
 //===----------------------------------------------------------------------===//
 
-#if defined(__linux__) || defined(__FreeBSD__)
+#if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__)
 #include "interception.h"
 
 #include <dlfcn.h>   // for dlsym() and dlvsym()
 
+#ifdef __NetBSD__
+#include "sanitizer_common/sanitizer_libc.h"
+#endif
+
 namespace __interception {
 bool GetRealFunctionAddress(const char *func_name, uptr *func_addr,
     uptr real, uptr wrapper) {
+#ifdef __NetBSD__
+  // XXX: Find a better way to handle renames
+  if (internal_strcmp(func_name, "sigaction") == 0) func_name = "__sigaction14";
+#endif
   *func_addr = (uptr)dlsym(RTLD_NEXT, func_name);
   return real == wrapper;
 }
@@ -32,5 +40,4 @@
 
 }  // namespace __interception
 
-
-#endif  // __linux__ || __FreeBSD__
+#endif  // __linux__ || __FreeBSD__ || __NetBSD__
diff --git a/lib/interception/interception_linux.h b/lib/interception/interception_linux.h
index 27a66c8..0e15bd8 100644
--- a/lib/interception/interception_linux.h
+++ b/lib/interception/interception_linux.h
@@ -12,7 +12,7 @@
 // Linux-specific interception methods.
 //===----------------------------------------------------------------------===//
 
-#if defined(__linux__) || defined(__FreeBSD__)
+#if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__)
 
 #if !defined(INCLUDED_FROM_INTERCEPTION_LIB)
 # error "interception_linux.h should be included from interception library only"
@@ -44,4 +44,4 @@
 #endif  // !defined(__ANDROID__)
 
 #endif  // INTERCEPTION_LINUX_H
-#endif  // __linux__ || __FreeBSD__
+#endif  // __linux__ || __FreeBSD__ || __NetBSD__
diff --git a/lib/interception/interception_win.cc b/lib/interception/interception_win.cc
index e4f3d35..ac0b050 100644
--- a/lib/interception/interception_win.cc
+++ b/lib/interception/interception_win.cc
@@ -223,9 +223,8 @@
   return true;
 }
 
-static const u8 kHintNop10Bytes[] = {
-  0x66, 0x66, 0x0F, 0x1F, 0x84,
-  0x00, 0x00, 0x00, 0x00, 0x00
+static const u8 kHintNop9Bytes[] = {
+  0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00
 };
 
 template<class T>
@@ -240,8 +239,8 @@
 static bool FunctionHasPadding(uptr address, uptr size) {
   if (IsMemoryPadding(address - size, size))
     return true;
-  if (size <= sizeof(kHintNop10Bytes) &&
-      FunctionHasPrefix(address, kHintNop10Bytes))
+  if (size <= sizeof(kHintNop9Bytes) &&
+      FunctionHasPrefix(address, kHintNop9Bytes))
     return true;
   return false;
 }
@@ -477,7 +476,7 @@
   switch (*(u8*)address) {
     case 0xA1:  // A1 XX XX XX XX XX XX XX XX :
                 //   movabs eax, dword ptr ds:[XXXXXXXX]
-      return 8;
+      return 9;
   }
 
   switch (*(u16*)address) {
@@ -495,6 +494,11 @@
     case 0x5741:  // push r15
     case 0x9066:  // Two-byte NOP
       return 2;
+
+    case 0x058B:  // 8B 05 XX XX XX XX : mov eax, dword ptr [XX XX XX XX]
+      if (rel_offset)
+        *rel_offset = 2;
+      return 6;
   }
 
   switch (0x00FFFFFF & *(u32*)address) {
@@ -549,7 +553,10 @@
     case 0x246c8948:  // 48 89 6C 24 XX : mov QWORD ptr [rsp + XX], rbp
     case 0x245c8948:  // 48 89 5c 24 XX : mov QWORD PTR [rsp + XX], rbx
     case 0x24748948:  // 48 89 74 24 XX : mov QWORD PTR [rsp + XX], rsi
+    case 0x244C8948:  // 48 89 4C 24 XX : mov QWORD PTR [rsp + XX], rcx
       return 5;
+    case 0x24648348:  // 48 83 64 24 XX : and QWORD PTR [rsp + XX], YY
+      return 6;
   }
 
 #else
@@ -828,6 +835,7 @@
 static void **InterestingDLLsAvailable() {
   static const char *InterestingDLLs[] = {
       "kernel32.dll",
+      "msvcr100.dll",      // VS2010
       "msvcr110.dll",      // VS2012
       "msvcr120.dll",      // VS2013
       "vcruntime140.dll",  // VS2015
diff --git a/lib/interception/tests/CMakeLists.txt b/lib/interception/tests/CMakeLists.txt
index 5ea943f..1da0a45 100644
--- a/lib/interception/tests/CMakeLists.txt
+++ b/lib/interception/tests/CMakeLists.txt
@@ -67,23 +67,13 @@
     FOLDER "Compiler-RT Runtime tests")
 endmacro()
 
-function(get_interception_lib_for_arch arch lib lib_name)
+function(get_interception_lib_for_arch arch lib)
   if(APPLE)
     set(tgt_name "RTInterception.test.osx")
   else()
     set(tgt_name "RTInterception.test.${arch}")
   endif()
   set(${lib} "${tgt_name}" PARENT_SCOPE)
-  if(CMAKE_CONFIGURATION_TYPES)
-   set(configuration_path "${CMAKE_CFG_INTDIR}/")
-  else()
-   set(configuration_path "")
-  endif()
-  if(NOT MSVC)
-    set(${lib_name} "${configuration_path}lib${tgt_name}.a" PARENT_SCOPE)
-  else()
-    set(${lib_name} "${configuration_path}${tgt_name}.lib" PARENT_SCOPE)
-  endif()
 endfunction()
 
 # Interception unit tests testsuite.
@@ -93,36 +83,16 @@
 
 # Adds interception tests for architecture.
 macro(add_interception_tests_for_arch arch)
-  get_target_flags_for_arch(${arch} TARGET_FLAGS)
-  set(INTERCEPTION_TEST_SOURCES ${INTERCEPTION_UNITTESTS}
-                             ${COMPILER_RT_GTEST_SOURCE})
-  set(INTERCEPTION_TEST_COMPILE_DEPS ${INTERCEPTION_TEST_HEADERS})
-  if(NOT COMPILER_RT_STANDALONE_BUILD)
-    list(APPEND INTERCEPTION_TEST_COMPILE_DEPS gtest)
-  endif()
   set(INTERCEPTION_TEST_OBJECTS)
-  foreach(source ${INTERCEPTION_TEST_SOURCES})
-    get_filename_component(basename ${source} NAME)
-    if(CMAKE_CONFIGURATION_TYPES)
-      set(output_obj "${CMAKE_CFG_INTDIR}/${basename}.${arch}.o")
-    else()
-      set(output_obj "${basename}.${arch}.o")
-    endif()
-    clang_compile(${output_obj} ${source}
-                  CFLAGS ${INTERCEPTION_TEST_CFLAGS_COMMON} ${TARGET_FLAGS}
-                  DEPS ${INTERCEPTION_TEST_COMPILE_DEPS})
-    list(APPEND INTERCEPTION_TEST_OBJECTS ${output_obj})
-  endforeach()
-  get_interception_lib_for_arch(${arch} INTERCEPTION_COMMON_LIB
-                                INTERCEPTION_COMMON_LIB_NAME)
-  # Add unittest target.
-  set(INTERCEPTION_TEST_NAME "Interception-${arch}-Test")
-  add_compiler_rt_test(InterceptionUnitTests ${INTERCEPTION_TEST_NAME}
-                       OBJECTS ${INTERCEPTION_TEST_OBJECTS}
-                               ${INTERCEPTION_COMMON_LIB_NAME}
-                       DEPS ${INTERCEPTION_TEST_OBJECTS} ${INTERCEPTION_COMMON_LIB}
-                       LINK_FLAGS ${INTERCEPTION_TEST_LINK_FLAGS_COMMON}
-                                  ${TARGET_FLAGS})
+  get_interception_lib_for_arch(${arch} INTERCEPTION_COMMON_LIB)
+  generate_compiler_rt_tests(INTERCEPTION_TEST_OBJECTS
+    InterceptionUnitTests "Interception-${arch}-Test" ${arch}
+    RUNTIME ${INTERCEPTION_COMMON_LIB}
+    SOURCES ${INTERCEPTION_UNITTESTS} ${COMPILER_RT_GTEST_SOURCE}
+    COMPILE_DEPS ${INTERCEPTION_TEST_HEADERS}
+    DEPS gtest
+    CFLAGS ${INTERCEPTION_TEST_CFLAGS_COMMON}
+    LINK_FLAGS ${INTERCEPTION_TEST_LINK_FLAGS_COMMON})
 endmacro()
 
 if(COMPILER_RT_CAN_EXECUTE_TESTS AND NOT ANDROID AND NOT APPLE)
diff --git a/lib/interception/tests/interception_win_test.cc b/lib/interception/tests/interception_win_test.cc
index a705768..37ef994 100644
--- a/lib/interception/tests/interception_win_test.cc
+++ b/lib/interception/tests/interception_win_test.cc
@@ -170,6 +170,13 @@
     0x54,                                      // push    esp
 };
 
+#if SANITIZER_WINDOWS64
+u8 kLoadGlobalCode[] = {
+  0x8B, 0x05, 0x00, 0x00, 0x00, 0x00, // mov    eax [rip + global]
+  0xC3,                               // ret
+};
+#endif
+
 const u8 kUnpatchableCode1[] = {
     0xC3,                           // ret
 };
@@ -502,6 +509,10 @@
   EXPECT_TRUE(TestFunctionPatching(kPatchableCode4, override));
   EXPECT_TRUE(TestFunctionPatching(kPatchableCode5, override));
 
+#if SANITIZER_WINDOWS64
+  EXPECT_TRUE(TestFunctionPatching(kLoadGlobalCode, override));
+#endif
+
   EXPECT_FALSE(TestFunctionPatching(kUnpatchableCode1, override));
   EXPECT_FALSE(TestFunctionPatching(kUnpatchableCode2, override));
   EXPECT_FALSE(TestFunctionPatching(kUnpatchableCode3, override));
diff --git a/lib/lsan/CMakeLists.txt b/lib/lsan/CMakeLists.txt
index a48b85f..60da3e1 100644
--- a/lib/lsan/CMakeLists.txt
+++ b/lib/lsan/CMakeLists.txt
@@ -11,16 +11,15 @@
 set(LSAN_SOURCES
   lsan.cc
   lsan_allocator.cc
+  lsan_linux.cc
   lsan_interceptors.cc
+  lsan_mac.cc
+  lsan_malloc_mac.cc
   lsan_preinit.cc
   lsan_thread.cc)
 
 set(LSAN_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR})
 
-# FIXME(fjricci) - remove this once lsan for darwin is fully enabled
-if(APPLE AND COMPILER_RT_HAS_LSAN)
-  set(LSAN_CFLAGS ${LSAN_CFLAGS} -DCAN_SANITIZE_LEAKS_MAC=1)
-endif()
 add_compiler_rt_object_libraries(RTLSanCommon
     OS ${SANITIZER_COMMON_SUPPORTED_OS}
     ARCHS ${LSAN_COMMON_SUPPORTED_ARCH}
@@ -30,6 +29,8 @@
 if(COMPILER_RT_HAS_LSAN)
   add_compiler_rt_component(lsan)
   if(APPLE)
+    set(LSAN_LINK_LIBS ${SANITIZER_COMMON_LINK_LIBS})
+
     add_weak_symbols("lsan" WEAK_SYMBOL_LINK_FLAGS)
     add_weak_symbols("sanitizer_common" WEAK_SYMBOL_LINK_FLAGS)
 
@@ -43,7 +44,8 @@
                   RTSanitizerCommon
                   RTSanitizerCommonLibc
       CFLAGS ${LSAN_CFLAGS}
-      LINK_FLAGS ${WEAK_SYMBOL_LINK_FLAGS}
+      LINK_FLAGS ${SANITIZER_COMMON_LINK_FLAGS} ${WEAK_SYMBOL_LINK_FLAGS}
+      LINK_LIBS ${LSAN_LINK_LIBS}
       PARENT_TARGET lsan)
   else()
     foreach(arch ${LSAN_SUPPORTED_ARCH})
diff --git a/lib/lsan/lsan.cc b/lib/lsan/lsan.cc
index c7c3429..603050a 100644
--- a/lib/lsan/lsan.cc
+++ b/lib/lsan/lsan.cc
@@ -56,6 +56,9 @@
   RegisterLsanFlags(&parser, f);
   RegisterCommonFlags(&parser);
 
+  // Override from user-specified string.
+  const char *lsan_default_options = MaybeCallLsanDefaultOptions();
+  parser.ParseString(lsan_default_options);
   parser.ParseString(GetEnv("LSAN_OPTIONS"));
 
   SetVerbosity(common_flags()->verbosity);
@@ -65,6 +68,18 @@
   if (common_flags()->help) parser.PrintFlagDescriptions();
 }
 
+static void OnStackUnwind(const SignalContext &sig, const void *,
+                          BufferedStackTrace *stack) {
+  GetStackTraceWithPcBpAndContext(stack, kStackTraceMax, sig.pc, sig.bp,
+                                  sig.context,
+                                  common_flags()->fast_unwind_on_fatal);
+}
+
+void LsanOnDeadlySignal(int signo, void *siginfo, void *context) {
+  HandleDeadlySignal(siginfo, context, GetCurrentThread(), &OnStackUnwind,
+                     nullptr);
+}
+
 extern "C" void __lsan_init() {
   CHECK(!lsan_init_is_running);
   if (lsan_inited)
@@ -76,9 +91,11 @@
   InitializeFlags();
   InitCommonLsan();
   InitializeAllocator();
+  ReplaceSystemMalloc();
   InitTlsSize();
   InitializeInterceptors();
   InitializeThreadRegistry();
+  InstallDeadlySignalHandlers(LsanOnDeadlySignal);
   u32 tid = ThreadCreate(0, 0, true);
   CHECK_EQ(tid, 0);
   ThreadStart(tid, GetTid());
diff --git a/lib/lsan/lsan.h b/lib/lsan/lsan.h
index ec5eb93..08794e9 100644
--- a/lib/lsan/lsan.h
+++ b/lib/lsan/lsan.h
@@ -12,24 +12,15 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "lsan_thread.h"
 #include "sanitizer_common/sanitizer_flags.h"
 #include "sanitizer_common/sanitizer_stacktrace.h"
 
-#define GET_STACK_TRACE(max_size, fast)                                        \
-  BufferedStackTrace stack;                                                    \
-  {                                                                            \
-    uptr stack_top = 0, stack_bottom = 0;                                      \
-    ThreadContext *t;                                                          \
-    if (fast && (t = CurrentThreadContext())) {                                \
-      stack_top = t->stack_end();                                              \
-      stack_bottom = t->stack_begin();                                         \
-    }                                                                          \
-    if (!SANITIZER_MIPS ||                                                     \
-        IsValidFrame(GET_CURRENT_FRAME(), stack_top, stack_bottom)) {          \
-      stack.Unwind(max_size, StackTrace::GetCurrentPc(), GET_CURRENT_FRAME(),  \
-                   /* context */ 0, stack_top, stack_bottom, fast);            \
-    }                                                                          \
-  }
+#define GET_STACK_TRACE(max_size, fast)                       \
+  __sanitizer::BufferedStackTrace stack;                      \
+  GetStackTraceWithPcBpAndContext(&stack, max_size,           \
+                                  StackTrace::GetCurrentPc(), \
+                                  GET_CURRENT_FRAME(), nullptr, fast);
 
 #define GET_STACK_TRACE_FATAL \
   GET_STACK_TRACE(kStackTraceMax, common_flags()->fast_unwind_on_fatal)
@@ -38,9 +29,37 @@
   GET_STACK_TRACE(__sanitizer::common_flags()->malloc_context_size, \
                   common_flags()->fast_unwind_on_malloc)
 
+#define GET_STACK_TRACE_THREAD GET_STACK_TRACE(kStackTraceMax, true)
+
 namespace __lsan {
 
 void InitializeInterceptors();
+void ReplaceSystemMalloc();
+
+#define ENSURE_LSAN_INITED do {   \
+  CHECK(!lsan_init_is_running);   \
+  if (!lsan_inited)               \
+    __lsan_init();                \
+} while (0)
+
+// Get the stack trace with the given pc and bp.
+// The pc will be in the position 0 of the resulting stack trace.
+// The bp may refer to the current frame or to the caller's frame.
+ALWAYS_INLINE
+void GetStackTraceWithPcBpAndContext(__sanitizer::BufferedStackTrace *stack,
+                                     __sanitizer::uptr max_depth,
+                                     __sanitizer::uptr pc, __sanitizer::uptr bp,
+                                     void *context, bool fast) {
+  uptr stack_top = 0, stack_bottom = 0;
+  ThreadContext *t;
+  if (fast && (t = CurrentThreadContext())) {
+    stack_top = t->stack_end();
+    stack_bottom = t->stack_begin();
+  }
+  if (!SANITIZER_MIPS || IsValidFrame(bp, stack_top, stack_bottom)) {
+    stack->Unwind(max_depth, pc, bp, context, stack_top, stack_bottom, fast);
+  }
+}
 
 }  // namespace __lsan
 
diff --git a/lib/lsan/lsan_allocator.cc b/lib/lsan/lsan_allocator.cc
index 4284857..2df58b4 100644
--- a/lib/lsan/lsan_allocator.cc
+++ b/lib/lsan/lsan_allocator.cc
@@ -15,7 +15,9 @@
 #include "lsan_allocator.h"
 
 #include "sanitizer_common/sanitizer_allocator.h"
+#include "sanitizer_common/sanitizer_allocator_checks.h"
 #include "sanitizer_common/sanitizer_allocator_interface.h"
+#include "sanitizer_common/sanitizer_errno.h"
 #include "sanitizer_common/sanitizer_internal_defs.h"
 #include "sanitizer_common/sanitizer_stackdepot.h"
 #include "sanitizer_common/sanitizer_stacktrace.h"
@@ -24,62 +26,27 @@
 extern "C" void *memset(void *ptr, int value, uptr num);
 
 namespace __lsan {
-
-struct ChunkMetadata {
-  u8 allocated : 8;  // Must be first.
-  ChunkTag tag : 2;
-#if SANITIZER_WORDSIZE == 64
-  uptr requested_size : 54;
-#else
-  uptr requested_size : 32;
-  uptr padding : 22;
-#endif
-  u32 stack_trace_id;
-};
-
-#if defined(__mips64) || defined(__aarch64__) || defined(__i386__)
-#if defined(__i386__)
+#if defined(__i386__) || defined(__arm__)
 static const uptr kMaxAllowedMallocSize = 1UL << 30;
-#else
+#elif defined(__mips64) || defined(__aarch64__)
 static const uptr kMaxAllowedMallocSize = 4UL << 30;
-#endif
-static const uptr kRegionSizeLog = 20;
-static const uptr kNumRegions = SANITIZER_MMAP_RANGE_SIZE >> kRegionSizeLog;
-typedef TwoLevelByteMap<(kNumRegions >> 12), 1 << 12> ByteMap;
-typedef CompactSizeClassMap SizeClassMap;
-typedef SizeClassAllocator32<0, SANITIZER_MMAP_RANGE_SIZE,
-    sizeof(ChunkMetadata), SizeClassMap, kRegionSizeLog, ByteMap>
-    PrimaryAllocator;
 #else
 static const uptr kMaxAllowedMallocSize = 8UL << 30;
-
-struct AP64 {  // Allocator64 parameters. Deliberately using a short name.
-  static const uptr kSpaceBeg = 0x600000000000ULL;
-  static const uptr kSpaceSize =  0x40000000000ULL; // 4T.
-  static const uptr kMetadataSize = sizeof(ChunkMetadata);
-  typedef DefaultSizeClassMap SizeClassMap;
-  typedef NoOpMapUnmapCallback MapUnmapCallback;
-  static const uptr kFlags = 0;
-};
-
-typedef SizeClassAllocator64<AP64> PrimaryAllocator;
 #endif
-typedef SizeClassAllocatorLocalCache<PrimaryAllocator> AllocatorCache;
 typedef LargeMmapAllocator<> SecondaryAllocator;
 typedef CombinedAllocator<PrimaryAllocator, AllocatorCache,
           SecondaryAllocator> Allocator;
 
 static Allocator allocator;
-static THREADLOCAL AllocatorCache cache;
 
 void InitializeAllocator() {
+  SetAllocatorMayReturnNull(common_flags()->allocator_may_return_null);
   allocator.InitLinkerInitialized(
-      common_flags()->allocator_may_return_null,
       common_flags()->allocator_release_to_os_interval_ms);
 }
 
 void AllocatorThreadFinish() {
-  allocator.SwallowCache(&cache);
+  allocator.SwallowCache(GetAllocatorCache());
 }
 
 static ChunkMetadata *Metadata(const void *p) {
@@ -109,9 +76,9 @@
     size = 1;
   if (size > kMaxAllowedMallocSize) {
     Report("WARNING: LeakSanitizer failed to allocate %zu bytes\n", size);
-    return nullptr;
+    return Allocator::FailureHandler::OnBadRequest();
   }
-  void *p = allocator.Allocate(&cache, size, alignment, false);
+  void *p = allocator.Allocate(GetAllocatorCache(), size, alignment);
   // Do not rely on the allocator to clear the memory (it's slow).
   if (cleared && allocator.FromPrimary(p))
     memset(p, 0, size);
@@ -121,11 +88,18 @@
   return p;
 }
 
+static void *Calloc(uptr nmemb, uptr size, const StackTrace &stack) {
+  if (UNLIKELY(CheckForCallocOverflow(size, nmemb)))
+    return Allocator::FailureHandler::OnBadRequest();
+  size *= nmemb;
+  return Allocate(stack, size, 1, true);
+}
+
 void Deallocate(void *p) {
   if (&__sanitizer_free_hook) __sanitizer_free_hook(p);
   RunFreeHooks(p);
   RegisterDeallocation(p);
-  allocator.Deallocate(&cache, p);
+  allocator.Deallocate(GetAllocatorCache(), p);
 }
 
 void *Reallocate(const StackTrace &stack, void *p, uptr new_size,
@@ -133,17 +107,17 @@
   RegisterDeallocation(p);
   if (new_size > kMaxAllowedMallocSize) {
     Report("WARNING: LeakSanitizer failed to allocate %zu bytes\n", new_size);
-    allocator.Deallocate(&cache, p);
-    return nullptr;
+    allocator.Deallocate(GetAllocatorCache(), p);
+    return Allocator::FailureHandler::OnBadRequest();
   }
-  p = allocator.Reallocate(&cache, p, new_size, alignment);
+  p = allocator.Reallocate(GetAllocatorCache(), p, new_size, alignment);
   RegisterAllocation(stack, p, new_size);
   return p;
 }
 
 void GetAllocatorCacheRange(uptr *begin, uptr *end) {
-  *begin = (uptr)&cache;
-  *end = *begin + sizeof(cache);
+  *begin = (uptr)GetAllocatorCache();
+  *end = *begin + sizeof(AllocatorCache);
 }
 
 uptr GetMallocUsableSize(const void *p) {
@@ -152,6 +126,39 @@
   return m->requested_size;
 }
 
+void *lsan_memalign(uptr alignment, uptr size, const StackTrace &stack) {
+  if (UNLIKELY(!IsPowerOfTwo(alignment))) {
+    errno = errno_EINVAL;
+    return Allocator::FailureHandler::OnBadRequest();
+  }
+  return SetErrnoOnNull(Allocate(stack, size, alignment, kAlwaysClearMemory));
+}
+
+void *lsan_malloc(uptr size, const StackTrace &stack) {
+  return SetErrnoOnNull(Allocate(stack, size, 1, kAlwaysClearMemory));
+}
+
+void lsan_free(void *p) {
+  Deallocate(p);
+}
+
+void *lsan_realloc(void *p, uptr size, const StackTrace &stack) {
+  return SetErrnoOnNull(Reallocate(stack, p, size, 1));
+}
+
+void *lsan_calloc(uptr nmemb, uptr size, const StackTrace &stack) {
+  return SetErrnoOnNull(Calloc(nmemb, size, stack));
+}
+
+void *lsan_valloc(uptr size, const StackTrace &stack) {
+  return SetErrnoOnNull(
+      Allocate(stack, size, GetPageSizeCached(), kAlwaysClearMemory));
+}
+
+uptr lsan_mz_size(const void *p) {
+  return GetMallocUsableSize(p);
+}
+
 ///// Interface to the common LSan module. /////
 
 void LockAllocator() {
diff --git a/lib/lsan/lsan_allocator.h b/lib/lsan/lsan_allocator.h
index f564601..4006f79 100644
--- a/lib/lsan/lsan_allocator.h
+++ b/lib/lsan/lsan_allocator.h
@@ -15,8 +15,10 @@
 #ifndef LSAN_ALLOCATOR_H
 #define LSAN_ALLOCATOR_H
 
+#include "sanitizer_common/sanitizer_allocator.h"
 #include "sanitizer_common/sanitizer_common.h"
 #include "sanitizer_common/sanitizer_internal_defs.h"
+#include "lsan_common.h"
 
 namespace __lsan {
 
@@ -34,6 +36,68 @@
 void AllocatorThreadFinish();
 void InitializeAllocator();
 
+const bool kAlwaysClearMemory = true;
+
+struct ChunkMetadata {
+  u8 allocated : 8;  // Must be first.
+  ChunkTag tag : 2;
+#if SANITIZER_WORDSIZE == 64
+  uptr requested_size : 54;
+#else
+  uptr requested_size : 32;
+  uptr padding : 22;
+#endif
+  u32 stack_trace_id;
+};
+
+#if defined(__mips64) || defined(__aarch64__) || defined(__i386__) || \
+    defined(__arm__)
+static const uptr kRegionSizeLog = 20;
+static const uptr kNumRegions = SANITIZER_MMAP_RANGE_SIZE >> kRegionSizeLog;
+typedef TwoLevelByteMap<(kNumRegions >> 12), 1 << 12> ByteMap;
+
+struct AP32 {
+  static const uptr kSpaceBeg = 0;
+  static const u64 kSpaceSize = SANITIZER_MMAP_RANGE_SIZE;
+  static const uptr kMetadataSize = sizeof(ChunkMetadata);
+  typedef __sanitizer::CompactSizeClassMap SizeClassMap;
+  static const uptr kRegionSizeLog = __lsan::kRegionSizeLog;
+  typedef __lsan::ByteMap ByteMap;
+  typedef NoOpMapUnmapCallback MapUnmapCallback;
+  static const uptr kFlags = 0;
+};
+typedef SizeClassAllocator32<AP32> PrimaryAllocator;
+#elif defined(__x86_64__) || defined(__powerpc64__)
+# if defined(__powerpc64__)
+const uptr kAllocatorSpace = 0xa0000000000ULL;
+const uptr kAllocatorSize  = 0x20000000000ULL;  // 2T.
+# else
+const uptr kAllocatorSpace = 0x600000000000ULL;
+const uptr kAllocatorSize  = 0x40000000000ULL;  // 4T.
+# endif
+struct AP64 {  // Allocator64 parameters. Deliberately using a short name.
+  static const uptr kSpaceBeg = kAllocatorSpace;
+  static const uptr kSpaceSize = kAllocatorSize;
+  static const uptr kMetadataSize = sizeof(ChunkMetadata);
+  typedef DefaultSizeClassMap SizeClassMap;
+  typedef NoOpMapUnmapCallback MapUnmapCallback;
+  static const uptr kFlags = 0;
+};
+
+typedef SizeClassAllocator64<AP64> PrimaryAllocator;
+#endif
+typedef SizeClassAllocatorLocalCache<PrimaryAllocator> AllocatorCache;
+
+AllocatorCache *GetAllocatorCache();
+
+void *lsan_memalign(uptr alignment, uptr size, const StackTrace &stack);
+void *lsan_malloc(uptr size, const StackTrace &stack);
+void lsan_free(void *p);
+void *lsan_realloc(void *p, uptr size, const StackTrace &stack);
+void *lsan_calloc(uptr nmemb, uptr size, const StackTrace &stack);
+void *lsan_valloc(uptr size, const StackTrace &stack);
+uptr lsan_mz_size(const void *p);
+
 }  // namespace __lsan
 
 #endif  // LSAN_ALLOCATOR_H
diff --git a/lib/lsan/lsan_common.cc b/lib/lsan/lsan_common.cc
index 5ae3ad2..622aae7 100644
--- a/lib/lsan/lsan_common.cc
+++ b/lib/lsan/lsan_common.cc
@@ -68,6 +68,19 @@
 static SuppressionContext *suppression_ctx = nullptr;
 static const char kSuppressionLeak[] = "leak";
 static const char *kSuppressionTypes[] = { kSuppressionLeak };
+static const char kStdSuppressions[] =
+#if SANITIZER_SUPPRESS_LEAK_ON_PTHREAD_EXIT
+  // For more details refer to the SANITIZER_SUPPRESS_LEAK_ON_PTHREAD_EXIT
+  // definition.
+  "leak:*pthread_exit*\n"
+#endif  // SANITIZER_SUPPRESS_LEAK_ON_PTHREAD_EXIT
+#if SANITIZER_MAC
+  // For Darwin and os_log/os_trace: https://reviews.llvm.org/D35173
+  "leak:*_os_trace*\n"
+#endif
+  // TLS leak in some glibc versions, described in
+  // https://sourceware.org/bugzilla/show_bug.cgi?id=12650.
+  "leak:*tls_get_addr*\n";
 
 void InitializeSuppressions() {
   CHECK_EQ(nullptr, suppression_ctx);
@@ -76,6 +89,7 @@
   suppression_ctx->ParseFromFile(flags()->suppressions);
   if (&__lsan_default_suppressions)
     suppression_ctx->Parse(__lsan_default_suppressions());
+  suppression_ctx->Parse(kStdSuppressions);
 }
 
 static SuppressionContext *GetSuppressionContext() {
@@ -83,12 +97,9 @@
   return suppression_ctx;
 }
 
-struct RootRegion {
-  const void *begin;
-  uptr size;
-};
+static InternalMmapVector<RootRegion> *root_regions;
 
-InternalMmapVector<RootRegion> *root_regions;
+InternalMmapVector<RootRegion> const *GetRootRegions() { return root_regions; }
 
 void InitializeRootRegions() {
   CHECK(!root_regions);
@@ -96,6 +107,10 @@
   root_regions = new(placeholder) InternalMmapVector<RootRegion>(1);
 }
 
+const char *MaybeCallLsanDefaultOptions() {
+  return (&__lsan_default_options) ? __lsan_default_options() : "";
+}
+
 void InitCommonLsan() {
   InitializeRootRegions();
   if (common_flags()->detect_leaks) {
@@ -111,7 +126,6 @@
   Decorator() : SanitizerCommonDecorator() { }
   const char *Error() { return Red(); }
   const char *Leak() { return Blue(); }
-  const char *End() { return Default(); }
 };
 
 static inline bool CanBeAHeapPointer(uptr p) {
@@ -175,6 +189,23 @@
   }
 }
 
+// Scans a global range for pointers
+void ScanGlobalRange(uptr begin, uptr end, Frontier *frontier) {
+  uptr allocator_begin = 0, allocator_end = 0;
+  GetAllocatorGlobalRange(&allocator_begin, &allocator_end);
+  if (begin <= allocator_begin && allocator_begin < end) {
+    CHECK_LE(allocator_begin, allocator_end);
+    CHECK_LE(allocator_end, end);
+    if (begin < allocator_begin)
+      ScanRangeForPointers(begin, allocator_begin, frontier, "GLOBAL",
+                           kReachable);
+    if (allocator_end < end)
+      ScanRangeForPointers(allocator_end, end, frontier, "GLOBAL", kReachable);
+  } else {
+    ScanRangeForPointers(begin, end, frontier, "GLOBAL", kReachable);
+  }
+}
+
 void ForEachExtraStackRangeCb(uptr begin, uptr end, void* arg) {
   Frontier *frontier = reinterpret_cast<Frontier *>(arg);
   ScanRangeForPointers(begin, end, frontier, "FAKE STACK", kReachable);
@@ -183,11 +214,11 @@
 // Scans thread data (stacks and TLS) for heap pointers.
 static void ProcessThreads(SuspendedThreadsList const &suspended_threads,
                            Frontier *frontier) {
-  InternalScopedBuffer<uptr> registers(SuspendedThreadsList::RegisterCount());
+  InternalScopedBuffer<uptr> registers(suspended_threads.RegisterCount());
   uptr registers_begin = reinterpret_cast<uptr>(registers.data());
   uptr registers_end = registers_begin + registers.size();
-  for (uptr i = 0; i < suspended_threads.thread_count(); i++) {
-    uptr os_id = static_cast<uptr>(suspended_threads.GetThreadID(i));
+  for (uptr i = 0; i < suspended_threads.ThreadCount(); i++) {
+    tid_t os_id = static_cast<tid_t>(suspended_threads.GetThreadID(i));
     LOG_THREADS("Processing thread %d.\n", os_id);
     uptr stack_begin, stack_end, tls_begin, tls_end, cache_begin, cache_end;
     DTLS *dtls;
@@ -201,11 +232,13 @@
       continue;
     }
     uptr sp;
-    bool have_registers =
-        (suspended_threads.GetRegistersAndSP(i, registers.data(), &sp) == 0);
-    if (!have_registers) {
-      Report("Unable to get registers from thread %d.\n");
-      // If unable to get SP, consider the entire stack to be reachable.
+    PtraceRegistersStatus have_registers =
+        suspended_threads.GetRegistersAndSP(i, registers.data(), &sp);
+    if (have_registers != REGISTERS_AVAILABLE) {
+      Report("Unable to get registers from thread %d.\n", os_id);
+      // If unable to get SP, consider the entire stack to be reachable unless
+      // GetRegistersAndSP failed with ESRCH.
+      if (have_registers == REGISTERS_UNAVAILABLE_FATAL) continue;
       sp = stack_begin;
     }
 
@@ -239,21 +272,23 @@
     }
 
     if (flags()->use_tls) {
-      LOG_THREADS("TLS at %p-%p.\n", tls_begin, tls_end);
-      if (cache_begin == cache_end) {
-        ScanRangeForPointers(tls_begin, tls_end, frontier, "TLS", kReachable);
-      } else {
-        // Because LSan should not be loaded with dlopen(), we can assume
-        // that allocator cache will be part of static TLS image.
-        CHECK_LE(tls_begin, cache_begin);
-        CHECK_GE(tls_end, cache_end);
-        if (tls_begin < cache_begin)
-          ScanRangeForPointers(tls_begin, cache_begin, frontier, "TLS",
-                               kReachable);
-        if (tls_end > cache_end)
-          ScanRangeForPointers(cache_end, tls_end, frontier, "TLS", kReachable);
+      if (tls_begin) {
+        LOG_THREADS("TLS at %p-%p.\n", tls_begin, tls_end);
+        // If the tls and cache ranges don't overlap, scan full tls range,
+        // otherwise, only scan the non-overlapping portions
+        if (cache_begin == cache_end || tls_end < cache_begin ||
+            tls_begin > cache_end) {
+          ScanRangeForPointers(tls_begin, tls_end, frontier, "TLS", kReachable);
+        } else {
+          if (tls_begin < cache_begin)
+            ScanRangeForPointers(tls_begin, cache_begin, frontier, "TLS",
+                                 kReachable);
+          if (tls_end > cache_end)
+            ScanRangeForPointers(cache_end, tls_end, frontier, "TLS",
+                                 kReachable);
+        }
       }
-      if (dtls) {
+      if (dtls && !DTLSInDestruction(dtls)) {
         for (uptr j = 0; j < dtls->dtv_size; ++j) {
           uptr dtls_beg = dtls->dtv[j].beg;
           uptr dtls_end = dtls_beg + dtls->dtv[j].size;
@@ -263,28 +298,36 @@
                                  kReachable);
           }
         }
+      } else {
+        // We are handling a thread with DTLS under destruction. Log about
+        // this and continue.
+        LOG_THREADS("Thread %d has DTLS under destruction.\n", os_id);
       }
     }
   }
 }
 
-static void ProcessRootRegion(Frontier *frontier, uptr root_begin,
-                              uptr root_end) {
-  MemoryMappingLayout proc_maps(/*cache_enabled*/true);
-  uptr begin, end, prot;
-  while (proc_maps.Next(&begin, &end,
-                        /*offset*/ nullptr, /*filename*/ nullptr,
-                        /*filename_size*/ 0, &prot)) {
-    uptr intersection_begin = Max(root_begin, begin);
-    uptr intersection_end = Min(end, root_end);
-    if (intersection_begin >= intersection_end) continue;
-    bool is_readable = prot & MemoryMappingLayout::kProtectionRead;
-    LOG_POINTERS("Root region %p-%p intersects with mapped region %p-%p (%s)\n",
-                 root_begin, root_end, begin, end,
-                 is_readable ? "readable" : "unreadable");
-    if (is_readable)
-      ScanRangeForPointers(intersection_begin, intersection_end, frontier,
-                           "ROOT", kReachable);
+void ScanRootRegion(Frontier *frontier, const RootRegion &root_region,
+                    uptr region_begin, uptr region_end, bool is_readable) {
+  uptr intersection_begin = Max(root_region.begin, region_begin);
+  uptr intersection_end = Min(region_end, root_region.begin + root_region.size);
+  if (intersection_begin >= intersection_end) return;
+  LOG_POINTERS("Root region %p-%p intersects with mapped region %p-%p (%s)\n",
+               root_region.begin, root_region.begin + root_region.size,
+               region_begin, region_end,
+               is_readable ? "readable" : "unreadable");
+  if (is_readable)
+    ScanRangeForPointers(intersection_begin, intersection_end, frontier, "ROOT",
+                         kReachable);
+}
+
+static void ProcessRootRegion(Frontier *frontier,
+                              const RootRegion &root_region) {
+  MemoryMappingLayout proc_maps(/*cache_enabled*/ true);
+  MemoryMappedSegment segment;
+  while (proc_maps.Next(&segment)) {
+    ScanRootRegion(frontier, root_region, segment.start, segment.end,
+                   segment.IsReadable());
   }
 }
 
@@ -293,9 +336,7 @@
   if (!flags()->use_root_regions) return;
   CHECK(root_regions);
   for (uptr i = 0; i < root_regions->size(); i++) {
-    RootRegion region = (*root_regions)[i];
-    uptr begin_addr = reinterpret_cast<uptr>(region.begin);
-    ProcessRootRegion(frontier, begin_addr, begin_addr + region.size);
+    ProcessRootRegion(frontier, (*root_regions)[i]);
   }
 }
 
@@ -333,6 +374,72 @@
   }
 }
 
+static uptr GetCallerPC(u32 stack_id, StackDepotReverseMap *map) {
+  CHECK(stack_id);
+  StackTrace stack = map->Get(stack_id);
+  // The top frame is our malloc/calloc/etc. The next frame is the caller.
+  if (stack.size >= 2)
+    return stack.trace[1];
+  return 0;
+}
+
+struct InvalidPCParam {
+  Frontier *frontier;
+  StackDepotReverseMap *stack_depot_reverse_map;
+  bool skip_linker_allocations;
+};
+
+// ForEachChunk callback. If the caller pc is invalid or is within the linker,
+// mark as reachable. Called by ProcessPlatformSpecificAllocations.
+static void MarkInvalidPCCb(uptr chunk, void *arg) {
+  CHECK(arg);
+  InvalidPCParam *param = reinterpret_cast<InvalidPCParam *>(arg);
+  chunk = GetUserBegin(chunk);
+  LsanMetadata m(chunk);
+  if (m.allocated() && m.tag() != kReachable && m.tag() != kIgnored) {
+    u32 stack_id = m.stack_trace_id();
+    uptr caller_pc = 0;
+    if (stack_id > 0)
+      caller_pc = GetCallerPC(stack_id, param->stack_depot_reverse_map);
+    // If caller_pc is unknown, this chunk may be allocated in a coroutine. Mark
+    // it as reachable, as we can't properly report its allocation stack anyway.
+    if (caller_pc == 0 || (param->skip_linker_allocations &&
+                           GetLinker()->containsAddress(caller_pc))) {
+      m.set_tag(kReachable);
+      param->frontier->push_back(chunk);
+    }
+  }
+}
+
+// On Linux, handles dynamically allocated TLS blocks by treating all chunks
+// allocated from ld-linux.so as reachable.
+// Dynamic TLS blocks contain the TLS variables of dynamically loaded modules.
+// They are allocated with a __libc_memalign() call in allocate_and_init()
+// (elf/dl-tls.c). Glibc won't tell us the address ranges occupied by those
+// blocks, but we can make sure they come from our own allocator by intercepting
+// __libc_memalign(). On top of that, there is no easy way to reach them. Their
+// addresses are stored in a dynamically allocated array (the DTV) which is
+// referenced from the static TLS. Unfortunately, we can't just rely on the DTV
+// being reachable from the static TLS, and the dynamic TLS being reachable from
+// the DTV. This is because the initial DTV is allocated before our interception
+// mechanism kicks in, and thus we don't recognize it as allocated memory. We
+// can't special-case it either, since we don't know its size.
+// Our solution is to include in the root set all allocations made from
+// ld-linux.so (which is where allocate_and_init() is implemented). This is
+// guaranteed to include all dynamic TLS blocks (and possibly other allocations
+// which we don't care about).
+// On all other platforms, this simply checks to ensure that the caller pc is
+// valid before reporting chunks as leaked.
+void ProcessPC(Frontier *frontier) {
+  StackDepotReverseMap stack_depot_reverse_map;
+  InvalidPCParam arg;
+  arg.frontier = frontier;
+  arg.stack_depot_reverse_map = &stack_depot_reverse_map;
+  arg.skip_linker_allocations =
+      flags()->use_tls && flags()->use_ld_allocations && GetLinker() != nullptr;
+  ForEachChunk(MarkInvalidPCCb, &arg);
+}
+
 // Sets the appropriate tag on each chunk.
 static void ClassifyAllChunks(SuspendedThreadsList const &suspended_threads) {
   // Holds the flood fill frontier.
@@ -344,11 +451,13 @@
   ProcessRootRegions(&frontier);
   FloodFillTag(&frontier, kReachable);
 
+  CHECK_EQ(0, frontier.size());
+  ProcessPC(&frontier);
+
   // The check here is relatively expensive, so we do this in a separate flood
   // fill. That way we can skip the check for chunks that are reachable
   // otherwise.
   LOG_POINTERS("Processing platform-specific allocations.\n");
-  CHECK_EQ(0, frontier.size());
   ProcessPlatformSpecificAllocations(&frontier);
   FloodFillTag(&frontier, kReachable);
 
@@ -458,7 +567,7 @@
            "\n");
     Printf("%s", d.Error());
     Report("ERROR: LeakSanitizer: detected memory leaks\n");
-    Printf("%s", d.End());
+    Printf("%s", d.Default());
     param.leak_report.ReportTopLeaks(flags()->max_leaks);
   }
   if (common_flags()->print_suppressions)
@@ -470,18 +579,16 @@
   return false;
 }
 
+static bool has_reported_leaks = false;
+bool HasReportedLeaks() { return has_reported_leaks; }
+
 void DoLeakCheck() {
   BlockingMutexLock l(&global_mutex);
   static bool already_done;
   if (already_done) return;
   already_done = true;
-  bool have_leaks = CheckForLeaks();
-  if (!have_leaks) {
-    return;
-  }
-  if (common_flags()->exitcode) {
-    Die();
-  }
+  has_reported_leaks = CheckForLeaks();
+  if (has_reported_leaks) HandleLeaks();
 }
 
 static int DoRecoverableLeakCheck() {
@@ -490,6 +597,8 @@
   return have_leaks ? 1 : 0;
 }
 
+void DoRecoverableLeakCheckVoid() { DoRecoverableLeakCheck(); }
+
 static Suppression *GetSuppressionForAddr(uptr addr) {
   Suppression *s = nullptr;
 
@@ -594,7 +703,7 @@
   Printf("%s leak of %zu byte(s) in %zu object(s) allocated from:\n",
          leaks_[index].is_directly_leaked ? "Direct" : "Indirect",
          leaks_[index].total_size, leaks_[index].hit_count);
-  Printf("%s", d.End());
+  Printf("%s", d.Default());
 
   PrintStackTraceById(leaks_[index].stack_trace_id);
 
@@ -652,6 +761,7 @@
 namespace __lsan {
 void InitCommonLsan() { }
 void DoLeakCheck() { }
+void DoRecoverableLeakCheckVoid() { }
 void DisableInThisThread() { }
 void EnableInThisThread() { }
 }
@@ -684,7 +794,7 @@
 #if CAN_SANITIZE_LEAKS
   BlockingMutexLock l(&global_mutex);
   CHECK(root_regions);
-  RootRegion region = {begin, size};
+  RootRegion region = {reinterpret_cast<uptr>(begin), size};
   root_regions->push_back(region);
   VReport(1, "Registered root region at %p of size %llu\n", begin, size);
 #endif // CAN_SANITIZE_LEAKS
@@ -698,7 +808,7 @@
   bool removed = false;
   for (uptr i = 0; i < root_regions->size(); i++) {
     RootRegion region = (*root_regions)[i];
-    if (region.begin == begin && region.size == size) {
+    if (region.begin == reinterpret_cast<uptr>(begin) && region.size == size) {
       removed = true;
       uptr last_index = root_regions->size() - 1;
       (*root_regions)[i] = (*root_regions)[last_index];
@@ -750,6 +860,11 @@
 
 #if !SANITIZER_SUPPORTS_WEAK_HOOKS
 SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+const char * __lsan_default_options() {
+  return "";
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
 int __lsan_is_turned_off() {
   return 0;
 }
diff --git a/lib/lsan/lsan_common.h b/lib/lsan/lsan_common.h
index c457d1b..5adcaad 100644
--- a/lib/lsan/lsan_common.h
+++ b/lib/lsan/lsan_common.h
@@ -30,11 +30,16 @@
 // To enable LeakSanitizer on new architecture, one need to implement
 // internal_clone function as well as (probably) adjust TLS machinery for
 // new architecture inside sanitizer library.
-#if (SANITIZER_LINUX && !SANITIZER_ANDROID || CAN_SANITIZE_LEAKS_MAC) \
-     && (SANITIZER_WORDSIZE == 64) && (defined(__x86_64__) \
-     ||  defined(__mips64) ||  defined(__aarch64__))
+#if (SANITIZER_LINUX && !SANITIZER_ANDROID || SANITIZER_MAC) && \
+    (SANITIZER_WORDSIZE == 64) &&                               \
+    (defined(__x86_64__) || defined(__mips64) || defined(__aarch64__) || \
+     defined(__powerpc64__))
 #define CAN_SANITIZE_LEAKS 1
-#elif SANITIZER_LINUX && !SANITIZER_ANDROID && defined(__i386__)
+#elif defined(__i386__) && \
+    (SANITIZER_LINUX && !SANITIZER_ANDROID || SANITIZER_MAC)
+#define CAN_SANITIZE_LEAKS 1
+#elif defined(__arm__) && \
+    SANITIZER_LINUX && !SANITIZER_ANDROID
 #define CAN_SANITIZE_LEAKS 1
 #else
 #define CAN_SANITIZE_LEAKS 0
@@ -114,12 +119,22 @@
 void InitializePlatformSpecificModules();
 void ProcessGlobalRegions(Frontier *frontier);
 void ProcessPlatformSpecificAllocations(Frontier *frontier);
+
+struct RootRegion {
+  uptr begin;
+  uptr size;
+};
+
+InternalMmapVector<RootRegion> const *GetRootRegions();
+void ScanRootRegion(Frontier *frontier, RootRegion const &region,
+                    uptr region_begin, uptr region_end, bool is_readable);
 // Run stoptheworld while holding any platform-specific locks.
 void DoStopTheWorld(StopTheWorldCallback callback, void* argument);
 
 void ScanRangeForPointers(uptr begin, uptr end,
                           Frontier *frontier,
                           const char *region_type, ChunkTag tag);
+void ScanGlobalRange(uptr begin, uptr end, Frontier *frontier);
 
 enum IgnoreObjectResult {
   kIgnoreObjectSuccess,
@@ -128,8 +143,10 @@
 };
 
 // Functions called from the parent tool.
+const char *MaybeCallLsanDefaultOptions();
 void InitCommonLsan();
 void DoLeakCheck();
+void DoRecoverableLeakCheckVoid();
 void DisableCounterUnderflow();
 bool DisabledInThisThread();
 
@@ -143,15 +160,38 @@
   ~ScopedInterceptorDisabler() { EnableInThisThread(); }
 };
 
-// Special case for "new T[0]" where T is a type with DTOR.
-// new T[0] will allocate one word for the array size (0) and store a pointer
-// to the end of allocated chunk.
-inline bool IsSpecialCaseOfOperatorNew0(uptr chunk_beg, uptr chunk_size,
-                                        uptr addr) {
+// According to Itanium C++ ABI array cookie is a one word containing
+// size of allocated array.
+static inline bool IsItaniumABIArrayCookie(uptr chunk_beg, uptr chunk_size,
+                                           uptr addr) {
   return chunk_size == sizeof(uptr) && chunk_beg + chunk_size == addr &&
          *reinterpret_cast<uptr *>(chunk_beg) == 0;
 }
 
+// According to ARM C++ ABI array cookie consists of two words:
+// struct array_cookie {
+//   std::size_t element_size; // element_size != 0
+//   std::size_t element_count;
+// };
+static inline bool IsARMABIArrayCookie(uptr chunk_beg, uptr chunk_size,
+                                       uptr addr) {
+  return chunk_size == 2 * sizeof(uptr) && chunk_beg + chunk_size == addr &&
+         *reinterpret_cast<uptr *>(chunk_beg + sizeof(uptr)) == 0;
+}
+
+// Special case for "new T[0]" where T is a type with DTOR.
+// new T[0] will allocate a cookie (one or two words) for the array size (0)
+// and store a pointer to the end of allocated chunk. The actual cookie layout
+// varies between platforms according to their C++ ABI implementation.
+inline bool IsSpecialCaseOfOperatorNew0(uptr chunk_beg, uptr chunk_size,
+                                        uptr addr) {
+#if defined(__arm__)
+  return IsARMABIArrayCookie(chunk_beg, chunk_size, addr);
+#else
+  return IsItaniumABIArrayCookie(chunk_beg, chunk_size, addr);
+#endif
+}
+
 // The following must be implemented in the parent tool.
 
 void ForEachChunk(ForEachChunkCallback callback, void *arg);
@@ -165,10 +205,10 @@
 // Wrappers for ThreadRegistry access.
 void LockThreadRegistry();
 void UnlockThreadRegistry();
-bool GetThreadRangesLocked(uptr os_id, uptr *stack_begin, uptr *stack_end,
+bool GetThreadRangesLocked(tid_t os_id, uptr *stack_begin, uptr *stack_end,
                            uptr *tls_begin, uptr *tls_end, uptr *cache_begin,
                            uptr *cache_end, DTLS **dtls);
-void ForEachExtraStackRange(uptr os_id, RangeIteratorCallback callback,
+void ForEachExtraStackRange(tid_t os_id, RangeIteratorCallback callback,
                             void *arg);
 // If called from the main thread, updates the main thread's TID in the thread
 // registry. We need this to handle processes that fork() without a subsequent
@@ -184,6 +224,16 @@
 uptr GetUserBegin(uptr chunk);
 // Helper for __lsan_ignore_object().
 IgnoreObjectResult IgnoreObjectLocked(const void *p);
+
+// Return the linker module, if valid for the platform.
+LoadedModule *GetLinker();
+
+// Return true if LSan has finished leak checking and reported leaks.
+bool HasReportedLeaks();
+
+// Run platform-specific leak handlers.
+void HandleLeaks();
+
 // Wrapper for chunk metadata operations.
 class LsanMetadata {
  public:
@@ -202,6 +252,9 @@
 
 extern "C" {
 SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+const char *__lsan_default_options();
+
+SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
 int __lsan_is_turned_off();
 
 SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
diff --git a/lib/lsan/lsan_common_linux.cc b/lib/lsan/lsan_common_linux.cc
index 0e10d41..5042c7b 100644
--- a/lib/lsan/lsan_common_linux.cc
+++ b/lib/lsan/lsan_common_linux.cc
@@ -34,10 +34,6 @@
   return LibraryNameIs(full_name, kLinkerName);
 }
 
-static THREADLOCAL u32 current_thread_tid = kInvalidTid;
-u32 GetCurrentThread() { return current_thread_tid; }
-void SetCurrentThread(u32 tid) { current_thread_tid = tid; }
-
 __attribute__((tls_model("initial-exec")))
 THREADLOCAL int disable_counter;
 bool DisabledInThisThread() { return disable_counter > 0; }
@@ -66,8 +62,10 @@
       return;
     }
   }
-  VReport(1, "LeakSanitizer: Dynamic linker not found. "
-             "TLS will not be handled correctly.\n");
+  if (linker == nullptr) {
+    VReport(1, "LeakSanitizer: Dynamic linker not found. "
+               "TLS will not be handled correctly.\n");
+  }
 }
 
 static int ProcessGlobalRegionsCallback(struct dl_phdr_info *info, size_t size,
@@ -82,20 +80,7 @@
       continue;
     uptr begin = info->dlpi_addr + phdr->p_vaddr;
     uptr end = begin + phdr->p_memsz;
-    uptr allocator_begin = 0, allocator_end = 0;
-    GetAllocatorGlobalRange(&allocator_begin, &allocator_end);
-    if (begin <= allocator_begin && allocator_begin < end) {
-      CHECK_LE(allocator_begin, allocator_end);
-      CHECK_LE(allocator_end, end);
-      if (begin < allocator_begin)
-        ScanRangeForPointers(begin, allocator_begin, frontier, "GLOBAL",
-                             kReachable);
-      if (allocator_end < end)
-        ScanRangeForPointers(allocator_end, end, frontier, "GLOBAL",
-                             kReachable);
-    } else {
-      ScanRangeForPointers(begin, end, frontier, "GLOBAL", kReachable);
-    }
+    ScanGlobalRange(begin, end, frontier);
   }
   return 0;
 }
@@ -106,76 +91,22 @@
   dl_iterate_phdr(ProcessGlobalRegionsCallback, frontier);
 }
 
-static uptr GetCallerPC(u32 stack_id, StackDepotReverseMap *map) {
-  CHECK(stack_id);
-  StackTrace stack = map->Get(stack_id);
-  // The top frame is our malloc/calloc/etc. The next frame is the caller.
-  if (stack.size >= 2)
-    return stack.trace[1];
-  return 0;
-}
+LoadedModule *GetLinker() { return linker; }
 
-struct ProcessPlatformAllocParam {
-  Frontier *frontier;
-  StackDepotReverseMap *stack_depot_reverse_map;
-  bool skip_linker_allocations;
-};
-
-// ForEachChunk callback. Identifies unreachable chunks which must be treated as
-// reachable. Marks them as reachable and adds them to the frontier.
-static void ProcessPlatformSpecificAllocationsCb(uptr chunk, void *arg) {
-  CHECK(arg);
-  ProcessPlatformAllocParam *param =
-      reinterpret_cast<ProcessPlatformAllocParam *>(arg);
-  chunk = GetUserBegin(chunk);
-  LsanMetadata m(chunk);
-  if (m.allocated() && m.tag() != kReachable && m.tag() != kIgnored) {
-    u32 stack_id = m.stack_trace_id();
-    uptr caller_pc = 0;
-    if (stack_id > 0)
-      caller_pc = GetCallerPC(stack_id, param->stack_depot_reverse_map);
-    // If caller_pc is unknown, this chunk may be allocated in a coroutine. Mark
-    // it as reachable, as we can't properly report its allocation stack anyway.
-    if (caller_pc == 0 || (param->skip_linker_allocations &&
-                           linker->containsAddress(caller_pc))) {
-      m.set_tag(kReachable);
-      param->frontier->push_back(chunk);
-    }
-  }
-}
-
-// Handles dynamically allocated TLS blocks by treating all chunks allocated
-// from ld-linux.so as reachable.
-// Dynamic TLS blocks contain the TLS variables of dynamically loaded modules.
-// They are allocated with a __libc_memalign() call in allocate_and_init()
-// (elf/dl-tls.c). Glibc won't tell us the address ranges occupied by those
-// blocks, but we can make sure they come from our own allocator by intercepting
-// __libc_memalign(). On top of that, there is no easy way to reach them. Their
-// addresses are stored in a dynamically allocated array (the DTV) which is
-// referenced from the static TLS. Unfortunately, we can't just rely on the DTV
-// being reachable from the static TLS, and the dynamic TLS being reachable from
-// the DTV. This is because the initial DTV is allocated before our interception
-// mechanism kicks in, and thus we don't recognize it as allocated memory. We
-// can't special-case it either, since we don't know its size.
-// Our solution is to include in the root set all allocations made from
-// ld-linux.so (which is where allocate_and_init() is implemented). This is
-// guaranteed to include all dynamic TLS blocks (and possibly other allocations
-// which we don't care about).
-void ProcessPlatformSpecificAllocations(Frontier *frontier) {
-  StackDepotReverseMap stack_depot_reverse_map;
-  ProcessPlatformAllocParam arg;
-  arg.frontier = frontier;
-  arg.stack_depot_reverse_map = &stack_depot_reverse_map;
-  arg.skip_linker_allocations =
-      flags()->use_tls && flags()->use_ld_allocations && linker != nullptr;
-  ForEachChunk(ProcessPlatformSpecificAllocationsCb, &arg);
-}
+void ProcessPlatformSpecificAllocations(Frontier *frontier) {}
 
 struct DoStopTheWorldParam {
   StopTheWorldCallback callback;
   void *argument;
 };
 
+// While calling Die() here is undefined behavior and can potentially
+// cause race conditions, it isn't possible to intercept exit on linux,
+// so we have no choice but to call Die() from the atexit handler.
+void HandleLeaks() {
+  if (common_flags()->exitcode) Die();
+}
+
 static int DoStopTheWorldCallback(struct dl_phdr_info *info, size_t size,
                                   void *data) {
   DoStopTheWorldParam *param = reinterpret_cast<DoStopTheWorldParam *>(data);
diff --git a/lib/lsan/lsan_common_mac.cc b/lib/lsan/lsan_common_mac.cc
index 7f5e055..ac27c7a 100644
--- a/lib/lsan/lsan_common_mac.cc
+++ b/lib/lsan/lsan_common_mac.cc
@@ -12,71 +12,186 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "sanitizer_common/sanitizer_allocator_internal.h"
 #include "sanitizer_common/sanitizer_platform.h"
 #include "lsan_common.h"
 
 #if CAN_SANITIZE_LEAKS && SANITIZER_MAC
 
+#include "sanitizer_common/sanitizer_allocator_internal.h"
+#include "lsan_allocator.h"
+
 #include <pthread.h>
 
+#include <mach/mach.h>
+
 namespace __lsan {
 
 typedef struct {
   int disable_counter;
   u32 current_thread_id;
+  AllocatorCache cache;
 } thread_local_data_t;
 
 static pthread_key_t key;
 static pthread_once_t key_once = PTHREAD_ONCE_INIT;
 
-static void make_tls_key() { CHECK_EQ(pthread_key_create(&key, NULL), 0); }
+// The main thread destructor requires the current thread id,
+// so we can't destroy it until it's been used and reset to invalid tid
+void restore_tid_data(void *ptr) {
+  thread_local_data_t *data = (thread_local_data_t *)ptr;
+  if (data->current_thread_id != kInvalidTid)
+    pthread_setspecific(key, data);
+}
 
-static thread_local_data_t *get_tls_val() {
+static void make_tls_key() {
+  CHECK_EQ(pthread_key_create(&key, restore_tid_data), 0);
+}
+
+static thread_local_data_t *get_tls_val(bool alloc) {
   pthread_once(&key_once, make_tls_key);
 
   thread_local_data_t *ptr = (thread_local_data_t *)pthread_getspecific(key);
-  if (ptr == NULL) {
+  if (ptr == NULL && alloc) {
     ptr = (thread_local_data_t *)InternalAlloc(sizeof(*ptr));
     ptr->disable_counter = 0;
     ptr->current_thread_id = kInvalidTid;
+    ptr->cache = AllocatorCache();
     pthread_setspecific(key, ptr);
   }
 
   return ptr;
 }
 
-bool DisabledInThisThread() { return get_tls_val()->disable_counter > 0; }
+bool DisabledInThisThread() {
+  thread_local_data_t *data = get_tls_val(false);
+  return data ? data->disable_counter > 0 : false;
+}
 
-void DisableInThisThread() { ++get_tls_val()->disable_counter; }
+void DisableInThisThread() { ++get_tls_val(true)->disable_counter; }
 
 void EnableInThisThread() {
-  int *disable_counter = &get_tls_val()->disable_counter;
+  int *disable_counter = &get_tls_val(true)->disable_counter;
   if (*disable_counter == 0) {
     DisableCounterUnderflow();
   }
   --*disable_counter;
 }
 
-u32 GetCurrentThread() { return get_tls_val()->current_thread_id; }
-
-void SetCurrentThread(u32 tid) { get_tls_val()->current_thread_id = tid; }
-
-void InitializePlatformSpecificModules() {
-  CHECK(0 && "unimplemented");
+u32 GetCurrentThread() {
+  thread_local_data_t *data = get_tls_val(false);
+  return data ? data->current_thread_id : kInvalidTid;
 }
 
+void SetCurrentThread(u32 tid) { get_tls_val(true)->current_thread_id = tid; }
+
+AllocatorCache *GetAllocatorCache() { return &get_tls_val(true)->cache; }
+
+LoadedModule *GetLinker() { return nullptr; }
+
+// Required on Linux for initialization of TLS behavior, but should not be
+// required on Darwin.
+void InitializePlatformSpecificModules() {}
+
+// Sections which can't contain contain global pointers. This list errs on the
+// side of caution to avoid false positives, at the expense of performance.
+//
+// Other potentially safe sections include:
+// __all_image_info, __crash_info, __const, __got, __interpose, __objc_msg_break
+//
+// Sections which definitely cannot be included here are:
+// __objc_data, __objc_const, __data, __bss, __common, __thread_data,
+// __thread_bss, __thread_vars, __objc_opt_rw, __objc_opt_ptrs
+static const char *kSkippedSecNames[] = {
+    "__cfstring",       "__la_symbol_ptr",  "__mod_init_func",
+    "__mod_term_func",  "__nl_symbol_ptr",  "__objc_classlist",
+    "__objc_classrefs", "__objc_imageinfo", "__objc_nlclslist",
+    "__objc_protolist", "__objc_selrefs",   "__objc_superrefs"};
+
 // Scans global variables for heap pointers.
 void ProcessGlobalRegions(Frontier *frontier) {
-  CHECK(0 && "unimplemented");
+  for (auto name : kSkippedSecNames) CHECK(ARRAY_SIZE(name) < kMaxSegName);
+
+  MemoryMappingLayout memory_mapping(false);
+  InternalMmapVector<LoadedModule> modules(/*initial_capacity*/ 128);
+  memory_mapping.DumpListOfModules(&modules);
+  for (uptr i = 0; i < modules.size(); ++i) {
+    // Even when global scanning is disabled, we still need to scan
+    // system libraries for stashed pointers
+    if (!flags()->use_globals && modules[i].instrumented()) continue;
+
+    for (const __sanitizer::LoadedModule::AddressRange &range :
+         modules[i].ranges()) {
+      // Sections storing global variables are writable and non-executable
+      if (range.executable || !range.writable) continue;
+
+      for (auto name : kSkippedSecNames) {
+        if (!internal_strcmp(range.name, name)) continue;
+      }
+
+      ScanGlobalRange(range.beg, range.end, frontier);
+    }
+  }
 }
 
 void ProcessPlatformSpecificAllocations(Frontier *frontier) {
-  CHECK(0 && "unimplemented");
+  mach_port_name_t port;
+  if (task_for_pid(mach_task_self(), internal_getpid(), &port)
+      != KERN_SUCCESS) {
+    return;
+  }
+
+  unsigned depth = 1;
+  vm_size_t size = 0;
+  vm_address_t address = 0;
+  kern_return_t err = KERN_SUCCESS;
+  mach_msg_type_number_t count = VM_REGION_SUBMAP_INFO_COUNT_64;
+
+  InternalMmapVector<RootRegion> const *root_regions = GetRootRegions();
+
+  while (err == KERN_SUCCESS) {
+    struct vm_region_submap_info_64 info;
+    err = vm_region_recurse_64(port, &address, &size, &depth,
+                               (vm_region_info_t)&info, &count);
+
+    uptr end_address = address + size;
+
+    // libxpc stashes some pointers in the Kernel Alloc Once page,
+    // make sure not to report those as leaks.
+    if (info.user_tag == VM_MEMORY_OS_ALLOC_ONCE) {
+      ScanRangeForPointers(address, end_address, frontier, "GLOBAL",
+                           kReachable);
+
+      // Recursing over the full memory map is very slow, break out
+      // early if we don't need the full iteration.
+      if (!flags()->use_root_regions || !root_regions->size())
+        break;
+    }
+
+    // This additional root region scan is required on Darwin in order to
+    // detect root regions contained within mmap'd memory regions, because
+    // the Darwin implementation of sanitizer_procmaps traverses images
+    // as loaded by dyld, and not the complete set of all memory regions.
+    //
+    // TODO(fjricci) - remove this once sanitizer_procmaps_mac has the same
+    // behavior as sanitizer_procmaps_linux and traverses all memory regions
+    if (flags()->use_root_regions) {
+      for (uptr i = 0; i < root_regions->size(); i++) {
+        ScanRootRegion(frontier, (*root_regions)[i], address, end_address,
+                       info.protection & kProtectionRead);
+      }
+    }
+
+    address = end_address;
+  }
 }
 
+// On darwin, we can intercept _exit gracefully, and return a failing exit code
+// if required at that point. Calling Die() here is undefined behavior and
+// causes rare race conditions.
+void HandleLeaks() {}
+
 void DoStopTheWorld(StopTheWorldCallback callback, void *argument) {
-  CHECK(0 && "unimplemented");
+  StopTheWorld(callback, argument);
 }
 
 } // namespace __lsan
diff --git a/lib/lsan/lsan_interceptors.cc b/lib/lsan/lsan_interceptors.cc
index 876b39d..a7c0f72 100644
--- a/lib/lsan/lsan_interceptors.cc
+++ b/lib/lsan/lsan_interceptors.cc
@@ -20,7 +20,9 @@
 #include "sanitizer_common/sanitizer_internal_defs.h"
 #include "sanitizer_common/sanitizer_linux.h"
 #include "sanitizer_common/sanitizer_platform_interceptors.h"
+#include "sanitizer_common/sanitizer_platform_limits_netbsd.h"
 #include "sanitizer_common/sanitizer_platform_limits_posix.h"
+#include "sanitizer_common/sanitizer_posix.h"
 #include "sanitizer_common/sanitizer_tls_get_addr.h"
 #include "lsan.h"
 #include "lsan_allocator.h"
@@ -39,29 +41,23 @@
 int pthread_setspecific(unsigned key, const void *v);
 }
 
-#define ENSURE_LSAN_INITED do {   \
-  CHECK(!lsan_init_is_running);   \
-  if (!lsan_inited)               \
-    __lsan_init();                \
-} while (0)
-
 ///// Malloc/free interceptors. /////
 
-const bool kAlwaysClearMemory = true;
-
 namespace std {
   struct nothrow_t;
+  enum class align_val_t: size_t;
 }
 
+#if !SANITIZER_MAC
 INTERCEPTOR(void*, malloc, uptr size) {
   ENSURE_LSAN_INITED;
   GET_STACK_TRACE_MALLOC;
-  return Allocate(stack, size, 1, kAlwaysClearMemory);
+  return lsan_malloc(size, stack);
 }
 
 INTERCEPTOR(void, free, void *p) {
   ENSURE_LSAN_INITED;
-  Deallocate(p);
+  lsan_free(p);
 }
 
 INTERCEPTOR(void*, calloc, uptr nmemb, uptr size) {
@@ -76,31 +72,44 @@
     CHECK(allocated < kCallocPoolSize);
     return mem;
   }
-  if (CallocShouldReturnNullDueToOverflow(size, nmemb)) return nullptr;
   ENSURE_LSAN_INITED;
   GET_STACK_TRACE_MALLOC;
-  size *= nmemb;
-  return Allocate(stack, size, 1, true);
+  return lsan_calloc(nmemb, size, stack);
 }
 
 INTERCEPTOR(void*, realloc, void *q, uptr size) {
   ENSURE_LSAN_INITED;
   GET_STACK_TRACE_MALLOC;
-  return Reallocate(stack, q, size, 1);
+  return lsan_realloc(q, size, stack);
 }
 
+INTERCEPTOR(int, posix_memalign, void **memptr, uptr alignment, uptr size) {
+  ENSURE_LSAN_INITED;
+  GET_STACK_TRACE_MALLOC;
+  *memptr = lsan_memalign(alignment, size, stack);
+  // FIXME: Return ENOMEM if user requested more than max alloc size.
+  return 0;
+}
+
+INTERCEPTOR(void*, valloc, uptr size) {
+  ENSURE_LSAN_INITED;
+  GET_STACK_TRACE_MALLOC;
+  return lsan_valloc(size, stack);
+}
+#endif
+
 #if SANITIZER_INTERCEPT_MEMALIGN
 INTERCEPTOR(void*, memalign, uptr alignment, uptr size) {
   ENSURE_LSAN_INITED;
   GET_STACK_TRACE_MALLOC;
-  return Allocate(stack, size, alignment, kAlwaysClearMemory);
+  return lsan_memalign(alignment, size, stack);
 }
 #define LSAN_MAYBE_INTERCEPT_MEMALIGN INTERCEPT_FUNCTION(memalign)
 
 INTERCEPTOR(void *, __libc_memalign, uptr alignment, uptr size) {
   ENSURE_LSAN_INITED;
   GET_STACK_TRACE_MALLOC;
-  void *res = Allocate(stack, size, alignment, kAlwaysClearMemory);
+  void *res = lsan_memalign(alignment, size, stack);
   DTLS_on_libc_memalign(res, size);
   return res;
 }
@@ -114,29 +123,13 @@
 INTERCEPTOR(void*, aligned_alloc, uptr alignment, uptr size) {
   ENSURE_LSAN_INITED;
   GET_STACK_TRACE_MALLOC;
-  return Allocate(stack, size, alignment, kAlwaysClearMemory);
+  return lsan_memalign(alignment, size, stack);
 }
 #define LSAN_MAYBE_INTERCEPT_ALIGNED_ALLOC INTERCEPT_FUNCTION(aligned_alloc)
 #else
 #define LSAN_MAYBE_INTERCEPT_ALIGNED_ALLOC
 #endif
 
-INTERCEPTOR(int, posix_memalign, void **memptr, uptr alignment, uptr size) {
-  ENSURE_LSAN_INITED;
-  GET_STACK_TRACE_MALLOC;
-  *memptr = Allocate(stack, size, alignment, kAlwaysClearMemory);
-  // FIXME: Return ENOMEM if user requested more than max alloc size.
-  return 0;
-}
-
-INTERCEPTOR(void*, valloc, uptr size) {
-  ENSURE_LSAN_INITED;
-  GET_STACK_TRACE_MALLOC;
-  if (size == 0)
-    size = GetPageSizeCached();
-  return Allocate(stack, size, GetPageSizeCached(), kAlwaysClearMemory);
-}
-
 #if SANITIZER_INTERCEPT_MALLOC_USABLE_SIZE
 INTERCEPTOR(uptr, malloc_usable_size, void *ptr) {
   ENSURE_LSAN_INITED;
@@ -193,23 +186,69 @@
 #define LSAN_MAYBE_INTERCEPT_CFREE
 #endif // SANITIZER_INTERCEPT_CFREE
 
-#define OPERATOR_NEW_BODY                              \
-  ENSURE_LSAN_INITED;                                  \
-  GET_STACK_TRACE_MALLOC;                              \
-  return Allocate(stack, size, 1, kAlwaysClearMemory);
+#if SANITIZER_INTERCEPT_MCHECK_MPROBE
+INTERCEPTOR(int, mcheck, void (*abortfunc)(int mstatus)) {
+  return 0;
+}
 
-INTERCEPTOR_ATTRIBUTE
-void *operator new(size_t size) { OPERATOR_NEW_BODY; }
-INTERCEPTOR_ATTRIBUTE
-void *operator new[](size_t size) { OPERATOR_NEW_BODY; }
-INTERCEPTOR_ATTRIBUTE
-void *operator new(size_t size, std::nothrow_t const&) { OPERATOR_NEW_BODY; }
-INTERCEPTOR_ATTRIBUTE
-void *operator new[](size_t size, std::nothrow_t const&) { OPERATOR_NEW_BODY; }
+INTERCEPTOR(int, mcheck_pedantic, void (*abortfunc)(int mstatus)) {
+  return 0;
+}
+
+INTERCEPTOR(int, mprobe, void *ptr) {
+  return 0;
+}
+#endif // SANITIZER_INTERCEPT_MCHECK_MPROBE
+
+
+// TODO(alekseys): throw std::bad_alloc instead of dying on OOM.
+#define OPERATOR_NEW_BODY(nothrow)                         \
+  ENSURE_LSAN_INITED;                                      \
+  GET_STACK_TRACE_MALLOC;                                  \
+  void *res = lsan_malloc(size, stack);                    \
+  if (!nothrow && UNLIKELY(!res)) DieOnFailure::OnOOM();   \
+  return res;
+#define OPERATOR_NEW_BODY_ALIGN(nothrow)                   \
+  ENSURE_LSAN_INITED;                                      \
+  GET_STACK_TRACE_MALLOC;                                  \
+  void *res = lsan_memalign((uptr)align, size, stack);     \
+  if (!nothrow && UNLIKELY(!res)) DieOnFailure::OnOOM();   \
+  return res;
 
 #define OPERATOR_DELETE_BODY \
   ENSURE_LSAN_INITED;        \
-  Deallocate(ptr);
+  lsan_free(ptr);
+
+// On OS X it's not enough to just provide our own 'operator new' and
+// 'operator delete' implementations, because they're going to be in the runtime
+// dylib, and the main executable will depend on both the runtime dylib and
+// libstdc++, each of has its implementation of new and delete.
+// To make sure that C++ allocation/deallocation operators are overridden on
+// OS X we need to intercept them using their mangled names.
+#if !SANITIZER_MAC
+
+INTERCEPTOR_ATTRIBUTE
+void *operator new(size_t size) { OPERATOR_NEW_BODY(false /*nothrow*/); }
+INTERCEPTOR_ATTRIBUTE
+void *operator new[](size_t size) { OPERATOR_NEW_BODY(false /*nothrow*/); }
+INTERCEPTOR_ATTRIBUTE
+void *operator new(size_t size, std::nothrow_t const&)
+{ OPERATOR_NEW_BODY(true /*nothrow*/); }
+INTERCEPTOR_ATTRIBUTE
+void *operator new[](size_t size, std::nothrow_t const&)
+{ OPERATOR_NEW_BODY(true /*nothrow*/); }
+INTERCEPTOR_ATTRIBUTE
+void *operator new(size_t size, std::align_val_t align)
+{ OPERATOR_NEW_BODY_ALIGN(false /*nothrow*/); }
+INTERCEPTOR_ATTRIBUTE
+void *operator new[](size_t size, std::align_val_t align)
+{ OPERATOR_NEW_BODY_ALIGN(false /*nothrow*/); }
+INTERCEPTOR_ATTRIBUTE
+void *operator new(size_t size, std::align_val_t align, std::nothrow_t const&)
+{ OPERATOR_NEW_BODY_ALIGN(true /*nothrow*/); }
+INTERCEPTOR_ATTRIBUTE
+void *operator new[](size_t size, std::align_val_t align, std::nothrow_t const&)
+{ OPERATOR_NEW_BODY_ALIGN(true /*nothrow*/); }
 
 INTERCEPTOR_ATTRIBUTE
 void operator delete(void *ptr) NOEXCEPT { OPERATOR_DELETE_BODY; }
@@ -218,9 +257,55 @@
 INTERCEPTOR_ATTRIBUTE
 void operator delete(void *ptr, std::nothrow_t const&) { OPERATOR_DELETE_BODY; }
 INTERCEPTOR_ATTRIBUTE
-void operator delete[](void *ptr, std::nothrow_t const &) {
-  OPERATOR_DELETE_BODY;
-}
+void operator delete[](void *ptr, std::nothrow_t const &)
+{ OPERATOR_DELETE_BODY; }
+INTERCEPTOR_ATTRIBUTE
+void operator delete(void *ptr, size_t size) NOEXCEPT
+{ OPERATOR_DELETE_BODY; }
+INTERCEPTOR_ATTRIBUTE
+void operator delete[](void *ptr, size_t size) NOEXCEPT
+{ OPERATOR_DELETE_BODY; }
+INTERCEPTOR_ATTRIBUTE
+void operator delete(void *ptr, std::align_val_t) NOEXCEPT
+{ OPERATOR_DELETE_BODY; }
+INTERCEPTOR_ATTRIBUTE
+void operator delete[](void *ptr, std::align_val_t) NOEXCEPT
+{ OPERATOR_DELETE_BODY; }
+INTERCEPTOR_ATTRIBUTE
+void operator delete(void *ptr, std::align_val_t, std::nothrow_t const&)
+{ OPERATOR_DELETE_BODY; }
+INTERCEPTOR_ATTRIBUTE
+void operator delete[](void *ptr, std::align_val_t, std::nothrow_t const&)
+{ OPERATOR_DELETE_BODY; }
+INTERCEPTOR_ATTRIBUTE
+void operator delete(void *ptr, size_t size, std::align_val_t) NOEXCEPT
+{ OPERATOR_DELETE_BODY; }
+INTERCEPTOR_ATTRIBUTE
+void operator delete[](void *ptr, size_t size, std::align_val_t) NOEXCEPT
+{ OPERATOR_DELETE_BODY; }
+
+#else  // SANITIZER_MAC
+
+INTERCEPTOR(void *, _Znwm, size_t size)
+{ OPERATOR_NEW_BODY(false /*nothrow*/); }
+INTERCEPTOR(void *, _Znam, size_t size)
+{ OPERATOR_NEW_BODY(false /*nothrow*/); }
+INTERCEPTOR(void *, _ZnwmRKSt9nothrow_t, size_t size, std::nothrow_t const&)
+{ OPERATOR_NEW_BODY(true /*nothrow*/); }
+INTERCEPTOR(void *, _ZnamRKSt9nothrow_t, size_t size, std::nothrow_t const&)
+{ OPERATOR_NEW_BODY(true /*nothrow*/); }
+
+INTERCEPTOR(void, _ZdlPv, void *ptr)
+{ OPERATOR_DELETE_BODY; }
+INTERCEPTOR(void, _ZdaPv, void *ptr)
+{ OPERATOR_DELETE_BODY; }
+INTERCEPTOR(void, _ZdlPvRKSt9nothrow_t, void *ptr, std::nothrow_t const&)
+{ OPERATOR_DELETE_BODY; }
+INTERCEPTOR(void, _ZdaPvRKSt9nothrow_t, void *ptr, std::nothrow_t const&)
+{ OPERATOR_DELETE_BODY; }
+
+#endif  // !SANITIZER_MAC
+
 
 ///// Thread initialization and finalization. /////
 
@@ -290,7 +375,8 @@
     res = REAL(pthread_create)(th, attr, __lsan_thread_start_func, &p);
   }
   if (res == 0) {
-    int tid = ThreadCreate(GetCurrentThread(), *(uptr *)th, detached);
+    int tid = ThreadCreate(GetCurrentThread(), *(uptr *)th,
+                           IsStateDetached(detached));
     CHECK_NE(tid, 0);
     atomic_store(&p.tid, tid, memory_order_release);
     while (atomic_load(&p.tid, memory_order_acquire) != 0)
@@ -310,9 +396,19 @@
   return res;
 }
 
+INTERCEPTOR(void, _exit, int status) {
+  if (status == 0 && HasReportedLeaks()) status = common_flags()->exitcode;
+  REAL(_exit)(status);
+}
+
+#define COMMON_INTERCEPT_FUNCTION(name) INTERCEPT_FUNCTION(name)
+#include "sanitizer_common/sanitizer_signal_interceptors.inc"
+
 namespace __lsan {
 
 void InitializeInterceptors() {
+  InitializeSignalInterceptors();
+
   INTERCEPT_FUNCTION(malloc);
   INTERCEPT_FUNCTION(free);
   LSAN_MAYBE_INTERCEPT_CFREE;
@@ -329,6 +425,7 @@
   LSAN_MAYBE_INTERCEPT_MALLOPT;
   INTERCEPT_FUNCTION(pthread_create);
   INTERCEPT_FUNCTION(pthread_join);
+  INTERCEPT_FUNCTION(_exit);
 
   if (pthread_key_create(&g_thread_finalize_key, &thread_finalize)) {
     Report("LeakSanitizer: failed to create thread key.\n");
diff --git a/lib/lsan/lsan_linux.cc b/lib/lsan/lsan_linux.cc
new file mode 100644
index 0000000..c9749c7
--- /dev/null
+++ b/lib/lsan/lsan_linux.cc
@@ -0,0 +1,33 @@
+//=-- lsan_linux.cc -------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of LeakSanitizer. Linux-specific code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_platform.h"
+
+#if SANITIZER_LINUX
+
+#include "lsan_allocator.h"
+
+namespace __lsan {
+
+static THREADLOCAL u32 current_thread_tid = kInvalidTid;
+u32 GetCurrentThread() { return current_thread_tid; }
+void SetCurrentThread(u32 tid) { current_thread_tid = tid; }
+
+static THREADLOCAL AllocatorCache allocator_cache;
+AllocatorCache *GetAllocatorCache() { return &allocator_cache; }
+
+void ReplaceSystemMalloc() {}
+
+} // namespace __lsan
+
+#endif // SANITIZER_LINUX
diff --git a/lib/lsan/lsan_mac.cc b/lib/lsan/lsan_mac.cc
new file mode 100644
index 0000000..1a6f5f4
--- /dev/null
+++ b/lib/lsan/lsan_mac.cc
@@ -0,0 +1,192 @@
+//===-- lsan_mac.cc -------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of LeakSanitizer, a memory leak checker.
+//
+// Mac-specific details.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_platform.h"
+#if SANITIZER_MAC
+
+#include "interception/interception.h"
+#include "lsan.h"
+#include "lsan_allocator.h"
+#include "lsan_thread.h"
+
+#include <pthread.h>
+
+namespace __lsan {
+// Support for the following functions from libdispatch on Mac OS:
+//   dispatch_async_f()
+//   dispatch_async()
+//   dispatch_sync_f()
+//   dispatch_sync()
+//   dispatch_after_f()
+//   dispatch_after()
+//   dispatch_group_async_f()
+//   dispatch_group_async()
+// TODO(glider): libdispatch API contains other functions that we don't support
+// yet.
+//
+// dispatch_sync() and dispatch_sync_f() are synchronous, although chances are
+// they can cause jobs to run on a thread different from the current one.
+// TODO(glider): if so, we need a test for this (otherwise we should remove
+// them).
+//
+// The following functions use dispatch_barrier_async_f() (which isn't a library
+// function but is exported) and are thus supported:
+//   dispatch_source_set_cancel_handler_f()
+//   dispatch_source_set_cancel_handler()
+//   dispatch_source_set_event_handler_f()
+//   dispatch_source_set_event_handler()
+//
+// The reference manual for Grand Central Dispatch is available at
+//   http://developer.apple.com/library/mac/#documentation/Performance/Reference/GCD_libdispatch_Ref/Reference/reference.html
+// The implementation details are at
+//   http://libdispatch.macosforge.org/trac/browser/trunk/src/queue.c
+
+typedef void *dispatch_group_t;
+typedef void *dispatch_queue_t;
+typedef void *dispatch_source_t;
+typedef u64 dispatch_time_t;
+typedef void (*dispatch_function_t)(void *block);
+typedef void *(*worker_t)(void *block);
+
+// A wrapper for the ObjC blocks used to support libdispatch.
+typedef struct {
+  void *block;
+  dispatch_function_t func;
+  u32 parent_tid;
+} lsan_block_context_t;
+
+ALWAYS_INLINE
+void lsan_register_worker_thread(int parent_tid) {
+  if (GetCurrentThread() == kInvalidTid) {
+    u32 tid = ThreadCreate(parent_tid, 0, true);
+    ThreadStart(tid, GetTid());
+    SetCurrentThread(tid);
+  }
+}
+
+// For use by only those functions that allocated the context via
+// alloc_lsan_context().
+extern "C" void lsan_dispatch_call_block_and_release(void *block) {
+  lsan_block_context_t *context = (lsan_block_context_t *)block;
+  VReport(2,
+          "lsan_dispatch_call_block_and_release(): "
+          "context: %p, pthread_self: %p\n",
+          block, pthread_self());
+  lsan_register_worker_thread(context->parent_tid);
+  // Call the original dispatcher for the block.
+  context->func(context->block);
+  lsan_free(context);
+}
+
+}  // namespace __lsan
+
+using namespace __lsan;  // NOLINT
+
+// Wrap |ctxt| and |func| into an lsan_block_context_t.
+// The caller retains control of the allocated context.
+extern "C" lsan_block_context_t *alloc_lsan_context(void *ctxt,
+                                                    dispatch_function_t func) {
+  GET_STACK_TRACE_THREAD;
+  lsan_block_context_t *lsan_ctxt =
+      (lsan_block_context_t *)lsan_malloc(sizeof(lsan_block_context_t), stack);
+  lsan_ctxt->block = ctxt;
+  lsan_ctxt->func = func;
+  lsan_ctxt->parent_tid = GetCurrentThread();
+  return lsan_ctxt;
+}
+
+// Define interceptor for dispatch_*_f function with the three most common
+// parameters: dispatch_queue_t, context, dispatch_function_t.
+#define INTERCEPT_DISPATCH_X_F_3(dispatch_x_f)                        \
+  INTERCEPTOR(void, dispatch_x_f, dispatch_queue_t dq, void *ctxt,    \
+              dispatch_function_t func) {                             \
+    lsan_block_context_t *lsan_ctxt = alloc_lsan_context(ctxt, func); \
+    return REAL(dispatch_x_f)(dq, (void *)lsan_ctxt,                  \
+                              lsan_dispatch_call_block_and_release);  \
+  }
+
+INTERCEPT_DISPATCH_X_F_3(dispatch_async_f)
+INTERCEPT_DISPATCH_X_F_3(dispatch_sync_f)
+INTERCEPT_DISPATCH_X_F_3(dispatch_barrier_async_f)
+
+INTERCEPTOR(void, dispatch_after_f, dispatch_time_t when, dispatch_queue_t dq,
+            void *ctxt, dispatch_function_t func) {
+  lsan_block_context_t *lsan_ctxt = alloc_lsan_context(ctxt, func);
+  return REAL(dispatch_after_f)(when, dq, (void *)lsan_ctxt,
+                                lsan_dispatch_call_block_and_release);
+}
+
+INTERCEPTOR(void, dispatch_group_async_f, dispatch_group_t group,
+            dispatch_queue_t dq, void *ctxt, dispatch_function_t func) {
+  lsan_block_context_t *lsan_ctxt = alloc_lsan_context(ctxt, func);
+  REAL(dispatch_group_async_f)
+  (group, dq, (void *)lsan_ctxt, lsan_dispatch_call_block_and_release);
+}
+
+#if !defined(MISSING_BLOCKS_SUPPORT)
+extern "C" {
+void dispatch_async(dispatch_queue_t dq, void (^work)(void));
+void dispatch_group_async(dispatch_group_t dg, dispatch_queue_t dq,
+                          void (^work)(void));
+void dispatch_after(dispatch_time_t when, dispatch_queue_t queue,
+                    void (^work)(void));
+void dispatch_source_set_cancel_handler(dispatch_source_t ds,
+                                        void (^work)(void));
+void dispatch_source_set_event_handler(dispatch_source_t ds,
+                                       void (^work)(void));
+}
+
+#define GET_LSAN_BLOCK(work)                 \
+  void (^lsan_block)(void);                  \
+  int parent_tid = GetCurrentThread();       \
+  lsan_block = ^(void) {                     \
+    lsan_register_worker_thread(parent_tid); \
+    work();                                  \
+  }
+
+INTERCEPTOR(void, dispatch_async, dispatch_queue_t dq, void (^work)(void)) {
+  GET_LSAN_BLOCK(work);
+  REAL(dispatch_async)(dq, lsan_block);
+}
+
+INTERCEPTOR(void, dispatch_group_async, dispatch_group_t dg,
+            dispatch_queue_t dq, void (^work)(void)) {
+  GET_LSAN_BLOCK(work);
+  REAL(dispatch_group_async)(dg, dq, lsan_block);
+}
+
+INTERCEPTOR(void, dispatch_after, dispatch_time_t when, dispatch_queue_t queue,
+            void (^work)(void)) {
+  GET_LSAN_BLOCK(work);
+  REAL(dispatch_after)(when, queue, lsan_block);
+}
+
+INTERCEPTOR(void, dispatch_source_set_cancel_handler, dispatch_source_t ds,
+            void (^work)(void)) {
+  if (!work) {
+    REAL(dispatch_source_set_cancel_handler)(ds, work);
+    return;
+  }
+  GET_LSAN_BLOCK(work);
+  REAL(dispatch_source_set_cancel_handler)(ds, lsan_block);
+}
+
+INTERCEPTOR(void, dispatch_source_set_event_handler, dispatch_source_t ds,
+            void (^work)(void)) {
+  GET_LSAN_BLOCK(work);
+  REAL(dispatch_source_set_event_handler)(ds, lsan_block);
+}
+#endif
+
+#endif  // SANITIZER_MAC
diff --git a/lib/lsan/lsan_malloc_mac.cc b/lib/lsan/lsan_malloc_mac.cc
new file mode 100644
index 0000000..9c1dacc
--- /dev/null
+++ b/lib/lsan/lsan_malloc_mac.cc
@@ -0,0 +1,55 @@
+//===-- lsan_malloc_mac.cc ------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of LeakSanitizer (LSan), a memory leak detector.
+//
+// Mac-specific malloc interception.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_platform.h"
+#if SANITIZER_MAC
+
+#include "lsan.h"
+#include "lsan_allocator.h"
+#include "lsan_thread.h"
+
+using namespace __lsan;
+#define COMMON_MALLOC_ZONE_NAME "lsan"
+#define COMMON_MALLOC_ENTER() ENSURE_LSAN_INITED
+#define COMMON_MALLOC_SANITIZER_INITIALIZED lsan_inited
+#define COMMON_MALLOC_FORCE_LOCK()
+#define COMMON_MALLOC_FORCE_UNLOCK()
+#define COMMON_MALLOC_MEMALIGN(alignment, size) \
+  GET_STACK_TRACE_MALLOC; \
+  void *p = lsan_memalign(alignment, size, stack)
+#define COMMON_MALLOC_MALLOC(size) \
+  GET_STACK_TRACE_MALLOC; \
+  void *p = lsan_malloc(size, stack)
+#define COMMON_MALLOC_REALLOC(ptr, size) \
+  GET_STACK_TRACE_MALLOC; \
+  void *p = lsan_realloc(ptr, size, stack)
+#define COMMON_MALLOC_CALLOC(count, size) \
+  GET_STACK_TRACE_MALLOC; \
+  void *p = lsan_calloc(count, size, stack)
+#define COMMON_MALLOC_VALLOC(size) \
+  GET_STACK_TRACE_MALLOC; \
+  void *p = lsan_valloc(size, stack)
+#define COMMON_MALLOC_FREE(ptr) \
+  lsan_free(ptr)
+#define COMMON_MALLOC_SIZE(ptr) \
+  uptr size = lsan_mz_size(ptr)
+#define COMMON_MALLOC_FILL_STATS(zone, stats)
+#define COMMON_MALLOC_REPORT_UNKNOWN_REALLOC(ptr, zone_ptr, zone_name) \
+  (void)zone_name; \
+  Report("mz_realloc(%p) -- attempting to realloc unallocated memory.\n", ptr);
+#define COMMON_MALLOC_NAMESPACE __lsan
+
+#include "sanitizer_common/sanitizer_malloc_mac.inc"
+
+#endif // SANITIZER_MAC
diff --git a/lib/lsan/lsan_thread.cc b/lib/lsan/lsan_thread.cc
index ebec6cd..4404c8c 100644
--- a/lib/lsan/lsan_thread.cc
+++ b/lib/lsan/lsan_thread.cc
@@ -77,7 +77,7 @@
                                        /* arg */ nullptr);
 }
 
-void ThreadStart(u32 tid, uptr os_id) {
+void ThreadStart(u32 tid, tid_t os_id, bool workerthread) {
   OnStartedArgs args;
   uptr stack_size = 0;
   uptr tls_size = 0;
@@ -87,11 +87,12 @@
   args.tls_end = args.tls_begin + tls_size;
   GetAllocatorCacheRange(&args.cache_begin, &args.cache_end);
   args.dtls = DTLS_Get();
-  thread_registry->StartThread(tid, os_id, /*workerthread*/ false, &args);
+  thread_registry->StartThread(tid, os_id, workerthread, &args);
 }
 
 void ThreadFinish() {
   thread_registry->FinishThread(GetCurrentThread());
+  SetCurrentThread(kInvalidTid);
 }
 
 ThreadContext *CurrentThreadContext() {
@@ -126,7 +127,7 @@
 
 ///// Interface to the common LSan module. /////
 
-bool GetThreadRangesLocked(uptr os_id, uptr *stack_begin, uptr *stack_end,
+bool GetThreadRangesLocked(tid_t os_id, uptr *stack_begin, uptr *stack_end,
                            uptr *tls_begin, uptr *tls_end, uptr *cache_begin,
                            uptr *cache_end, DTLS **dtls) {
   ThreadContext *context = static_cast<ThreadContext *>(
@@ -142,7 +143,7 @@
   return true;
 }
 
-void ForEachExtraStackRange(uptr os_id, RangeIteratorCallback callback,
+void ForEachExtraStackRange(tid_t os_id, RangeIteratorCallback callback,
                             void *arg) {
 }
 
diff --git a/lib/lsan/lsan_thread.h b/lib/lsan/lsan_thread.h
index 10b7b57..b16d3d9 100644
--- a/lib/lsan/lsan_thread.h
+++ b/lib/lsan/lsan_thread.h
@@ -45,7 +45,7 @@
 
 void InitializeThreadRegistry();
 
-void ThreadStart(u32 tid, uptr os_id);
+void ThreadStart(u32 tid, tid_t os_id, bool workerthread = false);
 void ThreadFinish();
 u32 ThreadCreate(u32 tid, uptr uid, bool detached);
 void ThreadJoin(u32 tid);
diff --git a/lib/lsan/weak_symbols.txt b/lib/lsan/weak_symbols.txt
index da4f994..6922556 100644
--- a/lib/lsan/weak_symbols.txt
+++ b/lib/lsan/weak_symbols.txt
@@ -1,2 +1,3 @@
+___lsan_default_options
 ___lsan_default_suppressions
 ___lsan_is_turned_off
diff --git a/lib/msan/msan.h b/lib/msan/msan.h
index 0709260..fa9c15b 100644
--- a/lib/msan/msan.h
+++ b/lib/msan/msan.h
@@ -280,10 +280,18 @@
 
 void MsanAllocatorInit();
 void MsanAllocatorThreadFinish();
-void *MsanCalloc(StackTrace *stack, uptr nmemb, uptr size);
-void *MsanReallocate(StackTrace *stack, void *oldp, uptr size,
-                     uptr alignment, bool zeroise);
 void MsanDeallocate(StackTrace *stack, void *ptr);
+
+void *msan_malloc(uptr size, StackTrace *stack);
+void *msan_calloc(uptr nmemb, uptr size, StackTrace *stack);
+void *msan_realloc(void *ptr, uptr size, StackTrace *stack);
+void *msan_valloc(uptr size, StackTrace *stack);
+void *msan_pvalloc(uptr size, StackTrace *stack);
+void *msan_aligned_alloc(uptr alignment, uptr size, StackTrace *stack);
+void *msan_memalign(uptr alignment, uptr size, StackTrace *stack);
+int msan_posix_memalign(void **memptr, uptr alignment, uptr size,
+                        StackTrace *stack);
+
 void InstallTrapHandler();
 void InstallAtExitHandler();
 
diff --git a/lib/msan/msan_allocator.cc b/lib/msan/msan_allocator.cc
index 6c389f0..1b134e1 100644
--- a/lib/msan/msan_allocator.cc
+++ b/lib/msan/msan_allocator.cc
@@ -13,7 +13,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "sanitizer_common/sanitizer_allocator.h"
+#include "sanitizer_common/sanitizer_allocator_checks.h"
 #include "sanitizer_common/sanitizer_allocator_interface.h"
+#include "sanitizer_common/sanitizer_errno.h"
 #include "msan.h"
 #include "msan_allocator.h"
 #include "msan_origin.h"
@@ -47,12 +49,18 @@
   static const uptr kRegionSizeLog = 20;
   static const uptr kNumRegions = SANITIZER_MMAP_RANGE_SIZE >> kRegionSizeLog;
   typedef TwoLevelByteMap<(kNumRegions >> 12), 1 << 12> ByteMap;
-  typedef CompactSizeClassMap SizeClassMap;
 
-  typedef SizeClassAllocator32<0, SANITIZER_MMAP_RANGE_SIZE, sizeof(Metadata),
-                               SizeClassMap, kRegionSizeLog, ByteMap,
-                               MsanMapUnmapCallback> PrimaryAllocator;
-
+  struct AP32 {
+    static const uptr kSpaceBeg = 0;
+    static const u64 kSpaceSize = SANITIZER_MMAP_RANGE_SIZE;
+    static const uptr kMetadataSize = sizeof(Metadata);
+    typedef __sanitizer::CompactSizeClassMap SizeClassMap;
+    static const uptr kRegionSizeLog = __msan::kRegionSizeLog;
+    typedef __msan::ByteMap ByteMap;
+    typedef MsanMapUnmapCallback MapUnmapCallback;
+    static const uptr kFlags = 0;
+  };
+  typedef SizeClassAllocator32<AP32> PrimaryAllocator;
 #elif defined(__x86_64__)
 #if SANITIZER_LINUX && !defined(MSAN_LINUX_X86_64_OLD_MAPPING)
   static const uptr kAllocatorSpace = 0x700000000000ULL;
@@ -90,11 +98,18 @@
   static const uptr kRegionSizeLog = 20;
   static const uptr kNumRegions = SANITIZER_MMAP_RANGE_SIZE >> kRegionSizeLog;
   typedef TwoLevelByteMap<(kNumRegions >> 12), 1 << 12> ByteMap;
-  typedef CompactSizeClassMap SizeClassMap;
 
-  typedef SizeClassAllocator32<0, SANITIZER_MMAP_RANGE_SIZE, sizeof(Metadata),
-                               SizeClassMap, kRegionSizeLog, ByteMap,
-                               MsanMapUnmapCallback> PrimaryAllocator;
+  struct AP32 {
+    static const uptr kSpaceBeg = 0;
+    static const u64 kSpaceSize = SANITIZER_MMAP_RANGE_SIZE;
+    static const uptr kMetadataSize = sizeof(Metadata);
+    typedef __sanitizer::CompactSizeClassMap SizeClassMap;
+    static const uptr kRegionSizeLog = __msan::kRegionSizeLog;
+    typedef __msan::ByteMap ByteMap;
+    typedef MsanMapUnmapCallback MapUnmapCallback;
+    static const uptr kFlags = 0;
+  };
+  typedef SizeClassAllocator32<AP32> PrimaryAllocator;
 #endif
 typedef SizeClassAllocatorLocalCache<PrimaryAllocator> AllocatorCache;
 typedef LargeMmapAllocator<MsanMapUnmapCallback> SecondaryAllocator;
@@ -106,9 +121,8 @@
 static SpinMutex fallback_mutex;
 
 void MsanAllocatorInit() {
-  allocator.Init(
-      common_flags()->allocator_may_return_null,
-      common_flags()->allocator_release_to_os_interval_ms);
+  SetAllocatorMayReturnNull(common_flags()->allocator_may_return_null);
+  allocator.Init(common_flags()->allocator_release_to_os_interval_ms);
 }
 
 AllocatorCache *GetAllocatorCache(MsanThreadLocalMallocStorage *ms) {
@@ -126,17 +140,17 @@
   if (size > kMaxAllowedMallocSize) {
     Report("WARNING: MemorySanitizer failed to allocate %p bytes\n",
            (void *)size);
-    return allocator.ReturnNullOrDieOnBadRequest();
+    return Allocator::FailureHandler::OnBadRequest();
   }
   MsanThread *t = GetCurrentThread();
   void *allocated;
   if (t) {
     AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage());
-    allocated = allocator.Allocate(cache, size, alignment, false);
+    allocated = allocator.Allocate(cache, size, alignment);
   } else {
     SpinMutexLock l(&fallback_mutex);
     AllocatorCache *cache = &fallback_allocator_cache;
-    allocated = allocator.Allocate(cache, size, alignment, false);
+    allocated = allocator.Allocate(cache, size, alignment);
   }
   Metadata *meta =
       reinterpret_cast<Metadata *>(allocator.GetMetaData(allocated));
@@ -182,20 +196,8 @@
   }
 }
 
-void *MsanCalloc(StackTrace *stack, uptr nmemb, uptr size) {
-  if (CallocShouldReturnNullDueToOverflow(size, nmemb))
-    return allocator.ReturnNullOrDieOnBadRequest();
-  return MsanReallocate(stack, nullptr, nmemb * size, sizeof(u64), true);
-}
-
 void *MsanReallocate(StackTrace *stack, void *old_p, uptr new_size,
-                     uptr alignment, bool zeroise) {
-  if (!old_p)
-    return MsanAllocate(stack, new_size, alignment, zeroise);
-  if (!new_size) {
-    MsanDeallocate(stack, old_p);
-    return nullptr;
-  }
+                     uptr alignment) {
   Metadata *meta = reinterpret_cast<Metadata*>(allocator.GetMetaData(old_p));
   uptr old_size = meta->requested_size;
   uptr actually_allocated_size = allocator.GetActuallyAllocatedSize(old_p);
@@ -203,10 +205,7 @@
     // We are not reallocating here.
     meta->requested_size = new_size;
     if (new_size > old_size) {
-      if (zeroise) {
-        __msan_clear_and_unpoison((char *)old_p + old_size,
-                                  new_size - old_size);
-      } else if (flags()->poison_in_malloc) {
+      if (flags()->poison_in_malloc) {
         stack->tag = StackTrace::TAG_ALLOC;
         PoisonMemory((char *)old_p + old_size, new_size - old_size, stack);
       }
@@ -214,8 +213,7 @@
     return old_p;
   }
   uptr memcpy_size = Min(new_size, old_size);
-  void *new_p = MsanAllocate(stack, new_size, alignment, zeroise);
-  // Printf("realloc: old_size %zd new_size %zd\n", old_size, new_size);
+  void *new_p = MsanAllocate(stack, new_size, alignment, false /*zeroise*/);
   if (new_p) {
     CopyMemory(new_p, old_p, memcpy_size, stack);
     MsanDeallocate(stack, old_p);
@@ -231,6 +229,71 @@
   return b->requested_size;
 }
 
+void *msan_malloc(uptr size, StackTrace *stack) {
+  return SetErrnoOnNull(MsanAllocate(stack, size, sizeof(u64), false));
+}
+
+void *msan_calloc(uptr nmemb, uptr size, StackTrace *stack) {
+  if (UNLIKELY(CheckForCallocOverflow(size, nmemb)))
+    return SetErrnoOnNull(Allocator::FailureHandler::OnBadRequest());
+  return SetErrnoOnNull(MsanAllocate(stack, nmemb * size, sizeof(u64), true));
+}
+
+void *msan_realloc(void *ptr, uptr size, StackTrace *stack) {
+  if (!ptr)
+    return SetErrnoOnNull(MsanAllocate(stack, size, sizeof(u64), false));
+  if (size == 0) {
+    MsanDeallocate(stack, ptr);
+    return nullptr;
+  }
+  return SetErrnoOnNull(MsanReallocate(stack, ptr, size, sizeof(u64)));
+}
+
+void *msan_valloc(uptr size, StackTrace *stack) {
+  return SetErrnoOnNull(MsanAllocate(stack, size, GetPageSizeCached(), false));
+}
+
+void *msan_pvalloc(uptr size, StackTrace *stack) {
+  uptr PageSize = GetPageSizeCached();
+  if (UNLIKELY(CheckForPvallocOverflow(size, PageSize))) {
+    errno = errno_ENOMEM;
+    return Allocator::FailureHandler::OnBadRequest();
+  }
+  // pvalloc(0) should allocate one page.
+  size = size ? RoundUpTo(size, PageSize) : PageSize;
+  return SetErrnoOnNull(MsanAllocate(stack, size, PageSize, false));
+}
+
+void *msan_aligned_alloc(uptr alignment, uptr size, StackTrace *stack) {
+  if (UNLIKELY(!CheckAlignedAllocAlignmentAndSize(alignment, size))) {
+    errno = errno_EINVAL;
+    return Allocator::FailureHandler::OnBadRequest();
+  }
+  return SetErrnoOnNull(MsanAllocate(stack, size, alignment, false));
+}
+
+void *msan_memalign(uptr alignment, uptr size, StackTrace *stack) {
+  if (UNLIKELY(!IsPowerOfTwo(alignment))) {
+    errno = errno_EINVAL;
+    return Allocator::FailureHandler::OnBadRequest();
+  }
+  return SetErrnoOnNull(MsanAllocate(stack, size, alignment, false));
+}
+
+int msan_posix_memalign(void **memptr, uptr alignment, uptr size,
+                        StackTrace *stack) {
+  if (UNLIKELY(!CheckPosixMemalignAlignment(alignment))) {
+    Allocator::FailureHandler::OnBadRequest();
+    return errno_EINVAL;
+  }
+  void *ptr = MsanAllocate(stack, size, alignment, false);
+  if (UNLIKELY(!ptr))
+    return errno_ENOMEM;
+  CHECK(IsAligned((uptr)ptr, alignment));
+  *memptr = ptr;
+  return 0;
+}
+
 } // namespace __msan
 
 using namespace __msan;
diff --git a/lib/msan/msan_interceptors.cc b/lib/msan/msan_interceptors.cc
index 6447bb1..72bb745 100644
--- a/lib/msan/msan_interceptors.cc
+++ b/lib/msan/msan_interceptors.cc
@@ -27,6 +27,7 @@
 #include "sanitizer_common/sanitizer_allocator_internal.h"
 #include "sanitizer_common/sanitizer_atomic.h"
 #include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_errno.h"
 #include "sanitizer_common/sanitizer_stackdepot.h"
 #include "sanitizer_common/sanitizer_libc.h"
 #include "sanitizer_common/sanitizer_linux.h"
@@ -48,15 +49,9 @@
 DECLARE_REAL(void *, memcpy, void *dest, const void *src, uptr n)
 DECLARE_REAL(void *, memset, void *dest, int c, uptr n)
 
-#if SANITIZER_FREEBSD
-#define __errno_location __error
-#endif
-
 // True if this is a nested interceptor.
 static THREADLOCAL int in_interceptor_scope;
 
-extern "C" int *__errno_location(void);
-
 struct InterceptorScope {
   InterceptorScope() { ++in_interceptor_scope; }
   ~InterceptorScope() { --in_interceptor_scope; }
@@ -114,7 +109,7 @@
 #define CHECK_UNPOISONED(x, n)                             \
   do {                                                     \
     if (!IsInInterceptorScope()) CHECK_UNPOISONED_0(x, n); \
-  } while (0);
+  } while (0)
 
 #define CHECK_UNPOISONED_STRING_OF_LEN(x, len, n)               \
   CHECK_UNPOISONED((x),                                         \
@@ -123,15 +118,7 @@
 #define CHECK_UNPOISONED_STRING(x, n)                           \
     CHECK_UNPOISONED_STRING_OF_LEN((x), internal_strlen(x), (n))
 
-INTERCEPTOR(SIZE_T, fread, void *ptr, SIZE_T size, SIZE_T nmemb, void *file) {
-  ENSURE_MSAN_INITED();
-  SIZE_T res = REAL(fread)(ptr, size, nmemb, file);
-  if (res > 0)
-    __msan_unpoison(ptr, res *size);
-  return res;
-}
-
-#if !SANITIZER_FREEBSD
+#if !SANITIZER_FREEBSD && !SANITIZER_NETBSD
 INTERCEPTOR(SIZE_T, fread_unlocked, void *ptr, SIZE_T size, SIZE_T nmemb,
             void *file) {
   ENSURE_MSAN_INITED();
@@ -147,7 +134,7 @@
 
 INTERCEPTOR(SSIZE_T, readlink, const char *path, char *buf, SIZE_T bufsiz) {
   ENSURE_MSAN_INITED();
-  CHECK_UNPOISONED_STRING(path, 0)
+  CHECK_UNPOISONED_STRING(path, 0);
   SSIZE_T res = REAL(readlink)(path, buf, bufsiz);
   if (res > 0)
     __msan_unpoison(buf, res);
@@ -174,58 +161,45 @@
 
 INTERCEPTOR(int, posix_memalign, void **memptr, SIZE_T alignment, SIZE_T size) {
   GET_MALLOC_STACK_TRACE;
-  CHECK_EQ(alignment & (alignment - 1), 0);
   CHECK_NE(memptr, 0);
-  *memptr = MsanReallocate(&stack, nullptr, size, alignment, false);
-  CHECK_NE(*memptr, 0);
-  __msan_unpoison(memptr, sizeof(*memptr));
-  return 0;
+  int res = msan_posix_memalign(memptr, alignment, size, &stack);
+  if (!res)
+    __msan_unpoison(memptr, sizeof(*memptr));
+  return res;
 }
 
-#if !SANITIZER_FREEBSD
-INTERCEPTOR(void *, memalign, SIZE_T boundary, SIZE_T size) {
+#if !SANITIZER_FREEBSD && !SANITIZER_NETBSD
+INTERCEPTOR(void *, memalign, SIZE_T alignment, SIZE_T size) {
   GET_MALLOC_STACK_TRACE;
-  CHECK_EQ(boundary & (boundary - 1), 0);
-  void *ptr = MsanReallocate(&stack, nullptr, size, boundary, false);
-  return ptr;
+  return msan_memalign(alignment, size, &stack);
 }
 #define MSAN_MAYBE_INTERCEPT_MEMALIGN INTERCEPT_FUNCTION(memalign)
 #else
 #define MSAN_MAYBE_INTERCEPT_MEMALIGN
 #endif
 
-INTERCEPTOR(void *, aligned_alloc, SIZE_T boundary, SIZE_T size) {
+INTERCEPTOR(void *, aligned_alloc, SIZE_T alignment, SIZE_T size) {
   GET_MALLOC_STACK_TRACE;
-  CHECK_EQ(boundary & (boundary - 1), 0);
-  void *ptr = MsanReallocate(&stack, nullptr, size, boundary, false);
-  return ptr;
+  return msan_aligned_alloc(alignment, size, &stack);
 }
 
-INTERCEPTOR(void *, __libc_memalign, SIZE_T boundary, SIZE_T size) {
+INTERCEPTOR(void *, __libc_memalign, SIZE_T alignment, SIZE_T size) {
   GET_MALLOC_STACK_TRACE;
-  CHECK_EQ(boundary & (boundary - 1), 0);
-  void *ptr = MsanReallocate(&stack, nullptr, size, boundary, false);
-  DTLS_on_libc_memalign(ptr, size);
+  void *ptr = msan_memalign(alignment, size, &stack);
+  if (ptr)
+    DTLS_on_libc_memalign(ptr, size);
   return ptr;
 }
 
 INTERCEPTOR(void *, valloc, SIZE_T size) {
   GET_MALLOC_STACK_TRACE;
-  void *ptr = MsanReallocate(&stack, nullptr, size, GetPageSizeCached(), false);
-  return ptr;
+  return msan_valloc(size, &stack);
 }
 
-#if !SANITIZER_FREEBSD
+#if !SANITIZER_FREEBSD && !SANITIZER_NETBSD
 INTERCEPTOR(void *, pvalloc, SIZE_T size) {
   GET_MALLOC_STACK_TRACE;
-  uptr PageSize = GetPageSizeCached();
-  size = RoundUpTo(size, PageSize);
-  if (size == 0) {
-    // pvalloc(0) should allocate one page.
-    size = PageSize;
-  }
-  void *ptr = MsanReallocate(&stack, nullptr, size, PageSize, false);
-  return ptr;
+  return msan_pvalloc(size, &stack);
 }
 #define MSAN_MAYBE_INTERCEPT_PVALLOC INTERCEPT_FUNCTION(pvalloc)
 #else
@@ -238,7 +212,7 @@
   MsanDeallocate(&stack, ptr);
 }
 
-#if !SANITIZER_FREEBSD
+#if !SANITIZER_FREEBSD && !SANITIZER_NETBSD
 INTERCEPTOR(void, cfree, void *ptr) {
   GET_MALLOC_STACK_TRACE;
   if (!ptr || UNLIKELY(IsInDlsymAllocPool(ptr))) return;
@@ -253,7 +227,7 @@
   return __sanitizer_get_allocated_size(ptr);
 }
 
-#if !SANITIZER_FREEBSD
+#if !SANITIZER_FREEBSD && !SANITIZER_NETBSD
 // This function actually returns a struct by value, but we can't unpoison a
 // temporary! The following is equivalent on all supported platforms but
 // aarch64 (which uses a different register for sret value).  We have a test
@@ -272,7 +246,7 @@
 #define MSAN_MAYBE_INTERCEPT_MALLINFO
 #endif
 
-#if !SANITIZER_FREEBSD
+#if !SANITIZER_FREEBSD && !SANITIZER_NETBSD
 INTERCEPTOR(int, mallopt, int cmd, int value) {
   return -1;
 }
@@ -281,7 +255,7 @@
 #define MSAN_MAYBE_INTERCEPT_MALLOPT
 #endif
 
-#if !SANITIZER_FREEBSD
+#if !SANITIZER_FREEBSD && !SANITIZER_NETBSD
 INTERCEPTOR(void, malloc_stats, void) {
   // FIXME: implement, but don't call REAL(malloc_stats)!
 }
@@ -334,7 +308,7 @@
   return res;
 }
 
-#if !SANITIZER_FREEBSD
+#if !SANITIZER_FREEBSD && !SANITIZER_NETBSD
 INTERCEPTOR(char *, __strdup, char *src) {
   ENSURE_MSAN_INITED();
   GET_STORE_STACK_TRACE;
@@ -349,33 +323,6 @@
 #define MSAN_MAYBE_INTERCEPT___STRDUP
 #endif
 
-INTERCEPTOR(char *, strndup, char *src, SIZE_T n) {
-  ENSURE_MSAN_INITED();
-  GET_STORE_STACK_TRACE;
-  // On FreeBSD strndup() leverages strnlen().
-  InterceptorScope interceptor_scope;
-  SIZE_T copy_size = REAL(strnlen)(src, n);
-  char *res = REAL(strndup)(src, n);
-  CopyShadowAndOrigin(res, src, copy_size, &stack);
-  __msan_unpoison(res + copy_size, 1); // \0
-  return res;
-}
-
-#if !SANITIZER_FREEBSD
-INTERCEPTOR(char *, __strndup, char *src, SIZE_T n) {
-  ENSURE_MSAN_INITED();
-  GET_STORE_STACK_TRACE;
-  SIZE_T copy_size = REAL(strnlen)(src, n);
-  char *res = REAL(__strndup)(src, n);
-  CopyShadowAndOrigin(res, src, copy_size, &stack);
-  __msan_unpoison(res + copy_size, 1); // \0
-  return res;
-}
-#define MSAN_MAYBE_INTERCEPT___STRNDUP INTERCEPT_FUNCTION(__strndup)
-#else
-#define MSAN_MAYBE_INTERCEPT___STRNDUP
-#endif
-
 INTERCEPTOR(char *, gcvt, double number, SIZE_T ndigit, char *buf) {
   ENSURE_MSAN_INITED();
   char *res = REAL(gcvt)(number, ndigit, buf);
@@ -510,6 +457,20 @@
   return res;
 }
 
+#if SANITIZER_LINUX
+INTERCEPTOR(SIZE_T, __strxfrm_l, char *dest, const char *src, SIZE_T n,
+            void *loc) {
+  ENSURE_MSAN_INITED();
+  CHECK_UNPOISONED(src, REAL(strlen)(src) + 1);
+  SIZE_T res = REAL(__strxfrm_l)(dest, src, n, loc);
+  if (res < n) __msan_unpoison(dest, res + 1);
+  return res;
+}
+#define MSAN_MAYBE_INTERCEPT___STRXFRM_L INTERCEPT_FUNCTION(__strxfrm_l)
+#else
+#define MSAN_MAYBE_INTERCEPT___STRXFRM_L
+#endif
+
 #define INTERCEPTOR_STRFTIME_BODY(char_type, ret_type, func, s, ...) \
   ENSURE_MSAN_INITED();                                              \
   ret_type res = REAL(func)(s, __VA_ARGS__);                         \
@@ -526,7 +487,7 @@
   INTERCEPTOR_STRFTIME_BODY(char, SIZE_T, strftime_l, s, max, format, tm, loc);
 }
 
-#if !SANITIZER_FREEBSD
+#if !SANITIZER_FREEBSD && !SANITIZER_NETBSD
 INTERCEPTOR(SIZE_T, __strftime_l, char *s, SIZE_T max, const char *format,
             __sanitizer_tm *tm, void *loc) {
   INTERCEPTOR_STRFTIME_BODY(char, SIZE_T, __strftime_l, s, max, format, tm,
@@ -548,7 +509,7 @@
                             loc);
 }
 
-#if !SANITIZER_FREEBSD
+#if !SANITIZER_FREEBSD && !SANITIZER_NETBSD
 INTERCEPTOR(SIZE_T, __wcsftime_l, wchar_t *s, SIZE_T max, const wchar_t *format,
             __sanitizer_tm *tm, void *loc) {
   INTERCEPTOR_STRFTIME_BODY(wchar_t, SIZE_T, __wcsftime_l, s, max, format, tm,
@@ -573,30 +534,6 @@
   return res;
 }
 
-INTERCEPTOR(SIZE_T, wcslen, const wchar_t *s) {
-  ENSURE_MSAN_INITED();
-  SIZE_T res = REAL(wcslen)(s);
-  CHECK_UNPOISONED(s, sizeof(wchar_t) * (res + 1));
-  return res;
-}
-
-// wchar_t *wcschr(const wchar_t *wcs, wchar_t wc);
-INTERCEPTOR(wchar_t *, wcschr, void *s, wchar_t wc, void *ps) {
-  ENSURE_MSAN_INITED();
-  wchar_t *res = REAL(wcschr)(s, wc, ps);
-  return res;
-}
-
-// wchar_t *wcscpy(wchar_t *dest, const wchar_t *src);
-INTERCEPTOR(wchar_t *, wcscpy, wchar_t *dest, const wchar_t *src) {
-  ENSURE_MSAN_INITED();
-  GET_STORE_STACK_TRACE;
-  wchar_t *res = REAL(wcscpy)(dest, src);
-  CopyShadowAndOrigin(dest, src, sizeof(wchar_t) * (REAL(wcslen)(src) + 1),
-                      &stack);
-  return res;
-}
-
 // wchar_t *wmemcpy(wchar_t *dest, const wchar_t *src, SIZE_T n);
 INTERCEPTOR(wchar_t *, wmemcpy, wchar_t *dest, const wchar_t *src, SIZE_T n) {
   ENSURE_MSAN_INITED();
@@ -678,7 +615,7 @@
 
 INTERCEPTOR(int, setenv, const char *name, const char *value, int overwrite) {
   ENSURE_MSAN_INITED();
-  CHECK_UNPOISONED_STRING(name, 0)
+  CHECK_UNPOISONED_STRING(name, 0);
   int res = REAL(setenv)(name, value, overwrite);
   if (!res) UnpoisonEnviron();
   return res;
@@ -691,7 +628,7 @@
   return res;
 }
 
-#if !SANITIZER_FREEBSD
+#if !SANITIZER_FREEBSD && !SANITIZER_NETBSD
 INTERCEPTOR(int, __fxstat, int magic, int fd, void *buf) {
   ENSURE_MSAN_INITED();
   int res = REAL(__fxstat)(magic, fd, buf);
@@ -704,7 +641,7 @@
 #define MSAN_MAYBE_INTERCEPT___FXSTAT
 #endif
 
-#if !SANITIZER_FREEBSD
+#if !SANITIZER_FREEBSD && !SANITIZER_NETBSD
 INTERCEPTOR(int, __fxstat64, int magic, int fd, void *buf) {
   ENSURE_MSAN_INITED();
   int res = REAL(__fxstat64)(magic, fd, buf);
@@ -717,7 +654,7 @@
 #define MSAN_MAYBE_INTERCEPT___FXSTAT64
 #endif
 
-#if SANITIZER_FREEBSD
+#if SANITIZER_FREEBSD && !SANITIZER_NETBSD
 INTERCEPTOR(int, fstatat, int fd, char *pathname, void *buf, int flags) {
   ENSURE_MSAN_INITED();
   int res = REAL(fstatat)(fd, pathname, buf, flags);
@@ -736,7 +673,7 @@
 # define MSAN_INTERCEPT_FSTATAT INTERCEPT_FUNCTION(__fxstatat)
 #endif
 
-#if !SANITIZER_FREEBSD
+#if !SANITIZER_FREEBSD && !SANITIZER_NETBSD
 INTERCEPTOR(int, __fxstatat64, int magic, int fd, char *pathname, void *buf,
             int flags) {
   ENSURE_MSAN_INITED();
@@ -783,7 +720,7 @@
   return res;
 }
 
-#if !SANITIZER_FREEBSD
+#if !SANITIZER_FREEBSD && !SANITIZER_NETBSD
 INTERCEPTOR(char *, fgets_unlocked, char *s, int size, void *stream) {
   ENSURE_MSAN_INITED();
   char *res = REAL(fgets_unlocked)(s, size, stream);
@@ -806,7 +743,7 @@
   return res;
 }
 
-#if !SANITIZER_FREEBSD
+#if !SANITIZER_FREEBSD && !SANITIZER_NETBSD
 INTERCEPTOR(int, getrlimit64, int resource, void *rlim) {
   if (msan_init_is_running) return REAL(getrlimit64)(resource, rlim);
   ENSURE_MSAN_INITED();
@@ -882,7 +819,7 @@
   return res;
 }
 
-#if !SANITIZER_FREEBSD
+#if !SANITIZER_FREEBSD && !SANITIZER_NETBSD
 INTERCEPTOR(int, epoll_wait, int epfd, void *events, int maxevents,
     int timeout) {
   ENSURE_MSAN_INITED();
@@ -897,7 +834,7 @@
 #define MSAN_MAYBE_INTERCEPT_EPOLL_WAIT
 #endif
 
-#if !SANITIZER_FREEBSD
+#if !SANITIZER_FREEBSD && !SANITIZER_NETBSD
 INTERCEPTOR(int, epoll_pwait, int epfd, void *events, int maxevents,
     int timeout, void *sigmask) {
   ENSURE_MSAN_INITED();
@@ -917,7 +854,7 @@
   if (UNLIKELY(!msan_inited))
     // Hack: dlsym calls calloc before REAL(calloc) is retrieved from dlsym.
     return AllocateFromLocalPool(nmemb * size);
-  return MsanCalloc(&stack, nmemb, size);
+  return msan_calloc(nmemb, size, &stack);
 }
 
 INTERCEPTOR(void *, realloc, void *ptr, SIZE_T size) {
@@ -930,12 +867,12 @@
       new_ptr = AllocateFromLocalPool(copy_size);
     } else {
       copy_size = size;
-      new_ptr = MsanReallocate(&stack, nullptr, copy_size, sizeof(u64), false);
+      new_ptr = msan_malloc(copy_size, &stack);
     }
     internal_memcpy(new_ptr, ptr, copy_size);
     return new_ptr;
   }
-  return MsanReallocate(&stack, ptr, size, sizeof(u64), false);
+  return msan_realloc(ptr, size, &stack);
 }
 
 INTERCEPTOR(void *, malloc, SIZE_T size) {
@@ -943,7 +880,7 @@
   if (UNLIKELY(!msan_inited))
     // Hack: dlsym calls malloc before REAL(malloc) is retrieved from dlsym.
     return AllocateFromLocalPool(size);
-  return MsanReallocate(&stack, nullptr, size, sizeof(u64), false);
+  return msan_malloc(size, &stack);
 }
 
 void __msan_allocated_memory(const void *data, uptr size) {
@@ -974,7 +911,7 @@
   ENSURE_MSAN_INITED();
   if (addr && !MEM_IS_APP(addr)) {
     if (flags & map_fixed) {
-      *__errno_location() = errno_EINVAL;
+      errno = errno_EINVAL;
       return (void *)-1;
     } else {
       addr = nullptr;
@@ -986,13 +923,13 @@
   return res;
 }
 
-#if !SANITIZER_FREEBSD
+#if !SANITIZER_FREEBSD && !SANITIZER_NETBSD
 INTERCEPTOR(void *, mmap64, void *addr, SIZE_T length, int prot, int flags,
             int fd, OFF64_T offset) {
   ENSURE_MSAN_INITED();
   if (addr && !MEM_IS_APP(addr)) {
     if (flags & map_fixed) {
-      *__errno_location() = errno_EINVAL;
+      errno = errno_EINVAL;
       return (void *)-1;
     } else {
       addr = nullptr;
@@ -1060,11 +997,19 @@
   cb(signo, si, uc);
 }
 
+static void read_sigaction(const __sanitizer_sigaction *act) {
+  CHECK_UNPOISONED(&act->sa_flags, sizeof(act->sa_flags));
+  if (act->sa_flags & __sanitizer::sa_siginfo)
+    CHECK_UNPOISONED(&act->sigaction, sizeof(act->sigaction));
+  else
+    CHECK_UNPOISONED(&act->handler, sizeof(act->handler));
+  CHECK_UNPOISONED(&act->sa_mask, sizeof(act->sa_mask));
+}
+
 INTERCEPTOR(int, sigaction, int signo, const __sanitizer_sigaction *act,
             __sanitizer_sigaction *oldact) {
   ENSURE_MSAN_INITED();
-  // FIXME: check that *act is unpoisoned.
-  // That requires intercepting all of sigemptyset, sigfillset, etc.
+  if (act) read_sigaction(act);
   int res;
   if (flags()->wrap_signals) {
     SpinMutexLock lock(&sigactions_mu);
@@ -1360,6 +1305,13 @@
     return __msan_memcpy(to, from, size);                   \
   }
 
+#define COMMON_INTERCEPTOR_COPY_STRING(ctx, to, from, size) \
+  do {                                                      \
+    GET_STORE_STACK_TRACE;                                  \
+    CopyShadowAndOrigin(to, from, size, &stack);            \
+    __msan_unpoison(to + size, 1);                          \
+  } while (false)
+
 #include "sanitizer_common/sanitizer_platform_interceptors.h"
 #include "sanitizer_common/sanitizer_common_interceptors.inc"
 
@@ -1433,6 +1385,35 @@
   return res;
 }
 
+// wchar_t *wcschr(const wchar_t *wcs, wchar_t wc);
+INTERCEPTOR(wchar_t *, wcschr, void *s, wchar_t wc, void *ps) {
+  ENSURE_MSAN_INITED();
+  wchar_t *res = REAL(wcschr)(s, wc, ps);
+  return res;
+}
+
+// wchar_t *wcscpy(wchar_t *dest, const wchar_t *src);
+INTERCEPTOR(wchar_t *, wcscpy, wchar_t *dest, const wchar_t *src) {
+  ENSURE_MSAN_INITED();
+  GET_STORE_STACK_TRACE;
+  wchar_t *res = REAL(wcscpy)(dest, src);
+  CopyShadowAndOrigin(dest, src, sizeof(wchar_t) * (REAL(wcslen)(src) + 1),
+                      &stack);
+  return res;
+}
+
+INTERCEPTOR(wchar_t *, wcsncpy, wchar_t *dest, const wchar_t *src,
+            SIZE_T n) {  // NOLINT
+  ENSURE_MSAN_INITED();
+  GET_STORE_STACK_TRACE;
+  SIZE_T copy_size = REAL(wcsnlen)(src, n);
+  if (copy_size < n) copy_size++;           // trailing \0
+  wchar_t *res = REAL(wcsncpy)(dest, src, n);  // NOLINT
+  CopyShadowAndOrigin(dest, src, copy_size * sizeof(wchar_t), &stack);
+  __msan_unpoison(dest + copy_size, (n - copy_size) * sizeof(wchar_t));
+  return res;
+}
+
 // These interface functions reside here so that they can use
 // REAL(memset), etc.
 void __msan_unpoison(const void *a, uptr size) {
@@ -1527,8 +1508,6 @@
   INTERCEPT_FUNCTION(stpcpy);  // NOLINT
   INTERCEPT_FUNCTION(strdup);
   MSAN_MAYBE_INTERCEPT___STRDUP;
-  INTERCEPT_FUNCTION(strndup);
-  MSAN_MAYBE_INTERCEPT___STRNDUP;
   INTERCEPT_FUNCTION(strncpy);  // NOLINT
   INTERCEPT_FUNCTION(gcvt);
   INTERCEPT_FUNCTION(strcat);  // NOLINT
@@ -1556,6 +1535,7 @@
 #endif
   INTERCEPT_FUNCTION(strxfrm);
   INTERCEPT_FUNCTION(strxfrm_l);
+  MSAN_MAYBE_INTERCEPT___STRXFRM_L;
   INTERCEPT_FUNCTION(strftime);
   INTERCEPT_FUNCTION(strftime_l);
   MSAN_MAYBE_INTERCEPT___STRFTIME_L;
@@ -1565,8 +1545,10 @@
   INTERCEPT_FUNCTION(mbtowc);
   INTERCEPT_FUNCTION(mbrtowc);
   INTERCEPT_FUNCTION(wcslen);
+  INTERCEPT_FUNCTION(wcsnlen);
   INTERCEPT_FUNCTION(wcschr);
   INTERCEPT_FUNCTION(wcscpy);
+  INTERCEPT_FUNCTION(wcsncpy);
   INTERCEPT_FUNCTION(wcscmp);
   INTERCEPT_FUNCTION(getenv);
   INTERCEPT_FUNCTION(setenv);
diff --git a/lib/msan/msan_linux.cc b/lib/msan/msan_linux.cc
index 0a687f6..a1191a1 100644
--- a/lib/msan/msan_linux.cc
+++ b/lib/msan/msan_linux.cc
@@ -9,11 +9,11 @@
 //
 // This file is a part of MemorySanitizer.
 //
-// Linux- and FreeBSD-specific code.
+// Linux-, NetBSD- and FreeBSD-specific code.
 //===----------------------------------------------------------------------===//
 
 #include "sanitizer_common/sanitizer_platform.h"
-#if SANITIZER_FREEBSD || SANITIZER_LINUX
+#if SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_NETBSD
 
 #include "msan.h"
 #include "msan_thread.h"
@@ -213,4 +213,4 @@
 
 } // namespace __msan
 
-#endif // SANITIZER_FREEBSD || SANITIZER_LINUX
+#endif // SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_NETBSD
diff --git a/lib/msan/msan_new_delete.cc b/lib/msan/msan_new_delete.cc
index 5401003..7219267 100644
--- a/lib/msan/msan_new_delete.cc
+++ b/lib/msan/msan_new_delete.cc
@@ -14,6 +14,7 @@
 
 #include "msan.h"
 #include "interception/interception.h"
+#include "sanitizer_common/sanitizer_allocator.h"
 
 #if MSAN_REPLACE_OPERATORS_NEW_AND_DELETE
 
@@ -27,18 +28,25 @@
 }  // namespace std
 
 
-#define OPERATOR_NEW_BODY \
+// TODO(alekseys): throw std::bad_alloc instead of dying on OOM.
+#define OPERATOR_NEW_BODY(nothrow) \
   GET_MALLOC_STACK_TRACE; \
-  return MsanReallocate(&stack, 0, size, sizeof(u64), false)
+  void *res = msan_malloc(size, &stack);\
+  if (!nothrow && UNLIKELY(!res)) DieOnFailure::OnOOM();\
+  return res
 
 INTERCEPTOR_ATTRIBUTE
-void *operator new(size_t size) { OPERATOR_NEW_BODY; }
+void *operator new(size_t size) { OPERATOR_NEW_BODY(false /*nothrow*/); }
 INTERCEPTOR_ATTRIBUTE
-void *operator new[](size_t size) { OPERATOR_NEW_BODY; }
+void *operator new[](size_t size) { OPERATOR_NEW_BODY(false /*nothrow*/); }
 INTERCEPTOR_ATTRIBUTE
-void *operator new(size_t size, std::nothrow_t const&) { OPERATOR_NEW_BODY; }
+void *operator new(size_t size, std::nothrow_t const&) {
+  OPERATOR_NEW_BODY(true /*nothrow*/);
+}
 INTERCEPTOR_ATTRIBUTE
-void *operator new[](size_t size, std::nothrow_t const&) { OPERATOR_NEW_BODY; }
+void *operator new[](size_t size, std::nothrow_t const&) {
+  OPERATOR_NEW_BODY(true /*nothrow*/);
+}
 
 #define OPERATOR_DELETE_BODY \
   GET_MALLOC_STACK_TRACE; \
diff --git a/lib/msan/msan_report.cc b/lib/msan/msan_report.cc
index 9a35c9c..cddad01 100644
--- a/lib/msan/msan_report.cc
+++ b/lib/msan/msan_report.cc
@@ -30,10 +30,8 @@
 class Decorator: public __sanitizer::SanitizerCommonDecorator {
  public:
   Decorator() : SanitizerCommonDecorator() { }
-  const char *Warning()    { return Red(); }
   const char *Origin()     { return Magenta(); }
   const char *Name()   { return Green(); }
-  const char *End()    { return Default(); }
 };
 
 static void DescribeStackOrigin(const char *so, uptr pc) {
@@ -47,7 +45,7 @@
       "  %sUninitialized value was created by an allocation of '%s%s%s'"
       " in the stack frame of function '%s%s%s'%s\n",
       d.Origin(), d.Name(), s, d.Origin(), d.Name(), sep + 1, d.Origin(),
-      d.End());
+      d.Default());
   InternalFree(s);
 
   if (pc) {
@@ -66,7 +64,7 @@
     StackTrace stack;
     o = o.getNextChainedOrigin(&stack);
     Printf("  %sUninitialized value was stored to memory at%s\n", d.Origin(),
-        d.End());
+           d.Default());
     stack.Print();
   }
   if (o.isStackOrigin()) {
@@ -78,18 +76,19 @@
     switch (stack.tag) {
       case StackTrace::TAG_ALLOC:
         Printf("  %sUninitialized value was created by a heap allocation%s\n",
-               d.Origin(), d.End());
+               d.Origin(), d.Default());
         break;
       case StackTrace::TAG_DEALLOC:
         Printf("  %sUninitialized value was created by a heap deallocation%s\n",
-               d.Origin(), d.End());
+               d.Origin(), d.Default());
         break;
       case STACK_TRACE_TAG_POISON:
         Printf("  %sMemory was marked as uninitialized%s\n", d.Origin(),
-               d.End());
+               d.Default());
         break;
       default:
-        Printf("  %sUninitialized value was created%s\n", d.Origin(), d.End());
+        Printf("  %sUninitialized value was created%s\n", d.Origin(),
+               d.Default());
         break;
     }
     stack.Print();
@@ -104,7 +103,7 @@
   Decorator d;
   Printf("%s", d.Warning());
   Report("WARNING: MemorySanitizer: use-of-uninitialized-value\n");
-  Printf("%s", d.End());
+  Printf("%s", d.Default());
   stack->Print();
   if (origin) {
     DescribeOrigin(origin);
@@ -144,7 +143,7 @@
     Decorator d;
     Printf("%s", d.Warning());
     Printf("MemorySanitizer: %d warnings reported.\n", msan_report_count);
-    Printf("%s", d.End());
+    Printf("%s", d.Default());
   }
 }
 
@@ -203,7 +202,7 @@
   Decorator d;
   Printf("%s", d.Warning());
   Printf("Shadow map of [%p, %p), %zu bytes:\n", start, end, end - start);
-  Printf("%s", d.End());
+  Printf("%s", d.Default());
   while (s < e) {
     // Line start.
     if (pos % 16 == 0) {
@@ -265,7 +264,7 @@
   Printf("%s", d.Warning());
   Printf("%sUninitialized bytes in %s%s%s at offset %zu inside [%p, %zu)%s\n",
          d.Warning(), d.Name(), what, d.Warning(), offset, start, size,
-         d.End());
+         d.Default());
   if (__sanitizer::Verbosity())
     DescribeMemoryRange(start, size);
 }
diff --git a/lib/msan/tests/CMakeLists.txt b/lib/msan/tests/CMakeLists.txt
index 8e911dc..0442895 100644
--- a/lib/msan/tests/CMakeLists.txt
+++ b/lib/msan/tests/CMakeLists.txt
@@ -35,6 +35,7 @@
   -Wno-zero-length-array
   -Wno-uninitialized
   -Werror=sign-compare
+  -Wno-gnu-zero-variadic-macro-arguments
 )
 set(MSAN_UNITTEST_INSTRUMENTED_CFLAGS
   ${MSAN_UNITTEST_COMMON_CFLAGS}
@@ -52,20 +53,14 @@
 
 append_list_if(COMPILER_RT_HAS_LIBDL -ldl MSAN_UNITTEST_LINK_FLAGS)
 
-# Compile source for the given architecture, using compiler
-# options in ${ARGN}, and add it to the object list.
-macro(msan_compile obj_list source arch kind)
-  get_filename_component(basename ${source} NAME)
-  set(output_obj "${basename}.${arch}${kind}.o")
-  get_target_flags_for_arch(${arch} TARGET_CFLAGS)
-  set(COMPILE_DEPS ${MSAN_UNITTEST_HEADERS})
-  if(NOT COMPILER_RT_STANDALONE_BUILD)
-    list(APPEND COMPILE_DEPS gtest msan)
-  endif()
-  clang_compile(${output_obj} ${source}
-                CFLAGS ${ARGN} ${TARGET_CFLAGS}
-                DEPS ${COMPILE_DEPS})
-  list(APPEND ${obj_list} ${output_obj})
+macro(msan_compile obj_list source arch kind cflags)
+  sanitizer_test_compile(
+    ${obj_list} ${source} ${arch}
+    KIND ${kind}
+    COMPILE_DEPS ${MSAN_UNITTEST_HEADERS}
+    DEPS gtest msan
+    CFLAGS ${MSAN_UNITTEST_INSTRUMENTED_CFLAGS} ${cflags}
+  )
 endmacro()
 
 macro(msan_link_shared so_list so_name arch kind)
@@ -87,23 +82,22 @@
 set_target_properties(MsanUnitTests PROPERTIES FOLDER "MSan unit tests")
 
 # Adds MSan unit tests and benchmarks for architecture.
-macro(add_msan_tests_for_arch arch kind)
+macro(add_msan_tests_for_arch arch kind cflags)
   # Build gtest instrumented with MSan.
   set(MSAN_INST_GTEST)
   msan_compile(MSAN_INST_GTEST ${COMPILER_RT_GTEST_SOURCE} ${arch} "${kind}"
-                               ${MSAN_UNITTEST_INSTRUMENTED_CFLAGS} ${ARGN})
+                               "${cflags}")
 
   # Instrumented tests.
   set(MSAN_INST_TEST_OBJECTS)
   foreach (SOURCE ${MSAN_UNITTEST_SOURCES})
-    msan_compile(MSAN_INST_TEST_OBJECTS ${SOURCE} ${arch} "${kind}"
-                 ${MSAN_UNITTEST_INSTRUMENTED_CFLAGS} ${ARGN})
+    msan_compile(MSAN_INST_TEST_OBJECTS ${SOURCE} ${arch} "${kind}" "${cflags}")
   endforeach(SOURCE)
 
   # Instrumented loadable module objects.
   set(MSAN_INST_LOADABLE_OBJECTS)
   msan_compile(MSAN_INST_LOADABLE_OBJECTS ${MSAN_LOADABLE_SOURCE} ${arch} "${kind}"
-               ${MSAN_UNITTEST_INSTRUMENTED_CFLAGS} "-fPIC" ${ARGN})
+               "-fPIC;${cflags}")
 
   # Instrumented loadable library tests.
   set(MSAN_LOADABLE_SO)
@@ -137,8 +131,8 @@
       CFLAGS ${MSAN_LIBCXX_CFLAGS} ${TARGET_CFLAGS})
     set(MSAN_LIBCXX_SO ${LIBCXX_PREFIX}/lib/libc++.so)
 
-    add_msan_tests_for_arch(${arch} "")
+    add_msan_tests_for_arch(${arch} "" "")
     add_msan_tests_for_arch(${arch} "-with-call"
-                            -mllvm -msan-instrumentation-with-call-threshold=0)
+                            "-mllvm;-msan-instrumentation-with-call-threshold=0")
   endforeach()
 endif()
diff --git a/lib/msan/tests/msan_test.cc b/lib/msan/tests/msan_test.cc
index 6f4dd99..b2d5f7c 100644
--- a/lib/msan/tests/msan_test.cc
+++ b/lib/msan/tests/msan_test.cc
@@ -175,10 +175,16 @@
 }
 
 #define EXPECT_NOT_POISONED(x) EXPECT_EQ(true, TestForNotPoisoned((x)))
+#define EXPECT_NOT_POISONED2(data, size) \
+  EXPECT_EQ(true, TestForNotPoisoned((data), (size)))
+
+bool TestForNotPoisoned(const void *data, size_t size) {
+  return __msan_test_shadow(data, size) == -1;
+}
 
 template<typename T>
 bool TestForNotPoisoned(const T& t) {
-  return __msan_test_shadow((void*)&t, sizeof(t)) == -1;
+  return TestForNotPoisoned((void *)&t, sizeof(t));
 }
 
 static U8 poisoned_array[100];
@@ -879,92 +885,203 @@
   close(sock);
 }
 
-TEST(MemorySanitizer, accept) {
-  int listen_socket = socket(AF_INET, SOCK_STREAM, 0);
+class SocketAddr {
+ public:
+  virtual ~SocketAddr() = default;
+  virtual struct sockaddr *ptr() = 0;
+  virtual size_t size() const = 0;
+
+  template <class... Args>
+  static std::unique_ptr<SocketAddr> Create(int family, Args... args);
+};
+
+class SocketAddr4 : public SocketAddr {
+ public:
+  SocketAddr4() { EXPECT_POISONED(sai_); }
+  explicit SocketAddr4(uint16_t port) {
+    memset(&sai_, 0, sizeof(sai_));
+    sai_.sin_family = AF_INET;
+    sai_.sin_port = port;
+    sai_.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+  }
+
+  sockaddr *ptr() override { return reinterpret_cast<sockaddr *>(&sai_); }
+
+  size_t size() const override { return sizeof(sai_); }
+
+ private:
+  sockaddr_in sai_;
+};
+
+class SocketAddr6 : public SocketAddr {
+ public:
+  SocketAddr6() { EXPECT_POISONED(sai_); }
+  explicit SocketAddr6(uint16_t port) {
+    memset(&sai_, 0, sizeof(sai_));
+    sai_.sin6_family = AF_INET6;
+    sai_.sin6_port = port;
+    sai_.sin6_addr = in6addr_loopback;
+  }
+
+  sockaddr *ptr() override { return reinterpret_cast<sockaddr *>(&sai_); }
+
+  size_t size() const override { return sizeof(sai_); }
+
+ private:
+  sockaddr_in6 sai_;
+};
+
+template <class... Args>
+std::unique_ptr<SocketAddr> SocketAddr::Create(int family, Args... args) {
+  if (family == AF_INET)
+    return std::unique_ptr<SocketAddr>(new SocketAddr4(args...));
+  return std::unique_ptr<SocketAddr>(new SocketAddr6(args...));
+}
+
+class MemorySanitizerIpTest : public ::testing::TestWithParam<int> {
+ public:
+  void SetUp() override {
+    ASSERT_TRUE(GetParam() == AF_INET || GetParam() == AF_INET6);
+  }
+
+  template <class... Args>
+  std::unique_ptr<SocketAddr> CreateSockAddr(Args... args) const {
+    return SocketAddr::Create(GetParam(), args...);
+  }
+
+  int CreateSocket(int socket_type) const {
+    return socket(GetParam(), socket_type, 0);
+  }
+};
+
+std::vector<int> GetAvailableIpSocketFamilies() {
+  std::vector<int> result;
+
+  for (int i : {AF_INET, AF_INET6}) {
+    int s = socket(i, SOCK_STREAM, 0);
+    if (s > 0) {
+      auto sai = SocketAddr::Create(i, 0);
+      if (bind(s, sai->ptr(), sai->size()) == 0) result.push_back(i);
+      close(s);
+    }
+  }
+
+  return result;
+}
+
+INSTANTIATE_TEST_CASE_P(IpTests, MemorySanitizerIpTest,
+                        ::testing::ValuesIn(GetAvailableIpSocketFamilies()));
+
+TEST_P(MemorySanitizerIpTest, accept) {
+  int listen_socket = CreateSocket(SOCK_STREAM);
   ASSERT_LT(0, listen_socket);
 
-  struct sockaddr_in sai;
-  memset(&sai, 0, sizeof(sai));
-  sai.sin_family = AF_INET;
-  sai.sin_port = 0;
-  sai.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
-  int res = bind(listen_socket, (struct sockaddr *)&sai, sizeof(sai));
+  auto sai = CreateSockAddr(0);
+  int res = bind(listen_socket, sai->ptr(), sai->size());
   ASSERT_EQ(0, res);
 
   res = listen(listen_socket, 1);
   ASSERT_EQ(0, res);
 
-  socklen_t sz = sizeof(sai);
-  res = getsockname(listen_socket, (struct sockaddr *)&sai, &sz);
+  socklen_t sz = sai->size();
+  res = getsockname(listen_socket, sai->ptr(), &sz);
   ASSERT_EQ(0, res);
-  ASSERT_EQ(sizeof(sai), sz);
+  ASSERT_EQ(sai->size(), sz);
 
-  int connect_socket = socket(AF_INET, SOCK_STREAM, 0);
+  int connect_socket = CreateSocket(SOCK_STREAM);
   ASSERT_LT(0, connect_socket);
   res = fcntl(connect_socket, F_SETFL, O_NONBLOCK);
   ASSERT_EQ(0, res);
-  res = connect(connect_socket, (struct sockaddr *)&sai, sizeof(sai));
+  res = connect(connect_socket, sai->ptr(), sai->size());
   // On FreeBSD this connection completes immediately.
   if (res != 0) {
     ASSERT_EQ(-1, res);
     ASSERT_EQ(EINPROGRESS, errno);
   }
 
-  __msan_poison(&sai, sizeof(sai));
-  int new_sock = accept(listen_socket, (struct sockaddr *)&sai, &sz);
+  __msan_poison(sai->ptr(), sai->size());
+  int new_sock = accept(listen_socket, sai->ptr(), &sz);
   ASSERT_LT(0, new_sock);
-  ASSERT_EQ(sizeof(sai), sz);
-  EXPECT_NOT_POISONED(sai);
+  ASSERT_EQ(sai->size(), sz);
+  EXPECT_NOT_POISONED2(sai->ptr(), sai->size());
 
-  __msan_poison(&sai, sizeof(sai));
-  res = getpeername(new_sock, (struct sockaddr *)&sai, &sz);
+  __msan_poison(sai->ptr(), sai->size());
+  res = getpeername(new_sock, sai->ptr(), &sz);
   ASSERT_EQ(0, res);
-  ASSERT_EQ(sizeof(sai), sz);
-  EXPECT_NOT_POISONED(sai);
+  ASSERT_EQ(sai->size(), sz);
+  EXPECT_NOT_POISONED2(sai->ptr(), sai->size());
 
   close(new_sock);
   close(connect_socket);
   close(listen_socket);
 }
 
-TEST(MemorySanitizer, getaddrinfo) {
-  struct addrinfo *ai;
-  struct addrinfo hints;
-  memset(&hints, 0, sizeof(hints));
-  hints.ai_family = AF_INET;
-  int res = getaddrinfo("localhost", NULL, &hints, &ai);
-  ASSERT_EQ(0, res);
-  EXPECT_NOT_POISONED(*ai);
-  ASSERT_EQ(sizeof(sockaddr_in), ai->ai_addrlen);
-  EXPECT_NOT_POISONED(*(sockaddr_in*)ai->ai_addr); 
-}
+TEST_P(MemorySanitizerIpTest, recvmsg) {
+  int server_socket = CreateSocket(SOCK_DGRAM);
+  ASSERT_LT(0, server_socket);
 
-TEST(MemorySanitizer, getnameinfo) {
-  struct sockaddr_in sai;
-  memset(&sai, 0, sizeof(sai));
-  sai.sin_family = AF_INET;
-  sai.sin_port = 80;
-  sai.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
-  char host[500];
-  char serv[500];
-  int res = getnameinfo((struct sockaddr *)&sai, sizeof(sai), host,
-                        sizeof(host), serv, sizeof(serv), 0);
+  auto sai = CreateSockAddr(0);
+  int res = bind(server_socket, sai->ptr(), sai->size());
   ASSERT_EQ(0, res);
-  EXPECT_NOT_POISONED(host[0]);
-  EXPECT_POISONED(host[sizeof(host) - 1]);
 
-  ASSERT_NE(0U, strlen(host));
-  EXPECT_NOT_POISONED(serv[0]);
-  EXPECT_POISONED(serv[sizeof(serv) - 1]);
-  ASSERT_NE(0U, strlen(serv));
+  socklen_t sz = sai->size();
+  res = getsockname(server_socket, sai->ptr(), &sz);
+  ASSERT_EQ(0, res);
+  ASSERT_EQ(sai->size(), sz);
+
+  int client_socket = CreateSocket(SOCK_DGRAM);
+  ASSERT_LT(0, client_socket);
+
+  auto client_sai = CreateSockAddr(0);
+  res = bind(client_socket, client_sai->ptr(), client_sai->size());
+  ASSERT_EQ(0, res);
+
+  sz = client_sai->size();
+  res = getsockname(client_socket, client_sai->ptr(), &sz);
+  ASSERT_EQ(0, res);
+  ASSERT_EQ(client_sai->size(), sz);
+
+  const char *s = "message text";
+  struct iovec iov;
+  iov.iov_base = (void *)s;
+  iov.iov_len = strlen(s) + 1;
+  struct msghdr msg;
+  memset(&msg, 0, sizeof(msg));
+  msg.msg_name = sai->ptr();
+  msg.msg_namelen = sai->size();
+  msg.msg_iov = &iov;
+  msg.msg_iovlen = 1;
+  res = sendmsg(client_socket, &msg, 0);
+  ASSERT_LT(0, res);
+
+  char buf[1000];
+  struct iovec recv_iov;
+  recv_iov.iov_base = (void *)&buf;
+  recv_iov.iov_len = sizeof(buf);
+  auto recv_sai = CreateSockAddr();
+  struct msghdr recv_msg;
+  memset(&recv_msg, 0, sizeof(recv_msg));
+  recv_msg.msg_name = recv_sai->ptr();
+  recv_msg.msg_namelen = recv_sai->size();
+  recv_msg.msg_iov = &recv_iov;
+  recv_msg.msg_iovlen = 1;
+  res = recvmsg(server_socket, &recv_msg, 0);
+  ASSERT_LT(0, res);
+
+  ASSERT_EQ(recv_sai->size(), recv_msg.msg_namelen);
+  EXPECT_NOT_POISONED2(recv_sai->ptr(), recv_sai->size());
+  EXPECT_STREQ(s, buf);
+
+  close(server_socket);
+  close(client_socket);
 }
 
 #define EXPECT_HOSTENT_NOT_POISONED(he)        \
   do {                                         \
     EXPECT_NOT_POISONED(*(he));                \
-    ASSERT_NE((void *) 0, (he)->h_name);       \
-    ASSERT_NE((void *) 0, (he)->h_aliases);    \
-    ASSERT_NE((void *) 0, (he)->h_addr_list);  \
+    ASSERT_NE((void *)0, (he)->h_name);        \
+    ASSERT_NE((void *)0, (he)->h_aliases);     \
+    ASSERT_NE((void *)0, (he)->h_addr_list);   \
     EXPECT_NOT_POISONED(strlen((he)->h_name)); \
     char **p = (he)->h_aliases;                \
     while (*p) {                               \
@@ -993,76 +1110,38 @@
   EXPECT_HOSTENT_NOT_POISONED(he);
 }
 
-#endif // MSAN_TEST_DISABLE_GETHOSTBYNAME
+#endif  // MSAN_TEST_DISABLE_GETHOSTBYNAME
 
-TEST(MemorySanitizer, recvmsg) {
-  int server_socket = socket(AF_INET, SOCK_DGRAM, 0);
-  ASSERT_LT(0, server_socket);
+TEST(MemorySanitizer, getaddrinfo) {
+  struct addrinfo *ai;
+  struct addrinfo hints;
+  memset(&hints, 0, sizeof(hints));
+  hints.ai_family = AF_INET;
+  int res = getaddrinfo("localhost", NULL, &hints, &ai);
+  ASSERT_EQ(0, res);
+  EXPECT_NOT_POISONED(*ai);
+  ASSERT_EQ(sizeof(sockaddr_in), ai->ai_addrlen);
+  EXPECT_NOT_POISONED(*(sockaddr_in *)ai->ai_addr);
+}
 
+TEST(MemorySanitizer, getnameinfo) {
   struct sockaddr_in sai;
   memset(&sai, 0, sizeof(sai));
   sai.sin_family = AF_INET;
-  sai.sin_port = 0;
+  sai.sin_port = 80;
   sai.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
-  int res = bind(server_socket, (struct sockaddr *)&sai, sizeof(sai));
+  char host[500];
+  char serv[500];
+  int res = getnameinfo((struct sockaddr *)&sai, sizeof(sai), host,
+                        sizeof(host), serv, sizeof(serv), 0);
   ASSERT_EQ(0, res);
+  EXPECT_NOT_POISONED(host[0]);
+  EXPECT_POISONED(host[sizeof(host) - 1]);
 
-  socklen_t sz = sizeof(sai);
-  res = getsockname(server_socket, (struct sockaddr *)&sai, &sz);
-  ASSERT_EQ(0, res);
-  ASSERT_EQ(sizeof(sai), sz);
-
-
-  int client_socket = socket(AF_INET, SOCK_DGRAM, 0);
-  ASSERT_LT(0, client_socket);
-
-  struct sockaddr_in client_sai;
-  memset(&client_sai, 0, sizeof(client_sai));
-  client_sai.sin_family = AF_INET;
-  client_sai.sin_port = 0;
-  client_sai.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
-  res = bind(client_socket, (struct sockaddr *)&client_sai, sizeof(client_sai));
-  ASSERT_EQ(0, res);
-
-  sz = sizeof(client_sai);
-  res = getsockname(client_socket, (struct sockaddr *)&client_sai, &sz);
-  ASSERT_EQ(0, res);
-  ASSERT_EQ(sizeof(client_sai), sz);
-
-  const char *s = "message text";
-  struct iovec iov;
-  iov.iov_base = (void *)s;
-  iov.iov_len = strlen(s) + 1;
-  struct msghdr msg;
-  memset(&msg, 0, sizeof(msg));
-  msg.msg_name = &sai;
-  msg.msg_namelen = sizeof(sai);
-  msg.msg_iov = &iov;
-  msg.msg_iovlen = 1;
-  res = sendmsg(client_socket, &msg, 0);
-  ASSERT_LT(0, res);
-
-
-  char buf[1000];
-  struct iovec recv_iov;
-  recv_iov.iov_base = (void *)&buf;
-  recv_iov.iov_len = sizeof(buf);
-  struct sockaddr_in recv_sai;
-  struct msghdr recv_msg;
-  memset(&recv_msg, 0, sizeof(recv_msg));
-  recv_msg.msg_name = &recv_sai;
-  recv_msg.msg_namelen = sizeof(recv_sai);
-  recv_msg.msg_iov = &recv_iov;
-  recv_msg.msg_iovlen = 1;
-  res = recvmsg(server_socket, &recv_msg, 0);
-  ASSERT_LT(0, res);
-
-  ASSERT_EQ(sizeof(recv_sai), recv_msg.msg_namelen);
-  EXPECT_NOT_POISONED(*(struct sockaddr_in *)recv_msg.msg_name);
-  EXPECT_STREQ(s, buf);
-
-  close(server_socket);
-  close(client_socket);
+  ASSERT_NE(0U, strlen(host));
+  EXPECT_NOT_POISONED(serv[0]);
+  EXPECT_POISONED(serv[sizeof(serv) - 1]);
+  ASSERT_NE(0U, strlen(serv));
 }
 
 TEST(MemorySanitizer, gethostbyname2) {
@@ -1502,19 +1581,28 @@
 TEST(MemorySanitizer, strndup) {
   char buf[4] = "abc";
   __msan_poison(buf + 2, sizeof(*buf));
-  char *x = strndup(buf, 3);
+  char *x;
+  EXPECT_UMR(x = strndup(buf, 3));
   EXPECT_NOT_POISONED(x[0]);
   EXPECT_NOT_POISONED(x[1]);
   EXPECT_POISONED(x[2]);
   EXPECT_NOT_POISONED(x[3]);
   free(x);
+  // Check handling of non 0 terminated strings.
+  buf[3] = 'z';
+  __msan_poison(buf + 3, sizeof(*buf));
+  EXPECT_UMR(x = strndup(buf + 3, 1));
+  EXPECT_POISONED(x[0]);
+  EXPECT_NOT_POISONED(x[1]);
+  free(x);
 }
 
 TEST(MemorySanitizer, strndup_short) {
   char buf[4] = "abc";
   __msan_poison(buf + 1, sizeof(*buf));
   __msan_poison(buf + 2, sizeof(*buf));
-  char *x = strndup(buf, 2);
+  char *x;
+  EXPECT_UMR(x = strndup(buf, 2));
   EXPECT_NOT_POISONED(x[0]);
   EXPECT_POISONED(x[1]);
   EXPECT_NOT_POISONED(x[2]);
@@ -1619,6 +1707,48 @@
   EXPECT_POISONED(a[7]);
 }
 
+TEST(MemorySanitizer, wcscat) {
+  wchar_t a[10];
+  wchar_t b[] = L"def";
+  wcscpy(a, L"abc");
+
+  wcscat(a, b);
+  EXPECT_EQ(6U, wcslen(a));
+  EXPECT_POISONED(a[7]);
+
+  a[3] = 0;
+  __msan_poison(b + 1, sizeof(wchar_t));
+  EXPECT_UMR(wcscat(a, b));
+
+  __msan_unpoison(b + 1, sizeof(wchar_t));
+  __msan_poison(a + 2, sizeof(wchar_t));
+  EXPECT_UMR(wcscat(a, b));
+}
+
+TEST(MemorySanitizer, wcsncat) {
+  wchar_t a[10];
+  wchar_t b[] = L"def";
+  wcscpy(a, L"abc");
+
+  wcsncat(a, b, 5);
+  EXPECT_EQ(6U, wcslen(a));
+  EXPECT_POISONED(a[7]);
+
+  a[3] = 0;
+  __msan_poison(a + 4, sizeof(wchar_t) * 6);
+  wcsncat(a, b, 2);
+  EXPECT_EQ(5U, wcslen(a));
+  EXPECT_POISONED(a[6]);
+
+  a[3] = 0;
+  __msan_poison(b + 1, sizeof(wchar_t));
+  EXPECT_UMR(wcsncat(a, b, 2));
+
+  __msan_unpoison(b + 1, sizeof(wchar_t));
+  __msan_poison(a + 2, sizeof(wchar_t));
+  EXPECT_UMR(wcsncat(a, b, 2));
+}
+
 #define TEST_STRTO_INT(func_name, char_type, str_prefix) \
   TEST(MemorySanitizer, func_name) {                     \
     char_type *e;                                        \
@@ -2124,10 +2254,51 @@
   EXPECT_NE(0U, strlen(time.tm_zone));
 }
 
+#if !defined(__FreeBSD__)
+/* Creates a temporary file with contents similar to /etc/fstab to be used
+   with getmntent{_r}.  */
+class TempFstabFile {
+ public:
+   TempFstabFile() : fd (-1) { }
+   ~TempFstabFile() {
+     if (fd >= 0)
+       close (fd);
+   }
+
+   bool Create(void) {
+     snprintf(tmpfile, sizeof(tmpfile), "/tmp/msan.getmntent.tmp.XXXXXX");
+
+     fd = mkstemp(tmpfile);
+     if (fd == -1)
+       return false;
+
+     const char entry[] = "/dev/root / ext4 errors=remount-ro 0 1";
+     size_t entrylen = sizeof(entry);
+
+     size_t bytesWritten = write(fd, entry, entrylen);
+     if (entrylen != bytesWritten)
+       return false;
+
+     return true;
+   }
+
+   const char* FileName(void) {
+     return tmpfile;
+   }
+
+ private:
+  char tmpfile[128];
+  int fd;
+};
+#endif
+
 // There's no getmntent() on FreeBSD.
 #if !defined(__FreeBSD__)
 TEST(MemorySanitizer, getmntent) {
-  FILE *fp = setmntent("/etc/fstab", "r");
+  TempFstabFile fstabtmp;
+  ASSERT_TRUE(fstabtmp.Create());
+  FILE *fp = setmntent(fstabtmp.FileName(), "r");
+
   struct mntent *mnt = getmntent(fp);
   ASSERT_TRUE(mnt != NULL);
   ASSERT_NE(0U, strlen(mnt->mnt_fsname));
@@ -2143,7 +2314,10 @@
 // There's no getmntent_r() on FreeBSD.
 #if !defined(__FreeBSD__)
 TEST(MemorySanitizer, getmntent_r) {
-  FILE *fp = setmntent("/etc/fstab", "r");
+  TempFstabFile fstabtmp;
+  ASSERT_TRUE(fstabtmp.Create());
+  FILE *fp = setmntent(fstabtmp.FileName(), "r");
+
   struct mntent mntbuf;
   char buf[1000];
   struct mntent *mnt = getmntent_r(fp, &mntbuf, buf, sizeof(buf));
@@ -3502,6 +3676,21 @@
     EXPECT_NOT_POISONED(gids[i]);
 }
 
+TEST(MemorySanitizer, getgroups_zero) {
+  gid_t group;
+  int n = getgroups(0, &group);
+  ASSERT_GE(n, 0);
+}
+
+TEST(MemorySanitizer, getgroups_negative) {
+  gid_t group;
+  int n = getgroups(-1, 0);
+  ASSERT_EQ(-1, n);
+
+  n = getgroups(-1, 0);
+  ASSERT_EQ(-1, n);
+}
+
 TEST(MemorySanitizer, wordexp) {
   wordexp_t w;
   int res = wordexp("a b c", &w, 0);
@@ -3584,8 +3773,10 @@
 
   EXPECT_POISONED(poisoned(6, 0xF) > poisoned(7, 0));
   EXPECT_POISONED(poisoned(0xF, 0xF) > poisoned(7, 0));
-
-  EXPECT_NOT_POISONED(poisoned(-1, 0x80000000U) >= poisoned(-1, 0U));
+  // Note that "icmp op X, Y" is approximated with "or shadow(X), shadow(Y)"
+  // and therefore may generate false positives in some cases, e.g. the
+  // following one:
+  // EXPECT_NOT_POISONED(poisoned(-1, 0x80000000U) >= poisoned(-1, 0U));
 }
 
 #if MSAN_HAS_M128
diff --git a/lib/profile/CMakeLists.txt b/lib/profile/CMakeLists.txt
index 006285b..342f8ee 100644
--- a/lib/profile/CMakeLists.txt
+++ b/lib/profile/CMakeLists.txt
@@ -48,6 +48,7 @@
   InstrProfilingFile.c
   InstrProfilingMerge.c
   InstrProfilingMergeFile.c
+  InstrProfilingNameVar.c
   InstrProfilingWriter.c
   InstrProfilingPlatformDarwin.c
   InstrProfilingPlatformLinux.c
diff --git a/lib/profile/GCDAProfiling.c b/lib/profile/GCDAProfiling.c
index 138af6e..8c92546 100644
--- a/lib/profile/GCDAProfiling.c
+++ b/lib/profile/GCDAProfiling.c
@@ -37,6 +37,9 @@
 #ifndef MAP_FILE
 #define MAP_FILE 0
 #endif
+#ifndef O_BINARY
+#define O_BINARY 0
+#endif
 #endif
 
 #if defined(__FreeBSD__) && defined(__i386__)
@@ -238,17 +241,17 @@
 
   /* Try just opening the file. */
   new_file = 0;
-  fd = open(filename, O_RDWR);
+  fd = open(filename, O_RDWR | O_BINARY);
 
   if (fd == -1) {
     /* Try opening the file, creating it if necessary. */
     new_file = 1;
     mode = "w+b";
-    fd = open(filename, O_RDWR | O_CREAT, 0644);
+    fd = open(filename, O_RDWR | O_CREAT | O_BINARY, 0644);
     if (fd == -1) {
       /* Try creating the directories first then opening the file. */
       __llvm_profile_recursive_mkdir(filename);
-      fd = open(filename, O_RDWR | O_CREAT, 0644);
+      fd = open(filename, O_RDWR | O_CREAT | O_BINARY, 0644);
       if (fd == -1) {
         /* Bah! It's hopeless. */
         int errnum = errno;
diff --git a/lib/profile/InstrProfData.inc b/lib/profile/InstrProfData.inc
index f7c22d1..66d63a4 100644
--- a/lib/profile/InstrProfData.inc
+++ b/lib/profile/InstrProfData.inc
@@ -153,7 +153,17 @@
 VALUE_PROF_FUNC_PARAM(uint64_t, TargetValue, Type::getInt64Ty(Ctx)) \
                       INSTR_PROF_COMMA
 VALUE_PROF_FUNC_PARAM(void *, Data, Type::getInt8PtrTy(Ctx)) INSTR_PROF_COMMA
+#ifndef VALUE_RANGE_PROF
 VALUE_PROF_FUNC_PARAM(uint32_t, CounterIndex, Type::getInt32Ty(Ctx))
+#else /* VALUE_RANGE_PROF */
+VALUE_PROF_FUNC_PARAM(uint32_t, CounterIndex, Type::getInt32Ty(Ctx)) \
+                      INSTR_PROF_COMMA
+VALUE_PROF_FUNC_PARAM(uint64_t, PreciseRangeStart, Type::getInt64Ty(Ctx)) \
+                      INSTR_PROF_COMMA
+VALUE_PROF_FUNC_PARAM(uint64_t, PreciseRangeLast, Type::getInt64Ty(Ctx)) \
+                      INSTR_PROF_COMMA
+VALUE_PROF_FUNC_PARAM(uint64_t, LargeValue, Type::getInt64Ty(Ctx))
+#endif /*VALUE_RANGE_PROF */
 #undef VALUE_PROF_FUNC_PARAM
 #undef INSTR_PROF_COMMA
 /* VALUE_PROF_FUNC_PARAM end */
@@ -174,13 +184,15 @@
  * name hash and the function address.
  */
 VALUE_PROF_KIND(IPVK_IndirectCallTarget, 0)
+/* For memory intrinsic functions size profiling. */
+VALUE_PROF_KIND(IPVK_MemOPSize, 1)
 /* These two kinds must be the last to be
  * declared. This is to make sure the string
  * array created with the template can be
  * indexed with the kind value.
  */
 VALUE_PROF_KIND(IPVK_First, IPVK_IndirectCallTarget)
-VALUE_PROF_KIND(IPVK_Last, IPVK_IndirectCallTarget)
+VALUE_PROF_KIND(IPVK_Last, IPVK_MemOPSize)
 
 #undef VALUE_PROF_KIND
 /* VALUE_PROF_KIND end */
@@ -234,6 +246,31 @@
 /* COVMAP_HEADER end.  */
 
 
+#ifdef INSTR_PROF_SECT_ENTRY
+#define INSTR_PROF_DATA_DEFINED
+INSTR_PROF_SECT_ENTRY(IPSK_data, \
+                      INSTR_PROF_QUOTE(INSTR_PROF_DATA_COMMON), \
+                      INSTR_PROF_QUOTE(INSTR_PROF_DATA_COFF), "__DATA,")
+INSTR_PROF_SECT_ENTRY(IPSK_cnts, \
+                      INSTR_PROF_QUOTE(INSTR_PROF_CNTS_COMMON), \
+                      INSTR_PROF_QUOTE(INSTR_PROF_CNTS_COFF), "__DATA,")
+INSTR_PROF_SECT_ENTRY(IPSK_name, \
+                      INSTR_PROF_QUOTE(INSTR_PROF_NAME_COMMON), \
+                      INSTR_PROF_QUOTE(INSTR_PROF_NAME_COFF), "__DATA,")
+INSTR_PROF_SECT_ENTRY(IPSK_vals, \
+                      INSTR_PROF_QUOTE(INSTR_PROF_VALS_COMMON), \
+                      INSTR_PROF_QUOTE(INSTR_PROF_VALS_COFF), "__DATA,")
+INSTR_PROF_SECT_ENTRY(IPSK_vnodes, \
+                      INSTR_PROF_QUOTE(INSTR_PROF_VNODES_COMMON), \
+                      INSTR_PROF_QUOTE(INSTR_PROF_VNODES_COFF), "__DATA,")
+INSTR_PROF_SECT_ENTRY(IPSK_covmap, \
+                      INSTR_PROF_QUOTE(INSTR_PROF_COVMAP_COMMON), \
+                      INSTR_PROF_QUOTE(INSTR_PROF_COVMAP_COFF), "__LLVM_COV,")
+
+#undef INSTR_PROF_SECT_ENTRY
+#endif
+
+
 #ifdef INSTR_PROF_VALUE_PROF_DATA
 #define INSTR_PROF_DATA_DEFINED
 
@@ -593,7 +630,7 @@
 /* Indexed profile format version (start from 1). */
 #define INSTR_PROF_INDEX_VERSION 4
 /* Coverage mapping format vresion (start from 0). */
-#define INSTR_PROF_COVMAP_VERSION 1
+#define INSTR_PROF_COVMAP_VERSION 2
 
 /* Profile version is always of type uint64_t. Reserve the upper 8 bits in the
  * version for other variants of profile. We set the lowest bit of the upper 8
@@ -610,17 +647,47 @@
  * specified via command line. */
 #define INSTR_PROF_PROFILE_NAME_VAR __llvm_profile_filename
 
+/* section name strings common to all targets other
+   than WIN32 */
+#define INSTR_PROF_DATA_COMMON __llvm_prf_data
+#define INSTR_PROF_NAME_COMMON __llvm_prf_names
+#define INSTR_PROF_CNTS_COMMON __llvm_prf_cnts
+#define INSTR_PROF_VALS_COMMON __llvm_prf_vals
+#define INSTR_PROF_VNODES_COMMON __llvm_prf_vnds
+#define INSTR_PROF_COVMAP_COMMON __llvm_covmap
+/* Win32 */
+#define INSTR_PROF_DATA_COFF .lprfd
+#define INSTR_PROF_NAME_COFF .lprfn
+#define INSTR_PROF_CNTS_COFF .lprfc
+#define INSTR_PROF_VALS_COFF .lprfv
+#define INSTR_PROF_VNODES_COFF .lprfnd
+#define INSTR_PROF_COVMAP_COFF .lcovmap
+
+#ifdef _WIN32
 /* Runtime section names and name strings.  */
-#define INSTR_PROF_DATA_SECT_NAME __llvm_prf_data
-#define INSTR_PROF_NAME_SECT_NAME __llvm_prf_names
-#define INSTR_PROF_CNTS_SECT_NAME __llvm_prf_cnts
+#define INSTR_PROF_DATA_SECT_NAME INSTR_PROF_DATA_COFF
+#define INSTR_PROF_NAME_SECT_NAME INSTR_PROF_NAME_COFF
+#define INSTR_PROF_CNTS_SECT_NAME INSTR_PROF_CNTS_COFF
 /* Array of pointers. Each pointer points to a list
  * of value nodes associated with one value site.
  */
-#define INSTR_PROF_VALS_SECT_NAME __llvm_prf_vals
+#define INSTR_PROF_VALS_SECT_NAME INSTR_PROF_VALS_COFF
 /* Value profile nodes section. */
-#define INSTR_PROF_VNODES_SECT_NAME __llvm_prf_vnds
-#define INSTR_PROF_COVMAP_SECT_NAME __llvm_covmap
+#define INSTR_PROF_VNODES_SECT_NAME INSTR_PROF_VNODES_COFF
+#define INSTR_PROF_COVMAP_SECT_NAME INSTR_PROF_COVMAP_COFF
+#else
+/* Runtime section names and name strings.  */
+#define INSTR_PROF_DATA_SECT_NAME INSTR_PROF_DATA_COMMON
+#define INSTR_PROF_NAME_SECT_NAME INSTR_PROF_NAME_COMMON
+#define INSTR_PROF_CNTS_SECT_NAME INSTR_PROF_CNTS_COMMON
+/* Array of pointers. Each pointer points to a list
+ * of value nodes associated with one value site.
+ */
+#define INSTR_PROF_VALS_SECT_NAME INSTR_PROF_VALS_COMMON
+/* Value profile nodes section. */
+#define INSTR_PROF_VNODES_SECT_NAME INSTR_PROF_VNODES_COMMON
+#define INSTR_PROF_COVMAP_SECT_NAME INSTR_PROF_COVMAP_COMMON
+#endif
 
 #define INSTR_PROF_DATA_SECT_NAME_STR                                          \
   INSTR_PROF_QUOTE(INSTR_PROF_DATA_SECT_NAME)
@@ -649,6 +716,9 @@
 #define INSTR_PROF_VALUE_PROF_FUNC __llvm_profile_instrument_target
 #define INSTR_PROF_VALUE_PROF_FUNC_STR \
         INSTR_PROF_QUOTE(INSTR_PROF_VALUE_PROF_FUNC)
+#define INSTR_PROF_VALUE_RANGE_PROF_FUNC __llvm_profile_instrument_range
+#define INSTR_PROF_VALUE_RANGE_PROF_FUNC_STR \
+        INSTR_PROF_QUOTE(INSTR_PROF_VALUE_RANGE_PROF_FUNC)
 
 /* InstrProfile per-function control data alignment.  */
 #define INSTR_PROF_DATA_ALIGNMENT 8
diff --git a/lib/profile/InstrProfiling.c b/lib/profile/InstrProfiling.c
index 6828a3d..fe66fec 100644
--- a/lib/profile/InstrProfiling.c
+++ b/lib/profile/InstrProfiling.c
@@ -19,8 +19,6 @@
 
 COMPILER_RT_WEAK uint64_t INSTR_PROF_RAW_VERSION_VAR = INSTR_PROF_RAW_VERSION;
 
-COMPILER_RT_WEAK char INSTR_PROF_PROFILE_NAME_VAR[1] = {0};
-
 COMPILER_RT_VISIBILITY uint64_t __llvm_profile_get_magic(void) {
   return sizeof(void *) == sizeof(uint64_t) ? (INSTR_PROF_RAW_MAGIC_64)
                                             : (INSTR_PROF_RAW_MAGIC_32);
diff --git a/lib/profile/InstrProfilingBuffer.c b/lib/profile/InstrProfilingBuffer.c
index ac259e8..a7e852f 100644
--- a/lib/profile/InstrProfilingBuffer.c
+++ b/lib/profile/InstrProfilingBuffer.c
@@ -45,15 +45,24 @@
          (CountersEnd - CountersBegin) * sizeof(uint64_t) + NamesSize + Padding;
 }
 
+COMPILER_RT_VISIBILITY
+void initBufferWriter(ProfDataWriter *BufferWriter, char *Buffer) {
+  BufferWriter->Write = lprofBufferWriter;
+  BufferWriter->WriterCtx = Buffer;
+}
+
 COMPILER_RT_VISIBILITY int __llvm_profile_write_buffer(char *Buffer) {
-  return lprofWriteData(lprofBufferWriter, Buffer, 0);
+  ProfDataWriter BufferWriter;
+  initBufferWriter(&BufferWriter, Buffer);
+  return lprofWriteData(&BufferWriter, 0, 0);
 }
 
 COMPILER_RT_VISIBILITY int __llvm_profile_write_buffer_internal(
     char *Buffer, const __llvm_profile_data *DataBegin,
     const __llvm_profile_data *DataEnd, const uint64_t *CountersBegin,
     const uint64_t *CountersEnd, const char *NamesBegin, const char *NamesEnd) {
-  return lprofWriteDataImpl(lprofBufferWriter, Buffer, DataBegin, DataEnd,
-                            CountersBegin, CountersEnd, 0, NamesBegin,
-                            NamesEnd);
+  ProfDataWriter BufferWriter;
+  initBufferWriter(&BufferWriter, Buffer);
+  return lprofWriteDataImpl(&BufferWriter, DataBegin, DataEnd, CountersBegin,
+                            CountersEnd, 0, NamesBegin, NamesEnd, 0);
 }
diff --git a/lib/profile/InstrProfilingFile.c b/lib/profile/InstrProfilingFile.c
index cd3590e..8ae2b7d 100644
--- a/lib/profile/InstrProfilingFile.c
+++ b/lib/profile/InstrProfilingFile.c
@@ -91,24 +91,39 @@
 static unsigned doMerging() { return lprofCurFilename.MergePoolSize; }
 
 /* Return 1 if there is an error, otherwise return  0.  */
-static uint32_t fileWriter(ProfDataIOVec *IOVecs, uint32_t NumIOVecs,
-                           void **WriterCtx) {
+static uint32_t fileWriter(ProfDataWriter *This, ProfDataIOVec *IOVecs,
+                           uint32_t NumIOVecs) {
   uint32_t I;
-  FILE *File = (FILE *)*WriterCtx;
+  FILE *File = (FILE *)This->WriterCtx;
   for (I = 0; I < NumIOVecs; I++) {
-    if (fwrite(IOVecs[I].Data, IOVecs[I].ElmSize, IOVecs[I].NumElm, File) !=
-        IOVecs[I].NumElm)
-      return 1;
+    if (IOVecs[I].Data) {
+      if (fwrite(IOVecs[I].Data, IOVecs[I].ElmSize, IOVecs[I].NumElm, File) !=
+          IOVecs[I].NumElm)
+        return 1;
+    } else {
+      if (fseek(File, IOVecs[I].ElmSize * IOVecs[I].NumElm, SEEK_CUR) == -1)
+        return 1;
+    }
   }
   return 0;
 }
 
+static void initFileWriter(ProfDataWriter *This, FILE *File) {
+  This->Write = fileWriter;
+  This->WriterCtx = File;
+}
+
 COMPILER_RT_VISIBILITY ProfBufferIO *
 lprofCreateBufferIOInternal(void *File, uint32_t BufferSz) {
   FreeHook = &free;
   DynamicBufferIOBuffer = (uint8_t *)calloc(BufferSz, 1);
   VPBufferSize = BufferSz;
-  return lprofCreateBufferIO(fileWriter, File);
+  ProfDataWriter *fileWriter =
+      (ProfDataWriter *)calloc(sizeof(ProfDataWriter), 1);
+  initFileWriter(fileWriter, File);
+  ProfBufferIO *IO = lprofCreateBufferIO(fileWriter);
+  IO->OwnFileWriter = 1;
+  return IO;
 }
 
 static void setupIOBuffer() {
@@ -122,9 +137,10 @@
 
 /* Read profile data in \c ProfileFile and merge with in-memory
    profile counters. Returns -1 if there is fatal error, otheriwse
-   0 is returned.
+   0 is returned. Returning 0 does not mean merge is actually
+   performed. If merge is actually done, *MergeDone is set to 1.
 */
-static int doProfileMerging(FILE *ProfileFile) {
+static int doProfileMerging(FILE *ProfileFile, int *MergeDone) {
   uint64_t ProfileFileSize;
   char *ProfileBuffer;
 
@@ -169,6 +185,8 @@
   __llvm_profile_merge_from_buffer(ProfileBuffer, ProfileFileSize);
   (void)munmap(ProfileBuffer, ProfileFileSize);
 
+  *MergeDone = 1;
+
   return 0;
 }
 
@@ -190,7 +208,7 @@
  * dumper. With profile merging enabled, each executable as well as any of
  * its instrumented shared libraries dump profile data into their own data file.
 */
-static FILE *openFileForMerging(const char *ProfileFileName) {
+static FILE *openFileForMerging(const char *ProfileFileName, int *MergeDone) {
   FILE *ProfileFile;
   int rc;
 
@@ -199,15 +217,14 @@
   if (!ProfileFile)
     return NULL;
 
-  rc = doProfileMerging(ProfileFile);
-  if (rc || COMPILER_RT_FTRUNCATE(ProfileFile, 0L) ||
+  rc = doProfileMerging(ProfileFile, MergeDone);
+  if (rc || (!*MergeDone && COMPILER_RT_FTRUNCATE(ProfileFile, 0L)) ||
       fseek(ProfileFile, 0L, SEEK_SET) == -1) {
     PROF_ERR("Profile Merging of file %s failed: %s\n", ProfileFileName,
              strerror(errno));
     fclose(ProfileFile);
     return NULL;
   }
-  fseek(ProfileFile, 0L, SEEK_SET);
   return ProfileFile;
 }
 
@@ -216,17 +233,20 @@
   int RetVal;
   FILE *OutputFile;
 
+  int MergeDone = 0;
   if (!doMerging())
     OutputFile = fopen(OutputName, "ab");
   else
-    OutputFile = openFileForMerging(OutputName);
+    OutputFile = openFileForMerging(OutputName, &MergeDone);
 
   if (!OutputFile)
     return -1;
 
   FreeHook = &free;
   setupIOBuffer();
-  RetVal = lprofWriteData(fileWriter, OutputFile, lprofGetVPDataReader());
+  ProfDataWriter fileWriter;
+  initFileWriter(&fileWriter, OutputFile);
+  RetVal = lprofWriteData(&fileWriter, lprofGetVPDataReader(), MergeDone);
 
   fclose(OutputFile);
   return RetVal;
@@ -499,8 +519,10 @@
 
   EnvFilenamePat = getFilenamePatFromEnv();
   if (EnvFilenamePat) {
-    SelectedPat = EnvFilenamePat;
-    PNS = PNS_environment;
+    /* Pass CopyFilenamePat = 1, to ensure that the filename would be valid 
+       at the  moment when __llvm_profile_write_file() gets executed. */
+    parseAndSetFilename(EnvFilenamePat, PNS_environment, 1);
+    return;
   } else if (hasCommandLineOverrider) {
     SelectedPat = INSTR_PROF_PROFILE_NAME_VAR;
     PNS = PNS_command_line;
@@ -530,6 +552,7 @@
   int rc, Length;
   const char *Filename;
   char *FilenameBuf;
+  int PDeathSig = 0;
 
   if (lprofProfileDumped()) {
     PROF_NOTE("Profile data not written to file: %s.\n", 
@@ -556,10 +579,18 @@
     return -1;
   }
 
+  // Temporarily suspend getting SIGKILL when the parent exits.
+  PDeathSig = lprofSuspendSigKill();
+
   /* Write profile data to the file. */
   rc = writeFile(Filename);
   if (rc)
     PROF_ERR("Failed to write file \"%s\": %s\n", Filename, strerror(errno));
+
+  // Restore SIGKILL.
+  if (PDeathSig == 1)
+    lprofRestoreSigKill();
+
   return rc;
 }
 
diff --git a/lib/profile/InstrProfilingInternal.h b/lib/profile/InstrProfilingInternal.h
index c73b291..36490ef 100644
--- a/lib/profile/InstrProfilingInternal.h
+++ b/lib/profile/InstrProfilingInternal.h
@@ -48,17 +48,21 @@
   size_t NumElm;
 } ProfDataIOVec;
 
-typedef uint32_t (*WriterCallback)(ProfDataIOVec *, uint32_t NumIOVecs,
-                                   void **WriterCtx);
+struct ProfDataWriter;
+typedef uint32_t (*WriterCallback)(struct ProfDataWriter *This, ProfDataIOVec *,
+                                   uint32_t NumIOVecs);
+
+typedef struct ProfDataWriter {
+  WriterCallback Write;
+  void *WriterCtx;
+} ProfDataWriter;
 
 /*!
  * The data structure for buffered IO of profile data.
  */
 typedef struct ProfBufferIO {
-  /* File handle.  */
-  void *File;
-  /* Low level IO callback. */
-  WriterCallback FileWriter;
+  ProfDataWriter *FileWriter;
+  uint32_t OwnFileWriter;
   /* The start of the buffer. */
   uint8_t *BufferStart;
   /* Total size of the buffer. */
@@ -73,7 +77,7 @@
 /*!
  * This is the interface to create a handle for buffered IO.
  */
-ProfBufferIO *lprofCreateBufferIO(WriterCallback FileWriter, void *File);
+ProfBufferIO *lprofCreateBufferIO(ProfDataWriter *FileWriter);
 
 /*!
  * The interface to destroy the bufferIO handle and reclaim
@@ -96,8 +100,9 @@
 /* The low level interface to write data into a buffer. It is used as the
  * callback by other high level writer methods such as buffered IO writer
  * and profile data writer.  */
-uint32_t lprofBufferWriter(ProfDataIOVec *IOVecs, uint32_t NumIOVecs,
-                           void **WriterCtx);
+uint32_t lprofBufferWriter(ProfDataWriter *This, ProfDataIOVec *IOVecs,
+                           uint32_t NumIOVecs);
+void initBufferWriter(ProfDataWriter *BufferWriter, char *Buffer);
 
 struct ValueProfData;
 struct ValueProfRecord;
@@ -133,15 +138,17 @@
                                         uint32_t N);
 } VPDataReaderType;
 
-int lprofWriteData(WriterCallback Writer, void *WriterCtx,
-                   VPDataReaderType *VPDataReader);
-int lprofWriteDataImpl(WriterCallback Writer, void *WriterCtx,
+/* Write profile data to destinitation. If SkipNameDataWrite is set to 1,
+   the name data is already in destintation, we just skip over it. */
+int lprofWriteData(ProfDataWriter *Writer, VPDataReaderType *VPDataReader,
+                   int SkipNameDataWrite);
+int lprofWriteDataImpl(ProfDataWriter *Writer,
                        const __llvm_profile_data *DataBegin,
                        const __llvm_profile_data *DataEnd,
                        const uint64_t *CountersBegin,
                        const uint64_t *CountersEnd,
                        VPDataReaderType *VPDataReader, const char *NamesBegin,
-                       const char *NamesEnd);
+                       const char *NamesEnd, int SkipNameDataWrite);
 
 /* Merge value profile data pointed to by SrcValueProfData into
  * in-memory profile counters pointed by to DstData.  */
diff --git a/lib/profile/InstrProfilingNameVar.c b/lib/profile/InstrProfilingNameVar.c
new file mode 100644
index 0000000..264568f
--- /dev/null
+++ b/lib/profile/InstrProfilingNameVar.c
@@ -0,0 +1,18 @@
+/*===- InstrProfilingNameVar.c - profile name variable setup  -------------===*\
+|*
+|*                     The LLVM Compiler Infrastructure
+|*
+|* This file is distributed under the University of Illinois Open Source
+|* License. See LICENSE.TXT for details.
+|*
+\*===----------------------------------------------------------------------===*/
+
+#include "InstrProfiling.h"
+
+/* char __llvm_profile_filename[1]
+ *
+ * The runtime should only provide its own definition of this symbol when the
+ * user has not specified one. Set this up by moving the runtime's copy of this
+ * symbol to an object file within the archive.
+ */
+COMPILER_RT_WEAK char INSTR_PROF_PROFILE_NAME_VAR[1] = {0};
diff --git a/lib/profile/InstrProfilingUtil.c b/lib/profile/InstrProfilingUtil.c
index 321c719..fb68f30 100644
--- a/lib/profile/InstrProfilingUtil.c
+++ b/lib/profile/InstrProfilingUtil.c
@@ -29,6 +29,11 @@
 #include <stdlib.h>
 #include <string.h>
 
+#if defined(__linux__)
+#include <signal.h>
+#include <sys/prctl.h>
+#endif
+
 COMPILER_RT_VISIBILITY
 void __llvm_profile_recursive_mkdir(char *path) {
   int i;
@@ -219,3 +224,21 @@
 #endif
   return Sep;
 }
+
+COMPILER_RT_VISIBILITY int lprofSuspendSigKill() {
+#if defined(__linux__)
+  int PDeachSig = 0;
+  /* Temporarily suspend getting SIGKILL upon exit of the parent process. */
+  if (prctl(PR_GET_PDEATHSIG, &PDeachSig) == 0 && PDeachSig == SIGKILL)
+    prctl(PR_SET_PDEATHSIG, 0);
+  return (PDeachSig == SIGKILL);
+#else
+  return 0;
+#endif
+}
+
+COMPILER_RT_VISIBILITY void lprofRestoreSigKill() {
+#if defined(__linux__)
+  prctl(PR_SET_PDEATHSIG, SIGKILL);
+#endif
+}
diff --git a/lib/profile/InstrProfilingUtil.h b/lib/profile/InstrProfilingUtil.h
index a80fde7..9698599 100644
--- a/lib/profile/InstrProfilingUtil.h
+++ b/lib/profile/InstrProfilingUtil.h
@@ -51,4 +51,12 @@
 unsigned lprofBoolCmpXchg(void **Ptr, void *OldV, void *NewV);
 void *lprofPtrFetchAdd(void **Mem, long ByteIncr);
 
+/* Temporarily suspend SIGKILL. Return value of 1 means a restore is needed.
+ * Other return values mean no restore is needed.
+ */
+int lprofSuspendSigKill();
+
+/* Restore previously suspended SIGKILL. */
+void lprofRestoreSigKill();
+
 #endif /* PROFILE_INSTRPROFILINGUTIL_H */
diff --git a/lib/profile/InstrProfilingValue.c b/lib/profile/InstrProfilingValue.c
index 6648f89..4bc7bb6 100644
--- a/lib/profile/InstrProfilingValue.c
+++ b/lib/profile/InstrProfilingValue.c
@@ -22,7 +22,7 @@
 static int OutOfNodesWarnings = 0;
 static int hasNonDefaultValsPerSite = 0;
 #define INSTR_PROF_MAX_VP_WARNS 10
-#define INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE 8
+#define INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE 16
 #define INSTR_PROF_VNODE_POOL_SIZE 1024
 
 #ifndef _MSC_VER
@@ -220,6 +220,35 @@
 }
 
 /*
+ * The target values are partitioned into multiple regions/ranges. There is one
+ * contiguous region which is precise -- every value in the range is tracked
+ * individually. A value outside the precise region will be collapsed into one
+ * value depending on the region it falls in.
+ *
+ * There are three regions:
+ * 1. (-inf, PreciseRangeStart) and (PreciseRangeLast, LargeRangeValue) belong
+ * to one region -- all values here should be mapped to one value of
+ * "PreciseRangeLast + 1".
+ * 2. [PreciseRangeStart, PreciseRangeLast]
+ * 3. Large values: [LargeValue, +inf) maps to one value of LargeValue.
+ *
+ * The range for large values is optional. The default value of INT64_MIN
+ * indicates it is not specified.
+ */
+COMPILER_RT_VISIBILITY void __llvm_profile_instrument_range(
+    uint64_t TargetValue, void *Data, uint32_t CounterIndex,
+    int64_t PreciseRangeStart, int64_t PreciseRangeLast, int64_t LargeValue) {
+
+  if (LargeValue != INT64_MIN && (int64_t)TargetValue >= LargeValue)
+    TargetValue = LargeValue;
+  else if ((int64_t)TargetValue < PreciseRangeStart ||
+           (int64_t)TargetValue > PreciseRangeLast)
+    TargetValue = PreciseRangeLast + 1;
+
+  __llvm_profile_instrument_target(TargetValue, Data, CounterIndex);
+}
+
+/*
  * A wrapper struct that represents value profile runtime data.
  * Like InstrProfRecord class which is used by profiling host tools,
  * ValueProfRuntimeRecord also implements the abstract intefaces defined in
diff --git a/lib/profile/InstrProfilingWriter.c b/lib/profile/InstrProfilingWriter.c
index 95f37e8..d4c9b9b 100644
--- a/lib/profile/InstrProfilingWriter.c
+++ b/lib/profile/InstrProfilingWriter.c
@@ -31,41 +31,44 @@
 /* The buffer writer is reponsponsible in keeping writer state
  * across the call.
  */
-COMPILER_RT_VISIBILITY uint32_t lprofBufferWriter(ProfDataIOVec *IOVecs,
-                                                  uint32_t NumIOVecs,
-                                                  void **WriterCtx) {
+COMPILER_RT_VISIBILITY uint32_t lprofBufferWriter(ProfDataWriter *This,
+                                                  ProfDataIOVec *IOVecs,
+                                                  uint32_t NumIOVecs) {
   uint32_t I;
-  char **Buffer = (char **)WriterCtx;
+  char **Buffer = (char **)&This->WriterCtx;
   for (I = 0; I < NumIOVecs; I++) {
     size_t Length = IOVecs[I].ElmSize * IOVecs[I].NumElm;
-    memcpy(*Buffer, IOVecs[I].Data, Length);
+    if (IOVecs[I].Data)
+      memcpy(*Buffer, IOVecs[I].Data, Length);
     *Buffer += Length;
   }
   return 0;
 }
 
-static void llvmInitBufferIO(ProfBufferIO *BufferIO, WriterCallback FileWriter,
-                             void *File, uint8_t *Buffer, uint32_t BufferSz) {
-  BufferIO->File = File;
+static void llvmInitBufferIO(ProfBufferIO *BufferIO, ProfDataWriter *FileWriter,
+                             uint8_t *Buffer, uint32_t BufferSz) {
   BufferIO->FileWriter = FileWriter;
+  BufferIO->OwnFileWriter = 0;
   BufferIO->BufferStart = Buffer;
   BufferIO->BufferSz = BufferSz;
   BufferIO->CurOffset = 0;
 }
 
 COMPILER_RT_VISIBILITY ProfBufferIO *
-lprofCreateBufferIO(WriterCallback FileWriter, void *File) {
+lprofCreateBufferIO(ProfDataWriter *FileWriter) {
   uint8_t *Buffer = DynamicBufferIOBuffer;
   uint32_t BufferSize = VPBufferSize;
   if (!Buffer) {
     Buffer = &BufferIOBuffer[0];
     BufferSize = sizeof(BufferIOBuffer);
   }
-  llvmInitBufferIO(&TheBufferIO, FileWriter, File, Buffer, BufferSize);
+  llvmInitBufferIO(&TheBufferIO, FileWriter, Buffer, BufferSize);
   return &TheBufferIO;
 }
 
 COMPILER_RT_VISIBILITY void lprofDeleteBufferIO(ProfBufferIO *BufferIO) {
+  if (BufferIO->OwnFileWriter)
+    FreeHook(BufferIO->FileWriter);
   if (DynamicBufferIOBuffer) {
     FreeHook(DynamicBufferIOBuffer);
     DynamicBufferIOBuffer = 0;
@@ -83,13 +86,16 @@
   /* Special case, bypass the buffer completely. */
   ProfDataIOVec IO[] = {{Data, sizeof(uint8_t), Size}};
   if (Size > BufferIO->BufferSz) {
-    if (BufferIO->FileWriter(IO, 1, &BufferIO->File))
+    if (BufferIO->FileWriter->Write(BufferIO->FileWriter, IO, 1))
       return -1;
   } else {
     /* Write the data to buffer */
     uint8_t *Buffer = BufferIO->BufferStart + BufferIO->CurOffset;
-    lprofBufferWriter(IO, 1, (void **)&Buffer);
-    BufferIO->CurOffset = Buffer - BufferIO->BufferStart;
+    ProfDataWriter BufferWriter;
+    initBufferWriter(&BufferWriter, (char *)Buffer);
+    lprofBufferWriter(&BufferWriter, IO, 1);
+    BufferIO->CurOffset =
+        (uint8_t *)BufferWriter.WriterCtx - BufferIO->BufferStart;
   }
   return 0;
 }
@@ -98,7 +104,7 @@
   if (BufferIO->CurOffset) {
     ProfDataIOVec IO[] = {
         {BufferIO->BufferStart, sizeof(uint8_t), BufferIO->CurOffset}};
-    if (BufferIO->FileWriter(IO, 1, &BufferIO->File))
+    if (BufferIO->FileWriter->Write(BufferIO->FileWriter, IO, 1))
       return -1;
     BufferIO->CurOffset = 0;
   }
@@ -201,7 +207,7 @@
   return 0;
 }
 
-static int writeValueProfData(WriterCallback Writer, void *WriterCtx,
+static int writeValueProfData(ProfDataWriter *Writer,
                               VPDataReaderType *VPDataReader,
                               const __llvm_profile_data *DataBegin,
                               const __llvm_profile_data *DataEnd) {
@@ -211,7 +217,7 @@
   if (!VPDataReader)
     return 0;
 
-  BufferIO = lprofCreateBufferIO(Writer, WriterCtx);
+  BufferIO = lprofCreateBufferIO(Writer);
 
   for (DI = DataBegin; DI < DataEnd; DI++) {
     if (writeOneValueProfData(BufferIO, VPDataReader, DI))
@@ -225,9 +231,9 @@
   return 0;
 }
 
-COMPILER_RT_VISIBILITY int lprofWriteData(WriterCallback Writer,
-                                          void *WriterCtx,
-                                          VPDataReaderType *VPDataReader) {
+COMPILER_RT_VISIBILITY int lprofWriteData(ProfDataWriter *Writer,
+                                          VPDataReaderType *VPDataReader,
+                                          int SkipNameDataWrite) {
   /* Match logic in __llvm_profile_write_buffer(). */
   const __llvm_profile_data *DataBegin = __llvm_profile_begin_data();
   const __llvm_profile_data *DataEnd = __llvm_profile_end_data();
@@ -235,18 +241,17 @@
   const uint64_t *CountersEnd = __llvm_profile_end_counters();
   const char *NamesBegin = __llvm_profile_begin_names();
   const char *NamesEnd = __llvm_profile_end_names();
-  return lprofWriteDataImpl(Writer, WriterCtx, DataBegin, DataEnd,
-                            CountersBegin, CountersEnd, VPDataReader,
-                            NamesBegin, NamesEnd);
+  return lprofWriteDataImpl(Writer, DataBegin, DataEnd, CountersBegin,
+                            CountersEnd, VPDataReader, NamesBegin, NamesEnd,
+                            SkipNameDataWrite);
 }
 
 COMPILER_RT_VISIBILITY int
-lprofWriteDataImpl(WriterCallback Writer, void *WriterCtx,
-                   const __llvm_profile_data *DataBegin,
+lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin,
                    const __llvm_profile_data *DataEnd,
                    const uint64_t *CountersBegin, const uint64_t *CountersEnd,
                    VPDataReaderType *VPDataReader, const char *NamesBegin,
-                   const char *NamesEnd) {
+                   const char *NamesEnd, int SkipNameDataWrite) {
 
   /* Calculate size of sections. */
   const uint64_t DataSize = __llvm_profile_get_data_size(DataBegin, DataEnd);
@@ -268,14 +273,14 @@
 #include "InstrProfData.inc"
 
   /* Write the data. */
-  ProfDataIOVec IOVec[] = {{&Header, sizeof(__llvm_profile_header), 1},
-                           {DataBegin, sizeof(__llvm_profile_data), DataSize},
-                           {CountersBegin, sizeof(uint64_t), CountersSize},
-                           {NamesBegin, sizeof(uint8_t), NamesSize},
-                           {Zeroes, sizeof(uint8_t), Padding}};
-  if (Writer(IOVec, sizeof(IOVec) / sizeof(*IOVec), &WriterCtx))
+  ProfDataIOVec IOVec[] = {
+      {&Header, sizeof(__llvm_profile_header), 1},
+      {DataBegin, sizeof(__llvm_profile_data), DataSize},
+      {CountersBegin, sizeof(uint64_t), CountersSize},
+      {SkipNameDataWrite ? NULL : NamesBegin, sizeof(uint8_t), NamesSize},
+      {Zeroes, sizeof(uint8_t), Padding}};
+  if (Writer->Write(Writer, IOVec, sizeof(IOVec) / sizeof(*IOVec)))
     return -1;
 
-  return writeValueProfData(Writer, WriterCtx, VPDataReader, DataBegin,
-                            DataEnd);
+  return writeValueProfData(Writer, VPDataReader, DataBegin, DataEnd);
 }
diff --git a/lib/safestack/safestack.cc b/lib/safestack/safestack.cc
index b194b6c..d783cd5 100644
--- a/lib/safestack/safestack.cc
+++ b/lib/safestack/safestack.cc
@@ -21,7 +21,9 @@
 #include <unistd.h>
 #include <sys/resource.h>
 #include <sys/types.h>
+#if !defined(__NetBSD__)
 #include <sys/user.h>
+#endif
 
 #include "interception/interception.h"
 #include "sanitizer_common/sanitizer_common.h"
diff --git a/lib/sanitizer_common/CMakeLists.txt b/lib/sanitizer_common/CMakeLists.txt
index 6cdc918..db077b5 100644
--- a/lib/sanitizer_common/CMakeLists.txt
+++ b/lib/sanitizer_common/CMakeLists.txt
@@ -6,8 +6,11 @@
   sanitizer_common.cc
   sanitizer_deadlock_detector1.cc
   sanitizer_deadlock_detector2.cc
+  sanitizer_errno.cc
+  sanitizer_file.cc
   sanitizer_flags.cc
   sanitizer_flag_parser.cc
+  sanitizer_fuchsia.cc
   sanitizer_libc.cc
   sanitizer_libignore.cc
   sanitizer_linux.cc
@@ -15,6 +18,7 @@
   sanitizer_mac.cc
   sanitizer_persistent_allocator.cc
   sanitizer_platform_limits_linux.cc
+  sanitizer_platform_limits_netbsd.cc
   sanitizer_platform_limits_posix.cc
   sanitizer_posix.cc
   sanitizer_printf.cc
@@ -28,6 +32,7 @@
   sanitizer_stoptheworld_mac.cc
   sanitizer_suppressions.cc
   sanitizer_symbolizer.cc
+  sanitizer_symbolizer_fuchsia.cc
   sanitizer_symbolizer_libbacktrace.cc
   sanitizer_symbolizer_mac.cc
   sanitizer_symbolizer_win.cc
@@ -53,12 +58,13 @@
 
 set(SANITIZER_LIBCDEP_SOURCES
   sanitizer_common_libcdep.cc
+  sanitizer_allocator_checks.cc
   sancov_flags.cc
-  sanitizer_coverage_libcdep.cc
+  sanitizer_coverage_fuchsia.cc
   sanitizer_coverage_libcdep_new.cc
-  sanitizer_coverage_mapping_libcdep.cc
   sanitizer_coverage_win_sections.cc
   sanitizer_linux_libcdep.cc
+  sanitizer_mac_libcdep.cc
   sanitizer_posix_libcdep.cc
   sanitizer_stacktrace_libcdep.cc
   sanitizer_stoptheworld_linux_libcdep.cc
@@ -94,9 +100,13 @@
   sanitizer_common_syscalls.inc
   sanitizer_deadlock_detector.h
   sanitizer_deadlock_detector_interface.h
+  sanitizer_errno.h
+  sanitizer_errno_codes.h
+  sanitizer_file.h
   sanitizer_flag_parser.h
   sanitizer_flags.h
   sanitizer_flags.inc
+  sanitizer_fuchsia.h
   sanitizer_interface_internal.h
   sanitizer_internal_defs.h
   sanitizer_lfstack.h
@@ -110,6 +120,7 @@
   sanitizer_placement_new.h
   sanitizer_platform.h
   sanitizer_platform_interceptors.h
+  sanitizer_platform_limits_netbsd.h
   sanitizer_platform_limits_posix.h
   sanitizer_posix.h
   sanitizer_procmaps.h
@@ -189,6 +200,21 @@
   CFLAGS ${SANITIZER_CFLAGS}
   DEFS ${SANITIZER_COMMON_DEFINITIONS})
 
+set(SANITIZER_NO_WEAK_HOOKS_CFLAGS ${SANITIZER_CFLAGS})
+list(APPEND SANITIZER_NO_WEAK_HOOKS_CFLAGS "-DSANITIZER_SUPPORTS_WEAK_HOOKS=0")
+add_compiler_rt_object_libraries(RTSanitizerCommonNoHooks
+  ${OS_OPTION}
+  ARCHS ${SANITIZER_COMMON_SUPPORTED_ARCH}
+  SOURCES ${SANITIZER_SOURCES}
+  CFLAGS ${SANITIZER_NO_WEAK_HOOKS_CFLAGS}
+  DEFS ${SANITIZER_COMMON_DEFINITIONS})
+add_compiler_rt_object_libraries(RTSanitizerCommonLibcNoHooks
+  ${OS_OPTION}
+  ARCHS ${SANITIZER_COMMON_SUPPORTED_ARCH}
+  SOURCES ${SANITIZER_LIBCDEP_SOURCES}
+  CFLAGS ${SANITIZER_NO_WEAK_HOOKS_CFLAGS}
+  DEFS ${SANITIZER_COMMON_DEFINITIONS})
+
 if(WIN32)
   add_compiler_rt_object_libraries(SanitizerCommonWeakInterception
     ${SANITIZER_COMMON_SUPPORTED_OS}
diff --git a/lib/sanitizer_common/sanitizer_allocator.cc b/lib/sanitizer_common/sanitizer_allocator.cc
index d47b5b4..84f523c 100644
--- a/lib/sanitizer_common/sanitizer_allocator.cc
+++ b/lib/sanitizer_common/sanitizer_allocator.cc
@@ -14,6 +14,7 @@
 
 #include "sanitizer_allocator.h"
 
+#include "sanitizer_allocator_checks.h"
 #include "sanitizer_allocator_internal.h"
 #include "sanitizer_atomic.h"
 #include "sanitizer_common.h"
@@ -94,8 +95,7 @@
     SpinMutexLock l(&internal_alloc_init_mu);
     if (atomic_load(&internal_allocator_initialized, memory_order_relaxed) ==
         0) {
-      internal_allocator_instance->Init(
-          /* may_return_null */ false, kReleaseToOSIntervalNever);
+      internal_allocator_instance->Init(kReleaseToOSIntervalNever);
       atomic_store(&internal_allocator_initialized, 1, memory_order_release);
     }
   }
@@ -108,9 +108,9 @@
   if (cache == 0) {
     SpinMutexLock l(&internal_allocator_cache_mu);
     return internal_allocator()->Allocate(&internal_allocator_cache, size,
-                                          alignment, false);
+                                          alignment);
   }
-  return internal_allocator()->Allocate(cache, size, alignment, false);
+  return internal_allocator()->Allocate(cache, size, alignment);
 }
 
 static void *RawInternalRealloc(void *ptr, uptr size,
@@ -161,8 +161,8 @@
 }
 
 void *InternalCalloc(uptr count, uptr size, InternalAllocatorCache *cache) {
-  if (CallocShouldReturnNullDueToOverflow(count, size))
-    return internal_allocator()->ReturnNullOrDieOnBadRequest();
+  if (UNLIKELY(CheckForCallocOverflow(count, size)))
+    return InternalAllocator::FailureHandler::OnBadRequest();
   void *p = InternalAlloc(count * size, cache);
   if (p) internal_memset(p, 0, count * size);
   return p;
@@ -203,18 +203,15 @@
   low_level_alloc_callback = callback;
 }
 
-bool CallocShouldReturnNullDueToOverflow(uptr size, uptr n) {
-  if (!size) return false;
-  uptr max = (uptr)-1L;
-  return (max / size) < n;
+static atomic_uint8_t allocator_out_of_memory = {0};
+static atomic_uint8_t allocator_may_return_null = {0};
+
+bool IsAllocatorOutOfMemory() {
+  return atomic_load_relaxed(&allocator_out_of_memory);
 }
 
-static atomic_uint8_t reporting_out_of_memory = {0};
-
-bool IsReportingOOM() { return atomic_load_relaxed(&reporting_out_of_memory); }
-
-void NORETURN ReportAllocatorCannotReturnNull(bool out_of_memory) {
-  if (out_of_memory) atomic_store_relaxed(&reporting_out_of_memory, 1);
+// Prints error message and kills the program.
+void NORETURN ReportAllocatorCannotReturnNull() {
   Report("%s's allocator is terminating the process instead of returning 0\n",
          SanitizerToolName);
   Report("If you don't like this behavior set allocator_may_return_null=1\n");
@@ -222,4 +219,35 @@
   Die();
 }
 
+bool AllocatorMayReturnNull() {
+  return atomic_load(&allocator_may_return_null, memory_order_relaxed);
+}
+
+void SetAllocatorMayReturnNull(bool may_return_null) {
+  atomic_store(&allocator_may_return_null, may_return_null,
+               memory_order_relaxed);
+}
+
+void *ReturnNullOrDieOnFailure::OnBadRequest() {
+  if (AllocatorMayReturnNull())
+    return nullptr;
+  ReportAllocatorCannotReturnNull();
+}
+
+void *ReturnNullOrDieOnFailure::OnOOM() {
+  atomic_store_relaxed(&allocator_out_of_memory, 1);
+  if (AllocatorMayReturnNull())
+    return nullptr;
+  ReportAllocatorCannotReturnNull();
+}
+
+void NORETURN *DieOnFailure::OnBadRequest() {
+  ReportAllocatorCannotReturnNull();
+}
+
+void NORETURN *DieOnFailure::OnOOM() {
+  atomic_store_relaxed(&allocator_out_of_memory, 1);
+  ReportAllocatorCannotReturnNull();
+}
+
 } // namespace __sanitizer
diff --git a/lib/sanitizer_common/sanitizer_allocator.h b/lib/sanitizer_common/sanitizer_allocator.h
index 9a37a2f..3836836 100644
--- a/lib/sanitizer_common/sanitizer_allocator.h
+++ b/lib/sanitizer_common/sanitizer_allocator.h
@@ -24,12 +24,28 @@
 
 namespace __sanitizer {
 
-// Returns true if ReportAllocatorCannotReturnNull(true) was called.
-// Can be use to avoid memory hungry operations.
-bool IsReportingOOM();
+// Since flags are immutable and allocator behavior can be changed at runtime
+// (unit tests or ASan on Android are some examples), allocator_may_return_null
+// flag value is cached here and can be altered later.
+bool AllocatorMayReturnNull();
+void SetAllocatorMayReturnNull(bool may_return_null);
 
-// Prints error message and kills the program.
-void NORETURN ReportAllocatorCannotReturnNull(bool out_of_memory);
+// Allocator failure handling policies:
+// Implements AllocatorMayReturnNull policy, returns null when the flag is set,
+// dies otherwise.
+struct ReturnNullOrDieOnFailure {
+  static void *OnBadRequest();
+  static void *OnOOM();
+};
+// Always dies on the failure.
+struct DieOnFailure {
+  static void NORETURN *OnBadRequest();
+  static void NORETURN *OnOOM();
+};
+
+// Returns true if allocator detected OOM condition. Can be used to avoid memory
+// hungry operations. Set when AllocatorReturnNullOrDieOnOOM() is called.
+bool IsAllocatorOutOfMemory();
 
 // Allocators call these callbacks on mmap/munmap.
 struct NoOpMapUnmapCallback {
@@ -40,8 +56,18 @@
 // Callback type for iterating over chunks.
 typedef void (*ForEachChunkCallback)(uptr chunk, void *arg);
 
-// Returns true if calloc(size, n) should return 0 due to overflow in size*n.
-bool CallocShouldReturnNullDueToOverflow(uptr size, uptr n);
+INLINE u32 Rand(u32 *state) {  // ANSI C linear congruential PRNG.
+  return (*state = *state * 1103515245 + 12345) >> 16;
+}
+
+INLINE u32 RandN(u32 *state, u32 n) { return Rand(state) % n; }  // [0, n)
+
+template<typename T>
+INLINE void RandomShuffle(T *a, u32 n, u32 *rand_state) {
+  if (n <= 1) return;
+  for (u32 i = n - 1; i > 0; i--)
+    Swap(a[i], a[RandN(rand_state, i + 1)]);
+}
 
 #include "sanitizer_allocator_size_class_map.h"
 #include "sanitizer_allocator_stats.h"
diff --git a/lib/sanitizer_common/sanitizer_allocator_checks.cc b/lib/sanitizer_common/sanitizer_allocator_checks.cc
new file mode 100644
index 0000000..dc263db
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_allocator_checks.cc
@@ -0,0 +1,23 @@
+//===-- sanitizer_allocator_checks.cc ---------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Various checks shared between ThreadSanitizer, MemorySanitizer, etc. memory
+// allocators.
+//
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_errno.h"
+
+namespace __sanitizer {
+
+void SetErrnoToENOMEM() {
+  errno = errno_ENOMEM;
+}
+
+} // namespace __sanitizer
diff --git a/lib/sanitizer_common/sanitizer_allocator_checks.h b/lib/sanitizer_common/sanitizer_allocator_checks.h
new file mode 100644
index 0000000..b61a8b2
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_allocator_checks.h
@@ -0,0 +1,75 @@
+//===-- sanitizer_allocator_checks.h ----------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Various checks shared between ThreadSanitizer, MemorySanitizer, etc. memory
+// allocators.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SANITIZER_ALLOCATOR_CHECKS_H
+#define SANITIZER_ALLOCATOR_CHECKS_H
+
+#include "sanitizer_internal_defs.h"
+#include "sanitizer_common.h"
+#include "sanitizer_platform.h"
+
+namespace __sanitizer {
+
+// The following is defined in a separate compilation unit to avoid pulling in
+// sanitizer_errno.h in this header, which leads to conflicts when other system
+// headers include errno.h. This is usually the result of an unlikely event,
+// and as such we do not care as much about having it inlined.
+void SetErrnoToENOMEM();
+
+// A common errno setting logic shared by almost all sanitizer allocator APIs.
+INLINE void *SetErrnoOnNull(void *ptr) {
+  if (UNLIKELY(!ptr))
+    SetErrnoToENOMEM();
+  return ptr;
+}
+
+// In case of the check failure, the caller of the following Check... functions
+// should "return POLICY::OnBadRequest();" where POLICY is the current allocator
+// failure handling policy.
+
+// Checks aligned_alloc() parameters, verifies that the alignment is a power of
+// two and that the size is a multiple of alignment for POSIX implementation,
+// and a bit relaxed requirement for non-POSIX ones, that the size is a multiple
+// of alignment.
+INLINE bool CheckAlignedAllocAlignmentAndSize(uptr alignment, uptr size) {
+#if SANITIZER_POSIX
+  return IsPowerOfTwo(alignment) && (size & (alignment - 1)) == 0;
+#else
+  return size % alignment == 0;
+#endif
+}
+
+// Checks posix_memalign() parameters, verifies that alignment is a power of two
+// and a multiple of sizeof(void *).
+INLINE bool CheckPosixMemalignAlignment(uptr alignment) {
+  return IsPowerOfTwo(alignment) && (alignment % sizeof(void *)) == 0; // NOLINT
+}
+
+// Returns true if calloc(size, n) call overflows on size*n calculation.
+INLINE bool CheckForCallocOverflow(uptr size, uptr n) {
+  if (!size)
+    return false;
+  uptr max = (uptr)-1L;
+  return (max / size) < n;
+}
+
+// Returns true if the size passed to pvalloc overflows when rounded to the next
+// multiple of page_size.
+INLINE bool CheckForPvallocOverflow(uptr size, uptr page_size) {
+  return RoundUpTo(size, page_size) < size;
+}
+
+} // namespace __sanitizer
+
+#endif  // SANITIZER_ALLOCATOR_CHECKS_H
diff --git a/lib/sanitizer_common/sanitizer_allocator_combined.h b/lib/sanitizer_common/sanitizer_allocator_combined.h
index 19e1ae9..0d8a2a1 100644
--- a/lib/sanitizer_common/sanitizer_allocator_combined.h
+++ b/lib/sanitizer_common/sanitizer_allocator_combined.h
@@ -24,31 +24,26 @@
           class SecondaryAllocator>  // NOLINT
 class CombinedAllocator {
  public:
-  void InitCommon(bool may_return_null, s32 release_to_os_interval_ms) {
+  typedef typename SecondaryAllocator::FailureHandler FailureHandler;
+
+  void InitLinkerInitialized(s32 release_to_os_interval_ms) {
     primary_.Init(release_to_os_interval_ms);
-    atomic_store(&may_return_null_, may_return_null, memory_order_relaxed);
-  }
-
-  void InitLinkerInitialized(
-      bool may_return_null, s32 release_to_os_interval_ms) {
-    secondary_.InitLinkerInitialized(may_return_null);
+    secondary_.InitLinkerInitialized();
     stats_.InitLinkerInitialized();
-    InitCommon(may_return_null, release_to_os_interval_ms);
   }
 
-  void Init(bool may_return_null, s32 release_to_os_interval_ms) {
-    secondary_.Init(may_return_null);
+  void Init(s32 release_to_os_interval_ms) {
+    primary_.Init(release_to_os_interval_ms);
+    secondary_.Init();
     stats_.Init();
-    InitCommon(may_return_null, release_to_os_interval_ms);
   }
 
-  void *Allocate(AllocatorCache *cache, uptr size, uptr alignment,
-                 bool cleared = false, bool check_rss_limit = false) {
+  void *Allocate(AllocatorCache *cache, uptr size, uptr alignment) {
     // Returning 0 on malloc(0) may break a lot of code.
     if (size == 0)
       size = 1;
-    if (size + alignment < size) return ReturnNullOrDieOnBadRequest();
-    if (check_rss_limit && RssLimitIsExceeded()) return ReturnNullOrDieOnOOM();
+    if (size + alignment < size)
+      return FailureHandler::OnBadRequest();
     uptr original_size = size;
     // If alignment requirements are to be fulfilled by the frontend allocator
     // rather than by the primary or secondary, passing an alignment lower than
@@ -56,48 +51,24 @@
     // alignment check.
     if (alignment > 8)
       size = RoundUpTo(size, alignment);
-    void *res;
-    bool from_primary = primary_.CanAllocate(size, alignment);
     // The primary allocator should return a 2^x aligned allocation when
     // requested 2^x bytes, hence using the rounded up 'size' when being
     // serviced by the primary (this is no longer true when the primary is
     // using a non-fixed base address). The secondary takes care of the
     // alignment without such requirement, and allocating 'size' would use
     // extraneous memory, so we employ 'original_size'.
-    if (from_primary)
+    void *res;
+    if (primary_.CanAllocate(size, alignment))
       res = cache->Allocate(&primary_, primary_.ClassID(size));
     else
       res = secondary_.Allocate(&stats_, original_size, alignment);
+    if (!res)
+      return FailureHandler::OnOOM();
     if (alignment > 8)
       CHECK_EQ(reinterpret_cast<uptr>(res) & (alignment - 1), 0);
-    // When serviced by the secondary, the chunk comes from a mmap allocation
-    // and will be zero'd out anyway. We only need to clear our the chunk if
-    // it was serviced by the primary, hence using the rounded up 'size'.
-    if (cleared && res && from_primary)
-      internal_bzero_aligned16(res, RoundUpTo(size, 16));
     return res;
   }
 
-  bool MayReturnNull() const {
-    return atomic_load(&may_return_null_, memory_order_acquire);
-  }
-
-  void *ReturnNullOrDieOnBadRequest() {
-    if (MayReturnNull())
-      return nullptr;
-    ReportAllocatorCannotReturnNull(false);
-  }
-
-  void *ReturnNullOrDieOnOOM() {
-    if (MayReturnNull()) return nullptr;
-    ReportAllocatorCannotReturnNull(true);
-  }
-
-  void SetMayReturnNull(bool may_return_null) {
-    secondary_.SetMayReturnNull(may_return_null);
-    atomic_store(&may_return_null_, may_return_null, memory_order_release);
-  }
-
   s32 ReleaseToOSIntervalMs() const {
     return primary_.ReleaseToOSIntervalMs();
   }
@@ -106,13 +77,8 @@
     primary_.SetReleaseToOSIntervalMs(release_to_os_interval_ms);
   }
 
-  bool RssLimitIsExceeded() {
-    return atomic_load(&rss_limit_is_exceeded_, memory_order_acquire);
-  }
-
-  void SetRssLimitIsExceeded(bool rss_limit_is_exceeded) {
-    atomic_store(&rss_limit_is_exceeded_, rss_limit_is_exceeded,
-                 memory_order_release);
+  void ForceReleaseToOS() {
+    primary_.ForceReleaseToOS();
   }
 
   void Deallocate(AllocatorCache *cache, void *p) {
@@ -227,7 +193,5 @@
   PrimaryAllocator primary_;
   SecondaryAllocator secondary_;
   AllocatorGlobalStats stats_;
-  atomic_uint8_t may_return_null_;
-  atomic_uint8_t rss_limit_is_exceeded_;
 };
 
diff --git a/lib/sanitizer_common/sanitizer_allocator_interface.h b/lib/sanitizer_common/sanitizer_allocator_interface.h
index 74ee903..2f5ce31 100644
--- a/lib/sanitizer_common/sanitizer_allocator_interface.h
+++ b/lib/sanitizer_common/sanitizer_allocator_interface.h
@@ -38,9 +38,11 @@
 SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
     void __sanitizer_free_hook(void *ptr);
 
+SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE void
+__sanitizer_purge_allocator();
 
-SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
-    void __sanitizer_print_memory_profile(int top_percent);
+SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE void
+__sanitizer_print_memory_profile(uptr top_percent, uptr max_number_of_contexts);
 }  // extern "C"
 
 #endif  // SANITIZER_ALLOCATOR_INTERFACE_H
diff --git a/lib/sanitizer_common/sanitizer_allocator_internal.h b/lib/sanitizer_common/sanitizer_allocator_internal.h
index e939cbe..a791d0d 100644
--- a/lib/sanitizer_common/sanitizer_allocator_internal.h
+++ b/lib/sanitizer_common/sanitizer_allocator_internal.h
@@ -23,27 +23,32 @@
 // purposes.
 typedef CompactSizeClassMap InternalSizeClassMap;
 
-static const uptr kInternalAllocatorSpace = 0;
-static const u64 kInternalAllocatorSize = SANITIZER_MMAP_RANGE_SIZE;
 static const uptr kInternalAllocatorRegionSizeLog = 20;
-#if SANITIZER_WORDSIZE == 32
 static const uptr kInternalAllocatorNumRegions =
-    kInternalAllocatorSize >> kInternalAllocatorRegionSizeLog;
+    SANITIZER_MMAP_RANGE_SIZE >> kInternalAllocatorRegionSizeLog;
+#if SANITIZER_WORDSIZE == 32
 typedef FlatByteMap<kInternalAllocatorNumRegions> ByteMap;
 #else
-static const uptr kInternalAllocatorNumRegions =
-    kInternalAllocatorSize >> kInternalAllocatorRegionSizeLog;
 typedef TwoLevelByteMap<(kInternalAllocatorNumRegions >> 12), 1 << 12> ByteMap;
 #endif
-typedef SizeClassAllocator32<
-    kInternalAllocatorSpace, kInternalAllocatorSize, 0, InternalSizeClassMap,
-    kInternalAllocatorRegionSizeLog, ByteMap> PrimaryInternalAllocator;
+struct AP32 {
+  static const uptr kSpaceBeg = 0;
+  static const u64 kSpaceSize = SANITIZER_MMAP_RANGE_SIZE;
+  static const uptr kMetadataSize = 0;
+  typedef InternalSizeClassMap SizeClassMap;
+  static const uptr kRegionSizeLog = kInternalAllocatorRegionSizeLog;
+  typedef __sanitizer::ByteMap ByteMap;
+  typedef NoOpMapUnmapCallback MapUnmapCallback;
+  static const uptr kFlags = 0;
+};
+typedef SizeClassAllocator32<AP32> PrimaryInternalAllocator;
 
 typedef SizeClassAllocatorLocalCache<PrimaryInternalAllocator>
     InternalAllocatorCache;
 
 typedef CombinedAllocator<PrimaryInternalAllocator, InternalAllocatorCache,
-                          LargeMmapAllocator<> > InternalAllocator;
+                          LargeMmapAllocator<NoOpMapUnmapCallback, DieOnFailure>
+                         > InternalAllocator;
 
 void *InternalAlloc(uptr size, InternalAllocatorCache *cache = nullptr,
                     uptr alignment = 0);
diff --git a/lib/sanitizer_common/sanitizer_allocator_local_cache.h b/lib/sanitizer_common/sanitizer_allocator_local_cache.h
index e1172e0..1b3c2c0 100644
--- a/lib/sanitizer_common/sanitizer_allocator_local_cache.h
+++ b/lib/sanitizer_common/sanitizer_allocator_local_cache.h
@@ -26,9 +26,6 @@
 template <class SizeClassAllocator>
 struct SizeClassAllocator64LocalCache {
   typedef SizeClassAllocator Allocator;
-  static const uptr kNumClasses = SizeClassAllocator::kNumClasses;
-  typedef typename Allocator::SizeClassMapT SizeClassMap;
-  typedef typename Allocator::CompactPtrT CompactPtrT;
 
   void Init(AllocatorGlobalStats *s) {
     stats_.Init();
@@ -45,10 +42,12 @@
   void *Allocate(SizeClassAllocator *allocator, uptr class_id) {
     CHECK_NE(class_id, 0UL);
     CHECK_LT(class_id, kNumClasses);
-    stats_.Add(AllocatorStatAllocated, Allocator::ClassIdToSize(class_id));
     PerClass *c = &per_class_[class_id];
-    if (UNLIKELY(c->count == 0))
-      Refill(c, allocator, class_id);
+    if (UNLIKELY(c->count == 0)) {
+      if (UNLIKELY(!Refill(c, allocator, class_id)))
+        return nullptr;
+    }
+    stats_.Add(AllocatorStatAllocated, c->class_size);
     CHECK_GT(c->count, 0);
     CompactPtrT chunk = c->chunks[--c->count];
     void *res = reinterpret_cast<void *>(allocator->CompactPtrToPointer(
@@ -62,8 +61,8 @@
     // If the first allocator call on a new thread is a deallocation, then
     // max_count will be zero, leading to check failure.
     InitCache();
-    stats_.Sub(AllocatorStatAllocated, Allocator::ClassIdToSize(class_id));
     PerClass *c = &per_class_[class_id];
+    stats_.Sub(AllocatorStatAllocated, c->class_size);
     CHECK_NE(c->max_count, 0UL);
     if (UNLIKELY(c->count == c->max_count))
       Drain(c, allocator, class_id, c->max_count / 2);
@@ -74,38 +73,46 @@
   }
 
   void Drain(SizeClassAllocator *allocator) {
-    for (uptr class_id = 0; class_id < kNumClasses; class_id++) {
-      PerClass *c = &per_class_[class_id];
+    for (uptr i = 0; i < kNumClasses; i++) {
+      PerClass *c = &per_class_[i];
       while (c->count > 0)
-        Drain(c, allocator, class_id, c->count);
+        Drain(c, allocator, i, c->count);
     }
   }
 
-  // private:
+ private:
+  typedef typename Allocator::SizeClassMapT SizeClassMap;
+  static const uptr kNumClasses = SizeClassMap::kNumClasses;
+  typedef typename Allocator::CompactPtrT CompactPtrT;
+
   struct PerClass {
     u32 count;
     u32 max_count;
+    uptr class_size;
     CompactPtrT chunks[2 * SizeClassMap::kMaxNumCachedHint];
   };
   PerClass per_class_[kNumClasses];
   AllocatorStats stats_;
 
   void InitCache() {
-    if (per_class_[1].max_count)
+    if (LIKELY(per_class_[1].max_count))
       return;
     for (uptr i = 0; i < kNumClasses; i++) {
       PerClass *c = &per_class_[i];
       c->max_count = 2 * SizeClassMap::MaxCachedHint(i);
+      c->class_size = Allocator::ClassIdToSize(i);
     }
   }
 
-  NOINLINE void Refill(PerClass *c, SizeClassAllocator *allocator,
+  NOINLINE bool Refill(PerClass *c, SizeClassAllocator *allocator,
                        uptr class_id) {
     InitCache();
-    uptr num_requested_chunks = SizeClassMap::MaxCachedHint(class_id);
-    allocator->GetFromAllocator(&stats_, class_id, c->chunks,
-                                num_requested_chunks);
+    uptr num_requested_chunks = c->max_count / 2;
+    if (UNLIKELY(!allocator->GetFromAllocator(&stats_, class_id, c->chunks,
+                                              num_requested_chunks)))
+      return false;
     c->count = num_requested_chunks;
+    return true;
   }
 
   NOINLINE void Drain(PerClass *c, SizeClassAllocator *allocator, uptr class_id,
@@ -124,7 +131,6 @@
 struct SizeClassAllocator32LocalCache {
   typedef SizeClassAllocator Allocator;
   typedef typename Allocator::TransferBatch TransferBatch;
-  static const uptr kNumClasses = SizeClassAllocator::kNumClasses;
 
   void Init(AllocatorGlobalStats *s) {
     stats_.Init();
@@ -132,6 +138,21 @@
       s->Register(&stats_);
   }
 
+  // Returns a TransferBatch suitable for class_id.
+  TransferBatch *CreateBatch(uptr class_id, SizeClassAllocator *allocator,
+                             TransferBatch *b) {
+    if (uptr batch_class_id = per_class_[class_id].batch_class_id)
+      return (TransferBatch*)Allocate(allocator, batch_class_id);
+    return b;
+  }
+
+  // Destroys TransferBatch b.
+  void DestroyBatch(uptr class_id, SizeClassAllocator *allocator,
+                    TransferBatch *b) {
+    if (uptr batch_class_id = per_class_[class_id].batch_class_id)
+      Deallocate(allocator, batch_class_id, b);
+  }
+
   void Destroy(SizeClassAllocator *allocator, AllocatorGlobalStats *s) {
     Drain(allocator);
     if (s)
@@ -141,10 +162,12 @@
   void *Allocate(SizeClassAllocator *allocator, uptr class_id) {
     CHECK_NE(class_id, 0UL);
     CHECK_LT(class_id, kNumClasses);
-    stats_.Add(AllocatorStatAllocated, Allocator::ClassIdToSize(class_id));
     PerClass *c = &per_class_[class_id];
-    if (UNLIKELY(c->count == 0))
-      Refill(allocator, class_id);
+    if (UNLIKELY(c->count == 0)) {
+      if (UNLIKELY(!Refill(allocator, class_id)))
+        return nullptr;
+    }
+    stats_.Add(AllocatorStatAllocated, c->class_size);
     void *res = c->batch[--c->count];
     PREFETCH(c->batch[c->count - 1]);
     return res;
@@ -156,8 +179,8 @@
     // If the first allocator call on a new thread is a deallocation, then
     // max_count will be zero, leading to check failure.
     InitCache();
-    stats_.Sub(AllocatorStatAllocated, Allocator::ClassIdToSize(class_id));
     PerClass *c = &per_class_[class_id];
+    stats_.Sub(AllocatorStatAllocated, c->class_size);
     CHECK_NE(c->max_count, 0UL);
     if (UNLIKELY(c->count == c->max_count))
       Drain(allocator, class_id);
@@ -165,72 +188,68 @@
   }
 
   void Drain(SizeClassAllocator *allocator) {
-    for (uptr class_id = 0; class_id < kNumClasses; class_id++) {
-      PerClass *c = &per_class_[class_id];
+    for (uptr i = 0; i < kNumClasses; i++) {
+      PerClass *c = &per_class_[i];
       while (c->count > 0)
-        Drain(allocator, class_id);
+        Drain(allocator, i);
     }
   }
 
-  // private:
-  typedef typename SizeClassAllocator::SizeClassMapT SizeClassMap;
+ private:
+  typedef typename Allocator::SizeClassMapT SizeClassMap;
+  static const uptr kBatchClassID = SizeClassMap::kBatchClassID;
+  static const uptr kNumClasses = SizeClassMap::kNumClasses;
+  // If kUseSeparateSizeClassForBatch is true, all TransferBatch objects are
+  // allocated from kBatchClassID size class (except for those that are needed
+  // for kBatchClassID itself). The goal is to have TransferBatches in a totally
+  // different region of RAM to improve security.
+  static const bool kUseSeparateSizeClassForBatch =
+      Allocator::kUseSeparateSizeClassForBatch;
+
   struct PerClass {
     uptr count;
     uptr max_count;
+    uptr class_size;
+    uptr batch_class_id;
     void *batch[2 * TransferBatch::kMaxNumCached];
   };
   PerClass per_class_[kNumClasses];
   AllocatorStats stats_;
 
   void InitCache() {
-    if (per_class_[1].max_count)
+    if (LIKELY(per_class_[1].max_count))
       return;
+    const uptr batch_class_id = SizeClassMap::ClassID(sizeof(TransferBatch));
     for (uptr i = 0; i < kNumClasses; i++) {
       PerClass *c = &per_class_[i];
-      c->max_count = 2 * TransferBatch::MaxCached(i);
+      uptr max_cached = TransferBatch::MaxCached(i);
+      c->max_count = 2 * max_cached;
+      c->class_size = Allocator::ClassIdToSize(i);
+      // Precompute the class id to use to store batches for the current class
+      // id. 0 means the class size is large enough to store a batch within one
+      // of the chunks. If using a separate size class, it will always be
+      // kBatchClassID, except for kBatchClassID itself.
+      if (kUseSeparateSizeClassForBatch) {
+        c->batch_class_id = (i == kBatchClassID) ? 0 : kBatchClassID;
+      } else {
+        c->batch_class_id = (c->class_size <
+          TransferBatch::AllocationSizeRequiredForNElements(max_cached)) ?
+              batch_class_id : 0;
+      }
     }
   }
 
-  // TransferBatch class is declared in SizeClassAllocator.
-  // We transfer chunks between central and thread-local free lists in batches.
-  // For small size classes we allocate batches separately.
-  // For large size classes we may use one of the chunks to store the batch.
-  // sizeof(TransferBatch) must be a power of 2 for more efficient allocation.
-  static uptr SizeClassForTransferBatch(uptr class_id) {
-    if (Allocator::ClassIdToSize(class_id) <
-        TransferBatch::AllocationSizeRequiredForNElements(
-            TransferBatch::MaxCached(class_id)))
-      return SizeClassMap::ClassID(sizeof(TransferBatch));
-    return 0;
-  }
-
-  // Returns a TransferBatch suitable for class_id.
-  // For small size classes allocates the batch from the allocator.
-  // For large size classes simply returns b.
-  TransferBatch *CreateBatch(uptr class_id, SizeClassAllocator *allocator,
-                             TransferBatch *b) {
-    if (uptr batch_class_id = SizeClassForTransferBatch(class_id))
-      return (TransferBatch*)Allocate(allocator, batch_class_id);
-    return b;
-  }
-
-  // Destroys TransferBatch b.
-  // For small size classes deallocates b to the allocator.
-  // Does notthing for large size classes.
-  void DestroyBatch(uptr class_id, SizeClassAllocator *allocator,
-                    TransferBatch *b) {
-    if (uptr batch_class_id = SizeClassForTransferBatch(class_id))
-      Deallocate(allocator, batch_class_id, b);
-  }
-
-  NOINLINE void Refill(SizeClassAllocator *allocator, uptr class_id) {
+  NOINLINE bool Refill(SizeClassAllocator *allocator, uptr class_id) {
     InitCache();
     PerClass *c = &per_class_[class_id];
     TransferBatch *b = allocator->AllocateBatch(&stats_, this, class_id);
+    if (UNLIKELY(!b))
+      return false;
     CHECK_GT(b->Count(), 0);
     b->CopyToArray(c->batch);
     c->count = b->Count();
     DestroyBatch(class_id, allocator, b);
+    return true;
   }
 
   NOINLINE void Drain(SizeClassAllocator *allocator, uptr class_id) {
@@ -240,6 +259,10 @@
     uptr first_idx_to_drain = c->count - cnt;
     TransferBatch *b = CreateBatch(
         class_id, allocator, (TransferBatch *)c->batch[first_idx_to_drain]);
+    // Failure to allocate a batch while releasing memory is non recoverable.
+    // TODO(alekseys): Figure out how to do it without allocating a new batch.
+    if (UNLIKELY(!b))
+      DieOnFailure::OnOOM();
     b->SetFromArray(allocator->GetRegionBeginBySizeClass(class_id),
                     &c->batch[first_idx_to_drain], cnt);
     c->count -= cnt;
diff --git a/lib/sanitizer_common/sanitizer_allocator_primary32.h b/lib/sanitizer_common/sanitizer_allocator_primary32.h
index 2882afd..6c682af 100644
--- a/lib/sanitizer_common/sanitizer_allocator_primary32.h
+++ b/lib/sanitizer_common/sanitizer_allocator_primary32.h
@@ -24,7 +24,8 @@
 // be returned by MmapOrDie().
 //
 // Region:
-//   a result of a single call to MmapAlignedOrDie(kRegionSize, kRegionSize).
+//   a result of a single call to MmapAlignedOrDieOnFatalError(kRegionSize,
+//                                                             kRegionSize).
 // Since the regions are aligned by kRegionSize, there are exactly
 // kNumPossibleRegions possible regions in the address space and so we keep
 // a ByteMap possible_regions to store the size classes of each Region.
@@ -36,13 +37,30 @@
 //
 // In order to avoid false sharing the objects of this class should be
 // chache-line aligned.
-template <const uptr kSpaceBeg, const u64 kSpaceSize,
-          const uptr kMetadataSize, class SizeClassMap,
-          const uptr kRegionSizeLog,
-          class ByteMap,
-          class MapUnmapCallback = NoOpMapUnmapCallback>
+
+struct SizeClassAllocator32FlagMasks {  //  Bit masks.
+  enum {
+    kRandomShuffleChunks = 1,
+    kUseSeparateSizeClassForBatch = 2,
+  };
+};
+
+template <class Params>
 class SizeClassAllocator32 {
  public:
+  static const uptr kSpaceBeg = Params::kSpaceBeg;
+  static const u64 kSpaceSize = Params::kSpaceSize;
+  static const uptr kMetadataSize = Params::kMetadataSize;
+  typedef typename Params::SizeClassMap SizeClassMap;
+  static const uptr kRegionSizeLog = Params::kRegionSizeLog;
+  typedef typename Params::ByteMap ByteMap;
+  typedef typename Params::MapUnmapCallback MapUnmapCallback;
+
+  static const bool kRandomShuffleChunks = Params::kFlags &
+      SizeClassAllocator32FlagMasks::kRandomShuffleChunks;
+  static const bool kUseSeparateSizeClassForBatch = Params::kFlags &
+      SizeClassAllocator32FlagMasks::kUseSeparateSizeClassForBatch;
+
   struct TransferBatch {
     static const uptr kMaxNumCached = SizeClassMap::kMaxNumCachedHint - 2;
     void SetFromArray(uptr region_beg_unused, void *batch[], uptr count) {
@@ -79,15 +97,14 @@
 
   static const uptr kBatchSize = sizeof(TransferBatch);
   COMPILER_CHECK((kBatchSize & (kBatchSize - 1)) == 0);
-  COMPILER_CHECK(sizeof(TransferBatch) ==
-                 SizeClassMap::kMaxNumCachedHint * sizeof(uptr));
+  COMPILER_CHECK(kBatchSize == SizeClassMap::kMaxNumCachedHint * sizeof(uptr));
 
   static uptr ClassIdToSize(uptr class_id) {
-    return SizeClassMap::Size(class_id);
+    return (class_id == SizeClassMap::kBatchClassID) ?
+        kBatchSize : SizeClassMap::Size(class_id);
   }
 
-  typedef SizeClassAllocator32<kSpaceBeg, kSpaceSize, kMetadataSize,
-      SizeClassMap, kRegionSizeLog, ByteMap, MapUnmapCallback> ThisT;
+  typedef SizeClassAllocator32<Params> ThisT;
   typedef SizeClassAllocator32LocalCache<ThisT> AllocatorCache;
 
   void Init(s32 release_to_os_interval_ms) {
@@ -103,8 +120,11 @@
     // This is empty here. Currently only implemented in 64-bit allocator.
   }
 
+  void ForceReleaseToOS() {
+    // Currently implemented in 64-bit allocator only.
+  }
+
   void *MapWithCallback(uptr size) {
-    size = RoundUpTo(size, GetPageSizeCached());
     void *res = MmapOrDie(size, "SizeClassAllocator32");
     MapUnmapCallback().OnMap((uptr)res, size);
     return res;
@@ -136,8 +156,9 @@
     CHECK_LT(class_id, kNumClasses);
     SizeClassInfo *sci = GetSizeClassInfo(class_id);
     SpinMutexLock l(&sci->mutex);
-    if (sci->free_list.empty())
-      PopulateFreeList(stat, c, sci, class_id);
+    if (sci->free_list.empty() &&
+        UNLIKELY(!PopulateFreeList(stat, c, sci, class_id)))
+      return nullptr;
     CHECK(!sci->free_list.empty());
     TransferBatch *b = sci->free_list.front();
     sci->free_list.pop_front();
@@ -147,9 +168,9 @@
   NOINLINE void DeallocateBatch(AllocatorStats *stat, uptr class_id,
                                 TransferBatch *b) {
     CHECK_LT(class_id, kNumClasses);
+    CHECK_GT(b->Count(), 0);
     SizeClassInfo *sci = GetSizeClassInfo(class_id);
     SpinMutexLock l(&sci->mutex);
-    CHECK_GT(b->Count(), 0);
     sci->free_list.push_front(b);
   }
 
@@ -247,7 +268,8 @@
   struct SizeClassInfo {
     SpinMutex mutex;
     IntrusiveList<TransferBatch> free_list;
-    char padding[kCacheLineSize - sizeof(uptr) -
+    u32 rand_state;
+    char padding[kCacheLineSize - 2 * sizeof(uptr) -
                  sizeof(IntrusiveList<TransferBatch>)];
   };
   COMPILER_CHECK(sizeof(SizeClassInfo) == kCacheLineSize);
@@ -264,11 +286,13 @@
 
   uptr AllocateRegion(AllocatorStats *stat, uptr class_id) {
     CHECK_LT(class_id, kNumClasses);
-    uptr res = reinterpret_cast<uptr>(MmapAlignedOrDie(kRegionSize, kRegionSize,
-                                      "SizeClassAllocator32"));
+    uptr res = reinterpret_cast<uptr>(MmapAlignedOrDieOnFatalError(
+        kRegionSize, kRegionSize, "SizeClassAllocator32"));
+    if (UNLIKELY(!res))
+      return 0;
     MapUnmapCallback().OnMap(res, kRegionSize);
     stat->Add(AllocatorStatMapped, kRegionSize);
-    CHECK_EQ(0U, (res & (kRegionSize - 1)));
+    CHECK(IsAligned(res, kRegionSize));
     possible_regions.set(ComputeRegionId(res), static_cast<u8>(class_id));
     return res;
   }
@@ -278,33 +302,69 @@
     return &size_class_info_array[class_id];
   }
 
-  void PopulateFreeList(AllocatorStats *stat, AllocatorCache *c,
-                        SizeClassInfo *sci, uptr class_id) {
-    uptr size = ClassIdToSize(class_id);
-    uptr reg = AllocateRegion(stat, class_id);
-    uptr n_chunks = kRegionSize / (size + kMetadataSize);
-    uptr max_count = TransferBatch::MaxCached(class_id);
-    TransferBatch *b = nullptr;
-    for (uptr i = reg; i < reg + n_chunks * size; i += size) {
+  bool PopulateBatches(AllocatorCache *c, SizeClassInfo *sci, uptr class_id,
+                       TransferBatch **current_batch, uptr max_count,
+                       uptr *pointers_array, uptr count) {
+    // If using a separate class for batches, we do not need to shuffle it.
+    if (kRandomShuffleChunks && (!kUseSeparateSizeClassForBatch ||
+        class_id != SizeClassMap::kBatchClassID))
+      RandomShuffle(pointers_array, count, &sci->rand_state);
+    TransferBatch *b = *current_batch;
+    for (uptr i = 0; i < count; i++) {
       if (!b) {
-        b = c->CreateBatch(class_id, this, (TransferBatch*)i);
+        b = c->CreateBatch(class_id, this, (TransferBatch*)pointers_array[i]);
+        if (UNLIKELY(!b))
+          return false;
         b->Clear();
       }
-      b->Add((void*)i);
+      b->Add((void*)pointers_array[i]);
       if (b->Count() == max_count) {
-        CHECK_GT(b->Count(), 0);
         sci->free_list.push_back(b);
         b = nullptr;
       }
     }
+    *current_batch = b;
+    return true;
+  }
+
+  bool PopulateFreeList(AllocatorStats *stat, AllocatorCache *c,
+                        SizeClassInfo *sci, uptr class_id) {
+    uptr size = ClassIdToSize(class_id);
+    uptr reg = AllocateRegion(stat, class_id);
+    if (UNLIKELY(!reg))
+      return false;
+    if (kRandomShuffleChunks)
+      if (UNLIKELY(sci->rand_state == 0))
+        // The random state is initialized from ASLR (PIE) and time.
+        sci->rand_state = reinterpret_cast<uptr>(sci) ^ NanoTime();
+    uptr n_chunks = kRegionSize / (size + kMetadataSize);
+    uptr max_count = TransferBatch::MaxCached(class_id);
+    CHECK_GT(max_count, 0);
+    TransferBatch *b = nullptr;
+    const uptr kShuffleArraySize = 48;
+    uptr shuffle_array[kShuffleArraySize];
+    uptr count = 0;
+    for (uptr i = reg; i < reg + n_chunks * size; i += size) {
+      shuffle_array[count++] = i;
+      if (count == kShuffleArraySize) {
+        if (UNLIKELY(!PopulateBatches(c, sci, class_id, &b, max_count,
+                                      shuffle_array, count)))
+          return false;
+        count = 0;
+      }
+    }
+    if (count) {
+      if (UNLIKELY(!PopulateBatches(c, sci, class_id, &b, max_count,
+                                    shuffle_array, count)))
+        return false;
+    }
     if (b) {
       CHECK_GT(b->Count(), 0);
       sci->free_list.push_back(b);
     }
+    return true;
   }
 
   ByteMap possible_regions;
   SizeClassInfo size_class_info_array[kNumClasses];
 };
-
-
diff --git a/lib/sanitizer_common/sanitizer_allocator_primary64.h b/lib/sanitizer_common/sanitizer_allocator_primary64.h
index 035d92b..b22f3ab 100644
--- a/lib/sanitizer_common/sanitizer_allocator_primary64.h
+++ b/lib/sanitizer_common/sanitizer_allocator_primary64.h
@@ -62,10 +62,10 @@
   // as a 4-byte integer (offset from the region start shifted right by 4).
   typedef u32 CompactPtrT;
   static const uptr kCompactPtrScale = 4;
-  CompactPtrT PointerToCompactPtr(uptr base, uptr ptr) {
+  CompactPtrT PointerToCompactPtr(uptr base, uptr ptr) const {
     return static_cast<CompactPtrT>((ptr - base) >> kCompactPtrScale);
   }
-  uptr CompactPtrToPointer(uptr base, CompactPtrT ptr32) {
+  uptr CompactPtrToPointer(uptr base, CompactPtrT ptr32) const {
     return base + (static_cast<uptr>(ptr32) << kCompactPtrScale);
   }
 
@@ -80,7 +80,7 @@
       CHECK_NE(NonConstSpaceBeg, ~(uptr)0);
     }
     SetReleaseToOSIntervalMs(release_to_os_interval_ms);
-    MapWithCallback(SpaceEnd(), AdditionalSize());
+    MapWithCallbackOrDie(SpaceEnd(), AdditionalSize());
   }
 
   s32 ReleaseToOSIntervalMs() const {
@@ -92,14 +92,11 @@
                  memory_order_relaxed);
   }
 
-  void MapWithCallback(uptr beg, uptr size) {
-    CHECK_EQ(beg, reinterpret_cast<uptr>(MmapFixedOrDie(beg, size)));
-    MapUnmapCallback().OnMap(beg, size);
-  }
-
-  void UnmapWithCallback(uptr beg, uptr size) {
-    MapUnmapCallback().OnUnmap(beg, size);
-    UnmapOrDie(reinterpret_cast<void *>(beg), size);
+  void ForceReleaseToOS() {
+    for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
+      BlockingMutexLock l(&GetRegionInfo(class_id)->mutex);
+      MaybeReleaseToOS(class_id, true /*force*/);
+    }
   }
 
   static bool CanAllocate(uptr size, uptr alignment) {
@@ -116,16 +113,20 @@
     BlockingMutexLock l(&region->mutex);
     uptr old_num_chunks = region->num_freed_chunks;
     uptr new_num_freed_chunks = old_num_chunks + n_chunks;
-    EnsureFreeArraySpace(region, region_beg, new_num_freed_chunks);
+    // Failure to allocate free array space while releasing memory is non
+    // recoverable.
+    if (UNLIKELY(!EnsureFreeArraySpace(region, region_beg,
+                                       new_num_freed_chunks)))
+      DieOnFailure::OnOOM();
     for (uptr i = 0; i < n_chunks; i++)
       free_array[old_num_chunks + i] = chunks[i];
     region->num_freed_chunks = new_num_freed_chunks;
-    region->n_freed += n_chunks;
+    region->stats.n_freed += n_chunks;
 
-    MaybeReleaseToOS(class_id);
+    MaybeReleaseToOS(class_id, false /*force*/);
   }
 
-  NOINLINE void GetFromAllocator(AllocatorStats *stat, uptr class_id,
+  NOINLINE bool GetFromAllocator(AllocatorStats *stat, uptr class_id,
                                  CompactPtrT *chunks, uptr n_chunks) {
     RegionInfo *region = GetRegionInfo(class_id);
     uptr region_beg = GetRegionBeginBySizeClass(class_id);
@@ -133,18 +134,19 @@
 
     BlockingMutexLock l(&region->mutex);
     if (UNLIKELY(region->num_freed_chunks < n_chunks)) {
-      PopulateFreeArray(stat, class_id, region,
-                        n_chunks - region->num_freed_chunks);
+      if (UNLIKELY(!PopulateFreeArray(stat, class_id, region,
+                                      n_chunks - region->num_freed_chunks)))
+        return false;
       CHECK_GE(region->num_freed_chunks, n_chunks);
     }
     region->num_freed_chunks -= n_chunks;
     uptr base_idx = region->num_freed_chunks;
     for (uptr i = 0; i < n_chunks; i++)
       chunks[i] = free_array[base_idx + i];
-    region->n_allocated += n_chunks;
+    region->stats.n_allocated += n_chunks;
+    return true;
   }
 
-
   bool PointerIsMine(const void *p) {
     uptr P = reinterpret_cast<uptr>(p);
     if (kUsingConstantSpaceBeg && (kSpaceBeg % kSpaceSize) == 0)
@@ -160,7 +162,7 @@
         space_beg;
   }
 
-  uptr GetRegionBeginBySizeClass(uptr class_id) {
+  uptr GetRegionBeginBySizeClass(uptr class_id) const {
     return SpaceBeg() + kRegionSize * class_id;
   }
 
@@ -211,7 +213,7 @@
 
   // Test-only.
   void TestOnlyUnmap() {
-    UnmapWithCallback(SpaceBeg(), kSpaceSize + AdditionalSize());
+    UnmapWithCallbackOrDie(SpaceBeg(), kSpaceSize + AdditionalSize());
   }
 
   static void FillMemoryProfile(uptr start, uptr rss, bool file, uptr *stats,
@@ -224,34 +226,43 @@
   void PrintStats(uptr class_id, uptr rss) {
     RegionInfo *region = GetRegionInfo(class_id);
     if (region->mapped_user == 0) return;
-    uptr in_use = region->n_allocated - region->n_freed;
+    uptr in_use = region->stats.n_allocated - region->stats.n_freed;
     uptr avail_chunks = region->allocated_user / ClassIdToSize(class_id);
     Printf(
-        "  %02zd (%6zd): mapped: %6zdK allocs: %7zd frees: %7zd inuse: %6zd "
-        "num_freed_chunks %7zd avail: %6zd rss: %6zdK releases: %6zd\n",
-        class_id, ClassIdToSize(class_id), region->mapped_user >> 10,
-        region->n_allocated, region->n_freed, in_use,
-        region->num_freed_chunks, avail_chunks, rss >> 10,
-        region->rtoi.num_releases);
+        "%s %02zd (%6zd): mapped: %6zdK allocs: %7zd frees: %7zd inuse: %6zd "
+        "num_freed_chunks %7zd avail: %6zd rss: %6zdK releases: %6zd "
+        "last released: %6zdK region: 0x%zx\n",
+        region->exhausted ? "F" : " ", class_id, ClassIdToSize(class_id),
+        region->mapped_user >> 10, region->stats.n_allocated,
+        region->stats.n_freed, in_use, region->num_freed_chunks, avail_chunks,
+        rss >> 10, region->rtoi.num_releases,
+        region->rtoi.last_released_bytes >> 10,
+        SpaceBeg() + kRegionSize * class_id);
   }
 
   void PrintStats() {
-    uptr total_mapped = 0;
-    uptr n_allocated = 0;
-    uptr n_freed = 0;
-    for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
-      RegionInfo *region = GetRegionInfo(class_id);
-      total_mapped += region->mapped_user;
-      n_allocated += region->n_allocated;
-      n_freed += region->n_freed;
-    }
-    Printf("Stats: SizeClassAllocator64: %zdM mapped in %zd allocations; "
-           "remains %zd\n",
-           total_mapped >> 20, n_allocated, n_allocated - n_freed);
     uptr rss_stats[kNumClasses];
     for (uptr class_id = 0; class_id < kNumClasses; class_id++)
       rss_stats[class_id] = SpaceBeg() + kRegionSize * class_id;
     GetMemoryProfile(FillMemoryProfile, rss_stats, kNumClasses);
+
+    uptr total_mapped = 0;
+    uptr total_rss = 0;
+    uptr n_allocated = 0;
+    uptr n_freed = 0;
+    for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
+      RegionInfo *region = GetRegionInfo(class_id);
+      if (region->mapped_user != 0) {
+        total_mapped += region->mapped_user;
+        total_rss += rss_stats[class_id];
+      }
+      n_allocated += region->stats.n_allocated;
+      n_freed += region->stats.n_freed;
+    }
+
+    Printf("Stats: SizeClassAllocator64: %zdM mapped (%zdM rss) in "
+           "%zd allocations; remains %zd\n", total_mapped >> 20,
+           total_rss >> 20, n_allocated, n_allocated - n_freed);
     for (uptr class_id = 1; class_id < kNumClasses; class_id++)
       PrintStats(class_id, rss_stats[class_id]);
   }
@@ -299,7 +310,240 @@
   static const uptr kNumClasses = SizeClassMap::kNumClasses;
   static const uptr kNumClassesRounded = SizeClassMap::kNumClassesRounded;
 
+  // A packed array of counters. Each counter occupies 2^n bits, enough to store
+  // counter's max_value. Ctor will try to allocate the required buffer via
+  // mapper->MapPackedCounterArrayBuffer and the caller is expected to check
+  // whether the initialization was successful by checking IsAllocated() result.
+  // For the performance sake, none of the accessors check the validity of the
+  // arguments, it is assumed that index is always in [0, n) range and the value
+  // is not incremented past max_value.
+  template<class MemoryMapperT>
+  class PackedCounterArray {
+   public:
+    PackedCounterArray(u64 num_counters, u64 max_value, MemoryMapperT *mapper)
+        : n(num_counters), memory_mapper(mapper) {
+      CHECK_GT(num_counters, 0);
+      CHECK_GT(max_value, 0);
+      constexpr u64 kMaxCounterBits = sizeof(*buffer) * 8ULL;
+      // Rounding counter storage size up to the power of two allows for using
+      // bit shifts calculating particular counter's index and offset.
+      uptr counter_size_bits =
+          RoundUpToPowerOfTwo(MostSignificantSetBitIndex(max_value) + 1);
+      CHECK_LE(counter_size_bits, kMaxCounterBits);
+      counter_size_bits_log = Log2(counter_size_bits);
+      counter_mask = ~0ULL >> (kMaxCounterBits - counter_size_bits);
+
+      uptr packing_ratio = kMaxCounterBits >> counter_size_bits_log;
+      CHECK_GT(packing_ratio, 0);
+      packing_ratio_log = Log2(packing_ratio);
+      bit_offset_mask = packing_ratio - 1;
+
+      buffer_size =
+          (RoundUpTo(n, 1ULL << packing_ratio_log) >> packing_ratio_log) *
+          sizeof(*buffer);
+      buffer = reinterpret_cast<u64*>(
+          memory_mapper->MapPackedCounterArrayBuffer(buffer_size));
+    }
+    ~PackedCounterArray() {
+      if (buffer) {
+        memory_mapper->UnmapPackedCounterArrayBuffer(
+            reinterpret_cast<uptr>(buffer), buffer_size);
+      }
+    }
+
+    bool IsAllocated() const {
+      return !!buffer;
+    }
+
+    u64 GetCount() const {
+      return n;
+    }
+
+    uptr Get(uptr i) const {
+      DCHECK_LT(i, n);
+      uptr index = i >> packing_ratio_log;
+      uptr bit_offset = (i & bit_offset_mask) << counter_size_bits_log;
+      return (buffer[index] >> bit_offset) & counter_mask;
+    }
+
+    void Inc(uptr i) const {
+      DCHECK_LT(Get(i), counter_mask);
+      uptr index = i >> packing_ratio_log;
+      uptr bit_offset = (i & bit_offset_mask) << counter_size_bits_log;
+      buffer[index] += 1ULL << bit_offset;
+    }
+
+    void IncRange(uptr from, uptr to) const {
+      DCHECK_LE(from, to);
+      for (uptr i = from; i <= to; i++)
+        Inc(i);
+    }
+
+   private:
+    const u64 n;
+    u64 counter_size_bits_log;
+    u64 counter_mask;
+    u64 packing_ratio_log;
+    u64 bit_offset_mask;
+
+    MemoryMapperT* const memory_mapper;
+    u64 buffer_size;
+    u64* buffer;
+  };
+
+  template<class MemoryMapperT>
+  class FreePagesRangeTracker {
+   public:
+    explicit FreePagesRangeTracker(MemoryMapperT* mapper)
+        : memory_mapper(mapper),
+          page_size_scaled_log(Log2(GetPageSizeCached() >> kCompactPtrScale)),
+          in_the_range(false), current_page(0), current_range_start_page(0) {}
+
+    void NextPage(bool freed) {
+      if (freed) {
+        if (!in_the_range) {
+          current_range_start_page = current_page;
+          in_the_range = true;
+        }
+      } else {
+        CloseOpenedRange();
+      }
+      current_page++;
+    }
+
+    void Done() {
+      CloseOpenedRange();
+    }
+
+   private:
+    void CloseOpenedRange() {
+      if (in_the_range) {
+        memory_mapper->ReleasePageRangeToOS(
+            current_range_start_page << page_size_scaled_log,
+            current_page << page_size_scaled_log);
+        in_the_range = false;
+      }
+    }
+
+    MemoryMapperT* const memory_mapper;
+    const uptr page_size_scaled_log;
+    bool in_the_range;
+    uptr current_page;
+    uptr current_range_start_page;
+  };
+
+  // Iterates over the free_array to identify memory pages containing freed
+  // chunks only and returns these pages back to OS.
+  // allocated_pages_count is the total number of pages allocated for the
+  // current bucket.
+  template<class MemoryMapperT>
+  static void ReleaseFreeMemoryToOS(CompactPtrT *free_array,
+                                    uptr free_array_count, uptr chunk_size,
+                                    uptr allocated_pages_count,
+                                    MemoryMapperT *memory_mapper) {
+    const uptr page_size = GetPageSizeCached();
+
+    // Figure out the number of chunks per page and whether we can take a fast
+    // path (the number of chunks per page is the same for all pages).
+    uptr full_pages_chunk_count_max;
+    bool same_chunk_count_per_page;
+    if (chunk_size <= page_size && page_size % chunk_size == 0) {
+      // Same number of chunks per page, no cross overs.
+      full_pages_chunk_count_max = page_size / chunk_size;
+      same_chunk_count_per_page = true;
+    } else if (chunk_size <= page_size && page_size % chunk_size != 0 &&
+        chunk_size % (page_size % chunk_size) == 0) {
+      // Some chunks are crossing page boundaries, which means that the page
+      // contains one or two partial chunks, but all pages contain the same
+      // number of chunks.
+      full_pages_chunk_count_max = page_size / chunk_size + 1;
+      same_chunk_count_per_page = true;
+    } else if (chunk_size <= page_size) {
+      // Some chunks are crossing page boundaries, which means that the page
+      // contains one or two partial chunks.
+      full_pages_chunk_count_max = page_size / chunk_size + 2;
+      same_chunk_count_per_page = false;
+    } else if (chunk_size > page_size && chunk_size % page_size == 0) {
+      // One chunk covers multiple pages, no cross overs.
+      full_pages_chunk_count_max = 1;
+      same_chunk_count_per_page = true;
+    } else if (chunk_size > page_size) {
+      // One chunk covers multiple pages, Some chunks are crossing page
+      // boundaries. Some pages contain one chunk, some contain two.
+      full_pages_chunk_count_max = 2;
+      same_chunk_count_per_page = false;
+    } else {
+      UNREACHABLE("All chunk_size/page_size ratios must be handled.");
+    }
+
+    PackedCounterArray<MemoryMapperT> counters(allocated_pages_count,
+                                               full_pages_chunk_count_max,
+                                               memory_mapper);
+    if (!counters.IsAllocated())
+      return;
+
+    const uptr chunk_size_scaled = chunk_size >> kCompactPtrScale;
+    const uptr page_size_scaled = page_size >> kCompactPtrScale;
+    const uptr page_size_scaled_log = Log2(page_size_scaled);
+
+    // Iterate over free chunks and count how many free chunks affect each
+    // allocated page.
+    if (chunk_size <= page_size && page_size % chunk_size == 0) {
+      // Each chunk affects one page only.
+      for (uptr i = 0; i < free_array_count; i++)
+        counters.Inc(free_array[i] >> page_size_scaled_log);
+    } else {
+      // In all other cases chunks might affect more than one page.
+      for (uptr i = 0; i < free_array_count; i++) {
+        counters.IncRange(
+            free_array[i] >> page_size_scaled_log,
+            (free_array[i] + chunk_size_scaled - 1) >> page_size_scaled_log);
+      }
+    }
+
+    // Iterate over pages detecting ranges of pages with chunk counters equal
+    // to the expected number of chunks for the particular page.
+    FreePagesRangeTracker<MemoryMapperT> range_tracker(memory_mapper);
+    if (same_chunk_count_per_page) {
+      // Fast path, every page has the same number of chunks affecting it.
+      for (uptr i = 0; i < counters.GetCount(); i++)
+        range_tracker.NextPage(counters.Get(i) == full_pages_chunk_count_max);
+    } else {
+      // Show path, go through the pages keeping count how many chunks affect
+      // each page.
+      const uptr pn =
+          chunk_size < page_size ? page_size_scaled / chunk_size_scaled : 1;
+      const uptr pnc = pn * chunk_size_scaled;
+      // The idea is to increment the current page pointer by the first chunk
+      // size, middle portion size (the portion of the page covered by chunks
+      // except the first and the last one) and then the last chunk size, adding
+      // up the number of chunks on the current page and checking on every step
+      // whether the page boundary was crossed.
+      uptr prev_page_boundary = 0;
+      uptr current_boundary = 0;
+      for (uptr i = 0; i < counters.GetCount(); i++) {
+        uptr page_boundary = prev_page_boundary + page_size_scaled;
+        uptr chunks_per_page = pn;
+        if (current_boundary < page_boundary) {
+          if (current_boundary > prev_page_boundary)
+            chunks_per_page++;
+          current_boundary += pnc;
+          if (current_boundary < page_boundary) {
+            chunks_per_page++;
+            current_boundary += chunk_size_scaled;
+          }
+        }
+        prev_page_boundary = page_boundary;
+
+        range_tracker.NextPage(counters.Get(i) == chunks_per_page);
+      }
+    }
+    range_tracker.Done();
+  }
+
  private:
+  friend class MemoryMapper;
+
   static const uptr kRegionSize = kSpaceSize / kNumClassesRounded;
   // FreeArray is the array of free-d chunks (stored as 4-byte offsets).
   // In the worst case it may reguire kRegionSize/SizeClassMap::kMinSize
@@ -326,10 +570,16 @@
 
   atomic_sint32_t release_to_os_interval_ms_;
 
+  struct Stats {
+    uptr n_allocated;
+    uptr n_freed;
+  };
+
   struct ReleaseToOsInfo {
     uptr n_freed_at_last_release;
     uptr num_releases;
     u64 last_release_at_ns;
+    u64 last_released_bytes;
   };
 
   struct RegionInfo {
@@ -340,36 +590,25 @@
     uptr allocated_meta;  // Bytes allocated for metadata.
     uptr mapped_user;  // Bytes mapped for user memory.
     uptr mapped_meta;  // Bytes mapped for metadata.
-    u32 rand_state; // Seed for random shuffle, used if kRandomShuffleChunks.
-    uptr n_allocated, n_freed;  // Just stats.
+    u32 rand_state;  // Seed for random shuffle, used if kRandomShuffleChunks.
+    bool exhausted;  // Whether region is out of space for new chunks.
+    Stats stats;
     ReleaseToOsInfo rtoi;
   };
   COMPILER_CHECK(sizeof(RegionInfo) >= kCacheLineSize);
 
-  u32 Rand(u32 *state) {  // ANSI C linear congruential PRNG.
-    return (*state = *state * 1103515245 + 12345) >> 16;
-  }
-
-  u32 RandN(u32 *state, u32 n) { return Rand(state) % n; }  // [0, n)
-
-  void RandomShuffle(u32 *a, u32 n, u32 *rand_state) {
-    if (n <= 1) return;
-    for (u32 i = n - 1; i > 0; i--)
-      Swap(a[i], a[RandN(rand_state, i + 1)]);
-  }
-
-  RegionInfo *GetRegionInfo(uptr class_id) {
+  RegionInfo *GetRegionInfo(uptr class_id) const {
     CHECK_LT(class_id, kNumClasses);
     RegionInfo *regions =
         reinterpret_cast<RegionInfo *>(SpaceBeg() + kSpaceSize);
     return &regions[class_id];
   }
 
-  uptr GetMetadataEnd(uptr region_beg) {
+  uptr GetMetadataEnd(uptr region_beg) const {
     return region_beg + kRegionSize - kFreeArraySize;
   }
 
-  uptr GetChunkIdx(uptr chunk, uptr size) {
+  uptr GetChunkIdx(uptr chunk, uptr size) const {
     if (!kUsingConstantSpaceBeg)
       chunk -= SpaceBeg();
 
@@ -381,96 +620,182 @@
     return (u32)offset / (u32)size;
   }
 
-  CompactPtrT *GetFreeArray(uptr region_beg) {
-    return reinterpret_cast<CompactPtrT *>(region_beg + kRegionSize -
-                                           kFreeArraySize);
+  CompactPtrT *GetFreeArray(uptr region_beg) const {
+    return reinterpret_cast<CompactPtrT *>(GetMetadataEnd(region_beg));
   }
 
-  void EnsureFreeArraySpace(RegionInfo *region, uptr region_beg,
+  bool MapWithCallback(uptr beg, uptr size) {
+    uptr mapped = reinterpret_cast<uptr>(MmapFixedOrDieOnFatalError(beg, size));
+    if (UNLIKELY(!mapped))
+      return false;
+    CHECK_EQ(beg, mapped);
+    MapUnmapCallback().OnMap(beg, size);
+    return true;
+  }
+
+  void MapWithCallbackOrDie(uptr beg, uptr size) {
+    CHECK_EQ(beg, reinterpret_cast<uptr>(MmapFixedOrDie(beg, size)));
+    MapUnmapCallback().OnMap(beg, size);
+  }
+
+  void UnmapWithCallbackOrDie(uptr beg, uptr size) {
+    MapUnmapCallback().OnUnmap(beg, size);
+    UnmapOrDie(reinterpret_cast<void *>(beg), size);
+  }
+
+  bool EnsureFreeArraySpace(RegionInfo *region, uptr region_beg,
                             uptr num_freed_chunks) {
     uptr needed_space = num_freed_chunks * sizeof(CompactPtrT);
     if (region->mapped_free_array < needed_space) {
-      CHECK_LE(needed_space, kFreeArraySize);
       uptr new_mapped_free_array = RoundUpTo(needed_space, kFreeArrayMapSize);
+      CHECK_LE(new_mapped_free_array, kFreeArraySize);
       uptr current_map_end = reinterpret_cast<uptr>(GetFreeArray(region_beg)) +
                              region->mapped_free_array;
       uptr new_map_size = new_mapped_free_array - region->mapped_free_array;
-      MapWithCallback(current_map_end, new_map_size);
+      if (UNLIKELY(!MapWithCallback(current_map_end, new_map_size)))
+        return false;
       region->mapped_free_array = new_mapped_free_array;
     }
+    return true;
   }
 
-
-  NOINLINE void PopulateFreeArray(AllocatorStats *stat, uptr class_id,
+  NOINLINE bool PopulateFreeArray(AllocatorStats *stat, uptr class_id,
                                   RegionInfo *region, uptr requested_count) {
     // region->mutex is held.
-    uptr size = ClassIdToSize(class_id);
-    uptr beg_idx = region->allocated_user;
-    uptr end_idx = beg_idx + requested_count * size;
-    uptr region_beg = GetRegionBeginBySizeClass(class_id);
-    if (end_idx > region->mapped_user) {
-      if (!kUsingConstantSpaceBeg && region->mapped_user == 0)
-        region->rand_state = static_cast<u32>(region_beg >> 12);  // From ASLR.
+    const uptr size = ClassIdToSize(class_id);
+    const uptr new_space_beg = region->allocated_user;
+    const uptr new_space_end = new_space_beg + requested_count * size;
+    const uptr region_beg = GetRegionBeginBySizeClass(class_id);
+
+    // Map more space for chunks, if necessary.
+    if (new_space_end > region->mapped_user) {
+      if (UNLIKELY(region->mapped_user == 0)) {
+        if (!kUsingConstantSpaceBeg && kRandomShuffleChunks)
+          // The random state is initialized from ASLR.
+          region->rand_state = static_cast<u32>(region_beg >> 12);
+        // Postpone the first release to OS attempt for ReleaseToOSIntervalMs,
+        // preventing just allocated memory from being released sooner than
+        // necessary and also preventing extraneous ReleaseMemoryPagesToOS calls
+        // for short lived processes.
+        region->rtoi.last_release_at_ns = NanoTime();
+      }
       // Do the mmap for the user memory.
       uptr map_size = kUserMapSize;
-      while (end_idx > region->mapped_user + map_size)
+      while (new_space_end > region->mapped_user + map_size)
         map_size += kUserMapSize;
-      CHECK_GE(region->mapped_user + map_size, end_idx);
-      MapWithCallback(region_beg + region->mapped_user, map_size);
+      CHECK_GE(region->mapped_user + map_size, new_space_end);
+      if (UNLIKELY(!MapWithCallback(region_beg + region->mapped_user,
+                                    map_size)))
+        return false;
       stat->Add(AllocatorStatMapped, map_size);
       region->mapped_user += map_size;
     }
-    CompactPtrT *free_array = GetFreeArray(region_beg);
-    uptr total_count = (region->mapped_user - beg_idx) / size;
-    uptr num_freed_chunks = region->num_freed_chunks;
-    EnsureFreeArraySpace(region, region_beg, num_freed_chunks + total_count);
-    for (uptr i = 0; i < total_count; i++) {
-      uptr chunk = beg_idx + i * size;
-      free_array[num_freed_chunks + total_count - 1 - i] =
-          PointerToCompactPtr(0, chunk);
-    }
-    if (kRandomShuffleChunks)
-      RandomShuffle(&free_array[num_freed_chunks], total_count,
-                    &region->rand_state);
-    region->num_freed_chunks += total_count;
-    region->allocated_user += total_count * size;
-    CHECK_LE(region->allocated_user, region->mapped_user);
+    const uptr new_chunks_count = (region->mapped_user - new_space_beg) / size;
 
-    region->allocated_meta += total_count * kMetadataSize;
-    if (region->allocated_meta > region->mapped_meta) {
-      uptr map_size = kMetaMapSize;
-      while (region->allocated_meta > region->mapped_meta + map_size)
-        map_size += kMetaMapSize;
-      // Do the mmap for the metadata.
-      CHECK_GE(region->mapped_meta + map_size, region->allocated_meta);
-      MapWithCallback(GetMetadataEnd(region_beg) -
-                      region->mapped_meta - map_size, map_size);
-      region->mapped_meta += map_size;
-    }
-    CHECK_LE(region->allocated_meta, region->mapped_meta);
-    if (region->mapped_user + region->mapped_meta >
+    // Calculate the required space for metadata.
+    const uptr requested_allocated_meta =
+        region->allocated_meta + new_chunks_count * kMetadataSize;
+    uptr requested_mapped_meta = region->mapped_meta;
+    while (requested_allocated_meta > requested_mapped_meta)
+      requested_mapped_meta += kMetaMapSize;
+    // Check whether this size class is exhausted.
+    if (region->mapped_user + requested_mapped_meta >
         kRegionSize - kFreeArraySize) {
-      Printf("%s: Out of memory. Dying. ", SanitizerToolName);
-      Printf("The process has exhausted %zuMB for size class %zu.\n",
-          kRegionSize / 1024 / 1024, size);
-      Die();
+      if (!region->exhausted) {
+        region->exhausted = true;
+        Printf("%s: Out of memory. ", SanitizerToolName);
+        Printf("The process has exhausted %zuMB for size class %zu.\n",
+               kRegionSize >> 20, size);
+      }
+      return false;
     }
+    // Map more space for metadata, if necessary.
+    if (requested_mapped_meta > region->mapped_meta) {
+      if (UNLIKELY(!MapWithCallback(
+              GetMetadataEnd(region_beg) - requested_mapped_meta,
+              requested_mapped_meta - region->mapped_meta)))
+        return false;
+      region->mapped_meta = requested_mapped_meta;
+    }
+
+    // If necessary, allocate more space for the free array and populate it with
+    // newly allocated chunks.
+    const uptr total_freed_chunks = region->num_freed_chunks + new_chunks_count;
+    if (UNLIKELY(!EnsureFreeArraySpace(region, region_beg, total_freed_chunks)))
+      return false;
+    CompactPtrT *free_array = GetFreeArray(region_beg);
+    for (uptr i = 0, chunk = new_space_beg; i < new_chunks_count;
+         i++, chunk += size)
+      free_array[total_freed_chunks - 1 - i] = PointerToCompactPtr(0, chunk);
+    if (kRandomShuffleChunks)
+      RandomShuffle(&free_array[region->num_freed_chunks], new_chunks_count,
+                    &region->rand_state);
+
+    // All necessary memory is mapped and now it is safe to advance all
+    // 'allocated_*' counters.
+    region->num_freed_chunks += new_chunks_count;
+    region->allocated_user += new_chunks_count * size;
+    CHECK_LE(region->allocated_user, region->mapped_user);
+    region->allocated_meta = requested_allocated_meta;
+    CHECK_LE(region->allocated_meta, region->mapped_meta);
+    region->exhausted = false;
+
+    // TODO(alekseyshl): Consider bumping last_release_at_ns here to prevent
+    // MaybeReleaseToOS from releasing just allocated pages or protect these
+    // not yet used chunks some other way.
+
+    return true;
   }
 
-  void MaybeReleaseChunkRange(uptr region_beg, uptr chunk_size,
-                              CompactPtrT first, CompactPtrT last) {
-    uptr beg_ptr = CompactPtrToPointer(region_beg, first);
-    uptr end_ptr = CompactPtrToPointer(region_beg, last) + chunk_size;
-    ReleaseMemoryPagesToOS(beg_ptr, end_ptr);
-  }
+  class MemoryMapper {
+   public:
+    MemoryMapper(const ThisT& base_allocator, uptr class_id)
+        : allocator(base_allocator),
+          region_base(base_allocator.GetRegionBeginBySizeClass(class_id)),
+          released_ranges_count(0),
+          released_bytes(0) {
+    }
 
-  // Attempts to release some RAM back to OS. The region is expected to be
-  // locked.
-  // Algorithm:
-  // * Sort the chunks.
-  // * Find ranges fully covered by free-d chunks
-  // * Release them to OS with madvise.
-  void MaybeReleaseToOS(uptr class_id) {
+    uptr GetReleasedRangesCount() const {
+      return released_ranges_count;
+    }
+
+    uptr GetReleasedBytes() const {
+      return released_bytes;
+    }
+
+    uptr MapPackedCounterArrayBuffer(uptr buffer_size) {
+      // TODO(alekseyshl): The idea to explore is to check if we have enough
+      // space between num_freed_chunks*sizeof(CompactPtrT) and
+      // mapped_free_array to fit buffer_size bytes and use that space instead
+      // of mapping a temporary one.
+      return reinterpret_cast<uptr>(
+          MmapOrDieOnFatalError(buffer_size, "ReleaseToOSPageCounters"));
+    }
+
+    void UnmapPackedCounterArrayBuffer(uptr buffer, uptr buffer_size) {
+      UnmapOrDie(reinterpret_cast<void *>(buffer), buffer_size);
+    }
+
+    // Releases [from, to) range of pages back to OS.
+    void ReleasePageRangeToOS(CompactPtrT from, CompactPtrT to) {
+      const uptr from_page = allocator.CompactPtrToPointer(region_base, from);
+      const uptr to_page = allocator.CompactPtrToPointer(region_base, to);
+      ReleaseMemoryPagesToOS(from_page, to_page);
+      released_ranges_count++;
+      released_bytes += to_page - from_page;
+    }
+
+   private:
+    const ThisT& allocator;
+    const uptr region_base;
+    uptr released_ranges_count;
+    uptr released_bytes;
+  };
+
+  // Attempts to release RAM occupied by freed chunks back to OS. The region is
+  // expected to be locked.
+  void MaybeReleaseToOS(uptr class_id, bool force) {
     RegionInfo *region = GetRegionInfo(class_id);
     const uptr chunk_size = ClassIdToSize(class_id);
     const uptr page_size = GetPageSizeCached();
@@ -478,44 +803,34 @@
     uptr n = region->num_freed_chunks;
     if (n * chunk_size < page_size)
       return;  // No chance to release anything.
-    if ((region->n_freed - region->rtoi.n_freed_at_last_release) * chunk_size <
-        page_size) {
+    if ((region->stats.n_freed -
+         region->rtoi.n_freed_at_last_release) * chunk_size < page_size) {
       return;  // Nothing new to release.
     }
 
-    s32 interval_ms = ReleaseToOSIntervalMs();
-    if (interval_ms < 0)
-      return;
+    if (!force) {
+      s32 interval_ms = ReleaseToOSIntervalMs();
+      if (interval_ms < 0)
+        return;
 
-    u64 now_ns = NanoTime();
-    if (region->rtoi.last_release_at_ns + interval_ms * 1000000ULL > now_ns)
-      return;  // Memory was returned recently.
-    region->rtoi.last_release_at_ns = now_ns;
-
-    uptr region_beg = GetRegionBeginBySizeClass(class_id);
-    CompactPtrT *free_array = GetFreeArray(region_beg);
-    SortArray(free_array, n);
-
-    const uptr scaled_chunk_size = chunk_size >> kCompactPtrScale;
-    const uptr kScaledGranularity = page_size >> kCompactPtrScale;
-
-    uptr range_beg = free_array[0];
-    uptr prev = free_array[0];
-    for (uptr i = 1; i < n; i++) {
-      uptr chunk = free_array[i];
-      CHECK_GT(chunk, prev);
-      if (chunk - prev != scaled_chunk_size) {
-        CHECK_GT(chunk - prev, scaled_chunk_size);
-        if (prev + scaled_chunk_size - range_beg >= kScaledGranularity) {
-          MaybeReleaseChunkRange(region_beg, chunk_size, range_beg, prev);
-          region->rtoi.n_freed_at_last_release = region->n_freed;
-          region->rtoi.num_releases++;
-        }
-        range_beg = chunk;
+      if (region->rtoi.last_release_at_ns + interval_ms * 1000000ULL >
+          NanoTime()) {
+        return;  // Memory was returned recently.
       }
-      prev = chunk;
     }
+
+    MemoryMapper memory_mapper(*this, class_id);
+
+    ReleaseFreeMemoryToOS<MemoryMapper>(
+        GetFreeArray(GetRegionBeginBySizeClass(class_id)), n, chunk_size,
+        RoundUpTo(region->allocated_user, page_size) / page_size,
+        &memory_mapper);
+
+    if (memory_mapper.GetReleasedRangesCount() > 0) {
+      region->rtoi.n_freed_at_last_release = region->stats.n_freed;
+      region->rtoi.num_releases += memory_mapper.GetReleasedRangesCount();
+      region->rtoi.last_released_bytes = memory_mapper.GetReleasedBytes();
+    }
+    region->rtoi.last_release_at_ns = NanoTime();
   }
 };
-
-
diff --git a/lib/sanitizer_common/sanitizer_allocator_secondary.h b/lib/sanitizer_common/sanitizer_allocator_secondary.h
index 2e98e59..261dfb5 100644
--- a/lib/sanitizer_common/sanitizer_allocator_secondary.h
+++ b/lib/sanitizer_common/sanitizer_allocator_secondary.h
@@ -17,17 +17,19 @@
 // This class can (de)allocate only large chunks of memory using mmap/unmap.
 // The main purpose of this allocator is to cover large and rare allocation
 // sizes not covered by more efficient allocators (e.g. SizeClassAllocator64).
-template <class MapUnmapCallback = NoOpMapUnmapCallback>
+template <class MapUnmapCallback = NoOpMapUnmapCallback,
+          class FailureHandlerT = ReturnNullOrDieOnFailure>
 class LargeMmapAllocator {
  public:
-  void InitLinkerInitialized(bool may_return_null) {
+  typedef FailureHandlerT FailureHandler;
+
+  void InitLinkerInitialized() {
     page_size_ = GetPageSizeCached();
-    atomic_store(&may_return_null_, may_return_null, memory_order_relaxed);
   }
 
-  void Init(bool may_return_null) {
+  void Init() {
     internal_memset(this, 0, sizeof(*this));
-    InitLinkerInitialized(may_return_null);
+    InitLinkerInitialized();
   }
 
   void *Allocate(AllocatorStats *stat, uptr size, uptr alignment) {
@@ -36,9 +38,12 @@
     if (alignment > page_size_)
       map_size += alignment;
     // Overflow.
-    if (map_size < size) return ReturnNullOrDieOnBadRequest();
+    if (map_size < size)
+      return FailureHandler::OnBadRequest();
     uptr map_beg = reinterpret_cast<uptr>(
-        MmapOrDie(map_size, "LargeMmapAllocator"));
+        MmapOrDieOnFatalError(map_size, "LargeMmapAllocator"));
+    if (!map_beg)
+      return FailureHandler::OnOOM();
     CHECK(IsAligned(map_beg, page_size_));
     MapUnmapCallback().OnMap(map_beg, map_size);
     uptr map_end = map_beg + map_size;
@@ -72,24 +77,6 @@
     return reinterpret_cast<void*>(res);
   }
 
-  bool MayReturnNull() const {
-    return atomic_load(&may_return_null_, memory_order_acquire);
-  }
-
-  void *ReturnNullOrDieOnBadRequest() {
-    if (MayReturnNull()) return nullptr;
-    ReportAllocatorCannotReturnNull(false);
-  }
-
-  void *ReturnNullOrDieOnOOM() {
-    if (MayReturnNull()) return nullptr;
-    ReportAllocatorCannotReturnNull(true);
-  }
-
-  void SetMayReturnNull(bool may_return_null) {
-    atomic_store(&may_return_null_, may_return_null, memory_order_release);
-  }
-
   void Deallocate(AllocatorStats *stat, void *p) {
     Header *h = GetHeader(p);
     {
@@ -275,7 +262,6 @@
   struct Stats {
     uptr n_allocs, n_frees, currently_allocated, max_allocated, by_size_log[64];
   } stats;
-  atomic_uint8_t may_return_null_;
   SpinMutex mutex_;
 };
 
diff --git a/lib/sanitizer_common/sanitizer_allocator_size_class_map.h b/lib/sanitizer_common/sanitizer_allocator_size_class_map.h
index 7151a46..2bd83b2 100644
--- a/lib/sanitizer_common/sanitizer_allocator_size_class_map.h
+++ b/lib/sanitizer_common/sanitizer_allocator_size_class_map.h
@@ -134,8 +134,9 @@
 
   static const uptr kMaxSize = 1UL << kMaxSizeLog;
   static const uptr kNumClasses =
-      kMidClass + ((kMaxSizeLog - kMidSizeLog) << S) + 1;
+      kMidClass + ((kMaxSizeLog - kMidSizeLog) << S) + 1 + 1;
   static const uptr kLargestClassID = kNumClasses - 2;
+  static const uptr kBatchClassID = kNumClasses - 1;
   COMPILER_CHECK(kNumClasses >= 16 && kNumClasses <= 256);
   static const uptr kNumClassesRounded =
       kNumClasses <= 32  ? 32 :
@@ -143,6 +144,11 @@
       kNumClasses <= 128 ? 128 : 256;
 
   static uptr Size(uptr class_id) {
+    // Estimate the result for kBatchClassID because this class does not know
+    // the exact size of TransferBatch. It's OK since we are using the actual
+    // sizeof(TransferBatch) where it matters.
+    if (UNLIKELY(class_id == kBatchClassID))
+      return kMaxNumCachedHint * sizeof(uptr);
     if (class_id <= kMidClass)
       return kMinSize * class_id;
     class_id -= kMidClass;
@@ -151,9 +157,10 @@
   }
 
   static uptr ClassID(uptr size) {
+    if (UNLIKELY(size > kMaxSize))
+      return 0;
     if (size <= kMidSize)
       return (size + kMinSize - 1) >> kMinSizeLog;
-    if (size > kMaxSize) return 0;
     uptr l = MostSignificantSetBitIndex(size);
     uptr hbits = (size >> (l - S)) & M;
     uptr lbits = size & ((1 << (l - S)) - 1);
@@ -162,7 +169,13 @@
   }
 
   static uptr MaxCachedHint(uptr class_id) {
-    if (class_id == 0) return 0;
+    // Estimate the result for kBatchClassID because this class does not know
+    // the exact size of TransferBatch. We need to cache fewer batches than user
+    // chunks, so this number can be small.
+    if (UNLIKELY(class_id == kBatchClassID))
+      return 16;
+    if (UNLIKELY(class_id == 0))
+      return 0;
     uptr n = (1UL << kMaxBytesCachedLog) / Size(class_id);
     return Max<uptr>(1, Min(kMaxNumCachedHint, n));
   }
@@ -178,6 +191,8 @@
       uptr p = prev_s ? (d * 100 / prev_s) : 0;
       uptr l = s ? MostSignificantSetBitIndex(s) : 0;
       uptr cached = MaxCachedHint(i) * s;
+      if (i == kBatchClassID)
+        d = p = l = 0;
       Printf("c%02zd => s: %zd diff: +%zd %02zd%% l %zd "
              "cached: %zd %zd; id %zd\n",
              i, Size(i), d, p, l, MaxCachedHint(i), cached, ClassID(s));
@@ -192,12 +207,13 @@
       // Printf("Validate: c%zd\n", c);
       uptr s = Size(c);
       CHECK_NE(s, 0U);
+      if (c == kBatchClassID)
+        continue;
       CHECK_EQ(ClassID(s), c);
-      if (c != kNumClasses - 1)
+      if (c < kLargestClassID)
         CHECK_EQ(ClassID(s + 1), c + 1);
       CHECK_EQ(ClassID(s - 1), c);
-      if (c)
-        CHECK_GT(Size(c), Size(c-1));
+      CHECK_GT(Size(c), Size(c - 1));
     }
     CHECK_EQ(ClassID(kMaxSize + 1), 0);
 
@@ -207,7 +223,7 @@
       CHECK_LT(c, kNumClasses);
       CHECK_GE(Size(c), s);
       if (c > 0)
-        CHECK_LT(Size(c-1), s);
+        CHECK_LT(Size(c - 1), s);
     }
   }
 };
diff --git a/lib/sanitizer_common/sanitizer_atomic_clang.h b/lib/sanitizer_common/sanitizer_atomic_clang.h
index 38363e8..65b3a38 100644
--- a/lib/sanitizer_common/sanitizer_atomic_clang.h
+++ b/lib/sanitizer_common/sanitizer_atomic_clang.h
@@ -71,16 +71,25 @@
   return v;
 }
 
-template<typename T>
-INLINE bool atomic_compare_exchange_strong(volatile T *a,
-                                           typename T::Type *cmp,
+template <typename T>
+INLINE bool atomic_compare_exchange_strong(volatile T *a, typename T::Type *cmp,
                                            typename T::Type xchg,
                                            memory_order mo) {
   typedef typename T::Type Type;
   Type cmpv = *cmp;
-  Type prev = __sync_val_compare_and_swap(&a->val_dont_use, cmpv, xchg);
-  if (prev == cmpv)
-    return true;
+  Type prev;
+#if defined(_MIPS_SIM) && _MIPS_SIM == _ABIO32
+  if (sizeof(*a) == 8) {
+    Type volatile *val_ptr = const_cast<Type volatile *>(&a->val_dont_use);
+    prev = __mips_sync_val_compare_and_swap<u64>(
+        reinterpret_cast<u64 volatile *>(val_ptr), (u64)cmpv, (u64)xchg);
+  } else {
+    prev = __sync_val_compare_and_swap(&a->val_dont_use, cmpv, xchg);
+  }
+#else
+  prev = __sync_val_compare_and_swap(&a->val_dont_use, cmpv, xchg);
+#endif
+  if (prev == cmpv) return true;
   *cmp = prev;
   return false;
 }
diff --git a/lib/sanitizer_common/sanitizer_atomic_clang_other.h b/lib/sanitizer_common/sanitizer_atomic_clang_other.h
index 099b9f7..d2acc31 100644
--- a/lib/sanitizer_common/sanitizer_atomic_clang_other.h
+++ b/lib/sanitizer_common/sanitizer_atomic_clang_other.h
@@ -17,6 +17,56 @@
 
 namespace __sanitizer {
 
+// MIPS32 does not support atomic > 4 bytes. To address this lack of
+// functionality, the sanitizer library provides helper methods which use an
+// internal spin lock mechanism to emulate atomic oprations when the size is
+// 8 bytes.
+#if defined(_MIPS_SIM) && _MIPS_SIM == _ABIO32
+static void __spin_lock(volatile int *lock) {
+  while (__sync_lock_test_and_set(lock, 1))
+    while (*lock) {
+    }
+}
+
+static void __spin_unlock(volatile int *lock) { __sync_lock_release(lock); }
+
+
+// Make sure the lock is on its own cache line to prevent false sharing.
+// Put it inside a struct that is aligned and padded to the typical MIPS
+// cacheline which is 32 bytes.
+static struct {
+  int lock;
+  char pad[32 - sizeof(int)];
+} __attribute__((aligned(32))) lock = {0};
+
+template <class T>
+T __mips_sync_fetch_and_add(volatile T *ptr, T val) {
+  T ret;
+
+  __spin_lock(&lock.lock);
+
+  ret = *ptr;
+  *ptr = ret + val;
+
+  __spin_unlock(&lock.lock);
+
+  return ret;
+}
+
+template <class T>
+T __mips_sync_val_compare_and_swap(volatile T *ptr, T oldval, T newval) {
+  T ret;
+  __spin_lock(&lock.lock);
+
+  ret = *ptr;
+  if (ret == oldval) *ptr = newval;
+
+  __spin_unlock(&lock.lock);
+
+  return ret;
+}
+#endif
+
 INLINE void proc_yield(int cnt) {
   __asm__ __volatile__("" ::: "memory");
 }
@@ -53,8 +103,15 @@
     // 64-bit load on 32-bit platform.
     // Gross, but simple and reliable.
     // Assume that it is not in read-only memory.
+#if defined(_MIPS_SIM) && _MIPS_SIM == _ABIO32
+    typename T::Type volatile *val_ptr =
+        const_cast<typename T::Type volatile *>(&a->val_dont_use);
+    v = __mips_sync_fetch_and_add<u64>(
+        reinterpret_cast<u64 volatile *>(val_ptr), 0);
+#else
     v = __sync_fetch_and_add(
         const_cast<typename T::Type volatile *>(&a->val_dont_use), 0);
+#endif
   }
   return v;
 }
@@ -84,7 +141,14 @@
     typename T::Type cmp = a->val_dont_use;
     typename T::Type cur;
     for (;;) {
+#if defined(_MIPS_SIM) && _MIPS_SIM == _ABIO32
+      typename T::Type volatile *val_ptr =
+          const_cast<typename T::Type volatile *>(&a->val_dont_use);
+      cur = __mips_sync_val_compare_and_swap<u64>(
+          reinterpret_cast<u64 volatile *>(val_ptr), (u64)cmp, (u64)v);
+#else
       cur = __sync_val_compare_and_swap(&a->val_dont_use, cmp, v);
+#endif
       if (cmp == v)
         break;
       cmp = cur;
diff --git a/lib/sanitizer_common/sanitizer_common.cc b/lib/sanitizer_common/sanitizer_common.cc
index 9078a90..970b11c 100644
--- a/lib/sanitizer_common/sanitizer_common.cc
+++ b/lib/sanitizer_common/sanitizer_common.cc
@@ -27,72 +27,6 @@
 atomic_uint32_t current_verbosity;
 uptr PageSizeCached;
 
-StaticSpinMutex report_file_mu;
-ReportFile report_file = {&report_file_mu, kStderrFd, "", "", 0};
-
-void RawWrite(const char *buffer) {
-  report_file.Write(buffer, internal_strlen(buffer));
-}
-
-void ReportFile::ReopenIfNecessary() {
-  mu->CheckLocked();
-  if (fd == kStdoutFd || fd == kStderrFd) return;
-
-  uptr pid = internal_getpid();
-  // If in tracer, use the parent's file.
-  if (pid == stoptheworld_tracer_pid)
-    pid = stoptheworld_tracer_ppid;
-  if (fd != kInvalidFd) {
-    // If the report file is already opened by the current process,
-    // do nothing. Otherwise the report file was opened by the parent
-    // process, close it now.
-    if (fd_pid == pid)
-      return;
-    else
-      CloseFile(fd);
-  }
-
-  const char *exe_name = GetProcessName();
-  if (common_flags()->log_exe_name && exe_name) {
-    internal_snprintf(full_path, kMaxPathLength, "%s.%s.%zu", path_prefix,
-                      exe_name, pid);
-  } else {
-    internal_snprintf(full_path, kMaxPathLength, "%s.%zu", path_prefix, pid);
-  }
-  fd = OpenFile(full_path, WrOnly);
-  if (fd == kInvalidFd) {
-    const char *ErrorMsgPrefix = "ERROR: Can't open file: ";
-    WriteToFile(kStderrFd, ErrorMsgPrefix, internal_strlen(ErrorMsgPrefix));
-    WriteToFile(kStderrFd, full_path, internal_strlen(full_path));
-    Die();
-  }
-  fd_pid = pid;
-}
-
-void ReportFile::SetReportPath(const char *path) {
-  if (!path)
-    return;
-  uptr len = internal_strlen(path);
-  if (len > sizeof(path_prefix) - 100) {
-    Report("ERROR: Path is too long: %c%c%c%c%c%c%c%c...\n",
-           path[0], path[1], path[2], path[3],
-           path[4], path[5], path[6], path[7]);
-    Die();
-  }
-
-  SpinMutexLock l(mu);
-  if (fd != kStdoutFd && fd != kStderrFd && fd != kInvalidFd)
-    CloseFile(fd);
-  fd = kInvalidFd;
-  if (internal_strcmp(path, "stdout") == 0) {
-    fd = kStdoutFd;
-  } else if (internal_strcmp(path, "stderr") == 0) {
-    fd = kStderrFd;
-  } else {
-    internal_snprintf(path_prefix, kMaxPathLength, "%s", path);
-  }
-}
-
 // PID of the tracer task in StopTheWorld. It shares the address space with the
 // main process, but has a different PID and thus requires special handling.
 uptr stoptheworld_tracer_pid = 0;
@@ -100,6 +34,8 @@
 // writing to the same log file.
 uptr stoptheworld_tracer_ppid = 0;
 
+StaticSpinMutex CommonSanitizerReportMutex;
+
 void NORETURN ReportMmapFailureAndDie(uptr size, const char *mem_type,
                                       const char *mmap_type, error_t err,
                                       bool raw_report) {
@@ -120,42 +56,6 @@
   UNREACHABLE("unable to mmap");
 }
 
-bool ReadFileToBuffer(const char *file_name, char **buff, uptr *buff_size,
-                      uptr *read_len, uptr max_len, error_t *errno_p) {
-  uptr PageSize = GetPageSizeCached();
-  uptr kMinFileLen = PageSize;
-  *buff = nullptr;
-  *buff_size = 0;
-  *read_len = 0;
-  // The files we usually open are not seekable, so try different buffer sizes.
-  for (uptr size = kMinFileLen; size <= max_len; size *= 2) {
-    fd_t fd = OpenFile(file_name, RdOnly, errno_p);
-    if (fd == kInvalidFd) return false;
-    UnmapOrDie(*buff, *buff_size);
-    *buff = (char*)MmapOrDie(size, __func__);
-    *buff_size = size;
-    *read_len = 0;
-    // Read up to one page at a time.
-    bool reached_eof = false;
-    while (*read_len + PageSize <= size) {
-      uptr just_read;
-      if (!ReadFromFile(fd, *buff + *read_len, PageSize, &just_read, errno_p)) {
-        UnmapOrDie(*buff, *buff_size);
-        return false;
-      }
-      if (just_read == 0) {
-        reached_eof = true;
-        break;
-      }
-      *read_len += just_read;
-    }
-    CloseFile(fd);
-    if (reached_eof)  // We've read the whole file.
-      break;
-  }
-  return true;
-}
-
 typedef bool UptrComparisonFunction(const uptr &a, const uptr &b);
 typedef bool U32ComparisonFunction(const u32 &a, const u32 &b);
 
@@ -199,23 +99,24 @@
   return module;
 }
 
-void ReportErrorSummary(const char *error_message) {
+void ReportErrorSummary(const char *error_message, const char *alt_tool_name) {
   if (!common_flags()->print_summary)
     return;
   InternalScopedString buff(kMaxSummaryLength);
-  buff.append("SUMMARY: %s: %s", SanitizerToolName, error_message);
+  buff.append("SUMMARY: %s: %s",
+              alt_tool_name ? alt_tool_name : SanitizerToolName, error_message);
   __sanitizer_report_error_summary(buff.data());
 }
 
 #if !SANITIZER_GO
-void ReportErrorSummary(const char *error_type, const AddressInfo &info) {
-  if (!common_flags()->print_summary)
-    return;
+void ReportErrorSummary(const char *error_type, const AddressInfo &info,
+                        const char *alt_tool_name) {
+  if (!common_flags()->print_summary) return;
   InternalScopedString buff(kMaxSummaryLength);
   buff.append("%s ", error_type);
   RenderFrame(&buff, "%L %F", 0, info, common_flags()->symbolize_vs_style,
               common_flags()->strip_path_prefix);
-  ReportErrorSummary(buff.data());
+  ReportErrorSummary(buff.data(), alt_tool_name);
 }
 #endif
 
@@ -283,9 +184,11 @@
   }
 }
 
-void LoadedModule::addAddressRange(uptr beg, uptr end, bool executable) {
+void LoadedModule::addAddressRange(uptr beg, uptr end, bool executable,
+                                   bool writable, const char *name) {
   void *mem = InternalAlloc(sizeof(AddressRange));
-  AddressRange *r = new(mem) AddressRange(beg, end, executable);
+  AddressRange *r =
+      new(mem) AddressRange(beg, end, executable, writable, name);
   ranges_.push_back(r);
   if (executable && end > max_executable_address_)
     max_executable_address_ = end;
@@ -357,36 +260,6 @@
   return true;
 }
 
-static const char kPathSeparator = SANITIZER_WINDOWS ? ';' : ':';
-
-char *FindPathToBinary(const char *name) {
-  if (FileExists(name)) {
-    return internal_strdup(name);
-  }
-
-  const char *path = GetEnv("PATH");
-  if (!path)
-    return nullptr;
-  uptr name_len = internal_strlen(name);
-  InternalScopedBuffer<char> buffer(kMaxPathLength);
-  const char *beg = path;
-  while (true) {
-    const char *end = internal_strchrnul(beg, kPathSeparator);
-    uptr prefix_len = end - beg;
-    if (prefix_len + name_len + 2 <= kMaxPathLength) {
-      internal_memcpy(buffer.data(), beg, prefix_len);
-      buffer[prefix_len] = '/';
-      internal_memcpy(&buffer[prefix_len + 1], name, name_len);
-      buffer[prefix_len + 1 + name_len] = '\0';
-      if (FileExists(buffer.data()))
-        return internal_strdup(buffer.data());
-    }
-    if (*end == '\0') break;
-    beg = end + 1;
-  }
-  return nullptr;
-}
-
 static char binary_name_cache_str[kMaxPathLength];
 static char process_name_cache_str[kMaxPathLength];
 
@@ -480,15 +353,6 @@
 using namespace __sanitizer;  // NOLINT
 
 extern "C" {
-void __sanitizer_set_report_path(const char *path) {
-  report_file.SetReportPath(path);
-}
-
-void __sanitizer_set_report_fd(void *fd) {
-  report_file.fd = (fd_t)reinterpret_cast<uptr>(fd);
-  report_file.fd_pid = internal_getpid();
-}
-
 SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_report_error_summary,
                              const char *error_summary) {
   Printf("%s\n", error_summary);
@@ -505,11 +369,4 @@
                                               void (*free_hook)(const void *)) {
   return InstallMallocFreeHooks(malloc_hook, free_hook);
 }
-
-#if !SANITIZER_GO
-SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_print_memory_profile,
-                             int top_percent) {
-  (void)top_percent;
-}
-#endif
 } // extern "C"
diff --git a/lib/sanitizer_common/sanitizer_common.h b/lib/sanitizer_common/sanitizer_common.h
index aa4d9e5..14b92d9 100644
--- a/lib/sanitizer_common/sanitizer_common.h
+++ b/lib/sanitizer_common/sanitizer_common.h
@@ -29,8 +29,11 @@
 #endif
 
 namespace __sanitizer {
-struct StackTrace;
+
 struct AddressInfo;
+struct BufferedStackTrace;
+struct SignalContext;
+struct StackTrace;
 
 // Constants.
 const uptr kWordSize = SANITIZER_WORDSIZE / 8;
@@ -72,7 +75,7 @@
 uptr GetMmapGranularity();
 uptr GetMaxVirtualAddress();
 // Threads
-uptr GetTid();
+tid_t GetTid();
 uptr GetThreadSelf();
 void GetThreadStackTopAndBottom(bool at_initialization, uptr *stack_top,
                                 uptr *stack_bottom);
@@ -85,21 +88,30 @@
   return MmapOrDie(size, mem_type, /*raw_report*/ true);
 }
 void UnmapOrDie(void *addr, uptr size);
+// Behaves just like MmapOrDie, but tolerates out of memory condition, in that
+// case returns nullptr.
+void *MmapOrDieOnFatalError(uptr size, const char *mem_type);
 void *MmapFixedNoReserve(uptr fixed_addr, uptr size,
                          const char *name = nullptr);
 void *MmapNoReserveOrDie(uptr size, const char *mem_type);
 void *MmapFixedOrDie(uptr fixed_addr, uptr size);
+// Behaves just like MmapFixedOrDie, but tolerates out of memory condition, in
+// that case returns nullptr.
+void *MmapFixedOrDieOnFatalError(uptr fixed_addr, uptr size);
 void *MmapFixedNoAccess(uptr fixed_addr, uptr size, const char *name = nullptr);
 void *MmapNoAccess(uptr size);
 // Map aligned chunk of address space; size and alignment are powers of two.
-void *MmapAlignedOrDie(uptr size, uptr alignment, const char *mem_type);
+// Dies on all but out of memory errors, in the latter case returns nullptr.
+void *MmapAlignedOrDieOnFatalError(uptr size, uptr alignment,
+                                   const char *mem_type);
 // Disallow access to a memory range.  Use MmapFixedNoAccess to allocate an
 // unaccessible memory.
 bool MprotectNoAccess(uptr addr, uptr size);
 bool MprotectReadOnly(uptr addr, uptr size);
 
 // Find an available address space.
-uptr FindAvailableMemoryRange(uptr size, uptr alignment, uptr left_padding);
+uptr FindAvailableMemoryRange(uptr size, uptr alignment, uptr left_padding,
+                              uptr *largest_gap_found);
 
 // Used to check if we can map shadow memory to a fixed location.
 bool MemoryRangeIsAvailable(uptr range_start, uptr range_end);
@@ -116,6 +128,28 @@
 void RunMallocHooks(const void *ptr, uptr size);
 void RunFreeHooks(const void *ptr);
 
+class ReservedAddressRange {
+ public:
+  uptr Init(uptr size, const char *name = nullptr, uptr fixed_addr = 0);
+  uptr Map(uptr fixed_addr, uptr size, bool tolerate_enomem = false);
+  void Unmap(uptr addr, uptr size);
+  void *base() const { return base_; }
+  uptr size() const { return size_; }
+
+ private:
+  void* base_;
+  uptr size_;
+  const char* name_;
+};
+
+typedef void (*fill_profile_f)(uptr start, uptr rss, bool file,
+                               /*out*/uptr *stats, uptr stats_size);
+
+// Parse the contents of /proc/self/smaps and generate a memory profile.
+// |cb| is a tool-specific callback that fills the |stats| array containing
+// |stats_size| elements.
+void GetMemoryProfile(fill_profile_f cb, uptr *stats, uptr stats_size);
+
 // InternalScopedBuffer can be used instead of large stack arrays to
 // keep frame size low.
 // FIXME: use InternalAlloc instead of MmapOrDie once
@@ -176,6 +210,7 @@
 void SetLowLevelAllocateCallback(LowLevelAllocateCallback callback);
 
 // IO
+void CatastrophicErrorWrite(const char *buffer, uptr length);
 void RawWrite(const char *buffer);
 bool ColorizeReports();
 void RemoveANSIEscapeSequencesFromString(char *buffer);
@@ -192,66 +227,21 @@
   } while (0)
 
 // Can be used to prevent mixing error reports from different sanitizers.
+// FIXME: Replace with ScopedErrorReportLock and hide.
 extern StaticSpinMutex CommonSanitizerReportMutex;
 
-struct ReportFile {
-  void Write(const char *buffer, uptr length);
-  bool SupportsColors();
-  void SetReportPath(const char *path);
+// Lock sanitizer error reporting and protects against nested errors.
+class ScopedErrorReportLock {
+ public:
+  ScopedErrorReportLock();
+  ~ScopedErrorReportLock();
 
-  // Don't use fields directly. They are only declared public to allow
-  // aggregate initialization.
-
-  // Protects fields below.
-  StaticSpinMutex *mu;
-  // Opened file descriptor. Defaults to stderr. It may be equal to
-  // kInvalidFd, in which case new file will be opened when necessary.
-  fd_t fd;
-  // Path prefix of report file, set via __sanitizer_set_report_path.
-  char path_prefix[kMaxPathLength];
-  // Full path to report, obtained as <path_prefix>.PID
-  char full_path[kMaxPathLength];
-  // PID of the process that opened fd. If a fork() occurs,
-  // the PID of child will be different from fd_pid.
-  uptr fd_pid;
-
- private:
-  void ReopenIfNecessary();
+  static void CheckLocked();
 };
-extern ReportFile report_file;
 
 extern uptr stoptheworld_tracer_pid;
 extern uptr stoptheworld_tracer_ppid;
 
-enum FileAccessMode {
-  RdOnly,
-  WrOnly,
-  RdWr
-};
-
-// Returns kInvalidFd on error.
-fd_t OpenFile(const char *filename, FileAccessMode mode,
-              error_t *errno_p = nullptr);
-void CloseFile(fd_t);
-
-// Return true on success, false on error.
-bool ReadFromFile(fd_t fd, void *buff, uptr buff_size,
-                  uptr *bytes_read = nullptr, error_t *error_p = nullptr);
-bool WriteToFile(fd_t fd, const void *buff, uptr buff_size,
-                 uptr *bytes_written = nullptr, error_t *error_p = nullptr);
-
-bool RenameFile(const char *oldpath, const char *newpath,
-                error_t *error_p = nullptr);
-
-// Scoped file handle closer.
-struct FileCloser {
-  explicit FileCloser(fd_t fd) : fd(fd) {}
-  ~FileCloser() { CloseFile(fd); }
-  fd_t fd;
-};
-
-bool SupportsColoredOutput(fd_t fd);
-
 // Opens the file 'file_name" and reads up to 'max_len' bytes.
 // The resulting buffer is mmaped and stored in '*buff'.
 // The size of the mmaped region is stored in '*buff_size'.
@@ -260,11 +250,6 @@
 bool ReadFileToBuffer(const char *file_name, char **buff, uptr *buff_size,
                       uptr *read_len, uptr max_len = 1 << 26,
                       error_t *errno_p = nullptr);
-// Maps given file to virtual memory, and returns pointer to it
-// (or NULL if mapping fails). Stores the size of mmaped region
-// in '*buff_size'.
-void *MapFileToMemory(const char *file_name, uptr *buff_size);
-void *MapWritableFileToMemory(void *addr, uptr size, fd_t fd, OFF_T offset);
 
 bool IsAccessibleMemoryRange(uptr beg, uptr size);
 
@@ -284,27 +269,8 @@
 void DisableCoreDumperIfNecessary();
 void DumpProcessMap();
 void PrintModuleMap();
-bool FileExists(const char *filename);
 const char *GetEnv(const char *name);
 bool SetEnv(const char *name, const char *value);
-const char *GetPwd();
-char *FindPathToBinary(const char *name);
-bool IsPathSeparator(const char c);
-bool IsAbsolutePath(const char *path);
-// Starts a subprocess and returs its pid.
-// If *_fd parameters are not kInvalidFd their corresponding input/output
-// streams will be redirect to the file. The files will always be closed
-// in parent process even in case of an error.
-// The child process will close all fds after STDERR_FILENO
-// before passing control to a program.
-pid_t StartSubprocess(const char *filename, const char *const argv[],
-                      fd_t stdin_fd = kInvalidFd, fd_t stdout_fd = kInvalidFd,
-                      fd_t stderr_fd = kInvalidFd);
-// Checks if specified process is still running
-bool IsProcessRunning(pid_t pid);
-// Waits for the process to finish and returns its exit code.
-// Returns -1 in case of an error.
-int WaitForProcess(pid_t pid);
 
 u32 GetUid();
 void ReExec();
@@ -317,15 +283,9 @@
 void SetAddressSpaceUnlimited();
 void AdjustStackSize(void *attr);
 void PrepareForSandboxing(__sanitizer_sandbox_arguments *args);
-void CovPrepareForSandboxing(__sanitizer_sandbox_arguments *args);
 void SetSandboxingCallback(void (*f)());
 
-void CoverageUpdateMapping();
-void CovBeforeFork();
-void CovAfterFork(int child_pid);
-
 void InitializeCoverage(bool enabled, const char *coverage_dir);
-void ReInitializeCoverage(bool enabled, const char *coverage_dir);
 
 void InitTlsSize();
 uptr GetTlsSize();
@@ -380,9 +340,26 @@
 
 // Functions related to signal handling.
 typedef void (*SignalHandlerType)(int, void *, void *);
-bool IsHandledDeadlySignal(int signum);
+HandleSignalMode GetHandleSignalMode(int signum);
 void InstallDeadlySignalHandlers(SignalHandlerType handler);
-const char *DescribeSignalOrException(int signo);
+
+// Signal reporting.
+// Each sanitizer uses slightly different implementation of stack unwinding.
+typedef void (*UnwindSignalStackCallbackType)(const SignalContext &sig,
+                                              const void *callback_context,
+                                              BufferedStackTrace *stack);
+// Print deadly signal report and die.
+void HandleDeadlySignal(void *siginfo, void *context, u32 tid,
+                        UnwindSignalStackCallbackType unwind,
+                        const void *unwind_context);
+
+// Part of HandleDeadlySignal, exposed for asan.
+void StartReportDeadlySignal();
+// Part of HandleDeadlySignal, exposed for asan.
+void ReportDeadlySignal(const SignalContext &sig, u32 tid,
+                        UnwindSignalStackCallbackType unwind,
+                        const void *unwind_context);
+
 // Alternative signal stack (POSIX-only).
 void SetAlternateSignalStack();
 void UnsetAlternateSignalStack();
@@ -392,12 +369,16 @@
 // Construct a one-line string:
 //   SUMMARY: SanitizerToolName: error_message
 // and pass it to __sanitizer_report_error_summary.
-void ReportErrorSummary(const char *error_message);
+// If alt_tool_name is provided, it's used in place of SanitizerToolName.
+void ReportErrorSummary(const char *error_message,
+                        const char *alt_tool_name = nullptr);
 // Same as above, but construct error_message as:
 //   error_type file:line[:column][ function]
-void ReportErrorSummary(const char *error_type, const AddressInfo &info);
+void ReportErrorSummary(const char *error_type, const AddressInfo &info,
+                        const char *alt_tool_name = nullptr);
 // Same as above, but obtains AddressInfo by symbolizing top stack trace frame.
-void ReportErrorSummary(const char *error_type, const StackTrace *trace);
+void ReportErrorSummary(const char *error_type, const StackTrace *trace,
+                        const char *alt_tool_name = nullptr);
 
 // Math
 #if SANITIZER_WINDOWS && !defined(__clang__) && !defined(__GNUC__)
@@ -695,6 +676,7 @@
 }
 
 const uptr kModuleUUIDSize = 16;
+const uptr kMaxSegName = 16;
 
 // Represents a binary loaded into virtual memory (e.g. this can be an
 // executable or a shared object).
@@ -713,7 +695,8 @@
   void set(const char *module_name, uptr base_address, ModuleArch arch,
            u8 uuid[kModuleUUIDSize], bool instrumented);
   void clear();
-  void addAddressRange(uptr beg, uptr end, bool executable);
+  void addAddressRange(uptr beg, uptr end, bool executable, bool writable,
+                       const char *name = nullptr);
   bool containsAddress(uptr address) const;
 
   const char *full_name() const { return full_name_; }
@@ -728,9 +711,18 @@
     uptr beg;
     uptr end;
     bool executable;
+    bool writable;
+    char name[kMaxSegName];
 
-    AddressRange(uptr beg, uptr end, bool executable)
-        : next(nullptr), beg(beg), end(end), executable(executable) {}
+    AddressRange(uptr beg, uptr end, bool executable, bool writable,
+                 const char *name)
+        : next(nullptr),
+          beg(beg),
+          end(end),
+          executable(executable),
+          writable(writable) {
+      internal_strncpy(this->name, (name ? name : ""), ARRAY_SIZE(this->name));
+    }
   };
 
   const IntrusiveList<AddressRange> &ranges() const { return ranges_; }
@@ -749,9 +741,10 @@
 // filling this information.
 class ListOfModules {
  public:
-  ListOfModules() : modules_(kInitialCapacity) {}
+  ListOfModules() : initialized(false) {}
   ~ListOfModules() { clear(); }
   void init();
+  void fallbackInit();  // Uses fallback init if available, otherwise clears
   const LoadedModule *begin() const { return modules_.begin(); }
   LoadedModule *begin() { return modules_.begin(); }
   const LoadedModule *end() const { return modules_.end(); }
@@ -767,10 +760,15 @@
     for (auto &module : modules_) module.clear();
     modules_.clear();
   }
+  void clearOrInit() {
+    initialized ? clear() : modules_.Initialize(kInitialCapacity);
+    initialized = true;
+  }
 
-  InternalMmapVector<LoadedModule> modules_;
+  InternalMmapVectorNoCtor<LoadedModule> modules_;
   // We rarely have more than 16K loaded modules.
   static const uptr kInitialCapacity = 1 << 14;
+  bool initialized;
 };
 
 // Callback type for iterating over a set of memory ranges.
@@ -802,8 +800,11 @@
 #if SANITIZER_LINUX
 // Initialize Android logging. Any writes before this are silently lost.
 void AndroidLogInit();
+void SetAbortMessage(const char *);
 #else
 INLINE void AndroidLogInit() {}
+// FIXME: MacOS implementation could use CRSetCrashLogMessage.
+INLINE void SetAbortMessage(const char *) {}
 #endif
 
 #if SANITIZER_ANDROID
@@ -843,36 +844,50 @@
 }
 
 struct SignalContext {
+  void *siginfo;
   void *context;
   uptr addr;
   uptr pc;
   uptr sp;
   uptr bp;
   bool is_memory_access;
-
   enum WriteFlag { UNKNOWN, READ, WRITE } write_flag;
 
-  SignalContext(void *context, uptr addr, uptr pc, uptr sp, uptr bp,
-                bool is_memory_access, WriteFlag write_flag)
-      : context(context),
-        addr(addr),
-        pc(pc),
-        sp(sp),
-        bp(bp),
-        is_memory_access(is_memory_access),
-        write_flag(write_flag) {}
+  // VS2013 doesn't implement unrestricted unions, so we need a trivial default
+  // constructor
+  SignalContext() = default;
+
+  // Creates signal context in a platform-specific manner.
+  // SignalContext is going to keep pointers to siginfo and context without
+  // owning them.
+  SignalContext(void *siginfo, void *context)
+      : siginfo(siginfo),
+        context(context),
+        addr(GetAddress()),
+        is_memory_access(IsMemoryAccess()),
+        write_flag(GetWriteFlag()) {
+    InitPcSpBp();
+  }
 
   static void DumpAllRegisters(void *context);
 
-  // Creates signal context in a platform-specific manner.
-  static SignalContext Create(void *siginfo, void *context);
+  // Type of signal e.g. SIGSEGV or EXCEPTION_ACCESS_VIOLATION.
+  int GetType() const;
 
-  // Returns true if the "context" indicates a memory write.
-  static WriteFlag GetWriteFlag(void *context);
+  // String description of the signal.
+  const char *Describe() const;
+
+  // Returns true if signal is stack overflow.
+  bool IsStackOverflow() const;
+
+ private:
+  // Platform specific initialization.
+  void InitPcSpBp();
+  uptr GetAddress() const;
+  WriteFlag GetWriteFlag() const;
+  bool IsMemoryAccess() const;
 };
 
-void GetPcSpBp(void *context, uptr *pc, uptr *sp, uptr *bp);
-
 void MaybeReexec();
 
 template <typename Fn>
@@ -913,6 +928,10 @@
 
 void CheckNoDeepBind(const char *filename, int flag);
 
+// Returns the requested amount of random data (up to 256 bytes) that can then
+// be used to seed a PRNG. Defaults to blocking like the underlying syscall.
+bool GetRandom(void *buffer, uptr length, bool blocking = true);
+
 }  // namespace __sanitizer
 
 inline void *operator new(__sanitizer::operator_new_size_type size,
diff --git a/lib/sanitizer_common/sanitizer_common_interceptors.inc b/lib/sanitizer_common/sanitizer_common_interceptors.inc
index fbffb56..321126c 100644
--- a/lib/sanitizer_common/sanitizer_common_interceptors.inc
+++ b/lib/sanitizer_common/sanitizer_common_interceptors.inc
@@ -24,7 +24,8 @@
 //   COMMON_INTERCEPTOR_SET_THREAD_NAME
 //   COMMON_INTERCEPTOR_ON_DLOPEN
 //   COMMON_INTERCEPTOR_ON_EXIT
-//   COMMON_INTERCEPTOR_MUTEX_LOCK
+//   COMMON_INTERCEPTOR_MUTEX_PRE_LOCK
+//   COMMON_INTERCEPTOR_MUTEX_POST_LOCK
 //   COMMON_INTERCEPTOR_MUTEX_UNLOCK
 //   COMMON_INTERCEPTOR_MUTEX_REPAIR
 //   COMMON_INTERCEPTOR_SET_PTHREAD_NAME
@@ -33,12 +34,16 @@
 //   COMMON_INTERCEPTOR_MEMSET_IMPL
 //   COMMON_INTERCEPTOR_MEMMOVE_IMPL
 //   COMMON_INTERCEPTOR_MEMCPY_IMPL
+//   COMMON_INTERCEPTOR_COPY_STRING
+//   COMMON_INTERCEPTOR_STRNDUP_IMPL
 //===----------------------------------------------------------------------===//
 
 #include "interception/interception.h"
 #include "sanitizer_addrhashmap.h"
+#include "sanitizer_errno.h"
 #include "sanitizer_placement_new.h"
 #include "sanitizer_platform_interceptors.h"
+#include "sanitizer_symbolizer.h"
 #include "sanitizer_tls_get_addr.h"
 
 #include <stdarg.h>
@@ -89,8 +94,12 @@
 #define COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd) {}
 #endif
 
-#ifndef COMMON_INTERCEPTOR_MUTEX_LOCK
-#define COMMON_INTERCEPTOR_MUTEX_LOCK(ctx, m) {}
+#ifndef COMMON_INTERCEPTOR_MUTEX_PRE_LOCK
+#define COMMON_INTERCEPTOR_MUTEX_PRE_LOCK(ctx, m) {}
+#endif
+
+#ifndef COMMON_INTERCEPTOR_MUTEX_POST_LOCK
+#define COMMON_INTERCEPTOR_MUTEX_POST_LOCK(ctx, m) {}
 #endif
 
 #ifndef COMMON_INTERCEPTOR_MUTEX_UNLOCK
@@ -134,12 +143,9 @@
 #define COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED (0)
 #endif
 
-#define COMMON_INTERCEPTOR_READ_STRING_OF_LEN(ctx, s, len, n)       \
-    COMMON_INTERCEPTOR_READ_RANGE((ctx), (s),                       \
-      common_flags()->strict_string_checks ? (len) + 1 : (n) )
-
 #define COMMON_INTERCEPTOR_READ_STRING(ctx, s, n)                   \
-    COMMON_INTERCEPTOR_READ_STRING_OF_LEN((ctx), (s), REAL(strlen)(s), (n))
+    COMMON_INTERCEPTOR_READ_RANGE((ctx), (s),                       \
+      common_flags()->strict_string_checks ? (REAL(strlen)(s)) + 1 : (n) )
 
 #ifndef COMMON_INTERCEPTOR_ON_DLOPEN
 #define COMMON_INTERCEPTOR_ON_DLOPEN(filename, flag) \
@@ -215,6 +221,24 @@
   }
 #endif
 
+#ifndef COMMON_INTERCEPTOR_COPY_STRING
+#define COMMON_INTERCEPTOR_COPY_STRING(ctx, to, from, size) {}
+#endif
+
+#ifndef COMMON_INTERCEPTOR_STRNDUP_IMPL
+#define COMMON_INTERCEPTOR_STRNDUP_IMPL(ctx, s, size)                         \
+  COMMON_INTERCEPTOR_ENTER(ctx, strndup, s, size);                            \
+  uptr copy_length = internal_strnlen(s, size);                               \
+  char *new_mem = (char *)WRAP(malloc)(copy_length + 1);                      \
+  if (common_flags()->intercept_strndup) {                                    \
+    COMMON_INTERCEPTOR_READ_STRING(ctx, s, Min(size, copy_length + 1));       \
+  }                                                                           \
+  COMMON_INTERCEPTOR_COPY_STRING(ctx, new_mem, s, copy_length);               \
+  internal_memcpy(new_mem, s, copy_length);                                   \
+  new_mem[copy_length] = '\0';                                                \
+  return new_mem;
+#endif
+
 struct FileMetadata {
   // For open_memstream().
   char **addr;
@@ -235,7 +259,7 @@
 
 static MetadataHashMap *interceptor_metadata_map;
 
-#if SI_NOT_WINDOWS
+#if SI_POSIX
 UNUSED static void SetInterceptorMetadata(__sanitizer_FILE *addr,
                                           const FileMetadata &file) {
   MetadataHashMap::Handle h(interceptor_metadata_map, (uptr)addr);
@@ -262,7 +286,7 @@
   MetadataHashMap::Handle h(interceptor_metadata_map, (uptr)addr, true);
   CHECK(h.exists());
 }
-#endif  // SI_NOT_WINDOWS
+#endif  // SI_POSIX
 
 #if SANITIZER_INTERCEPT_STRLEN
 INTERCEPTOR(SIZE_T, strlen, const char *s) {
@@ -298,11 +322,31 @@
 #define INIT_STRNLEN
 #endif
 
+#if SANITIZER_INTERCEPT_STRNDUP
+INTERCEPTOR(char*, strndup, const char *s, uptr size) {
+  void *ctx;
+  COMMON_INTERCEPTOR_STRNDUP_IMPL(ctx, s, size);
+}
+#define INIT_STRNDUP COMMON_INTERCEPT_FUNCTION(strndup)
+#else
+#define INIT_STRNDUP
+#endif // SANITIZER_INTERCEPT_STRNDUP
+
+#if SANITIZER_INTERCEPT___STRNDUP
+INTERCEPTOR(char*, __strndup, const char *s, uptr size) {
+  void *ctx;
+  COMMON_INTERCEPTOR_STRNDUP_IMPL(ctx, s, size);
+}
+#define INIT___STRNDUP COMMON_INTERCEPT_FUNCTION(__strndup)
+#else
+#define INIT___STRNDUP
+#endif // SANITIZER_INTERCEPT___STRNDUP
+
 #if SANITIZER_INTERCEPT_TEXTDOMAIN
 INTERCEPTOR(char*, textdomain, const char *domainname) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, textdomain, domainname);
-  COMMON_INTERCEPTOR_READ_STRING(ctx, domainname, 0);
+  if (domainname) COMMON_INTERCEPTOR_READ_STRING(ctx, domainname, 0);
   char *domain = REAL(textdomain)(domainname);
   if (domain) {
     COMMON_INTERCEPTOR_INITIALIZE_RANGE(domain, REAL(strlen)(domain) + 1);
@@ -445,8 +489,7 @@
                                const char *s2) {
     uptr len1 = REAL(strlen)(s1);
     uptr len2 = REAL(strlen)(s2);
-    COMMON_INTERCEPTOR_READ_STRING_OF_LEN(ctx, s1, len1,
-                                          r ? r - s1 + len2 : len1 + 1);
+    COMMON_INTERCEPTOR_READ_STRING(ctx, s1, r ? r - s1 + len2 : len1 + 1);
     COMMON_INTERCEPTOR_READ_RANGE(ctx, s2, len2 + 1);
 }
 #endif
@@ -495,6 +538,52 @@
 #define INIT_STRCASESTR
 #endif
 
+#if SANITIZER_INTERCEPT_STRTOK
+
+INTERCEPTOR(char*, strtok, char *str, const char *delimiters) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, strtok, str, delimiters);
+  if (!common_flags()->intercept_strtok) {
+    return REAL(strtok)(str, delimiters);
+  }
+  if (common_flags()->strict_string_checks) {
+    // If strict_string_checks is enabled, we check the whole first argument
+    // string on the first call (strtok saves this string in a static buffer
+    // for subsequent calls). We do not need to check strtok's result.
+    // As the delimiters can change, we check them every call.
+    if (str != nullptr) {
+      COMMON_INTERCEPTOR_READ_RANGE(ctx, str, REAL(strlen)(str) + 1);
+    }
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, delimiters,
+                                  REAL(strlen)(delimiters) + 1);
+    return REAL(strtok)(str, delimiters);
+  } else {
+    // However, when strict_string_checks is disabled we cannot check the
+    // whole string on the first call. Instead, we check the result string
+    // which is guaranteed to be a NULL-terminated substring of the first
+    // argument. We also conservatively check one character of str and the
+    // delimiters.
+    if (str != nullptr) {
+      COMMON_INTERCEPTOR_READ_STRING(ctx, str, 1);
+    }
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, delimiters, 1);
+    char *result = REAL(strtok)(str, delimiters);
+    if (result != nullptr) {
+      COMMON_INTERCEPTOR_READ_RANGE(ctx, result, REAL(strlen)(result) + 1);
+    } else if (str != nullptr) {
+      // No delimiter were found, it's safe to assume that the entire str was
+      // scanned.
+      COMMON_INTERCEPTOR_READ_RANGE(ctx, str, REAL(strlen)(str) + 1);
+    }
+    return result;
+  }
+}
+
+#define INIT_STRTOK COMMON_INTERCEPT_FUNCTION(strtok)
+#else
+#define INIT_STRTOK
+#endif
+
 #if SANITIZER_INTERCEPT_MEMMEM
 DECLARE_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_memmem, uptr called_pc,
                               const void *s1, SIZE_T len1, const void *s2,
@@ -526,10 +615,11 @@
     return internal_strchr(s, c);
   COMMON_INTERCEPTOR_ENTER(ctx, strchr, s, c);
   char *result = REAL(strchr)(s, c);
-  uptr len = internal_strlen(s);
-  uptr n = result ? result - s + 1 : len + 1;
-  if (common_flags()->intercept_strchr)
-    COMMON_INTERCEPTOR_READ_STRING_OF_LEN(ctx, s, len, n);
+  if (common_flags()->intercept_strchr) {
+    // Keep strlen as macro argument, as macro may ignore it.
+    COMMON_INTERCEPTOR_READ_STRING(ctx, s,
+      (result ? result - s : REAL(strlen)(s)) + 1);
+  }
   return result;
 }
 #define INIT_STRCHR COMMON_INTERCEPT_FUNCTION(strchr)
@@ -558,9 +648,8 @@
   if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
     return internal_strrchr(s, c);
   COMMON_INTERCEPTOR_ENTER(ctx, strrchr, s, c);
-  uptr len = internal_strlen(s);
   if (common_flags()->intercept_strchr)
-    COMMON_INTERCEPTOR_READ_STRING_OF_LEN(ctx, s, len, len + 1);
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, s, REAL(strlen)(s) + 1);
   return REAL(strrchr)(s, c);
 }
 #define INIT_STRRCHR COMMON_INTERCEPT_FUNCTION(strrchr)
@@ -798,7 +887,7 @@
 #define INIT_FREXPF_FREXPL
 #endif  // SANITIZER_INTERCEPT_FREXPF_FREXPL
 
-#if SI_NOT_WINDOWS
+#if SI_POSIX
 static void write_iovec(void *ctx, struct __sanitizer_iovec *iovec,
                         SIZE_T iovlen, SIZE_T maxlen) {
   for (SIZE_T i = 0; i < iovlen && maxlen; ++i) {
@@ -837,6 +926,23 @@
 #define INIT_READ
 #endif
 
+#if SANITIZER_INTERCEPT_FREAD
+INTERCEPTOR(SIZE_T, fread, void *ptr, SIZE_T size, SIZE_T nmemb, void *file) {
+  // libc file streams can call user-supplied functions, see fopencookie.
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, fread, ptr, size, nmemb, file);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://github.com/google/sanitizers/issues/321.
+  SIZE_T res = REAL(fread)(ptr, size, nmemb, file);
+  if (res > 0) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, res * size);
+  return res;
+}
+#define INIT_FREAD COMMON_INTERCEPT_FUNCTION(fread)
+#else
+#define INIT_FREAD
+#endif
+
 #if SANITIZER_INTERCEPT_PREAD
 INTERCEPTOR(SSIZE_T, pread, int fd, void *ptr, SIZE_T count, OFF_T offset) {
   void *ctx;
@@ -937,6 +1043,20 @@
 #define INIT_WRITE
 #endif
 
+#if SANITIZER_INTERCEPT_FWRITE
+INTERCEPTOR(SIZE_T, fwrite, const void *p, uptr size, uptr nmemb, void *file) {
+  // libc file streams can call user-supplied functions, see fopencookie.
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, fwrite, p, size, nmemb, file);
+  SIZE_T res = REAL(fwrite)(p, size, nmemb, file);
+  if (res > 0) COMMON_INTERCEPTOR_READ_RANGE(ctx, p, res * size);
+  return res;
+}
+#define INIT_FWRITE COMMON_INTERCEPT_FUNCTION(fwrite)
+#else
+#define INIT_FWRITE
+#endif
+
 #if SANITIZER_INTERCEPT_PWRITE
 INTERCEPTOR(SSIZE_T, pwrite, int fd, void *ptr, SIZE_T count, OFF_T offset) {
   void *ctx;
@@ -3252,7 +3372,7 @@
 //  * GNU version returns message pointer, which points to either buf or some
 //    static storage.
 #if ((_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600) && !_GNU_SOURCE) || \
-    SANITIZER_MAC
+    SANITIZER_MAC || SANITIZER_ANDROID
 // POSIX version. Spec is not clear on whether buf is NULL-terminated.
 // At least on OSX, buf contents are valid even when the call fails.
 INTERCEPTOR(int, strerror_r, int errnum, char *buf, SIZE_T buflen) {
@@ -3277,7 +3397,10 @@
   // its metadata. See
   // https://github.com/google/sanitizers/issues/321.
   char *res = REAL(strerror_r)(errnum, buf, buflen);
-  COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, REAL(strlen)(res) + 1);
+  if (res == buf)
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, REAL(strlen)(res) + 1);
+  else
+    COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, REAL(strlen)(res) + 1);
   return res;
 }
 #endif //(_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600) && !_GNU_SOURCE ||
@@ -3420,7 +3543,8 @@
   // its metadata. See
   // https://github.com/google/sanitizers/issues/321.
   int res = REAL(getgroups)(size, lst);
-  if (res && lst) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, lst, res * sizeof(*lst));
+  if (res >= 0 && lst && size > 0)
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, lst, res * sizeof(*lst));
   return res;
 }
 #define INIT_GETGROUPS COMMON_INTERCEPT_FUNCTION(getgroups);
@@ -3466,7 +3590,7 @@
   if (fds && nfds) read_pollfd(ctx, fds, nfds);
   if (timeout_ts)
     COMMON_INTERCEPTOR_READ_RANGE(ctx, timeout_ts, struct_timespec_sz);
-  // FIXME: read sigmask when all of sigemptyset, etc are intercepted.
+  if (sigmask) COMMON_INTERCEPTOR_READ_RANGE(ctx, sigmask, sizeof(*sigmask));
   int res =
       COMMON_INTERCEPTOR_BLOCK_REAL(ppoll)(fds, nfds, timeout_ts, sigmask);
   if (fds && nfds) write_pollfd(ctx, fds, nfds);
@@ -3507,7 +3631,7 @@
 INTERCEPTOR(int, sigwait, __sanitizer_sigset_t *set, int *sig) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, sigwait, set, sig);
-  // FIXME: read sigset_t when all of sigemptyset, etc are intercepted
+  if (set) COMMON_INTERCEPTOR_READ_RANGE(ctx, set, sizeof(*set));
   // FIXME: under ASan the call below may write to freed memory and corrupt
   // its metadata. See
   // https://github.com/google/sanitizers/issues/321.
@@ -3524,7 +3648,7 @@
 INTERCEPTOR(int, sigwaitinfo, __sanitizer_sigset_t *set, void *info) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, sigwaitinfo, set, info);
-  // FIXME: read sigset_t when all of sigemptyset, etc are intercepted
+  if (set) COMMON_INTERCEPTOR_READ_RANGE(ctx, set, sizeof(*set));
   // FIXME: under ASan the call below may write to freed memory and corrupt
   // its metadata. See
   // https://github.com/google/sanitizers/issues/321.
@@ -3543,7 +3667,7 @@
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, sigtimedwait, set, info, timeout);
   if (timeout) COMMON_INTERCEPTOR_READ_RANGE(ctx, timeout, struct_timespec_sz);
-  // FIXME: read sigset_t when all of sigemptyset, etc are intercepted
+  if (set) COMMON_INTERCEPTOR_READ_RANGE(ctx, set, sizeof(*set));
   // FIXME: under ASan the call below may write to freed memory and corrupt
   // its metadata. See
   // https://github.com/google/sanitizers/issues/321.
@@ -3606,7 +3730,7 @@
             __sanitizer_sigset_t *oldset) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, sigprocmask, how, set, oldset);
-  // FIXME: read sigset_t when all of sigemptyset, etc are intercepted
+  if (set) COMMON_INTERCEPTOR_READ_RANGE(ctx, set, sizeof(*set));
   // FIXME: under ASan the call below may write to freed memory and corrupt
   // its metadata. See
   // https://github.com/google/sanitizers/issues/321.
@@ -3675,11 +3799,12 @@
 INTERCEPTOR(int, pthread_mutex_lock, void *m) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, pthread_mutex_lock, m);
+  COMMON_INTERCEPTOR_MUTEX_PRE_LOCK(ctx, m);
   int res = REAL(pthread_mutex_lock)(m);
   if (res == errno_EOWNERDEAD)
     COMMON_INTERCEPTOR_MUTEX_REPAIR(ctx, m);
   if (res == 0 || res == errno_EOWNERDEAD)
-    COMMON_INTERCEPTOR_MUTEX_LOCK(ctx, m);
+    COMMON_INTERCEPTOR_MUTEX_POST_LOCK(ctx, m);
   if (res == errno_EINVAL)
     COMMON_INTERCEPTOR_MUTEX_INVALID(ctx, m);
   return res;
@@ -4280,7 +4405,7 @@
 #define INIT_TEMPNAM
 #endif
 
-#if SANITIZER_INTERCEPT_PTHREAD_SETNAME_NP
+#if SANITIZER_INTERCEPT_PTHREAD_SETNAME_NP && !SANITIZER_NETBSD
 INTERCEPTOR(int, pthread_setname_np, uptr thread, const char *name) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, pthread_setname_np, thread, name);
@@ -4289,6 +4414,17 @@
   return REAL(pthread_setname_np)(thread, name);
 }
 #define INIT_PTHREAD_SETNAME_NP COMMON_INTERCEPT_FUNCTION(pthread_setname_np);
+#elif SANITIZER_INTERCEPT_PTHREAD_SETNAME_NP && SANITIZER_NETBSD
+INTERCEPTOR(int, pthread_setname_np, uptr thread, const char *name, void *arg) {
+  void *ctx;
+  char newname[32]; // PTHREAD_MAX_NAMELEN_NP=32
+  COMMON_INTERCEPTOR_ENTER(ctx, pthread_setname_np, thread, name, arg);
+  COMMON_INTERCEPTOR_READ_STRING(ctx, name, 0);
+  internal_snprintf(newname, sizeof(newname), name, arg);
+  COMMON_INTERCEPTOR_SET_PTHREAD_NAME(ctx, thread, newname);
+  return REAL(pthread_setname_np)(thread, name, arg);
+}
+#define INIT_PTHREAD_SETNAME_NP COMMON_INTERCEPT_FUNCTION(pthread_setname_np);
 #else
 #define INIT_PTHREAD_SETNAME_NP
 #endif
@@ -4553,7 +4689,7 @@
   // its metadata. See
   // https://github.com/google/sanitizers/issues/321.
   SIZE_T res = REAL(iconv)(cd, inbuf, inbytesleft, outbuf, outbytesleft);
-  if (res != (SIZE_T) - 1 && outbuf && *outbuf > outbuf_orig) {
+  if (outbuf && *outbuf > outbuf_orig) {
     SIZE_T sz = (char *)*outbuf - (char *)outbuf_orig;
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, outbuf_orig, sz);
   }
@@ -5451,6 +5587,7 @@
   if (filename) COMMON_INTERCEPTOR_READ_STRING(ctx, filename, 0);
   COMMON_INTERCEPTOR_ON_DLOPEN(filename, flag);
   void *res = REAL(dlopen)(filename, flag);
+  Symbolizer::GetOrInit()->InvalidateModuleList();
   COMMON_INTERCEPTOR_LIBRARY_LOADED(filename, res);
   return res;
 }
@@ -5459,6 +5596,7 @@
   void *ctx;
   COMMON_INTERCEPTOR_ENTER_NOIGNORE(ctx, dlclose, handle);
   int res = REAL(dlclose)(handle);
+  Symbolizer::GetOrInit()->InvalidateModuleList();
   COMMON_INTERCEPTOR_LIBRARY_UNLOADED();
   return res;
 }
@@ -6047,6 +6185,86 @@
 #define INIT_UTMPX
 #endif
 
+#if SANITIZER_INTERCEPT_GETLOADAVG
+INTERCEPTOR(int, getloadavg, double *loadavg, int nelem) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, getloadavg, loadavg, nelem);
+  int res = REAL(getloadavg)(loadavg, nelem);
+  if (res > 0)
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, loadavg, res * sizeof(*loadavg));
+  return res;
+}
+#define INIT_GETLOADAVG                      \
+  COMMON_INTERCEPT_FUNCTION(getloadavg);
+#else
+#define INIT_GETLOADAVG
+#endif
+
+#if SANITIZER_INTERCEPT_MCHECK_MPROBE
+INTERCEPTOR(int, mcheck, void (*abortfunc)(int mstatus)) {
+  return 0;
+}
+
+INTERCEPTOR(int, mcheck_pedantic, void (*abortfunc)(int mstatus)) {
+  return 0;
+}
+
+INTERCEPTOR(int, mprobe, void *ptr) {
+  return 0;
+}
+#endif
+
+INTERCEPTOR(SIZE_T, wcslen, const wchar_t *s) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, wcslen, s);
+  SIZE_T res = REAL(wcslen)(s);
+  COMMON_INTERCEPTOR_READ_RANGE(ctx, s, sizeof(wchar_t) * (res + 1));
+  return res;
+}
+
+INTERCEPTOR(SIZE_T, wcsnlen, const wchar_t *s, SIZE_T n) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, wcsnlen, s, n);
+  SIZE_T res = REAL(wcsnlen)(s, n);
+  COMMON_INTERCEPTOR_READ_RANGE(ctx, s, sizeof(wchar_t) * Min(res + 1, n));
+  return res;
+}
+#define INIT_WCSLEN                  \
+  COMMON_INTERCEPT_FUNCTION(wcslen); \
+  COMMON_INTERCEPT_FUNCTION(wcsnlen);
+
+#if SANITIZER_INTERCEPT_WCSCAT
+INTERCEPTOR(wchar_t *, wcscat, wchar_t *dst, const wchar_t *src) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, wcscat, dst, src);
+  SIZE_T src_size = REAL(wcslen)(src);
+  SIZE_T dst_size = REAL(wcslen)(dst);
+  COMMON_INTERCEPTOR_READ_RANGE(ctx, src, (src_size + 1) * sizeof(wchar_t));
+  COMMON_INTERCEPTOR_READ_RANGE(ctx, dst, (dst_size + 1) * sizeof(wchar_t));
+  COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst + dst_size,
+                                 (src_size + 1) * sizeof(wchar_t));
+  return REAL(wcscat)(dst, src);  // NOLINT
+}
+
+INTERCEPTOR(wchar_t *, wcsncat, wchar_t *dst, const wchar_t *src, SIZE_T n) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, wcsncat, dst, src, n);
+  SIZE_T src_size = REAL(wcsnlen)(src, n);
+  SIZE_T dst_size = REAL(wcslen)(dst);
+  COMMON_INTERCEPTOR_READ_RANGE(ctx, src,
+                                Min(src_size + 1, n) * sizeof(wchar_t));
+  COMMON_INTERCEPTOR_READ_RANGE(ctx, dst, (dst_size + 1) * sizeof(wchar_t));
+  COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst + dst_size,
+                                 (src_size + 1) * sizeof(wchar_t));
+  return REAL(wcsncat)(dst, src, n);  // NOLINT
+}
+#define INIT_WCSCAT                  \
+  COMMON_INTERCEPT_FUNCTION(wcscat); \
+  COMMON_INTERCEPT_FUNCTION(wcsncat);
+#else
+#define INIT_WCSCAT
+#endif
+
 static void InitializeCommonInterceptors() {
   static u64 metadata_mem[sizeof(MetadataHashMap) / sizeof(u64) + 1];
   interceptor_metadata_map = new((void *)&metadata_mem) MetadataHashMap();
@@ -6054,6 +6272,8 @@
   INIT_TEXTDOMAIN;
   INIT_STRLEN;
   INIT_STRNLEN;
+  INIT_STRNDUP;
+  INIT___STRNDUP;
   INIT_STRCMP;
   INIT_STRNCMP;
   INIT_STRCASECMP;
@@ -6064,6 +6284,7 @@
   INIT_STRCHRNUL;
   INIT_STRRCHR;
   INIT_STRSPN;
+  INIT_STRTOK;
   INIT_STRPBRK;
   INIT_MEMSET;
   INIT_MEMMOVE;
@@ -6073,12 +6294,14 @@
   INIT_MEMRCHR;
   INIT_MEMMEM;
   INIT_READ;
+  INIT_FREAD;
   INIT_PREAD;
   INIT_PREAD64;
   INIT_READV;
   INIT_PREADV;
   INIT_PREADV64;
   INIT_WRITE;
+  INIT_FWRITE;
   INIT_PWRITE;
   INIT_PWRITE64;
   INIT_WRITEV;
@@ -6245,4 +6468,7 @@
   // FIXME: add other *stat interceptors.
   INIT_UTMP;
   INIT_UTMPX;
+  INIT_GETLOADAVG;
+  INIT_WCSLEN;
+  INIT_WCSCAT;
 }
diff --git a/lib/sanitizer_common/sanitizer_common_interceptors_format.inc b/lib/sanitizer_common/sanitizer_common_interceptors_format.inc
index 1256349..5ebe5a6 100644
--- a/lib/sanitizer_common/sanitizer_common_interceptors_format.inc
+++ b/lib/sanitizer_common/sanitizer_common_interceptors_format.inc
@@ -325,8 +325,8 @@
       continue;
     int size = scanf_get_value_size(&dir);
     if (size == FSS_INVALID) {
-      Report("WARNING: unexpected format specifier in scanf interceptor: "
-        "%.*s\n", dir.end - dir.begin, dir.begin);
+      Report("%s: WARNING: unexpected format specifier in scanf interceptor: ",
+             SanitizerToolName, "%.*s\n", dir.end - dir.begin, dir.begin);
       break;
     }
     void *argp = va_arg(aq, void *);
@@ -520,8 +520,12 @@
       continue;
     int size = printf_get_value_size(&dir);
     if (size == FSS_INVALID) {
-      Report("WARNING: unexpected format specifier in printf "
-             "interceptor: %.*s\n", dir.end - dir.begin, dir.begin);
+      static int ReportedOnce;
+      if (!ReportedOnce++)
+        Report(
+            "%s: WARNING: unexpected format specifier in printf "
+            "interceptor: %.*s (reported once per process)\n",
+            SanitizerToolName, dir.end - dir.begin, dir.begin);
       break;
     }
     if (dir.convSpecifier == 'n') {
diff --git a/lib/sanitizer_common/sanitizer_common_interface.inc b/lib/sanitizer_common/sanitizer_common_interface.inc
index 4f0e940..2568df3 100644
--- a/lib/sanitizer_common/sanitizer_common_interface.inc
+++ b/lib/sanitizer_common/sanitizer_common_interface.inc
@@ -34,6 +34,7 @@
 INTERFACE_FUNCTION(__sanitizer_get_ownership)
 INTERFACE_FUNCTION(__sanitizer_get_unmapped_bytes)
 INTERFACE_FUNCTION(__sanitizer_install_malloc_and_free_hooks)
+INTERFACE_FUNCTION(__sanitizer_purge_allocator)
+INTERFACE_FUNCTION(__sanitizer_print_memory_profile)
 INTERFACE_WEAK_FUNCTION(__sanitizer_free_hook)
 INTERFACE_WEAK_FUNCTION(__sanitizer_malloc_hook)
-INTERFACE_WEAK_FUNCTION(__sanitizer_print_memory_profile)
diff --git a/lib/sanitizer_common/sanitizer_common_libcdep.cc b/lib/sanitizer_common/sanitizer_common_libcdep.cc
index e96db6d..4048960 100644
--- a/lib/sanitizer_common/sanitizer_common_libcdep.cc
+++ b/lib/sanitizer_common/sanitizer_common_libcdep.cc
@@ -14,7 +14,10 @@
 #include "sanitizer_common.h"
 
 #include "sanitizer_allocator_interface.h"
+#include "sanitizer_file.h"
 #include "sanitizer_flags.h"
+#include "sanitizer_procmaps.h"
+#include "sanitizer_report_decorator.h"
 #include "sanitizer_stackdepot.h"
 #include "sanitizer_stacktrace.h"
 #include "sanitizer_symbolizer.h"
@@ -25,12 +28,25 @@
 
 namespace __sanitizer {
 
+#if !SANITIZER_FUCHSIA
+
 bool ReportFile::SupportsColors() {
   SpinMutexLock l(mu);
   ReopenIfNecessary();
   return SupportsColoredOutput(fd);
 }
 
+static INLINE bool ReportSupportsColors() {
+  return report_file.SupportsColors();
+}
+
+#else  // SANITIZER_FUCHSIA
+
+// Fuchsia's logs always go through post-processing that handles colorization.
+static INLINE bool ReportSupportsColors() { return true; }
+
+#endif  // !SANITIZER_FUCHSIA
+
 bool ColorizeReports() {
   // FIXME: Add proper Windows support to AnsiColorDecorator and re-enable color
   // printing on Windows.
@@ -39,7 +55,7 @@
 
   const char *flag = common_flags()->color;
   return internal_strcmp(flag, "always") == 0 ||
-         (internal_strcmp(flag, "auto") == 0 && report_file.SupportsColors());
+         (internal_strcmp(flag, "auto") == 0 && ReportSupportsColors());
 }
 
 static void (*sandboxing_callback)();
@@ -47,7 +63,8 @@
   sandboxing_callback = f;
 }
 
-void ReportErrorSummary(const char *error_type, const StackTrace *stack) {
+void ReportErrorSummary(const char *error_type, const StackTrace *stack,
+                        const char *alt_tool_name) {
 #if !SANITIZER_GO
   if (!common_flags()->print_summary)
     return;
@@ -59,7 +76,7 @@
   // Maybe sometimes we need to choose another frame (e.g. skip memcpy/etc).
   uptr pc = StackTrace::GetPreviousInstructionPc(stack->trace[0]);
   SymbolizedStack *frame = Symbolizer::GetOrInit()->SymbolizePC(pc);
-  ReportErrorSummary(error_type, frame->info);
+  ReportErrorSummary(error_type, frame->info, alt_tool_name);
   frame->ClearAll();
 #endif
 }
@@ -123,13 +140,134 @@
     if (heap_profile &&
         current_rss_mb > rss_during_last_reported_profile * 1.1) {
       Printf("\n\nHEAP PROFILE at RSS %zdMb\n", current_rss_mb);
-      __sanitizer_print_memory_profile(90);
+      __sanitizer_print_memory_profile(90, 20);
       rss_during_last_reported_profile = current_rss_mb;
     }
   }
 }
 #endif
 
+#if !SANITIZER_FUCHSIA && !SANITIZER_GO
+void StartReportDeadlySignal() {
+  // Write the first message using fd=2, just in case.
+  // It may actually fail to write in case stderr is closed.
+  CatastrophicErrorWrite(SanitizerToolName, internal_strlen(SanitizerToolName));
+  static const char kDeadlySignal[] = ":DEADLYSIGNAL\n";
+  CatastrophicErrorWrite(kDeadlySignal, sizeof(kDeadlySignal) - 1);
+}
+
+static void MaybeReportNonExecRegion(uptr pc) {
+#if SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_NETBSD
+  MemoryMappingLayout proc_maps(/*cache_enabled*/ true);
+  MemoryMappedSegment segment;
+  while (proc_maps.Next(&segment)) {
+    if (pc >= segment.start && pc < segment.end && !segment.IsExecutable())
+      Report("Hint: PC is at a non-executable region. Maybe a wild jump?\n");
+  }
+#endif
+}
+
+static void PrintMemoryByte(InternalScopedString *str, const char *before,
+                            u8 byte) {
+  SanitizerCommonDecorator d;
+  str->append("%s%s%x%x%s ", before, d.MemoryByte(), byte >> 4, byte & 15,
+              d.Default());
+}
+
+static void MaybeDumpInstructionBytes(uptr pc) {
+  if (!common_flags()->dump_instruction_bytes || (pc < GetPageSizeCached()))
+    return;
+  InternalScopedString str(1024);
+  str.append("First 16 instruction bytes at pc: ");
+  if (IsAccessibleMemoryRange(pc, 16)) {
+    for (int i = 0; i < 16; ++i) {
+      PrintMemoryByte(&str, "", ((u8 *)pc)[i]);
+    }
+    str.append("\n");
+  } else {
+    str.append("unaccessible\n");
+  }
+  Report("%s", str.data());
+}
+
+static void MaybeDumpRegisters(void *context) {
+  if (!common_flags()->dump_registers) return;
+  SignalContext::DumpAllRegisters(context);
+}
+
+static void ReportStackOverflowImpl(const SignalContext &sig, u32 tid,
+                                    UnwindSignalStackCallbackType unwind,
+                                    const void *unwind_context) {
+  SanitizerCommonDecorator d;
+  Printf("%s", d.Warning());
+  static const char kDescription[] = "stack-overflow";
+  Report("ERROR: %s: %s on address %p (pc %p bp %p sp %p T%d)\n",
+         SanitizerToolName, kDescription, (void *)sig.addr, (void *)sig.pc,
+         (void *)sig.bp, (void *)sig.sp, tid);
+  Printf("%s", d.Default());
+  InternalScopedBuffer<BufferedStackTrace> stack_buffer(1);
+  BufferedStackTrace *stack = stack_buffer.data();
+  stack->Reset();
+  unwind(sig, unwind_context, stack);
+  stack->Print();
+  ReportErrorSummary(kDescription, stack);
+}
+
+static void ReportDeadlySignalImpl(const SignalContext &sig, u32 tid,
+                                   UnwindSignalStackCallbackType unwind,
+                                   const void *unwind_context) {
+  SanitizerCommonDecorator d;
+  Printf("%s", d.Warning());
+  const char *description = sig.Describe();
+  Report("ERROR: %s: %s on unknown address %p (pc %p bp %p sp %p T%d)\n",
+         SanitizerToolName, description, (void *)sig.addr, (void *)sig.pc,
+         (void *)sig.bp, (void *)sig.sp, tid);
+  Printf("%s", d.Default());
+  if (sig.pc < GetPageSizeCached())
+    Report("Hint: pc points to the zero page.\n");
+  if (sig.is_memory_access) {
+    const char *access_type =
+        sig.write_flag == SignalContext::WRITE
+            ? "WRITE"
+            : (sig.write_flag == SignalContext::READ ? "READ" : "UNKNOWN");
+    Report("The signal is caused by a %s memory access.\n", access_type);
+    if (sig.addr < GetPageSizeCached())
+      Report("Hint: address points to the zero page.\n");
+  }
+  MaybeReportNonExecRegion(sig.pc);
+  InternalScopedBuffer<BufferedStackTrace> stack_buffer(1);
+  BufferedStackTrace *stack = stack_buffer.data();
+  stack->Reset();
+  unwind(sig, unwind_context, stack);
+  stack->Print();
+  MaybeDumpInstructionBytes(sig.pc);
+  MaybeDumpRegisters(sig.context);
+  Printf("%s can not provide additional info.\n", SanitizerToolName);
+  ReportErrorSummary(description, stack);
+}
+
+void ReportDeadlySignal(const SignalContext &sig, u32 tid,
+                        UnwindSignalStackCallbackType unwind,
+                        const void *unwind_context) {
+  if (sig.IsStackOverflow())
+    ReportStackOverflowImpl(sig, tid, unwind, unwind_context);
+  else
+    ReportDeadlySignalImpl(sig, tid, unwind, unwind_context);
+}
+
+void HandleDeadlySignal(void *siginfo, void *context, u32 tid,
+                        UnwindSignalStackCallbackType unwind,
+                        const void *unwind_context) {
+  StartReportDeadlySignal();
+  ScopedErrorReportLock rl;
+  SignalContext sig(siginfo, context);
+  ReportDeadlySignal(sig, tid, unwind, unwind_context);
+  Report("ABORTING\n");
+  Die();
+}
+
+#endif  // !SANITIZER_FUCHSIA && !SANITIZER_GO
+
 void WriteToSyslog(const char *msg) {
   InternalScopedString msg_copy(kErrorMessageBufferSize);
   msg_copy.append("%s", msg);
@@ -160,6 +298,46 @@
 #endif
 }
 
+static atomic_uintptr_t reporting_thread = {0};
+
+ScopedErrorReportLock::ScopedErrorReportLock() {
+  uptr current = GetThreadSelf();
+  for (;;) {
+    uptr expected = 0;
+    if (atomic_compare_exchange_strong(&reporting_thread, &expected, current,
+                                       memory_order_relaxed)) {
+      // We've claimed reporting_thread so proceed.
+      CommonSanitizerReportMutex.Lock();
+      return;
+    }
+
+    if (expected == current) {
+      // This is either asynch signal or nested error during error reporting.
+      // Fail simple to avoid deadlocks in Report().
+
+      // Can't use Report() here because of potential deadlocks in nested
+      // signal handlers.
+      CatastrophicErrorWrite(SanitizerToolName,
+                             internal_strlen(SanitizerToolName));
+      static const char msg[] = ": nested bug in the same thread, aborting.\n";
+      CatastrophicErrorWrite(msg, sizeof(msg) - 1);
+
+      internal__exit(common_flags()->exitcode);
+    }
+
+    internal_sched_yield();
+  }
+}
+
+ScopedErrorReportLock::~ScopedErrorReportLock() {
+  CommonSanitizerReportMutex.Unlock();
+  atomic_store_relaxed(&reporting_thread, 0);
+}
+
+void ScopedErrorReportLock::CheckLocked() {
+  CommonSanitizerReportMutex.CheckLocked();
+}
+
 }  // namespace __sanitizer
 
 SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_sandbox_on_notify,
diff --git a/lib/sanitizer_common/sanitizer_coverage_fuchsia.cc b/lib/sanitizer_common/sanitizer_coverage_fuchsia.cc
new file mode 100644
index 0000000..c5be48b
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_coverage_fuchsia.cc
@@ -0,0 +1,240 @@
+//===-- sanitizer_coverage_fuchsia.cc ------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+//
+// Sanitizer Coverage Controller for Trace PC Guard, Fuchsia-specific version.
+//
+// This Fuchsia-specific implementation uses the same basic scheme and the
+// same simple '.sancov' file format as the generic implementation.  The
+// difference is that we just produce a single blob of output for the whole
+// program, not a separate one per DSO.  We do not sort the PC table and do
+// not prune the zeros, so the resulting file is always as large as it
+// would be to report 100% coverage.  Implicit tracing information about
+// the address ranges of DSOs allows offline tools to split the one big
+// blob into separate files that the 'sancov' tool can understand.
+//
+// Unlike the traditional implementation that uses an atexit hook to write
+// out data files at the end, the results on Fuchsia do not go into a file
+// per se.  The 'coverage_dir' option is ignored.  Instead, they are stored
+// directly into a shared memory object (a Zircon VMO).  At exit, that VMO
+// is handed over to a system service that's responsible for getting the
+// data out to somewhere that it can be fed into the sancov tool (where and
+// how is not our problem).
+
+#include "sanitizer_platform.h"
+#if SANITIZER_FUCHSIA
+#include "sanitizer_atomic.h"
+#include "sanitizer_common.h"
+#include "sanitizer_internal_defs.h"
+
+#include <zircon/process.h>
+#include <zircon/sanitizer.h>
+#include <zircon/syscalls.h>
+
+using namespace __sanitizer;  // NOLINT
+
+namespace __sancov {
+namespace {
+
+// TODO(mcgrathr): Move the constant into a header shared with other impls.
+constexpr u64 Magic64 = 0xC0BFFFFFFFFFFF64ULL;
+static_assert(SANITIZER_WORDSIZE == 64, "Fuchsia is always LP64");
+
+constexpr const char kSancovSinkName[] = "sancov";
+
+// Collects trace-pc guard coverage.
+// This class relies on zero-initialization.
+class TracePcGuardController {
+ public:
+  // For each PC location being tracked, there is a u32 reserved in global
+  // data called the "guard".  At startup, we assign each guard slot a
+  // unique index into the big results array.  Later during runtime, the
+  // first call to TracePcGuard (below) will store the corresponding PC at
+  // that index in the array.  (Each later call with the same guard slot is
+  // presumed to be from the same PC.)  Then it clears the guard slot back
+  // to zero, which tells the compiler not to bother calling in again.  At
+  // the end of the run, we have a big array where each element is either
+  // zero or is a tracked PC location that was hit in the trace.
+
+  // This is called from global constructors.  Each translation unit has a
+  // contiguous array of guard slots, and a constructor that calls here
+  // with the bounds of its array.  Those constructors are allowed to call
+  // here more than once for the same array.  Usually all of these
+  // constructors run in the initial thread, but it's possible that a
+  // dlopen call on a secondary thread will run constructors that get here.
+  void InitTracePcGuard(u32 *start, u32 *end) {
+    if (end > start && *start == 0 && common_flags()->coverage) {
+      // Complete the setup before filling in any guards with indices.
+      // This avoids the possibility of code called from Setup reentering
+      // TracePcGuard.
+      u32 idx = Setup(end - start);
+      for (u32 *p = start; p < end; ++p) {
+        *p = idx++;
+      }
+    }
+  }
+
+  void TracePcGuard(u32 *guard, uptr pc) {
+    atomic_uint32_t *guard_ptr = reinterpret_cast<atomic_uint32_t *>(guard);
+    u32 idx = atomic_exchange(guard_ptr, 0, memory_order_relaxed);
+    if (idx > 0) array_[idx] = pc;
+  }
+
+  void Dump() {
+    BlockingMutexLock locked(&setup_lock_);
+    if (array_) {
+      CHECK_NE(vmo_, ZX_HANDLE_INVALID);
+
+      // Publish the VMO to the system, where it can be collected and
+      // analyzed after this process exits.  This always consumes the VMO
+      // handle.  Any failure is just logged and not indicated to us.
+      __sanitizer_publish_data(kSancovSinkName, vmo_);
+      vmo_ = ZX_HANDLE_INVALID;
+
+      // This will route to __sanitizer_log_write, which will ensure that
+      // information about shared libraries is written out.  This message
+      // uses the `dumpfile` symbolizer markup element to highlight the
+      // dump.  See the explanation for this in:
+      // https://fuchsia.googlesource.com/zircon/+/master/docs/symbolizer_markup.md
+      Printf("SanitizerCoverage: {{{dumpfile:%s:%s}}} with up to %u PCs\n",
+             kSancovSinkName, vmo_name_, next_index_ - 1);
+    }
+  }
+
+ private:
+  // We map in the largest possible view into the VMO: one word
+  // for every possible 32-bit index value.  This avoids the need
+  // to change the mapping when increasing the size of the VMO.
+  // We can always spare the 32G of address space.
+  static constexpr size_t MappingSize = sizeof(uptr) << 32;
+
+  BlockingMutex setup_lock_;
+  uptr *array_;
+  u32 next_index_;
+  zx_handle_t vmo_;
+  char vmo_name_[ZX_MAX_NAME_LEN];
+
+  size_t DataSize() const { return next_index_ * sizeof(uintptr_t); }
+
+  u32 Setup(u32 num_guards) {
+    BlockingMutexLock locked(&setup_lock_);
+    DCHECK(common_flags()->coverage);
+
+    if (next_index_ == 0) {
+      CHECK_EQ(vmo_, ZX_HANDLE_INVALID);
+      CHECK_EQ(array_, nullptr);
+
+      // The first sample goes at [1] to reserve [0] for the magic number.
+      next_index_ = 1 + num_guards;
+
+      zx_status_t status = _zx_vmo_create(DataSize(), 0, &vmo_);
+      CHECK_EQ(status, ZX_OK);
+
+      // Give the VMO a name including our process KOID so it's easy to spot.
+      internal_snprintf(vmo_name_, sizeof(vmo_name_), "%s.%zu", kSancovSinkName,
+                        internal_getpid());
+      _zx_object_set_property(vmo_, ZX_PROP_NAME, vmo_name_,
+                              internal_strlen(vmo_name_));
+
+      // Map the largest possible view we might need into the VMO.  Later
+      // we might need to increase the VMO's size before we can use larger
+      // indices, but we'll never move the mapping address so we don't have
+      // any multi-thread synchronization issues with that.
+      uintptr_t mapping;
+      status =
+          _zx_vmar_map(_zx_vmar_root_self(), 0, vmo_, 0, MappingSize,
+                       ZX_VM_FLAG_PERM_READ | ZX_VM_FLAG_PERM_WRITE, &mapping);
+      CHECK_EQ(status, ZX_OK);
+
+      // Hereafter other threads are free to start storing into
+      // elements [1, next_index_) of the big array.
+      array_ = reinterpret_cast<uptr *>(mapping);
+
+      // Store the magic number.
+      // Hereafter, the VMO serves as the contents of the '.sancov' file.
+      array_[0] = Magic64;
+
+      return 1;
+    } else {
+      // The VMO is already mapped in, but it's not big enough to use the
+      // new indices.  So increase the size to cover the new maximum index.
+
+      CHECK_NE(vmo_, ZX_HANDLE_INVALID);
+      CHECK_NE(array_, nullptr);
+
+      uint32_t first_index = next_index_;
+      next_index_ += num_guards;
+
+      zx_status_t status = _zx_vmo_set_size(vmo_, DataSize());
+      CHECK_EQ(status, ZX_OK);
+
+      return first_index;
+    }
+  }
+};
+
+static TracePcGuardController pc_guard_controller;
+
+}  // namespace
+}  // namespace __sancov
+
+namespace __sanitizer {
+void InitializeCoverage(bool enabled, const char *dir) {
+  CHECK_EQ(enabled, common_flags()->coverage);
+  CHECK_EQ(dir, common_flags()->coverage_dir);
+
+  static bool coverage_enabled = false;
+  if (!coverage_enabled) {
+    coverage_enabled = enabled;
+    Atexit(__sanitizer_cov_dump);
+    AddDieCallback(__sanitizer_cov_dump);
+  }
+}
+}  // namespace __sanitizer
+
+extern "C" {
+SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_dump_coverage(  // NOLINT
+    const uptr *pcs, uptr len) {
+  UNIMPLEMENTED();
+}
+
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_pc_guard, u32 *guard) {
+  if (!*guard) return;
+  __sancov::pc_guard_controller.TracePcGuard(guard, GET_CALLER_PC() - 1);
+}
+
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_pc_guard_init,
+                             u32 *start, u32 *end) {
+  if (start == end || *start) return;
+  __sancov::pc_guard_controller.InitTracePcGuard(start, end);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_dump_trace_pc_guard_coverage() {
+  __sancov::pc_guard_controller.Dump();
+}
+SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_cov_dump() {
+  __sanitizer_dump_trace_pc_guard_coverage();
+}
+// Default empty implementations (weak). Users should redefine them.
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_cmp, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_cmp1, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_cmp2, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_cmp4, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_cmp8, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_const_cmp1, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_const_cmp2, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_const_cmp4, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_const_cmp8, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_switch, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_div4, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_div8, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_gep, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_pc_indir, void) {}
+}  // extern "C"
+
+#endif  // !SANITIZER_FUCHSIA
diff --git a/lib/sanitizer_common/sanitizer_coverage_interface.inc b/lib/sanitizer_common/sanitizer_coverage_interface.inc
index ae691bd..f909b74 100644
--- a/lib/sanitizer_common/sanitizer_coverage_interface.inc
+++ b/lib/sanitizer_common/sanitizer_coverage_interface.inc
@@ -8,29 +8,20 @@
 //===----------------------------------------------------------------------===//
 // Sanitizer Coverage interface list.
 //===----------------------------------------------------------------------===//
-INTERFACE_FUNCTION(__sanitizer_cov)
 INTERFACE_FUNCTION(__sanitizer_cov_dump)
-INTERFACE_FUNCTION(__sanitizer_cov_indir_call16)
-INTERFACE_FUNCTION(__sanitizer_cov_init)
-INTERFACE_FUNCTION(__sanitizer_cov_module_init)
-INTERFACE_FUNCTION(__sanitizer_cov_trace_basic_block)
-INTERFACE_FUNCTION(__sanitizer_cov_trace_func_enter)
-INTERFACE_FUNCTION(__sanitizer_cov_with_check)
+INTERFACE_FUNCTION(__sanitizer_cov_reset)
 INTERFACE_FUNCTION(__sanitizer_dump_coverage)
 INTERFACE_FUNCTION(__sanitizer_dump_trace_pc_guard_coverage)
-INTERFACE_FUNCTION(__sanitizer_get_coverage_guards)
-INTERFACE_FUNCTION(__sanitizer_get_number_of_counters)
-INTERFACE_FUNCTION(__sanitizer_get_total_unique_caller_callee_pairs)
-INTERFACE_FUNCTION(__sanitizer_get_total_unique_coverage)
-INTERFACE_FUNCTION(__sanitizer_maybe_open_cov_file)
-INTERFACE_FUNCTION(__sanitizer_reset_coverage)
-INTERFACE_FUNCTION(__sanitizer_update_counter_bitset_and_clear_counters)
 INTERFACE_WEAK_FUNCTION(__sancov_default_options)
 INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_cmp)
 INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_cmp1)
 INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_cmp2)
 INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_cmp4)
 INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_cmp8)
+INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_const_cmp1)
+INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_const_cmp2)
+INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_const_cmp4)
+INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_const_cmp8)
 INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_div4)
 INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_div8)
 INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_gep)
@@ -38,3 +29,5 @@
 INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_pc_guard_init)
 INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_pc_indir)
 INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_switch)
+INTERFACE_WEAK_FUNCTION(__sanitizer_cov_8bit_counters_init)
+INTERFACE_WEAK_FUNCTION(__sanitizer_cov_pcs_init)
diff --git a/lib/sanitizer_common/sanitizer_coverage_libcdep.cc b/lib/sanitizer_common/sanitizer_coverage_libcdep.cc
deleted file mode 100644
index e934af3..0000000
--- a/lib/sanitizer_common/sanitizer_coverage_libcdep.cc
+++ /dev/null
@@ -1,1035 +0,0 @@
-//===-- sanitizer_coverage.cc ---------------------------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Sanitizer Coverage.
-// This file implements run-time support for a poor man's coverage tool.
-//
-// Compiler instrumentation:
-// For every interesting basic block the compiler injects the following code:
-// if (Guard < 0) {
-//    __sanitizer_cov(&Guard);
-// }
-// At the module start up time __sanitizer_cov_module_init sets the guards
-// to consecutive negative numbers (-1, -2, -3, ...).
-// It's fine to call __sanitizer_cov more than once for a given block.
-//
-// Run-time:
-//  - __sanitizer_cov(): record that we've executed the PC (GET_CALLER_PC).
-//    and atomically set Guard to -Guard.
-//  - __sanitizer_cov_dump: dump the coverage data to disk.
-//  For every module of the current process that has coverage data
-//  this will create a file module_name.PID.sancov.
-//
-// The file format is simple: the first 8 bytes is the magic,
-// one of 0xC0BFFFFFFFFFFF64 and 0xC0BFFFFFFFFFFF32. The last byte of the
-// magic defines the size of the following offsets.
-// The rest of the data is the offsets in the module.
-//
-// Eventually, this coverage implementation should be obsoleted by a more
-// powerful general purpose Clang/LLVM coverage instrumentation.
-// Consider this implementation as prototype.
-//
-// FIXME: support (or at least test with) dlclose.
-//===----------------------------------------------------------------------===//
-
-#include "sanitizer_allocator_internal.h"
-#include "sanitizer_common.h"
-#include "sanitizer_libc.h"
-#include "sanitizer_mutex.h"
-#include "sanitizer_procmaps.h"
-#include "sanitizer_stacktrace.h"
-#include "sanitizer_symbolizer.h"
-#include "sanitizer_flags.h"
-
-using namespace __sanitizer;
-
-static const u64 kMagic64 = 0xC0BFFFFFFFFFFF64ULL;
-static const u64 kMagic32 = 0xC0BFFFFFFFFFFF32ULL;
-static const uptr kNumWordsForMagic = SANITIZER_WORDSIZE == 64 ? 1 : 2;
-static const u64 kMagic = SANITIZER_WORDSIZE == 64 ? kMagic64 : kMagic32;
-
-static atomic_uint32_t dump_once_guard;  // Ensure that CovDump runs only once.
-
-static atomic_uintptr_t coverage_counter;
-static atomic_uintptr_t caller_callee_counter;
-
-static void ResetGlobalCounters() {
-  return atomic_store(&coverage_counter, 0, memory_order_relaxed);
-  return atomic_store(&caller_callee_counter, 0, memory_order_relaxed);
-}
-
-// pc_array is the array containing the covered PCs.
-// To make the pc_array thread- and async-signal-safe it has to be large enough.
-// 128M counters "ought to be enough for anybody" (4M on 32-bit).
-
-// With coverage_direct=1 in ASAN_OPTIONS, pc_array memory is mapped to a file.
-// In this mode, __sanitizer_cov_dump does nothing, and CovUpdateMapping()
-// dump current memory layout to another file.
-
-static bool cov_sandboxed = false;
-static fd_t cov_fd = kInvalidFd;
-static unsigned int cov_max_block_size = 0;
-static bool coverage_enabled = false;
-static const char *coverage_dir;
-
-namespace __sanitizer {
-
-class CoverageData {
- public:
-  void Init();
-  void Enable();
-  void Disable();
-  void ReInit();
-  void BeforeFork();
-  void AfterFork(int child_pid);
-  void Extend(uptr npcs);
-  void Add(uptr pc, u32 *guard);
-  void IndirCall(uptr caller, uptr callee, uptr callee_cache[],
-                 uptr cache_size);
-  void DumpCallerCalleePairs();
-  void DumpTrace();
-  void DumpAsBitSet();
-  void DumpCounters();
-  void DumpOffsets();
-  void DumpAll();
-
-  ALWAYS_INLINE
-  void TraceBasicBlock(u32 *id);
-
-  void InitializeGuardArray(s32 *guards);
-  void InitializeGuards(s32 *guards, uptr n, const char *module_name,
-                        uptr caller_pc);
-  void InitializeCounters(u8 *counters, uptr n);
-  void ReinitializeGuards();
-  uptr GetNumberOf8bitCounters();
-  uptr Update8bitCounterBitsetAndClearCounters(u8 *bitset);
-
-  uptr *data();
-  uptr size() const;
-
- private:
-  struct NamedPcRange {
-    const char *copied_module_name;
-    uptr beg, end; // elements [beg,end) in pc_array.
-  };
-
-  void DirectOpen();
-  void UpdateModuleNameVec(uptr caller_pc, uptr range_beg, uptr range_end);
-  void GetRangeOffsets(const NamedPcRange& r, Symbolizer* s,
-      InternalMmapVector<uptr>* offsets) const;
-
-  // Maximal size pc array may ever grow.
-  // We MmapNoReserve this space to ensure that the array is contiguous.
-  static const uptr kPcArrayMaxSize =
-      FIRST_32_SECOND_64(1 << (SANITIZER_ANDROID ? 24 : 26), 1 << 27);
-  // The amount file mapping for the pc array is grown by.
-  static const uptr kPcArrayMmapSize = 64 * 1024;
-
-  // pc_array is allocated with MmapNoReserveOrDie and so it uses only as
-  // much RAM as it really needs.
-  uptr *pc_array;
-  // Index of the first available pc_array slot.
-  atomic_uintptr_t pc_array_index;
-  // Array size.
-  atomic_uintptr_t pc_array_size;
-  // Current file mapped size of the pc array.
-  uptr pc_array_mapped_size;
-  // Descriptor of the file mapped pc array.
-  fd_t pc_fd;
-
-  // Vector of coverage guard arrays, protected by mu.
-  InternalMmapVectorNoCtor<s32*> guard_array_vec;
-
-  // Vector of module and compilation unit pc ranges.
-  InternalMmapVectorNoCtor<NamedPcRange> comp_unit_name_vec;
-  InternalMmapVectorNoCtor<NamedPcRange> module_name_vec;
-
-  struct CounterAndSize {
-    u8 *counters;
-    uptr n;
-  };
-
-  InternalMmapVectorNoCtor<CounterAndSize> counters_vec;
-  uptr num_8bit_counters;
-
-  // Caller-Callee (cc) array, size and current index.
-  static const uptr kCcArrayMaxSize = FIRST_32_SECOND_64(1 << 18, 1 << 24);
-  uptr **cc_array;
-  atomic_uintptr_t cc_array_index;
-  atomic_uintptr_t cc_array_size;
-
-  // Tracing event array, size and current pointer.
-  // We record all events (basic block entries) in a global buffer of u32
-  // values. Each such value is the index in pc_array.
-  // So far the tracing is highly experimental:
-  //   - not thread-safe;
-  //   - does not support long traces;
-  //   - not tuned for performance.
-  // Windows doesn't do overcommit (committed virtual memory costs swap), so
-  // programs can't reliably map such large amounts of virtual memory.
-  // TODO(etienneb): Find a way to support coverage of larger executable
-static const uptr kTrEventArrayMaxSize =
-    (SANITIZER_WORDSIZE == 32 || SANITIZER_WINDOWS) ? 1 << 22 : 1 << 30;
-  u32 *tr_event_array;
-  uptr tr_event_array_size;
-  u32 *tr_event_pointer;
-  static const uptr kTrPcArrayMaxSize    = FIRST_32_SECOND_64(1 << 22, 1 << 27);
-
-  StaticSpinMutex mu;
-};
-
-static CoverageData coverage_data;
-
-void CovUpdateMapping(const char *path, uptr caller_pc = 0);
-
-void CoverageData::DirectOpen() {
-  InternalScopedString path(kMaxPathLength);
-  internal_snprintf((char *)path.data(), path.size(), "%s/%zd.sancov.raw",
-                    coverage_dir, internal_getpid());
-  pc_fd = OpenFile(path.data(), RdWr);
-  if (pc_fd == kInvalidFd) {
-    Report("Coverage: failed to open %s for reading/writing\n", path.data());
-    Die();
-  }
-
-  pc_array_mapped_size = 0;
-  CovUpdateMapping(coverage_dir);
-}
-
-void CoverageData::Init() {
-  pc_fd = kInvalidFd;
-}
-
-void CoverageData::Enable() {
-  if (pc_array)
-    return;
-  pc_array = reinterpret_cast<uptr *>(
-      MmapNoReserveOrDie(sizeof(uptr) * kPcArrayMaxSize, "CovInit"));
-  atomic_store(&pc_array_index, 0, memory_order_relaxed);
-  if (common_flags()->coverage_direct) {
-    atomic_store(&pc_array_size, 0, memory_order_relaxed);
-  } else {
-    atomic_store(&pc_array_size, kPcArrayMaxSize, memory_order_relaxed);
-  }
-
-  cc_array = reinterpret_cast<uptr **>(MmapNoReserveOrDie(
-      sizeof(uptr *) * kCcArrayMaxSize, "CovInit::cc_array"));
-  atomic_store(&cc_array_size, kCcArrayMaxSize, memory_order_relaxed);
-  atomic_store(&cc_array_index, 0, memory_order_relaxed);
-
-  // Allocate tr_event_array with a guard page at the end.
-  tr_event_array = reinterpret_cast<u32 *>(MmapNoReserveOrDie(
-      sizeof(tr_event_array[0]) * kTrEventArrayMaxSize + GetMmapGranularity(),
-      "CovInit::tr_event_array"));
-  MprotectNoAccess(
-      reinterpret_cast<uptr>(&tr_event_array[kTrEventArrayMaxSize]),
-      GetMmapGranularity());
-  tr_event_array_size = kTrEventArrayMaxSize;
-  tr_event_pointer = tr_event_array;
-
-  num_8bit_counters = 0;
-}
-
-void CoverageData::InitializeGuardArray(s32 *guards) {
-  Enable();  // Make sure coverage is enabled at this point.
-  s32 n = guards[0];
-  for (s32 j = 1; j <= n; j++) {
-    uptr idx = atomic_load_relaxed(&pc_array_index);
-    atomic_store_relaxed(&pc_array_index, idx + 1);
-    guards[j] = -static_cast<s32>(idx + 1);
-  }
-}
-
-void CoverageData::Disable() {
-  if (pc_array) {
-    UnmapOrDie(pc_array, sizeof(uptr) * kPcArrayMaxSize);
-    pc_array = nullptr;
-  }
-  if (cc_array) {
-    UnmapOrDie(cc_array, sizeof(uptr *) * kCcArrayMaxSize);
-    cc_array = nullptr;
-  }
-  if (tr_event_array) {
-    UnmapOrDie(tr_event_array,
-               sizeof(tr_event_array[0]) * kTrEventArrayMaxSize +
-                   GetMmapGranularity());
-    tr_event_array = nullptr;
-    tr_event_pointer = nullptr;
-  }
-  if (pc_fd != kInvalidFd) {
-    CloseFile(pc_fd);
-    pc_fd = kInvalidFd;
-  }
-}
-
-void CoverageData::ReinitializeGuards() {
-  // Assuming single thread.
-  atomic_store(&pc_array_index, 0, memory_order_relaxed);
-  for (uptr i = 0; i < guard_array_vec.size(); i++)
-    InitializeGuardArray(guard_array_vec[i]);
-}
-
-void CoverageData::ReInit() {
-  Disable();
-  if (coverage_enabled) {
-    if (common_flags()->coverage_direct) {
-      // In memory-mapped mode we must extend the new file to the known array
-      // size.
-      uptr size = atomic_load(&pc_array_size, memory_order_relaxed);
-      uptr npcs = size / sizeof(uptr);
-      Enable();
-      if (size) Extend(npcs);
-      if (coverage_enabled) CovUpdateMapping(coverage_dir);
-    } else {
-      Enable();
-    }
-  }
-  // Re-initialize the guards.
-  // We are single-threaded now, no need to grab any lock.
-  CHECK_EQ(atomic_load(&pc_array_index, memory_order_relaxed), 0);
-  ReinitializeGuards();
-}
-
-void CoverageData::BeforeFork() {
-  mu.Lock();
-}
-
-void CoverageData::AfterFork(int child_pid) {
-  // We are single-threaded so it's OK to release the lock early.
-  mu.Unlock();
-  if (child_pid == 0) ReInit();
-}
-
-// Extend coverage PC array to fit additional npcs elements.
-void CoverageData::Extend(uptr npcs) {
-  if (!common_flags()->coverage_direct) return;
-  SpinMutexLock l(&mu);
-
-  uptr size = atomic_load(&pc_array_size, memory_order_relaxed);
-  size += npcs * sizeof(uptr);
-
-  if (coverage_enabled && size > pc_array_mapped_size) {
-    if (pc_fd == kInvalidFd) DirectOpen();
-    CHECK_NE(pc_fd, kInvalidFd);
-
-    uptr new_mapped_size = pc_array_mapped_size;
-    while (size > new_mapped_size) new_mapped_size += kPcArrayMmapSize;
-    CHECK_LE(new_mapped_size, sizeof(uptr) * kPcArrayMaxSize);
-
-    // Extend the file and map the new space at the end of pc_array.
-    uptr res = internal_ftruncate(pc_fd, new_mapped_size);
-    int err;
-    if (internal_iserror(res, &err)) {
-      Printf("failed to extend raw coverage file: %d\n", err);
-      Die();
-    }
-
-    uptr next_map_base = ((uptr)pc_array) + pc_array_mapped_size;
-    void *p = MapWritableFileToMemory((void *)next_map_base,
-                                      new_mapped_size - pc_array_mapped_size,
-                                      pc_fd, pc_array_mapped_size);
-    CHECK_EQ((uptr)p, next_map_base);
-    pc_array_mapped_size = new_mapped_size;
-  }
-
-  atomic_store(&pc_array_size, size, memory_order_release);
-}
-
-void CoverageData::InitializeCounters(u8 *counters, uptr n) {
-  if (!counters) return;
-  CHECK_EQ(reinterpret_cast<uptr>(counters) % 16, 0);
-  n = RoundUpTo(n, 16); // The compiler must ensure that counters is 16-aligned.
-  SpinMutexLock l(&mu);
-  counters_vec.push_back({counters, n});
-  num_8bit_counters += n;
-}
-
-void CoverageData::UpdateModuleNameVec(uptr caller_pc, uptr range_beg,
-                                       uptr range_end) {
-  auto sym = Symbolizer::GetOrInit();
-  if (!sym)
-    return;
-  const char *module_name = sym->GetModuleNameForPc(caller_pc);
-  if (!module_name) return;
-  if (module_name_vec.empty() ||
-      module_name_vec.back().copied_module_name != module_name)
-    module_name_vec.push_back({module_name, range_beg, range_end});
-  else
-    module_name_vec.back().end = range_end;
-}
-
-void CoverageData::InitializeGuards(s32 *guards, uptr n,
-                                    const char *comp_unit_name,
-                                    uptr caller_pc) {
-  // The array 'guards' has n+1 elements, we use the element zero
-  // to store 'n'.
-  CHECK_LT(n, 1 << 30);
-  guards[0] = static_cast<s32>(n);
-  InitializeGuardArray(guards);
-  SpinMutexLock l(&mu);
-  uptr range_end = atomic_load(&pc_array_index, memory_order_relaxed);
-  uptr range_beg = range_end - n;
-  comp_unit_name_vec.push_back({comp_unit_name, range_beg, range_end});
-  guard_array_vec.push_back(guards);
-  UpdateModuleNameVec(caller_pc, range_beg, range_end);
-}
-
-static const uptr kBundleCounterBits = 16;
-
-// When coverage_order_pcs==true and SANITIZER_WORDSIZE==64
-// we insert the global counter into the first 16 bits of the PC.
-uptr BundlePcAndCounter(uptr pc, uptr counter) {
-  if (SANITIZER_WORDSIZE != 64 || !common_flags()->coverage_order_pcs)
-    return pc;
-  static const uptr kMaxCounter = (1 << kBundleCounterBits) - 1;
-  if (counter > kMaxCounter)
-    counter = kMaxCounter;
-  CHECK_EQ(0, pc >> (SANITIZER_WORDSIZE - kBundleCounterBits));
-  return pc | (counter << (SANITIZER_WORDSIZE - kBundleCounterBits));
-}
-
-uptr UnbundlePc(uptr bundle) {
-  if (SANITIZER_WORDSIZE != 64 || !common_flags()->coverage_order_pcs)
-    return bundle;
-  return (bundle << kBundleCounterBits) >> kBundleCounterBits;
-}
-
-uptr UnbundleCounter(uptr bundle) {
-  if (SANITIZER_WORDSIZE != 64 || !common_flags()->coverage_order_pcs)
-    return 0;
-  return bundle >> (SANITIZER_WORDSIZE - kBundleCounterBits);
-}
-
-// If guard is negative, atomically set it to -guard and store the PC in
-// pc_array.
-void CoverageData::Add(uptr pc, u32 *guard) {
-  atomic_uint32_t *atomic_guard = reinterpret_cast<atomic_uint32_t*>(guard);
-  s32 guard_value = atomic_load(atomic_guard, memory_order_relaxed);
-  if (guard_value >= 0) return;
-
-  atomic_store(atomic_guard, -guard_value, memory_order_relaxed);
-  if (!pc_array) return;
-
-  uptr idx = -guard_value - 1;
-  if (idx >= atomic_load(&pc_array_index, memory_order_acquire))
-    return;  // May happen after fork when pc_array_index becomes 0.
-  CHECK_LT(idx, atomic_load(&pc_array_size, memory_order_acquire));
-  uptr counter = atomic_fetch_add(&coverage_counter, 1, memory_order_relaxed);
-  pc_array[idx] = BundlePcAndCounter(pc, counter);
-}
-
-// Registers a pair caller=>callee.
-// When a given caller is seen for the first time, the callee_cache is added
-// to the global array cc_array, callee_cache[0] is set to caller and
-// callee_cache[1] is set to cache_size.
-// Then we are trying to add callee to callee_cache [2,cache_size) if it is
-// not there yet.
-// If the cache is full we drop the callee (may want to fix this later).
-void CoverageData::IndirCall(uptr caller, uptr callee, uptr callee_cache[],
-                             uptr cache_size) {
-  if (!cc_array) return;
-  atomic_uintptr_t *atomic_callee_cache =
-      reinterpret_cast<atomic_uintptr_t *>(callee_cache);
-  uptr zero = 0;
-  if (atomic_compare_exchange_strong(&atomic_callee_cache[0], &zero, caller,
-                                     memory_order_seq_cst)) {
-    uptr idx = atomic_fetch_add(&cc_array_index, 1, memory_order_relaxed);
-    CHECK_LT(idx * sizeof(uptr),
-             atomic_load(&cc_array_size, memory_order_acquire));
-    callee_cache[1] = cache_size;
-    cc_array[idx] = callee_cache;
-  }
-  CHECK_EQ(atomic_load(&atomic_callee_cache[0], memory_order_relaxed), caller);
-  for (uptr i = 2; i < cache_size; i++) {
-    uptr was = 0;
-    if (atomic_compare_exchange_strong(&atomic_callee_cache[i], &was, callee,
-                                       memory_order_seq_cst)) {
-      atomic_fetch_add(&caller_callee_counter, 1, memory_order_relaxed);
-      return;
-    }
-    if (was == callee)  // Already have this callee.
-      return;
-  }
-}
-
-uptr CoverageData::GetNumberOf8bitCounters() {
-  return num_8bit_counters;
-}
-
-// Map every 8bit counter to a 8-bit bitset and clear the counter.
-uptr CoverageData::Update8bitCounterBitsetAndClearCounters(u8 *bitset) {
-  uptr num_new_bits = 0;
-  uptr cur = 0;
-  // For better speed we map 8 counters to 8 bytes of bitset at once.
-  static const uptr kBatchSize = 8;
-  CHECK_EQ(reinterpret_cast<uptr>(bitset) % kBatchSize, 0);
-  for (uptr i = 0, len = counters_vec.size(); i < len; i++) {
-    u8 *c = counters_vec[i].counters;
-    uptr n = counters_vec[i].n;
-    CHECK_EQ(n % 16, 0);
-    CHECK_EQ(cur % kBatchSize, 0);
-    CHECK_EQ(reinterpret_cast<uptr>(c) % kBatchSize, 0);
-    if (!bitset) {
-      internal_bzero_aligned16(c, n);
-      cur += n;
-      continue;
-    }
-    for (uptr j = 0; j < n; j += kBatchSize, cur += kBatchSize) {
-      CHECK_LT(cur, num_8bit_counters);
-      u64 *pc64 = reinterpret_cast<u64*>(c + j);
-      u64 *pb64 = reinterpret_cast<u64*>(bitset + cur);
-      u64 c64 = *pc64;
-      u64 old_bits_64 = *pb64;
-      u64 new_bits_64 = old_bits_64;
-      if (c64) {
-        *pc64 = 0;
-        for (uptr k = 0; k < kBatchSize; k++) {
-          u64 x = (c64 >> (8 * k)) & 0xff;
-          if (x) {
-            u64 bit = 0;
-            /**/ if (x >= 128) bit = 128;
-            else if (x >= 32) bit = 64;
-            else if (x >= 16) bit = 32;
-            else if (x >= 8) bit = 16;
-            else if (x >= 4) bit = 8;
-            else if (x >= 3) bit = 4;
-            else if (x >= 2) bit = 2;
-            else if (x >= 1) bit = 1;
-            u64 mask = bit << (8 * k);
-            if (!(new_bits_64 & mask)) {
-              num_new_bits++;
-              new_bits_64 |= mask;
-            }
-          }
-        }
-        *pb64 = new_bits_64;
-      }
-    }
-  }
-  CHECK_EQ(cur, num_8bit_counters);
-  return num_new_bits;
-}
-
-uptr *CoverageData::data() {
-  return pc_array;
-}
-
-uptr CoverageData::size() const {
-  return atomic_load(&pc_array_index, memory_order_relaxed);
-}
-
-// Block layout for packed file format: header, followed by module name (no
-// trailing zero), followed by data blob.
-struct CovHeader {
-  int pid;
-  unsigned int module_name_length;
-  unsigned int data_length;
-};
-
-static void CovWritePacked(int pid, const char *module, const void *blob,
-                           unsigned int blob_size) {
-  if (cov_fd == kInvalidFd) return;
-  unsigned module_name_length = internal_strlen(module);
-  CovHeader header = {pid, module_name_length, blob_size};
-
-  if (cov_max_block_size == 0) {
-    // Writing to a file. Just go ahead.
-    WriteToFile(cov_fd, &header, sizeof(header));
-    WriteToFile(cov_fd, module, module_name_length);
-    WriteToFile(cov_fd, blob, blob_size);
-  } else {
-    // Writing to a socket. We want to split the data into appropriately sized
-    // blocks.
-    InternalScopedBuffer<char> block(cov_max_block_size);
-    CHECK_EQ((uptr)block.data(), (uptr)(CovHeader *)block.data());
-    uptr header_size_with_module = sizeof(header) + module_name_length;
-    CHECK_LT(header_size_with_module, cov_max_block_size);
-    unsigned int max_payload_size =
-        cov_max_block_size - header_size_with_module;
-    char *block_pos = block.data();
-    internal_memcpy(block_pos, &header, sizeof(header));
-    block_pos += sizeof(header);
-    internal_memcpy(block_pos, module, module_name_length);
-    block_pos += module_name_length;
-    char *block_data_begin = block_pos;
-    const char *blob_pos = (const char *)blob;
-    while (blob_size > 0) {
-      unsigned int payload_size = Min(blob_size, max_payload_size);
-      blob_size -= payload_size;
-      internal_memcpy(block_data_begin, blob_pos, payload_size);
-      blob_pos += payload_size;
-      ((CovHeader *)block.data())->data_length = payload_size;
-      WriteToFile(cov_fd, block.data(), header_size_with_module + payload_size);
-    }
-  }
-}
-
-// If packed = false: <name>.<pid>.<sancov> (name = module name).
-// If packed = true and name == 0: <pid>.<sancov>.<packed>.
-// If packed = true and name != 0: <name>.<sancov>.<packed> (name is
-// user-supplied).
-static fd_t CovOpenFile(InternalScopedString *path, bool packed,
-                       const char *name, const char *extension = "sancov") {
-  path->clear();
-  if (!packed) {
-    CHECK(name);
-    path->append("%s/%s.%zd.%s", coverage_dir, name, internal_getpid(),
-                extension);
-  } else {
-    if (!name)
-      path->append("%s/%zd.%s.packed", coverage_dir, internal_getpid(),
-                  extension);
-    else
-      path->append("%s/%s.%s.packed", coverage_dir, name, extension);
-  }
-  error_t err;
-  fd_t fd = OpenFile(path->data(), WrOnly, &err);
-  if (fd == kInvalidFd)
-    Report("SanitizerCoverage: failed to open %s for writing (reason: %d)\n",
-           path->data(), err);
-  return fd;
-}
-
-// Dump trace PCs and trace events into two separate files.
-void CoverageData::DumpTrace() {
-  uptr max_idx = tr_event_pointer - tr_event_array;
-  if (!max_idx) return;
-  auto sym = Symbolizer::GetOrInit();
-  if (!sym)
-    return;
-  InternalScopedString out(32 << 20);
-  for (uptr i = 0, n = size(); i < n; i++) {
-    const char *module_name = "<unknown>";
-    uptr module_address = 0;
-    sym->GetModuleNameAndOffsetForPC(UnbundlePc(pc_array[i]), &module_name,
-                                     &module_address);
-    out.append("%s 0x%zx\n", module_name, module_address);
-  }
-  InternalScopedString path(kMaxPathLength);
-  fd_t fd = CovOpenFile(&path, false, "trace-points");
-  if (fd == kInvalidFd) return;
-  WriteToFile(fd, out.data(), out.length());
-  CloseFile(fd);
-
-  fd = CovOpenFile(&path, false, "trace-compunits");
-  if (fd == kInvalidFd) return;
-  out.clear();
-  for (uptr i = 0; i < comp_unit_name_vec.size(); i++)
-    out.append("%s\n", comp_unit_name_vec[i].copied_module_name);
-  WriteToFile(fd, out.data(), out.length());
-  CloseFile(fd);
-
-  fd = CovOpenFile(&path, false, "trace-events");
-  if (fd == kInvalidFd) return;
-  uptr bytes_to_write = max_idx * sizeof(tr_event_array[0]);
-  u8 *event_bytes = reinterpret_cast<u8*>(tr_event_array);
-  // The trace file could be huge, and may not be written with a single syscall.
-  while (bytes_to_write) {
-    uptr actually_written;
-    if (WriteToFile(fd, event_bytes, bytes_to_write, &actually_written) &&
-        actually_written <= bytes_to_write) {
-      bytes_to_write -= actually_written;
-      event_bytes += actually_written;
-    } else {
-      break;
-    }
-  }
-  CloseFile(fd);
-  VReport(1, " CovDump: Trace: %zd PCs written\n", size());
-  VReport(1, " CovDump: Trace: %zd Events written\n", max_idx);
-}
-
-// This function dumps the caller=>callee pairs into a file as a sequence of
-// lines like "module_name offset".
-void CoverageData::DumpCallerCalleePairs() {
-  uptr max_idx = atomic_load(&cc_array_index, memory_order_relaxed);
-  if (!max_idx) return;
-  auto sym = Symbolizer::GetOrInit();
-  if (!sym)
-    return;
-  InternalScopedString out(32 << 20);
-  uptr total = 0;
-  for (uptr i = 0; i < max_idx; i++) {
-    uptr *cc_cache = cc_array[i];
-    CHECK(cc_cache);
-    uptr caller = cc_cache[0];
-    uptr n_callees = cc_cache[1];
-    const char *caller_module_name = "<unknown>";
-    uptr caller_module_address = 0;
-    sym->GetModuleNameAndOffsetForPC(caller, &caller_module_name,
-                                     &caller_module_address);
-    for (uptr j = 2; j < n_callees; j++) {
-      uptr callee = cc_cache[j];
-      if (!callee) break;
-      total++;
-      const char *callee_module_name = "<unknown>";
-      uptr callee_module_address = 0;
-      sym->GetModuleNameAndOffsetForPC(callee, &callee_module_name,
-                                       &callee_module_address);
-      out.append("%s 0x%zx\n%s 0x%zx\n", caller_module_name,
-                 caller_module_address, callee_module_name,
-                 callee_module_address);
-    }
-  }
-  InternalScopedString path(kMaxPathLength);
-  fd_t fd = CovOpenFile(&path, false, "caller-callee");
-  if (fd == kInvalidFd) return;
-  WriteToFile(fd, out.data(), out.length());
-  CloseFile(fd);
-  VReport(1, " CovDump: %zd caller-callee pairs written\n", total);
-}
-
-// Record the current PC into the event buffer.
-// Every event is a u32 value (index in tr_pc_array_index) so we compute
-// it once and then cache in the provided 'cache' storage.
-//
-// This function will eventually be inlined by the compiler.
-void CoverageData::TraceBasicBlock(u32 *id) {
-  // Will trap here if
-  //  1. coverage is not enabled at run-time.
-  //  2. The array tr_event_array is full.
-  *tr_event_pointer = *id - 1;
-  tr_event_pointer++;
-}
-
-void CoverageData::DumpCounters() {
-  if (!common_flags()->coverage_counters) return;
-  uptr n = coverage_data.GetNumberOf8bitCounters();
-  if (!n) return;
-  InternalScopedBuffer<u8> bitset(n);
-  coverage_data.Update8bitCounterBitsetAndClearCounters(bitset.data());
-  InternalScopedString path(kMaxPathLength);
-
-  for (uptr m = 0; m < module_name_vec.size(); m++) {
-    auto r = module_name_vec[m];
-    CHECK(r.copied_module_name);
-    CHECK_LE(r.beg, r.end);
-    CHECK_LE(r.end, size());
-    const char *base_name = StripModuleName(r.copied_module_name);
-    fd_t fd =
-        CovOpenFile(&path, /* packed */ false, base_name, "counters-sancov");
-    if (fd == kInvalidFd) return;
-    WriteToFile(fd, bitset.data() + r.beg, r.end - r.beg);
-    CloseFile(fd);
-    VReport(1, " CovDump: %zd counters written for '%s'\n", r.end - r.beg,
-            base_name);
-  }
-}
-
-void CoverageData::DumpAsBitSet() {
-  if (!common_flags()->coverage_bitset) return;
-  if (!size()) return;
-  InternalScopedBuffer<char> out(size());
-  InternalScopedString path(kMaxPathLength);
-  for (uptr m = 0; m < module_name_vec.size(); m++) {
-    uptr n_set_bits = 0;
-    auto r = module_name_vec[m];
-    CHECK(r.copied_module_name);
-    CHECK_LE(r.beg, r.end);
-    CHECK_LE(r.end, size());
-    for (uptr i = r.beg; i < r.end; i++) {
-      uptr pc = UnbundlePc(pc_array[i]);
-      out[i] = pc ? '1' : '0';
-      if (pc)
-        n_set_bits++;
-    }
-    const char *base_name = StripModuleName(r.copied_module_name);
-    fd_t fd = CovOpenFile(&path, /* packed */false, base_name, "bitset-sancov");
-    if (fd == kInvalidFd) return;
-    WriteToFile(fd, out.data() + r.beg, r.end - r.beg);
-    CloseFile(fd);
-    VReport(1,
-            " CovDump: bitset of %zd bits written for '%s', %zd bits are set\n",
-            r.end - r.beg, base_name, n_set_bits);
-  }
-}
-
-
-void CoverageData::GetRangeOffsets(const NamedPcRange& r, Symbolizer* sym,
-    InternalMmapVector<uptr>* offsets) const {
-  offsets->clear();
-  for (uptr i = 0; i < kNumWordsForMagic; i++)
-    offsets->push_back(0);
-  CHECK(r.copied_module_name);
-  CHECK_LE(r.beg, r.end);
-  CHECK_LE(r.end, size());
-  for (uptr i = r.beg; i < r.end; i++) {
-    uptr pc = UnbundlePc(pc_array[i]);
-    uptr counter = UnbundleCounter(pc_array[i]);
-    if (!pc) continue; // Not visited.
-    uptr offset = 0;
-    sym->GetModuleNameAndOffsetForPC(pc, nullptr, &offset);
-    offsets->push_back(BundlePcAndCounter(offset, counter));
-  }
-
-  CHECK_GE(offsets->size(), kNumWordsForMagic);
-  SortArray(offsets->data(), offsets->size());
-  for (uptr i = 0; i < offsets->size(); i++)
-    (*offsets)[i] = UnbundlePc((*offsets)[i]);
-}
-
-static void GenerateHtmlReport(const InternalMmapVector<char *> &cov_files) {
-  if (!common_flags()->html_cov_report) {
-    return;
-  }
-  char *sancov_path = FindPathToBinary(common_flags()->sancov_path);
-  if (sancov_path == nullptr) {
-    return;
-  }
-
-  InternalMmapVector<char *> sancov_argv(cov_files.size() * 2 + 3);
-  sancov_argv.push_back(sancov_path);
-  sancov_argv.push_back(internal_strdup("-html-report"));
-  auto argv_deleter = at_scope_exit([&] {
-    for (uptr i = 0; i < sancov_argv.size(); ++i) {
-      InternalFree(sancov_argv[i]);
-    }
-  });
-
-  for (const auto &cov_file : cov_files) {
-    sancov_argv.push_back(internal_strdup(cov_file));
-  }
-
-  {
-    ListOfModules modules;
-    modules.init();
-    for (const LoadedModule &module : modules) {
-      sancov_argv.push_back(internal_strdup(module.full_name()));
-    }
-  }
-
-  InternalScopedString report_path(kMaxPathLength);
-  fd_t report_fd =
-      CovOpenFile(&report_path, false /* packed */, GetProcessName(), "html");
-  int pid = StartSubprocess(sancov_argv[0], sancov_argv.data(),
-                            kInvalidFd /* stdin */, report_fd /* std_out */);
-  if (pid > 0) {
-    int result = WaitForProcess(pid);
-    if (result == 0)
-      Printf("coverage report generated to %s\n", report_path.data());
-  }
-}
-
-void CoverageData::DumpOffsets() {
-  auto sym = Symbolizer::GetOrInit();
-  if (!common_flags()->coverage_pcs) return;
-  CHECK_NE(sym, nullptr);
-  InternalMmapVector<uptr> offsets(0);
-  InternalScopedString path(kMaxPathLength);
-
-  InternalMmapVector<char *> cov_files(module_name_vec.size());
-  auto cov_files_deleter = at_scope_exit([&] {
-    for (uptr i = 0; i < cov_files.size(); ++i) {
-      InternalFree(cov_files[i]);
-    }
-  });
-
-  for (uptr m = 0; m < module_name_vec.size(); m++) {
-    auto r = module_name_vec[m];
-    GetRangeOffsets(r, sym, &offsets);
-
-    uptr num_offsets = offsets.size() - kNumWordsForMagic;
-    u64 *magic_p = reinterpret_cast<u64*>(offsets.data());
-    CHECK_EQ(*magic_p, 0ULL);
-    // FIXME: we may want to write 32-bit offsets even in 64-mode
-    // if all the offsets are small enough.
-    *magic_p = kMagic;
-
-    const char *module_name = StripModuleName(r.copied_module_name);
-    if (cov_sandboxed) {
-      if (cov_fd != kInvalidFd) {
-        CovWritePacked(internal_getpid(), module_name, offsets.data(),
-                       offsets.size() * sizeof(offsets[0]));
-        VReport(1, " CovDump: %zd PCs written to packed file\n", num_offsets);
-      }
-    } else {
-      // One file per module per process.
-      fd_t fd = CovOpenFile(&path, false /* packed */, module_name);
-      if (fd == kInvalidFd) continue;
-      WriteToFile(fd, offsets.data(), offsets.size() * sizeof(offsets[0]));
-      CloseFile(fd);
-      cov_files.push_back(internal_strdup(path.data()));
-      VReport(1, " CovDump: %s: %zd PCs written\n", path.data(), num_offsets);
-    }
-  }
-  if (cov_fd != kInvalidFd)
-    CloseFile(cov_fd);
-
-  GenerateHtmlReport(cov_files);
-}
-
-void CoverageData::DumpAll() {
-  if (!coverage_enabled || common_flags()->coverage_direct) return;
-  if (atomic_fetch_add(&dump_once_guard, 1, memory_order_relaxed))
-    return;
-  DumpAsBitSet();
-  DumpCounters();
-  DumpTrace();
-  DumpOffsets();
-  DumpCallerCalleePairs();
-}
-
-void CovPrepareForSandboxing(__sanitizer_sandbox_arguments *args) {
-  if (!args) return;
-  if (!coverage_enabled) return;
-  cov_sandboxed = args->coverage_sandboxed;
-  if (!cov_sandboxed) return;
-  cov_max_block_size = args->coverage_max_block_size;
-  if (args->coverage_fd >= 0) {
-    cov_fd = (fd_t)args->coverage_fd;
-  } else {
-    InternalScopedString path(kMaxPathLength);
-    // Pre-open the file now. The sandbox won't allow us to do it later.
-    cov_fd = CovOpenFile(&path, true /* packed */, nullptr);
-  }
-}
-
-fd_t MaybeOpenCovFile(const char *name) {
-  CHECK(name);
-  if (!coverage_enabled) return kInvalidFd;
-  InternalScopedString path(kMaxPathLength);
-  return CovOpenFile(&path, true /* packed */, name);
-}
-
-void CovBeforeFork() {
-  coverage_data.BeforeFork();
-}
-
-void CovAfterFork(int child_pid) {
-  coverage_data.AfterFork(child_pid);
-}
-
-static void MaybeDumpCoverage() {
-  if (common_flags()->coverage)
-    __sanitizer_cov_dump();
-}
-
-void InitializeCoverage(bool enabled, const char *dir) {
-  if (coverage_enabled)
-    return;  // May happen if two sanitizer enable coverage in the same process.
-  coverage_enabled = enabled;
-  coverage_dir = dir;
-  coverage_data.Init();
-  if (enabled) coverage_data.Enable();
-  if (!common_flags()->coverage_direct) Atexit(__sanitizer_cov_dump);
-  AddDieCallback(MaybeDumpCoverage);
-}
-
-void ReInitializeCoverage(bool enabled, const char *dir) {
-  coverage_enabled = enabled;
-  coverage_dir = dir;
-  coverage_data.ReInit();
-}
-
-void CoverageUpdateMapping() {
-  if (coverage_enabled)
-    CovUpdateMapping(coverage_dir);
-}
-
-} // namespace __sanitizer
-
-extern "C" {
-SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_cov(u32 *guard) {
-  coverage_data.Add(StackTrace::GetPreviousInstructionPc(GET_CALLER_PC()),
-                    guard);
-}
-SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_cov_with_check(u32 *guard) {
-  atomic_uint32_t *atomic_guard = reinterpret_cast<atomic_uint32_t*>(guard);
-  if (static_cast<s32>(
-          __sanitizer::atomic_load(atomic_guard, memory_order_relaxed)) < 0)
-  coverage_data.Add(StackTrace::GetPreviousInstructionPc(GET_CALLER_PC()),
-                    guard);
-}
-SANITIZER_INTERFACE_ATTRIBUTE void
-__sanitizer_cov_indir_call16(uptr callee, uptr callee_cache16[]) {
-  coverage_data.IndirCall(StackTrace::GetPreviousInstructionPc(GET_CALLER_PC()),
-                          callee, callee_cache16, 16);
-}
-SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_cov_init() {
-  coverage_enabled = true;
-  coverage_dir = common_flags()->coverage_dir;
-  coverage_data.Init();
-}
-SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_cov_dump() {
-  coverage_data.DumpAll();
-  __sanitizer_dump_trace_pc_guard_coverage();
-}
-SANITIZER_INTERFACE_ATTRIBUTE void
-__sanitizer_cov_module_init(s32 *guards, uptr npcs, u8 *counters,
-                            const char *comp_unit_name) {
-  coverage_data.InitializeGuards(guards, npcs, comp_unit_name, GET_CALLER_PC());
-  coverage_data.InitializeCounters(counters, npcs);
-  if (!common_flags()->coverage_direct) return;
-  if (SANITIZER_ANDROID && coverage_enabled) {
-    // dlopen/dlclose interceptors do not work on Android, so we rely on
-    // Extend() calls to update .sancov.map.
-    CovUpdateMapping(coverage_dir, GET_CALLER_PC());
-  }
-  coverage_data.Extend(npcs);
-}
-SANITIZER_INTERFACE_ATTRIBUTE
-sptr __sanitizer_maybe_open_cov_file(const char *name) {
-  return (sptr)MaybeOpenCovFile(name);
-}
-SANITIZER_INTERFACE_ATTRIBUTE
-uptr __sanitizer_get_total_unique_coverage() {
-  return atomic_load(&coverage_counter, memory_order_relaxed);
-}
-
-SANITIZER_INTERFACE_ATTRIBUTE
-uptr __sanitizer_get_total_unique_caller_callee_pairs() {
-  return atomic_load(&caller_callee_counter, memory_order_relaxed);
-}
-
-SANITIZER_INTERFACE_ATTRIBUTE
-void __sanitizer_cov_trace_func_enter(u32 *id) {
-  __sanitizer_cov_with_check(id);
-  coverage_data.TraceBasicBlock(id);
-}
-SANITIZER_INTERFACE_ATTRIBUTE
-void __sanitizer_cov_trace_basic_block(u32 *id) {
-  __sanitizer_cov_with_check(id);
-  coverage_data.TraceBasicBlock(id);
-}
-SANITIZER_INTERFACE_ATTRIBUTE
-void __sanitizer_reset_coverage() {
-  ResetGlobalCounters();
-  coverage_data.ReinitializeGuards();
-  internal_bzero_aligned16(
-      coverage_data.data(),
-      RoundUpTo(coverage_data.size() * sizeof(coverage_data.data()[0]), 16));
-}
-SANITIZER_INTERFACE_ATTRIBUTE
-uptr __sanitizer_get_coverage_guards(uptr **data) {
-  *data = coverage_data.data();
-  return coverage_data.size();
-}
-
-SANITIZER_INTERFACE_ATTRIBUTE
-uptr __sanitizer_get_number_of_counters() {
-  return coverage_data.GetNumberOf8bitCounters();
-}
-
-SANITIZER_INTERFACE_ATTRIBUTE
-uptr __sanitizer_update_counter_bitset_and_clear_counters(u8 *bitset) {
-  return coverage_data.Update8bitCounterBitsetAndClearCounters(bitset);
-}
-
-// Default empty implementations (weak). Users should redefine them.
-SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_cmp, void) {}
-SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_cmp1, void) {}
-SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_cmp2, void) {}
-SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_cmp4, void) {}
-SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_cmp8, void) {}
-SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_switch, void) {}
-SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_div4, void) {}
-SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_div8, void) {}
-SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_gep, void) {}
-SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_pc_indir, void) {}
-} // extern "C"
diff --git a/lib/sanitizer_common/sanitizer_coverage_libcdep_new.cc b/lib/sanitizer_common/sanitizer_coverage_libcdep_new.cc
index 73c3608..3c5f29b 100644
--- a/lib/sanitizer_common/sanitizer_coverage_libcdep_new.cc
+++ b/lib/sanitizer_common/sanitizer_coverage_libcdep_new.cc
@@ -8,10 +8,14 @@
 //===----------------------------------------------------------------------===//
 // Sanitizer Coverage Controller for Trace PC Guard.
 
+#include "sanitizer_platform.h"
+
+#if !SANITIZER_FUCHSIA
 #include "sancov_flags.h"
 #include "sanitizer_allocator_internal.h"
 #include "sanitizer_atomic.h"
 #include "sanitizer_common.h"
+#include "sanitizer_file.h"
 #include "sanitizer_symbolizer.h"
 
 using namespace __sanitizer;
@@ -49,7 +53,7 @@
   WriteToFile(fd, &Magic, sizeof(Magic));
   WriteToFile(fd, pcs, len * sizeof(*pcs));
   CloseFile(fd);
-  Printf("SanitizerCoverage: %s %zd PCs written\n", file_path, len);
+  Printf("SanitizerCoverage: %s: %zd PCs written\n", file_path, len);
 }
 
 static void SanitizerDumpCoverage(const uptr* unsorted_pcs, uptr len) {
@@ -71,7 +75,7 @@
     if (!pc) continue;
 
     if (!__sanitizer_get_module_and_offset_for_pc(pc, nullptr, 0, &pcs[i])) {
-      Printf("ERROR: bad pc %x\n", pc);
+      Printf("ERROR: unknown pc 0x%x (may happen if dlclose is used)\n", pc);
       continue;
     }
     uptr module_base = pc - pcs[i];
@@ -98,10 +102,6 @@
   InternalFree(file_path);
   InternalFree(module_name);
   InternalFree(pcs);
-
-  if (sancov_flags()->symbolize) {
-    Printf("TODO(aizatsky): call sancov to symbolize\n");
-  }
 }
 
 // Collects trace-pc guard coverage.
@@ -128,11 +128,17 @@
   }
 
   void TracePcGuard(u32* guard, uptr pc) {
-    atomic_uint32_t* guard_ptr = reinterpret_cast<atomic_uint32_t*>(guard);
-    u32 idx = atomic_exchange(guard_ptr, 0, memory_order_relaxed);
+    u32 idx = *guard;
     if (!idx) return;
     // we start indices from 1.
-    pc_vector[idx - 1] = pc;
+    atomic_uintptr_t* pc_ptr =
+        reinterpret_cast<atomic_uintptr_t*>(&pc_vector[idx - 1]);
+    if (atomic_load(pc_ptr, memory_order_relaxed) == 0)
+      atomic_store(pc_ptr, pc, memory_order_relaxed);
+  }
+
+  void Reset() {
+    internal_memset(&pc_vector[0], 0, sizeof(pc_vector[0]) * pc_vector.size());
   }
 
   void Dump() {
@@ -150,6 +156,17 @@
 }  // namespace
 }  // namespace __sancov
 
+namespace __sanitizer {
+void InitializeCoverage(bool enabled, const char *dir) {
+  static bool coverage_enabled = false;
+  if (coverage_enabled)
+    return;  // May happen if two sanitizer enable coverage in the same process.
+  coverage_enabled = enabled;
+  Atexit(__sanitizer_cov_dump);
+  AddDieCallback(__sanitizer_cov_dump);
+}
+} // namespace __sanitizer
+
 extern "C" {
 SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_dump_coverage(  // NOLINT
     const uptr* pcs, uptr len) {
@@ -170,4 +187,34 @@
 SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_dump_trace_pc_guard_coverage() {
   __sancov::pc_guard_controller.Dump();
 }
+SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_cov_dump() {
+  __sanitizer_dump_trace_pc_guard_coverage();
+}
+SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_cov_reset() {
+  __sancov::pc_guard_controller.Reset();
+}
+// Default empty implementations (weak). Users should redefine them.
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_cmp, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_cmp1, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_cmp2, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_cmp4, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_cmp8, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_const_cmp1, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_const_cmp2, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_const_cmp4, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_const_cmp8, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_switch, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_div4, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_div8, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_gep, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_pc_indir, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_8bit_counters_init, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_pcs_init, void) {}
 }  // extern "C"
+// Weak definition for code instrumented with -fsanitize-coverage=stack-depth
+// and later linked with code containing a strong definition.
+// E.g., -fsanitize=fuzzer-no-link
+SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+SANITIZER_TLS_INITIAL_EXEC_ATTRIBUTE uptr __sancov_lowest_stack;
+
+#endif  // !SANITIZER_FUCHSIA
diff --git a/lib/sanitizer_common/sanitizer_coverage_mapping_libcdep.cc b/lib/sanitizer_common/sanitizer_coverage_mapping_libcdep.cc
deleted file mode 100644
index 3477b06..0000000
--- a/lib/sanitizer_common/sanitizer_coverage_mapping_libcdep.cc
+++ /dev/null
@@ -1,122 +0,0 @@
-//===-- sanitizer_coverage_mapping.cc -------------------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Mmap-based implementation of sanitizer coverage.
-//
-// This is part of the implementation of code coverage that does not require
-// __sanitizer_cov_dump() call. Data is stored in 2 files per process.
-//
-// $pid.sancov.map describes process memory layout in the following text-based
-// format:
-// <pointer size in bits>  // 1 line, 32 or 64
-// <mapping start> <mapping end> <base address> <dso name> // repeated
-// ...
-// Mapping lines are NOT sorted. This file is updated every time memory layout
-// is changed (i.e. in dlopen() and dlclose() interceptors).
-//
-// $pid.sancov.raw is a binary dump of PC values, sizeof(uptr) each. Again, not
-// sorted. This file is extended by 64Kb at a time and mapped into memory. It
-// contains one or more 0 words at the end, up to the next 64Kb aligned offset.
-//
-// To convert these 2 files to the usual .sancov format, run sancov.py rawunpack
-// $pid.sancov.raw.
-//
-//===----------------------------------------------------------------------===//
-
-#include "sanitizer_allocator_internal.h"
-#include "sanitizer_libc.h"
-#include "sanitizer_procmaps.h"
-
-namespace __sanitizer {
-
-static const uptr kMaxTextSize = 64 * 1024;
-
-struct CachedMapping {
- public:
-  bool NeedsUpdate(uptr pc) {
-    int new_pid = internal_getpid();
-    if (last_pid == new_pid && pc && pc >= last_range_start &&
-        pc < last_range_end)
-      return false;
-    last_pid = new_pid;
-    return true;
-  }
-
-  void SetModuleRange(uptr start, uptr end) {
-    last_range_start = start;
-    last_range_end = end;
-  }
-
- private:
-  uptr last_range_start, last_range_end;
-  int last_pid;
-};
-
-static CachedMapping cached_mapping;
-static StaticSpinMutex mapping_mu;
-
-void CovUpdateMapping(const char *coverage_dir, uptr caller_pc) {
-  if (!common_flags()->coverage_direct) return;
-
-  SpinMutexLock l(&mapping_mu);
-
-  if (!cached_mapping.NeedsUpdate(caller_pc))
-    return;
-
-  InternalScopedString text(kMaxTextSize);
-
-  {
-    text.append("%d\n", sizeof(uptr) * 8);
-    ListOfModules modules;
-    modules.init();
-    for (const LoadedModule &module : modules) {
-      const char *module_name = StripModuleName(module.full_name());
-      uptr base = module.base_address();
-      for (const auto &range : module.ranges()) {
-        if (range.executable) {
-          uptr start = range.beg;
-          uptr end = range.end;
-          text.append("%zx %zx %zx %s\n", start, end, base, module_name);
-          if (caller_pc && caller_pc >= start && caller_pc < end)
-            cached_mapping.SetModuleRange(start, end);
-        }
-      }
-    }
-  }
-
-  error_t err;
-  InternalScopedString tmp_path(64 + internal_strlen(coverage_dir));
-  uptr res = internal_snprintf((char *)tmp_path.data(), tmp_path.size(),
-                               "%s/%zd.sancov.map.tmp", coverage_dir,
-                               internal_getpid());
-  CHECK_LE(res, tmp_path.size());
-  fd_t map_fd = OpenFile(tmp_path.data(), WrOnly, &err);
-  if (map_fd == kInvalidFd) {
-    Report("Coverage: failed to open %s for writing: %d\n", tmp_path.data(),
-           err);
-    Die();
-  }
-
-  if (!WriteToFile(map_fd, text.data(), text.length(), nullptr, &err)) {
-    Printf("sancov.map write failed: %d\n", err);
-    Die();
-  }
-  CloseFile(map_fd);
-
-  InternalScopedString path(64 + internal_strlen(coverage_dir));
-  res = internal_snprintf((char *)path.data(), path.size(), "%s/%zd.sancov.map",
-                          coverage_dir, internal_getpid());
-  CHECK_LE(res, path.size());
-  if (!RenameFile(tmp_path.data(), path.data(), &err)) {
-    Printf("sancov.map rename failed: %d\n", err);
-    Die();
-  }
-}
-
-} // namespace __sanitizer
diff --git a/lib/sanitizer_common/sanitizer_errno.cc b/lib/sanitizer_common/sanitizer_errno.cc
new file mode 100644
index 0000000..a6f9fc6
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_errno.cc
@@ -0,0 +1,35 @@
+//===-- sanitizer_errno.cc --------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is shared between sanitizers run-time libraries.
+//
+// Defines errno to avoid including errno.h and its dependencies into other
+// files (e.g. interceptors are not supposed to include any system headers).
+//
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_errno_codes.h"
+#include "sanitizer_internal_defs.h"
+
+#include <errno.h>
+
+namespace __sanitizer {
+
+COMPILER_CHECK(errno_ENOMEM == ENOMEM);
+COMPILER_CHECK(errno_EBUSY == EBUSY);
+COMPILER_CHECK(errno_EINVAL == EINVAL);
+
+// EOWNERDEAD is not present in some older platforms.
+#if defined(EOWNERDEAD)
+extern const int errno_EOWNERDEAD = EOWNERDEAD;
+#else
+extern const int errno_EOWNERDEAD = -1;
+#endif
+
+}  // namespace __sanitizer
diff --git a/lib/sanitizer_common/sanitizer_errno.h b/lib/sanitizer_common/sanitizer_errno.h
new file mode 100644
index 0000000..6cbbace
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_errno.h
@@ -0,0 +1,37 @@
+//===-- sanitizer_errno.h ---------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is shared between sanitizers run-time libraries.
+//
+// Defines errno to avoid including errno.h and its dependencies into sensitive
+// files (e.g. interceptors are not supposed to include any system headers).
+// It's ok to use errno.h directly when your file already depend on other system
+// includes though.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SANITIZER_ERRNO_H
+#define SANITIZER_ERRNO_H
+
+#include "sanitizer_errno_codes.h"
+#include "sanitizer_platform.h"
+
+#if SANITIZER_FREEBSD || SANITIZER_MAC
+#  define __errno_location __error
+#elif SANITIZER_ANDROID || SANITIZER_NETBSD
+#  define __errno_location __errno
+#elif SANITIZER_WINDOWS
+#  define __errno_location _errno
+#endif
+
+extern "C" int *__errno_location();
+
+#define errno (*__errno_location())
+
+#endif  // SANITIZER_ERRNO_H
diff --git a/lib/sanitizer_common/sanitizer_errno_codes.h b/lib/sanitizer_common/sanitizer_errno_codes.h
new file mode 100644
index 0000000..dba774c
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_errno_codes.h
@@ -0,0 +1,34 @@
+//===-- sanitizer_errno_codes.h ---------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is shared between sanitizers run-time libraries.
+//
+// Defines errno codes to avoid including errno.h and its dependencies into
+// sensitive files (e.g. interceptors are not supposed to include any system
+// headers).
+// It's ok to use errno.h directly when your file already depend on other system
+// includes though.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SANITIZER_ERRNO_CODES_H
+#define SANITIZER_ERRNO_CODES_H
+
+namespace __sanitizer {
+
+#define errno_ENOMEM 12
+#define errno_EBUSY 16
+#define errno_EINVAL 22
+
+// Those might not present or their value differ on different platforms.
+extern const int errno_EOWNERDEAD;
+
+}  // namespace __sanitizer
+
+#endif  // SANITIZER_ERRNO_CODES_H
diff --git a/lib/sanitizer_common/sanitizer_file.cc b/lib/sanitizer_common/sanitizer_file.cc
new file mode 100644
index 0000000..cde54bf
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_file.cc
@@ -0,0 +1,177 @@
+//===-- sanitizer_file.cc ------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+//
+// This file is shared between AddressSanitizer and ThreadSanitizer
+// run-time libraries.  It defines filesystem-related interfaces.  This
+// is separate from sanitizer_common.cc so that it's simpler to disable
+// all the filesystem support code for a port that doesn't use it.
+//
+//===---------------------------------------------------------------------===//
+
+#include "sanitizer_platform.h"
+
+#if !SANITIZER_FUCHSIA
+
+#include "sanitizer_common.h"
+#include "sanitizer_file.h"
+
+namespace __sanitizer {
+
+void CatastrophicErrorWrite(const char *buffer, uptr length) {
+  WriteToFile(kStderrFd, buffer, length);
+}
+
+StaticSpinMutex report_file_mu;
+ReportFile report_file = {&report_file_mu, kStderrFd, "", "", 0};
+
+void RawWrite(const char *buffer) {
+  report_file.Write(buffer, internal_strlen(buffer));
+}
+
+void ReportFile::ReopenIfNecessary() {
+  mu->CheckLocked();
+  if (fd == kStdoutFd || fd == kStderrFd) return;
+
+  uptr pid = internal_getpid();
+  // If in tracer, use the parent's file.
+  if (pid == stoptheworld_tracer_pid)
+    pid = stoptheworld_tracer_ppid;
+  if (fd != kInvalidFd) {
+    // If the report file is already opened by the current process,
+    // do nothing. Otherwise the report file was opened by the parent
+    // process, close it now.
+    if (fd_pid == pid)
+      return;
+    else
+      CloseFile(fd);
+  }
+
+  const char *exe_name = GetProcessName();
+  if (common_flags()->log_exe_name && exe_name) {
+    internal_snprintf(full_path, kMaxPathLength, "%s.%s.%zu", path_prefix,
+                      exe_name, pid);
+  } else {
+    internal_snprintf(full_path, kMaxPathLength, "%s.%zu", path_prefix, pid);
+  }
+  fd = OpenFile(full_path, WrOnly);
+  if (fd == kInvalidFd) {
+    const char *ErrorMsgPrefix = "ERROR: Can't open file: ";
+    WriteToFile(kStderrFd, ErrorMsgPrefix, internal_strlen(ErrorMsgPrefix));
+    WriteToFile(kStderrFd, full_path, internal_strlen(full_path));
+    Die();
+  }
+  fd_pid = pid;
+}
+
+void ReportFile::SetReportPath(const char *path) {
+  if (!path)
+    return;
+  uptr len = internal_strlen(path);
+  if (len > sizeof(path_prefix) - 100) {
+    Report("ERROR: Path is too long: %c%c%c%c%c%c%c%c...\n",
+           path[0], path[1], path[2], path[3],
+           path[4], path[5], path[6], path[7]);
+    Die();
+  }
+
+  SpinMutexLock l(mu);
+  if (fd != kStdoutFd && fd != kStderrFd && fd != kInvalidFd)
+    CloseFile(fd);
+  fd = kInvalidFd;
+  if (internal_strcmp(path, "stdout") == 0) {
+    fd = kStdoutFd;
+  } else if (internal_strcmp(path, "stderr") == 0) {
+    fd = kStderrFd;
+  } else {
+    internal_snprintf(path_prefix, kMaxPathLength, "%s", path);
+  }
+}
+
+bool ReadFileToBuffer(const char *file_name, char **buff, uptr *buff_size,
+                      uptr *read_len, uptr max_len, error_t *errno_p) {
+  uptr PageSize = GetPageSizeCached();
+  uptr kMinFileLen = PageSize;
+  *buff = nullptr;
+  *buff_size = 0;
+  *read_len = 0;
+  // The files we usually open are not seekable, so try different buffer sizes.
+  for (uptr size = kMinFileLen; size <= max_len; size *= 2) {
+    fd_t fd = OpenFile(file_name, RdOnly, errno_p);
+    if (fd == kInvalidFd) return false;
+    UnmapOrDie(*buff, *buff_size);
+    *buff = (char*)MmapOrDie(size, __func__);
+    *buff_size = size;
+    *read_len = 0;
+    // Read up to one page at a time.
+    bool reached_eof = false;
+    while (*read_len + PageSize <= size) {
+      uptr just_read;
+      if (!ReadFromFile(fd, *buff + *read_len, PageSize, &just_read, errno_p)) {
+        UnmapOrDie(*buff, *buff_size);
+        return false;
+      }
+      if (just_read == 0) {
+        reached_eof = true;
+        break;
+      }
+      *read_len += just_read;
+    }
+    CloseFile(fd);
+    if (reached_eof)  // We've read the whole file.
+      break;
+  }
+  return true;
+}
+
+static const char kPathSeparator = SANITIZER_WINDOWS ? ';' : ':';
+
+char *FindPathToBinary(const char *name) {
+  if (FileExists(name)) {
+    return internal_strdup(name);
+  }
+
+  const char *path = GetEnv("PATH");
+  if (!path)
+    return nullptr;
+  uptr name_len = internal_strlen(name);
+  InternalScopedBuffer<char> buffer(kMaxPathLength);
+  const char *beg = path;
+  while (true) {
+    const char *end = internal_strchrnul(beg, kPathSeparator);
+    uptr prefix_len = end - beg;
+    if (prefix_len + name_len + 2 <= kMaxPathLength) {
+      internal_memcpy(buffer.data(), beg, prefix_len);
+      buffer[prefix_len] = '/';
+      internal_memcpy(&buffer[prefix_len + 1], name, name_len);
+      buffer[prefix_len + 1 + name_len] = '\0';
+      if (FileExists(buffer.data()))
+        return internal_strdup(buffer.data());
+    }
+    if (*end == '\0') break;
+    beg = end + 1;
+  }
+  return nullptr;
+}
+
+} // namespace __sanitizer
+
+using namespace __sanitizer;  // NOLINT
+
+extern "C" {
+void __sanitizer_set_report_path(const char *path) {
+  report_file.SetReportPath(path);
+}
+
+void __sanitizer_set_report_fd(void *fd) {
+  report_file.fd = (fd_t)reinterpret_cast<uptr>(fd);
+  report_file.fd_pid = internal_getpid();
+}
+} // extern "C"
+
+#endif  // !SANITIZER_FUCHSIA
diff --git a/lib/sanitizer_common/sanitizer_file.h b/lib/sanitizer_common/sanitizer_file.h
new file mode 100644
index 0000000..9a12ab7
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_file.h
@@ -0,0 +1,110 @@
+//===-- sanitizer_file.h ---------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+//
+// This file is shared between run-time libraries of sanitizers.
+// It declares filesystem-related interfaces.  This is separate from
+// sanitizer_common.h so that it's simpler to disable all the filesystem
+// support code for a port that doesn't use it.
+//
+//===---------------------------------------------------------------------===//
+#ifndef SANITIZER_FILE_H
+#define SANITIZER_FILE_H
+
+#include "sanitizer_interface_internal.h"
+#include "sanitizer_internal_defs.h"
+#include "sanitizer_libc.h"
+#include "sanitizer_mutex.h"
+
+namespace __sanitizer {
+
+struct ReportFile {
+  void Write(const char *buffer, uptr length);
+  bool SupportsColors();
+  void SetReportPath(const char *path);
+
+  // Don't use fields directly. They are only declared public to allow
+  // aggregate initialization.
+
+  // Protects fields below.
+  StaticSpinMutex *mu;
+  // Opened file descriptor. Defaults to stderr. It may be equal to
+  // kInvalidFd, in which case new file will be opened when necessary.
+  fd_t fd;
+  // Path prefix of report file, set via __sanitizer_set_report_path.
+  char path_prefix[kMaxPathLength];
+  // Full path to report, obtained as <path_prefix>.PID
+  char full_path[kMaxPathLength];
+  // PID of the process that opened fd. If a fork() occurs,
+  // the PID of child will be different from fd_pid.
+  uptr fd_pid;
+
+ private:
+  void ReopenIfNecessary();
+};
+extern ReportFile report_file;
+
+enum FileAccessMode {
+  RdOnly,
+  WrOnly,
+  RdWr
+};
+
+// Returns kInvalidFd on error.
+fd_t OpenFile(const char *filename, FileAccessMode mode,
+              error_t *errno_p = nullptr);
+void CloseFile(fd_t);
+
+// Return true on success, false on error.
+bool ReadFromFile(fd_t fd, void *buff, uptr buff_size,
+                  uptr *bytes_read = nullptr, error_t *error_p = nullptr);
+bool WriteToFile(fd_t fd, const void *buff, uptr buff_size,
+                 uptr *bytes_written = nullptr, error_t *error_p = nullptr);
+
+bool RenameFile(const char *oldpath, const char *newpath,
+                error_t *error_p = nullptr);
+
+// Scoped file handle closer.
+struct FileCloser {
+  explicit FileCloser(fd_t fd) : fd(fd) {}
+  ~FileCloser() { CloseFile(fd); }
+  fd_t fd;
+};
+
+bool SupportsColoredOutput(fd_t fd);
+
+// OS
+const char *GetPwd();
+bool FileExists(const char *filename);
+char *FindPathToBinary(const char *name);
+bool IsPathSeparator(const char c);
+bool IsAbsolutePath(const char *path);
+// Starts a subprocess and returs its pid.
+// If *_fd parameters are not kInvalidFd their corresponding input/output
+// streams will be redirect to the file. The files will always be closed
+// in parent process even in case of an error.
+// The child process will close all fds after STDERR_FILENO
+// before passing control to a program.
+pid_t StartSubprocess(const char *filename, const char *const argv[],
+                      fd_t stdin_fd = kInvalidFd, fd_t stdout_fd = kInvalidFd,
+                      fd_t stderr_fd = kInvalidFd);
+// Checks if specified process is still running
+bool IsProcessRunning(pid_t pid);
+// Waits for the process to finish and returns its exit code.
+// Returns -1 in case of an error.
+int WaitForProcess(pid_t pid);
+
+// Maps given file to virtual memory, and returns pointer to it
+// (or NULL if mapping fails). Stores the size of mmaped region
+// in '*buff_size'.
+void *MapFileToMemory(const char *file_name, uptr *buff_size);
+void *MapWritableFileToMemory(void *addr, uptr size, fd_t fd, OFF_T offset);
+
+}  // namespace __sanitizer
+
+#endif  // SANITIZER_FILE_H
diff --git a/lib/sanitizer_common/sanitizer_flag_parser.h b/lib/sanitizer_common/sanitizer_flag_parser.h
index 2477aed..f649f5b 100644
--- a/lib/sanitizer_common/sanitizer_flag_parser.h
+++ b/lib/sanitizer_common/sanitizer_flag_parser.h
@@ -34,27 +34,48 @@
   bool Parse(const char *value) final;
 };
 
-template <>
-inline bool FlagHandler<bool>::Parse(const char *value) {
+inline bool ParseBool(const char *value, bool *b) {
   if (internal_strcmp(value, "0") == 0 ||
       internal_strcmp(value, "no") == 0 ||
       internal_strcmp(value, "false") == 0) {
-    *t_ = false;
+    *b = false;
     return true;
   }
   if (internal_strcmp(value, "1") == 0 ||
       internal_strcmp(value, "yes") == 0 ||
       internal_strcmp(value, "true") == 0) {
-    *t_ = true;
+    *b = true;
     return true;
   }
+  return false;
+}
+
+template <>
+inline bool FlagHandler<bool>::Parse(const char *value) {
+  if (ParseBool(value, t_)) return true;
   Printf("ERROR: Invalid value for bool option: '%s'\n", value);
   return false;
 }
 
 template <>
+inline bool FlagHandler<HandleSignalMode>::Parse(const char *value) {
+  bool b;
+  if (ParseBool(value, &b)) {
+    *t_ = b ? kHandleSignalYes : kHandleSignalNo;
+    return true;
+  }
+  if (internal_strcmp(value, "2") == 0 ||
+      internal_strcmp(value, "exclusive") == 0) {
+    *t_ = kHandleSignalExclusive;
+    return true;
+  }
+  Printf("ERROR: Invalid value for signal handler option: '%s'\n", value);
+  return false;
+}
+
+template <>
 inline bool FlagHandler<const char *>::Parse(const char *value) {
-  *t_ = internal_strdup(value);
+  *t_ = value;
   return true;
 }
 
diff --git a/lib/sanitizer_common/sanitizer_flags.h b/lib/sanitizer_common/sanitizer_flags.h
index 503126b..c2ec29d 100644
--- a/lib/sanitizer_common/sanitizer_flags.h
+++ b/lib/sanitizer_common/sanitizer_flags.h
@@ -18,6 +18,12 @@
 
 namespace __sanitizer {
 
+enum HandleSignalMode {
+  kHandleSignalNo,
+  kHandleSignalYes,
+  kHandleSignalExclusive,
+};
+
 struct CommonFlags {
 #define COMMON_FLAG(Type, Name, DefaultValue, Description) Type Name;
 #include "sanitizer_flags.inc"
diff --git a/lib/sanitizer_common/sanitizer_flags.inc b/lib/sanitizer_common/sanitizer_flags.inc
index 1306c72..21876e0 100644
--- a/lib/sanitizer_common/sanitizer_flags.inc
+++ b/lib/sanitizer_common/sanitizer_flags.inc
@@ -62,8 +62,7 @@
 COMMON_FLAG(
     int, verbosity, 0,
     "Verbosity level (0 - silent, 1 - a bit of output, 2+ - more output).")
-COMMON_FLAG(bool, detect_leaks, SANITIZER_WORDSIZE == 64,
-            "Enable memory leak detection.")
+COMMON_FLAG(bool, detect_leaks, !SANITIZER_MAC, "Enable memory leak detection.")
 COMMON_FLAG(
     bool, leak_check_at_exit, true,
     "Invoke leak checking in an atexit handler. Has no effect if "
@@ -76,22 +75,27 @@
             "If false, disable printing error summaries in addition to error "
             "reports.")
 COMMON_FLAG(int, print_module_map, 0,
-            "OS X only. 0 = don't print, 1 = print only once before process "
-            "exits, 2 = print after each report.")
+            "OS X only (0 - don't print, 1 - print only once before process "
+            "exits, 2 - print after each report).")
 COMMON_FLAG(bool, check_printf, true, "Check printf arguments.")
-COMMON_FLAG(bool, handle_segv, true,
-            "If set, registers the tool's custom SIGSEGV handler.")
-COMMON_FLAG(bool, handle_sigbus, true,
-            "If set, registers the tool's custom SIGBUS handler.")
-COMMON_FLAG(bool, handle_abort, false,
-            "If set, registers the tool's custom SIGABRT handler.")
-COMMON_FLAG(bool, handle_sigill, false,
-            "If set, registers the tool's custom SIGILL handler.")
-COMMON_FLAG(bool, handle_sigfpe, true,
-            "If set, registers the tool's custom SIGFPE handler.")
-COMMON_FLAG(bool, allow_user_segv_handler, false,
-            "If set, allows user to register a SEGV handler even if the tool "
-            "registers one.")
+#define COMMON_FLAG_HANDLE_SIGNAL_HELP(signal) \
+    "Controls custom tool's " #signal " handler (0 - do not registers the " \
+    "handler, 1 - register the handler and allow user to set own, " \
+    "2 - registers the handler and block user from changing it). "
+COMMON_FLAG(HandleSignalMode, handle_segv, kHandleSignalYes,
+            COMMON_FLAG_HANDLE_SIGNAL_HELP(SIGSEGV))
+COMMON_FLAG(HandleSignalMode, handle_sigbus, kHandleSignalYes,
+            COMMON_FLAG_HANDLE_SIGNAL_HELP(SIGBUS))
+COMMON_FLAG(HandleSignalMode, handle_abort, kHandleSignalNo,
+            COMMON_FLAG_HANDLE_SIGNAL_HELP(SIGABRT))
+COMMON_FLAG(HandleSignalMode, handle_sigill, kHandleSignalNo,
+            COMMON_FLAG_HANDLE_SIGNAL_HELP(SIGILL))
+COMMON_FLAG(HandleSignalMode, handle_sigfpe, kHandleSignalYes,
+            COMMON_FLAG_HANDLE_SIGNAL_HELP(SIGFPE))
+#undef COMMON_FLAG_HANDLE_SIGNAL_HELP
+COMMON_FLAG(bool, allow_user_segv_handler, true,
+            "Deprecated. True has no effect, use handle_sigbus=1. If false, "
+            "handle_*=1 will be upgraded to handle_*=2.")
 COMMON_FLAG(bool, use_sigaltstack, true,
             "If set, uses alternate stack for signal handling.")
 COMMON_FLAG(bool, detect_deadlocks, false,
@@ -125,11 +129,11 @@
             " This limit does not affect memory allocations other than"
             " malloc/new.")
 COMMON_FLAG(bool, heap_profile, false, "Experimental heap profiler, asan-only")
-COMMON_FLAG(s32, allocator_release_to_os_interval_ms, kReleaseToOSIntervalNever,
-            "Experimental. Only affects a 64-bit allocator. If set, tries to "
-            "release unused memory to the OS, but not more often than this "
-            "interval (in milliseconds). Negative values mean do not attempt "
-            "to release memory to the OS.\n")
+COMMON_FLAG(s32, allocator_release_to_os_interval_ms, 5000,
+            "Only affects a 64-bit allocator. If set, tries to release unused "
+            "memory to the OS, but not more often than this interval (in "
+            "milliseconds). Negative values mean do not attempt to release "
+            "memory to the OS.\n")
 COMMON_FLAG(bool, can_use_proc_maps_statm, true,
             "If false, do not attempt to read /proc/maps/statm."
             " Mostly useful for testing sanitizers.")
@@ -137,22 +141,6 @@
     bool, coverage, false,
     "If set, coverage information will be dumped at program shutdown (if the "
     "coverage instrumentation was enabled at compile time).")
-COMMON_FLAG(bool, coverage_pcs, true,
-            "If set (and if 'coverage' is set too), the coverage information "
-            "will be dumped as a set of PC offsets for every module.")
-COMMON_FLAG(bool, coverage_order_pcs, false,
-             "If true, the PCs will be dumped in the order they've"
-             " appeared during the execution.")
-COMMON_FLAG(bool, coverage_bitset, false,
-            "If set (and if 'coverage' is set too), the coverage information "
-            "will also be dumped as a bitset to a separate file.")
-COMMON_FLAG(bool, coverage_counters, false,
-            "If set (and if 'coverage' is set too), the bitmap that corresponds"
-            " to coverage counters will be dumped.")
-COMMON_FLAG(bool, coverage_direct, SANITIZER_ANDROID,
-            "If set, coverage information will be dumped directly to a memory "
-            "mapped file. This way data is not lost even if the process is "
-            "suddenly killed.")
 COMMON_FLAG(const char *, coverage_dir, ".",
             "Target directory for coverage dumps. Defaults to the current "
             "directory.")
@@ -193,12 +181,18 @@
 COMMON_FLAG(bool, intercept_strspn, true,
             "If set, uses custom wrappers for strspn and strcspn function "
             "to find more errors.")
+COMMON_FLAG(bool, intercept_strtok, true,
+            "If set, uses a custom wrapper for the strtok function "
+            "to find more errors.")
 COMMON_FLAG(bool, intercept_strpbrk, true,
             "If set, uses custom wrappers for strpbrk function "
             "to find more errors.")
 COMMON_FLAG(bool, intercept_strlen, true,
             "If set, uses custom wrappers for strlen and strnlen functions "
             "to find more errors.")
+COMMON_FLAG(bool, intercept_strndup, true,
+            "If set, uses custom wrappers for strndup functions "
+            "to find more errors.")
 COMMON_FLAG(bool, intercept_strchr, true,
             "If set, uses custom wrappers for strchr, strchrnul, and strrchr "
             "functions to find more errors.")
@@ -235,3 +229,8 @@
             "(asan only).")
 COMMON_FLAG(bool, html_cov_report, false, "Generate html coverage report.")
 COMMON_FLAG(const char *, sancov_path, "sancov", "Sancov tool location.")
+COMMON_FLAG(bool, dump_instruction_bytes, false,
+          "If true, dump 16 bytes starting at the instruction that caused SEGV")
+COMMON_FLAG(bool, dump_registers, true,
+          "If true, dump values of CPU registers when SEGV happens. Only "
+          "available on OS X for now.")
diff --git a/lib/sanitizer_common/sanitizer_fuchsia.cc b/lib/sanitizer_common/sanitizer_fuchsia.cc
new file mode 100644
index 0000000..a2a5321
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_fuchsia.cc
@@ -0,0 +1,543 @@
+//===-- sanitizer_fuchsia.cc ---------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+//
+// This file is shared between AddressSanitizer and other sanitizer
+// run-time libraries and implements Fuchsia-specific functions from
+// sanitizer_common.h.
+//===---------------------------------------------------------------------===//
+
+#include "sanitizer_fuchsia.h"
+#if SANITIZER_FUCHSIA
+
+#include "sanitizer_common.h"
+#include "sanitizer_libc.h"
+#include "sanitizer_mutex.h"
+#include "sanitizer_stacktrace.h"
+
+#include <limits.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <unwind.h>
+#include <zircon/errors.h>
+#include <zircon/process.h>
+#include <zircon/syscalls.h>
+
+namespace __sanitizer {
+
+void NORETURN internal__exit(int exitcode) { _zx_process_exit(exitcode); }
+
+uptr internal_sched_yield() {
+  zx_status_t status = _zx_nanosleep(0);
+  CHECK_EQ(status, ZX_OK);
+  return 0;  // Why doesn't this return void?
+}
+
+static void internal_nanosleep(zx_time_t ns) {
+  zx_status_t status = _zx_nanosleep(_zx_deadline_after(ns));
+  CHECK_EQ(status, ZX_OK);
+}
+
+unsigned int internal_sleep(unsigned int seconds) {
+  internal_nanosleep(ZX_SEC(seconds));
+  return 0;
+}
+
+u64 NanoTime() { return _zx_time_get(ZX_CLOCK_UTC); }
+
+uptr internal_getpid() {
+  zx_info_handle_basic_t info;
+  zx_status_t status =
+      _zx_object_get_info(_zx_process_self(), ZX_INFO_HANDLE_BASIC, &info,
+                          sizeof(info), NULL, NULL);
+  CHECK_EQ(status, ZX_OK);
+  uptr pid = static_cast<uptr>(info.koid);
+  CHECK_EQ(pid, info.koid);
+  return pid;
+}
+
+uptr GetThreadSelf() { return reinterpret_cast<uptr>(thrd_current()); }
+
+uptr GetTid() { return GetThreadSelf(); }
+
+void Abort() { abort(); }
+
+int Atexit(void (*function)(void)) { return atexit(function); }
+
+void SleepForSeconds(int seconds) { internal_sleep(seconds); }
+
+void SleepForMillis(int millis) { internal_nanosleep(ZX_MSEC(millis)); }
+
+void GetThreadStackTopAndBottom(bool, uptr *stack_top, uptr *stack_bottom) {
+  pthread_attr_t attr;
+  CHECK_EQ(pthread_getattr_np(pthread_self(), &attr), 0);
+  void *base;
+  size_t size;
+  CHECK_EQ(pthread_attr_getstack(&attr, &base, &size), 0);
+  CHECK_EQ(pthread_attr_destroy(&attr), 0);
+
+  *stack_bottom = reinterpret_cast<uptr>(base);
+  *stack_top = *stack_bottom + size;
+}
+
+void MaybeReexec() {}
+void PrepareForSandboxing(__sanitizer_sandbox_arguments *args) {}
+void DisableCoreDumperIfNecessary() {}
+void InstallDeadlySignalHandlers(SignalHandlerType handler) {}
+void StartReportDeadlySignal() {}
+void ReportDeadlySignal(const SignalContext &sig, u32 tid,
+                        UnwindSignalStackCallbackType unwind,
+                        const void *unwind_context) {}
+void SetAlternateSignalStack() {}
+void UnsetAlternateSignalStack() {}
+void InitTlsSize() {}
+
+void PrintModuleMap() {}
+
+bool SignalContext::IsStackOverflow() const { return false; }
+void SignalContext::DumpAllRegisters(void *context) { UNIMPLEMENTED(); }
+const char *SignalContext::Describe() const { UNIMPLEMENTED(); }
+
+struct UnwindTraceArg {
+  BufferedStackTrace *stack;
+  u32 max_depth;
+};
+
+_Unwind_Reason_Code Unwind_Trace(struct _Unwind_Context *ctx, void *param) {
+  UnwindTraceArg *arg = static_cast<UnwindTraceArg *>(param);
+  CHECK_LT(arg->stack->size, arg->max_depth);
+  uptr pc = _Unwind_GetIP(ctx);
+  if (pc < PAGE_SIZE) return _URC_NORMAL_STOP;
+  arg->stack->trace_buffer[arg->stack->size++] = pc;
+  return (arg->stack->size == arg->max_depth ? _URC_NORMAL_STOP
+                                             : _URC_NO_REASON);
+}
+
+void BufferedStackTrace::SlowUnwindStack(uptr pc, u32 max_depth) {
+  CHECK_GE(max_depth, 2);
+  size = 0;
+  UnwindTraceArg arg = {this, Min(max_depth + 1, kStackTraceMax)};
+  _Unwind_Backtrace(Unwind_Trace, &arg);
+  CHECK_GT(size, 0);
+  // We need to pop a few frames so that pc is on top.
+  uptr to_pop = LocatePcInTrace(pc);
+  // trace_buffer[0] belongs to the current function so we always pop it,
+  // unless there is only 1 frame in the stack trace (1 frame is always better
+  // than 0!).
+  PopStackFrames(Min(to_pop, static_cast<uptr>(1)));
+  trace_buffer[0] = pc;
+}
+
+void BufferedStackTrace::SlowUnwindStackWithContext(uptr pc, void *context,
+                                                    u32 max_depth) {
+  CHECK_NE(context, nullptr);
+  UNREACHABLE("signal context doesn't exist");
+}
+
+enum MutexState : int { MtxUnlocked = 0, MtxLocked = 1, MtxSleeping = 2 };
+
+BlockingMutex::BlockingMutex() {
+  // NOTE!  It's important that this use internal_memset, because plain
+  // memset might be intercepted (e.g., actually be __asan_memset).
+  // Defining this so the compiler initializes each field, e.g.:
+  //   BlockingMutex::BlockingMutex() : BlockingMutex(LINKER_INITIALIZED) {}
+  // might result in the compiler generating a call to memset, which would
+  // have the same problem.
+  internal_memset(this, 0, sizeof(*this));
+}
+
+void BlockingMutex::Lock() {
+  CHECK_EQ(owner_, 0);
+  atomic_uint32_t *m = reinterpret_cast<atomic_uint32_t *>(&opaque_storage_);
+  if (atomic_exchange(m, MtxLocked, memory_order_acquire) == MtxUnlocked)
+    return;
+  while (atomic_exchange(m, MtxSleeping, memory_order_acquire) != MtxUnlocked) {
+    zx_status_t status = _zx_futex_wait(reinterpret_cast<zx_futex_t *>(m),
+                                        MtxSleeping, ZX_TIME_INFINITE);
+    if (status != ZX_ERR_BAD_STATE)  // Normal race.
+      CHECK_EQ(status, ZX_OK);
+  }
+}
+
+void BlockingMutex::Unlock() {
+  atomic_uint32_t *m = reinterpret_cast<atomic_uint32_t *>(&opaque_storage_);
+  u32 v = atomic_exchange(m, MtxUnlocked, memory_order_release);
+  CHECK_NE(v, MtxUnlocked);
+  if (v == MtxSleeping) {
+    zx_status_t status = _zx_futex_wake(reinterpret_cast<zx_futex_t *>(m), 1);
+    CHECK_EQ(status, ZX_OK);
+  }
+}
+
+void BlockingMutex::CheckLocked() {
+  atomic_uint32_t *m = reinterpret_cast<atomic_uint32_t *>(&opaque_storage_);
+  CHECK_NE(MtxUnlocked, atomic_load(m, memory_order_relaxed));
+}
+
+uptr GetPageSize() { return PAGE_SIZE; }
+
+uptr GetMmapGranularity() { return PAGE_SIZE; }
+
+sanitizer_shadow_bounds_t ShadowBounds;
+
+uptr GetMaxVirtualAddress() {
+  ShadowBounds = __sanitizer_shadow_bounds();
+  return ShadowBounds.memory_limit - 1;
+}
+
+static void *DoAnonymousMmapOrDie(uptr size, const char *mem_type,
+                                  bool raw_report, bool die_for_nomem) {
+  size = RoundUpTo(size, PAGE_SIZE);
+
+  zx_handle_t vmo;
+  zx_status_t status = _zx_vmo_create(size, 0, &vmo);
+  if (status != ZX_OK) {
+    if (status != ZX_ERR_NO_MEMORY || die_for_nomem)
+      ReportMmapFailureAndDie(size, mem_type, "zx_vmo_create", status,
+                              raw_report);
+    return nullptr;
+  }
+  _zx_object_set_property(vmo, ZX_PROP_NAME, mem_type,
+                          internal_strlen(mem_type));
+
+  // TODO(mcgrathr): Maybe allocate a VMAR for all sanitizer heap and use that?
+  uintptr_t addr;
+  status = _zx_vmar_map(_zx_vmar_root_self(), 0, vmo, 0, size,
+                        ZX_VM_FLAG_PERM_READ | ZX_VM_FLAG_PERM_WRITE, &addr);
+  _zx_handle_close(vmo);
+
+  if (status != ZX_OK) {
+    if (status != ZX_ERR_NO_MEMORY || die_for_nomem)
+      ReportMmapFailureAndDie(size, mem_type, "zx_vmar_map", status,
+                              raw_report);
+    return nullptr;
+  }
+
+  IncreaseTotalMmap(size);
+
+  return reinterpret_cast<void *>(addr);
+}
+
+void *MmapOrDie(uptr size, const char *mem_type, bool raw_report) {
+  return DoAnonymousMmapOrDie(size, mem_type, raw_report, true);
+}
+
+void *MmapNoReserveOrDie(uptr size, const char *mem_type) {
+  return MmapOrDie(size, mem_type);
+}
+
+void *MmapOrDieOnFatalError(uptr size, const char *mem_type) {
+  return DoAnonymousMmapOrDie(size, mem_type, false, false);
+}
+
+uptr ReservedAddressRange::Init(uptr init_size, const char* name,
+                                uptr fixed_addr) {
+  base_ = MmapNoAccess(init_size);
+  size_ = init_size;
+  name_ = name;
+  return reinterpret_cast<uptr>(base_);
+}
+
+// Uses fixed_addr for now.
+// Will use offset instead once we've implemented this function for real.
+uptr ReservedAddressRange::Map(uptr fixed_addr, uptr map_size,
+                               bool tolerate_enomem) {
+  return reinterpret_cast<uptr>(MmapFixedOrDie(fixed_addr, map_size));
+}
+
+void ReservedAddressRange::Unmap(uptr addr, uptr size) {
+  void* addr_as_void = reinterpret_cast<void*>(addr);
+  uptr base_as_uptr = reinterpret_cast<uptr>(base_);
+  // Only unmap at the beginning or end of the range.
+  CHECK((addr_as_void == base_) || (addr + size == base_as_uptr + size_));
+  CHECK_LE(size, size_);
+  UnmapOrDie(reinterpret_cast<void*>(addr), size);
+}
+
+// MmapNoAccess and MmapFixedOrDie are used only by sanitizer_allocator.
+// Instead of doing exactly what they say, we make MmapNoAccess actually
+// just allocate a VMAR to reserve the address space.  Then MmapFixedOrDie
+// uses that VMAR instead of the root.
+
+zx_handle_t allocator_vmar = ZX_HANDLE_INVALID;
+uintptr_t allocator_vmar_base;
+size_t allocator_vmar_size;
+
+void *MmapNoAccess(uptr size) {
+  size = RoundUpTo(size, PAGE_SIZE);
+  CHECK_EQ(allocator_vmar, ZX_HANDLE_INVALID);
+  uintptr_t base;
+  zx_status_t status =
+      _zx_vmar_allocate(_zx_vmar_root_self(), 0, size,
+                        ZX_VM_FLAG_CAN_MAP_READ | ZX_VM_FLAG_CAN_MAP_WRITE |
+                            ZX_VM_FLAG_CAN_MAP_SPECIFIC,
+                        &allocator_vmar, &base);
+  if (status != ZX_OK)
+    ReportMmapFailureAndDie(size, "sanitizer allocator address space",
+                            "zx_vmar_allocate", status);
+
+  allocator_vmar_base = base;
+  allocator_vmar_size = size;
+  return reinterpret_cast<void *>(base);
+}
+
+constexpr const char kAllocatorVmoName[] = "sanitizer_allocator";
+
+static void *DoMmapFixedOrDie(uptr fixed_addr, uptr size, bool die_for_nomem) {
+  size = RoundUpTo(size, PAGE_SIZE);
+
+  zx_handle_t vmo;
+  zx_status_t status = _zx_vmo_create(size, 0, &vmo);
+  if (status != ZX_OK) {
+    if (status != ZX_ERR_NO_MEMORY || die_for_nomem)
+      ReportMmapFailureAndDie(size, kAllocatorVmoName, "zx_vmo_create", status);
+    return nullptr;
+  }
+  _zx_object_set_property(vmo, ZX_PROP_NAME, kAllocatorVmoName,
+                          sizeof(kAllocatorVmoName) - 1);
+
+  DCHECK_GE(fixed_addr, allocator_vmar_base);
+  uintptr_t offset = fixed_addr - allocator_vmar_base;
+  DCHECK_LE(size, allocator_vmar_size);
+  DCHECK_GE(allocator_vmar_size - offset, size);
+
+  uintptr_t addr;
+  status = _zx_vmar_map(
+      allocator_vmar, offset, vmo, 0, size,
+      ZX_VM_FLAG_PERM_READ | ZX_VM_FLAG_PERM_WRITE | ZX_VM_FLAG_SPECIFIC,
+      &addr);
+  _zx_handle_close(vmo);
+  if (status != ZX_OK) {
+    if (status != ZX_ERR_NO_MEMORY || die_for_nomem)
+      ReportMmapFailureAndDie(size, kAllocatorVmoName, "zx_vmar_map", status);
+    return nullptr;
+  }
+
+  IncreaseTotalMmap(size);
+
+  return reinterpret_cast<void *>(addr);
+}
+
+void *MmapFixedOrDie(uptr fixed_addr, uptr size) {
+  return DoMmapFixedOrDie(fixed_addr, size, true);
+}
+
+void *MmapFixedOrDieOnFatalError(uptr fixed_addr, uptr size) {
+  return DoMmapFixedOrDie(fixed_addr, size, false);
+}
+
+// This should never be called.
+void *MmapFixedNoAccess(uptr fixed_addr, uptr size, const char *name) {
+  UNIMPLEMENTED();
+}
+
+void *MmapAlignedOrDieOnFatalError(uptr size, uptr alignment,
+                                   const char *mem_type) {
+  CHECK_GE(size, PAGE_SIZE);
+  CHECK(IsPowerOfTwo(size));
+  CHECK(IsPowerOfTwo(alignment));
+
+  zx_handle_t vmo;
+  zx_status_t status = _zx_vmo_create(size, 0, &vmo);
+  if (status != ZX_OK) {
+    if (status != ZX_ERR_NO_MEMORY)
+      ReportMmapFailureAndDie(size, mem_type, "zx_vmo_create", status, false);
+    return nullptr;
+  }
+  _zx_object_set_property(vmo, ZX_PROP_NAME, mem_type,
+                          internal_strlen(mem_type));
+
+  // TODO(mcgrathr): Maybe allocate a VMAR for all sanitizer heap and use that?
+
+  // Map a larger size to get a chunk of address space big enough that
+  // it surely contains an aligned region of the requested size.  Then
+  // overwrite the aligned middle portion with a mapping from the
+  // beginning of the VMO, and unmap the excess before and after.
+  size_t map_size = size + alignment;
+  uintptr_t addr;
+  status = _zx_vmar_map(_zx_vmar_root_self(), 0, vmo, 0, map_size,
+                        ZX_VM_FLAG_PERM_READ | ZX_VM_FLAG_PERM_WRITE, &addr);
+  if (status == ZX_OK) {
+    uintptr_t map_addr = addr;
+    uintptr_t map_end = map_addr + map_size;
+    addr = RoundUpTo(map_addr, alignment);
+    uintptr_t end = addr + size;
+    if (addr != map_addr) {
+      zx_info_vmar_t info;
+      status = _zx_object_get_info(_zx_vmar_root_self(), ZX_INFO_VMAR, &info,
+                                   sizeof(info), NULL, NULL);
+      if (status == ZX_OK) {
+        uintptr_t new_addr;
+        status =
+            _zx_vmar_map(_zx_vmar_root_self(), addr - info.base, vmo, 0, size,
+                         ZX_VM_FLAG_PERM_READ | ZX_VM_FLAG_PERM_WRITE |
+                             ZX_VM_FLAG_SPECIFIC_OVERWRITE,
+                         &new_addr);
+        if (status == ZX_OK) CHECK_EQ(new_addr, addr);
+      }
+    }
+    if (status == ZX_OK && addr != map_addr)
+      status = _zx_vmar_unmap(_zx_vmar_root_self(), map_addr, addr - map_addr);
+    if (status == ZX_OK && end != map_end)
+      status = _zx_vmar_unmap(_zx_vmar_root_self(), end, map_end - end);
+  }
+  _zx_handle_close(vmo);
+
+  if (status != ZX_OK) {
+    if (status != ZX_ERR_NO_MEMORY)
+      ReportMmapFailureAndDie(size, mem_type, "zx_vmar_map", status, false);
+    return nullptr;
+  }
+
+  IncreaseTotalMmap(size);
+
+  return reinterpret_cast<void *>(addr);
+}
+
+void UnmapOrDie(void *addr, uptr size) {
+  if (!addr || !size) return;
+  size = RoundUpTo(size, PAGE_SIZE);
+
+  zx_status_t status = _zx_vmar_unmap(_zx_vmar_root_self(),
+                                      reinterpret_cast<uintptr_t>(addr), size);
+  if (status != ZX_OK) {
+    Report("ERROR: %s failed to deallocate 0x%zx (%zd) bytes at address %p\n",
+           SanitizerToolName, size, size, addr);
+    CHECK("unable to unmap" && 0);
+  }
+
+  DecreaseTotalMmap(size);
+}
+
+// This is used on the shadow mapping, which cannot be changed.
+// Zircon doesn't have anything like MADV_DONTNEED.
+void ReleaseMemoryPagesToOS(uptr beg, uptr end) {}
+
+void DumpProcessMap() {
+  UNIMPLEMENTED();  // TODO(mcgrathr): write it
+}
+
+bool IsAccessibleMemoryRange(uptr beg, uptr size) {
+  // TODO(mcgrathr): Figure out a better way.
+  zx_handle_t vmo;
+  zx_status_t status = _zx_vmo_create(size, 0, &vmo);
+  if (status == ZX_OK) {
+    while (size > 0) {
+      size_t wrote;
+      status = _zx_vmo_write(vmo, reinterpret_cast<const void *>(beg), 0, size,
+                             &wrote);
+      if (status != ZX_OK) break;
+      CHECK_GT(wrote, 0);
+      CHECK_LE(wrote, size);
+      beg += wrote;
+      size -= wrote;
+    }
+    _zx_handle_close(vmo);
+  }
+  return status == ZX_OK;
+}
+
+// FIXME implement on this platform.
+void GetMemoryProfile(fill_profile_f cb, uptr *stats, uptr stats_size) {}
+
+bool ReadFileToBuffer(const char *file_name, char **buff, uptr *buff_size,
+                      uptr *read_len, uptr max_len, error_t *errno_p) {
+  zx_handle_t vmo;
+  zx_status_t status = __sanitizer_get_configuration(file_name, &vmo);
+  if (status == ZX_OK) {
+    uint64_t vmo_size;
+    status = _zx_vmo_get_size(vmo, &vmo_size);
+    if (status == ZX_OK) {
+      if (vmo_size < max_len) max_len = vmo_size;
+      size_t map_size = RoundUpTo(max_len, PAGE_SIZE);
+      uintptr_t addr;
+      status = _zx_vmar_map(_zx_vmar_root_self(), 0, vmo, 0, map_size,
+                            ZX_VM_FLAG_PERM_READ, &addr);
+      if (status == ZX_OK) {
+        *buff = reinterpret_cast<char *>(addr);
+        *buff_size = map_size;
+        *read_len = max_len;
+      }
+    }
+    _zx_handle_close(vmo);
+  }
+  if (status != ZX_OK && errno_p) *errno_p = status;
+  return status == ZX_OK;
+}
+
+void RawWrite(const char *buffer) {
+  __sanitizer_log_write(buffer, internal_strlen(buffer));
+}
+
+void CatastrophicErrorWrite(const char *buffer, uptr length) {
+  __sanitizer_log_write(buffer, length);
+}
+
+char **StoredArgv;
+char **StoredEnviron;
+
+char **GetArgv() { return StoredArgv; }
+
+const char *GetEnv(const char *name) {
+  if (StoredEnviron) {
+    uptr NameLen = internal_strlen(name);
+    for (char **Env = StoredEnviron; *Env != 0; Env++) {
+      if (internal_strncmp(*Env, name, NameLen) == 0 && (*Env)[NameLen] == '=')
+        return (*Env) + NameLen + 1;
+    }
+  }
+  return nullptr;
+}
+
+uptr ReadBinaryName(/*out*/ char *buf, uptr buf_len) {
+  const char *argv0 = StoredArgv[0];
+  if (!argv0) argv0 = "<UNKNOWN>";
+  internal_strncpy(buf, argv0, buf_len);
+  return internal_strlen(buf);
+}
+
+uptr ReadLongProcessName(/*out*/ char *buf, uptr buf_len) {
+  return ReadBinaryName(buf, buf_len);
+}
+
+uptr MainThreadStackBase, MainThreadStackSize;
+
+bool GetRandom(void *buffer, uptr length, bool blocking) {
+  CHECK_LE(length, ZX_CPRNG_DRAW_MAX_LEN);
+  size_t size;
+  CHECK_EQ(_zx_cprng_draw(buffer, length, &size), ZX_OK);
+  CHECK_EQ(size, length);
+  return true;
+}
+
+}  // namespace __sanitizer
+
+using namespace __sanitizer;  // NOLINT
+
+extern "C" {
+void __sanitizer_startup_hook(int argc, char **argv, char **envp,
+                              void *stack_base, size_t stack_size) {
+  __sanitizer::StoredArgv = argv;
+  __sanitizer::StoredEnviron = envp;
+  __sanitizer::MainThreadStackBase = reinterpret_cast<uintptr_t>(stack_base);
+  __sanitizer::MainThreadStackSize = stack_size;
+}
+
+void __sanitizer_set_report_path(const char *path) {
+  // Handle the initialization code in each sanitizer, but no other calls.
+  // This setting is never consulted on Fuchsia.
+  DCHECK_EQ(path, common_flags()->log_path);
+}
+
+void __sanitizer_set_report_fd(void *fd) {
+  UNREACHABLE("not available on Fuchsia");
+}
+}  // extern "C"
+
+#endif  // SANITIZER_FUCHSIA
diff --git a/lib/sanitizer_common/sanitizer_fuchsia.h b/lib/sanitizer_common/sanitizer_fuchsia.h
new file mode 100644
index 0000000..18821b4
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_fuchsia.h
@@ -0,0 +1,31 @@
+//===-- sanitizer_fuchsia.h ------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+//
+// Fuchsia-specific sanitizer support.
+//
+//===---------------------------------------------------------------------===//
+#ifndef SANITIZER_FUCHSIA_H
+#define SANITIZER_FUCHSIA_H
+
+#include "sanitizer_platform.h"
+#if SANITIZER_FUCHSIA
+
+#include "sanitizer_common.h"
+
+#include <zircon/sanitizer.h>
+
+namespace __sanitizer {
+
+extern uptr MainThreadStackBase, MainThreadStackSize;
+extern sanitizer_shadow_bounds_t ShadowBounds;
+
+}  // namespace __sanitizer
+
+#endif  // SANITIZER_FUCHSIA
+#endif  // SANITIZER_FUCHSIA_H
diff --git a/lib/sanitizer_common/sanitizer_interface_internal.h b/lib/sanitizer_common/sanitizer_interface_internal.h
index b28d8f0..942f0b1 100644
--- a/lib/sanitizer_common/sanitizer_interface_internal.h
+++ b/lib/sanitizer_common/sanitizer_interface_internal.h
@@ -46,7 +46,6 @@
   SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
   void __sanitizer_report_error_summary(const char *error_summary);
 
-  SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_cov_init();
   SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_cov_dump();
   SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_dump_coverage(
       const __sanitizer::uptr *pcs, const __sanitizer::uptr len);
@@ -81,6 +80,14 @@
   SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
   void __sanitizer_cov_trace_cmp8();
   SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+  void __sanitizer_cov_trace_const_cmp1();
+  SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+  void __sanitizer_cov_trace_const_cmp2();
+  SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+  void __sanitizer_cov_trace_const_cmp4();
+  SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+  void __sanitizer_cov_trace_const_cmp8();
+  SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
   void __sanitizer_cov_trace_switch();
   SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
   void __sanitizer_cov_trace_div4();
@@ -95,6 +102,10 @@
   SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
   void __sanitizer_cov_trace_pc_guard_init(__sanitizer::u32*,
                                            __sanitizer::u32*);
+  SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+  void __sanitizer_cov_8bit_counters_init();
+  SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+  void __sanitizer_cov_pcs_init();
 } // extern "C"
 
 #endif  // SANITIZER_INTERFACE_INTERNAL_H
diff --git a/lib/sanitizer_common/sanitizer_internal_defs.h b/lib/sanitizer_common/sanitizer_internal_defs.h
index ea5022e..dcc8db4 100644
--- a/lib/sanitizer_common/sanitizer_internal_defs.h
+++ b/lib/sanitizer_common/sanitizer_internal_defs.h
@@ -35,6 +35,14 @@
 # define SANITIZER_WEAK_ATTRIBUTE  __attribute__((weak))
 #endif
 
+// TLS is handled differently on different platforms
+#if SANITIZER_LINUX || SANITIZER_NETBSD
+# define SANITIZER_TLS_INITIAL_EXEC_ATTRIBUTE \
+    __attribute__((tls_model("initial-exec"))) thread_local
+#else
+# define SANITIZER_TLS_INITIAL_EXEC_ATTRIBUTE
+#endif
+
 //--------------------------- WEAK FUNCTIONS ---------------------------------//
 // When working with weak functions, to simplify the code and make it more
 // portable, when possible define a default implementation using this macro:
@@ -56,11 +64,13 @@
 
 // SANITIZER_SUPPORTS_WEAK_HOOKS means that we support real weak functions that
 // will evaluate to a null pointer when not defined.
+#ifndef SANITIZER_SUPPORTS_WEAK_HOOKS
 #if (SANITIZER_LINUX || SANITIZER_MAC) && !SANITIZER_GO
 # define SANITIZER_SUPPORTS_WEAK_HOOKS 1
 #else
 # define SANITIZER_SUPPORTS_WEAK_HOOKS 0
 #endif
+#endif // SANITIZER_SUPPORTS_WEAK_HOOKS
 // For some weak hooks that will be called very often and we want to avoid the
 // overhead of executing the default implementation when it is not necessary,
 // we can use the flag SANITIZER_SUPPORTS_WEAK_HOOKS to only define the default
@@ -129,12 +139,8 @@
 #endif
 typedef int pid_t;
 
-// WARNING: OFF_T may be different from OS type off_t, depending on the value of
-// _FILE_OFFSET_BITS. This definition of OFF_T matches the ABI of system calls
-// like pread and mmap, as opposed to pread64 and mmap64.
-// FreeBSD, Mac and Linux/x86-64 are special.
-#if SANITIZER_FREEBSD || SANITIZER_MAC || \
-  (SANITIZER_LINUX && defined(__x86_64__))
+#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_MAC || \
+    (SANITIZER_LINUX && defined(__x86_64__))
 typedef u64 OFF_T;
 #else
 typedef uptr OFF_T;
@@ -152,6 +158,12 @@
 # endif
 #endif
 
+#if SANITIZER_MAC
+// On Darwin, thread IDs are 64-bit even on 32-bit systems.
+typedef u64 tid_t;
+#else
+typedef uptr tid_t;
+#endif
 
 // ----------- ATTENTION -------------
 // This header should NOT include any other headers to avoid portability issues.
@@ -251,8 +263,8 @@
 
 #define CHECK_IMPL(c1, op, c2) \
   do { \
-    __sanitizer::u64 v1 = (u64)(c1); \
-    __sanitizer::u64 v2 = (u64)(c2); \
+    __sanitizer::u64 v1 = (__sanitizer::u64)(c1); \
+    __sanitizer::u64 v2 = (__sanitizer::u64)(c2); \
     if (UNLIKELY(!(v1 op v2))) \
       __sanitizer::CheckFailed(__FILE__, __LINE__, \
         "(" #c1 ") " #op " (" #c2 ")", v1, v2); \
diff --git a/lib/sanitizer_common/sanitizer_libignore.cc b/lib/sanitizer_common/sanitizer_libignore.cc
index aa4fa88..0df055e 100644
--- a/lib/sanitizer_common/sanitizer_libignore.cc
+++ b/lib/sanitizer_common/sanitizer_libignore.cc
@@ -9,7 +9,7 @@
 
 #include "sanitizer_platform.h"
 
-#if SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_MAC
+#if SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_MAC || SANITIZER_NETBSD
 
 #include "sanitizer_libignore.h"
 #include "sanitizer_flags.h"
@@ -125,4 +125,5 @@
 
 } // namespace __sanitizer
 
-#endif // #if SANITIZER_FREEBSD || SANITIZER_LINUX
+#endif  // SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_MAC ||
+        // SANITIZER_NETBSD
diff --git a/lib/sanitizer_common/sanitizer_linux.cc b/lib/sanitizer_common/sanitizer_linux.cc
index 46dd085..6ea1858 100644
--- a/lib/sanitizer_common/sanitizer_linux.cc
+++ b/lib/sanitizer_common/sanitizer_linux.cc
@@ -14,7 +14,7 @@
 
 #include "sanitizer_platform.h"
 
-#if SANITIZER_FREEBSD || SANITIZER_LINUX
+#if SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_NETBSD
 
 #include "sanitizer_common.h"
 #include "sanitizer_flags.h"
@@ -27,10 +27,14 @@
 #include "sanitizer_stacktrace.h"
 #include "sanitizer_symbolizer.h"
 
-#if !SANITIZER_FREEBSD
+#if SANITIZER_LINUX
 #include <asm/param.h>
 #endif
 
+#if SANITIZER_NETBSD
+#include <lwp.h>
+#endif
+
 // For mips64, syscall(__NR_stat) fills the buffer in the 'struct kernel_stat'
 // format. Struct kernel_stat is defined as 'struct stat' in asm/stat.h. To
 // access stat from asm/stat.h, without conflicting with definition in
@@ -59,11 +63,17 @@
 #include <ucontext.h>
 #include <unistd.h>
 
+#if SANITIZER_LINUX
+#include <sys/utsname.h>
+#endif
+
+#if SANITIZER_LINUX && !SANITIZER_ANDROID
+#include <sys/personality.h>
+#endif
+
 #if SANITIZER_FREEBSD
 #include <sys/exec.h>
 #include <sys/sysctl.h>
-#include <vm/vm_param.h>
-#include <vm/pmap.h>
 #include <machine/atomic.h>
 extern "C" {
 // <sys/umtx.h> must be included after <errno.h> and <sys/types.h> on
@@ -73,10 +83,32 @@
 extern char **environ;  // provided by crt1
 #endif  // SANITIZER_FREEBSD
 
+#if SANITIZER_NETBSD
+#include <limits.h>  // For NAME_MAX
+#include <sys/sysctl.h>
+extern char **environ;  // provided by crt1
+#include <sys/exec.h>
+extern struct ps_strings *__ps_strings;
+#endif  // SANITIZER_NETBSD
+
 #if !SANITIZER_ANDROID
 #include <sys/signal.h>
 #endif
 
+#ifndef __GLIBC_PREREQ
+#define __GLIBC_PREREQ(x, y) 0
+#endif
+
+#if SANITIZER_LINUX && __GLIBC_PREREQ(2, 16)
+# define SANITIZER_USE_GETAUXVAL 1
+#else
+# define SANITIZER_USE_GETAUXVAL 0
+#endif
+
+#if SANITIZER_USE_GETAUXVAL
+#include <sys/auxv.h>
+#endif
+
 #if SANITIZER_LINUX
 // <linux/time.h>
 struct kernel_timeval {
@@ -105,6 +137,15 @@
 }
 #endif
 
+#if SANITIZER_LINUX && defined(__NR_getrandom)
+# if !defined(GRND_NONBLOCK)
+#  define GRND_NONBLOCK 1
+# endif
+# define SANITIZER_USE_GETRANDOM 1
+#else
+# define SANITIZER_USE_GETRANDOM 0
+#endif  // SANITIZER_LINUX && defined(__NR_getrandom)
+
 namespace __sanitizer {
 
 #if SANITIZER_LINUX && defined(__x86_64__)
@@ -119,7 +160,10 @@
 #if !SANITIZER_S390
 uptr internal_mmap(void *addr, uptr length, int prot, int flags, int fd,
                    OFF_T offset) {
-#if SANITIZER_FREEBSD || SANITIZER_LINUX_USES_64BIT_SYSCALLS
+#if SANITIZER_NETBSD
+  return internal_syscall_ptr(SYSCALL(mmap), addr, length, prot, flags, fd,
+                              (long)0, offset);
+#elif SANITIZER_FREEBSD || SANITIZER_LINUX_USES_64BIT_SYSCALLS
   return internal_syscall(SYSCALL(mmap), (uptr)addr, length, prot, flags, fd,
                           offset);
 #else
@@ -132,11 +176,11 @@
 #endif // !SANITIZER_S390
 
 uptr internal_munmap(void *addr, uptr length) {
-  return internal_syscall(SYSCALL(munmap), (uptr)addr, length);
+  return internal_syscall_ptr(SYSCALL(munmap), (uptr)addr, length);
 }
 
 int internal_mprotect(void *addr, uptr length, int prot) {
-  return internal_syscall(SYSCALL(mprotect), (uptr)addr, length, prot);
+  return internal_syscall_ptr(SYSCALL(mprotect), (uptr)addr, length, prot);
 }
 
 uptr internal_close(fd_t fd) {
@@ -147,7 +191,7 @@
 #if SANITIZER_USES_CANONICAL_LINUX_SYSCALLS
   return internal_syscall(SYSCALL(openat), AT_FDCWD, (uptr)filename, flags);
 #else
-  return internal_syscall(SYSCALL(open), (uptr)filename, flags);
+  return internal_syscall_ptr(SYSCALL(open), (uptr)filename, flags);
 #endif
 }
 
@@ -156,32 +200,36 @@
   return internal_syscall(SYSCALL(openat), AT_FDCWD, (uptr)filename, flags,
                           mode);
 #else
-  return internal_syscall(SYSCALL(open), (uptr)filename, flags, mode);
+  return internal_syscall_ptr(SYSCALL(open), (uptr)filename, flags, mode);
 #endif
 }
 
 uptr internal_read(fd_t fd, void *buf, uptr count) {
   sptr res;
-  HANDLE_EINTR(res, (sptr)internal_syscall(SYSCALL(read), fd, (uptr)buf,
+  HANDLE_EINTR(res, (sptr)internal_syscall_ptr(SYSCALL(read), fd, (uptr)buf,
                count));
   return res;
 }
 
 uptr internal_write(fd_t fd, const void *buf, uptr count) {
   sptr res;
-  HANDLE_EINTR(res, (sptr)internal_syscall(SYSCALL(write), fd, (uptr)buf,
+  HANDLE_EINTR(res, (sptr)internal_syscall_ptr(SYSCALL(write), fd, (uptr)buf,
                count));
   return res;
 }
 
 uptr internal_ftruncate(fd_t fd, uptr size) {
   sptr res;
+#if SANITIZER_NETBSD
+  HANDLE_EINTR(res, internal_syscall64(SYSCALL(ftruncate), fd, 0, (s64)size));
+#else
   HANDLE_EINTR(res, (sptr)internal_syscall(SYSCALL(ftruncate), fd,
                (OFF_T)size));
+#endif
   return res;
 }
 
-#if !SANITIZER_LINUX_USES_64BIT_SYSCALLS && !SANITIZER_FREEBSD
+#if !SANITIZER_LINUX_USES_64BIT_SYSCALLS && SANITIZER_LINUX
 static void stat64_to_stat(struct stat64 *in, struct stat *out) {
   internal_memset(out, 0, sizeof(*out));
   out->st_dev = in->st_dev;
@@ -197,11 +245,25 @@
   out->st_atime = in->st_atime;
   out->st_mtime = in->st_mtime;
   out->st_ctime = in->st_ctime;
-  out->st_ino = in->st_ino;
 }
 #endif
 
 #if defined(__mips64)
+// Undefine compatibility macros from <sys/stat.h>
+// so that they would not clash with the kernel_stat
+// st_[a|m|c]time fields
+#undef st_atime
+#undef st_mtime
+#undef st_ctime
+#if defined(SANITIZER_ANDROID)
+// Bionic sys/stat.h defines additional macros
+// for compatibility with the old NDKs and
+// they clash with the kernel_stat structure
+// st_[a|m|c]time_nsec fields.
+#undef st_atime_nsec
+#undef st_mtime_nsec
+#undef st_ctime_nsec
+#endif
 static void kernel_stat_to_stat(struct kernel_stat *in, struct stat *out) {
   internal_memset(out, 0, sizeof(*out));
   out->st_dev = in->st_dev;
@@ -214,16 +276,30 @@
   out->st_size = in->st_size;
   out->st_blksize = in->st_blksize;
   out->st_blocks = in->st_blocks;
-  out->st_atime = in->st_atime_nsec;
-  out->st_mtime = in->st_mtime_nsec;
-  out->st_ctime = in->st_ctime_nsec;
-  out->st_ino = in->st_ino;
+#if defined(__USE_MISC)     || \
+    defined(__USE_XOPEN2K8) || \
+    defined(SANITIZER_ANDROID)
+  out->st_atim.tv_sec = in->st_atime;
+  out->st_atim.tv_nsec = in->st_atime_nsec;
+  out->st_mtim.tv_sec = in->st_mtime;
+  out->st_mtim.tv_nsec = in->st_mtime_nsec;
+  out->st_ctim.tv_sec = in->st_ctime;
+  out->st_ctim.tv_nsec = in->st_ctime_nsec;
+#else
+  out->st_atime = in->st_atime;
+  out->st_atimensec = in->st_atime_nsec;
+  out->st_mtime = in->st_mtime;
+  out->st_mtimensec = in->st_mtime_nsec;
+  out->st_ctime = in->st_ctime;
+  out->st_atimensec = in->st_ctime_nsec;
+#endif
 }
 #endif
 
 uptr internal_stat(const char *path, void *buf) {
-#if SANITIZER_FREEBSD
-  return internal_syscall(SYSCALL(stat), path, buf);
+#if SANITIZER_FREEBSD || SANITIZER_NETBSD
+  return internal_syscall_ptr(SYSCALL(fstatat), AT_FDCWD, (uptr)path,
+                          (uptr)buf, 0);
 #elif SANITIZER_USES_CANONICAL_LINUX_SYSCALLS
   return internal_syscall(SYSCALL(newfstatat), AT_FDCWD, (uptr)path,
                           (uptr)buf, 0);
@@ -246,8 +322,11 @@
 }
 
 uptr internal_lstat(const char *path, void *buf) {
-#if SANITIZER_FREEBSD
-  return internal_syscall(SYSCALL(lstat), path, buf);
+#if SANITIZER_NETBSD
+  return internal_syscall_ptr(SYSCALL(lstat), path, buf);
+#elif SANITIZER_FREEBSD
+  return internal_syscall(SYSCALL(fstatat), AT_FDCWD, (uptr)path,
+                          (uptr)buf, AT_SYMLINK_NOFOLLOW);
 #elif SANITIZER_USES_CANONICAL_LINUX_SYSCALLS
   return internal_syscall(SYSCALL(newfstatat), AT_FDCWD, (uptr)path,
                          (uptr)buf, AT_SYMLINK_NOFOLLOW);
@@ -270,15 +349,15 @@
 }
 
 uptr internal_fstat(fd_t fd, void *buf) {
-#if SANITIZER_FREEBSD || SANITIZER_LINUX_USES_64BIT_SYSCALLS
-# if SANITIZER_MIPS64
+#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_LINUX_USES_64BIT_SYSCALLS
+# if SANITIZER_MIPS64 && !SANITIZER_NETBSD
   // For mips64, fstat syscall fills buffer in the format of kernel_stat
   struct kernel_stat kbuf;
   int res = internal_syscall(SYSCALL(fstat), fd, &kbuf);
   kernel_stat_to_stat(&kbuf, (struct stat *)buf);
   return res;
 # else
-  return internal_syscall(SYSCALL(fstat), fd, (uptr)buf);
+  return internal_syscall_ptr(SYSCALL(fstat), fd, (uptr)buf);
 # endif
 #else
   struct stat64 buf64;
@@ -308,7 +387,7 @@
   return internal_syscall(SYSCALL(readlinkat), AT_FDCWD,
                           (uptr)path, (uptr)buf, bufsize);
 #else
-  return internal_syscall(SYSCALL(readlink), (uptr)path, (uptr)buf, bufsize);
+  return internal_syscall_ptr(SYSCALL(readlink), path, buf, bufsize);
 #endif
 }
 
@@ -316,7 +395,7 @@
 #if SANITIZER_USES_CANONICAL_LINUX_SYSCALLS
   return internal_syscall(SYSCALL(unlinkat), AT_FDCWD, (uptr)path, 0);
 #else
-  return internal_syscall(SYSCALL(unlink), (uptr)path);
+  return internal_syscall_ptr(SYSCALL(unlink), (uptr)path);
 #endif
 }
 
@@ -325,7 +404,7 @@
   return internal_syscall(SYSCALL(renameat), AT_FDCWD, (uptr)oldpath, AT_FDCWD,
                           (uptr)newpath);
 #else
-  return internal_syscall(SYSCALL(rename), (uptr)oldpath, (uptr)newpath);
+  return internal_syscall_ptr(SYSCALL(rename), (uptr)oldpath, (uptr)newpath);
 #endif
 }
 
@@ -334,7 +413,7 @@
 }
 
 void internal__exit(int exitcode) {
-#if SANITIZER_FREEBSD
+#if SANITIZER_FREEBSD || SANITIZER_NETBSD
   internal_syscall(SYSCALL(exit), exitcode);
 #else
   internal_syscall(SYSCALL(exit_group), exitcode);
@@ -346,14 +425,14 @@
   struct timespec ts;
   ts.tv_sec = 1;
   ts.tv_nsec = 0;
-  int res = internal_syscall(SYSCALL(nanosleep), &ts, &ts);
+  int res = internal_syscall_ptr(SYSCALL(nanosleep), &ts, &ts);
   if (res) return ts.tv_sec;
   return 0;
 }
 
 uptr internal_execve(const char *filename, char *const argv[],
                      char *const envp[]) {
-  return internal_syscall(SYSCALL(execve), (uptr)filename, (uptr)argv,
+  return internal_syscall_ptr(SYSCALL(execve), (uptr)filename, (uptr)argv,
                           (uptr)envp);
 }
 
@@ -370,22 +449,24 @@
   return S_ISREG(st.st_mode);
 }
 
-uptr GetTid() {
+tid_t GetTid() {
 #if SANITIZER_FREEBSD
   return (uptr)pthread_self();
+#elif SANITIZER_NETBSD
+  return _lwp_self();
 #else
   return internal_syscall(SYSCALL(gettid));
 #endif
 }
 
 u64 NanoTime() {
-#if SANITIZER_FREEBSD
+#if SANITIZER_FREEBSD || SANITIZER_NETBSD
   timeval tv;
 #else
   kernel_timeval tv;
 #endif
   internal_memset(&tv, 0, sizeof(tv));
-  internal_syscall(SYSCALL(gettimeofday), (uptr)&tv, 0);
+  internal_syscall_ptr(SYSCALL(gettimeofday), &tv, 0);
   return (u64)tv.tv_sec * 1000*1000*1000 + tv.tv_usec * 1000;
 }
 
@@ -393,7 +474,7 @@
 // 'environ' array (on FreeBSD) and does not use libc. This function should be
 // called first inside __asan_init.
 const char *GetEnv(const char *name) {
-#if SANITIZER_FREEBSD
+#if SANITIZER_FREEBSD || SANITIZER_NETBSD
   if (::environ != 0) {
     uptr NameLen = internal_strlen(name);
     for (char **Env = ::environ; *Env != 0; Env++) {
@@ -431,13 +512,13 @@
 #endif
 }
 
-#if !SANITIZER_FREEBSD
+#if !SANITIZER_FREEBSD && !SANITIZER_NETBSD
 extern "C" {
   SANITIZER_WEAK_ATTRIBUTE extern void *__libc_stack_end;
 }
 #endif
 
-#if !SANITIZER_GO && !SANITIZER_FREEBSD
+#if !SANITIZER_GO && !SANITIZER_FREEBSD && !SANITIZER_NETBSD
 static void ReadNullSepFileToArray(const char *path, char ***arr,
                                    int arr_size) {
   char *buff;
@@ -463,7 +544,22 @@
 #endif
 
 static void GetArgsAndEnv(char ***argv, char ***envp) {
-#if !SANITIZER_FREEBSD
+#if SANITIZER_FREEBSD
+  // On FreeBSD, retrieving the argument and environment arrays is done via the
+  // kern.ps_strings sysctl, which returns a pointer to a structure containing
+  // this information. See also <sys/exec.h>.
+  ps_strings *pss;
+  size_t sz = sizeof(pss);
+  if (sysctlbyname("kern.ps_strings", &pss, &sz, NULL, 0) == -1) {
+    Printf("sysctl kern.ps_strings failed\n");
+    Die();
+  }
+  *argv = pss->ps_argvstr;
+  *envp = pss->ps_envstr;
+#elif SANITIZER_NETBSD
+  *argv = __ps_strings->ps_argvstr;
+  *argv = __ps_strings->ps_envstr;
+#else
 #if !SANITIZER_GO
   if (&__libc_stack_end) {
 #endif
@@ -478,18 +574,6 @@
     ReadNullSepFileToArray("/proc/self/environ", envp, kMaxEnvp);
   }
 #endif
-#else
-  // On FreeBSD, retrieving the argument and environment arrays is done via the
-  // kern.ps_strings sysctl, which returns a pointer to a structure containing
-  // this information. See also <sys/exec.h>.
-  ps_strings *pss;
-  size_t sz = sizeof(pss);
-  if (sysctlbyname("kern.ps_strings", &pss, &sz, NULL, 0) == -1) {
-    Printf("sysctl kern.ps_strings failed\n");
-    Die();
-  }
-  *argv = pss->ps_argvstr;
-  *envp = pss->ps_envstr;
 #endif
 }
 
@@ -501,8 +585,22 @@
 
 void ReExec() {
   char **argv, **envp;
+  const char *pathname = "/proc/self/exe";
+
+#if SANITIZER_NETBSD
+  static const int name[] = {
+    CTL_KERN, KERN_PROC_ARGS, -1, KERN_PROC_PATHNAME,
+  };
+  char path[400];
+  size_t len;
+
+  len = sizeof(path);
+  if (sysctl(name, ARRAY_SIZE(name), path, &len, NULL, 0) != -1)
+    pathname = path;
+#endif
+
   GetArgsAndEnv(&argv, &envp);
-  uptr rv = internal_execve("/proc/self/exe", argv, envp);
+  uptr rv = internal_execve(pathname, argv, envp);
   int rverrno;
   CHECK_EQ(internal_iserror(rv, &rverrno), true);
   Printf("execve failed, errno %d\n", rverrno);
@@ -527,6 +625,8 @@
   while (atomic_exchange(m, MtxSleeping, memory_order_acquire) != MtxUnlocked) {
 #if SANITIZER_FREEBSD
     _umtx_op(m, UMTX_OP_WAIT_UINT, MtxSleeping, 0, 0);
+#elif SANITIZER_NETBSD
+    sched_yield(); /* No userspace futex-like synchromization */
 #else
     internal_syscall(SYSCALL(futex), (uptr)m, FUTEX_WAIT, MtxSleeping, 0, 0, 0);
 #endif
@@ -535,11 +635,13 @@
 
 void BlockingMutex::Unlock() {
   atomic_uint32_t *m = reinterpret_cast<atomic_uint32_t *>(&opaque_storage_);
-  u32 v = atomic_exchange(m, MtxUnlocked, memory_order_relaxed);
+  u32 v = atomic_exchange(m, MtxUnlocked, memory_order_release);
   CHECK_NE(v, MtxUnlocked);
   if (v == MtxSleeping) {
 #if SANITIZER_FREEBSD
     _umtx_op(m, UMTX_OP_WAKE, 1, 0, 0);
+#elif SANITIZER_NETBSD
+                   /* No userspace futex-like synchromization */
 #else
     internal_syscall(SYSCALL(futex), (uptr)m, FUTEX_WAKE, 1, 0, 0, 0);
 #endif
@@ -555,6 +657,17 @@
 // The actual size of this structure is specified by d_reclen.
 // Note that getdents64 uses a different structure format. We only provide the
 // 32-bit syscall here.
+#if SANITIZER_NETBSD
+// struct dirent is different for Linux and us. At this moment, we use only
+// d_fileno (Linux call this d_ino), d_reclen, and d_name.
+struct linux_dirent {
+  u64 d_ino;  // d_fileno
+  u16 d_reclen;
+  u16 d_namlen;  // not used
+  u8 d_type;     // not used
+  char d_name[NAME_MAX + 1];
+};
+#else
 struct linux_dirent {
 #if SANITIZER_X32 || defined(__aarch64__)
   u64 d_ino;
@@ -569,15 +682,28 @@
 #endif
   char               d_name[256];
 };
+#endif
 
 // Syscall wrappers.
 uptr internal_ptrace(int request, int pid, void *addr, void *data) {
+#if SANITIZER_NETBSD
+  // XXX We need additional work for ptrace:
+  //   - for request, we use PT_FOO whereas Linux uses PTRACE_FOO
+  //   - data is int for us, but void * for Linux
+  //   - Linux sometimes uses data in the case where we use addr instead
+  // At this moment, this function is used only within
+  // "#if SANITIZER_LINUX && defined(__x86_64__)" block in
+  // sanitizer_stoptheworld_linux_libcdep.cc.
+  return internal_syscall_ptr(SYSCALL(ptrace), request, pid, (uptr)addr,
+                              (uptr)data);
+#else
   return internal_syscall(SYSCALL(ptrace), request, pid, (uptr)addr,
                           (uptr)data);
+#endif
 }
 
 uptr internal_waitpid(int pid, int *status, int options) {
-  return internal_syscall(SYSCALL(wait4), pid, (uptr)status, options,
+  return internal_syscall_ptr(SYSCALL(wait4), pid, (uptr)status, options,
                           0 /* rusage */);
 }
 
@@ -590,15 +716,21 @@
 }
 
 uptr internal_getdents(fd_t fd, struct linux_dirent *dirp, unsigned int count) {
-#if SANITIZER_USES_CANONICAL_LINUX_SYSCALLS
+#if SANITIZER_FREEBSD
+  return internal_syscall(SYSCALL(getdirentries), fd, (uptr)dirp, count, NULL);
+#elif SANITIZER_USES_CANONICAL_LINUX_SYSCALLS
   return internal_syscall(SYSCALL(getdents64), fd, (uptr)dirp, count);
 #else
-  return internal_syscall(SYSCALL(getdents), fd, (uptr)dirp, count);
+  return internal_syscall_ptr(SYSCALL(getdents), fd, (uptr)dirp, count);
 #endif
 }
 
 uptr internal_lseek(fd_t fd, OFF_T offset, int whence) {
+#if SANITIZER_NETBSD
+  return internal_syscall64(SYSCALL(lseek), fd, 0, offset, whence);
+#else
   return internal_syscall(SYSCALL(lseek), fd, offset, whence);
+#endif
 }
 
 #if SANITIZER_LINUX
@@ -607,9 +739,8 @@
 }
 #endif
 
-uptr internal_sigaltstack(const struct sigaltstack *ss,
-                         struct sigaltstack *oss) {
-  return internal_syscall(SYSCALL(sigaltstack), (uptr)ss, (uptr)oss);
+uptr internal_sigaltstack(const void *ss, void *oss) {
+  return internal_syscall_ptr(SYSCALL(sigaltstack), (uptr)ss, (uptr)oss);
 }
 
 int internal_fork() {
@@ -690,8 +821,8 @@
 
 uptr internal_sigprocmask(int how, __sanitizer_sigset_t *set,
     __sanitizer_sigset_t *oldset) {
-#if SANITIZER_FREEBSD
-  return internal_syscall(SYSCALL(sigprocmask), how, set, oldset);
+#if SANITIZER_FREEBSD || SANITIZER_NETBSD
+  return internal_syscall_ptr(SYSCALL(sigprocmask), how, set, oldset);
 #else
   __sanitizer_kernel_sigset_t *k_set = (__sanitizer_kernel_sigset_t *)set;
   __sanitizer_kernel_sigset_t *k_oldset = (__sanitizer_kernel_sigset_t *)oldset;
@@ -799,20 +930,94 @@
   return true;
 }
 
+#if SANITIZER_WORDSIZE == 32
+// Take care of unusable kernel area in top gigabyte.
+static uptr GetKernelAreaSize() {
+#if SANITIZER_LINUX && !SANITIZER_X32
+  const uptr gbyte = 1UL << 30;
+
+  // Firstly check if there are writable segments
+  // mapped to top gigabyte (e.g. stack).
+  MemoryMappingLayout proc_maps(/*cache_enabled*/true);
+  MemoryMappedSegment segment;
+  while (proc_maps.Next(&segment)) {
+    if ((segment.end >= 3 * gbyte) && segment.IsWritable()) return 0;
+  }
+
+#if !SANITIZER_ANDROID
+  // Even if nothing is mapped, top Gb may still be accessible
+  // if we are running on 64-bit kernel.
+  // Uname may report misleading results if personality type
+  // is modified (e.g. under schroot) so check this as well.
+  struct utsname uname_info;
+  int pers = personality(0xffffffffUL);
+  if (!(pers & PER_MASK)
+      && uname(&uname_info) == 0
+      && internal_strstr(uname_info.machine, "64"))
+    return 0;
+#endif  // SANITIZER_ANDROID
+
+  // Top gigabyte is reserved for kernel.
+  return gbyte;
+#else
+  return 0;
+#endif  // SANITIZER_LINUX && !SANITIZER_X32
+}
+#endif  // SANITIZER_WORDSIZE == 32
+
+uptr GetMaxVirtualAddress() {
+#if SANITIZER_NETBSD && defined(__x86_64__)
+  return 0x7f7ffffff000ULL;  // (0x00007f8000000000 - PAGE_SIZE)
+#elif SANITIZER_WORDSIZE == 64
+# if defined(__powerpc64__) || defined(__aarch64__)
+  // On PowerPC64 we have two different address space layouts: 44- and 46-bit.
+  // We somehow need to figure out which one we are using now and choose
+  // one of 0x00000fffffffffffUL and 0x00003fffffffffffUL.
+  // Note that with 'ulimit -s unlimited' the stack is moved away from the top
+  // of the address space, so simply checking the stack address is not enough.
+  // This should (does) work for both PowerPC64 Endian modes.
+  // Similarly, aarch64 has multiple address space layouts: 39, 42 and 47-bit.
+  return (1ULL << (MostSignificantSetBitIndex(GET_CURRENT_FRAME()) + 1)) - 1;
+# elif defined(__mips64)
+  return (1ULL << 40) - 1;  // 0x000000ffffffffffUL;
+# elif defined(__s390x__)
+  return (1ULL << 53) - 1;  // 0x001fffffffffffffUL;
+# else
+  return (1ULL << 47) - 1;  // 0x00007fffffffffffUL;
+# endif
+#else  // SANITIZER_WORDSIZE == 32
+# if defined(__s390__)
+  return (1ULL << 31) - 1;  // 0x7fffffff;
+# else
+  uptr res = (1ULL << 32) - 1;  // 0xffffffff;
+  if (!common_flags()->full_address_space)
+    res -= GetKernelAreaSize();
+  CHECK_LT(reinterpret_cast<uptr>(&res), res);
+  return res;
+# endif
+#endif  // SANITIZER_WORDSIZE
+}
+
 uptr GetPageSize() {
 // Android post-M sysconf(_SC_PAGESIZE) crashes if called from .preinit_array.
 #if SANITIZER_ANDROID
   return 4096;
 #elif SANITIZER_LINUX && (defined(__x86_64__) || defined(__i386__))
   return EXEC_PAGESIZE;
+#elif SANITIZER_USE_GETAUXVAL
+  return getauxval(AT_PAGESZ);
 #else
   return sysconf(_SC_PAGESIZE);  // EXEC_PAGESIZE may not be trustworthy.
 #endif
 }
 
 uptr ReadBinaryName(/*out*/char *buf, uptr buf_len) {
+#if SANITIZER_FREEBSD || SANITIZER_NETBSD
 #if SANITIZER_FREEBSD
-  const int Mib[] = { CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1 };
+  const int Mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
+#else
+  const int Mib[4] = {CTL_KERN, KERN_PROC_ARGS, -1, KERN_PROC_PATHNAME};
+#endif
   const char *default_module_name = "kern.proc.pathname";
   size_t Size = buf_len;
   bool IsErr = (sysctl(Mib, ARRAY_SIZE(Mib), buf, &Size, NULL, 0) != 0);
@@ -1097,36 +1302,50 @@
 uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
                    int *parent_tidptr, void *newtls, int *child_tidptr) {
   long long res;
-/* Stack frame offsets.  */
-#if _CALL_ELF != 2
-#define FRAME_MIN_SIZE         112
-#define FRAME_TOC_SAVE         40
+// Stack frame structure.
+#if SANITIZER_PPC64V1
+//   Back chain == 0        (SP + 112)
+// Frame (112 bytes):
+//   Parameter save area    (SP + 48), 8 doublewords
+//   TOC save area          (SP + 40)
+//   Link editor doubleword (SP + 32)
+//   Compiler doubleword    (SP + 24)
+//   LR save area           (SP + 16)
+//   CR save area           (SP + 8)
+//   Back chain             (SP + 0)
+# define FRAME_SIZE 112
+# define FRAME_TOC_SAVE_OFFSET 40
+#elif SANITIZER_PPC64V2
+//   Back chain == 0        (SP + 32)
+// Frame (32 bytes):
+//   TOC save area          (SP + 24)
+//   LR save area           (SP + 16)
+//   CR save area           (SP + 8)
+//   Back chain             (SP + 0)
+# define FRAME_SIZE 32
+# define FRAME_TOC_SAVE_OFFSET 24
 #else
-#define FRAME_MIN_SIZE         32
-#define FRAME_TOC_SAVE         24
+# error "Unsupported PPC64 ABI"
 #endif
   if (!fn || !child_stack)
     return -EINVAL;
   CHECK_EQ(0, (uptr)child_stack % 16);
-  child_stack = (char *)child_stack - 2 * sizeof(unsigned long long);
-  ((unsigned long long *)child_stack)[0] = (uptr)fn;
-  ((unsigned long long *)child_stack)[1] = (uptr)arg;
 
   register int (*__fn)(void *) __asm__("r3") = fn;
   register void *__cstack      __asm__("r4") = child_stack;
   register int __flags         __asm__("r5") = flags;
-  register void * __arg        __asm__("r6") = arg;
-  register int * __ptidptr     __asm__("r7") = parent_tidptr;
-  register void * __newtls     __asm__("r8") = newtls;
-  register int * __ctidptr     __asm__("r9") = child_tidptr;
+  register void *__arg         __asm__("r6") = arg;
+  register int *__ptidptr      __asm__("r7") = parent_tidptr;
+  register void *__newtls      __asm__("r8") = newtls;
+  register int *__ctidptr      __asm__("r9") = child_tidptr;
 
  __asm__ __volatile__(
-           /* fn, arg, child_stack are saved acrVoss the syscall */
+           /* fn and arg are saved across the syscall */
            "mr 28, %5\n\t"
-           "mr 29, %6\n\t"
            "mr 27, %8\n\t"
 
            /* syscall
+             r0 == __NR_clone
              r3 == flags
              r4 == child_stack
              r5 == parent_tidptr
@@ -1144,15 +1363,21 @@
            "crandc cr1*4+eq, cr1*4+eq, cr0*4+so\n\t"
            "bne-   cr1, 1f\n\t"
 
+           /* Set up stack frame */
+           "li    29, 0\n\t"
+           "stdu  29, -8(1)\n\t"
+           "stdu  1, -%12(1)\n\t"
            /* Do the function call */
            "std   2, %13(1)\n\t"
-#if _CALL_ELF != 2
+#if SANITIZER_PPC64V1
            "ld    0, 0(28)\n\t"
            "ld    2, 8(28)\n\t"
            "mtctr 0\n\t"
-#else
+#elif SANITIZER_PPC64V2
            "mr    12, 28\n\t"
            "mtctr 12\n\t"
+#else
+# error "Unsupported PPC64 ABI"
 #endif
            "mr    3, 27\n\t"
            "bctrl\n\t"
@@ -1166,13 +1391,20 @@
            "1:\n\t"
            "mr %0, 3\n\t"
              : "=r" (res)
-             : "0" (-1), "i" (EINVAL),
-               "i" (__NR_clone), "i" (__NR_exit),
-               "r" (__fn), "r" (__cstack), "r" (__flags),
-               "r" (__arg), "r" (__ptidptr), "r" (__newtls),
-               "r" (__ctidptr), "i" (FRAME_MIN_SIZE), "i" (FRAME_TOC_SAVE)
-             : "cr0", "cr1", "memory", "ctr",
-               "r0", "r29", "r27", "r28");
+             : "0" (-1),
+               "i" (EINVAL),
+               "i" (__NR_clone),
+               "i" (__NR_exit),
+               "r" (__fn),
+               "r" (__cstack),
+               "r" (__flags),
+               "r" (__arg),
+               "r" (__ptidptr),
+               "r" (__newtls),
+               "r" (__ctidptr),
+               "i" (FRAME_SIZE),
+               "i" (FRAME_TOC_SAVE_OFFSET)
+             : "cr0", "cr1", "memory", "ctr", "r0", "r27", "r28", "r29");
   return res;
 }
 #elif defined(__i386__) && SANITIZER_LINUX
@@ -1240,6 +1472,72 @@
                        : "memory");
   return res;
 }
+#elif defined(__arm__) && SANITIZER_LINUX
+uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
+                    int *parent_tidptr, void *newtls, int *child_tidptr) {
+  unsigned int res;
+  if (!fn || !child_stack)
+    return -EINVAL;
+  child_stack = (char *)child_stack - 2 * sizeof(unsigned int);
+  ((unsigned int *)child_stack)[0] = (uptr)fn;
+  ((unsigned int *)child_stack)[1] = (uptr)arg;
+  register int r0 __asm__("r0") = flags;
+  register void *r1 __asm__("r1") = child_stack;
+  register int *r2 __asm__("r2") = parent_tidptr;
+  register void *r3 __asm__("r3") = newtls;
+  register int *r4 __asm__("r4") = child_tidptr;
+  register int r7 __asm__("r7") = __NR_clone;
+
+#if __ARM_ARCH > 4 || defined (__ARM_ARCH_4T__)
+# define ARCH_HAS_BX
+#endif
+#if __ARM_ARCH > 4
+# define ARCH_HAS_BLX
+#endif
+
+#ifdef ARCH_HAS_BX
+# ifdef ARCH_HAS_BLX
+#  define BLX(R) "blx "  #R "\n"
+# else
+#  define BLX(R) "mov lr, pc; bx " #R "\n"
+# endif
+#else
+# define BLX(R)  "mov lr, pc; mov pc," #R "\n"
+#endif
+
+  __asm__ __volatile__(
+                       /* %r0 = syscall(%r7 = SYSCALL(clone),
+                        *               %r0 = flags,
+                        *               %r1 = child_stack,
+                        *               %r2 = parent_tidptr,
+                        *               %r3  = new_tls,
+                        *               %r4 = child_tidptr)
+                        */
+
+                       /* Do the system call */
+                       "swi 0x0\n"
+
+                       /* if (%r0 != 0)
+                        *   return %r0;
+                        */
+                       "cmp r0, #0\n"
+                       "bne 1f\n"
+
+                       /* In the child, now. Call "fn(arg)". */
+                       "ldr r0, [sp, #4]\n"
+                       "ldr ip, [sp], #8\n"
+                       BLX(ip)
+                       /* Call _exit(%r0). */
+                       "mov r7, %7\n"
+                       "swi 0x0\n"
+                       "1:\n"
+                       "mov %0, r0\n"
+                       : "=r"(res)
+                       : "r"(r0), "r"(r1), "r"(r2), "r"(r3), "r"(r4), "r"(r7),
+                         "i"(__NR_exit)
+                       : "memory");
+  return res;
+}
 #endif  // defined(__x86_64__) && SANITIZER_LINUX
 
 #if SANITIZER_ANDROID
@@ -1285,16 +1583,27 @@
 
 #endif
 
-bool IsHandledDeadlySignal(int signum) {
-  if (common_flags()->handle_abort && signum == SIGABRT)
-    return true;
-  if (common_flags()->handle_sigill && signum == SIGILL)
-    return true;
-  if (common_flags()->handle_sigfpe && signum == SIGFPE)
-    return true;
-  if (common_flags()->handle_segv && signum == SIGSEGV)
-    return true;
-  return common_flags()->handle_sigbus && signum == SIGBUS;
+static HandleSignalMode GetHandleSignalModeImpl(int signum) {
+  switch (signum) {
+    case SIGABRT:
+      return common_flags()->handle_abort;
+    case SIGILL:
+      return common_flags()->handle_sigill;
+    case SIGFPE:
+      return common_flags()->handle_sigfpe;
+    case SIGSEGV:
+      return common_flags()->handle_segv;
+    case SIGBUS:
+      return common_flags()->handle_sigbus;
+  }
+  return kHandleSignalNo;
+}
+
+HandleSignalMode GetHandleSignalMode(int signum) {
+  HandleSignalMode result = GetHandleSignalModeImpl(signum);
+  if (result == kHandleSignalYes && !common_flags()->allow_user_segv_handler)
+    return kHandleSignalExclusive;
+  return result;
 }
 
 #if !SANITIZER_GO
@@ -1346,12 +1655,14 @@
 }
 #endif
 
-SignalContext::WriteFlag SignalContext::GetWriteFlag(void *context) {
+SignalContext::WriteFlag SignalContext::GetWriteFlag() const {
   ucontext_t *ucontext = (ucontext_t *)context;
 #if defined(__x86_64__) || defined(__i386__)
   static const uptr PF_WRITE = 1U << 1;
 #if SANITIZER_FREEBSD
   uptr err = ucontext->uc_mcontext.mc_err;
+#elif SANITIZER_NETBSD
+  uptr err = ucontext->uc_mcontext.__gregs[_REG_ERR];
 #else
   uptr err = ucontext->uc_mcontext.gregs[REG_ERR];
 #endif
@@ -1375,7 +1686,7 @@
   // FIXME: Implement this.
 }
 
-void GetPcSpBp(void *context, uptr *pc, uptr *sp, uptr *bp) {
+static void GetPcSpBp(void *context, uptr *pc, uptr *sp, uptr *bp) {
 #if defined(__arm__)
   ucontext_t *ucontext = (ucontext_t*)context;
   *pc = ucontext->uc_mcontext.arm_pc;
@@ -1398,6 +1709,11 @@
   *pc = ucontext->uc_mcontext.mc_rip;
   *bp = ucontext->uc_mcontext.mc_rbp;
   *sp = ucontext->uc_mcontext.mc_rsp;
+#elif SANITIZER_NETBSD
+  ucontext_t *ucontext = (ucontext_t *)context;
+  *pc = ucontext->uc_mcontext.__gregs[_REG_RIP];
+  *bp = ucontext->uc_mcontext.__gregs[_REG_RBP];
+  *sp = ucontext->uc_mcontext.__gregs[_REG_RSP];
 # else
   ucontext_t *ucontext = (ucontext_t*)context;
   *pc = ucontext->uc_mcontext.gregs[REG_RIP];
@@ -1410,6 +1726,11 @@
   *pc = ucontext->uc_mcontext.mc_eip;
   *bp = ucontext->uc_mcontext.mc_ebp;
   *sp = ucontext->uc_mcontext.mc_esp;
+#elif SANITIZER_NETBSD
+  ucontext_t *ucontext = (ucontext_t *)context;
+  *pc = ucontext->uc_mcontext.__gregs[_REG_EIP];
+  *bp = ucontext->uc_mcontext.__gregs[_REG_EBP];
+  *sp = ucontext->uc_mcontext.__gregs[_REG_ESP];
 # else
   ucontext_t *ucontext = (ucontext_t*)context;
   *pc = ucontext->uc_mcontext.gregs[REG_EIP];
@@ -1456,6 +1777,8 @@
 #endif
 }
 
+void SignalContext::InitPcSpBp() { GetPcSpBp(context, &pc, &sp, &bp); }
+
 void MaybeReexec() {
   // No need to re-exec on Linux.
 }
@@ -1463,7 +1786,7 @@
 void PrintModuleMap() { }
 
 void CheckNoDeepBind(const char *filename, int flag) {
-#if !SANITIZER_ANDROID
+#ifdef RTLD_DEEPBIND
   if (flag & RTLD_DEEPBIND) {
     Report(
         "You are trying to dlopen a %s shared library with RTLD_DEEPBIND flag"
@@ -1477,11 +1800,40 @@
 #endif
 }
 
-uptr FindAvailableMemoryRange(uptr size, uptr alignment, uptr left_padding) {
+uptr FindAvailableMemoryRange(uptr size, uptr alignment, uptr left_padding,
+                              uptr *largest_gap_found) {
   UNREACHABLE("FindAvailableMemoryRange is not available");
   return 0;
 }
 
+bool GetRandom(void *buffer, uptr length, bool blocking) {
+  if (!buffer || !length || length > 256)
+    return false;
+#if SANITIZER_USE_GETRANDOM
+  static atomic_uint8_t skip_getrandom_syscall;
+  if (!atomic_load_relaxed(&skip_getrandom_syscall)) {
+    // Up to 256 bytes, getrandom will not be interrupted.
+    uptr res = internal_syscall(SYSCALL(getrandom), buffer, length,
+                                blocking ? 0 : GRND_NONBLOCK);
+    int rverrno = 0;
+    if (internal_iserror(res, &rverrno) && rverrno == ENOSYS)
+      atomic_store_relaxed(&skip_getrandom_syscall, 1);
+    else if (res == length)
+      return true;
+  }
+#endif  // SANITIZER_USE_GETRANDOM
+  // Up to 256 bytes, a read off /dev/urandom will not be interrupted.
+  // blocking is moot here, O_NONBLOCK has no effect when opening /dev/urandom.
+  uptr fd = internal_open("/dev/urandom", O_RDONLY);
+  if (internal_iserror(fd))
+    return false;
+  uptr res = internal_read(fd, buffer, length);
+  if (internal_iserror(res))
+    return false;
+  internal_close(fd);
+  return true;
+}
+
 } // namespace __sanitizer
 
-#endif // SANITIZER_FREEBSD || SANITIZER_LINUX
+#endif  // SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_NETBSD
diff --git a/lib/sanitizer_common/sanitizer_linux.h b/lib/sanitizer_common/sanitizer_linux.h
index bba8624..88cac33 100644
--- a/lib/sanitizer_common/sanitizer_linux.h
+++ b/lib/sanitizer_common/sanitizer_linux.h
@@ -14,24 +14,36 @@
 #define SANITIZER_LINUX_H
 
 #include "sanitizer_platform.h"
-#if SANITIZER_FREEBSD || SANITIZER_LINUX
+#if SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_NETBSD
 #include "sanitizer_common.h"
 #include "sanitizer_internal_defs.h"
-#include "sanitizer_posix.h"
+#include "sanitizer_platform_limits_netbsd.h"
 #include "sanitizer_platform_limits_posix.h"
+#include "sanitizer_posix.h"
 
 struct link_map;  // Opaque type returned by dlopen().
-struct sigaltstack;
 
 namespace __sanitizer {
 // Dirent structure for getdents(). Note that this structure is different from
 // the one in <dirent.h>, which is used by readdir().
 struct linux_dirent;
 
+struct ProcSelfMapsBuff {
+  char *data;
+  uptr mmaped_size;
+  uptr len;
+};
+
+struct MemoryMappingLayoutData {
+  ProcSelfMapsBuff proc_self_maps;
+  const char *current;
+};
+
+void ReadProcMaps(ProcSelfMapsBuff *proc_maps);
+
 // Syscall wrappers.
 uptr internal_getdents(fd_t fd, struct linux_dirent *dirp, unsigned int count);
-uptr internal_sigaltstack(const struct sigaltstack* ss,
-                          struct sigaltstack* oss);
+uptr internal_sigaltstack(const void* ss, void* oss);
 uptr internal_sigprocmask(int how, __sanitizer_sigset_t *set,
     __sanitizer_sigset_t *oldset);
 
@@ -48,7 +60,8 @@
 #endif
 void internal_sigdelset(__sanitizer_sigset_t *set, int signum);
 #if defined(__x86_64__) || defined(__mips__) || defined(__aarch64__) \
-  || defined(__powerpc64__) || defined(__s390__) || defined(__i386__)
+  || defined(__powerpc64__) || defined(__s390__) || defined(__i386__) \
+  || defined(__arm__)
 uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
                     int *parent_tidptr, void *newtls, int *child_tidptr);
 #endif
@@ -87,7 +100,47 @@
 
 // Call cb for each region mapped by map.
 void ForEachMappedRegion(link_map *map, void (*cb)(const void *, uptr));
+
+#if SANITIZER_ANDROID
+
+#if defined(__aarch64__)
+# define __get_tls() \
+    ({ void** __v; __asm__("mrs %0, tpidr_el0" : "=r"(__v)); __v; })
+#elif defined(__arm__)
+# define __get_tls() \
+    ({ void** __v; __asm__("mrc p15, 0, %0, c13, c0, 3" : "=r"(__v)); __v; })
+#elif defined(__mips__)
+// On mips32r1, this goes via a kernel illegal instruction trap that's
+// optimized for v1.
+# define __get_tls() \
+    ({ register void** __v asm("v1"); \
+       __asm__(".set    push\n" \
+               ".set    mips32r2\n" \
+               "rdhwr   %0,$29\n" \
+               ".set    pop\n" : "=r"(__v)); \
+       __v; })
+#elif defined(__i386__)
+# define __get_tls() \
+    ({ void** __v; __asm__("movl %%gs:0, %0" : "=r"(__v)); __v; })
+#elif defined(__x86_64__)
+# define __get_tls() \
+    ({ void** __v; __asm__("mov %%fs:0, %0" : "=r"(__v)); __v; })
+#else
+#error "Unsupported architecture."
+#endif
+
+// The Android Bionic team has allocated a TLS slot for TSan starting with N,
+// given that Android currently doesn't support ELF TLS. It is used to store
+// Sanitizers thread specific data.
+static const int TLS_SLOT_TSAN = 8;
+
+ALWAYS_INLINE uptr *get_android_tls_ptr() {
+  return reinterpret_cast<uptr *>(&__get_tls()[TLS_SLOT_TSAN]);
+}
+
+#endif  // SANITIZER_ANDROID
+
 }  // namespace __sanitizer
 
-#endif  // SANITIZER_FREEBSD || SANITIZER_LINUX
+#endif  // SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_NETBSD
 #endif  // SANITIZER_LINUX_H
diff --git a/lib/sanitizer_common/sanitizer_linux_libcdep.cc b/lib/sanitizer_common/sanitizer_linux_libcdep.cc
index a5e2840..0dc4375 100644
--- a/lib/sanitizer_common/sanitizer_linux_libcdep.cc
+++ b/lib/sanitizer_common/sanitizer_linux_libcdep.cc
@@ -14,11 +14,12 @@
 
 #include "sanitizer_platform.h"
 
-#if SANITIZER_FREEBSD || SANITIZER_LINUX
+#if SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_NETBSD
 
 #include "sanitizer_allocator_internal.h"
 #include "sanitizer_atomic.h"
 #include "sanitizer_common.h"
+#include "sanitizer_file.h"
 #include "sanitizer_flags.h"
 #include "sanitizer_freebsd.h"
 #include "sanitizer_linux.h"
@@ -81,28 +82,25 @@
 
     // Find the mapping that contains a stack variable.
     MemoryMappingLayout proc_maps(/*cache_enabled*/true);
-    uptr start, end, offset;
+    MemoryMappedSegment segment;
     uptr prev_end = 0;
-    while (proc_maps.Next(&start, &end, &offset, nullptr, 0,
-          /* protection */nullptr)) {
-      if ((uptr)&rl < end)
-        break;
-      prev_end = end;
+    while (proc_maps.Next(&segment)) {
+      if ((uptr)&rl < segment.end) break;
+      prev_end = segment.end;
     }
-    CHECK((uptr)&rl >= start && (uptr)&rl < end);
+    CHECK((uptr)&rl >= segment.start && (uptr)&rl < segment.end);
 
     // Get stacksize from rlimit, but clip it so that it does not overlap
     // with other mappings.
     uptr stacksize = rl.rlim_cur;
-    if (stacksize > end - prev_end)
-      stacksize = end - prev_end;
+    if (stacksize > segment.end - prev_end) stacksize = segment.end - prev_end;
     // When running with unlimited stack size, we still want to set some limit.
     // The unlimited stack size is caused by 'ulimit -s unlimited'.
     // Also, for some reason, GNU make spawns subprocesses with unlimited stack.
     if (stacksize > kMaxThreadStackSize)
       stacksize = kMaxThreadStackSize;
-    *stack_top = end;
-    *stack_bottom = end - stacksize;
+    *stack_top = segment.end;
+    *stack_bottom = segment.end - stacksize;
     return;
   }
   pthread_attr_t attr;
@@ -113,7 +111,6 @@
   my_pthread_attr_getstack(&attr, &stackaddr, &stacksize);
   pthread_attr_destroy(&attr);
 
-  CHECK_LE(stacksize, kMaxThreadStackSize);  // Sanity check.
   *stack_top = (uptr)stackaddr + stacksize;
   *stack_bottom = (uptr)stackaddr;
 }
@@ -152,7 +149,8 @@
 #endif
 }
 
-#if !SANITIZER_FREEBSD && !SANITIZER_ANDROID && !SANITIZER_GO
+#if !SANITIZER_FREEBSD && !SANITIZER_ANDROID && !SANITIZER_GO && \
+    !SANITIZER_NETBSD
 static uptr g_tls_size;
 
 #ifdef __i386__
@@ -180,11 +178,12 @@
 }
 #else
 void InitTlsSize() { }
-#endif  // !SANITIZER_FREEBSD && !SANITIZER_ANDROID && !SANITIZER_GO
+#endif  // !SANITIZER_FREEBSD && !SANITIZER_ANDROID && !SANITIZER_GO &&
+        // !SANITIZER_NETBSD
 
 #if (defined(__x86_64__) || defined(__i386__) || defined(__mips__) \
-    || defined(__aarch64__) || defined(__powerpc64__) || defined(__s390__)) \
-    && SANITIZER_LINUX && !SANITIZER_ANDROID
+    || defined(__aarch64__) || defined(__powerpc64__) || defined(__s390__) \
+    || defined(__arm__)) && SANITIZER_LINUX && !SANITIZER_ANDROID
 // sizeof(struct pthread) from glibc.
 static atomic_uintptr_t kThreadDescriptorSize;
 
@@ -192,14 +191,14 @@
   uptr val = atomic_load(&kThreadDescriptorSize, memory_order_relaxed);
   if (val)
     return val;
-#if defined(__x86_64__) || defined(__i386__)
+#if defined(__x86_64__) || defined(__i386__) || defined(__arm__)
 #ifdef _CS_GNU_LIBC_VERSION
   char buf[64];
   uptr len = confstr(_CS_GNU_LIBC_VERSION, buf, sizeof(buf));
   if (len < sizeof(buf) && internal_strncmp(buf, "glibc 2.", 8) == 0) {
     char *end;
     int minor = internal_simple_strtoll(buf + 8, &end, 10);
-    if (end != buf + 8 && (*end == '\0' || *end == '.')) {
+    if (end != buf + 8 && (*end == '\0' || *end == '.' || *end == '-')) {
       int patch = 0;
       if (*end == '.')
         // strtoll will return 0 if no valid conversion could be performed
@@ -208,6 +207,9 @@
       /* sizeof(struct pthread) values from various glibc versions.  */
       if (SANITIZER_X32)
         val = 1728;  // Assume only one particular version for x32.
+      // For ARM sizeof(struct pthread) changed in Glibc 2.23.
+      else if (SANITIZER_ARM)
+        val = minor <= 22 ? 1120 : 1216;
       else if (minor <= 3)
         val = FIRST_32_SECOND_64(1104, 1696);
       else if (minor == 4)
@@ -293,7 +295,7 @@
                 rdhwr %0,$29;\
                 .set pop" : "=r" (thread_pointer));
   descr_addr = thread_pointer - kTlsTcbOffset - TlsPreTcbSize();
-# elif defined(__aarch64__)
+# elif defined(__aarch64__) || defined(__arm__)
   descr_addr = reinterpret_cast<uptr>(__builtin_thread_pointer()) -
                                       ThreadDescriptorSize();
 # elif defined(__s390__)
@@ -332,7 +334,9 @@
 uptr ThreadSelf() {
   return (uptr)ThreadSelfSegbase()[2];
 }
-#endif  // SANITIZER_FREEBSD
+#elif SANITIZER_NETBSD
+uptr ThreadSelf() { return (uptr)pthread_self(); }
+#endif  // SANITIZER_NETBSD
 
 #if !SANITIZER_GO
 static void GetTls(uptr *addr, uptr *size) {
@@ -342,7 +346,8 @@
   *size = GetTlsSize();
   *addr -= *size;
   *addr += ThreadDescriptorSize();
-# elif defined(__mips__) || defined(__aarch64__) || defined(__powerpc64__)
+# elif defined(__mips__) || defined(__aarch64__) || defined(__powerpc64__) \
+    || defined(__arm__)
   *addr = ThreadSelf();
   *size = GetTlsSize();
 # else
@@ -362,7 +367,7 @@
     *addr = (uptr) dtv[2];
     *size = (*addr == 0) ? 0 : ((uptr) segbase[0] - (uptr) dtv[2]);
   }
-#elif SANITIZER_ANDROID
+#elif SANITIZER_ANDROID || SANITIZER_NETBSD
   *addr = 0;
   *size = 0;
 #else
@@ -373,7 +378,7 @@
 
 #if !SANITIZER_GO
 uptr GetTlsSize() {
-#if SANITIZER_FREEBSD || SANITIZER_ANDROID
+#if SANITIZER_FREEBSD || SANITIZER_ANDROID || SANITIZER_NETBSD
   uptr addr, size;
   GetTls(&addr, &size);
   return size;
@@ -419,7 +424,7 @@
 # endif
 
 struct DlIteratePhdrData {
-  InternalMmapVector<LoadedModule> *modules;
+  InternalMmapVectorNoCtor<LoadedModule> *modules;
   bool first;
 };
 
@@ -443,7 +448,9 @@
       uptr cur_beg = info->dlpi_addr + phdr->p_vaddr;
       uptr cur_end = cur_beg + phdr->p_memsz;
       bool executable = phdr->p_flags & PF_X;
-      cur_module.addAddressRange(cur_beg, cur_end, executable);
+      bool writable = phdr->p_flags & PF_W;
+      cur_module.addAddressRange(cur_beg, cur_end, executable,
+                                 writable);
     }
   }
   data->modules->push_back(cur_module);
@@ -455,21 +462,41 @@
     int (*)(struct dl_phdr_info *, size_t, void *), void *);
 #endif
 
-void ListOfModules::init() {
-  clear();
+static bool requiresProcmaps() {
 #if SANITIZER_ANDROID && __ANDROID_API__ <= 22
-  u32 api_level = AndroidGetApiLevel();
   // Fall back to /proc/maps if dl_iterate_phdr is unavailable or broken.
   // The runtime check allows the same library to work with
   // both K and L (and future) Android releases.
-  if (api_level <= ANDROID_LOLLIPOP_MR1) { // L or earlier
-    MemoryMappingLayout memory_mapping(false);
-    memory_mapping.DumpListOfModules(&modules_);
-    return;
-  }
+  return AndroidGetApiLevel() <= ANDROID_LOLLIPOP_MR1;
+#else
+  return false;
 #endif
-  DlIteratePhdrData data = {&modules_, true};
-  dl_iterate_phdr(dl_iterate_phdr_cb, &data);
+}
+
+static void procmapsInit(InternalMmapVectorNoCtor<LoadedModule> *modules) {
+  MemoryMappingLayout memory_mapping(/*cache_enabled*/true);
+  memory_mapping.DumpListOfModules(modules);
+}
+
+void ListOfModules::init() {
+  clearOrInit();
+  if (requiresProcmaps()) {
+    procmapsInit(&modules_);
+  } else {
+    DlIteratePhdrData data = {&modules_, true};
+    dl_iterate_phdr(dl_iterate_phdr_cb, &data);
+  }
+}
+
+// When a custom loader is used, dl_iterate_phdr may not contain the full
+// list of modules. Allow callers to fall back to using procmaps.
+void ListOfModules::fallbackInit() {
+  if (!requiresProcmaps()) {
+    clearOrInit();
+    procmapsInit(&modules_);
+  } else {
+    clear();
+  }
 }
 
 // getrusage does not give us the current RSS, only the max RSS.
@@ -546,6 +573,15 @@
     WriteToSyslog(str);
 }
 
+#if SANITIZER_ANDROID
+extern "C" __attribute__((weak)) void android_set_abort_message(const char *);
+void SetAbortMessage(const char *str) {
+  if (&android_set_abort_message) android_set_abort_message(str);
+}
+#else
+void SetAbortMessage(const char *str) {}
+#endif
+
 #endif // SANITIZER_LINUX
 
 } // namespace __sanitizer
diff --git a/lib/sanitizer_common/sanitizer_linux_s390.cc b/lib/sanitizer_common/sanitizer_linux_s390.cc
index c2b03b2..a6da82e 100644
--- a/lib/sanitizer_common/sanitizer_linux_s390.cc
+++ b/lib/sanitizer_common/sanitizer_linux_s390.cc
@@ -178,6 +178,13 @@
     // 4.4.6+ is OK.
     if (minor == 4 && patch >= 6)
       return true;
+    if (minor == 4 && patch == 0 && ptr[0] == '-' &&
+        internal_strstr(buf.version, "Ubuntu")) {
+      // Check Ubuntu 16.04
+      int r1 = internal_simple_strtoll(ptr+1, &ptr, 10);
+      if (r1 >= 13) // 4.4.0-13 or later
+        return true;
+    }
     // Otherwise, OK if 4.5+.
     return minor >= 5;
   } else {
diff --git a/lib/sanitizer_common/sanitizer_mac.cc b/lib/sanitizer_common/sanitizer_mac.cc
index 7e85505..9fead91 100644
--- a/lib/sanitizer_common/sanitizer_mac.cc
+++ b/lib/sanitizer_common/sanitizer_mac.cc
@@ -23,6 +23,7 @@
 #include <stdio.h>
 
 #include "sanitizer_common.h"
+#include "sanitizer_file.h"
 #include "sanitizer_flags.h"
 #include "sanitizer_internal_defs.h"
 #include "sanitizer_libc.h"
@@ -102,12 +103,12 @@
 uptr internal_mmap(void *addr, size_t length, int prot, int flags,
                    int fd, u64 offset) {
   if (fd == -1) fd = VM_MAKE_TAG(VM_MEMORY_ANALYSIS_TOOL);
-  if (__mmap) return (uptr)__mmap(addr, length, prot, flags, fd, offset);
+  if (&__mmap) return (uptr)__mmap(addr, length, prot, flags, fd, offset);
   return (uptr)mmap(addr, length, prot, flags, fd, offset);
 }
 
 uptr internal_munmap(void *addr, uptr length) {
-  if (__munmap) return __munmap(addr, length);
+  if (&__munmap) return __munmap(addr, length);
   return munmap(addr, length);
 }
 
@@ -191,14 +192,15 @@
 
 uptr internal_sigprocmask(int how, __sanitizer_sigset_t *set,
                           __sanitizer_sigset_t *oldset) {
-  return sigprocmask(how, set, oldset);
+  // Don't use sigprocmask here, because it affects all threads.
+  return pthread_sigmask(how, set, oldset);
 }
 
 // Doesn't call pthread_atfork() handlers (but not available on 10.6).
 extern "C" pid_t __fork(void) SANITIZER_WEAK_ATTRIBUTE;
 
 int internal_fork() {
-  if (__fork)
+  if (&__fork)
     return __fork();
   return fork();
 }
@@ -252,9 +254,8 @@
   return S_ISREG(st.st_mode);
 }
 
-uptr GetTid() {
-  // FIXME: This can potentially get truncated on 32-bit, where uptr is 4 bytes.
-  uint64_t tid;
+tid_t GetTid() {
+  tid_t tid;
   pthread_threadid_np(nullptr, &tid);
   return tid;
 }
@@ -371,6 +372,27 @@
 void InitTlsSize() {
 }
 
+uptr TlsBaseAddr() {
+  uptr segbase = 0;
+#if defined(__x86_64__)
+  asm("movq %%gs:0,%0" : "=r"(segbase));
+#elif defined(__i386__)
+  asm("movl %%gs:0,%0" : "=r"(segbase));
+#endif
+  return segbase;
+}
+
+// The size of the tls on darwin does not appear to be well documented,
+// however the vm memory map suggests that it is 1024 uptrs in size,
+// with a size of 0x2000 bytes on x86_64 and 0x1000 bytes on i386.
+uptr TlsSize() {
+#if defined(__x86_64__) || defined(__i386__)
+  return 1024 * sizeof(uptr);
+#else
+  return 0;
+#endif
+}
+
 void GetThreadStackAndTls(bool main, uptr *stk_addr, uptr *stk_size,
                           uptr *tls_addr, uptr *tls_size) {
 #if !SANITIZER_GO
@@ -378,8 +400,8 @@
   GetThreadStackTopAndBottom(main, &stack_top, &stack_bottom);
   *stk_addr = stack_bottom;
   *stk_size = stack_top - stack_bottom;
-  *tls_addr = 0;
-  *tls_size = 0;
+  *tls_addr = TlsBaseAddr();
+  *tls_size = TlsSize();
 #else
   *stk_addr = 0;
   *stk_size = 0;
@@ -389,24 +411,37 @@
 }
 
 void ListOfModules::init() {
-  clear();
+  clearOrInit();
   MemoryMappingLayout memory_mapping(false);
   memory_mapping.DumpListOfModules(&modules_);
 }
 
-bool IsHandledDeadlySignal(int signum) {
+void ListOfModules::fallbackInit() { clear(); }
+
+static HandleSignalMode GetHandleSignalModeImpl(int signum) {
+  switch (signum) {
+    case SIGABRT:
+      return common_flags()->handle_abort;
+    case SIGILL:
+      return common_flags()->handle_sigill;
+    case SIGFPE:
+      return common_flags()->handle_sigfpe;
+    case SIGSEGV:
+      return common_flags()->handle_segv;
+    case SIGBUS:
+      return common_flags()->handle_sigbus;
+  }
+  return kHandleSignalNo;
+}
+
+HandleSignalMode GetHandleSignalMode(int signum) {
+  // Handling fatal signals on watchOS and tvOS devices is disallowed.
   if ((SANITIZER_WATCHOS || SANITIZER_TVOS) && !(SANITIZER_IOSSIM))
-    // Handling fatal signals on watchOS and tvOS devices is disallowed.
-    return false;
-  if (common_flags()->handle_abort && signum == SIGABRT)
-    return true;
-  if (common_flags()->handle_sigill && signum == SIGILL)
-    return true;
-  if (common_flags()->handle_sigfpe && signum == SIGFPE)
-    return true;
-  if (common_flags()->handle_segv && signum == SIGSEGV)
-    return true;
-  return common_flags()->handle_sigbus && signum == SIGBUS;
+    return kHandleSignalNo;
+  HandleSignalMode result = GetHandleSignalModeImpl(signum);
+  if (result == kHandleSignalYes && !common_flags()->allow_user_segv_handler)
+    return kHandleSignalExclusive;
+  return result;
 }
 
 MacosVersion cached_macos_version = MACOS_VERSION_UNINITIALIZED;
@@ -541,7 +576,7 @@
 #endif
 }
 
-SignalContext::WriteFlag SignalContext::GetWriteFlag(void *context) {
+SignalContext::WriteFlag SignalContext::GetWriteFlag() const {
 #if defined(__x86_64__) || defined(__i386__)
   ucontext_t *ucontext = static_cast<ucontext_t*>(context);
   return ucontext->uc_mcontext->__es.__err & 2 /*T_PF_WRITE*/ ? WRITE : READ;
@@ -550,7 +585,7 @@
 #endif
 }
 
-void GetPcSpBp(void *context, uptr *pc, uptr *sp, uptr *bp) {
+static void GetPcSpBp(void *context, uptr *pc, uptr *sp, uptr *bp) {
   ucontext_t *ucontext = (ucontext_t*)context;
 # if defined(__aarch64__)
   *pc = ucontext->uc_mcontext->__ss.__pc;
@@ -577,6 +612,8 @@
 # endif
 }
 
+void SignalContext::InitPcSpBp() { GetPcSpBp(context, &pc, &sp, &bp); }
+
 #if !SANITIZER_GO
 static const char kDyldInsertLibraries[] = "DYLD_INSERT_LIBRARIES";
 LowLevelAllocator allocator_for_env;
@@ -768,9 +805,69 @@
   return *_NSGetArgv();
 }
 
+#if defined(__aarch64__) && SANITIZER_IOS && !SANITIZER_IOSSIM
+// The task_vm_info struct is normally provided by the macOS SDK, but we need
+// fields only available in 10.12+. Declare the struct manually to be able to
+// build against older SDKs.
+struct __sanitizer_task_vm_info {
+  mach_vm_size_t virtual_size;
+  integer_t region_count;
+  integer_t page_size;
+  mach_vm_size_t resident_size;
+  mach_vm_size_t resident_size_peak;
+  mach_vm_size_t device;
+  mach_vm_size_t device_peak;
+  mach_vm_size_t internal;
+  mach_vm_size_t internal_peak;
+  mach_vm_size_t external;
+  mach_vm_size_t external_peak;
+  mach_vm_size_t reusable;
+  mach_vm_size_t reusable_peak;
+  mach_vm_size_t purgeable_volatile_pmap;
+  mach_vm_size_t purgeable_volatile_resident;
+  mach_vm_size_t purgeable_volatile_virtual;
+  mach_vm_size_t compressed;
+  mach_vm_size_t compressed_peak;
+  mach_vm_size_t compressed_lifetime;
+  mach_vm_size_t phys_footprint;
+  mach_vm_address_t min_address;
+  mach_vm_address_t max_address;
+};
+#define __SANITIZER_TASK_VM_INFO_COUNT ((mach_msg_type_number_t) \
+    (sizeof(__sanitizer_task_vm_info) / sizeof(natural_t)))
+
+uptr GetTaskInfoMaxAddress() {
+  __sanitizer_task_vm_info vm_info = {};
+  mach_msg_type_number_t count = __SANITIZER_TASK_VM_INFO_COUNT;
+  int err = task_info(mach_task_self(), TASK_VM_INFO, (int *)&vm_info, &count);
+  if (err == 0) {
+    return vm_info.max_address - 1;
+  } else {
+    // xnu cannot provide vm address limit
+    return 0x200000000 - 1;
+  }
+}
+#endif
+
+uptr GetMaxVirtualAddress() {
+#if SANITIZER_WORDSIZE == 64
+# if defined(__aarch64__) && SANITIZER_IOS && !SANITIZER_IOSSIM
+  // Get the maximum VM address
+  static uptr max_vm = GetTaskInfoMaxAddress();
+  CHECK(max_vm);
+  return max_vm;
+# else
+  return (1ULL << 47) - 1;  // 0x00007fffffffffffUL;
+# endif
+#else  // SANITIZER_WORDSIZE == 32
+  return (1ULL << 32) - 1;  // 0xffffffff;
+#endif  // SANITIZER_WORDSIZE
+}
+
 uptr FindAvailableMemoryRange(uptr shadow_size,
                               uptr alignment,
-                              uptr left_padding) {
+                              uptr left_padding,
+                              uptr *largest_gap_found) {
   typedef vm_region_submap_short_info_data_64_t RegionInfo;
   enum { kRegionInfoSize = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64 };
   // Start searching for available memory region past PAGEZERO, which is
@@ -781,6 +878,7 @@
   mach_vm_address_t address = start_address;
   mach_vm_address_t free_begin = start_address;
   kern_return_t kr = KERN_SUCCESS;
+  if (largest_gap_found) *largest_gap_found = 0;
   while (kr == KERN_SUCCESS) {
     mach_vm_size_t vmsize = 0;
     natural_t depth = 0;
@@ -790,10 +888,15 @@
                                 (vm_region_info_t)&vminfo, &count);
     if (free_begin != address) {
       // We found a free region [free_begin..address-1].
-      uptr shadow_address = RoundUpTo((uptr)free_begin + left_padding,
-                                      alignment);
-      if (shadow_address + shadow_size < (uptr)address) {
-        return shadow_address;
+      uptr gap_start = RoundUpTo((uptr)free_begin + left_padding, alignment);
+      uptr gap_end = RoundDownTo((uptr)address, alignment);
+      uptr gap_size = gap_end > gap_start ? gap_end - gap_start : 0;
+      if (shadow_size < gap_size) {
+        return gap_start;
+      }
+
+      if (largest_gap_found && *largest_gap_found < gap_size) {
+        *largest_gap_found = gap_size;
       }
     }
     // Move to the next region.
@@ -892,6 +995,11 @@
   // Do nothing.
 }
 
+// FIXME: implement on this platform.
+bool GetRandom(void *buffer, uptr length, bool blocking) {
+  UNIMPLEMENTED();
+}
+
 }  // namespace __sanitizer
 
 #endif  // SANITIZER_MAC
diff --git a/lib/sanitizer_common/sanitizer_mac.h b/lib/sanitizer_common/sanitizer_mac.h
index 636d9bf..60febef 100644
--- a/lib/sanitizer_common/sanitizer_mac.h
+++ b/lib/sanitizer_common/sanitizer_mac.h
@@ -20,6 +20,17 @@
 
 namespace __sanitizer {
 
+struct MemoryMappingLayoutData {
+  int current_image;
+  u32 current_magic;
+  u32 current_filetype;
+  ModuleArch current_arch;
+  u8 current_uuid[kModuleUUIDSize];
+  int current_load_cmd_count;
+  char *current_load_cmd_addr;
+  bool current_instrumented;
+};
+
 enum MacosVersion {
   MACOS_VERSION_UNINITIALIZED = 0,
   MACOS_VERSION_UNKNOWN,
@@ -36,6 +47,8 @@
 
 char **GetEnviron();
 
+void RestrictMemoryToMaxAddress(uptr max_address);
+
 }  // namespace __sanitizer
 
 extern "C" {
diff --git a/lib/sanitizer_common/sanitizer_mac_libcdep.cc b/lib/sanitizer_common/sanitizer_mac_libcdep.cc
new file mode 100644
index 0000000..c95daa9
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_mac_libcdep.cc
@@ -0,0 +1,30 @@
+//===-- sanitizer_mac_libcdep.cc ------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is shared between various sanitizers' runtime libraries and
+// implements OSX-specific functions.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_platform.h"
+#if SANITIZER_MAC
+#include "sanitizer_mac.h"
+
+#include <sys/mman.h>
+
+namespace __sanitizer {
+
+void RestrictMemoryToMaxAddress(uptr max_address) {
+  uptr size_to_mmap = GetMaxVirtualAddress() + 1 - max_address;
+  void *res = MmapFixedNoAccess(max_address, size_to_mmap, "high gap");
+  CHECK(res != MAP_FAILED);
+}
+
+}  // namespace __sanitizer
+
+#endif  // SANITIZER_MAC
diff --git a/lib/sanitizer_common/sanitizer_malloc_mac.inc b/lib/sanitizer_common/sanitizer_malloc_mac.inc
index 6fbee07..5699c59 100644
--- a/lib/sanitizer_common/sanitizer_malloc_mac.inc
+++ b/lib/sanitizer_common/sanitizer_malloc_mac.inc
@@ -59,6 +59,9 @@
     uptr allocated_size = RoundUpTo(sizeof(sanitizer_zone), page_size);
     mprotect(zone, allocated_size, PROT_READ | PROT_WRITE);
   }
+  if (zone->zone_name) {
+    COMMON_MALLOC_FREE((void *)zone->zone_name);
+  }
   COMMON_MALLOC_FREE(zone);
 }
 
diff --git a/lib/sanitizer_common/sanitizer_platform.h b/lib/sanitizer_common/sanitizer_platform.h
index d9a8e8d..81d9164 100644
--- a/lib/sanitizer_common/sanitizer_platform.h
+++ b/lib/sanitizer_common/sanitizer_platform.h
@@ -13,8 +13,8 @@
 #ifndef SANITIZER_PLATFORM_H
 #define SANITIZER_PLATFORM_H
 
-#if !defined(__linux__) && !defined(__FreeBSD__) && \
-  !defined(__APPLE__) && !defined(_WIN32)
+#if !defined(__linux__) && !defined(__FreeBSD__) && !defined(__NetBSD__) && \
+  !defined(__APPLE__) && !defined(_WIN32) && !defined(__Fuchsia__)
 # error "This operating system is not supported"
 #endif
 
@@ -30,6 +30,12 @@
 # define SANITIZER_FREEBSD 0
 #endif
 
+#if defined(__NetBSD__)
+# define SANITIZER_NETBSD 1
+#else
+# define SANITIZER_NETBSD 0
+#endif
+
 #if defined(__APPLE__)
 # define SANITIZER_MAC     1
 # include <TargetConditionals.h>
@@ -79,7 +85,14 @@
 # define SANITIZER_ANDROID 0
 #endif
 
-#define SANITIZER_POSIX (SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_MAC)
+#if defined(__Fuchsia__)
+# define SANITIZER_FUCHSIA 1
+#else
+# define SANITIZER_FUCHSIA 0
+#endif
+
+#define SANITIZER_POSIX \
+  (SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_MAC || SANITIZER_NETBSD)
 
 #if __LP64__ || defined(_WIN64)
 #  define SANITIZER_WORDSIZE 64
@@ -162,13 +175,19 @@
 # define SANITIZER_PPC64V2 0
 #endif
 
+#if defined(__arm__)
+# define SANITIZER_ARM 1
+#else
+# define SANITIZER_ARM 0
+#endif
+
 // By default we allow to use SizeClassAllocator64 on 64-bit platform.
 // But in some cases (e.g. AArch64's 39-bit address space) SizeClassAllocator64
 // does not work well and we need to fallback to SizeClassAllocator32.
 // For such platforms build this code with -DSANITIZER_CAN_USE_ALLOCATOR64=0 or
 // change the definition of SANITIZER_CAN_USE_ALLOCATOR64 here.
 #ifndef SANITIZER_CAN_USE_ALLOCATOR64
-# if SANITIZER_ANDROID && defined(__aarch64__)
+# if (SANITIZER_ANDROID && defined(__aarch64__)) || SANITIZER_FUCHSIA
 #  define SANITIZER_CAN_USE_ALLOCATOR64 1
 # elif defined(__mips64) || defined(__aarch64__)
 #  define SANITIZER_CAN_USE_ALLOCATOR64 0
@@ -253,4 +272,15 @@
 # define SANITIZER_GO 0
 #endif
 
+// On PowerPC and ARM Thumb, calling pthread_exit() causes LSan to detect leaks.
+// pthread_exit() performs unwinding that leads to dlopen'ing libgcc_s.so.
+// dlopen mallocs "libgcc_s.so" string which confuses LSan, it fails to realize
+// that this allocation happens in dynamic linker and should be ignored.
+#if SANITIZER_PPC || defined(__thumb__)
+# define SANITIZER_SUPPRESS_LEAK_ON_PTHREAD_EXIT 1
+#else
+# define SANITIZER_SUPPRESS_LEAK_ON_PTHREAD_EXIT 0
+#endif
+
+
 #endif // SANITIZER_PLATFORM_H
diff --git a/lib/sanitizer_common/sanitizer_platform_interceptors.h b/lib/sanitizer_common/sanitizer_platform_interceptors.h
index 480e63a..1b80270 100644
--- a/lib/sanitizer_common/sanitizer_platform_interceptors.h
+++ b/lib/sanitizer_common/sanitizer_platform_interceptors.h
@@ -16,13 +16,25 @@
 
 #include "sanitizer_internal_defs.h"
 
+#if SANITIZER_POSIX
+# define SI_POSIX 1
+#else
+# define SI_POSIX 0
+#endif
+
 #if !SANITIZER_WINDOWS
 # define SI_WINDOWS 0
-# define SI_NOT_WINDOWS 1
-# include "sanitizer_platform_limits_posix.h"
 #else
 # define SI_WINDOWS 1
-# define SI_NOT_WINDOWS 0
+#endif
+
+#if (SI_POSIX != 0) == (SI_WINDOWS != 0) && !SANITIZER_FUCHSIA
+# error "Windows is not POSIX!"
+#endif
+
+#if SI_POSIX
+# include "sanitizer_platform_limits_netbsd.h"
+# include "sanitizer_platform_limits_posix.h"
 #endif
 
 #if SANITIZER_LINUX && !SANITIZER_ANDROID
@@ -43,6 +55,12 @@
 # define SI_FREEBSD 0
 #endif
 
+#if SANITIZER_NETBSD
+# define SI_NETBSD 1
+#else
+# define SI_NETBSD 0
+#endif
+
 #if SANITIZER_LINUX
 # define SI_LINUX 1
 #else
@@ -63,28 +81,43 @@
 # define SI_IOS 0
 #endif
 
-#if !SANITIZER_WINDOWS && !SANITIZER_MAC
-# define SI_UNIX_NOT_MAC 1
+#if SANITIZER_FUCHSIA
+# define SI_NOT_FUCHSIA 0
 #else
-# define SI_UNIX_NOT_MAC 0
+# define SI_NOT_FUCHSIA 1
 #endif
 
-#define SANITIZER_INTERCEPT_STRLEN 1
-#define SANITIZER_INTERCEPT_STRNLEN SI_NOT_MAC
-#define SANITIZER_INTERCEPT_STRCMP 1
-#define SANITIZER_INTERCEPT_STRSTR 1
-#define SANITIZER_INTERCEPT_STRCASESTR SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_STRCHR 1
-#define SANITIZER_INTERCEPT_STRCHRNUL SI_UNIX_NOT_MAC
-#define SANITIZER_INTERCEPT_STRRCHR 1
-#define SANITIZER_INTERCEPT_STRSPN 1
-#define SANITIZER_INTERCEPT_STRPBRK 1
+#if SANITIZER_POSIX && !SANITIZER_MAC
+# define SI_POSIX_NOT_MAC 1
+#else
+# define SI_POSIX_NOT_MAC 0
+#endif
+
+#if SANITIZER_LINUX && !SANITIZER_FREEBSD
+# define SI_LINUX_NOT_FREEBSD 1
+# else
+# define SI_LINUX_NOT_FREEBSD 0
+#endif
+
+#define SANITIZER_INTERCEPT_STRLEN SI_NOT_FUCHSIA
+#define SANITIZER_INTERCEPT_STRNLEN (SI_NOT_MAC && SI_NOT_FUCHSIA)
+#define SANITIZER_INTERCEPT_STRCMP SI_NOT_FUCHSIA
+#define SANITIZER_INTERCEPT_STRSTR SI_NOT_FUCHSIA
+#define SANITIZER_INTERCEPT_STRCASESTR SI_POSIX
+#define SANITIZER_INTERCEPT_STRTOK SI_NOT_FUCHSIA
+#define SANITIZER_INTERCEPT_STRCHR SI_NOT_FUCHSIA
+#define SANITIZER_INTERCEPT_STRCHRNUL SI_POSIX_NOT_MAC
+#define SANITIZER_INTERCEPT_STRRCHR SI_NOT_FUCHSIA
+#define SANITIZER_INTERCEPT_STRSPN SI_NOT_FUCHSIA
+#define SANITIZER_INTERCEPT_STRPBRK SI_NOT_FUCHSIA
 #define SANITIZER_INTERCEPT_TEXTDOMAIN SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_STRCASECMP SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_STRCASECMP SI_POSIX
 #define SANITIZER_INTERCEPT_MEMSET 1
 #define SANITIZER_INTERCEPT_MEMMOVE 1
 #define SANITIZER_INTERCEPT_MEMCPY 1
-#define SANITIZER_INTERCEPT_MEMCMP 1
+#define SANITIZER_INTERCEPT_MEMCMP SI_NOT_FUCHSIA
+#define SANITIZER_INTERCEPT_STRNDUP SI_POSIX
+#define SANITIZER_INTERCEPT___STRNDUP SI_LINUX_NOT_FREEBSD
 #if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && \
     __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 1070
 # define SI_MAC_DEPLOYMENT_BELOW_10_7 1
@@ -93,78 +126,83 @@
 #endif
 // memmem on Darwin doesn't exist on 10.6
 // FIXME: enable memmem on Windows.
-#define SANITIZER_INTERCEPT_MEMMEM \
-  SI_NOT_WINDOWS && !SI_MAC_DEPLOYMENT_BELOW_10_7
-#define SANITIZER_INTERCEPT_MEMCHR 1
-#define SANITIZER_INTERCEPT_MEMRCHR SI_FREEBSD || SI_LINUX
+#define SANITIZER_INTERCEPT_MEMMEM (SI_POSIX && !SI_MAC_DEPLOYMENT_BELOW_10_7)
+#define SANITIZER_INTERCEPT_MEMCHR SI_NOT_FUCHSIA
+#define SANITIZER_INTERCEPT_MEMRCHR (SI_FREEBSD || SI_LINUX || SI_NETBSD)
 
-#define SANITIZER_INTERCEPT_READ   SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_PREAD  SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_WRITE  SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_PWRITE SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_READ SI_POSIX
+#define SANITIZER_INTERCEPT_PREAD SI_POSIX
+#define SANITIZER_INTERCEPT_WRITE SI_POSIX
+#define SANITIZER_INTERCEPT_PWRITE SI_POSIX
+
+#define SANITIZER_INTERCEPT_FREAD SI_POSIX
+#define SANITIZER_INTERCEPT_FWRITE SI_POSIX
 
 #define SANITIZER_INTERCEPT_PREAD64 SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_PWRITE64 SI_LINUX_NOT_ANDROID
 
-#define SANITIZER_INTERCEPT_READV SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_WRITEV SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_READV SI_POSIX
+#define SANITIZER_INTERCEPT_WRITEV SI_POSIX
 
-#define SANITIZER_INTERCEPT_PREADV SI_FREEBSD || SI_LINUX_NOT_ANDROID
+#define SANITIZER_INTERCEPT_PREADV \
+  (SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID)
 #define SANITIZER_INTERCEPT_PWRITEV SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_PREADV64 SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_PWRITEV64 SI_LINUX_NOT_ANDROID
 
 #define SANITIZER_INTERCEPT_PRCTL   SI_LINUX
 
-#define SANITIZER_INTERCEPT_LOCALTIME_AND_FRIENDS SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_STRPTIME SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_LOCALTIME_AND_FRIENDS SI_POSIX
+#define SANITIZER_INTERCEPT_STRPTIME SI_POSIX
 
-#define SANITIZER_INTERCEPT_SCANF SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_SCANF SI_POSIX
 #define SANITIZER_INTERCEPT_ISOC99_SCANF SI_LINUX_NOT_ANDROID
 
 #ifndef SANITIZER_INTERCEPT_PRINTF
-# define SANITIZER_INTERCEPT_PRINTF SI_NOT_WINDOWS
-# define SANITIZER_INTERCEPT_PRINTF_L SI_FREEBSD
+# define SANITIZER_INTERCEPT_PRINTF SI_POSIX
+# define SANITIZER_INTERCEPT_PRINTF_L (SI_FREEBSD || SI_NETBSD)
 # define SANITIZER_INTERCEPT_ISOC99_PRINTF SI_LINUX_NOT_ANDROID
 #endif
 
-#define SANITIZER_INTERCEPT_FREXP 1
-#define SANITIZER_INTERCEPT_FREXPF_FREXPL SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_FREXP SI_NOT_FUCHSIA
+#define SANITIZER_INTERCEPT_FREXPF_FREXPL SI_POSIX
 
-#define SANITIZER_INTERCEPT_GETPWNAM_AND_FRIENDS SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_GETPWNAM_AND_FRIENDS SI_POSIX
 #define SANITIZER_INTERCEPT_GETPWNAM_R_AND_FRIENDS \
-  SI_FREEBSD || SI_MAC || SI_LINUX_NOT_ANDROID
+  (SI_FREEBSD || SI_NETBSD || SI_MAC || SI_LINUX_NOT_ANDROID)
 #define SANITIZER_INTERCEPT_GETPWENT \
-  SI_FREEBSD || SI_MAC || SI_LINUX_NOT_ANDROID
+  (SI_FREEBSD || SI_NETBSD || SI_MAC || SI_LINUX_NOT_ANDROID)
 #define SANITIZER_INTERCEPT_FGETPWENT SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_GETPWENT_R SI_FREEBSD || SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_SETPWENT SI_MAC || SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_CLOCK_GETTIME SI_FREEBSD || SI_LINUX
-#define SANITIZER_INTERCEPT_GETITIMER SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_TIME SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_GETPWENT_R \
+  (SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID)
+#define SANITIZER_INTERCEPT_SETPWENT (SI_MAC || SI_LINUX_NOT_ANDROID)
+#define SANITIZER_INTERCEPT_CLOCK_GETTIME (SI_FREEBSD || SI_NETBSD || SI_LINUX)
+#define SANITIZER_INTERCEPT_GETITIMER SI_POSIX
+#define SANITIZER_INTERCEPT_TIME SI_POSIX
 #define SANITIZER_INTERCEPT_GLOB SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_WAIT SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_INET SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_PTHREAD_GETSCHEDPARAM SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_GETADDRINFO SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_GETNAMEINFO SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_GETSOCKNAME SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_GETHOSTBYNAME SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_GETHOSTBYNAME_R SI_FREEBSD || SI_LINUX
-#define SANITIZER_INTERCEPT_GETHOSTBYNAME2_R SI_FREEBSD || SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_GETHOSTBYADDR_R SI_FREEBSD || SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_GETHOSTENT_R SI_FREEBSD || SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_GETSOCKOPT SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_ACCEPT SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_WAIT SI_POSIX
+#define SANITIZER_INTERCEPT_INET SI_POSIX
+#define SANITIZER_INTERCEPT_PTHREAD_GETSCHEDPARAM SI_POSIX
+#define SANITIZER_INTERCEPT_GETADDRINFO SI_POSIX
+#define SANITIZER_INTERCEPT_GETNAMEINFO SI_POSIX
+#define SANITIZER_INTERCEPT_GETSOCKNAME SI_POSIX
+#define SANITIZER_INTERCEPT_GETHOSTBYNAME SI_POSIX
+#define SANITIZER_INTERCEPT_GETHOSTBYNAME_R (SI_FREEBSD || SI_LINUX)
+#define SANITIZER_INTERCEPT_GETHOSTBYNAME2_R \
+  (SI_FREEBSD || SI_LINUX_NOT_ANDROID)
+#define SANITIZER_INTERCEPT_GETHOSTBYADDR_R (SI_FREEBSD || SI_LINUX_NOT_ANDROID)
+#define SANITIZER_INTERCEPT_GETHOSTENT_R (SI_FREEBSD || SI_LINUX_NOT_ANDROID)
+#define SANITIZER_INTERCEPT_GETSOCKOPT SI_POSIX
+#define SANITIZER_INTERCEPT_ACCEPT SI_POSIX
 #define SANITIZER_INTERCEPT_ACCEPT4 SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_MODF SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_RECVMSG SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_SENDMSG SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_GETPEERNAME SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_IOCTL SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_INET_ATON SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_MODF SI_POSIX
+#define SANITIZER_INTERCEPT_RECVMSG SI_POSIX
+#define SANITIZER_INTERCEPT_SENDMSG SI_POSIX
+#define SANITIZER_INTERCEPT_GETPEERNAME SI_POSIX
+#define SANITIZER_INTERCEPT_IOCTL SI_POSIX
+#define SANITIZER_INTERCEPT_INET_ATON SI_POSIX
 #define SANITIZER_INTERCEPT_SYSINFO SI_LINUX
-#define SANITIZER_INTERCEPT_READDIR SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_READDIR SI_POSIX
 #define SANITIZER_INTERCEPT_READDIR64 SI_LINUX_NOT_ANDROID
 #if SI_LINUX_NOT_ANDROID && \
   (defined(__i386) || defined(__x86_64) || defined(__mips64) || \
@@ -174,109 +212,116 @@
 #else
 #define SANITIZER_INTERCEPT_PTRACE 0
 #endif
-#define SANITIZER_INTERCEPT_SETLOCALE SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_GETCWD SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_SETLOCALE SI_POSIX
+#define SANITIZER_INTERCEPT_GETCWD SI_POSIX
 #define SANITIZER_INTERCEPT_GET_CURRENT_DIR_NAME SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_STRTOIMAX SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_MBSTOWCS SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_MBSNRTOWCS SI_MAC || SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_WCSTOMBS SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_STRTOIMAX SI_POSIX
+#define SANITIZER_INTERCEPT_MBSTOWCS SI_POSIX
+#define SANITIZER_INTERCEPT_MBSNRTOWCS (SI_MAC || SI_LINUX_NOT_ANDROID)
+#define SANITIZER_INTERCEPT_WCSTOMBS SI_POSIX
 #define SANITIZER_INTERCEPT_WCSNRTOMBS \
-  SI_FREEBSD || SI_MAC || SI_LINUX_NOT_ANDROID
+  (SI_FREEBSD || SI_NETBSD || SI_MAC || SI_LINUX_NOT_ANDROID)
 #define SANITIZER_INTERCEPT_WCRTOMB \
-  SI_FREEBSD || SI_MAC || SI_LINUX_NOT_ANDROID
+  (SI_FREEBSD || SI_NETBSD || SI_MAC || SI_LINUX_NOT_ANDROID)
 #define SANITIZER_INTERCEPT_TCGETATTR SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_REALPATH SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_REALPATH SI_POSIX
 #define SANITIZER_INTERCEPT_CANONICALIZE_FILE_NAME SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_CONFSTR \
-  SI_FREEBSD || SI_MAC || SI_LINUX_NOT_ANDROID
+  (SI_FREEBSD || SI_NETBSD || SI_MAC || SI_LINUX_NOT_ANDROID)
 #define SANITIZER_INTERCEPT_SCHED_GETAFFINITY SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_SCHED_GETPARAM SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_STRERROR SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_STRERROR_R SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_STRERROR SI_POSIX
+#define SANITIZER_INTERCEPT_STRERROR_R SI_POSIX
 #define SANITIZER_INTERCEPT_XPG_STRERROR_R SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_SCANDIR \
-  SI_FREEBSD || SI_LINUX_NOT_ANDROID
+  (SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID)
 #define SANITIZER_INTERCEPT_SCANDIR64 SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_GETGROUPS SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_POLL SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_GETGROUPS SI_POSIX
+#define SANITIZER_INTERCEPT_POLL SI_POSIX
 #define SANITIZER_INTERCEPT_PPOLL SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_WORDEXP \
-  SI_FREEBSD || (SI_MAC && !SI_IOS) || SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_SIGWAIT SI_NOT_WINDOWS
+  (SI_FREEBSD || SI_NETBSD || (SI_MAC && !SI_IOS) || SI_LINUX_NOT_ANDROID)
+#define SANITIZER_INTERCEPT_SIGWAIT SI_POSIX
 #define SANITIZER_INTERCEPT_SIGWAITINFO SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_SIGTIMEDWAIT SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_SIGSETOPS \
-  SI_FREEBSD || SI_MAC || SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_SIGPENDING SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_SIGPROCMASK SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_BACKTRACE SI_FREEBSD || SI_LINUX_NOT_ANDROID
+  (SI_FREEBSD || SI_NETBSD || SI_MAC || SI_LINUX_NOT_ANDROID)
+#define SANITIZER_INTERCEPT_SIGPENDING SI_POSIX
+#define SANITIZER_INTERCEPT_SIGPROCMASK SI_POSIX
+#define SANITIZER_INTERCEPT_BACKTRACE \
+  (SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID)
 #define SANITIZER_INTERCEPT_GETMNTENT SI_LINUX
 #define SANITIZER_INTERCEPT_GETMNTENT_R SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_STATFS SI_FREEBSD || SI_MAC || SI_LINUX_NOT_ANDROID
+#define SANITIZER_INTERCEPT_STATFS \
+  (SI_FREEBSD || SI_MAC || SI_LINUX_NOT_ANDROID)
 #define SANITIZER_INTERCEPT_STATFS64 \
-  (SI_MAC && !SI_IOS) || SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_STATVFS SI_FREEBSD || SI_LINUX_NOT_ANDROID
+  ((SI_MAC && !SI_IOS) || SI_LINUX_NOT_ANDROID)
+#define SANITIZER_INTERCEPT_STATVFS \
+  (SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID)
 #define SANITIZER_INTERCEPT_STATVFS64 SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_INITGROUPS SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_ETHER_NTOA_ATON SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_INITGROUPS SI_POSIX
+#define SANITIZER_INTERCEPT_ETHER_NTOA_ATON SI_POSIX
 #define SANITIZER_INTERCEPT_ETHER_HOST \
-  SI_FREEBSD || SI_MAC || SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_ETHER_R SI_FREEBSD || SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_SHMCTL \
-  ((SI_FREEBSD || SI_LINUX_NOT_ANDROID) && SANITIZER_WORDSIZE == 64)
+  (SI_FREEBSD || SI_MAC || SI_LINUX_NOT_ANDROID)
+#define SANITIZER_INTERCEPT_ETHER_R (SI_FREEBSD || SI_LINUX_NOT_ANDROID)
+#define SANITIZER_INTERCEPT_SHMCTL                       \
+  (SI_NETBSD || ((SI_FREEBSD || SI_LINUX_NOT_ANDROID) && \
+                 SANITIZER_WORDSIZE == 64))  // NOLINT
 #define SANITIZER_INTERCEPT_RANDOM_R SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_PTHREAD_ATTR_GET SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_PTHREAD_ATTR_GET SI_POSIX
 #define SANITIZER_INTERCEPT_PTHREAD_ATTR_GETINHERITSCHED \
-  SI_FREEBSD || SI_MAC || SI_LINUX_NOT_ANDROID
+  (SI_FREEBSD || SI_NETBSD || SI_MAC || SI_LINUX_NOT_ANDROID)
 #define SANITIZER_INTERCEPT_PTHREAD_ATTR_GETAFFINITY_NP SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETPSHARED SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETTYPE SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETPSHARED SI_POSIX
+#define SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETTYPE SI_POSIX
 #define SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETPROTOCOL \
-  SI_MAC || SI_LINUX_NOT_ANDROID
+  (SI_MAC || SI_NETBSD || SI_LINUX_NOT_ANDROID)
 #define SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETPRIOCEILING \
-  SI_MAC || SI_LINUX_NOT_ANDROID
+  (SI_MAC || SI_NETBSD || SI_LINUX_NOT_ANDROID)
 #define SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETROBUST SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETROBUST_NP SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_PTHREAD_RWLOCKATTR_GETPSHARED SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_PTHREAD_RWLOCKATTR_GETPSHARED SI_POSIX
 #define SANITIZER_INTERCEPT_PTHREAD_RWLOCKATTR_GETKIND_NP SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_PTHREAD_CONDATTR_GETPSHARED SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_PTHREAD_CONDATTR_GETPSHARED SI_POSIX
 #define SANITIZER_INTERCEPT_PTHREAD_CONDATTR_GETCLOCK SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_PTHREAD_BARRIERATTR_GETPSHARED SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_TMPNAM SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_TMPNAM SI_POSIX
 #define SANITIZER_INTERCEPT_TMPNAM_R SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_TTYNAME_R SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_TEMPNAM SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_TTYNAME_R SI_POSIX
+#define SANITIZER_INTERCEPT_TEMPNAM SI_POSIX
 #define SANITIZER_INTERCEPT_SINCOS SI_LINUX
-#define SANITIZER_INTERCEPT_REMQUO SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_LGAMMA SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_LGAMMA_R SI_FREEBSD || SI_LINUX
+#define SANITIZER_INTERCEPT_REMQUO SI_POSIX
+#define SANITIZER_INTERCEPT_LGAMMA SI_POSIX
+#define SANITIZER_INTERCEPT_LGAMMA_R (SI_FREEBSD || SI_LINUX)
 #define SANITIZER_INTERCEPT_LGAMMAL_R SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_DRAND48_R SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_RAND_R \
-  SI_FREEBSD || SI_MAC || SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_ICONV SI_FREEBSD || SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_TIMES SI_NOT_WINDOWS
+  (SI_FREEBSD || SI_NETBSD || SI_MAC || SI_LINUX_NOT_ANDROID)
+#define SANITIZER_INTERCEPT_ICONV \
+  (SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID)
+#define SANITIZER_INTERCEPT_TIMES SI_POSIX
 
 // FIXME: getline seems to be available on OSX 10.7
-#define SANITIZER_INTERCEPT_GETLINE SI_FREEBSD || SI_LINUX_NOT_ANDROID
+#define SANITIZER_INTERCEPT_GETLINE \
+  (SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID)
 
-#define SANITIZER_INTERCEPT__EXIT SI_LINUX || SI_FREEBSD || SI_MAC
+#define SANITIZER_INTERCEPT__EXIT \
+  (SI_LINUX || SI_FREEBSD || SI_NETBSD || SI_MAC)
 
-#define SANITIZER_INTERCEPT_PHTREAD_MUTEX SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_PHTREAD_MUTEX SI_POSIX
 #define SANITIZER_INTERCEPT_PTHREAD_SETNAME_NP \
-  SI_FREEBSD || SI_LINUX_NOT_ANDROID
+  (SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID)
 
 #define SANITIZER_INTERCEPT_TLS_GET_ADDR \
-  SI_FREEBSD || SI_LINUX_NOT_ANDROID
+  (SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID)
 
 #define SANITIZER_INTERCEPT_LISTXATTR SI_LINUX
 #define SANITIZER_INTERCEPT_GETXATTR SI_LINUX
 #define SANITIZER_INTERCEPT_GETRESID SI_LINUX
 #define SANITIZER_INTERCEPT_GETIFADDRS \
-  SI_FREEBSD || SI_LINUX_NOT_ANDROID || SI_MAC
+  (SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID || SI_MAC)
 #define SANITIZER_INTERCEPT_IF_INDEXTONAME \
-  SI_FREEBSD || SI_LINUX_NOT_ANDROID || SI_MAC
+  (SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID || SI_MAC)
 #define SANITIZER_INTERCEPT_CAPGET SI_LINUX_NOT_ANDROID
 #if SI_LINUX && defined(__arm__)
 #define SANITIZER_INTERCEPT_AEABI_MEM 1
@@ -284,53 +329,67 @@
 #define SANITIZER_INTERCEPT_AEABI_MEM 0
 #endif
 #define SANITIZER_INTERCEPT___BZERO SI_MAC
-#define SANITIZER_INTERCEPT_FTIME !SI_FREEBSD && SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_FTIME (!SI_FREEBSD && !SI_NETBSD && SI_POSIX)
 #define SANITIZER_INTERCEPT_XDR SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_TSEARCH SI_LINUX_NOT_ANDROID || SI_MAC
+#define SANITIZER_INTERCEPT_TSEARCH \
+  (SI_LINUX_NOT_ANDROID || SI_MAC || SI_NETBSD)
 #define SANITIZER_INTERCEPT_LIBIO_INTERNALS SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_FOPEN SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_FOPEN SI_POSIX
 #define SANITIZER_INTERCEPT_FOPEN64 SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_OPEN_MEMSTREAM SI_LINUX_NOT_ANDROID
+#define SANITIZER_INTERCEPT_OPEN_MEMSTREAM (SI_LINUX_NOT_ANDROID || SI_NETBSD)
 #define SANITIZER_INTERCEPT_OBSTACK SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_FFLUSH SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_FCLOSE SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_FFLUSH SI_POSIX
+#define SANITIZER_INTERCEPT_FCLOSE SI_POSIX
 
 #ifndef SANITIZER_INTERCEPT_DLOPEN_DLCLOSE
 #define SANITIZER_INTERCEPT_DLOPEN_DLCLOSE \
-    SI_FREEBSD || SI_LINUX_NOT_ANDROID || SI_MAC
+  (SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID || SI_MAC)
 #endif
 
-#define SANITIZER_INTERCEPT_GETPASS SI_LINUX_NOT_ANDROID || SI_MAC
+#define SANITIZER_INTERCEPT_GETPASS \
+  (SI_LINUX_NOT_ANDROID || SI_MAC || SI_NETBSD)
 #define SANITIZER_INTERCEPT_TIMERFD SI_LINUX_NOT_ANDROID
 
-#define SANITIZER_INTERCEPT_MLOCKX SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_MLOCKX SI_POSIX
 #define SANITIZER_INTERCEPT_FOPENCOOKIE SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_SEM SI_LINUX || SI_FREEBSD
-#define SANITIZER_INTERCEPT_PTHREAD_SETCANCEL SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_MINCORE SI_LINUX
+#define SANITIZER_INTERCEPT_SEM (SI_LINUX || SI_FREEBSD || SI_NETBSD)
+#define SANITIZER_INTERCEPT_PTHREAD_SETCANCEL SI_POSIX
+#define SANITIZER_INTERCEPT_MINCORE (SI_LINUX || SI_NETBSD)
 #define SANITIZER_INTERCEPT_PROCESS_VM_READV SI_LINUX
-#define SANITIZER_INTERCEPT_CTERMID SI_LINUX || SI_MAC || SI_FREEBSD
-#define SANITIZER_INTERCEPT_CTERMID_R SI_MAC || SI_FREEBSD
+#define SANITIZER_INTERCEPT_CTERMID \
+  (SI_LINUX || SI_MAC || SI_FREEBSD || SI_NETBSD)
+#define SANITIZER_INTERCEPT_CTERMID_R (SI_MAC || SI_FREEBSD)
 
-#define SANITIZER_INTERCEPTOR_HOOKS SI_LINUX || SI_MAC || SI_WINDOWS
-#define SANITIZER_INTERCEPT_RECV_RECVFROM SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_SEND_SENDTO SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPTOR_HOOKS (SI_LINUX || SI_MAC || SI_WINDOWS)
+#define SANITIZER_INTERCEPT_RECV_RECVFROM SI_POSIX
+#define SANITIZER_INTERCEPT_SEND_SENDTO SI_POSIX
 #define SANITIZER_INTERCEPT_EVENTFD_READ_WRITE SI_LINUX
 
-#define SANITIZER_INTERCEPT_STAT (SI_FREEBSD || SI_MAC || SI_ANDROID)
-#define SANITIZER_INTERCEPT___XSTAT !SANITIZER_INTERCEPT_STAT && SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_STAT \
+  (SI_FREEBSD || SI_MAC || SI_ANDROID || SI_NETBSD)
+#define SANITIZER_INTERCEPT___XSTAT (!SANITIZER_INTERCEPT_STAT && SI_POSIX)
 #define SANITIZER_INTERCEPT___XSTAT64 SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT___LXSTAT SANITIZER_INTERCEPT___XSTAT
 #define SANITIZER_INTERCEPT___LXSTAT64 SI_LINUX_NOT_ANDROID
 
-#define SANITIZER_INTERCEPT_UTMP SI_NOT_WINDOWS && !SI_MAC && !SI_FREEBSD
-#define SANITIZER_INTERCEPT_UTMPX SI_LINUX_NOT_ANDROID || SI_MAC || SI_FREEBSD
+#define SANITIZER_INTERCEPT_UTMP (SI_POSIX && !SI_MAC && !SI_FREEBSD)
+#define SANITIZER_INTERCEPT_UTMPX (SI_LINUX_NOT_ANDROID || SI_MAC || SI_FREEBSD)
 
-#define SANITIZER_INTERCEPT_MALLOPT_AND_MALLINFO (!SI_FREEBSD && !SI_MAC)
-#define SANITIZER_INTERCEPT_MEMALIGN (!SI_FREEBSD && !SI_MAC)
-#define SANITIZER_INTERCEPT_PVALLOC (!SI_FREEBSD && !SI_MAC)
-#define SANITIZER_INTERCEPT_CFREE (!SI_FREEBSD && !SI_MAC)
+#define SANITIZER_INTERCEPT_GETLOADAVG \
+  (SI_LINUX_NOT_ANDROID || SI_MAC || SI_FREEBSD || SI_NETBSD)
+
+#define SANITIZER_INTERCEPT_MALLOPT_AND_MALLINFO \
+  (!SI_FREEBSD && !SI_MAC && !SI_NETBSD && SI_NOT_FUCHSIA)
+#define SANITIZER_INTERCEPT_MEMALIGN (!SI_FREEBSD && !SI_MAC && !SI_NETBSD)
+#define SANITIZER_INTERCEPT_PVALLOC \
+  (!SI_FREEBSD && !SI_MAC && !SI_NETBSD && SI_NOT_FUCHSIA)
+#define SANITIZER_INTERCEPT_CFREE \
+  (!SI_FREEBSD && !SI_MAC && !SI_NETBSD && SI_NOT_FUCHSIA)
 #define SANITIZER_INTERCEPT_ALIGNED_ALLOC (!SI_MAC)
 #define SANITIZER_INTERCEPT_MALLOC_USABLE_SIZE (!SI_MAC)
+#define SANITIZER_INTERCEPT_MCHECK_MPROBE SI_LINUX_NOT_ANDROID
+#define SANITIZER_INTERCEPT_WCSCAT SI_POSIX
+#define SANITIZER_INTERCEPT_SIGNAL_AND_SIGACTION (!SI_WINDOWS && SI_NOT_FUCHSIA)
+#define SANITIZER_INTERCEPT_BSD_SIGNAL SI_ANDROID
 
 #endif  // #ifndef SANITIZER_PLATFORM_INTERCEPTORS_H
diff --git a/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cc b/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cc
new file mode 100644
index 0000000..342bd08
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cc
@@ -0,0 +1,361 @@
+//===-- sanitizer_platform_limits_netbsd.cc -------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of Sanitizer common code.
+//
+// Sizes and layouts of platform-specific NetBSD data structures.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_platform.h"
+
+#if SANITIZER_NETBSD
+#include <arpa/inet.h>
+#include <dirent.h>
+#include <glob.h>
+#include <grp.h>
+#include <ifaddrs.h>
+#include <limits.h>
+#include <link_elf.h>
+#include <net/if.h>
+#include <net/if_ether.h>
+#include <net/ppp_defs.h>
+#include <net/route.h>
+#include <netdb.h>
+#include <netinet/in.h>
+#include <netinet/ip_mroute.h>
+#include <poll.h>
+#include <pthread.h>
+#include <pwd.h>
+#include <semaphore.h>
+#include <signal.h>
+#include <stddef.h>
+#include <sys/filio.h>
+#include <sys/ipc.h>
+#include <sys/mman.h>
+#include <sys/mount.h>
+#include <sys/mqueue.h>
+#include <sys/msg.h>
+#include <sys/mtio.h>
+#include <sys/ptrace.h>
+#include <sys/resource.h>
+#include <sys/shm.h>
+#include <sys/signal.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/soundcard.h>
+#include <sys/stat.h>
+#include <sys/statvfs.h>
+#include <sys/time.h>
+#include <sys/timeb.h>
+#include <sys/times.h>
+#include <sys/timespec.h>
+#include <sys/timex.h>
+#include <sys/types.h>
+#include <sys/ucontext.h>
+#include <sys/utsname.h>
+#include <term.h>
+#include <termios.h>
+#include <time.h>
+#include <utime.h>
+#include <utmp.h>
+#include <utmpx.h>
+#include <wchar.h>
+#include <wordexp.h>
+
+// Include these after system headers to avoid name clashes and ambiguities.
+#include "sanitizer_internal_defs.h"
+#include "sanitizer_platform_limits_netbsd.h"
+
+namespace __sanitizer {
+unsigned struct_utsname_sz = sizeof(struct utsname);
+unsigned struct_stat_sz = sizeof(struct stat);
+unsigned struct_rusage_sz = sizeof(struct rusage);
+unsigned struct_tm_sz = sizeof(struct tm);
+unsigned struct_passwd_sz = sizeof(struct passwd);
+unsigned struct_group_sz = sizeof(struct group);
+unsigned siginfo_t_sz = sizeof(siginfo_t);
+unsigned struct_sigaction_sz = sizeof(struct sigaction);
+unsigned struct_itimerval_sz = sizeof(struct itimerval);
+unsigned pthread_t_sz = sizeof(pthread_t);
+unsigned pthread_cond_t_sz = sizeof(pthread_cond_t);
+unsigned pid_t_sz = sizeof(pid_t);
+unsigned timeval_sz = sizeof(timeval);
+unsigned uid_t_sz = sizeof(uid_t);
+unsigned gid_t_sz = sizeof(gid_t);
+unsigned mbstate_t_sz = sizeof(mbstate_t);
+unsigned sigset_t_sz = sizeof(sigset_t);
+unsigned struct_timezone_sz = sizeof(struct timezone);
+unsigned struct_tms_sz = sizeof(struct tms);
+unsigned struct_sigevent_sz = sizeof(struct sigevent);
+unsigned struct_sched_param_sz = sizeof(struct sched_param);
+unsigned struct_sockaddr_sz = sizeof(struct sockaddr);
+unsigned ucontext_t_sz = sizeof(ucontext_t);
+unsigned struct_rlimit_sz = sizeof(struct rlimit);
+unsigned struct_timespec_sz = sizeof(struct timespec);
+unsigned struct_utimbuf_sz = sizeof(struct utimbuf);
+unsigned struct_itimerspec_sz = sizeof(struct itimerspec);
+unsigned struct_timex_sz = sizeof(struct timex);
+unsigned struct_msqid_ds_sz = sizeof(struct msqid_ds);
+unsigned struct_mq_attr_sz = sizeof(struct mq_attr);
+unsigned struct_statvfs_sz = sizeof(struct statvfs);
+
+uptr sig_ign = (uptr)SIG_IGN;
+uptr sig_dfl = (uptr)SIG_DFL;
+uptr sa_siginfo = (uptr)SA_SIGINFO;
+
+int shmctl_ipc_stat = (int)IPC_STAT;
+
+unsigned struct_utmp_sz = sizeof(struct utmp);
+unsigned struct_utmpx_sz = sizeof(struct utmpx);
+
+int map_fixed = MAP_FIXED;
+
+int af_inet = (int)AF_INET;
+int af_inet6 = (int)AF_INET6;
+
+uptr __sanitizer_in_addr_sz(int af) {
+  if (af == AF_INET)
+    return sizeof(struct in_addr);
+  else if (af == AF_INET6)
+    return sizeof(struct in6_addr);
+  else
+    return 0;
+}
+
+unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr);
+
+int glob_nomatch = GLOB_NOMATCH;
+int glob_altdirfunc = GLOB_ALTDIRFUNC;
+
+unsigned path_max = PATH_MAX;
+
+// ioctl arguments
+unsigned struct_ifreq_sz = sizeof(struct ifreq);
+unsigned struct_termios_sz = sizeof(struct termios);
+unsigned struct_winsize_sz = sizeof(struct winsize);
+unsigned struct_mtget_sz = sizeof(struct mtget);
+unsigned struct_mtop_sz = sizeof(struct mtop);
+unsigned struct_audio_buf_info_sz = sizeof(struct audio_buf_info);
+unsigned struct_ppp_stats_sz = sizeof(struct ppp_stats);
+unsigned struct_sioc_sg_req_sz = sizeof(struct sioc_sg_req);
+unsigned struct_sioc_vif_req_sz = sizeof(struct sioc_vif_req);
+
+const unsigned IOCTL_NOT_PRESENT = 0;
+
+unsigned IOCTL_FIOASYNC = FIOASYNC;
+unsigned IOCTL_FIOCLEX = FIOCLEX;
+unsigned IOCTL_FIOGETOWN = FIOGETOWN;
+unsigned IOCTL_FIONBIO = FIONBIO;
+unsigned IOCTL_FIONCLEX = FIONCLEX;
+unsigned IOCTL_FIOSETOWN = FIOSETOWN;
+unsigned IOCTL_SIOCADDMULTI = SIOCADDMULTI;
+unsigned IOCTL_SIOCATMARK = SIOCATMARK;
+unsigned IOCTL_SIOCDELMULTI = SIOCDELMULTI;
+unsigned IOCTL_SIOCGIFADDR = SIOCGIFADDR;
+unsigned IOCTL_SIOCGIFBRDADDR = SIOCGIFBRDADDR;
+unsigned IOCTL_SIOCGIFCONF = SIOCGIFCONF;
+unsigned IOCTL_SIOCGIFDSTADDR = SIOCGIFDSTADDR;
+unsigned IOCTL_SIOCGIFFLAGS = SIOCGIFFLAGS;
+unsigned IOCTL_SIOCGIFMETRIC = SIOCGIFMETRIC;
+unsigned IOCTL_SIOCGIFMTU = SIOCGIFMTU;
+unsigned IOCTL_SIOCGIFNETMASK = SIOCGIFNETMASK;
+unsigned IOCTL_SIOCGPGRP = SIOCGPGRP;
+unsigned IOCTL_SIOCSIFADDR = SIOCSIFADDR;
+unsigned IOCTL_SIOCSIFBRDADDR = SIOCSIFBRDADDR;
+unsigned IOCTL_SIOCSIFDSTADDR = SIOCSIFDSTADDR;
+unsigned IOCTL_SIOCSIFFLAGS = SIOCSIFFLAGS;
+unsigned IOCTL_SIOCSIFMETRIC = SIOCSIFMETRIC;
+unsigned IOCTL_SIOCSIFMTU = SIOCSIFMTU;
+unsigned IOCTL_SIOCSIFNETMASK = SIOCSIFNETMASK;
+unsigned IOCTL_SIOCSPGRP = SIOCSPGRP;
+unsigned IOCTL_TIOCCONS = TIOCCONS;
+unsigned IOCTL_TIOCEXCL = TIOCEXCL;
+unsigned IOCTL_TIOCGETD = TIOCGETD;
+unsigned IOCTL_TIOCGPGRP = TIOCGPGRP;
+unsigned IOCTL_TIOCGWINSZ = TIOCGWINSZ;
+unsigned IOCTL_TIOCMBIC = TIOCMBIC;
+unsigned IOCTL_TIOCMBIS = TIOCMBIS;
+unsigned IOCTL_TIOCMGET = TIOCMGET;
+unsigned IOCTL_TIOCMSET = TIOCMSET;
+unsigned IOCTL_TIOCNOTTY = TIOCNOTTY;
+unsigned IOCTL_TIOCNXCL = TIOCNXCL;
+unsigned IOCTL_TIOCOUTQ = TIOCOUTQ;
+unsigned IOCTL_TIOCPKT = TIOCPKT;
+unsigned IOCTL_TIOCSCTTY = TIOCSCTTY;
+unsigned IOCTL_TIOCSETD = TIOCSETD;
+unsigned IOCTL_TIOCSPGRP = TIOCSPGRP;
+unsigned IOCTL_TIOCSTI = TIOCSTI;
+unsigned IOCTL_TIOCSWINSZ = TIOCSWINSZ;
+unsigned IOCTL_SIOCGETSGCNT = SIOCGETSGCNT;
+unsigned IOCTL_SIOCGETVIFCNT = SIOCGETVIFCNT;
+
+const int si_SEGV_MAPERR = SEGV_MAPERR;
+const int si_SEGV_ACCERR = SEGV_ACCERR;
+}  // namespace __sanitizer
+
+using namespace __sanitizer;
+
+COMPILER_CHECK(sizeof(__sanitizer_pthread_attr_t) >= sizeof(pthread_attr_t));
+
+COMPILER_CHECK(sizeof(socklen_t) == sizeof(unsigned));
+CHECK_TYPE_SIZE(pthread_key_t);
+
+// There are more undocumented fields in dl_phdr_info that we are not interested
+// in.
+COMPILER_CHECK(sizeof(__sanitizer_dl_phdr_info) <= sizeof(dl_phdr_info));
+CHECK_SIZE_AND_OFFSET(dl_phdr_info, dlpi_addr);
+CHECK_SIZE_AND_OFFSET(dl_phdr_info, dlpi_name);
+CHECK_SIZE_AND_OFFSET(dl_phdr_info, dlpi_phdr);
+CHECK_SIZE_AND_OFFSET(dl_phdr_info, dlpi_phnum);
+
+CHECK_TYPE_SIZE(glob_t);
+CHECK_SIZE_AND_OFFSET(glob_t, gl_pathc);
+CHECK_SIZE_AND_OFFSET(glob_t, gl_pathv);
+CHECK_SIZE_AND_OFFSET(glob_t, gl_offs);
+CHECK_SIZE_AND_OFFSET(glob_t, gl_flags);
+CHECK_SIZE_AND_OFFSET(glob_t, gl_closedir);
+CHECK_SIZE_AND_OFFSET(glob_t, gl_readdir);
+CHECK_SIZE_AND_OFFSET(glob_t, gl_opendir);
+CHECK_SIZE_AND_OFFSET(glob_t, gl_lstat);
+CHECK_SIZE_AND_OFFSET(glob_t, gl_stat);
+
+CHECK_TYPE_SIZE(addrinfo);
+CHECK_SIZE_AND_OFFSET(addrinfo, ai_flags);
+CHECK_SIZE_AND_OFFSET(addrinfo, ai_family);
+CHECK_SIZE_AND_OFFSET(addrinfo, ai_socktype);
+CHECK_SIZE_AND_OFFSET(addrinfo, ai_protocol);
+CHECK_SIZE_AND_OFFSET(addrinfo, ai_protocol);
+CHECK_SIZE_AND_OFFSET(addrinfo, ai_addrlen);
+CHECK_SIZE_AND_OFFSET(addrinfo, ai_canonname);
+CHECK_SIZE_AND_OFFSET(addrinfo, ai_addr);
+
+CHECK_TYPE_SIZE(hostent);
+CHECK_SIZE_AND_OFFSET(hostent, h_name);
+CHECK_SIZE_AND_OFFSET(hostent, h_aliases);
+CHECK_SIZE_AND_OFFSET(hostent, h_addrtype);
+CHECK_SIZE_AND_OFFSET(hostent, h_length);
+CHECK_SIZE_AND_OFFSET(hostent, h_addr_list);
+
+CHECK_TYPE_SIZE(iovec);
+CHECK_SIZE_AND_OFFSET(iovec, iov_base);
+CHECK_SIZE_AND_OFFSET(iovec, iov_len);
+
+CHECK_TYPE_SIZE(msghdr);
+CHECK_SIZE_AND_OFFSET(msghdr, msg_name);
+CHECK_SIZE_AND_OFFSET(msghdr, msg_namelen);
+CHECK_SIZE_AND_OFFSET(msghdr, msg_iov);
+CHECK_SIZE_AND_OFFSET(msghdr, msg_iovlen);
+CHECK_SIZE_AND_OFFSET(msghdr, msg_control);
+CHECK_SIZE_AND_OFFSET(msghdr, msg_controllen);
+CHECK_SIZE_AND_OFFSET(msghdr, msg_flags);
+
+CHECK_TYPE_SIZE(cmsghdr);
+CHECK_SIZE_AND_OFFSET(cmsghdr, cmsg_len);
+CHECK_SIZE_AND_OFFSET(cmsghdr, cmsg_level);
+CHECK_SIZE_AND_OFFSET(cmsghdr, cmsg_type);
+
+COMPILER_CHECK(sizeof(__sanitizer_dirent) <= sizeof(dirent));
+CHECK_SIZE_AND_OFFSET(dirent, d_fileno);
+CHECK_SIZE_AND_OFFSET(dirent, d_reclen);
+
+CHECK_TYPE_SIZE(ifconf);
+CHECK_SIZE_AND_OFFSET(ifconf, ifc_len);
+CHECK_SIZE_AND_OFFSET(ifconf, ifc_ifcu);
+
+CHECK_TYPE_SIZE(pollfd);
+CHECK_SIZE_AND_OFFSET(pollfd, fd);
+CHECK_SIZE_AND_OFFSET(pollfd, events);
+CHECK_SIZE_AND_OFFSET(pollfd, revents);
+
+CHECK_TYPE_SIZE(nfds_t);
+
+CHECK_TYPE_SIZE(sigset_t);
+
+COMPILER_CHECK(sizeof(__sanitizer_sigaction) == sizeof(struct sigaction));
+// Can't write checks for sa_handler and sa_sigaction due to them being
+// preprocessor macros.
+CHECK_STRUCT_SIZE_AND_OFFSET(sigaction, sa_mask);
+
+CHECK_TYPE_SIZE(wordexp_t);
+CHECK_SIZE_AND_OFFSET(wordexp_t, we_wordc);
+CHECK_SIZE_AND_OFFSET(wordexp_t, we_wordv);
+CHECK_SIZE_AND_OFFSET(wordexp_t, we_offs);
+
+CHECK_TYPE_SIZE(tm);
+CHECK_SIZE_AND_OFFSET(tm, tm_sec);
+CHECK_SIZE_AND_OFFSET(tm, tm_min);
+CHECK_SIZE_AND_OFFSET(tm, tm_hour);
+CHECK_SIZE_AND_OFFSET(tm, tm_mday);
+CHECK_SIZE_AND_OFFSET(tm, tm_mon);
+CHECK_SIZE_AND_OFFSET(tm, tm_year);
+CHECK_SIZE_AND_OFFSET(tm, tm_wday);
+CHECK_SIZE_AND_OFFSET(tm, tm_yday);
+CHECK_SIZE_AND_OFFSET(tm, tm_isdst);
+CHECK_SIZE_AND_OFFSET(tm, tm_gmtoff);
+CHECK_SIZE_AND_OFFSET(tm, tm_zone);
+
+CHECK_TYPE_SIZE(ether_addr);
+
+CHECK_TYPE_SIZE(ipc_perm);
+CHECK_SIZE_AND_OFFSET(ipc_perm, _key);
+CHECK_SIZE_AND_OFFSET(ipc_perm, _seq);
+CHECK_SIZE_AND_OFFSET(ipc_perm, uid);
+CHECK_SIZE_AND_OFFSET(ipc_perm, gid);
+CHECK_SIZE_AND_OFFSET(ipc_perm, cuid);
+CHECK_SIZE_AND_OFFSET(ipc_perm, cgid);
+CHECK_SIZE_AND_OFFSET(ipc_perm, mode);
+
+CHECK_TYPE_SIZE(shmid_ds);
+CHECK_SIZE_AND_OFFSET(shmid_ds, shm_perm);
+CHECK_SIZE_AND_OFFSET(shmid_ds, shm_segsz);
+CHECK_SIZE_AND_OFFSET(shmid_ds, shm_atime);
+CHECK_SIZE_AND_OFFSET(shmid_ds, shm_dtime);
+CHECK_SIZE_AND_OFFSET(shmid_ds, shm_ctime);
+CHECK_SIZE_AND_OFFSET(shmid_ds, shm_cpid);
+CHECK_SIZE_AND_OFFSET(shmid_ds, shm_lpid);
+CHECK_SIZE_AND_OFFSET(shmid_ds, shm_nattch);
+
+CHECK_TYPE_SIZE(clock_t);
+
+CHECK_TYPE_SIZE(ifaddrs);
+CHECK_SIZE_AND_OFFSET(ifaddrs, ifa_next);
+CHECK_SIZE_AND_OFFSET(ifaddrs, ifa_name);
+CHECK_SIZE_AND_OFFSET(ifaddrs, ifa_addr);
+CHECK_SIZE_AND_OFFSET(ifaddrs, ifa_netmask);
+// Compare against the union, because we can't reach into the union in a
+// compliant way.
+#ifdef ifa_dstaddr
+#undef ifa_dstaddr
+#endif
+CHECK_SIZE_AND_OFFSET(ifaddrs, ifa_dstaddr);
+CHECK_SIZE_AND_OFFSET(ifaddrs, ifa_data);
+
+CHECK_TYPE_SIZE(timeb);
+CHECK_SIZE_AND_OFFSET(timeb, time);
+CHECK_SIZE_AND_OFFSET(timeb, millitm);
+CHECK_SIZE_AND_OFFSET(timeb, timezone);
+CHECK_SIZE_AND_OFFSET(timeb, dstflag);
+
+CHECK_TYPE_SIZE(passwd);
+CHECK_SIZE_AND_OFFSET(passwd, pw_name);
+CHECK_SIZE_AND_OFFSET(passwd, pw_passwd);
+CHECK_SIZE_AND_OFFSET(passwd, pw_uid);
+CHECK_SIZE_AND_OFFSET(passwd, pw_gid);
+CHECK_SIZE_AND_OFFSET(passwd, pw_dir);
+CHECK_SIZE_AND_OFFSET(passwd, pw_shell);
+
+CHECK_SIZE_AND_OFFSET(passwd, pw_gecos);
+
+CHECK_TYPE_SIZE(group);
+CHECK_SIZE_AND_OFFSET(group, gr_name);
+CHECK_SIZE_AND_OFFSET(group, gr_passwd);
+CHECK_SIZE_AND_OFFSET(group, gr_gid);
+CHECK_SIZE_AND_OFFSET(group, gr_mem);
+
+#endif  // SANITIZER_NETBSD
diff --git a/lib/sanitizer_common/sanitizer_platform_limits_netbsd.h b/lib/sanitizer_common/sanitizer_platform_limits_netbsd.h
new file mode 100644
index 0000000..d2f3a65
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_platform_limits_netbsd.h
@@ -0,0 +1,568 @@
+//===-- sanitizer_platform_limits_netbsd.h --------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of Sanitizer common code.
+//
+// Sizes and layouts of platform-specific NetBSD data structures.
+//===----------------------------------------------------------------------===//
+
+#ifndef SANITIZER_PLATFORM_LIMITS_NETBSD_H
+#define SANITIZER_PLATFORM_LIMITS_NETBSD_H
+
+#if SANITIZER_NETBSD
+
+#include "sanitizer_internal_defs.h"
+#include "sanitizer_platform.h"
+
+#define _GET_LINK_MAP_BY_DLOPEN_HANDLE(handle, shift) \
+  ((link_map *)((handle) == nullptr ? nullptr : ((char *)(handle) + (shift))))
+
+#if defined(__x86_64__)
+#define GET_LINK_MAP_BY_DLOPEN_HANDLE(handle) \
+  _GET_LINK_MAP_BY_DLOPEN_HANDLE(handle, 608)
+#elif defined(__i386__)
+#define GET_LINK_MAP_BY_DLOPEN_HANDLE(handle) \
+  _GET_LINK_MAP_BY_DLOPEN_HANDLE(handle, 324)
+#endif
+
+namespace __sanitizer {
+extern unsigned struct_utsname_sz;
+extern unsigned struct_stat_sz;
+extern unsigned struct_rusage_sz;
+extern unsigned siginfo_t_sz;
+extern unsigned struct_itimerval_sz;
+extern unsigned pthread_t_sz;
+extern unsigned pthread_cond_t_sz;
+extern unsigned pid_t_sz;
+extern unsigned timeval_sz;
+extern unsigned uid_t_sz;
+extern unsigned gid_t_sz;
+extern unsigned mbstate_t_sz;
+extern unsigned struct_timezone_sz;
+extern unsigned struct_tms_sz;
+extern unsigned struct_itimerspec_sz;
+extern unsigned struct_sigevent_sz;
+extern unsigned struct_sched_param_sz;
+extern unsigned struct_statfs_sz;
+extern unsigned struct_sockaddr_sz;
+extern unsigned ucontext_t_sz;
+
+extern unsigned struct_rlimit_sz;
+extern unsigned struct_utimbuf_sz;
+extern unsigned struct_timespec_sz;
+
+struct __sanitizer_iocb {
+  u64 aio_offset;
+  uptr aio_buf;
+  long aio_nbytes;
+  u32 aio_fildes;
+  u32 aio_lio_opcode;
+  long aio_reqprio;
+#if SANITIZER_WORDSIZE == 64
+  u8 aio_sigevent[32];
+#else
+  u8 aio_sigevent[20];
+#endif
+  u32 _state;
+  u32 _errno;
+  long _retval;
+};
+
+struct __sanitizer___sysctl_args {
+  int *name;
+  int nlen;
+  void *oldval;
+  uptr *oldlenp;
+  void *newval;
+  uptr newlen;
+};
+
+struct __sanitizer_sem_t {
+  uptr data[5];
+};
+
+struct __sanitizer_ipc_perm {
+  u32 uid;
+  u32 gid;
+  u32 cuid;
+  u32 cgid;
+  u32 mode;
+  unsigned short _seq;
+  long _key;
+};
+
+struct __sanitizer_shmid_ds {
+  __sanitizer_ipc_perm shm_perm;
+  unsigned long shm_segsz;
+  u32 shm_lpid;
+  u32 shm_cpid;
+  unsigned int shm_nattch;
+  u64 shm_atime;
+  u64 shm_dtime;
+  u64 shm_ctime;
+  void *_shm_internal;
+};
+
+extern unsigned struct_msqid_ds_sz;
+extern unsigned struct_mq_attr_sz;
+extern unsigned struct_timex_sz;
+extern unsigned struct_statvfs_sz;
+
+struct __sanitizer_iovec {
+  void *iov_base;
+  uptr iov_len;
+};
+
+struct __sanitizer_ifaddrs {
+  struct __sanitizer_ifaddrs *ifa_next;
+  char *ifa_name;
+  unsigned int ifa_flags;
+  void *ifa_addr;     // (struct sockaddr *)
+  void *ifa_netmask;  // (struct sockaddr *)
+  void *ifa_dstaddr;  // (struct sockaddr *)
+  void *ifa_data;
+  unsigned int ifa_addrflags;
+};
+
+typedef unsigned __sanitizer_pthread_key_t;
+
+typedef long long __sanitizer_time_t;
+
+struct __sanitizer_passwd {
+  char *pw_name;
+  char *pw_passwd;
+  int pw_uid;
+  int pw_gid;
+  __sanitizer_time_t pw_change;
+  char *pw_class;
+  char *pw_gecos;
+  char *pw_dir;
+  char *pw_shell;
+  __sanitizer_time_t pw_expire;
+};
+
+struct __sanitizer_group {
+  char *gr_name;
+  char *gr_passwd;
+  int gr_gid;
+  char **gr_mem;
+};
+
+struct __sanitizer_timeb {
+  __sanitizer_time_t time;
+  unsigned short millitm;
+  short timezone;
+  short dstflag;
+};
+
+struct __sanitizer_ether_addr {
+  u8 octet[6];
+};
+
+struct __sanitizer_tm {
+  int tm_sec;
+  int tm_min;
+  int tm_hour;
+  int tm_mday;
+  int tm_mon;
+  int tm_year;
+  int tm_wday;
+  int tm_yday;
+  int tm_isdst;
+  long int tm_gmtoff;
+  const char *tm_zone;
+};
+
+struct __sanitizer_msghdr {
+  void *msg_name;
+  unsigned msg_namelen;
+  struct __sanitizer_iovec *msg_iov;
+  unsigned msg_iovlen;
+  void *msg_control;
+  unsigned msg_controllen;
+  int msg_flags;
+};
+struct __sanitizer_cmsghdr {
+  unsigned cmsg_len;
+  int cmsg_level;
+  int cmsg_type;
+};
+
+struct __sanitizer_dirent {
+  u64 d_fileno;
+  u16 d_reclen;
+  // more fields that we don't care about
+};
+
+typedef int __sanitizer_clock_t;
+typedef int __sanitizer_clockid_t;
+
+typedef u32 __sanitizer___kernel_uid_t;
+typedef u32 __sanitizer___kernel_gid_t;
+typedef u64 __sanitizer___kernel_off_t;
+typedef struct {
+  u32 fds_bits[8];
+} __sanitizer___kernel_fd_set;
+
+typedef struct {
+  unsigned int pta_magic;
+  int pta_flags;
+  void *pta_private;
+} __sanitizer_pthread_attr_t;
+
+struct __sanitizer_sigset_t {
+  // uint32_t * 4
+  unsigned int __bits[4];
+};
+
+struct __sanitizer_sigaction {
+  union {
+    void (*handler)(int sig);
+    void (*sigaction)(int sig, void *siginfo, void *uctx);
+  };
+  __sanitizer_sigset_t sa_mask;
+  int sa_flags;
+};
+
+typedef __sanitizer_sigset_t __sanitizer_kernel_sigset_t;
+
+struct __sanitizer_kernel_sigaction_t {
+  union {
+    void (*handler)(int signo);
+    void (*sigaction)(int signo, void *info, void *ctx);
+  };
+  unsigned long sa_flags;
+  void (*sa_restorer)(void);
+  __sanitizer_kernel_sigset_t sa_mask;
+};
+
+extern uptr sig_ign;
+extern uptr sig_dfl;
+extern uptr sa_siginfo;
+
+extern int af_inet;
+extern int af_inet6;
+uptr __sanitizer_in_addr_sz(int af);
+
+struct __sanitizer_dl_phdr_info {
+  uptr dlpi_addr;
+  const char *dlpi_name;
+  const void *dlpi_phdr;
+  short dlpi_phnum;
+};
+
+extern unsigned struct_ElfW_Phdr_sz;
+
+struct __sanitizer_addrinfo {
+  int ai_flags;
+  int ai_family;
+  int ai_socktype;
+  int ai_protocol;
+  unsigned ai_addrlen;
+  char *ai_canonname;
+  void *ai_addr;
+  struct __sanitizer_addrinfo *ai_next;
+};
+
+struct __sanitizer_hostent {
+  char *h_name;
+  char **h_aliases;
+  int h_addrtype;
+  int h_length;
+  char **h_addr_list;
+};
+
+struct __sanitizer_pollfd {
+  int fd;
+  short events;
+  short revents;
+};
+
+typedef unsigned __sanitizer_nfds_t;
+
+struct __sanitizer_glob_t {
+  uptr gl_pathc;
+  uptr gl_matchc;
+  uptr gl_offs;
+  int gl_flags;
+  char **gl_pathv;
+  int (*gl_errfunc)(const char *, int);
+  void (*gl_closedir)(void *dirp);
+  struct dirent *(*gl_readdir)(void *dirp);
+  void *(*gl_opendir)(const char *);
+  int (*gl_lstat)(const char *, void * /* struct stat* */);
+  int (*gl_stat)(const char *, void * /* struct stat* */);
+};
+
+extern int glob_nomatch;
+extern int glob_altdirfunc;
+
+extern unsigned path_max;
+
+struct __sanitizer_wordexp_t {
+  uptr we_wordc;
+  char **we_wordv;
+  uptr we_offs;
+  char *we_strings;
+  uptr we_nbytes;
+};
+
+typedef char __sanitizer_FILE;
+#define SANITIZER_HAS_STRUCT_FILE 0
+
+extern int shmctl_ipc_stat;
+
+// This simplifies generic code
+#define struct_shminfo_sz -1
+#define struct_shm_info_sz -1
+#define shmctl_shm_stat -1
+#define shmctl_ipc_info -1
+#define shmctl_shm_info -1
+
+extern unsigned struct_utmp_sz;
+extern unsigned struct_utmpx_sz;
+
+extern int map_fixed;
+
+// ioctl arguments
+struct __sanitizer_ifconf {
+  int ifc_len;
+  union {
+    void *ifcu_req;
+  } ifc_ifcu;
+};
+
+#define IOC_NRBITS 8
+#define IOC_TYPEBITS 8
+#define IOC_SIZEBITS 14
+#define IOC_DIRBITS 2
+#define IOC_NONE 0U
+#define IOC_WRITE 1U
+#define IOC_READ 2U
+#define IOC_NRMASK ((1 << IOC_NRBITS) - 1)
+#define IOC_TYPEMASK ((1 << IOC_TYPEBITS) - 1)
+#define IOC_SIZEMASK ((1 << IOC_SIZEBITS) - 1)
+#undef IOC_DIRMASK
+#define IOC_DIRMASK ((1 << IOC_DIRBITS) - 1)
+#define IOC_NRSHIFT 0
+#define IOC_TYPESHIFT (IOC_NRSHIFT + IOC_NRBITS)
+#define IOC_SIZESHIFT (IOC_TYPESHIFT + IOC_TYPEBITS)
+#define IOC_DIRSHIFT (IOC_SIZESHIFT + IOC_SIZEBITS)
+#define EVIOC_EV_MAX 0x1f
+#define EVIOC_ABS_MAX 0x3f
+
+#define IOC_DIR(nr) (((nr) >> IOC_DIRSHIFT) & IOC_DIRMASK)
+#define IOC_TYPE(nr) (((nr) >> IOC_TYPESHIFT) & IOC_TYPEMASK)
+#define IOC_NR(nr) (((nr) >> IOC_NRSHIFT) & IOC_NRMASK)
+#define IOC_SIZE(nr) (((nr) >> IOC_SIZESHIFT) & IOC_SIZEMASK)
+
+extern unsigned struct_ifreq_sz;
+extern unsigned struct_termios_sz;
+extern unsigned struct_winsize_sz;
+
+extern unsigned struct_arpreq_sz;
+
+extern unsigned struct_mtget_sz;
+extern unsigned struct_mtop_sz;
+extern unsigned struct_rtentry_sz;
+extern unsigned struct_sbi_instrument_sz;
+extern unsigned struct_seq_event_rec_sz;
+extern unsigned struct_synth_info_sz;
+extern unsigned struct_vt_mode_sz;
+extern unsigned struct_audio_buf_info_sz;
+extern unsigned struct_ppp_stats_sz;
+extern unsigned struct_sioc_sg_req_sz;
+extern unsigned struct_sioc_vif_req_sz;
+
+// ioctl request identifiers
+
+// A special value to mark ioctls that are not present on the target platform,
+// when it can not be determined without including any system headers.
+extern const unsigned IOCTL_NOT_PRESENT;
+
+extern unsigned IOCTL_FIOASYNC;
+extern unsigned IOCTL_FIOCLEX;
+extern unsigned IOCTL_FIOGETOWN;
+extern unsigned IOCTL_FIONBIO;
+extern unsigned IOCTL_FIONCLEX;
+extern unsigned IOCTL_FIOSETOWN;
+extern unsigned IOCTL_SIOCADDMULTI;
+extern unsigned IOCTL_SIOCATMARK;
+extern unsigned IOCTL_SIOCDELMULTI;
+extern unsigned IOCTL_SIOCGIFADDR;
+extern unsigned IOCTL_SIOCGIFBRDADDR;
+extern unsigned IOCTL_SIOCGIFCONF;
+extern unsigned IOCTL_SIOCGIFDSTADDR;
+extern unsigned IOCTL_SIOCGIFFLAGS;
+extern unsigned IOCTL_SIOCGIFMETRIC;
+extern unsigned IOCTL_SIOCGIFMTU;
+extern unsigned IOCTL_SIOCGIFNETMASK;
+extern unsigned IOCTL_SIOCGPGRP;
+extern unsigned IOCTL_SIOCSIFADDR;
+extern unsigned IOCTL_SIOCSIFBRDADDR;
+extern unsigned IOCTL_SIOCSIFDSTADDR;
+extern unsigned IOCTL_SIOCSIFFLAGS;
+extern unsigned IOCTL_SIOCSIFMETRIC;
+extern unsigned IOCTL_SIOCSIFMTU;
+extern unsigned IOCTL_SIOCSIFNETMASK;
+extern unsigned IOCTL_SIOCSPGRP;
+extern unsigned IOCTL_TIOCCONS;
+extern unsigned IOCTL_TIOCEXCL;
+extern unsigned IOCTL_TIOCGETD;
+extern unsigned IOCTL_TIOCGPGRP;
+extern unsigned IOCTL_TIOCGWINSZ;
+extern unsigned IOCTL_TIOCMBIC;
+extern unsigned IOCTL_TIOCMBIS;
+extern unsigned IOCTL_TIOCMGET;
+extern unsigned IOCTL_TIOCMSET;
+extern unsigned IOCTL_TIOCNOTTY;
+extern unsigned IOCTL_TIOCNXCL;
+extern unsigned IOCTL_TIOCOUTQ;
+extern unsigned IOCTL_TIOCPKT;
+extern unsigned IOCTL_TIOCSCTTY;
+extern unsigned IOCTL_TIOCSETD;
+extern unsigned IOCTL_TIOCSPGRP;
+extern unsigned IOCTL_TIOCSTI;
+extern unsigned IOCTL_TIOCSWINSZ;
+extern unsigned IOCTL_SIOCGETSGCNT;
+extern unsigned IOCTL_SIOCGETVIFCNT;
+extern unsigned IOCTL_MTIOCGET;
+extern unsigned IOCTL_MTIOCTOP;
+extern unsigned IOCTL_SIOCADDRT;
+extern unsigned IOCTL_SIOCDELRT;
+extern unsigned IOCTL_SNDCTL_DSP_GETBLKSIZE;
+extern unsigned IOCTL_SNDCTL_DSP_GETFMTS;
+extern unsigned IOCTL_SNDCTL_DSP_NONBLOCK;
+extern unsigned IOCTL_SNDCTL_DSP_POST;
+extern unsigned IOCTL_SNDCTL_DSP_RESET;
+extern unsigned IOCTL_SNDCTL_DSP_SETFMT;
+extern unsigned IOCTL_SNDCTL_DSP_SETFRAGMENT;
+extern unsigned IOCTL_SNDCTL_DSP_SPEED;
+extern unsigned IOCTL_SNDCTL_DSP_STEREO;
+extern unsigned IOCTL_SNDCTL_DSP_SUBDIVIDE;
+extern unsigned IOCTL_SNDCTL_DSP_SYNC;
+extern unsigned IOCTL_SNDCTL_FM_4OP_ENABLE;
+extern unsigned IOCTL_SNDCTL_FM_LOAD_INSTR;
+extern unsigned IOCTL_SNDCTL_MIDI_INFO;
+extern unsigned IOCTL_SNDCTL_MIDI_PRETIME;
+extern unsigned IOCTL_SNDCTL_SEQ_CTRLRATE;
+extern unsigned IOCTL_SNDCTL_SEQ_GETINCOUNT;
+extern unsigned IOCTL_SNDCTL_SEQ_GETOUTCOUNT;
+extern unsigned IOCTL_SNDCTL_SEQ_NRMIDIS;
+extern unsigned IOCTL_SNDCTL_SEQ_NRSYNTHS;
+extern unsigned IOCTL_SNDCTL_SEQ_OUTOFBAND;
+extern unsigned IOCTL_SNDCTL_SEQ_PANIC;
+extern unsigned IOCTL_SNDCTL_SEQ_PERCMODE;
+extern unsigned IOCTL_SNDCTL_SEQ_RESET;
+extern unsigned IOCTL_SNDCTL_SEQ_RESETSAMPLES;
+extern unsigned IOCTL_SNDCTL_SEQ_SYNC;
+extern unsigned IOCTL_SNDCTL_SEQ_TESTMIDI;
+extern unsigned IOCTL_SNDCTL_SEQ_THRESHOLD;
+extern unsigned IOCTL_SNDCTL_SYNTH_INFO;
+extern unsigned IOCTL_SNDCTL_SYNTH_MEMAVL;
+extern unsigned IOCTL_SNDCTL_TMR_CONTINUE;
+extern unsigned IOCTL_SNDCTL_TMR_METRONOME;
+extern unsigned IOCTL_SNDCTL_TMR_SELECT;
+extern unsigned IOCTL_SNDCTL_TMR_SOURCE;
+extern unsigned IOCTL_SNDCTL_TMR_START;
+extern unsigned IOCTL_SNDCTL_TMR_STOP;
+extern unsigned IOCTL_SNDCTL_TMR_TEMPO;
+extern unsigned IOCTL_SNDCTL_TMR_TIMEBASE;
+extern unsigned IOCTL_SOUND_MIXER_READ_ALTPCM;
+extern unsigned IOCTL_SOUND_MIXER_READ_BASS;
+extern unsigned IOCTL_SOUND_MIXER_READ_CAPS;
+extern unsigned IOCTL_SOUND_MIXER_READ_CD;
+extern unsigned IOCTL_SOUND_MIXER_READ_DEVMASK;
+extern unsigned IOCTL_SOUND_MIXER_READ_ENHANCE;
+extern unsigned IOCTL_SOUND_MIXER_READ_IGAIN;
+extern unsigned IOCTL_SOUND_MIXER_READ_IMIX;
+extern unsigned IOCTL_SOUND_MIXER_READ_LINE1;
+extern unsigned IOCTL_SOUND_MIXER_READ_LINE2;
+extern unsigned IOCTL_SOUND_MIXER_READ_LINE3;
+extern unsigned IOCTL_SOUND_MIXER_READ_LINE;
+extern unsigned IOCTL_SOUND_MIXER_READ_LOUD;
+extern unsigned IOCTL_SOUND_MIXER_READ_MIC;
+extern unsigned IOCTL_SOUND_MIXER_READ_MUTE;
+extern unsigned IOCTL_SOUND_MIXER_READ_OGAIN;
+extern unsigned IOCTL_SOUND_MIXER_READ_PCM;
+extern unsigned IOCTL_SOUND_MIXER_READ_RECLEV;
+extern unsigned IOCTL_SOUND_MIXER_READ_RECMASK;
+extern unsigned IOCTL_SOUND_MIXER_READ_RECSRC;
+extern unsigned IOCTL_SOUND_MIXER_READ_SPEAKER;
+extern unsigned IOCTL_SOUND_MIXER_READ_STEREODEVS;
+extern unsigned IOCTL_SOUND_MIXER_READ_SYNTH;
+extern unsigned IOCTL_SOUND_MIXER_READ_TREBLE;
+extern unsigned IOCTL_SOUND_MIXER_READ_VOLUME;
+extern unsigned IOCTL_SOUND_MIXER_WRITE_ALTPCM;
+extern unsigned IOCTL_SOUND_MIXER_WRITE_BASS;
+extern unsigned IOCTL_SOUND_MIXER_WRITE_CD;
+extern unsigned IOCTL_SOUND_MIXER_WRITE_ENHANCE;
+extern unsigned IOCTL_SOUND_MIXER_WRITE_IGAIN;
+extern unsigned IOCTL_SOUND_MIXER_WRITE_IMIX;
+extern unsigned IOCTL_SOUND_MIXER_WRITE_LINE1;
+extern unsigned IOCTL_SOUND_MIXER_WRITE_LINE2;
+extern unsigned IOCTL_SOUND_MIXER_WRITE_LINE3;
+extern unsigned IOCTL_SOUND_MIXER_WRITE_LINE;
+extern unsigned IOCTL_SOUND_MIXER_WRITE_LOUD;
+extern unsigned IOCTL_SOUND_MIXER_WRITE_MIC;
+extern unsigned IOCTL_SOUND_MIXER_WRITE_MUTE;
+extern unsigned IOCTL_SOUND_MIXER_WRITE_OGAIN;
+extern unsigned IOCTL_SOUND_MIXER_WRITE_PCM;
+extern unsigned IOCTL_SOUND_MIXER_WRITE_RECLEV;
+extern unsigned IOCTL_SOUND_MIXER_WRITE_RECSRC;
+extern unsigned IOCTL_SOUND_MIXER_WRITE_SPEAKER;
+extern unsigned IOCTL_SOUND_MIXER_WRITE_SYNTH;
+extern unsigned IOCTL_SOUND_MIXER_WRITE_TREBLE;
+extern unsigned IOCTL_SOUND_MIXER_WRITE_VOLUME;
+extern unsigned IOCTL_SOUND_PCM_READ_BITS;
+extern unsigned IOCTL_SOUND_PCM_READ_CHANNELS;
+extern unsigned IOCTL_SOUND_PCM_READ_FILTER;
+extern unsigned IOCTL_SOUND_PCM_READ_RATE;
+extern unsigned IOCTL_SOUND_PCM_WRITE_CHANNELS;
+extern unsigned IOCTL_SOUND_PCM_WRITE_FILTER;
+extern unsigned IOCTL_VT_ACTIVATE;
+extern unsigned IOCTL_VT_GETMODE;
+extern unsigned IOCTL_VT_OPENQRY;
+extern unsigned IOCTL_VT_RELDISP;
+extern unsigned IOCTL_VT_SETMODE;
+extern unsigned IOCTL_VT_WAITACTIVE;
+extern unsigned IOCTL_KDDISABIO;
+extern unsigned IOCTL_KDENABIO;
+extern unsigned IOCTL_KDGETLED;
+extern unsigned IOCTL_KDGKBMODE;
+extern unsigned IOCTL_KDGKBTYPE;
+extern unsigned IOCTL_KDMKTONE;
+extern unsigned IOCTL_KDSETLED;
+extern unsigned IOCTL_KDSETMODE;
+extern unsigned IOCTL_KDSKBMODE;
+
+extern const int si_SEGV_MAPERR;
+extern const int si_SEGV_ACCERR;
+}  // namespace __sanitizer
+
+#define CHECK_TYPE_SIZE(TYPE) \
+  COMPILER_CHECK(sizeof(__sanitizer_##TYPE) == sizeof(TYPE))
+
+#define CHECK_SIZE_AND_OFFSET(CLASS, MEMBER)                      \
+  COMPILER_CHECK(sizeof(((__sanitizer_##CLASS *)NULL)->MEMBER) == \
+                 sizeof(((CLASS *)NULL)->MEMBER));                \
+  COMPILER_CHECK(offsetof(__sanitizer_##CLASS, MEMBER) ==         \
+                 offsetof(CLASS, MEMBER))
+
+// For sigaction, which is a function and struct at the same time,
+// and thus requires explicit "struct" in sizeof() expression.
+#define CHECK_STRUCT_SIZE_AND_OFFSET(CLASS, MEMBER)                      \
+  COMPILER_CHECK(sizeof(((struct __sanitizer_##CLASS *)NULL)->MEMBER) == \
+                 sizeof(((struct CLASS *)NULL)->MEMBER));                \
+  COMPILER_CHECK(offsetof(struct __sanitizer_##CLASS, MEMBER) ==         \
+                 offsetof(struct CLASS, MEMBER))
+
+#endif  // SANITIZER_NETBSD
+
+#endif
diff --git a/lib/sanitizer_common/sanitizer_platform_limits_posix.cc b/lib/sanitizer_common/sanitizer_platform_limits_posix.cc
index 683f019..83f4fd2 100644
--- a/lib/sanitizer_common/sanitizer_platform_limits_posix.cc
+++ b/lib/sanitizer_common/sanitizer_platform_limits_posix.cc
@@ -25,7 +25,6 @@
 #endif
 #include <arpa/inet.h>
 #include <dirent.h>
-#include <errno.h>
 #include <grp.h>
 #include <limits.h>
 #include <net/if.h>
@@ -931,14 +930,6 @@
   unsigned IOCTL_SNDCTL_DSP_GETOSPACE = SNDCTL_DSP_GETOSPACE;
 #endif // (SANITIZER_LINUX || SANITIZER_FREEBSD) && !SANITIZER_ANDROID
 
-  const int errno_EINVAL = EINVAL;
-// EOWNERDEAD is not present in some older platforms.
-#if defined(EOWNERDEAD)
-  const int errno_EOWNERDEAD = EOWNERDEAD;
-#else
-  const int errno_EOWNERDEAD = -1;
-#endif
-
   const int si_SEGV_MAPERR = SEGV_MAPERR;
   const int si_SEGV_ACCERR = SEGV_ACCERR;
 } // namespace __sanitizer
diff --git a/lib/sanitizer_common/sanitizer_platform_limits_posix.h b/lib/sanitizer_common/sanitizer_platform_limits_posix.h
index c2d9f2c..45b9d8e 100644
--- a/lib/sanitizer_common/sanitizer_platform_limits_posix.h
+++ b/lib/sanitizer_common/sanitizer_platform_limits_posix.h
@@ -15,6 +15,8 @@
 #ifndef SANITIZER_PLATFORM_LIMITS_POSIX_H
 #define SANITIZER_PLATFORM_LIMITS_POSIX_H
 
+#if SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_MAC
+
 #include "sanitizer_internal_defs.h"
 #include "sanitizer_platform.h"
 
@@ -23,6 +25,9 @@
 // incorporates the map structure.
 # define GET_LINK_MAP_BY_DLOPEN_HANDLE(handle) \
     ((link_map*)((handle) == nullptr ? nullptr : ((char*)(handle) + 544)))
+// Get sys/_types.h, because that tells us whether 64-bit inodes are
+// used in struct dirent below.
+#include <sys/_types.h>
 #else
 # define GET_LINK_MAP_BY_DLOPEN_HANDLE(handle) ((link_map*)(handle))
 #endif  // !SANITIZER_FREEBSD
@@ -83,7 +88,7 @@
 #elif defined(__mips__)
   const unsigned struct_kernel_stat_sz =
                  SANITIZER_ANDROID ? FIRST_32_SECOND_64(104, 128) :
-                                     FIRST_32_SECOND_64(144, 216);
+                                     FIRST_32_SECOND_64(160, 216);
   const unsigned struct_kernel_stat64_sz = 104;
 #elif defined(__s390__) && !defined(__s390x__)
   const unsigned struct_kernel_stat_sz = 64;
@@ -485,7 +490,12 @@
   };
 #elif SANITIZER_FREEBSD
   struct __sanitizer_dirent {
+#if defined(__INO64)
+    unsigned long long d_fileno;
+    unsigned long long d_off;
+#else
     unsigned int d_fileno;
+#endif
     unsigned short d_reclen;
     // more fields that we don't care about
   };
@@ -1456,9 +1466,6 @@
   extern unsigned IOCTL_PIO_SCRNMAP;
 #endif
 
-  extern const int errno_EINVAL;
-  extern const int errno_EOWNERDEAD;
-
   extern const int si_SEGV_MAPERR;
   extern const int si_SEGV_ACCERR;
 }  // namespace __sanitizer
@@ -1480,4 +1487,6 @@
   COMPILER_CHECK(offsetof(struct __sanitizer_##CLASS, MEMBER) ==          \
                  offsetof(struct CLASS, MEMBER))
 
+#endif  // SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_MAC
+
 #endif
diff --git a/lib/sanitizer_common/sanitizer_posix.cc b/lib/sanitizer_common/sanitizer_posix.cc
index 9916f4d..1fad71f 100644
--- a/lib/sanitizer_common/sanitizer_posix.cc
+++ b/lib/sanitizer_common/sanitizer_posix.cc
@@ -17,23 +17,17 @@
 #if SANITIZER_POSIX
 
 #include "sanitizer_common.h"
+#include "sanitizer_file.h"
 #include "sanitizer_libc.h"
 #include "sanitizer_posix.h"
 #include "sanitizer_procmaps.h"
 #include "sanitizer_stacktrace.h"
 
+#include <errno.h>
 #include <fcntl.h>
 #include <signal.h>
 #include <sys/mman.h>
 
-#if SANITIZER_LINUX
-#include <sys/utsname.h>
-#endif
-
-#if SANITIZER_LINUX && !SANITIZER_ANDROID
-#include <sys/personality.h>
-#endif
-
 #if SANITIZER_FREEBSD
 // The MAP_NORESERVE define has been removed in FreeBSD 11.x, and even before
 // that, it was never implemented.  So just define it to zero.
@@ -48,87 +42,13 @@
   return GetPageSize();
 }
 
-#if SANITIZER_WORDSIZE == 32
-// Take care of unusable kernel area in top gigabyte.
-static uptr GetKernelAreaSize() {
-#if SANITIZER_LINUX && !SANITIZER_X32
-  const uptr gbyte = 1UL << 30;
-
-  // Firstly check if there are writable segments
-  // mapped to top gigabyte (e.g. stack).
-  MemoryMappingLayout proc_maps(/*cache_enabled*/true);
-  uptr end, prot;
-  while (proc_maps.Next(/*start*/nullptr, &end,
-                        /*offset*/nullptr, /*filename*/nullptr,
-                        /*filename_size*/0, &prot)) {
-    if ((end >= 3 * gbyte)
-        && (prot & MemoryMappingLayout::kProtectionWrite) != 0)
-      return 0;
-  }
-
-#if !SANITIZER_ANDROID
-  // Even if nothing is mapped, top Gb may still be accessible
-  // if we are running on 64-bit kernel.
-  // Uname may report misleading results if personality type
-  // is modified (e.g. under schroot) so check this as well.
-  struct utsname uname_info;
-  int pers = personality(0xffffffffUL);
-  if (!(pers & PER_MASK)
-      && uname(&uname_info) == 0
-      && internal_strstr(uname_info.machine, "64"))
-    return 0;
-#endif  // SANITIZER_ANDROID
-
-  // Top gigabyte is reserved for kernel.
-  return gbyte;
-#else
-  return 0;
-#endif  // SANITIZER_LINUX && !SANITIZER_X32
-}
-#endif  // SANITIZER_WORDSIZE == 32
-
-uptr GetMaxVirtualAddress() {
-#if SANITIZER_WORDSIZE == 64
-# if defined(__aarch64__) && SANITIZER_IOS && !SANITIZER_IOSSIM
-  // Ideally, we would derive the upper bound from MACH_VM_MAX_ADDRESS. The
-  // upper bound can change depending on the device.
-  return 0x200000000 - 1;
-# elif defined(__powerpc64__) || defined(__aarch64__)
-  // On PowerPC64 we have two different address space layouts: 44- and 46-bit.
-  // We somehow need to figure out which one we are using now and choose
-  // one of 0x00000fffffffffffUL and 0x00003fffffffffffUL.
-  // Note that with 'ulimit -s unlimited' the stack is moved away from the top
-  // of the address space, so simply checking the stack address is not enough.
-  // This should (does) work for both PowerPC64 Endian modes.
-  // Similarly, aarch64 has multiple address space layouts: 39, 42 and 47-bit.
-  return (1ULL << (MostSignificantSetBitIndex(GET_CURRENT_FRAME()) + 1)) - 1;
-# elif defined(__mips64)
-  return (1ULL << 40) - 1;  // 0x000000ffffffffffUL;
-# elif defined(__s390x__)
-  return (1ULL << 53) - 1;  // 0x001fffffffffffffUL;
-# else
-  return (1ULL << 47) - 1;  // 0x00007fffffffffffUL;
-# endif
-#else  // SANITIZER_WORDSIZE == 32
-# if defined(__s390__)
-  return (1ULL << 31) - 1;  // 0x7fffffff;
-# else
-  uptr res = (1ULL << 32) - 1;  // 0xffffffff;
-  if (!common_flags()->full_address_space)
-    res -= GetKernelAreaSize();
-  CHECK_LT(reinterpret_cast<uptr>(&res), res);
-  return res;
-# endif
-#endif  // SANITIZER_WORDSIZE
-}
-
 void *MmapOrDie(uptr size, const char *mem_type, bool raw_report) {
   size = RoundUpTo(size, GetPageSizeCached());
   uptr res = internal_mmap(nullptr, size,
                            PROT_READ | PROT_WRITE,
                            MAP_PRIVATE | MAP_ANON, -1, 0);
   int reserrno;
-  if (internal_iserror(res, &reserrno))
+  if (UNLIKELY(internal_iserror(res, &reserrno)))
     ReportMmapFailureAndDie(size, mem_type, "allocate", reserrno, raw_report);
   IncreaseTotalMmap(size);
   return (void *)res;
@@ -137,7 +57,7 @@
 void UnmapOrDie(void *addr, uptr size) {
   if (!addr || !size) return;
   uptr res = internal_munmap(addr, size);
-  if (internal_iserror(res)) {
+  if (UNLIKELY(internal_iserror(res))) {
     Report("ERROR: %s failed to deallocate 0x%zx (%zd) bytes at address %p\n",
            SanitizerToolName, size, size, addr);
     CHECK("unable to unmap" && 0);
@@ -145,21 +65,39 @@
   DecreaseTotalMmap(size);
 }
 
+void *MmapOrDieOnFatalError(uptr size, const char *mem_type) {
+  size = RoundUpTo(size, GetPageSizeCached());
+  uptr res = internal_mmap(nullptr, size,
+                           PROT_READ | PROT_WRITE,
+                           MAP_PRIVATE | MAP_ANON, -1, 0);
+  int reserrno;
+  if (UNLIKELY(internal_iserror(res, &reserrno))) {
+    if (reserrno == ENOMEM)
+      return nullptr;
+    ReportMmapFailureAndDie(size, mem_type, "allocate", reserrno);
+  }
+  IncreaseTotalMmap(size);
+  return (void *)res;
+}
+
 // We want to map a chunk of address space aligned to 'alignment'.
-// We do it by maping a bit more and then unmaping redundant pieces.
+// We do it by mapping a bit more and then unmapping redundant pieces.
 // We probably can do it with fewer syscalls in some OS-dependent way.
-void *MmapAlignedOrDie(uptr size, uptr alignment, const char *mem_type) {
+void *MmapAlignedOrDieOnFatalError(uptr size, uptr alignment,
+                                   const char *mem_type) {
   CHECK(IsPowerOfTwo(size));
   CHECK(IsPowerOfTwo(alignment));
   uptr map_size = size + alignment;
-  uptr map_res = (uptr)MmapOrDie(map_size, mem_type);
+  uptr map_res = (uptr)MmapOrDieOnFatalError(map_size, mem_type);
+  if (UNLIKELY(!map_res))
+    return nullptr;
   uptr map_end = map_res + map_size;
   uptr res = map_res;
-  if (res & (alignment - 1))  // Not aligned.
-    res = (map_res + alignment) & ~(alignment - 1);
-  uptr end = res + size;
-  if (res != map_res)
+  if (!IsAligned(res, alignment)) {
+    res = (map_res + alignment - 1) & ~(alignment - 1);
     UnmapOrDie((void*)map_res, res - map_res);
+  }
+  uptr end = res + size;
   if (end != map_end)
     UnmapOrDie((void*)end, map_end - end);
   return (void*)res;
@@ -173,13 +111,13 @@
                          MAP_PRIVATE | MAP_ANON | MAP_NORESERVE,
                          -1, 0);
   int reserrno;
-  if (internal_iserror(p, &reserrno))
+  if (UNLIKELY(internal_iserror(p, &reserrno)))
     ReportMmapFailureAndDie(size, mem_type, "allocate noreserve", reserrno);
   IncreaseTotalMmap(size);
   return (void *)p;
 }
 
-void *MmapFixedOrDie(uptr fixed_addr, uptr size) {
+void *MmapFixedImpl(uptr fixed_addr, uptr size, bool tolerate_enomem) {
   uptr PageSize = GetPageSizeCached();
   uptr p = internal_mmap((void*)(fixed_addr & ~(PageSize - 1)),
       RoundUpTo(size, PageSize),
@@ -187,8 +125,10 @@
       MAP_PRIVATE | MAP_ANON | MAP_FIXED,
       -1, 0);
   int reserrno;
-  if (internal_iserror(p, &reserrno)) {
-    char mem_type[30];
+  if (UNLIKELY(internal_iserror(p, &reserrno))) {
+    if (tolerate_enomem && reserrno == ENOMEM)
+      return nullptr;
+    char mem_type[40];
     internal_snprintf(mem_type, sizeof(mem_type), "memory at address 0x%zx",
                       fixed_addr);
     ReportMmapFailureAndDie(size, mem_type, "allocate", reserrno);
@@ -197,6 +137,14 @@
   return (void *)p;
 }
 
+void *MmapFixedOrDie(uptr fixed_addr, uptr size) {
+  return MmapFixedImpl(fixed_addr, size, false /*tolerate_enomem*/);
+}
+
+void *MmapFixedOrDieOnFatalError(uptr fixed_addr, uptr size) {
+  return MmapFixedImpl(fixed_addr, size, true /*tolerate_enomem*/);
+}
+
 bool MprotectNoAccess(uptr addr, uptr size) {
   return 0 == internal_mprotect((void*)addr, size, PROT_NONE);
 }
@@ -284,13 +232,12 @@
 // memory).
 bool MemoryRangeIsAvailable(uptr range_start, uptr range_end) {
   MemoryMappingLayout proc_maps(/*cache_enabled*/true);
-  uptr start, end;
-  while (proc_maps.Next(&start, &end,
-                        /*offset*/nullptr, /*filename*/nullptr,
-                        /*filename_size*/0, /*protection*/nullptr)) {
-    if (start == end) continue;  // Empty range.
-    CHECK_NE(0, end);
-    if (!IntervalsAreSeparate(start, end - 1, range_start, range_end))
+  MemoryMappedSegment segment;
+  while (proc_maps.Next(&segment)) {
+    if (segment.start == segment.end) continue;  // Empty range.
+    CHECK_NE(0, segment.end);
+    if (!IntervalsAreSeparate(segment.start, segment.end - 1, range_start,
+                              range_end))
       return false;
   }
   return true;
@@ -298,13 +245,13 @@
 
 void DumpProcessMap() {
   MemoryMappingLayout proc_maps(/*cache_enabled*/true);
-  uptr start, end;
   const sptr kBufSize = 4095;
   char *filename = (char*)MmapOrDie(kBufSize, __func__);
+  MemoryMappedSegment segment(filename, kBufSize);
   Report("Process memory map follows:\n");
-  while (proc_maps.Next(&start, &end, /* file_offset */nullptr,
-                        filename, kBufSize, /* protection */nullptr)) {
-    Printf("\t%p-%p\t%s\n", (void*)start, (void*)end, filename);
+  while (proc_maps.Next(&segment)) {
+    Printf("\t%p-%p\t%s\n", (void *)segment.start, (void *)segment.end,
+           segment.filename);
   }
   Report("End of process memory map.\n");
   UnmapOrDie(filename, kBufSize);
@@ -334,32 +281,36 @@
 }
 
 bool GetCodeRangeForFile(const char *module, uptr *start, uptr *end) {
-  uptr s, e, off, prot;
-  InternalScopedString buff(kMaxPathLength);
   MemoryMappingLayout proc_maps(/*cache_enabled*/false);
-  while (proc_maps.Next(&s, &e, &off, buff.data(), buff.size(), &prot)) {
-    if ((prot & MemoryMappingLayout::kProtectionExecute) != 0
-        && internal_strcmp(module, buff.data()) == 0) {
-      *start = s;
-      *end = e;
+  InternalScopedString buff(kMaxPathLength);
+  MemoryMappedSegment segment(buff.data(), kMaxPathLength);
+  while (proc_maps.Next(&segment)) {
+    if (segment.IsExecutable() &&
+        internal_strcmp(module, segment.filename) == 0) {
+      *start = segment.start;
+      *end = segment.end;
       return true;
     }
   }
   return false;
 }
 
-SignalContext SignalContext::Create(void *siginfo, void *context) {
-  auto si = (siginfo_t *)siginfo;
-  uptr addr = (uptr)si->si_addr;
-  uptr pc, sp, bp;
-  GetPcSpBp(context, &pc, &sp, &bp);
-  WriteFlag write_flag = GetWriteFlag(context);
-  bool is_memory_access = si->si_signo == SIGSEGV;
-  return SignalContext(context, addr, pc, sp, bp, is_memory_access, write_flag);
+uptr SignalContext::GetAddress() const {
+  auto si = static_cast<const siginfo_t *>(siginfo);
+  return (uptr)si->si_addr;
 }
 
-const char *DescribeSignalOrException(int signo) {
-  switch (signo) {
+bool SignalContext::IsMemoryAccess() const {
+  auto si = static_cast<const siginfo_t *>(siginfo);
+  return si->si_signo == SIGSEGV;
+}
+
+int SignalContext::GetType() const {
+  return static_cast<const siginfo_t *>(siginfo)->si_signo;
+}
+
+const char *SignalContext::Describe() const {
+  switch (GetType()) {
     case SIGFPE:
       return "FPE";
     case SIGILL:
diff --git a/lib/sanitizer_common/sanitizer_posix.h b/lib/sanitizer_common/sanitizer_posix.h
index 7f862cd..0c7445f 100644
--- a/lib/sanitizer_common/sanitizer_posix.h
+++ b/lib/sanitizer_common/sanitizer_posix.h
@@ -16,6 +16,7 @@
 // ----------- ATTENTION -------------
 // This header should NOT include any other headers from sanitizer runtime.
 #include "sanitizer_internal_defs.h"
+#include "sanitizer_platform_limits_netbsd.h"
 #include "sanitizer_platform_limits_posix.h"
 
 #if !SANITIZER_POSIX
@@ -87,6 +88,9 @@
 
 uptr internal_execve(const char *filename, char *const argv[],
                      char *const envp[]);
+
+bool IsStateDetached(int state);
+
 }  // namespace __sanitizer
 
 #endif  // SANITIZER_POSIX_H
diff --git a/lib/sanitizer_common/sanitizer_posix_libcdep.cc b/lib/sanitizer_common/sanitizer_posix_libcdep.cc
index dd62140..0ba3ad4 100644
--- a/lib/sanitizer_common/sanitizer_posix_libcdep.cc
+++ b/lib/sanitizer_common/sanitizer_posix_libcdep.cc
@@ -18,6 +18,7 @@
 
 #include "sanitizer_common.h"
 #include "sanitizer_flags.h"
+#include "sanitizer_platform_limits_netbsd.h"
 #include "sanitizer_platform_limits_posix.h"
 #include "sanitizer_posix.h"
 #include "sanitizer_procmaps.h"
@@ -134,7 +135,8 @@
 void Abort() {
 #if !SANITIZER_GO
   // If we are handling SIGABRT, unhandle it first.
-  if (IsHandledDeadlySignal(SIGABRT)) {
+  // TODO(vitalybuka): Check if handler belongs to sanitizer.
+  if (GetHandleSignalMode(SIGABRT) != kHandleSignalNo) {
     struct sigaction sigact;
     internal_memset(&sigact, 0, sizeof(sigact));
     sigact.sa_sigaction = (sa_sigaction_t)SIG_DFL;
@@ -188,8 +190,8 @@
 
 static void MaybeInstallSigaction(int signum,
                                   SignalHandlerType handler) {
-  if (!IsHandledDeadlySignal(signum))
-    return;
+  if (GetHandleSignalMode(signum) == kHandleSignalNo) return;
+
   struct sigaction sigact;
   internal_memset(&sigact, 0, sizeof(sigact));
   sigact.sa_sigaction = (sa_sigaction_t)handler;
@@ -212,6 +214,53 @@
   MaybeInstallSigaction(SIGFPE, handler);
   MaybeInstallSigaction(SIGILL, handler);
 }
+
+bool SignalContext::IsStackOverflow() const {
+  // Access at a reasonable offset above SP, or slightly below it (to account
+  // for x86_64 or PowerPC redzone, ARM push of multiple registers, etc) is
+  // probably a stack overflow.
+#ifdef __s390__
+  // On s390, the fault address in siginfo points to start of the page, not
+  // to the precise word that was accessed.  Mask off the low bits of sp to
+  // take it into account.
+  bool IsStackAccess = addr >= (sp & ~0xFFF) && addr < sp + 0xFFFF;
+#else
+  bool IsStackAccess = addr + 512 > sp && addr < sp + 0xFFFF;
+#endif
+
+#if __powerpc__
+  // Large stack frames can be allocated with e.g.
+  //   lis r0,-10000
+  //   stdux r1,r1,r0 # store sp to [sp-10000] and update sp by -10000
+  // If the store faults then sp will not have been updated, so test above
+  // will not work, because the fault address will be more than just "slightly"
+  // below sp.
+  if (!IsStackAccess && IsAccessibleMemoryRange(pc, 4)) {
+    u32 inst = *(unsigned *)pc;
+    u32 ra = (inst >> 16) & 0x1F;
+    u32 opcd = inst >> 26;
+    u32 xo = (inst >> 1) & 0x3FF;
+    // Check for store-with-update to sp. The instructions we accept are:
+    //   stbu rs,d(ra)          stbux rs,ra,rb
+    //   sthu rs,d(ra)          sthux rs,ra,rb
+    //   stwu rs,d(ra)          stwux rs,ra,rb
+    //   stdu rs,ds(ra)         stdux rs,ra,rb
+    // where ra is r1 (the stack pointer).
+    if (ra == 1 &&
+        (opcd == 39 || opcd == 45 || opcd == 37 || opcd == 62 ||
+         (opcd == 31 && (xo == 247 || xo == 439 || xo == 183 || xo == 181))))
+      IsStackAccess = true;
+  }
+#endif  // __powerpc__
+
+  // We also check si_code to filter out SEGV caused by something else other
+  // then hitting the guard page or unmapped memory, like, for example,
+  // unaligned memory access.
+  auto si = static_cast<const siginfo_t *>(siginfo);
+  return IsStackAccess &&
+         (si->si_code == si_SEGV_MAPERR || si->si_code == si_SEGV_ACCERR);
+}
+
 #endif  // SANITIZER_GO
 
 bool IsAccessibleMemoryRange(uptr beg, uptr size) {
@@ -245,7 +294,6 @@
   // Same for /proc/self/exe in the symbolizer.
 #if !SANITIZER_GO
   Symbolizer::GetOrInit()->PrepareForSandboxing();
-  CovPrepareForSandboxing(args);
 #endif
 }
 
@@ -289,6 +337,36 @@
   return (void *)p;
 }
 
+uptr ReservedAddressRange::Init(uptr size, const char *name, uptr fixed_addr) {
+  if (fixed_addr) {
+    base_ = MmapFixedNoAccess(fixed_addr, size, name);
+  } else {
+    base_ = MmapNoAccess(size);
+  }
+  size_ = size;
+  name_ = name;
+  return reinterpret_cast<uptr>(base_);
+}
+
+// Uses fixed_addr for now.
+// Will use offset instead once we've implemented this function for real.
+uptr ReservedAddressRange::Map(uptr fixed_addr, uptr size,
+                               bool tolerate_enomem) {
+  if (tolerate_enomem) {
+    return reinterpret_cast<uptr>(MmapFixedOrDieOnFatalError(fixed_addr, size));
+  }
+  return reinterpret_cast<uptr>(MmapFixedOrDie(fixed_addr, size));
+}
+
+void ReservedAddressRange::Unmap(uptr addr, uptr size) {
+  void* addr_as_void = reinterpret_cast<void*>(addr);
+  uptr base_as_uptr = reinterpret_cast<uptr>(base_);
+  // Only unmap at the beginning or end of the range.
+  CHECK((addr_as_void == base_) || (addr + size == base_as_uptr + size_));
+  CHECK_LE(size, size_);
+  UnmapOrDie(reinterpret_cast<void*>(addr), size);
+}
+
 void *MmapFixedNoAccess(uptr fixed_addr, uptr size, const char *name) {
   int fd = name ? GetNamedMappingFd(name, size) : -1;
   unsigned flags = MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE;
@@ -418,6 +496,10 @@
   return process_status;
 }
 
+bool IsStateDetached(int state) {
+  return state == PTHREAD_CREATE_DETACHED;
+}
+
 } // namespace __sanitizer
 
 #endif // SANITIZER_POSIX
diff --git a/lib/sanitizer_common/sanitizer_printf.cc b/lib/sanitizer_common/sanitizer_printf.cc
index 99b7ff1..5c23600 100644
--- a/lib/sanitizer_common/sanitizer_printf.cc
+++ b/lib/sanitizer_common/sanitizer_printf.cc
@@ -28,8 +28,6 @@
 
 namespace __sanitizer {
 
-StaticSpinMutex CommonSanitizerReportMutex;
-
 static int AppendChar(char **buff, const char *buff_end, char c) {
   if (*buff < buff_end) {
     **buff = c;
@@ -229,19 +227,16 @@
     PrintfAndReportCallback(str);
 }
 
-static void SharedPrintfCode(bool append_pid, const char *format,
-                             va_list args) {
+static void NOINLINE SharedPrintfCodeNoBuffer(bool append_pid,
+                                              char *local_buffer,
+                                              int buffer_size,
+                                              const char *format,
+                                              va_list args) {
   va_list args2;
   va_copy(args2, args);
   const int kLen = 16 * 1024;
-  // |local_buffer| is small enough not to overflow the stack and/or violate
-  // the stack limit enforced by TSan (-Wframe-larger-than=512). On the other
-  // hand, the bigger the buffer is, the more the chance the error report will
-  // fit into it.
-  char local_buffer[400];
   int needed_length;
   char *buffer = local_buffer;
-  int buffer_size = ARRAY_SIZE(local_buffer);
   // First try to print a message using a local buffer, and then fall back to
   // mmaped buffer.
   for (int use_mmap = 0; use_mmap < 2; use_mmap++) {
@@ -259,7 +254,9 @@
         RAW_CHECK_MSG(needed_length < kLen, \
                       "Buffer in Report is too short!\n"); \
       }
-    if (append_pid) {
+    // Fuchsia's logging infrastructure always keeps track of the logging
+    // process, thread, and timestamp, so never prepend such information.
+    if (!SANITIZER_FUCHSIA && append_pid) {
       int pid = internal_getpid();
       const char *exe_name = GetProcessName();
       if (common_flags()->log_exe_name && exe_name) {
@@ -267,9 +264,8 @@
                                            "==%s", exe_name);
         CHECK_NEEDED_LENGTH
       }
-      needed_length += internal_snprintf(buffer + needed_length,
-                                         buffer_size - needed_length,
-                                         "==%d==", pid);
+      needed_length += internal_snprintf(
+          buffer + needed_length, buffer_size - needed_length, "==%d==", pid);
       CHECK_NEEDED_LENGTH
     }
     needed_length += VSNPrintf(buffer + needed_length,
@@ -292,6 +288,17 @@
   va_end(args2);
 }
 
+static void NOINLINE SharedPrintfCode(bool append_pid, const char *format,
+                                      va_list args) {
+  // |local_buffer| is small enough not to overflow the stack and/or violate
+  // the stack limit enforced by TSan (-Wframe-larger-than=512). On the other
+  // hand, the bigger the buffer is, the more the chance the error report will
+  // fit into it.
+  char local_buffer[400];
+  SharedPrintfCodeNoBuffer(append_pid, local_buffer, ARRAY_SIZE(local_buffer),
+                           format, args);
+}
+
 FORMAT(1, 2)
 void Printf(const char *format, ...) {
   va_list args;
diff --git a/lib/sanitizer_common/sanitizer_procmaps.h b/lib/sanitizer_common/sanitizer_procmaps.h
index 9dbb5ef..d462ad8 100644
--- a/lib/sanitizer_common/sanitizer_procmaps.h
+++ b/lib/sanitizer_common/sanitizer_procmaps.h
@@ -14,30 +14,61 @@
 #ifndef SANITIZER_PROCMAPS_H
 #define SANITIZER_PROCMAPS_H
 
+#include "sanitizer_platform.h"
+
+#if SANITIZER_LINUX || SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_MAC
+
 #include "sanitizer_common.h"
 #include "sanitizer_internal_defs.h"
+#include "sanitizer_linux.h"
+#include "sanitizer_mac.h"
 #include "sanitizer_mutex.h"
 
 namespace __sanitizer {
 
-#if SANITIZER_FREEBSD || SANITIZER_LINUX
-struct ProcSelfMapsBuff {
-  char *data;
-  uptr mmaped_size;
-  uptr len;
-};
 
-// Reads process memory map in an OS-specific way.
-void ReadProcMaps(ProcSelfMapsBuff *proc_maps);
-#endif  // SANITIZER_FREEBSD || SANITIZER_LINUX
+// Memory protection masks.
+static const uptr kProtectionRead = 1;
+static const uptr kProtectionWrite = 2;
+static const uptr kProtectionExecute = 4;
+static const uptr kProtectionShared = 8;
+
+struct MemoryMappedSegmentData;
+
+class MemoryMappedSegment {
+ public:
+  MemoryMappedSegment(char *buff = nullptr, uptr size = 0)
+      : filename(buff), filename_size(size), data_(nullptr) {}
+  ~MemoryMappedSegment() {}
+
+  bool IsReadable() const { return protection & kProtectionRead; }
+  bool IsWritable() const { return protection & kProtectionWrite; }
+  bool IsExecutable() const { return protection & kProtectionExecute; }
+  bool IsShared() const { return protection & kProtectionShared; }
+
+  void AddAddressRanges(LoadedModule *module);
+
+  uptr start;
+  uptr end;
+  uptr offset;
+  char *filename;  // owned by caller
+  uptr filename_size;
+  uptr protection;
+  ModuleArch arch;
+  u8 uuid[kModuleUUIDSize];
+
+ private:
+  friend class MemoryMappingLayout;
+
+  // This field is assigned and owned by MemoryMappingLayout if needed
+  MemoryMappedSegmentData *data_;
+};
 
 class MemoryMappingLayout {
  public:
   explicit MemoryMappingLayout(bool cache_enabled);
   ~MemoryMappingLayout();
-  bool Next(uptr *start, uptr *end, uptr *offset, char filename[],
-            uptr filename_size, uptr *protection, ModuleArch *arch = nullptr,
-            u8 *uuid = nullptr);
+  bool Next(MemoryMappedSegment *segment);
   void Reset();
   // In some cases, e.g. when running under a sandbox on Linux, ASan is unable
   // to obtain the memory mappings. It should fall back to pre-cached data
@@ -45,50 +76,14 @@
   static void CacheMemoryMappings();
 
   // Adds all mapped objects into a vector.
-  void DumpListOfModules(InternalMmapVector<LoadedModule> *modules);
-
-  // Memory protection masks.
-  static const uptr kProtectionRead = 1;
-  static const uptr kProtectionWrite = 2;
-  static const uptr kProtectionExecute = 4;
-  static const uptr kProtectionShared = 8;
+  void DumpListOfModules(InternalMmapVectorNoCtor<LoadedModule> *modules);
 
  private:
   void LoadFromCache();
 
-  // FIXME: Hide implementation details for different platforms in
-  // platform-specific files.
-# if SANITIZER_FREEBSD || SANITIZER_LINUX
-  ProcSelfMapsBuff proc_self_maps_;
-  const char *current_;
-
-  // Static mappings cache.
-  static ProcSelfMapsBuff cached_proc_self_maps_;
-  static StaticSpinMutex cache_lock_;  // protects cached_proc_self_maps_.
-# elif SANITIZER_MAC
-  template <u32 kLCSegment, typename SegmentCommand>
-  bool NextSegmentLoad(uptr *start, uptr *end, uptr *offset, char filename[],
-                       uptr filename_size, ModuleArch *arch, u8 *uuid,
-                       uptr *protection);
-  int current_image_;
-  u32 current_magic_;
-  u32 current_filetype_;
-  ModuleArch current_arch_;
-  u8 current_uuid_[kModuleUUIDSize];
-  int current_load_cmd_count_;
-  char *current_load_cmd_addr_;
-  bool current_instrumented_;
-# endif
+  MemoryMappingLayoutData data_;
 };
 
-typedef void (*fill_profile_f)(uptr start, uptr rss, bool file,
-                               /*out*/uptr *stats, uptr stats_size);
-
-// Parse the contents of /proc/self/smaps and generate a memory profile.
-// |cb| is a tool-specific callback that fills the |stats| array containing
-// |stats_size| elements.
-void GetMemoryProfile(fill_profile_f cb, uptr *stats, uptr stats_size);
-
 // Returns code range for the specified module.
 bool GetCodeRangeForFile(const char *module, uptr *start, uptr *end);
 
@@ -99,4 +94,6 @@
 
 }  // namespace __sanitizer
 
+#endif  // SANITIZER_LINUX || SANITIZER_FREEBSD || SANITIZER_NETBSD ||
+        // SANITIZER_MAC
 #endif  // SANITIZER_PROCMAPS_H
diff --git a/lib/sanitizer_common/sanitizer_procmaps_common.cc b/lib/sanitizer_common/sanitizer_procmaps_common.cc
index fac3fbd..b9298d4 100644
--- a/lib/sanitizer_common/sanitizer_procmaps_common.cc
+++ b/lib/sanitizer_common/sanitizer_procmaps_common.cc
@@ -12,7 +12,7 @@
 
 #include "sanitizer_platform.h"
 
-#if SANITIZER_FREEBSD || SANITIZER_LINUX
+#if SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_NETBSD
 
 #include "sanitizer_common.h"
 #include "sanitizer_placement_new.h"
@@ -20,9 +20,8 @@
 
 namespace __sanitizer {
 
-// Linker initialized.
-ProcSelfMapsBuff MemoryMappingLayout::cached_proc_self_maps_;
-StaticSpinMutex MemoryMappingLayout::cache_lock_;  // Linker initialized.
+static ProcSelfMapsBuff cached_proc_self_maps;
+static StaticSpinMutex cache_lock;
 
 static int TranslateDigit(char c) {
   if (c >= '0' && c <= '9')
@@ -64,15 +63,21 @@
   return ParseNumber(p, 16);
 }
 
+void MemoryMappedSegment::AddAddressRanges(LoadedModule *module) {
+  // data_ should be unused on this platform
+  CHECK(!data_);
+  module->addAddressRange(start, end, IsExecutable(), IsWritable());
+}
+
 MemoryMappingLayout::MemoryMappingLayout(bool cache_enabled) {
-  ReadProcMaps(&proc_self_maps_);
+  ReadProcMaps(&data_.proc_self_maps);
   if (cache_enabled) {
-    if (proc_self_maps_.mmaped_size == 0) {
+    if (data_.proc_self_maps.mmaped_size == 0) {
       LoadFromCache();
-      CHECK_GT(proc_self_maps_.len, 0);
+      CHECK_GT(data_.proc_self_maps.len, 0);
     }
   } else {
-    CHECK_GT(proc_self_maps_.mmaped_size, 0);
+    CHECK_GT(data_.proc_self_maps.mmaped_size, 0);
   }
   Reset();
   // FIXME: in the future we may want to cache the mappings on demand only.
@@ -83,24 +88,22 @@
 MemoryMappingLayout::~MemoryMappingLayout() {
   // Only unmap the buffer if it is different from the cached one. Otherwise
   // it will be unmapped when the cache is refreshed.
-  if (proc_self_maps_.data != cached_proc_self_maps_.data) {
-    UnmapOrDie(proc_self_maps_.data, proc_self_maps_.mmaped_size);
+  if (data_.proc_self_maps.data != cached_proc_self_maps.data) {
+    UnmapOrDie(data_.proc_self_maps.data, data_.proc_self_maps.mmaped_size);
   }
 }
 
-void MemoryMappingLayout::Reset() {
-  current_ = proc_self_maps_.data;
-}
+void MemoryMappingLayout::Reset() { data_.current = data_.proc_self_maps.data; }
 
 // static
 void MemoryMappingLayout::CacheMemoryMappings() {
-  SpinMutexLock l(&cache_lock_);
+  SpinMutexLock l(&cache_lock);
   // Don't invalidate the cache if the mappings are unavailable.
   ProcSelfMapsBuff old_proc_self_maps;
-  old_proc_self_maps = cached_proc_self_maps_;
-  ReadProcMaps(&cached_proc_self_maps_);
-  if (cached_proc_self_maps_.mmaped_size == 0) {
-    cached_proc_self_maps_ = old_proc_self_maps;
+  old_proc_self_maps = cached_proc_self_maps;
+  ReadProcMaps(&cached_proc_self_maps);
+  if (cached_proc_self_maps.mmaped_size == 0) {
+    cached_proc_self_maps = old_proc_self_maps;
   } else {
     if (old_proc_self_maps.mmaped_size) {
       UnmapOrDie(old_proc_self_maps.data,
@@ -110,21 +113,19 @@
 }
 
 void MemoryMappingLayout::LoadFromCache() {
-  SpinMutexLock l(&cache_lock_);
-  if (cached_proc_self_maps_.data) {
-    proc_self_maps_ = cached_proc_self_maps_;
+  SpinMutexLock l(&cache_lock);
+  if (cached_proc_self_maps.data) {
+    data_.proc_self_maps = cached_proc_self_maps;
   }
 }
 
 void MemoryMappingLayout::DumpListOfModules(
-    InternalMmapVector<LoadedModule> *modules) {
+    InternalMmapVectorNoCtor<LoadedModule> *modules) {
   Reset();
-  uptr cur_beg, cur_end, cur_offset, prot;
   InternalScopedString module_name(kMaxPathLength);
-  for (uptr i = 0; Next(&cur_beg, &cur_end, &cur_offset, module_name.data(),
-                        module_name.size(), &prot);
-       i++) {
-    const char *cur_name = module_name.data();
+  MemoryMappedSegment segment(module_name.data(), module_name.size());
+  for (uptr i = 0; Next(&segment); i++) {
+    const char *cur_name = segment.filename;
     if (cur_name[0] == '\0')
       continue;
     // Don't subtract 'cur_beg' from the first entry:
@@ -138,10 +139,10 @@
     //   mapped high at address space (in particular, higher than
     //   shadow memory of the tool), so the module can't be the
     //   first entry.
-    uptr base_address = (i ? cur_beg : 0) - cur_offset;
+    uptr base_address = (i ? segment.start : 0) - segment.offset;
     LoadedModule cur_module;
     cur_module.set(cur_name, base_address);
-    cur_module.addAddressRange(cur_beg, cur_end, prot & kProtectionExecute);
+    segment.AddAddressRanges(&cur_module);
     modules->push_back(cur_module);
   }
 }
@@ -172,4 +173,4 @@
 
 } // namespace __sanitizer
 
-#endif // SANITIZER_FREEBSD || SANITIZER_LINUX
+#endif // SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_NETBSD
diff --git a/lib/sanitizer_common/sanitizer_procmaps_freebsd.cc b/lib/sanitizer_common/sanitizer_procmaps_freebsd.cc
index 3021645..c26a6d4 100644
--- a/lib/sanitizer_common/sanitizer_procmaps_freebsd.cc
+++ b/lib/sanitizer_common/sanitizer_procmaps_freebsd.cc
@@ -7,18 +7,22 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// Information about the process mappings (FreeBSD-specific parts).
+// Information about the process mappings (FreeBSD and NetBSD-specific parts).
 //===----------------------------------------------------------------------===//
 
 #include "sanitizer_platform.h"
-#if SANITIZER_FREEBSD
+#if SANITIZER_FREEBSD || SANITIZER_NETBSD
 #include "sanitizer_common.h"
+#if SANITIZER_FREEBSD
 #include "sanitizer_freebsd.h"
+#endif
 #include "sanitizer_procmaps.h"
 
 #include <unistd.h>
 #include <sys/sysctl.h>
+#if SANITIZER_FREEBSD
 #include <sys/user.h>
+#endif
 
 // Fix 'kinfo_vmentry' definition on FreeBSD prior v9.2 in 32-bit mode.
 #if SANITIZER_FREEBSD && (SANITIZER_WORDSIZE == 32)
@@ -31,16 +35,30 @@
 namespace __sanitizer {
 
 void ReadProcMaps(ProcSelfMapsBuff *proc_maps) {
-  const int Mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_VMMAP, getpid() };
+  const int Mib[] = {
+#if SANITIZER_FREEBSD
+    CTL_KERN,
+    KERN_PROC,
+    KERN_PROC_VMMAP,
+    getpid()
+#else
+    CTL_VM,
+    VM_PROC,
+    VM_PROC_MAP,
+    getpid(),
+    sizeof(struct kinfo_vmentry)
+#endif
+  };
+
   size_t Size = 0;
-  int Err = sysctl(Mib, 4, NULL, &Size, NULL, 0);
+  int Err = sysctl(Mib, ARRAY_SIZE(Mib), NULL, &Size, NULL, 0);
   CHECK_EQ(Err, 0);
   CHECK_GT(Size, 0);
 
   size_t MmapedSize = Size * 4 / 3;
   void *VmMap = MmapOrDie(MmapedSize, "ReadProcMaps()");
   Size = MmapedSize;
-  Err = sysctl(Mib, 4, VmMap, &Size, NULL, 0);
+  Err = sysctl(Mib, ARRAY_SIZE(Mib), VmMap, &Size, NULL, 0);
   CHECK_EQ(Err, 0);
 
   proc_maps->data = (char*)VmMap;
@@ -48,43 +66,38 @@
   proc_maps->len = Size;
 }
 
-bool MemoryMappingLayout::Next(uptr *start, uptr *end, uptr *offset,
-                               char filename[], uptr filename_size,
-                               uptr *protection, ModuleArch *arch, u8 *uuid) {
-  CHECK(!arch && "not implemented");
-  CHECK(!uuid && "not implemented");
-  char *last = proc_self_maps_.data + proc_self_maps_.len;
-  if (current_ >= last) return false;
-  uptr dummy;
-  if (!start) start = &dummy;
-  if (!end) end = &dummy;
-  if (!offset) offset = &dummy;
-  if (!protection) protection = &dummy;
-  struct kinfo_vmentry *VmEntry = (struct kinfo_vmentry*)current_;
+bool MemoryMappingLayout::Next(MemoryMappedSegment *segment) {
+  char *last = data_.proc_self_maps.data + data_.proc_self_maps.len;
+  if (data_.current >= last) return false;
+  struct kinfo_vmentry *VmEntry = (struct kinfo_vmentry *)data_.current;
 
-  *start = (uptr)VmEntry->kve_start;
-  *end = (uptr)VmEntry->kve_end;
-  *offset = (uptr)VmEntry->kve_offset;
+  segment->start = (uptr)VmEntry->kve_start;
+  segment->end = (uptr)VmEntry->kve_end;
+  segment->offset = (uptr)VmEntry->kve_offset;
 
-  *protection = 0;
+  segment->protection = 0;
   if ((VmEntry->kve_protection & KVME_PROT_READ) != 0)
-    *protection |= kProtectionRead;
+    segment->protection |= kProtectionRead;
   if ((VmEntry->kve_protection & KVME_PROT_WRITE) != 0)
-    *protection |= kProtectionWrite;
+    segment->protection |= kProtectionWrite;
   if ((VmEntry->kve_protection & KVME_PROT_EXEC) != 0)
-    *protection |= kProtectionExecute;
+    segment->protection |= kProtectionExecute;
 
-  if (filename != NULL && filename_size > 0) {
-    internal_snprintf(filename,
-                      Min(filename_size, (uptr)PATH_MAX),
-                      "%s", VmEntry->kve_path);
+  if (segment->filename != NULL && segment->filename_size > 0) {
+    internal_snprintf(segment->filename,
+                      Min(segment->filename_size, (uptr)PATH_MAX), "%s",
+                      VmEntry->kve_path);
   }
 
-  current_ += VmEntry->kve_structsize;
+#if SANITIZER_FREEBSD
+  data_.current += VmEntry->kve_structsize;
+#else
+  data_.current += sizeof(*VmEntry);
+#endif
 
   return true;
 }
 
 }  // namespace __sanitizer
 
-#endif  // SANITIZER_FREEBSD
+#endif  // SANITIZER_FREEBSD || SANITIZER_NETBSD
diff --git a/lib/sanitizer_common/sanitizer_procmaps_linux.cc b/lib/sanitizer_common/sanitizer_procmaps_linux.cc
index fdf85b7..ac894eb 100644
--- a/lib/sanitizer_common/sanitizer_procmaps_linux.cc
+++ b/lib/sanitizer_common/sanitizer_procmaps_linux.cc
@@ -18,72 +18,57 @@
 namespace __sanitizer {
 
 void ReadProcMaps(ProcSelfMapsBuff *proc_maps) {
-  CHECK(ReadFileToBuffer("/proc/self/maps", &proc_maps->data,
-                         &proc_maps->mmaped_size, &proc_maps->len));
+  ReadFileToBuffer("/proc/self/maps", &proc_maps->data, &proc_maps->mmaped_size,
+                   &proc_maps->len);
 }
 
 static bool IsOneOf(char c, char c1, char c2) {
   return c == c1 || c == c2;
 }
 
-bool MemoryMappingLayout::Next(uptr *start, uptr *end, uptr *offset,
-                               char filename[], uptr filename_size,
-                               uptr *protection, ModuleArch *arch, u8 *uuid) {
-  CHECK(!arch && "not implemented");
-  CHECK(!uuid && "not implemented");
-  char *last = proc_self_maps_.data + proc_self_maps_.len;
-  if (current_ >= last) return false;
-  uptr dummy;
-  if (!start) start = &dummy;
-  if (!end) end = &dummy;
-  if (!offset) offset = &dummy;
-  if (!protection) protection = &dummy;
-  char *next_line = (char*)internal_memchr(current_, '\n', last - current_);
+bool MemoryMappingLayout::Next(MemoryMappedSegment *segment) {
+  char *last = data_.proc_self_maps.data + data_.proc_self_maps.len;
+  if (data_.current >= last) return false;
+  char *next_line =
+      (char *)internal_memchr(data_.current, '\n', last - data_.current);
   if (next_line == 0)
     next_line = last;
   // Example: 08048000-08056000 r-xp 00000000 03:0c 64593   /foo/bar
-  *start = ParseHex(&current_);
-  CHECK_EQ(*current_++, '-');
-  *end = ParseHex(&current_);
-  CHECK_EQ(*current_++, ' ');
-  CHECK(IsOneOf(*current_, '-', 'r'));
-  *protection = 0;
-  if (*current_++ == 'r')
-    *protection |= kProtectionRead;
-  CHECK(IsOneOf(*current_, '-', 'w'));
-  if (*current_++ == 'w')
-    *protection |= kProtectionWrite;
-  CHECK(IsOneOf(*current_, '-', 'x'));
-  if (*current_++ == 'x')
-    *protection |= kProtectionExecute;
-  CHECK(IsOneOf(*current_, 's', 'p'));
-  if (*current_++ == 's')
-    *protection |= kProtectionShared;
-  CHECK_EQ(*current_++, ' ');
-  *offset = ParseHex(&current_);
-  CHECK_EQ(*current_++, ' ');
-  ParseHex(&current_);
-  CHECK_EQ(*current_++, ':');
-  ParseHex(&current_);
-  CHECK_EQ(*current_++, ' ');
-  while (IsDecimal(*current_))
-    current_++;
+  segment->start = ParseHex(&data_.current);
+  CHECK_EQ(*data_.current++, '-');
+  segment->end = ParseHex(&data_.current);
+  CHECK_EQ(*data_.current++, ' ');
+  CHECK(IsOneOf(*data_.current, '-', 'r'));
+  segment->protection = 0;
+  if (*data_.current++ == 'r') segment->protection |= kProtectionRead;
+  CHECK(IsOneOf(*data_.current, '-', 'w'));
+  if (*data_.current++ == 'w') segment->protection |= kProtectionWrite;
+  CHECK(IsOneOf(*data_.current, '-', 'x'));
+  if (*data_.current++ == 'x') segment->protection |= kProtectionExecute;
+  CHECK(IsOneOf(*data_.current, 's', 'p'));
+  if (*data_.current++ == 's') segment->protection |= kProtectionShared;
+  CHECK_EQ(*data_.current++, ' ');
+  segment->offset = ParseHex(&data_.current);
+  CHECK_EQ(*data_.current++, ' ');
+  ParseHex(&data_.current);
+  CHECK_EQ(*data_.current++, ':');
+  ParseHex(&data_.current);
+  CHECK_EQ(*data_.current++, ' ');
+  while (IsDecimal(*data_.current)) data_.current++;
   // Qemu may lack the trailing space.
   // https://github.com/google/sanitizers/issues/160
-  // CHECK_EQ(*current_++, ' ');
+  // CHECK_EQ(*data_.current++, ' ');
   // Skip spaces.
-  while (current_ < next_line && *current_ == ' ')
-    current_++;
+  while (data_.current < next_line && *data_.current == ' ') data_.current++;
   // Fill in the filename.
-  uptr i = 0;
-  while (current_ < next_line) {
-    if (filename && i < filename_size - 1)
-      filename[i++] = *current_;
-    current_++;
+  if (segment->filename) {
+    uptr len =
+        Min((uptr)(next_line - data_.current), segment->filename_size - 1);
+    internal_strncpy(segment->filename, data_.current, len);
+    segment->filename[len] = 0;
   }
-  if (filename && i < filename_size)
-    filename[i] = 0;
-  current_ = next_line + 1;
+
+  data_.current = next_line + 1;
   return true;
 }
 
diff --git a/lib/sanitizer_common/sanitizer_procmaps_mac.cc b/lib/sanitizer_common/sanitizer_procmaps_mac.cc
index 2831f28..3b98ff0 100644
--- a/lib/sanitizer_common/sanitizer_procmaps_mac.cc
+++ b/lib/sanitizer_common/sanitizer_procmaps_mac.cc
@@ -18,6 +18,7 @@
 
 #include <mach-o/dyld.h>
 #include <mach-o/loader.h>
+#include <mach/mach.h>
 
 // These are not available in older macOS SDKs.
 #ifndef CPU_SUBTYPE_X86_64_H
@@ -35,6 +36,51 @@
 
 namespace __sanitizer {
 
+// Contains information used to iterate through sections.
+struct MemoryMappedSegmentData {
+  char name[kMaxSegName];
+  uptr nsects;
+  char *current_load_cmd_addr;
+  u32 lc_type;
+  uptr base_virt_addr;
+  uptr addr_mask;
+};
+
+template <typename Section>
+static void NextSectionLoad(LoadedModule *module, MemoryMappedSegmentData *data,
+                            bool isWritable) {
+  const Section *sc = (const Section *)data->current_load_cmd_addr;
+  data->current_load_cmd_addr += sizeof(Section);
+
+  uptr sec_start = (sc->addr & data->addr_mask) + data->base_virt_addr;
+  uptr sec_end = sec_start + sc->size;
+  module->addAddressRange(sec_start, sec_end, /*executable=*/false, isWritable,
+                          sc->sectname);
+}
+
+void MemoryMappedSegment::AddAddressRanges(LoadedModule *module) {
+  // Don't iterate over sections when the caller hasn't set up the
+  // data pointer, when there are no sections, or when the segment
+  // is executable. Avoid iterating over executable sections because
+  // it will confuse libignore, and because the extra granularity
+  // of information is not needed by any sanitizers.
+  if (!data_ || !data_->nsects || IsExecutable()) {
+    module->addAddressRange(start, end, IsExecutable(), IsWritable(),
+                            data_ ? data_->name : nullptr);
+    return;
+  }
+
+  do {
+    if (data_->lc_type == LC_SEGMENT) {
+      NextSectionLoad<struct section>(module, data_, IsWritable());
+#ifdef MH_MAGIC_64
+    } else if (data_->lc_type == LC_SEGMENT_64) {
+      NextSectionLoad<struct section_64>(module, data_, IsWritable());
+#endif
+    }
+  } while (--data_->nsects);
+}
+
 MemoryMappingLayout::MemoryMappingLayout(bool cache_enabled) {
   Reset();
 }
@@ -62,15 +108,22 @@
   // _dyld_image_count is thread-unsafe. We need to register callbacks for
   // adding and removing images which will invalidate the MemoryMappingLayout
   // state.
-  current_image_ = _dyld_image_count();
-  current_load_cmd_count_ = -1;
-  current_load_cmd_addr_ = 0;
-  current_magic_ = 0;
-  current_filetype_ = 0;
-  current_arch_ = kModuleArchUnknown;
-  internal_memset(current_uuid_, 0, kModuleUUIDSize);
+  data_.current_image = _dyld_image_count();
+  data_.current_load_cmd_count = -1;
+  data_.current_load_cmd_addr = 0;
+  data_.current_magic = 0;
+  data_.current_filetype = 0;
+  data_.current_arch = kModuleArchUnknown;
+  internal_memset(data_.current_uuid, 0, kModuleUUIDSize);
 }
 
+// The dyld load address should be unchanged throughout process execution,
+// and it is expensive to compute once many libraries have been loaded,
+// so cache it here and do not reset.
+static mach_header *dyld_hdr = 0;
+static const char kDyldPath[] = "/usr/lib/dyld";
+static const int kDyldImageIdx = -1;
+
 // static
 void MemoryMappingLayout::CacheMemoryMappings() {
   // No-op on Mac for now.
@@ -80,6 +133,48 @@
   // No-op on Mac for now.
 }
 
+// _dyld_get_image_header() and related APIs don't report dyld itself.
+// We work around this by manually recursing through the memory map
+// until we hit a Mach header matching dyld instead. These recurse
+// calls are expensive, but the first memory map generation occurs
+// early in the process, when dyld is one of the only images loaded,
+// so it will be hit after only a few iterations.
+static mach_header *get_dyld_image_header() {
+  mach_port_name_t port;
+  if (task_for_pid(mach_task_self(), internal_getpid(), &port) !=
+      KERN_SUCCESS) {
+    return nullptr;
+  }
+
+  unsigned depth = 1;
+  vm_size_t size = 0;
+  vm_address_t address = 0;
+  kern_return_t err = KERN_SUCCESS;
+  mach_msg_type_number_t count = VM_REGION_SUBMAP_INFO_COUNT_64;
+
+  while (true) {
+    struct vm_region_submap_info_64 info;
+    err = vm_region_recurse_64(port, &address, &size, &depth,
+                               (vm_region_info_t)&info, &count);
+    if (err != KERN_SUCCESS) return nullptr;
+
+    if (size >= sizeof(mach_header) && info.protection & kProtectionRead) {
+      mach_header *hdr = (mach_header *)address;
+      if ((hdr->magic == MH_MAGIC || hdr->magic == MH_MAGIC_64) &&
+          hdr->filetype == MH_DYLINKER) {
+        return hdr;
+      }
+    }
+    address += size;
+  }
+}
+
+const mach_header *get_dyld_hdr() {
+  if (!dyld_hdr) dyld_hdr = get_dyld_image_header();
+
+  return dyld_hdr;
+}
+
 // Next and NextSegmentLoad were inspired by base/sysinfo.cc in
 // Google Perftools, https://github.com/gperftools/gperftools.
 
@@ -88,38 +183,57 @@
 // segment.
 // Note that the segment addresses are not necessarily sorted.
 template <u32 kLCSegment, typename SegmentCommand>
-bool MemoryMappingLayout::NextSegmentLoad(uptr *start, uptr *end, uptr *offset,
-                                          char filename[], uptr filename_size,
-                                          ModuleArch *arch, u8 *uuid,
-                                          uptr *protection) {
-  const char *lc = current_load_cmd_addr_;
-  current_load_cmd_addr_ += ((const load_command *)lc)->cmdsize;
+static bool NextSegmentLoad(MemoryMappedSegment *segment,
+MemoryMappedSegmentData *seg_data, MemoryMappingLayoutData &layout_data) {
+  const char *lc = layout_data.current_load_cmd_addr;
+  layout_data.current_load_cmd_addr += ((const load_command *)lc)->cmdsize;
   if (((const load_command *)lc)->cmd == kLCSegment) {
-    const sptr dlloff = _dyld_get_image_vmaddr_slide(current_image_);
     const SegmentCommand* sc = (const SegmentCommand *)lc;
-    if (start) *start = sc->vmaddr + dlloff;
-    if (protection) {
-      // Return the initial protection.
-      *protection = sc->initprot;
+    uptr base_virt_addr, addr_mask;
+    if (layout_data.current_image == kDyldImageIdx) {
+      base_virt_addr = (uptr)get_dyld_hdr();
+      // vmaddr is masked with 0xfffff because on macOS versions < 10.12,
+      // it contains an absolute address rather than an offset for dyld.
+      // To make matters even more complicated, this absolute address
+      // isn't actually the absolute segment address, but the offset portion
+      // of the address is accurate when combined with the dyld base address,
+      // and the mask will give just this offset.
+      addr_mask = 0xfffff;
+    } else {
+      base_virt_addr =
+          (uptr)_dyld_get_image_vmaddr_slide(layout_data.current_image);
+      addr_mask = ~0;
     }
-    if (end) *end = sc->vmaddr + sc->vmsize + dlloff;
-    if (offset) {
-      if (current_filetype_ == /*MH_EXECUTE*/ 0x2) {
-        *offset = sc->vmaddr;
-      } else {
-        *offset = sc->fileoff;
-      }
+
+    segment->start = (sc->vmaddr & addr_mask) + base_virt_addr;
+    segment->end = segment->start + sc->vmsize;
+    // Most callers don't need section information, so only fill this struct
+    // when required.
+    if (seg_data) {
+      seg_data->nsects = sc->nsects;
+      seg_data->current_load_cmd_addr =
+          (char *)lc + sizeof(SegmentCommand);
+      seg_data->lc_type = kLCSegment;
+      seg_data->base_virt_addr = base_virt_addr;
+      seg_data->addr_mask = addr_mask;
+      internal_strncpy(seg_data->name, sc->segname,
+                       ARRAY_SIZE(seg_data->name));
     }
-    if (filename) {
-      internal_strncpy(filename, _dyld_get_image_name(current_image_),
-                       filename_size);
+
+    // Return the initial protection.
+    segment->protection = sc->initprot;
+    segment->offset = (layout_data.current_filetype ==
+                       /*MH_EXECUTE*/ 0x2)
+                          ? sc->vmaddr
+                          : sc->fileoff;
+    if (segment->filename) {
+      const char *src = (layout_data.current_image == kDyldImageIdx)
+                            ? kDyldPath
+                            : _dyld_get_image_name(layout_data.current_image);
+      internal_strncpy(segment->filename, src, segment->filename_size);
     }
-    if (arch) {
-      *arch = current_arch_;
-    }
-    if (uuid) {
-      internal_memcpy(uuid, current_uuid_, kModuleUUIDSize);
-    }
+    segment->arch = layout_data.current_arch;
+    internal_memcpy(segment->uuid, layout_data.current_uuid, kModuleUUIDSize);
     return true;
   }
   return false;
@@ -180,54 +294,53 @@
   return false;
 }
 
-bool MemoryMappingLayout::Next(uptr *start, uptr *end, uptr *offset,
-                               char filename[], uptr filename_size,
-                               uptr *protection, ModuleArch *arch, u8 *uuid) {
-  for (; current_image_ >= 0; current_image_--) {
-    const mach_header* hdr = _dyld_get_image_header(current_image_);
+bool MemoryMappingLayout::Next(MemoryMappedSegment *segment) {
+  for (; data_.current_image >= kDyldImageIdx; data_.current_image--) {
+    const mach_header *hdr = (data_.current_image == kDyldImageIdx)
+                                 ? get_dyld_hdr()
+                                 : _dyld_get_image_header(data_.current_image);
     if (!hdr) continue;
-    if (current_load_cmd_count_ < 0) {
+    if (data_.current_load_cmd_count < 0) {
       // Set up for this image;
-      current_load_cmd_count_ = hdr->ncmds;
-      current_magic_ = hdr->magic;
-      current_filetype_ = hdr->filetype;
-      current_arch_ = ModuleArchFromCpuType(hdr->cputype, hdr->cpusubtype);
-      switch (current_magic_) {
+      data_.current_load_cmd_count = hdr->ncmds;
+      data_.current_magic = hdr->magic;
+      data_.current_filetype = hdr->filetype;
+      data_.current_arch = ModuleArchFromCpuType(hdr->cputype, hdr->cpusubtype);
+      switch (data_.current_magic) {
 #ifdef MH_MAGIC_64
         case MH_MAGIC_64: {
-          current_load_cmd_addr_ = (char*)hdr + sizeof(mach_header_64);
+          data_.current_load_cmd_addr = (char *)hdr + sizeof(mach_header_64);
           break;
         }
 #endif
         case MH_MAGIC: {
-          current_load_cmd_addr_ = (char*)hdr + sizeof(mach_header);
+          data_.current_load_cmd_addr = (char *)hdr + sizeof(mach_header);
           break;
         }
         default: {
           continue;
         }
       }
-      FindUUID((const load_command *)current_load_cmd_addr_, &current_uuid_[0]);
-      current_instrumented_ =
-          IsModuleInstrumented((const load_command *)current_load_cmd_addr_);
+      FindUUID((const load_command *)data_.current_load_cmd_addr,
+               data_.current_uuid);
+      data_.current_instrumented = IsModuleInstrumented(
+          (const load_command *)data_.current_load_cmd_addr);
     }
 
-    for (; current_load_cmd_count_ >= 0; current_load_cmd_count_--) {
-      switch (current_magic_) {
-        // current_magic_ may be only one of MH_MAGIC, MH_MAGIC_64.
+    for (; data_.current_load_cmd_count >= 0; data_.current_load_cmd_count--) {
+      switch (data_.current_magic) {
+        // data_.current_magic may be only one of MH_MAGIC, MH_MAGIC_64.
 #ifdef MH_MAGIC_64
         case MH_MAGIC_64: {
           if (NextSegmentLoad<LC_SEGMENT_64, struct segment_command_64>(
-                  start, end, offset, filename, filename_size, arch, uuid,
-                  protection))
+          segment, segment->data_, data_))
             return true;
           break;
         }
 #endif
         case MH_MAGIC: {
           if (NextSegmentLoad<LC_SEGMENT, struct segment_command>(
-                  start, end, offset, filename, filename_size, arch, uuid,
-                  protection))
+          segment, segment->data_, data_))
             return true;
           break;
         }
@@ -240,29 +353,25 @@
 }
 
 void MemoryMappingLayout::DumpListOfModules(
-    InternalMmapVector<LoadedModule> *modules) {
+    InternalMmapVectorNoCtor<LoadedModule> *modules) {
   Reset();
-  uptr cur_beg, cur_end, prot;
-  ModuleArch cur_arch;
-  u8 cur_uuid[kModuleUUIDSize];
   InternalScopedString module_name(kMaxPathLength);
-  for (uptr i = 0; Next(&cur_beg, &cur_end, 0, module_name.data(),
-                        module_name.size(), &prot, &cur_arch, &cur_uuid[0]);
-       i++) {
-    const char *cur_name = module_name.data();
-    if (cur_name[0] == '\0')
-      continue;
+  MemoryMappedSegment segment(module_name.data(), kMaxPathLength);
+  MemoryMappedSegmentData data;
+  segment.data_ = &data;
+  while (Next(&segment)) {
+    if (segment.filename[0] == '\0') continue;
     LoadedModule *cur_module = nullptr;
     if (!modules->empty() &&
-        0 == internal_strcmp(cur_name, modules->back().full_name())) {
+        0 == internal_strcmp(segment.filename, modules->back().full_name())) {
       cur_module = &modules->back();
     } else {
       modules->push_back(LoadedModule());
       cur_module = &modules->back();
-      cur_module->set(cur_name, cur_beg, cur_arch, cur_uuid,
-                      current_instrumented_);
+      cur_module->set(segment.filename, segment.start, segment.arch,
+                      segment.uuid, data_.current_instrumented);
     }
-    cur_module->addAddressRange(cur_beg, cur_end, prot & kProtectionExecute);
+    segment.AddAddressRanges(cur_module);
   }
 }
 
diff --git a/lib/sanitizer_common/sanitizer_quarantine.h b/lib/sanitizer_common/sanitizer_quarantine.h
index db38867..886445a 100644
--- a/lib/sanitizer_common/sanitizer_quarantine.h
+++ b/lib/sanitizer_common/sanitizer_quarantine.h
@@ -87,15 +87,14 @@
     // is zero (it allows us to perform just one atomic read per Put() call).
     CHECK((size == 0 && cache_size == 0) || cache_size != 0);
 
-    atomic_store(&max_size_, size, memory_order_relaxed);
-    atomic_store(&min_size_, size / 10 * 9,
-                 memory_order_relaxed);  // 90% of max size.
-    atomic_store(&max_cache_size_, cache_size, memory_order_relaxed);
+    atomic_store_relaxed(&max_size_, size);
+    atomic_store_relaxed(&min_size_, size / 10 * 9);  // 90% of max size.
+    atomic_store_relaxed(&max_cache_size_, cache_size);
   }
 
-  uptr GetSize() const { return atomic_load(&max_size_, memory_order_relaxed); }
+  uptr GetSize() const { return atomic_load_relaxed(&max_size_); }
   uptr GetCacheSize() const {
-    return atomic_load(&max_cache_size_, memory_order_relaxed);
+    return atomic_load_relaxed(&max_cache_size_);
   }
 
   void Put(Cache *c, Callback cb, Node *ptr, uptr size) {
@@ -117,7 +116,16 @@
       cache_.Transfer(c);
     }
     if (cache_.Size() > GetSize() && recycle_mutex_.TryLock())
-      Recycle(cb);
+      Recycle(atomic_load_relaxed(&min_size_), cb);
+  }
+
+  void NOINLINE DrainAndRecycle(Cache *c, Callback cb) {
+    {
+      SpinMutexLock l(&cache_mutex_);
+      cache_.Transfer(c);
+    }
+    recycle_mutex_.Lock();
+    Recycle(0, cb);
   }
 
   void PrintStats() const {
@@ -139,9 +147,8 @@
   Cache cache_;
   char pad2_[kCacheLineSize];
 
-  void NOINLINE Recycle(Callback cb) {
+  void NOINLINE Recycle(uptr min_size, Callback cb) {
     Cache tmp;
-    uptr min_size = atomic_load(&min_size_, memory_order_relaxed);
     {
       SpinMutexLock l(&cache_mutex_);
       // Go over the batches and merge partially filled ones to
@@ -201,7 +208,7 @@
 
   // Total memory used, including internal accounting.
   uptr Size() const {
-    return atomic_load(&size_, memory_order_relaxed);
+    return atomic_load_relaxed(&size_);
   }
 
   // Memory used for internal accounting.
@@ -225,7 +232,7 @@
     list_.append_back(&from_cache->list_);
     SizeAdd(from_cache->Size());
 
-    atomic_store(&from_cache->size_, 0, memory_order_relaxed);
+    atomic_store_relaxed(&from_cache->size_, 0);
   }
 
   void EnqueueBatch(QuarantineBatch *b) {
@@ -296,10 +303,10 @@
   atomic_uintptr_t size_;
 
   void SizeAdd(uptr add) {
-    atomic_store(&size_, Size() + add, memory_order_relaxed);
+    atomic_store_relaxed(&size_, Size() + add);
   }
   void SizeSub(uptr sub) {
-    atomic_store(&size_, Size() - sub, memory_order_relaxed);
+    atomic_store_relaxed(&size_, Size() - sub);
   }
 };
 
diff --git a/lib/sanitizer_common/sanitizer_report_decorator.h b/lib/sanitizer_common/sanitizer_report_decorator.h
index 86536aa..060b58d 100644
--- a/lib/sanitizer_common/sanitizer_report_decorator.h
+++ b/lib/sanitizer_common/sanitizer_report_decorator.h
@@ -27,8 +27,9 @@
   SanitizerCommonDecorator() : ansi_(ColorizeReports()) {}
   const char *Bold()    const { return ansi_ ? "\033[1m" : ""; }
   const char *Default() const { return ansi_ ? "\033[1m\033[0m"  : ""; }
-  const char *Warning()    { return Red(); }
-  const char *EndWarning() { return Default(); }
+  const char *Warning() const { return Red(); }
+  const char *MemoryByte() { return Magenta(); }
+
  protected:
   const char *Black()   const { return ansi_ ? "\033[1m\033[30m" : ""; }
   const char *Red()     const { return ansi_ ? "\033[1m\033[31m" : ""; }
diff --git a/lib/sanitizer_common/sanitizer_signal_interceptors.inc b/lib/sanitizer_common/sanitizer_signal_interceptors.inc
new file mode 100644
index 0000000..f38e1b9
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_signal_interceptors.inc
@@ -0,0 +1,67 @@
+//===-- sanitizer_signal_interceptors.inc -----------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Signal interceptors for sanitizers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "interception/interception.h"
+#include "sanitizer_common.h"
+#include "sanitizer_internal_defs.h"
+#include "sanitizer_platform_interceptors.h"
+
+using namespace __sanitizer;
+
+#if SANITIZER_INTERCEPT_BSD_SIGNAL
+INTERCEPTOR(void *, bsd_signal, int signum, void *handler) {
+  if (GetHandleSignalMode(signum) == kHandleSignalExclusive) return 0;
+  return REAL(bsd_signal)(signum, handler);
+}
+#define INIT_BSD_SIGNAL COMMON_INTERCEPT_FUNCTION(bsd_signal)
+#else  // SANITIZER_INTERCEPT_BSD_SIGNAL
+#define INIT_BSD_SIGNAL
+#endif  // SANITIZER_INTERCEPT_BSD_SIGNAL
+
+#if SANITIZER_INTERCEPT_SIGNAL_AND_SIGACTION
+INTERCEPTOR(void *, signal, int signum, void *handler) {
+  if (GetHandleSignalMode(signum) == kHandleSignalExclusive) return nullptr;
+  return REAL(signal)(signum, handler);
+}
+#define INIT_SIGNAL COMMON_INTERCEPT_FUNCTION(signal)
+
+INTERCEPTOR(int, sigaction, int signum, const struct sigaction *act,
+            struct sigaction *oldact) {
+  if (GetHandleSignalMode(signum) == kHandleSignalExclusive) return 0;
+  return REAL(sigaction)(signum, act, oldact);
+}
+#define INIT_SIGACTION COMMON_INTERCEPT_FUNCTION(sigaction)
+
+namespace __sanitizer {
+int real_sigaction(int signum, const void *act, void *oldact) {
+  return REAL(sigaction)(signum, (const struct sigaction *)act,
+                         (struct sigaction *)oldact);
+}
+}  // namespace __sanitizer
+#else  // SANITIZER_INTERCEPT_SIGNAL_AND_SIGACTION
+#define INIT_SIGNAL
+#define INIT_SIGACTION
+// We need to have defined REAL(sigaction) on other systems.
+DEFINE_REAL(int, sigaction, int signum, const struct sigaction *act,
+            struct sigaction *oldact)
+#endif  // SANITIZER_INTERCEPT_SIGNAL_AND_SIGACTION
+
+static void InitializeSignalInterceptors() {
+  static bool was_called_once;
+  CHECK(!was_called_once);
+  was_called_once = true;
+
+  INIT_BSD_SIGNAL;
+  INIT_SIGNAL;
+  INIT_SIGACTION;
+}
diff --git a/lib/sanitizer_common/sanitizer_stacktrace.h b/lib/sanitizer_common/sanitizer_stacktrace.h
index 90142df..f15196e 100644
--- a/lib/sanitizer_common/sanitizer_stacktrace.h
+++ b/lib/sanitizer_common/sanitizer_stacktrace.h
@@ -97,6 +97,11 @@
   void Unwind(u32 max_depth, uptr pc, uptr bp, void *context, uptr stack_top,
               uptr stack_bottom, bool request_fast_unwind);
 
+  void Reset() {
+    *static_cast<StackTrace *>(this) = StackTrace(trace_buffer, 0);
+    top_frame_bp = 0;
+  }
+
  private:
   void FastUnwindStack(uptr pc, uptr bp, uptr stack_top, uptr stack_bottom,
                        u32 max_depth);
@@ -106,8 +111,8 @@
   void PopStackFrames(uptr count);
   uptr LocatePcInTrace(uptr pc);
 
-  BufferedStackTrace(const BufferedStackTrace &);
-  void operator=(const BufferedStackTrace &);
+  BufferedStackTrace(const BufferedStackTrace &) = delete;
+  void operator=(const BufferedStackTrace &) = delete;
 };
 
 // Check if given pointer points into allocated stack area.
diff --git a/lib/sanitizer_common/sanitizer_stacktrace_libcdep.cc b/lib/sanitizer_common/sanitizer_stacktrace_libcdep.cc
index 36c98d0..747a4a7 100644
--- a/lib/sanitizer_common/sanitizer_stacktrace_libcdep.cc
+++ b/lib/sanitizer_common/sanitizer_stacktrace_libcdep.cc
@@ -43,7 +43,8 @@
       if (dedup_frames-- > 0) {
         if (dedup_token.length())
           dedup_token.append("--");
-        dedup_token.append(cur->info.function);
+        if (cur->info.function != nullptr)
+          dedup_token.append(cur->info.function);
       }
     }
     frames->ClearAll();
diff --git a/lib/sanitizer_common/sanitizer_stacktrace_printer.cc b/lib/sanitizer_common/sanitizer_stacktrace_printer.cc
index 377f1ce..78f0744 100644
--- a/lib/sanitizer_common/sanitizer_stacktrace_printer.cc
+++ b/lib/sanitizer_common/sanitizer_stacktrace_printer.cc
@@ -12,9 +12,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "sanitizer_stacktrace_printer.h"
+#include "sanitizer_file.h"
+#include "sanitizer_fuchsia.h"
 
 namespace __sanitizer {
 
+// sanitizer_symbolizer_fuchsia.cc implements these differently for Fuchsia.
+#if !SANITIZER_FUCHSIA
+
 static const char *StripFunctionName(const char *function, const char *prefix) {
   if (!function) return nullptr;
   if (!prefix) return function;
@@ -146,6 +151,8 @@
   }
 }
 
+#endif  // !SANITIZER_FUCHSIA
+
 void RenderSourceLocation(InternalScopedString *buffer, const char *file,
                           int line, int column, bool vs_style,
                           const char *strip_path_prefix) {
diff --git a/lib/sanitizer_common/sanitizer_stoptheworld.h b/lib/sanitizer_common/sanitizer_stoptheworld.h
index aa6f5d8..20b49ae 100644
--- a/lib/sanitizer_common/sanitizer_stoptheworld.h
+++ b/lib/sanitizer_common/sanitizer_stoptheworld.h
@@ -18,36 +18,32 @@
 #include "sanitizer_common.h"
 
 namespace __sanitizer {
-typedef int SuspendedThreadID;
+
+enum PtraceRegistersStatus {
+  REGISTERS_UNAVAILABLE_FATAL = -1,
+  REGISTERS_UNAVAILABLE = 0,
+  REGISTERS_AVAILABLE = 1
+};
 
 // Holds the list of suspended threads and provides an interface to dump their
 // register contexts.
 class SuspendedThreadsList {
  public:
-  SuspendedThreadsList()
-    : thread_ids_(1024) {}
-  SuspendedThreadID GetThreadID(uptr index) const {
-    CHECK_LT(index, thread_ids_.size());
-    return thread_ids_[index];
+  SuspendedThreadsList() = default;
+
+  // Can't declare pure virtual functions in sanitizer runtimes:
+  // __cxa_pure_virtual might be unavailable. Use UNIMPLEMENTED() instead.
+  virtual PtraceRegistersStatus GetRegistersAndSP(uptr index, uptr *buffer,
+                                                  uptr *sp) const {
+    UNIMPLEMENTED();
   }
-  int GetRegistersAndSP(uptr index, uptr *buffer, uptr *sp) const;
+
   // The buffer in GetRegistersAndSP should be at least this big.
-  static uptr RegisterCount();
-  uptr thread_count() const { return thread_ids_.size(); }
-  bool Contains(SuspendedThreadID thread_id) const {
-    for (uptr i = 0; i < thread_ids_.size(); i++) {
-      if (thread_ids_[i] == thread_id)
-        return true;
-    }
-    return false;
-  }
-  void Append(SuspendedThreadID thread_id) {
-    thread_ids_.push_back(thread_id);
-  }
+  virtual uptr RegisterCount() const { UNIMPLEMENTED(); }
+  virtual uptr ThreadCount() const { UNIMPLEMENTED(); }
+  virtual tid_t GetThreadID(uptr index) const { UNIMPLEMENTED(); }
 
  private:
-  InternalMmapVector<SuspendedThreadID> thread_ids_;
-
   // Prohibit copy and assign.
   SuspendedThreadsList(const SuspendedThreadsList&);
   void operator=(const SuspendedThreadsList&);
diff --git a/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc b/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc
index ce8873b..e1cb6f7 100644
--- a/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc
+++ b/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc
@@ -16,7 +16,8 @@
 
 #if SANITIZER_LINUX && (defined(__x86_64__) || defined(__mips__) || \
                         defined(__aarch64__) || defined(__powerpc64__) || \
-                        defined(__s390__) || defined(__i386__))
+                        defined(__s390__) || defined(__i386__) || \
+                        defined(__arm__))
 
 #include "sanitizer_stoptheworld.h"
 
@@ -31,17 +32,13 @@
 #include <sys/types.h> // for pid_t
 #include <sys/uio.h> // for iovec
 #include <elf.h> // for NT_PRSTATUS
-#if SANITIZER_ANDROID && defined(__arm__)
-# include <linux/user.h>  // for pt_regs
-#else
-# ifdef __aarch64__
+#if defined(__aarch64__) && !SANITIZER_ANDROID
 // GLIBC 2.20+ sys/user does not include asm/ptrace.h
-#  include <asm/ptrace.h>
-# endif
-# include <sys/user.h>  // for user_regs_struct
-# if SANITIZER_ANDROID && SANITIZER_MIPS
-#   include <asm/reg.h>  // for mips SP register in sys/user.h
-# endif
+# include <asm/ptrace.h>
+#endif
+#include <sys/user.h>  // for user_regs_struct
+#if SANITIZER_ANDROID && SANITIZER_MIPS
+# include <asm/reg.h>  // for mips SP register in sys/user.h
 #endif
 #include <sys/wait.h> // for signal-related stuff
 
@@ -81,7 +78,22 @@
 
 namespace __sanitizer {
 
-COMPILER_CHECK(sizeof(SuspendedThreadID) == sizeof(pid_t));
+class SuspendedThreadsListLinux : public SuspendedThreadsList {
+ public:
+  SuspendedThreadsListLinux() : thread_ids_(1024) {}
+
+  tid_t GetThreadID(uptr index) const;
+  uptr ThreadCount() const;
+  bool ContainsTid(tid_t thread_id) const;
+  void Append(tid_t tid);
+
+  PtraceRegistersStatus GetRegistersAndSP(uptr index, uptr *buffer,
+                                          uptr *sp) const;
+  uptr RegisterCount() const;
+
+ private:
+  InternalMmapVector<tid_t> thread_ids_;
+};
 
 // Structure for passing arguments into the tracer thread.
 struct TracerThreadArgument {
@@ -106,31 +118,31 @@
   bool SuspendAllThreads();
   void ResumeAllThreads();
   void KillAllThreads();
-  SuspendedThreadsList &suspended_threads_list() {
+  SuspendedThreadsListLinux &suspended_threads_list() {
     return suspended_threads_list_;
   }
   TracerThreadArgument *arg;
  private:
-  SuspendedThreadsList suspended_threads_list_;
+  SuspendedThreadsListLinux suspended_threads_list_;
   pid_t pid_;
-  bool SuspendThread(SuspendedThreadID thread_id);
+  bool SuspendThread(tid_t thread_id);
 };
 
-bool ThreadSuspender::SuspendThread(SuspendedThreadID tid) {
+bool ThreadSuspender::SuspendThread(tid_t tid) {
   // Are we already attached to this thread?
   // Currently this check takes linear time, however the number of threads is
   // usually small.
-  if (suspended_threads_list_.Contains(tid))
-    return false;
+  if (suspended_threads_list_.ContainsTid(tid)) return false;
   int pterrno;
   if (internal_iserror(internal_ptrace(PTRACE_ATTACH, tid, nullptr, nullptr),
                        &pterrno)) {
     // Either the thread is dead, or something prevented us from attaching.
     // Log this event and move on.
-    VReport(1, "Could not attach to thread %d (errno %d).\n", tid, pterrno);
+    VReport(1, "Could not attach to thread %zu (errno %d).\n", (uptr)tid,
+            pterrno);
     return false;
   } else {
-    VReport(2, "Attached to thread %d.\n", tid);
+    VReport(2, "Attached to thread %zu.\n", (uptr)tid);
     // The thread is not guaranteed to stop before ptrace returns, so we must
     // wait on it. Note: if the thread receives a signal concurrently,
     // we can get notification about the signal before notification about stop.
@@ -148,8 +160,8 @@
       if (internal_iserror(waitpid_status, &wperrno)) {
         // Got a ECHILD error. I don't think this situation is possible, but it
         // doesn't hurt to report it.
-        VReport(1, "Waiting on thread %d failed, detaching (errno %d).\n",
-                tid, wperrno);
+        VReport(1, "Waiting on thread %zu failed, detaching (errno %d).\n",
+                (uptr)tid, wperrno);
         internal_ptrace(PTRACE_DETACH, tid, nullptr, nullptr);
         return false;
       }
@@ -166,7 +178,7 @@
 }
 
 void ThreadSuspender::ResumeAllThreads() {
-  for (uptr i = 0; i < suspended_threads_list_.thread_count(); i++) {
+  for (uptr i = 0; i < suspended_threads_list_.ThreadCount(); i++) {
     pid_t tid = suspended_threads_list_.GetThreadID(i);
     int pterrno;
     if (!internal_iserror(internal_ptrace(PTRACE_DETACH, tid, nullptr, nullptr),
@@ -182,7 +194,7 @@
 }
 
 void ThreadSuspender::KillAllThreads() {
-  for (uptr i = 0; i < suspended_threads_list_.thread_count(); i++)
+  for (uptr i = 0; i < suspended_threads_list_.ThreadCount(); i++)
     internal_ptrace(PTRACE_KILL, suspended_threads_list_.GetThreadID(i),
                     nullptr, nullptr);
 }
@@ -234,7 +246,7 @@
 
 // Signal handler to wake up suspended threads when the tracer thread dies.
 static void TracerThreadSignalHandler(int signum, void *siginfo, void *uctx) {
-  SignalContext ctx = SignalContext::Create(siginfo, uctx);
+  SignalContext ctx(siginfo, uctx);
   Printf("Tracer caught signal %d: addr=0x%zx pc=0x%zx sp=0x%zx\n", signum,
          ctx.addr, ctx.pc, ctx.sp);
   ThreadSuspender *inst = thread_suspender_instance;
@@ -251,7 +263,7 @@
 }
 
 // Size of alternative stack for signal handlers in the tracer thread.
-static const int kHandlerStackSize = 4096;
+static const int kHandlerStackSize = 8192;
 
 // This function will be run as a cloned task.
 static int TracerThread(void* argument) {
@@ -275,7 +287,7 @@
 
   // Alternate stack for signal handling.
   InternalScopedBuffer<char> handler_stack_memory(kHandlerStackSize);
-  struct sigaltstack handler_stack;
+  stack_t handler_stack;
   internal_memset(&handler_stack, 0, sizeof(handler_stack));
   handler_stack.ss_sp = handler_stack_memory.data();
   handler_stack.ss_size = kHandlerStackSize;
@@ -493,9 +505,28 @@
 #error "Unsupported architecture"
 #endif // SANITIZER_ANDROID && defined(__arm__)
 
-int SuspendedThreadsList::GetRegistersAndSP(uptr index,
-                                            uptr *buffer,
-                                            uptr *sp) const {
+tid_t SuspendedThreadsListLinux::GetThreadID(uptr index) const {
+  CHECK_LT(index, thread_ids_.size());
+  return thread_ids_[index];
+}
+
+uptr SuspendedThreadsListLinux::ThreadCount() const {
+  return thread_ids_.size();
+}
+
+bool SuspendedThreadsListLinux::ContainsTid(tid_t thread_id) const {
+  for (uptr i = 0; i < thread_ids_.size(); i++) {
+    if (thread_ids_[i] == thread_id) return true;
+  }
+  return false;
+}
+
+void SuspendedThreadsListLinux::Append(tid_t tid) {
+  thread_ids_.push_back(tid);
+}
+
+PtraceRegistersStatus SuspendedThreadsListLinux::GetRegistersAndSP(
+    uptr index, uptr *buffer, uptr *sp) const {
   pid_t tid = GetThreadID(index);
   regs_struct regs;
   int pterrno;
@@ -513,19 +544,23 @@
   if (isErr) {
     VReport(1, "Could not get registers from thread %d (errno %d).\n", tid,
             pterrno);
-    return -1;
+    // ESRCH means that the given thread is not suspended or already dead.
+    // Therefore it's unsafe to inspect its data (e.g. walk through stack) and
+    // we should notify caller about this.
+    return pterrno == ESRCH ? REGISTERS_UNAVAILABLE_FATAL
+                            : REGISTERS_UNAVAILABLE;
   }
 
   *sp = regs.REG_SP;
   internal_memcpy(buffer, &regs, sizeof(regs));
-  return 0;
+  return REGISTERS_AVAILABLE;
 }
 
-uptr SuspendedThreadsList::RegisterCount() {
+uptr SuspendedThreadsListLinux::RegisterCount() const {
   return sizeof(regs_struct) / sizeof(uptr);
 }
 } // namespace __sanitizer
 
 #endif  // SANITIZER_LINUX && (defined(__x86_64__) || defined(__mips__)
         // || defined(__aarch64__) || defined(__powerpc64__)
-        // || defined(__s390__) || defined(__i386__)
+        // || defined(__s390__) || defined(__i386__) || defined(__arm__)
diff --git a/lib/sanitizer_common/sanitizer_stoptheworld_mac.cc b/lib/sanitizer_common/sanitizer_stoptheworld_mac.cc
index 4cccc29..d84ebef 100644
--- a/lib/sanitizer_common/sanitizer_stoptheworld_mac.cc
+++ b/lib/sanitizer_common/sanitizer_stoptheworld_mac.cc
@@ -13,26 +13,166 @@
 
 #include "sanitizer_platform.h"
 
-#if SANITIZER_MAC && (defined(__x86_64__) || defined(__aarch64__))
+#if SANITIZER_MAC && (defined(__x86_64__) || defined(__aarch64__) || \
+                      defined(__i386))
+
+#include <mach/mach.h>
+#include <mach/thread_info.h>
+#include <pthread.h>
 
 #include "sanitizer_stoptheworld.h"
 
 namespace __sanitizer {
+typedef struct {
+  tid_t tid;
+  thread_t thread;
+} SuspendedThreadInfo;
+
+class SuspendedThreadsListMac : public SuspendedThreadsList {
+ public:
+  SuspendedThreadsListMac() : threads_(1024) {}
+
+  tid_t GetThreadID(uptr index) const;
+  thread_t GetThread(uptr index) const;
+  uptr ThreadCount() const;
+  bool ContainsThread(thread_t thread) const;
+  void Append(thread_t thread);
+
+  PtraceRegistersStatus GetRegistersAndSP(uptr index, uptr *buffer,
+                                          uptr *sp) const;
+  uptr RegisterCount() const;
+
+ private:
+  InternalMmapVector<SuspendedThreadInfo> threads_;
+};
+
+struct RunThreadArgs {
+  StopTheWorldCallback callback;
+  void *argument;
+};
+
+void RunThread(void *arg) {
+  struct RunThreadArgs *run_args = (struct RunThreadArgs *)arg;
+  SuspendedThreadsListMac suspended_threads_list;
+
+  thread_array_t threads;
+  mach_msg_type_number_t num_threads;
+  kern_return_t err = task_threads(mach_task_self(), &threads, &num_threads);
+  if (err != KERN_SUCCESS) {
+    VReport(1, "Failed to get threads for task (errno %d).\n", err);
+    return;
+  }
+
+  thread_t thread_self = mach_thread_self();
+  for (unsigned int i = 0; i < num_threads; ++i) {
+    if (threads[i] == thread_self) continue;
+
+    thread_suspend(threads[i]);
+    suspended_threads_list.Append(threads[i]);
+  }
+
+  run_args->callback(suspended_threads_list, run_args->argument);
+
+  uptr num_suspended = suspended_threads_list.ThreadCount();
+  for (unsigned int i = 0; i < num_suspended; ++i) {
+    thread_resume(suspended_threads_list.GetThread(i));
+  }
+}
+
 void StopTheWorld(StopTheWorldCallback callback, void *argument) {
-  CHECK(0 && "unimplemented");
+  struct RunThreadArgs arg = {callback, argument};
+  pthread_t run_thread = (pthread_t)internal_start_thread(RunThread, &arg);
+  internal_join_thread(run_thread);
 }
 
-int SuspendedThreadsList::GetRegistersAndSP(uptr index,
-                                            uptr *buffer,
-                                            uptr *sp) const {
-  CHECK(0 && "unimplemented");
-  return 0;
+#if defined(__x86_64__)
+typedef x86_thread_state64_t regs_struct;
+
+#define SP_REG __rsp
+
+#elif defined(__aarch64__)
+typedef arm_thread_state64_t regs_struct;
+
+# if __DARWIN_UNIX03
+#  define SP_REG __sp
+# else
+#  define SP_REG sp
+# endif
+
+#elif defined(__i386)
+typedef x86_thread_state32_t regs_struct;
+
+#define SP_REG __esp
+
+#else
+#error "Unsupported architecture"
+#endif
+
+tid_t SuspendedThreadsListMac::GetThreadID(uptr index) const {
+  CHECK_LT(index, threads_.size());
+  return threads_[index].tid;
 }
 
-uptr SuspendedThreadsList::RegisterCount() {
-  CHECK(0 && "unimplemented");
-  return 0;
+thread_t SuspendedThreadsListMac::GetThread(uptr index) const {
+  CHECK_LT(index, threads_.size());
+  return threads_[index].thread;
+}
+
+uptr SuspendedThreadsListMac::ThreadCount() const {
+  return threads_.size();
+}
+
+bool SuspendedThreadsListMac::ContainsThread(thread_t thread) const {
+  for (uptr i = 0; i < threads_.size(); i++) {
+    if (threads_[i].thread == thread) return true;
+  }
+  return false;
+}
+
+void SuspendedThreadsListMac::Append(thread_t thread) {
+  thread_identifier_info_data_t info;
+  mach_msg_type_number_t info_count = THREAD_IDENTIFIER_INFO_COUNT;
+  kern_return_t err = thread_info(thread, THREAD_IDENTIFIER_INFO,
+                                  (thread_info_t)&info, &info_count);
+  if (err != KERN_SUCCESS) {
+    VReport(1, "Error - unable to get thread ident for a thread\n");
+    return;
+  }
+  threads_.push_back({info.thread_id, thread});
+}
+
+PtraceRegistersStatus SuspendedThreadsListMac::GetRegistersAndSP(
+    uptr index, uptr *buffer, uptr *sp) const {
+  thread_t thread = GetThread(index);
+  regs_struct regs;
+  int err;
+  mach_msg_type_number_t reg_count = MACHINE_THREAD_STATE_COUNT;
+  err = thread_get_state(thread, MACHINE_THREAD_STATE, (thread_state_t)&regs,
+                         &reg_count);
+  if (err != KERN_SUCCESS) {
+    VReport(1, "Error - unable to get registers for a thread\n");
+    // KERN_INVALID_ARGUMENT indicates that either the flavor is invalid,
+    // or the thread does not exist. The other possible error case,
+    // MIG_ARRAY_TOO_LARGE, means that the state is too large, but it's
+    // still safe to proceed.
+    return err == KERN_INVALID_ARGUMENT ? REGISTERS_UNAVAILABLE_FATAL
+                                        : REGISTERS_UNAVAILABLE;
+  }
+
+  internal_memcpy(buffer, &regs, sizeof(regs));
+  *sp = regs.SP_REG;
+
+  // On x86_64 and aarch64, we must account for the stack redzone, which is 128
+  // bytes.
+  if (SANITIZER_WORDSIZE == 64) *sp -= 128;
+
+  return REGISTERS_AVAILABLE;
+}
+
+uptr SuspendedThreadsListMac::RegisterCount() const {
+  return MACHINE_THREAD_STATE_COUNT;
 }
 } // namespace __sanitizer
 
-#endif  // SANITIZER_MAC && (defined(__x86_64__) || defined(__aarch64__))
+#endif  // SANITIZER_MAC && (defined(__x86_64__) || defined(__aarch64__)) ||
+        //                   defined(__i386))
diff --git a/lib/sanitizer_common/sanitizer_suppressions.cc b/lib/sanitizer_common/sanitizer_suppressions.cc
index f0f2c9c..89ddfdd 100644
--- a/lib/sanitizer_common/sanitizer_suppressions.cc
+++ b/lib/sanitizer_common/sanitizer_suppressions.cc
@@ -16,6 +16,7 @@
 #include "sanitizer_allocator_internal.h"
 #include "sanitizer_common.h"
 #include "sanitizer_flags.h"
+#include "sanitizer_file.h"
 #include "sanitizer_libc.h"
 #include "sanitizer_placement_new.h"
 
@@ -50,6 +51,7 @@
   if (filename[0] == '\0')
     return;
 
+#if !SANITIZER_FUCHSIA
   // If we cannot find the file, check if its location is relative to
   // the location of the executable.
   InternalScopedString new_file_path(kMaxPathLength);
@@ -58,6 +60,7 @@
                                           new_file_path.size())) {
     filename = new_file_path.data();
   }
+#endif  // !SANITIZER_FUCHSIA
 
   // Read the file.
   VPrintf(1, "%s: reading suppressions file at %s\n",
diff --git a/lib/sanitizer_common/sanitizer_symbolizer.cc b/lib/sanitizer_common/sanitizer_symbolizer.cc
index 1cd5b6e..672c936 100644
--- a/lib/sanitizer_common/sanitizer_symbolizer.cc
+++ b/lib/sanitizer_common/sanitizer_symbolizer.cc
@@ -71,6 +71,10 @@
 StaticSpinMutex Symbolizer::init_mu_;
 LowLevelAllocator Symbolizer::symbolizer_allocator_;
 
+void Symbolizer::InvalidateModuleList() {
+  modules_fresh_ = false;
+}
+
 void Symbolizer::AddHooks(Symbolizer::StartSymbolizationHook start_hook,
                           Symbolizer::EndSymbolizationHook end_hook) {
   CHECK(start_hook_ == 0 && end_hook_ == 0);
diff --git a/lib/sanitizer_common/sanitizer_symbolizer.h b/lib/sanitizer_common/sanitizer_symbolizer.h
index 4fc7742..208362d 100644
--- a/lib/sanitizer_common/sanitizer_symbolizer.h
+++ b/lib/sanitizer_common/sanitizer_symbolizer.h
@@ -119,8 +119,11 @@
   void AddHooks(StartSymbolizationHook start_hook,
                 EndSymbolizationHook end_hook);
 
+  void RefreshModules();
   const LoadedModule *FindModuleForAddress(uptr address);
 
+  void InvalidateModuleList();
+
  private:
   // GetModuleNameAndOffsetForPC has to return a string to the caller.
   // Since the corresponding module might get unloaded later, we should create
@@ -149,6 +152,7 @@
                                          uptr *module_offset,
                                          ModuleArch *module_arch);
   ListOfModules modules_;
+  ListOfModules fallback_modules_;
   // If stale, need to reload the modules before looking up addresses.
   bool modules_fresh_;
 
diff --git a/lib/sanitizer_common/sanitizer_symbolizer_fuchsia.cc b/lib/sanitizer_common/sanitizer_symbolizer_fuchsia.cc
new file mode 100644
index 0000000..3d1117d
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_symbolizer_fuchsia.cc
@@ -0,0 +1,107 @@
+//===-- sanitizer_symbolizer_fuchsia.cc -----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is shared between various sanitizers' runtime libraries.
+//
+// Implementation of Fuchsia-specific symbolizer.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_platform.h"
+#if SANITIZER_FUCHSIA
+
+#include "sanitizer_fuchsia.h"
+#include "sanitizer_symbolizer.h"
+
+namespace __sanitizer {
+
+// For Fuchsia we don't do any actual symbolization per se.
+// Instead, we emit text containing raw addresses and raw linkage
+// symbol names, embedded in Fuchsia's symbolization markup format.
+// Fuchsia's logging infrastructure emits enough information about
+// process memory layout that a post-processing filter can do the
+// symbolization and pretty-print the markup.  See the spec at:
+// https://fuchsia.googlesource.com/zircon/+/master/docs/symbolizer_markup.md
+
+// This is used by UBSan for type names, and by ASan for global variable names.
+constexpr const char *kFormatDemangle = "{{{symbol:%s}}}";
+constexpr uptr kFormatDemangleMax = 1024;  // Arbitrary.
+
+// Function name or equivalent from PC location.
+constexpr const char *kFormatFunction = "{{{pc:%p}}}";
+constexpr uptr kFormatFunctionMax = 64;  // More than big enough for 64-bit hex.
+
+// Global variable name or equivalent from data memory address.
+constexpr const char *kFormatData = "{{{data:%p}}}";
+
+// One frame in a backtrace (printed on a line by itself).
+constexpr const char *kFormatFrame = "{{{bt:%u:%p}}}";
+
+// This is used by UBSan for type names, and by ASan for global variable names.
+// It's expected to return a static buffer that will be reused on each call.
+const char *Symbolizer::Demangle(const char *name) {
+  static char buffer[kFormatDemangleMax];
+  internal_snprintf(buffer, sizeof(buffer), kFormatDemangle, name);
+  return buffer;
+}
+
+// This is used mostly for suppression matching.  Making it work
+// would enable "interceptor_via_lib" suppressions.  It's also used
+// once in UBSan to say "in module ..." in a message that also
+// includes an address in the module, so post-processing can already
+// pretty-print that so as to indicate the module.
+bool Symbolizer::GetModuleNameAndOffsetForPC(uptr pc, const char **module_name,
+                                             uptr *module_address) {
+  return false;
+}
+
+// This is used in some places for suppression checking, which we
+// don't really support for Fuchsia.  It's also used in UBSan to
+// identify a PC location to a function name, so we always fill in
+// the function member with a string containing markup around the PC
+// value.
+// TODO(mcgrathr): Under SANITIZER_GO, it's currently used by TSan
+// to render stack frames, but that should be changed to use
+// RenderStackFrame.
+SymbolizedStack *Symbolizer::SymbolizePC(uptr addr) {
+  SymbolizedStack *s = SymbolizedStack::New(addr);
+  char buffer[kFormatFunctionMax];
+  internal_snprintf(buffer, sizeof(buffer), kFormatFunction, addr);
+  s->info.function = internal_strdup(buffer);
+  return s;
+}
+
+// Always claim we succeeded, so that RenderDataInfo will be called.
+bool Symbolizer::SymbolizeData(uptr addr, DataInfo *info) {
+  info->Clear();
+  info->start = addr;
+  return true;
+}
+
+// We ignore the format argument to __sanitizer_symbolize_global.
+void RenderData(InternalScopedString *buffer, const char *format,
+                const DataInfo *DI, const char *strip_path_prefix) {
+  buffer->append(kFormatData, DI->start);
+}
+
+// We don't support the stack_trace_format flag at all.
+void RenderFrame(InternalScopedString *buffer, const char *format, int frame_no,
+                 const AddressInfo &info, bool vs_style,
+                 const char *strip_path_prefix, const char *strip_func_prefix) {
+  buffer->append(kFormatFrame, frame_no, info.address);
+}
+
+Symbolizer *Symbolizer::PlatformInit() {
+  return new (symbolizer_allocator_) Symbolizer({});
+}
+
+void Symbolizer::LateInitialize() { Symbolizer::GetOrInit(); }
+
+}  // namespace __sanitizer
+
+#endif  // SANITIZER_FUCHSIA
diff --git a/lib/sanitizer_common/sanitizer_symbolizer_internal.h b/lib/sanitizer_common/sanitizer_symbolizer_internal.h
index 2ae42b3..a2ac118 100644
--- a/lib/sanitizer_common/sanitizer_symbolizer_internal.h
+++ b/lib/sanitizer_common/sanitizer_symbolizer_internal.h
@@ -15,6 +15,7 @@
 #define SANITIZER_SYMBOLIZER_INTERNAL_H
 
 #include "sanitizer_symbolizer.h"
+#include "sanitizer_file.h"
 
 namespace __sanitizer {
 
diff --git a/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cc b/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cc
index 614470a..a4bab66 100644
--- a/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cc
+++ b/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cc
@@ -17,6 +17,18 @@
 
 namespace __sanitizer {
 
+Symbolizer *Symbolizer::GetOrInit() {
+  SpinMutexLock l(&init_mu_);
+  if (symbolizer_)
+    return symbolizer_;
+  symbolizer_ = PlatformInit();
+  CHECK(symbolizer_);
+  return symbolizer_;
+}
+
+// See sanitizer_symbolizer_fuchsia.cc.
+#if !SANITIZER_FUCHSIA
+
 const char *ExtractToken(const char *str, const char *delims, char **result) {
   uptr prefix_len = internal_strcspn(str, delims);
   *result = (char*)InternalAlloc(prefix_len + 1);
@@ -151,37 +163,47 @@
   return true;
 }
 
+void Symbolizer::RefreshModules() {
+  modules_.init();
+  fallback_modules_.fallbackInit();
+  RAW_CHECK(modules_.size() > 0);
+  modules_fresh_ = true;
+}
+
+static const LoadedModule *SearchForModule(const ListOfModules &modules,
+                                           uptr address) {
+  for (uptr i = 0; i < modules.size(); i++) {
+    if (modules[i].containsAddress(address)) {
+      return &modules[i];
+    }
+  }
+  return nullptr;
+}
+
 const LoadedModule *Symbolizer::FindModuleForAddress(uptr address) {
   bool modules_were_reloaded = false;
   if (!modules_fresh_) {
-    modules_.init();
-    RAW_CHECK(modules_.size() > 0);
-    modules_fresh_ = true;
+    RefreshModules();
     modules_were_reloaded = true;
   }
-  for (uptr i = 0; i < modules_.size(); i++) {
-    if (modules_[i].containsAddress(address)) {
-      return &modules_[i];
-    }
-  }
-  // Reload the modules and look up again, if we haven't tried it yet.
-  if (!modules_were_reloaded) {
-    // FIXME: set modules_fresh_ from dlopen()/dlclose() interceptors.
-    // It's too aggressive to reload the list of modules each time we fail
-    // to find a module for a given address.
-    modules_fresh_ = false;
-    return FindModuleForAddress(address);
-  }
-  return 0;
-}
+  const LoadedModule *module = SearchForModule(modules_, address);
+  if (module) return module;
 
-Symbolizer *Symbolizer::GetOrInit() {
-  SpinMutexLock l(&init_mu_);
-  if (symbolizer_)
-    return symbolizer_;
-  symbolizer_ = PlatformInit();
-  CHECK(symbolizer_);
-  return symbolizer_;
+  // dlopen/dlclose interceptors invalidate the module list, but when
+  // interception is disabled, we need to retry if the lookup fails in
+  // case the module list changed.
+#if !SANITIZER_INTERCEPT_DLOPEN_DLCLOSE
+  if (!modules_were_reloaded) {
+    RefreshModules();
+    module = SearchForModule(modules_, address);
+    if (module) return module;
+  }
+#endif
+
+  if (fallback_modules_.size()) {
+    module = SearchForModule(fallback_modules_, address);
+  }
+  return module;
 }
 
 // For now we assume the following protocol:
@@ -451,7 +473,7 @@
     if (ReachedEndOfOutput(buffer, read_len))
       break;
     if (read_len + 1 == max_length) {
-      Report("WARNING: Symbolizer buffer too small");
+      Report("WARNING: Symbolizer buffer too small\n");
       read_len = 0;
       break;
     }
@@ -472,4 +494,6 @@
   return true;
 }
 
+#endif  // !SANITIZER_FUCHSIA
+
 }  // namespace __sanitizer
diff --git a/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cc b/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cc
index d3c77b5..60f22bf 100644
--- a/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cc
+++ b/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cc
@@ -16,6 +16,7 @@
 #if SANITIZER_POSIX
 #include "sanitizer_allocator_internal.h"
 #include "sanitizer_common.h"
+#include "sanitizer_file.h"
 #include "sanitizer_flags.h"
 #include "sanitizer_internal_defs.h"
 #include "sanitizer_linux.h"
@@ -271,6 +272,10 @@
   bool ReadFromSymbolizer(char *buffer, uptr max_length) override {
     if (!SymbolizerProcess::ReadFromSymbolizer(buffer, max_length))
       return false;
+    // The returned buffer is empty when output is valid, but exceeds
+    // max_length.
+    if (*buffer == '\0')
+      return true;
     // We should cut out output_terminator_ at the end of given buffer,
     // appended by addr2line to mark the end of its meaningful output.
     // We cannot scan buffer from it's beginning, because it is legal for it
@@ -470,16 +475,16 @@
 
   // Otherwise symbolizer program is unknown, let's search $PATH
   CHECK(path == nullptr);
-  if (const char *found_path = FindPathToBinary("llvm-symbolizer")) {
-    VReport(2, "Using llvm-symbolizer found at: %s\n", found_path);
-    return new(*allocator) LLVMSymbolizer(found_path, allocator);
-  }
 #if SANITIZER_MAC
   if (const char *found_path = FindPathToBinary("atos")) {
     VReport(2, "Using atos found at: %s\n", found_path);
     return new(*allocator) AtosSymbolizer(found_path, allocator);
   }
 #endif  // SANITIZER_MAC
+  if (const char *found_path = FindPathToBinary("llvm-symbolizer")) {
+    VReport(2, "Using llvm-symbolizer found at: %s\n", found_path);
+    return new(*allocator) LLVMSymbolizer(found_path, allocator);
+  }
   if (common_flags()->allow_addr2line) {
     if (const char *found_path = FindPathToBinary("addr2line")) {
       VReport(2, "Using addr2line found at: %s\n", found_path);
@@ -495,7 +500,7 @@
     VReport(2, "Symbolizer is disabled.\n");
     return;
   }
-  if (IsReportingOOM()) {
+  if (IsAllocatorOutOfMemory()) {
     VReport(2, "Cannot use internal symbolizer: out of memory\n");
   } else if (SymbolizerTool *tool = InternalSymbolizer::get(allocator)) {
     VReport(2, "Using internal symbolizer.\n");
diff --git a/lib/sanitizer_common/sanitizer_syscall_generic.inc b/lib/sanitizer_common/sanitizer_syscall_generic.inc
index 15cf05f..138d5ca 100644
--- a/lib/sanitizer_common/sanitizer_syscall_generic.inc
+++ b/lib/sanitizer_common/sanitizer_syscall_generic.inc
@@ -7,20 +7,48 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// Generic implementations of internal_syscall and internal_iserror.
+// Generic implementations of internal_syscall* and internal_iserror.
 //
 //===----------------------------------------------------------------------===//
 
-#if SANITIZER_FREEBSD || SANITIZER_MAC
+#if SANITIZER_FREEBSD || SANITIZER_MAC || SANITIZER_NETBSD
 # define SYSCALL(name) SYS_ ## name
 #else
 # define SYSCALL(name) __NR_ ## name
 #endif
 
-#if (SANITIZER_FREEBSD || SANITIZER_MAC) && defined(__x86_64__)
+#if SANITIZER_NETBSD
+// We use 3 kinds of internal_syscall's for different types of retval in order
+// to address differences in calling conventions (e.g. registers to place the
+// return value in).
+//   - internal_syscall     for 32-bit length (int, pid_t)
+//   - internal_syscall64   for 64-bit length (off_t)
+//   - internal_syscall_ptr for pointer and (s)size_t
+# define  internal_syscall      syscall
+# define  internal_syscall64    __syscall
+// Handle syscall renames manually
+# define SYS_stat SYS___stat50
+# define SYS_lstat SYS___lstat50
+# define SYS_fstat SYS___fstat50
+# define SYS_gettimeofday SYS___gettimeofday50
+# define SYS_wait4 SYS___wait450
+# define SYS_getdents SYS___getdents30
+# define SYS_sigaltstack SYS___sigaltstack14
+# define SYS_sigprocmask SYS___sigprocmask14
+# define SYS_nanosleep SYS___nanosleep50
+# if SANITIZER_WORDSIZE == 64
+#  define internal_syscall_ptr  __syscall
+# else
+#  define internal_syscall_ptr  syscall
+# endif
+#elif defined(__x86_64__) && (SANITIZER_FREEBSD || SANITIZER_MAC)
 # define internal_syscall __syscall
+# define internal_syscall64 __syscall
+# define internal_syscall_ptr __syscall
 # else
 # define internal_syscall syscall
+# define internal_syscall64 syscall
+# define internal_syscall_ptr syscall
 #endif
 
 bool internal_iserror(uptr retval, int *rverrno) {
diff --git a/lib/sanitizer_common/sanitizer_syscall_linux_aarch64.inc b/lib/sanitizer_common/sanitizer_syscall_linux_aarch64.inc
index 7ab1d76..1f05ed9 100644
--- a/lib/sanitizer_common/sanitizer_syscall_linux_aarch64.inc
+++ b/lib/sanitizer_common/sanitizer_syscall_linux_aarch64.inc
@@ -127,6 +127,9 @@
 
 #define internal_syscall(...) __SYSCALL_DISP(__internal_syscall, __VA_ARGS__)
 
+#define internal_syscall_ptr internal_syscall
+#define internal_syscall64 internal_syscall
+
 // Helper function used to avoid cobbler errno.
 bool internal_iserror(uptr retval, int *rverrno) {
   if (retval >= (uptr)-4095) {
diff --git a/lib/sanitizer_common/sanitizer_syscall_linux_x86_64.inc b/lib/sanitizer_common/sanitizer_syscall_linux_x86_64.inc
index 9853a6a..327aaa8 100644
--- a/lib/sanitizer_common/sanitizer_syscall_linux_x86_64.inc
+++ b/lib/sanitizer_common/sanitizer_syscall_linux_x86_64.inc
@@ -20,6 +20,9 @@
   return retval;
 }
 
+#define internal_syscall_ptr internal_syscall
+#define internal_syscall64 internal_syscall
+
 template <typename T1>
 static uptr internal_syscall(u64 nr, T1 arg1) {
   u64 retval;
diff --git a/lib/sanitizer_common/sanitizer_thread_registry.cc b/lib/sanitizer_common/sanitizer_thread_registry.cc
index c5b2e09..79f3caa 100644
--- a/lib/sanitizer_common/sanitizer_thread_registry.cc
+++ b/lib/sanitizer_common/sanitizer_thread_registry.cc
@@ -54,12 +54,16 @@
 }
 
 void ThreadContextBase::SetFinished() {
-  if (!detached)
-    status = ThreadStatusFinished;
+  // ThreadRegistry::FinishThread calls here in ThreadStatusCreated state
+  // for a thread that never actually started.  In that case the thread
+  // should go to ThreadStatusFinished regardless of whether it was created
+  // as detached.
+  if (!detached || status == ThreadStatusCreated) status = ThreadStatusFinished;
   OnFinished();
 }
 
-void ThreadContextBase::SetStarted(uptr _os_id, bool _workerthread, void *arg) {
+void ThreadContextBase::SetStarted(tid_t _os_id, bool _workerthread,
+                                   void *arg) {
   status = ThreadStatusRunning;
   os_id = _os_id;
   workerthread = _workerthread;
@@ -193,7 +197,7 @@
       tctx->status != ThreadStatusDead);
 }
 
-ThreadContextBase *ThreadRegistry::FindThreadContextByOsIDLocked(uptr os_id) {
+ThreadContextBase *ThreadRegistry::FindThreadContextByOsIDLocked(tid_t os_id) {
   return FindThreadContextLocked(FindThreadContextByOsIdCallback,
                                  (void *)os_id);
 }
@@ -203,7 +207,8 @@
   CHECK_LT(tid, n_contexts_);
   ThreadContextBase *tctx = threads_[tid];
   CHECK_NE(tctx, 0);
-  CHECK_EQ(ThreadStatusRunning, tctx->status);
+  CHECK_EQ(SANITIZER_FUCHSIA ? ThreadStatusCreated : ThreadStatusRunning,
+           tctx->status);
   tctx->SetName(name);
 }
 
@@ -250,24 +255,35 @@
   QuarantinePush(tctx);
 }
 
+// Normally this is called when the thread is about to exit.  If
+// called in ThreadStatusCreated state, then this thread was never
+// really started.  We just did CreateThread for a prospective new
+// thread before trying to create it, and then failed to actually
+// create it, and so never called StartThread.
 void ThreadRegistry::FinishThread(u32 tid) {
   BlockingMutexLock l(&mtx_);
   CHECK_GT(alive_threads_, 0);
   alive_threads_--;
-  CHECK_GT(running_threads_, 0);
-  running_threads_--;
   CHECK_LT(tid, n_contexts_);
   ThreadContextBase *tctx = threads_[tid];
   CHECK_NE(tctx, 0);
-  CHECK_EQ(ThreadStatusRunning, tctx->status);
+  bool dead = tctx->detached;
+  if (tctx->status == ThreadStatusRunning) {
+    CHECK_GT(running_threads_, 0);
+    running_threads_--;
+  } else {
+    // The thread never really existed.
+    CHECK_EQ(tctx->status, ThreadStatusCreated);
+    dead = true;
+  }
   tctx->SetFinished();
-  if (tctx->detached) {
+  if (dead) {
     tctx->SetDead();
     QuarantinePush(tctx);
   }
 }
 
-void ThreadRegistry::StartThread(u32 tid, uptr os_id, bool workerthread,
+void ThreadRegistry::StartThread(u32 tid, tid_t os_id, bool workerthread,
                                  void *arg) {
   BlockingMutexLock l(&mtx_);
   running_threads_++;
diff --git a/lib/sanitizer_common/sanitizer_thread_registry.h b/lib/sanitizer_common/sanitizer_thread_registry.h
index 17b1d5d..9aae875 100644
--- a/lib/sanitizer_common/sanitizer_thread_registry.h
+++ b/lib/sanitizer_common/sanitizer_thread_registry.h
@@ -39,7 +39,7 @@
   const u32 tid;  // Thread ID. Main thread should have tid = 0.
   u64 unique_id;  // Unique thread ID.
   u32 reuse_count;  // Number of times this tid was reused.
-  uptr os_id;     // PID (used for reporting).
+  tid_t os_id;     // PID (used for reporting).
   uptr user_id;   // Some opaque user thread id (e.g. pthread_t).
   char name[64];  // As annotated by user.
 
@@ -55,7 +55,7 @@
   void SetDead();
   void SetJoined(void *arg);
   void SetFinished();
-  void SetStarted(uptr _os_id, bool _workerthread, void *arg);
+  void SetStarted(tid_t _os_id, bool _workerthread, void *arg);
   void SetCreated(uptr _user_id, u64 _unique_id, bool _detached,
                   u32 _parent_tid, void *arg);
   void Reset();
@@ -109,14 +109,14 @@
   // is found.
   ThreadContextBase *FindThreadContextLocked(FindThreadCallback cb,
                                              void *arg);
-  ThreadContextBase *FindThreadContextByOsIDLocked(uptr os_id);
+  ThreadContextBase *FindThreadContextByOsIDLocked(tid_t os_id);
 
   void SetThreadName(u32 tid, const char *name);
   void SetThreadNameByUserId(uptr user_id, const char *name);
   void DetachThread(u32 tid, void *arg);
   void JoinThread(u32 tid, void *arg);
   void FinishThread(u32 tid);
-  void StartThread(u32 tid, uptr os_id, bool workerthread, void *arg);
+  void StartThread(u32 tid, tid_t os_id, bool workerthread, void *arg);
 
  private:
   const ThreadContextFactory context_factory_;
diff --git a/lib/sanitizer_common/sanitizer_tls_get_addr.cc b/lib/sanitizer_common/sanitizer_tls_get_addr.cc
index 77c1947..6fa0e92 100644
--- a/lib/sanitizer_common/sanitizer_tls_get_addr.cc
+++ b/lib/sanitizer_common/sanitizer_tls_get_addr.cc
@@ -45,7 +45,7 @@
 
 static inline void DTLS_Deallocate(DTLS::DTV *dtv, uptr size) {
   if (!size) return;
-  VPrintf(2, "__tls_get_addr: DTLS_Deallocate %p %zd\n", dtv, size);
+  VReport(2, "__tls_get_addr: DTLS_Deallocate %p %zd\n", dtv, size);
   UnmapOrDie(dtv, size * sizeof(DTLS::DTV));
   atomic_fetch_sub(&number_of_live_dtls, 1, memory_order_relaxed);
 }
@@ -58,7 +58,7 @@
       (DTLS::DTV *)MmapOrDie(new_size * sizeof(DTLS::DTV), "DTLS_Resize");
   uptr num_live_dtls =
       atomic_fetch_add(&number_of_live_dtls, 1, memory_order_relaxed);
-  VPrintf(2, "__tls_get_addr: DTLS_Resize %p %zd\n", &dtls, num_live_dtls);
+  VReport(2, "__tls_get_addr: DTLS_Resize %p %zd\n", &dtls, num_live_dtls);
   CHECK_LT(num_live_dtls, 1 << 20);
   uptr old_dtv_size = dtls.dtv_size;
   DTLS::DTV *old_dtv = dtls.dtv;
@@ -72,7 +72,7 @@
 
 void DTLS_Destroy() {
   if (!common_flags()->intercept_tls_get_addr) return;
-  VPrintf(2, "__tls_get_addr: DTLS_Destroy %p %zd\n", &dtls, dtls.dtv_size);
+  VReport(2, "__tls_get_addr: DTLS_Destroy %p %zd\n", &dtls, dtls.dtv_size);
   uptr s = dtls.dtv_size;
   dtls.dtv_size = kDestroyedThread;  // Do this before unmap for AS-safety.
   DTLS_Deallocate(dtls.dtv, s);
@@ -97,28 +97,28 @@
   if (dtls.dtv[dso_id].beg) return 0;
   uptr tls_size = 0;
   uptr tls_beg = reinterpret_cast<uptr>(res) - arg->offset - kDtvOffset;
-  VPrintf(2, "__tls_get_addr: %p {%p,%p} => %p; tls_beg: %p; sp: %p "
+  VReport(2, "__tls_get_addr: %p {%p,%p} => %p; tls_beg: %p; sp: %p "
              "num_live_dtls %zd\n",
           arg, arg->dso_id, arg->offset, res, tls_beg, &tls_beg,
           atomic_load(&number_of_live_dtls, memory_order_relaxed));
   if (dtls.last_memalign_ptr == tls_beg) {
     tls_size = dtls.last_memalign_size;
-    VPrintf(2, "__tls_get_addr: glibc <=2.18 suspected; tls={%p,%p}\n",
+    VReport(2, "__tls_get_addr: glibc <=2.18 suspected; tls={%p,%p}\n",
         tls_beg, tls_size);
   } else if (tls_beg >= static_tls_begin && tls_beg < static_tls_end) {
     // This is the static TLS block which was initialized / unpoisoned at thread
     // creation.
-    VPrintf(2, "__tls_get_addr: static tls: %p\n", tls_beg);
+    VReport(2, "__tls_get_addr: static tls: %p\n", tls_beg);
     tls_size = 0;
   } else if ((tls_beg % 4096) == sizeof(Glibc_2_19_tls_header)) {
     // We may want to check gnu_get_libc_version().
     Glibc_2_19_tls_header *header = (Glibc_2_19_tls_header *)tls_beg - 1;
     tls_size = header->size;
     tls_beg = header->start;
-    VPrintf(2, "__tls_get_addr: glibc >=2.19 suspected; tls={%p %p}\n",
+    VReport(2, "__tls_get_addr: glibc >=2.19 suspected; tls={%p %p}\n",
         tls_beg, tls_size);
   } else {
-    VPrintf(2, "__tls_get_addr: Can't guess glibc version\n");
+    VReport(2, "__tls_get_addr: Can't guess glibc version\n");
     // This may happen inside the DTOR of main thread, so just ignore it.
     tls_size = 0;
   }
@@ -129,18 +129,26 @@
 
 void DTLS_on_libc_memalign(void *ptr, uptr size) {
   if (!common_flags()->intercept_tls_get_addr) return;
-  VPrintf(2, "DTLS_on_libc_memalign: %p %p\n", ptr, size);
+  VReport(2, "DTLS_on_libc_memalign: %p %p\n", ptr, size);
   dtls.last_memalign_ptr = reinterpret_cast<uptr>(ptr);
   dtls.last_memalign_size = size;
 }
 
 DTLS *DTLS_Get() { return &dtls; }
 
+bool DTLSInDestruction(DTLS *dtls) {
+  return dtls->dtv_size == kDestroyedThread;
+}
+
 #else
 void DTLS_on_libc_memalign(void *ptr, uptr size) {}
 DTLS::DTV *DTLS_on_tls_get_addr(void *arg, void *res) { return 0; }
 DTLS *DTLS_Get() { return 0; }
 void DTLS_Destroy() {}
+bool DTLSInDestruction(DTLS *dtls) {
+  UNREACHABLE("dtls is unsupported on this platform!");
+}
+
 #endif  // SANITIZER_INTERCEPT_TLS_GET_ADDR
 
 }  // namespace __sanitizer
diff --git a/lib/sanitizer_common/sanitizer_tls_get_addr.h b/lib/sanitizer_common/sanitizer_tls_get_addr.h
index 58d4763..199a3b2 100644
--- a/lib/sanitizer_common/sanitizer_tls_get_addr.h
+++ b/lib/sanitizer_common/sanitizer_tls_get_addr.h
@@ -55,6 +55,8 @@
 void DTLS_on_libc_memalign(void *ptr, uptr size);
 DTLS *DTLS_Get();
 void DTLS_Destroy();  // Make sure to call this before the thread is destroyed.
+// Returns true if DTLS of suspended thread is in destruction process.
+bool DTLSInDestruction(DTLS *dtls);
 
 }  // namespace __sanitizer
 
diff --git a/lib/sanitizer_common/sanitizer_unwind_linux_libcdep.cc b/lib/sanitizer_common/sanitizer_unwind_linux_libcdep.cc
index 5943125..59b7184 100644
--- a/lib/sanitizer_common/sanitizer_unwind_linux_libcdep.cc
+++ b/lib/sanitizer_common/sanitizer_unwind_linux_libcdep.cc
@@ -8,11 +8,11 @@
 //===----------------------------------------------------------------------===//
 //
 // This file contains the unwind.h-based (aka "slow") stack unwinding routines
-// available to the tools on Linux, Android, and FreeBSD.
+// available to the tools on Linux, Android, NetBSD and FreeBSD.
 //===----------------------------------------------------------------------===//
 
 #include "sanitizer_platform.h"
-#if SANITIZER_FREEBSD || SANITIZER_LINUX
+#if SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_NETBSD
 #include "sanitizer_common.h"
 #include "sanitizer_stacktrace.h"
 
@@ -78,7 +78,8 @@
 }
 #endif
 
-#ifdef __arm__
+#if defined(__arm__) && !SANITIZER_NETBSD
+// NetBSD uses dwarf EH
 #define UNWIND_STOP _URC_END_OF_STACK
 #define UNWIND_CONTINUE _URC_NO_REASON
 #else
@@ -165,4 +166,4 @@
 
 }  // namespace __sanitizer
 
-#endif  // SANITIZER_FREEBSD || SANITIZER_LINUX
+#endif  // SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_NETBSD
diff --git a/lib/sanitizer_common/sanitizer_win.cc b/lib/sanitizer_common/sanitizer_win.cc
index b1a2a53..b2fd8ba 100644
--- a/lib/sanitizer_common/sanitizer_win.cc
+++ b/lib/sanitizer_common/sanitizer_win.cc
@@ -24,10 +24,10 @@
 
 #include "sanitizer_common.h"
 #include "sanitizer_dbghelp.h"
+#include "sanitizer_file.h"
 #include "sanitizer_libc.h"
 #include "sanitizer_mutex.h"
 #include "sanitizer_placement_new.h"
-#include "sanitizer_procmaps.h"
 #include "sanitizer_stacktrace.h"
 #include "sanitizer_symbolizer.h"
 #include "sanitizer_win_defs.h"
@@ -80,7 +80,7 @@
 
 // In contrast to POSIX, on Windows GetCurrentThreadId()
 // returns a system-unique identifier.
-uptr GetTid() {
+tid_t GetTid() {
   return GetCurrentThreadId();
 }
 
@@ -131,8 +131,24 @@
   }
 }
 
+static void *ReturnNullptrOnOOMOrDie(uptr size, const char *mem_type,
+                                     const char *mmap_type) {
+  error_t last_error = GetLastError();
+  if (last_error == ERROR_NOT_ENOUGH_MEMORY)
+    return nullptr;
+  ReportMmapFailureAndDie(size, mem_type, mmap_type, last_error);
+}
+
+void *MmapOrDieOnFatalError(uptr size, const char *mem_type) {
+  void *rv = VirtualAlloc(0, size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
+  if (rv == 0)
+    return ReturnNullptrOnOOMOrDie(size, mem_type, "allocate");
+  return rv;
+}
+
 // We want to map a chunk of address space aligned to 'alignment'.
-void *MmapAlignedOrDie(uptr size, uptr alignment, const char *mem_type) {
+void *MmapAlignedOrDieOnFatalError(uptr size, uptr alignment,
+                                   const char *mem_type) {
   CHECK(IsPowerOfTwo(size));
   CHECK(IsPowerOfTwo(alignment));
 
@@ -142,7 +158,7 @@
   uptr mapped_addr =
       (uptr)VirtualAlloc(0, size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
   if (!mapped_addr)
-    ReportMmapFailureAndDie(size, mem_type, "allocate aligned", GetLastError());
+    return ReturnNullptrOnOOMOrDie(size, mem_type, "allocate aligned");
 
   // If we got it right on the first try, return. Otherwise, unmap it and go to
   // the slow path.
@@ -162,8 +178,7 @@
     mapped_addr =
         (uptr)VirtualAlloc(0, size + alignment, MEM_RESERVE, PAGE_NOACCESS);
     if (!mapped_addr)
-      ReportMmapFailureAndDie(size, mem_type, "allocate aligned",
-                              GetLastError());
+      return ReturnNullptrOnOOMOrDie(size, mem_type, "allocate aligned");
 
     // Find the aligned address.
     uptr aligned_addr = RoundUpTo(mapped_addr, alignment);
@@ -181,7 +196,7 @@
 
   // Fail if we can't make this work quickly.
   if (retries == kMaxRetries && mapped_addr == 0)
-    ReportMmapFailureAndDie(size, mem_type, "allocate aligned", GetLastError());
+    return ReturnNullptrOnOOMOrDie(size, mem_type, "allocate aligned");
 
   return (void *)mapped_addr;
 }
@@ -220,11 +235,53 @@
   return p;
 }
 
+// Uses fixed_addr for now.
+// Will use offset instead once we've implemented this function for real.
+uptr ReservedAddressRange::Map(uptr fixed_addr, uptr size,
+                               bool tolerate_enomem) {
+  if (tolerate_enomem) {
+    return reinterpret_cast<uptr>(MmapFixedOrDieOnFatalError(fixed_addr, size));
+  }
+  return reinterpret_cast<uptr>(MmapFixedOrDie(fixed_addr, size));
+}
+
+void ReservedAddressRange::Unmap(uptr addr, uptr size) {
+  void* addr_as_void = reinterpret_cast<void*>(addr);
+  uptr base_as_uptr = reinterpret_cast<uptr>(base_);
+  // Only unmap if it covers the entire range.
+  CHECK((addr == base_as_uptr) && (size == size_));
+  UnmapOrDie(addr_as_void, size);
+}
+
+void *MmapFixedOrDieOnFatalError(uptr fixed_addr, uptr size) {
+  void *p = VirtualAlloc((LPVOID)fixed_addr, size,
+      MEM_COMMIT, PAGE_READWRITE);
+  if (p == 0) {
+    char mem_type[30];
+    internal_snprintf(mem_type, sizeof(mem_type), "memory at address 0x%zx",
+                      fixed_addr);
+    return ReturnNullptrOnOOMOrDie(size, mem_type, "allocate");
+  }
+  return p;
+}
+
 void *MmapNoReserveOrDie(uptr size, const char *mem_type) {
   // FIXME: make this really NoReserve?
   return MmapOrDie(size, mem_type);
 }
 
+uptr ReservedAddressRange::Init(uptr size, const char *name, uptr fixed_addr) {
+  if (fixed_addr) {
+    base_ = MmapFixedNoAccess(fixed_addr, size, name);
+  } else {
+    base_ = MmapNoAccess(size);
+  }
+  size_ = size;
+  name_ = name;
+  return reinterpret_cast<uptr>(base_);
+}
+
+
 void *MmapFixedNoAccess(uptr fixed_addr, uptr size, const char *name) {
   (void)name; // unsupported
   void *res = VirtualAlloc((LPVOID)fixed_addr, size,
@@ -264,7 +321,8 @@
   // FIXME: add madvise-analog when we move to 64-bits.
 }
 
-uptr FindAvailableMemoryRange(uptr size, uptr alignment, uptr left_padding) {
+uptr FindAvailableMemoryRange(uptr size, uptr alignment, uptr left_padding,
+                              uptr *largest_gap_found) {
   uptr address = 0;
   while (true) {
     MEMORY_BASIC_INFORMATION info;
@@ -351,7 +409,7 @@
 
 #if !SANITIZER_GO
 int CompareModulesBase(const void *pl, const void *pr) {
-  const ModuleInfo *l = (ModuleInfo *)pl, *r = (ModuleInfo *)pr;
+  const ModuleInfo *l = (const ModuleInfo *)pl, *r = (const ModuleInfo *)pr;
   if (l->base_address < r->base_address)
     return -1;
   return l->base_address > r->base_address;
@@ -400,9 +458,6 @@
 }
 
 void PrepareForSandboxing(__sanitizer_sandbox_arguments *args) {
-#if !SANITIZER_GO
-  CovPrepareForSandboxing(args);
-#endif
 }
 
 bool StackSizeIsUnlimited() {
@@ -499,7 +554,7 @@
 }
 
 void ListOfModules::init() {
-  clear();
+  clearOrInit();
   HANDLE cur_process = GetCurrentProcess();
 
   // Query the list of modules.  Start by assuming there are no more than 256
@@ -553,11 +608,14 @@
     LoadedModule cur_module;
     cur_module.set(module_name, adjusted_base);
     // We add the whole module as one single address range.
-    cur_module.addAddressRange(base_address, end_address, /*executable*/ true);
+    cur_module.addAddressRange(base_address, end_address, /*executable*/ true,
+                               /*writable*/ true);
     modules_.push_back(cur_module);
   }
   UnmapOrDie(hmodules, modules_buffer_size);
-};
+}
+
+void ListOfModules::fallbackInit() { clear(); }
 
 // We can't use atexit() directly at __asan_init time as the CRT is not fully
 // initialized at this point.  Place the functions into a vector and use
@@ -766,7 +824,7 @@
   // FIXME: Compare with StackWalk64.
   // FIXME: Look at LLVMUnhandledExceptionFilter in Signals.inc
   size = CaptureStackBackTrace(1, Min(max_depth, kStackTraceMax),
-                               (void**)trace, 0);
+                               (void **)&trace_buffer[0], 0);
   if (size == 0)
     return;
 
@@ -831,9 +889,9 @@
   // FIXME: Decide what to do on Windows.
 }
 
-bool IsHandledDeadlySignal(int signum) {
+HandleSignalMode GetHandleSignalMode(int signum) {
   // FIXME: Decide what to do on Windows.
-  return false;
+  return kHandleSignalNo;
 }
 
 // Check based on flags if we should handle this exception.
@@ -863,32 +921,6 @@
   return false;
 }
 
-const char *DescribeSignalOrException(int signo) {
-  unsigned code = signo;
-  // Get the string description of the exception if this is a known deadly
-  // exception.
-  switch (code) {
-    case EXCEPTION_ACCESS_VIOLATION: return "access-violation";
-    case EXCEPTION_ARRAY_BOUNDS_EXCEEDED: return "array-bounds-exceeded";
-    case EXCEPTION_STACK_OVERFLOW: return "stack-overflow";
-    case EXCEPTION_DATATYPE_MISALIGNMENT: return "datatype-misalignment";
-    case EXCEPTION_IN_PAGE_ERROR: return "in-page-error";
-    case EXCEPTION_ILLEGAL_INSTRUCTION: return "illegal-instruction";
-    case EXCEPTION_PRIV_INSTRUCTION: return "priv-instruction";
-    case EXCEPTION_BREAKPOINT: return "breakpoint";
-    case EXCEPTION_FLT_DENORMAL_OPERAND: return "flt-denormal-operand";
-    case EXCEPTION_FLT_DIVIDE_BY_ZERO: return "flt-divide-by-zero";
-    case EXCEPTION_FLT_INEXACT_RESULT: return "flt-inexact-result";
-    case EXCEPTION_FLT_INVALID_OPERATION: return "flt-invalid-operation";
-    case EXCEPTION_FLT_OVERFLOW: return "flt-overflow";
-    case EXCEPTION_FLT_STACK_CHECK: return "flt-stack-check";
-    case EXCEPTION_FLT_UNDERFLOW: return "flt-underflow";
-    case EXCEPTION_INT_DIVIDE_BY_ZERO: return "int-divide-by-zero";
-    case EXCEPTION_INT_OVERFLOW: return "int-overflow";
-  }
-  return "unknown exception";
-}
-
 bool IsAccessibleMemoryRange(uptr beg, uptr size) {
   SYSTEM_INFO si;
   GetNativeSystemInfo(&si);
@@ -914,39 +946,101 @@
   return true;
 }
 
-SignalContext SignalContext::Create(void *siginfo, void *context) {
+bool SignalContext::IsStackOverflow() const {
+  return (DWORD)GetType() == EXCEPTION_STACK_OVERFLOW;
+}
+
+void SignalContext::InitPcSpBp() {
   EXCEPTION_RECORD *exception_record = (EXCEPTION_RECORD *)siginfo;
   CONTEXT *context_record = (CONTEXT *)context;
 
-  uptr pc = (uptr)exception_record->ExceptionAddress;
+  pc = (uptr)exception_record->ExceptionAddress;
 #ifdef _WIN64
-  uptr bp = (uptr)context_record->Rbp;
-  uptr sp = (uptr)context_record->Rsp;
+  bp = (uptr)context_record->Rbp;
+  sp = (uptr)context_record->Rsp;
 #else
-  uptr bp = (uptr)context_record->Ebp;
-  uptr sp = (uptr)context_record->Esp;
+  bp = (uptr)context_record->Ebp;
+  sp = (uptr)context_record->Esp;
 #endif
-  uptr access_addr = exception_record->ExceptionInformation[1];
+}
 
+uptr SignalContext::GetAddress() const {
+  EXCEPTION_RECORD *exception_record = (EXCEPTION_RECORD *)siginfo;
+  return exception_record->ExceptionInformation[1];
+}
+
+bool SignalContext::IsMemoryAccess() const {
+  return GetWriteFlag() != SignalContext::UNKNOWN;
+}
+
+SignalContext::WriteFlag SignalContext::GetWriteFlag() const {
+  EXCEPTION_RECORD *exception_record = (EXCEPTION_RECORD *)siginfo;
   // The contents of this array are documented at
   // https://msdn.microsoft.com/en-us/library/windows/desktop/aa363082(v=vs.85).aspx
   // The first element indicates read as 0, write as 1, or execute as 8.  The
   // second element is the faulting address.
-  WriteFlag write_flag = SignalContext::UNKNOWN;
   switch (exception_record->ExceptionInformation[0]) {
-  case 0: write_flag = SignalContext::READ; break;
-  case 1: write_flag = SignalContext::WRITE; break;
-  case 8: write_flag = SignalContext::UNKNOWN; break;
+    case 0:
+      return SignalContext::READ;
+    case 1:
+      return SignalContext::WRITE;
+    case 8:
+      return SignalContext::UNKNOWN;
   }
-  bool is_memory_access = write_flag != SignalContext::UNKNOWN;
-  return SignalContext(context, access_addr, pc, sp, bp, is_memory_access,
-                       write_flag);
+  return SignalContext::UNKNOWN;
 }
 
 void SignalContext::DumpAllRegisters(void *context) {
   // FIXME: Implement this.
 }
 
+int SignalContext::GetType() const {
+  return static_cast<const EXCEPTION_RECORD *>(siginfo)->ExceptionCode;
+}
+
+const char *SignalContext::Describe() const {
+  unsigned code = GetType();
+  // Get the string description of the exception if this is a known deadly
+  // exception.
+  switch (code) {
+    case EXCEPTION_ACCESS_VIOLATION:
+      return "access-violation";
+    case EXCEPTION_ARRAY_BOUNDS_EXCEEDED:
+      return "array-bounds-exceeded";
+    case EXCEPTION_STACK_OVERFLOW:
+      return "stack-overflow";
+    case EXCEPTION_DATATYPE_MISALIGNMENT:
+      return "datatype-misalignment";
+    case EXCEPTION_IN_PAGE_ERROR:
+      return "in-page-error";
+    case EXCEPTION_ILLEGAL_INSTRUCTION:
+      return "illegal-instruction";
+    case EXCEPTION_PRIV_INSTRUCTION:
+      return "priv-instruction";
+    case EXCEPTION_BREAKPOINT:
+      return "breakpoint";
+    case EXCEPTION_FLT_DENORMAL_OPERAND:
+      return "flt-denormal-operand";
+    case EXCEPTION_FLT_DIVIDE_BY_ZERO:
+      return "flt-divide-by-zero";
+    case EXCEPTION_FLT_INEXACT_RESULT:
+      return "flt-inexact-result";
+    case EXCEPTION_FLT_INVALID_OPERATION:
+      return "flt-invalid-operation";
+    case EXCEPTION_FLT_OVERFLOW:
+      return "flt-overflow";
+    case EXCEPTION_FLT_STACK_CHECK:
+      return "flt-stack-check";
+    case EXCEPTION_FLT_UNDERFLOW:
+      return "flt-underflow";
+    case EXCEPTION_INT_DIVIDE_BY_ZERO:
+      return "int-divide-by-zero";
+    case EXCEPTION_INT_OVERFLOW:
+      return "int-overflow";
+  }
+  return "unknown exception";
+}
+
 uptr ReadBinaryName(/*out*/char *buf, uptr buf_len) {
   // FIXME: Actually implement this function.
   CHECK_GT(buf_len, 0);
@@ -994,6 +1088,11 @@
   // Do nothing.
 }
 
+// FIXME: implement on this platform.
+bool GetRandom(void *buffer, uptr length, bool blocking) {
+  UNIMPLEMENTED();
+}
+
 }  // namespace __sanitizer
 
 #endif  // _WIN32
diff --git a/lib/sanitizer_common/scripts/check_lint.sh b/lib/sanitizer_common/scripts/check_lint.sh
index 9108a81..82e4bc8 100755
--- a/lib/sanitizer_common/scripts/check_lint.sh
+++ b/lib/sanitizer_common/scripts/check_lint.sh
@@ -18,7 +18,7 @@
 # Filters
 # TODO: remove some of these filters
 COMMON_LINT_FILTER=-build/include,-build/header_guard,-legal/copyright,-whitespace/comments,-readability/casting,\
--build/namespaces
+-build/namespaces,-readability/braces
 ASAN_RTL_LINT_FILTER=${COMMON_LINT_FILTER},-runtime/int
 ASAN_TEST_LINT_FILTER=${COMMON_LINT_FILTER},-runtime/sizeof,-runtime/int,-runtime/printf,-runtime/threadsafe_fn
 ASAN_LIT_TEST_LINT_FILTER=${ASAN_TEST_LINT_FILTER},-whitespace/line_length
diff --git a/lib/sanitizer_common/scripts/sancov.py b/lib/sanitizer_common/scripts/sancov.py
index e2eba36..3560639 100755
--- a/lib/sanitizer_common/scripts/sancov.py
+++ b/lib/sanitizer_common/scripts/sancov.py
@@ -194,7 +194,7 @@
   # - with call or callq,
   # - directly or via PLT.
   cmd = "objdump -d %s | " \
-        "grep '^\s\+[0-9a-f]\+:.*\scall\(q\|\)\s\+[0-9a-f]\+ <__sanitizer_cov\(_with_check\|\)\(@plt\|\)>' | " \
+        "grep '^\s\+[0-9a-f]\+:.*\scall\(q\|\)\s\+[0-9a-f]\+ <__sanitizer_cov\(_with_check\|\|_trace_pc_guard\)\(@plt\|\)>' | " \
         "grep '^\s\+[0-9a-f]\+' -o" % binary
   proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                           shell=True)
diff --git a/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh b/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh
index 4a0fb03..0559a2e 100755
--- a/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh
+++ b/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh
@@ -110,12 +110,12 @@
 ninja cxx cxxabi
 
 FLAGS="${FLAGS} -fno-rtti -fno-exceptions"
+LLVM_FLAGS="${FLAGS} -nostdinc++ -I${ZLIB_BUILD} -I${LIBCXX_BUILD}/include/c++/v1"
 
 # Build LLVM.
 if [[ ! -d ${LLVM_BUILD} ]]; then
   mkdir -p ${LLVM_BUILD}
   cd ${LLVM_BUILD}
-  LLVM_FLAGS="${FLAGS} -I${ZLIB_BUILD} -I${LIBCXX_BUILD}/include/c++/v1"
   cmake -GNinja \
     -DCMAKE_BUILD_TYPE=Release \
     -DCMAKE_C_COMPILER=$CC \
@@ -129,7 +129,7 @@
   $LLVM_SRC
 fi
 cd ${LLVM_BUILD}
-ninja LLVMSymbolize LLVMObject LLVMDebugInfoDWARF LLVMSupport LLVMDebugInfoPDB LLVMMC
+ninja LLVMSymbolize LLVMObject LLVMBinaryFormat LLVMDebugInfoDWARF LLVMSupport LLVMDebugInfoPDB LLVMMC
 
 cd ${BUILD_DIR}
 rm -rf ${SYMBOLIZER_BUILD}
@@ -137,7 +137,7 @@
 cd ${SYMBOLIZER_BUILD}
 
 echo "Compiling..."
-SYMBOLIZER_FLAGS="$FLAGS -std=c++11 -I${LLVM_SRC}/include -I${LLVM_BUILD}/include -I${LIBCXX_BUILD}/include/c++/v1"
+SYMBOLIZER_FLAGS="$LLVM_FLAGS -I${LLVM_SRC}/include -I${LLVM_BUILD}/include -std=c++11"
 $CXX $SYMBOLIZER_FLAGS ${SRC_DIR}/sanitizer_symbolize.cc ${SRC_DIR}/sanitizer_wrappers.cc -c
 $AR rc symbolizer.a sanitizer_symbolize.o sanitizer_wrappers.o
 
@@ -148,6 +148,7 @@
                         $LIBCXX_BUILD/lib/libc++abi.a \
                         $LLVM_BUILD/lib/libLLVMSymbolize.a \
                         $LLVM_BUILD/lib/libLLVMObject.a \
+                        $LLVM_BUILD/lib/libLLVMBinaryFormat.a \
                         $LLVM_BUILD/lib/libLLVMDebugInfoDWARF.a \
                         $LLVM_BUILD/lib/libLLVMSupport.a \
                         $LLVM_BUILD/lib/libLLVMDebugInfoPDB.a \
diff --git a/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt b/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt
index 033acf7..a23c953 100644
--- a/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt
+++ b/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt
@@ -37,6 +37,7 @@
 cfgetospeed U
 dl_iterate_phdr U
 dlsym U
+dup U
 dup2 U
 environ U
 execv U
@@ -58,6 +59,7 @@
 getpid U
 gettimeofday U
 ioctl U
+isalpha U
 isatty U
 isprint U
 isupper U
diff --git a/lib/sanitizer_common/tests/CMakeLists.txt b/lib/sanitizer_common/tests/CMakeLists.txt
index b310f93..2e55257 100644
--- a/lib/sanitizer_common/tests/CMakeLists.txt
+++ b/lib/sanitizer_common/tests/CMakeLists.txt
@@ -120,23 +120,13 @@
     FOLDER "Compiler-RT Runtime tests")
 endmacro()
 
-function(get_sanitizer_common_lib_for_arch arch lib lib_name)
+function(get_sanitizer_common_lib_for_arch arch lib)
   if(APPLE)
     set(tgt_name "RTSanitizerCommon.test.osx")
   else()
     set(tgt_name "RTSanitizerCommon.test.${arch}")
   endif()
   set(${lib} "${tgt_name}" PARENT_SCOPE)
-  if(CMAKE_CONFIGURATION_TYPES)
-   set(configuration_path "${CMAKE_CFG_INTDIR}/")
-  else()
-   set(configuration_path "")
-  endif()
-  if(NOT MSVC)
-    set(${lib_name} "${configuration_path}lib${tgt_name}.a" PARENT_SCOPE)
-  else()
-    set(${lib_name} "${configuration_path}${tgt_name}.lib" PARENT_SCOPE)
-  endif()
 endfunction()
 
 # Sanitizer_common unit tests testsuite.
@@ -145,36 +135,22 @@
 
 # Adds sanitizer tests for architecture.
 macro(add_sanitizer_tests_for_arch arch)
-  get_target_flags_for_arch(${arch} TARGET_FLAGS)
-  set(SANITIZER_TEST_SOURCES ${SANITIZER_UNITTESTS}
-                             ${COMPILER_RT_GTEST_SOURCE})
-  set(SANITIZER_TEST_COMPILE_DEPS ${SANITIZER_TEST_HEADERS})
-  if(NOT COMPILER_RT_STANDALONE_BUILD)
-    list(APPEND SANITIZER_TEST_COMPILE_DEPS gtest)
+  set(extra_flags)
+  if( CMAKE_SIZEOF_VOID_P EQUAL 4 )
+    list(APPEND extra_flags "-D_LARGEFILE_SOURCE")
+    list(APPEND extra_flags "-D_FILE_OFFSET_BITS=64")
   endif()
+  get_sanitizer_common_lib_for_arch(${arch} SANITIZER_COMMON_LIB)
+
   set(SANITIZER_TEST_OBJECTS)
-  foreach(source ${SANITIZER_TEST_SOURCES})
-    get_filename_component(basename ${source} NAME)
-    if(CMAKE_CONFIGURATION_TYPES)
-      set(output_obj "${CMAKE_CFG_INTDIR}/${basename}.${arch}.o")
-    else()
-      set(output_obj "${basename}.${arch}.o")
-    endif()
-    clang_compile(${output_obj} ${source}
-                  CFLAGS ${SANITIZER_TEST_CFLAGS_COMMON} ${TARGET_FLAGS}
-                  DEPS ${SANITIZER_TEST_COMPILE_DEPS})
-    list(APPEND SANITIZER_TEST_OBJECTS ${output_obj})
-  endforeach()
-  get_sanitizer_common_lib_for_arch(${arch} SANITIZER_COMMON_LIB
-                                    SANITIZER_COMMON_LIB_NAME)
-  # Add unittest target.
-  set(SANITIZER_TEST_NAME "Sanitizer-${arch}-Test")
-  add_compiler_rt_test(SanitizerUnitTests ${SANITIZER_TEST_NAME}
-                       OBJECTS ${SANITIZER_TEST_OBJECTS}
-                               ${SANITIZER_COMMON_LIB_NAME}
-                       DEPS ${SANITIZER_TEST_OBJECTS} ${SANITIZER_COMMON_LIB}
-                       LINK_FLAGS ${SANITIZER_TEST_LINK_FLAGS_COMMON}
-                                  ${TARGET_FLAGS})
+  generate_compiler_rt_tests(SANITIZER_TEST_OBJECTS SanitizerUnitTests
+    "Sanitizer-${arch}-Test" ${arch}
+    RUNTIME "${SANITIZER_COMMON_LIB}"
+    SOURCES ${SANITIZER_UNITTESTS} ${COMPILER_RT_GTEST_SOURCE}
+    COMPILE_DEPS ${SANITIZER_TEST_HEADERS}
+    DEPS gtest
+    CFLAGS  ${SANITIZER_TEST_CFLAGS_COMMON} ${extra_flags}
+    LINK_FLAGS ${SANITIZER_TEST_LINK_FLAGS_COMMON} ${extra_flags})
 
   if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux" AND "${arch}" STREQUAL "x86_64")
     # Test that the libc-independent part of sanitizer_common is indeed
@@ -184,7 +160,7 @@
                   sanitizer_nolibc_test_main.cc
                   CFLAGS ${SANITIZER_TEST_CFLAGS_COMMON} ${TARGET_FLAGS}
                   DEPS ${SANITIZER_TEST_COMPILE_DEPS})
-    add_compiler_rt_test(SanitizerUnitTests "Sanitizer-${arch}-Test-Nolibc"
+    add_compiler_rt_test(SanitizerUnitTests "Sanitizer-${arch}-Test-Nolibc" ${arch}
                          OBJECTS sanitizer_nolibc_test_main.${arch}.o
                                  -Wl,-whole-archive
                                  libRTSanitizerCommon.test.nolibc.${arch}.a
diff --git a/lib/sanitizer_common/tests/sanitizer_allocator_test.cc b/lib/sanitizer_common/tests/sanitizer_allocator_test.cc
index e14517f..7b5e3e2 100644
--- a/lib/sanitizer_common/tests/sanitizer_allocator_test.cc
+++ b/lib/sanitizer_common/tests/sanitizer_allocator_test.cc
@@ -20,6 +20,7 @@
 
 #include "gtest/gtest.h"
 
+#include <stdio.h>
 #include <stdlib.h>
 #include <algorithm>
 #include <vector>
@@ -108,13 +109,17 @@
 static const uptr kRegionSizeLog = FIRST_32_SECOND_64(20, 24);
 static const uptr kFlatByteMapSize = kAddressSpaceSize >> kRegionSizeLog;
 
-typedef SizeClassAllocator32<
-  0, kAddressSpaceSize,
-  /*kMetadataSize*/16,
-  CompactSizeClassMap,
-  kRegionSizeLog,
-  FlatByteMap<kFlatByteMapSize> >
-  Allocator32Compact;
+struct AP32Compact {
+  static const uptr kSpaceBeg = 0;
+  static const u64 kSpaceSize = kAddressSpaceSize;
+  static const uptr kMetadataSize = 16;
+  typedef CompactSizeClassMap SizeClassMap;
+  static const uptr kRegionSizeLog = ::kRegionSizeLog;
+  typedef FlatByteMap<kFlatByteMapSize> ByteMap;
+  typedef NoOpMapUnmapCallback MapUnmapCallback;
+  static const uptr kFlags = 0;
+};
+typedef SizeClassAllocator32<AP32Compact> Allocator32Compact;
 
 template <class SizeClassMap>
 void TestSizeClassMap() {
@@ -236,6 +241,23 @@
   TestSizeClassAllocator<Allocator32Compact>();
 }
 
+struct AP32SeparateBatches {
+  static const uptr kSpaceBeg = 0;
+  static const u64 kSpaceSize = kAddressSpaceSize;
+  static const uptr kMetadataSize = 16;
+  typedef DefaultSizeClassMap SizeClassMap;
+  static const uptr kRegionSizeLog = ::kRegionSizeLog;
+  typedef FlatByteMap<kFlatByteMapSize> ByteMap;
+  typedef NoOpMapUnmapCallback MapUnmapCallback;
+  static const uptr kFlags =
+      SizeClassAllocator32FlagMasks::kUseSeparateSizeClassForBatch;
+};
+typedef SizeClassAllocator32<AP32SeparateBatches> Allocator32SeparateBatches;
+
+TEST(SanitizerCommon, SizeClassAllocator32SeparateBatches) {
+  TestSizeClassAllocator<Allocator32SeparateBatches>();
+}
+
 template <class Allocator>
 void SizeClassAllocatorMetadataStress() {
   Allocator *a = new Allocator;
@@ -386,17 +408,21 @@
 #endif
 #endif
 
+struct AP32WithCallback {
+  static const uptr kSpaceBeg = 0;
+  static const u64 kSpaceSize = kAddressSpaceSize;
+  static const uptr kMetadataSize = 16;
+  typedef CompactSizeClassMap SizeClassMap;
+  static const uptr kRegionSizeLog = ::kRegionSizeLog;
+  typedef FlatByteMap<kFlatByteMapSize> ByteMap;
+  typedef TestMapUnmapCallback MapUnmapCallback;
+  static const uptr kFlags = 0;
+};
+
 TEST(SanitizerCommon, SizeClassAllocator32MapUnmapCallback) {
   TestMapUnmapCallback::map_count = 0;
   TestMapUnmapCallback::unmap_count = 0;
-  typedef SizeClassAllocator32<
-      0, kAddressSpaceSize,
-      /*kMetadataSize*/16,
-      CompactSizeClassMap,
-      kRegionSizeLog,
-      FlatByteMap<kFlatByteMapSize>,
-      TestMapUnmapCallback>
-    Allocator32WithCallBack;
+  typedef SizeClassAllocator32<AP32WithCallback> Allocator32WithCallBack;
   Allocator32WithCallBack *a = new Allocator32WithCallBack;
   a->Init(kReleaseToOSIntervalNever);
   EXPECT_EQ(TestMapUnmapCallback::map_count, 0);
@@ -418,8 +444,8 @@
 TEST(SanitizerCommon, LargeMmapAllocatorMapUnmapCallback) {
   TestMapUnmapCallback::map_count = 0;
   TestMapUnmapCallback::unmap_count = 0;
-  LargeMmapAllocator<TestMapUnmapCallback> a;
-  a.Init(/* may_return_null */ false);
+  LargeMmapAllocator<TestMapUnmapCallback, DieOnFailure> a;
+  a.Init();
   AllocatorStats stats;
   stats.Init();
   void *x = a.Allocate(&stats, 1 << 20, 1);
@@ -428,35 +454,36 @@
   EXPECT_EQ(TestMapUnmapCallback::unmap_count, 1);
 }
 
-template<class Allocator>
-void FailInAssertionOnOOM() {
-  Allocator a;
-  a.Init(kReleaseToOSIntervalNever);
-  SizeClassAllocatorLocalCache<Allocator> cache;
-  memset(&cache, 0, sizeof(cache));
-  cache.Init(0);
-  AllocatorStats stats;
-  stats.Init();
-  const size_t kNumChunks = 128;
-  uint32_t chunks[kNumChunks];
-  for (int i = 0; i < 1000000; i++) {
-    a.GetFromAllocator(&stats, 52, chunks, kNumChunks);
-  }
-
-  a.TestOnlyUnmap();
-}
-
 // Don't test OOM conditions on Win64 because it causes other tests on the same
 // machine to OOM.
 #if SANITIZER_CAN_USE_ALLOCATOR64 && !SANITIZER_WINDOWS64 && !SANITIZER_ANDROID
 TEST(SanitizerCommon, SizeClassAllocator64Overflow) {
-  EXPECT_DEATH(FailInAssertionOnOOM<Allocator64>(), "Out of memory");
+  Allocator64 a;
+  a.Init(kReleaseToOSIntervalNever);
+  SizeClassAllocatorLocalCache<Allocator64> cache;
+  memset(&cache, 0, sizeof(cache));
+  cache.Init(0);
+  AllocatorStats stats;
+  stats.Init();
+
+  const size_t kNumChunks = 128;
+  uint32_t chunks[kNumChunks];
+  bool allocation_failed = false;
+  for (int i = 0; i < 1000000; i++) {
+    if (!a.GetFromAllocator(&stats, 52, chunks, kNumChunks)) {
+      allocation_failed = true;
+      break;
+    }
+  }
+  EXPECT_EQ(allocation_failed, true);
+
+  a.TestOnlyUnmap();
 }
 #endif
 
 TEST(SanitizerCommon, LargeMmapAllocator) {
-  LargeMmapAllocator<> a;
-  a.Init(/* may_return_null */ false);
+  LargeMmapAllocator<NoOpMapUnmapCallback, DieOnFailure> a;
+  a.Init();
   AllocatorStats stats;
   stats.Init();
 
@@ -538,8 +565,9 @@
   typedef
       CombinedAllocator<PrimaryAllocator, AllocatorCache, SecondaryAllocator>
       Allocator;
+  SetAllocatorMayReturnNull(true);
   Allocator *a = new Allocator;
-  a->Init(/* may_return_null */ true, kReleaseToOSIntervalNever);
+  a->Init(kReleaseToOSIntervalNever);
   std::mt19937 r;
 
   AllocatorCache cache;
@@ -553,7 +581,7 @@
   EXPECT_EQ(a->Allocate(&cache, (uptr)-1 - 1023, 1024), (void*)0);
 
   // Set to false
-  a->SetMayReturnNull(false);
+  SetAllocatorMayReturnNull(false);
   EXPECT_DEATH(a->Allocate(&cache, -1, 1),
                "allocator is terminating the process");
 
@@ -865,8 +893,8 @@
 }
 
 TEST(SanitizerCommon, LargeMmapAllocatorIteration) {
-  LargeMmapAllocator<> a;
-  a.Init(/* may_return_null */ false);
+  LargeMmapAllocator<NoOpMapUnmapCallback, DieOnFailure> a;
+  a.Init();
   AllocatorStats stats;
   stats.Init();
 
@@ -892,8 +920,8 @@
 }
 
 TEST(SanitizerCommon, LargeMmapAllocatorBlockBegin) {
-  LargeMmapAllocator<> a;
-  a.Init(/* may_return_null */ false);
+  LargeMmapAllocator<NoOpMapUnmapCallback, DieOnFailure> a;
+  a.Init();
   AllocatorStats stats;
   stats.Init();
 
@@ -961,9 +989,9 @@
   const uptr kAllocationSize = SpecialSizeClassMap::Size(kClassID);
   ASSERT_LT(2 * kAllocationSize, kRegionSize);
   ASSERT_GT(3 * kAllocationSize, kRegionSize);
-  cache.Allocate(a, kClassID);
-  EXPECT_DEATH(cache.Allocate(a, kClassID) && cache.Allocate(a, kClassID),
-               "The process has exhausted");
+  EXPECT_NE(cache.Allocate(a, kClassID), nullptr);
+  EXPECT_NE(cache.Allocate(a, kClassID), nullptr);
+  EXPECT_EQ(cache.Allocate(a, kClassID), nullptr);
 
   const uptr Class2 = 100;
   const uptr Size2 = SpecialSizeClassMap::Size(Class2);
@@ -971,11 +999,12 @@
   char *p[7];
   for (int i = 0; i < 7; i++) {
     p[i] = (char*)cache.Allocate(a, Class2);
+    EXPECT_NE(p[i], nullptr);
     fprintf(stderr, "p[%d] %p s = %lx\n", i, (void*)p[i], Size2);
     p[i][Size2 - 1] = 42;
     if (i) ASSERT_LT(p[i - 1], p[i]);
   }
-  EXPECT_DEATH(cache.Allocate(a, Class2), "The process has exhausted");
+  EXPECT_EQ(cache.Allocate(a, Class2), nullptr);
   cache.Deallocate(a, Class2, p[0]);
   cache.Drain(a);
   ASSERT_EQ(p[6][Size2 - 1], 42);
@@ -985,6 +1014,282 @@
 
 #endif
 
+#if SANITIZER_CAN_USE_ALLOCATOR64
+
+class NoMemoryMapper {
+ public:
+  uptr last_request_buffer_size;
+
+  NoMemoryMapper() : last_request_buffer_size(0) {}
+
+  uptr MapPackedCounterArrayBuffer(uptr buffer_size) {
+    last_request_buffer_size = buffer_size;
+    return 0;
+  }
+  void UnmapPackedCounterArrayBuffer(uptr buffer, uptr buffer_size) {}
+};
+
+class RedZoneMemoryMapper {
+ public:
+  RedZoneMemoryMapper() {
+    const auto page_size = GetPageSize();
+    buffer = MmapOrDie(3ULL * page_size, "");
+    MprotectNoAccess(reinterpret_cast<uptr>(buffer), page_size);
+    MprotectNoAccess(reinterpret_cast<uptr>(buffer) + page_size * 2, page_size);
+  }
+  ~RedZoneMemoryMapper() {
+    UnmapOrDie(buffer, 3 * GetPageSize());
+  }
+
+  uptr MapPackedCounterArrayBuffer(uptr buffer_size) {
+    const auto page_size = GetPageSize();
+    CHECK_EQ(buffer_size, page_size);
+    memset(reinterpret_cast<void*>(reinterpret_cast<uptr>(buffer) + page_size),
+           0, page_size);
+    return reinterpret_cast<uptr>(buffer) + page_size;
+  }
+  void UnmapPackedCounterArrayBuffer(uptr buffer, uptr buffer_size) {}
+
+ private:
+  void *buffer;
+};
+
+TEST(SanitizerCommon, SizeClassAllocator64PackedCounterArray) {
+  NoMemoryMapper no_memory_mapper;
+  typedef Allocator64::PackedCounterArray<NoMemoryMapper>
+      NoMemoryPackedCounterArray;
+
+  for (int i = 0; i < 64; i++) {
+    // Various valid counter's max values packed into one word.
+    NoMemoryPackedCounterArray counters_2n(1, 1ULL << i, &no_memory_mapper);
+    EXPECT_EQ(8ULL, no_memory_mapper.last_request_buffer_size);
+
+    // Check the "all bit set" values too.
+    NoMemoryPackedCounterArray counters_2n1_1(1, ~0ULL >> i, &no_memory_mapper);
+    EXPECT_EQ(8ULL, no_memory_mapper.last_request_buffer_size);
+
+    // Verify the packing ratio, the counter is expected to be packed into the
+    // closest power of 2 bits.
+    NoMemoryPackedCounterArray counters(64, 1ULL << i, &no_memory_mapper);
+    EXPECT_EQ(8ULL * RoundUpToPowerOfTwo(i + 1),
+              no_memory_mapper.last_request_buffer_size);
+  }
+
+  RedZoneMemoryMapper memory_mapper;
+  typedef Allocator64::PackedCounterArray<RedZoneMemoryMapper>
+      RedZonePackedCounterArray;
+  // Go through 1, 2, 4, 8, .. 64 bits per counter.
+  for (int i = 0; i < 7; i++) {
+    // Make sure counters request one memory page for the buffer.
+    const u64 kNumCounters = (GetPageSize() / 8) * (64 >> i);
+    RedZonePackedCounterArray counters(kNumCounters,
+                                       1ULL << ((1 << i) - 1),
+                                       &memory_mapper);
+    counters.Inc(0);
+    for (u64 c = 1; c < kNumCounters - 1; c++) {
+      ASSERT_EQ(0ULL, counters.Get(c));
+      counters.Inc(c);
+      ASSERT_EQ(1ULL, counters.Get(c - 1));
+    }
+    ASSERT_EQ(0ULL, counters.Get(kNumCounters - 1));
+    counters.Inc(kNumCounters - 1);
+
+    if (i > 0) {
+      counters.IncRange(0, kNumCounters - 1);
+      for (u64 c = 0; c < kNumCounters; c++)
+        ASSERT_EQ(2ULL, counters.Get(c));
+    }
+  }
+}
+
+class RangeRecorder {
+ public:
+  std::string reported_pages;
+
+  RangeRecorder()
+      : page_size_scaled_log(
+            Log2(GetPageSizeCached() >> Allocator64::kCompactPtrScale)),
+        last_page_reported(0) {}
+
+  void ReleasePageRangeToOS(u32 from, u32 to) {
+    from >>= page_size_scaled_log;
+    to >>= page_size_scaled_log;
+    ASSERT_LT(from, to);
+    if (!reported_pages.empty())
+      ASSERT_LT(last_page_reported, from);
+    reported_pages.append(from - last_page_reported, '.');
+    reported_pages.append(to - from, 'x');
+    last_page_reported = to;
+  }
+ private:
+  const uptr page_size_scaled_log;
+  u32 last_page_reported;
+};
+
+TEST(SanitizerCommon, SizeClassAllocator64FreePagesRangeTracker) {
+  typedef Allocator64::FreePagesRangeTracker<RangeRecorder> RangeTracker;
+
+  // 'x' denotes a page to be released, '.' denotes a page to be kept around.
+  const char* test_cases[] = {
+      "",
+      ".",
+      "x",
+      "........",
+      "xxxxxxxxxxx",
+      "..............xxxxx",
+      "xxxxxxxxxxxxxxxxxx.....",
+      "......xxxxxxxx........",
+      "xxx..........xxxxxxxxxxxxxxx",
+      "......xxxx....xxxx........",
+      "xxx..........xxxxxxxx....xxxxxxx",
+      "x.x.x.x.x.x.x.x.x.x.x.x.",
+      ".x.x.x.x.x.x.x.x.x.x.x.x",
+      ".x.x.x.x.x.x.x.x.x.x.x.x.",
+      "x.x.x.x.x.x.x.x.x.x.x.x.x",
+  };
+
+  for (auto test_case : test_cases) {
+    RangeRecorder range_recorder;
+    RangeTracker tracker(&range_recorder);
+    for (int i = 0; test_case[i] != 0; i++)
+      tracker.NextPage(test_case[i] == 'x');
+    tracker.Done();
+    // Strip trailing '.'-pages before comparing the results as they are not
+    // going to be reported to range_recorder anyway.
+    const char* last_x = strrchr(test_case, 'x');
+    std::string expected(
+        test_case,
+        last_x == nullptr ? 0 : (last_x - test_case + 1));
+    EXPECT_STREQ(expected.c_str(), range_recorder.reported_pages.c_str());
+  }
+}
+
+class ReleasedPagesTrackingMemoryMapper {
+ public:
+  std::set<u32> reported_pages;
+
+  uptr MapPackedCounterArrayBuffer(uptr buffer_size) {
+    reported_pages.clear();
+    return reinterpret_cast<uptr>(calloc(1, buffer_size));
+  }
+  void UnmapPackedCounterArrayBuffer(uptr buffer, uptr buffer_size) {
+    free(reinterpret_cast<void*>(buffer));
+  }
+
+  void ReleasePageRangeToOS(u32 from, u32 to) {
+    uptr page_size_scaled =
+        GetPageSizeCached() >> Allocator64::kCompactPtrScale;
+    for (u32 i = from; i < to; i += page_size_scaled)
+      reported_pages.insert(i);
+  }
+};
+
+template <class Allocator>
+void TestReleaseFreeMemoryToOS() {
+  ReleasedPagesTrackingMemoryMapper memory_mapper;
+  const uptr kAllocatedPagesCount = 1024;
+  const uptr page_size = GetPageSizeCached();
+  const uptr page_size_scaled = page_size >> Allocator::kCompactPtrScale;
+  std::mt19937 r;
+  uint32_t rnd_state = 42;
+
+  for (uptr class_id = 1; class_id <= Allocator::SizeClassMapT::kLargestClassID;
+      class_id++) {
+    const uptr chunk_size = Allocator::SizeClassMapT::Size(class_id);
+    const uptr chunk_size_scaled = chunk_size >> Allocator::kCompactPtrScale;
+    const uptr max_chunks =
+        kAllocatedPagesCount * GetPageSizeCached() / chunk_size;
+
+    // Generate the random free list.
+    std::vector<u32> free_array;
+    bool in_free_range = false;
+    uptr current_range_end = 0;
+    for (uptr i = 0; i < max_chunks; i++) {
+      if (i == current_range_end) {
+        in_free_range = (my_rand_r(&rnd_state) & 1U) == 1;
+        current_range_end += my_rand_r(&rnd_state) % 100 + 1;
+      }
+      if (in_free_range)
+        free_array.push_back(i * chunk_size_scaled);
+    }
+    if (free_array.empty())
+      continue;
+    // Shuffle free_list to verify that ReleaseFreeMemoryToOS does not depend on
+    // the list ordering.
+    std::shuffle(free_array.begin(), free_array.end(), r);
+
+    Allocator::ReleaseFreeMemoryToOS(&free_array[0], free_array.size(),
+                                     chunk_size, kAllocatedPagesCount,
+                                     &memory_mapper);
+
+    // Verify that there are no released pages touched by used chunks and all
+    // ranges of free chunks big enough to contain the entire memory pages had
+    // these pages released.
+    uptr verified_released_pages = 0;
+    std::set<u32> free_chunks(free_array.begin(), free_array.end());
+
+    u32 current_chunk = 0;
+    in_free_range = false;
+    u32 current_free_range_start = 0;
+    for (uptr i = 0; i <= max_chunks; i++) {
+      bool is_free_chunk = free_chunks.find(current_chunk) != free_chunks.end();
+
+      if (is_free_chunk) {
+        if (!in_free_range) {
+          in_free_range = true;
+          current_free_range_start = current_chunk;
+        }
+      } else {
+        // Verify that this used chunk does not touch any released page.
+        for (uptr i_page = current_chunk / page_size_scaled;
+             i_page <= (current_chunk + chunk_size_scaled - 1) /
+                       page_size_scaled;
+             i_page++) {
+          bool page_released =
+              memory_mapper.reported_pages.find(i_page * page_size_scaled) !=
+              memory_mapper.reported_pages.end();
+          ASSERT_EQ(false, page_released);
+        }
+
+        if (in_free_range) {
+          in_free_range = false;
+          // Verify that all entire memory pages covered by this range of free
+          // chunks were released.
+          u32 page = RoundUpTo(current_free_range_start, page_size_scaled);
+          while (page + page_size_scaled <= current_chunk) {
+            bool page_released =
+                memory_mapper.reported_pages.find(page) !=
+                memory_mapper.reported_pages.end();
+            ASSERT_EQ(true, page_released);
+            verified_released_pages++;
+            page += page_size_scaled;
+          }
+        }
+      }
+
+      current_chunk += chunk_size_scaled;
+    }
+
+    ASSERT_EQ(memory_mapper.reported_pages.size(), verified_released_pages);
+  }
+}
+
+TEST(SanitizerCommon, SizeClassAllocator64ReleaseFreeMemoryToOS) {
+  TestReleaseFreeMemoryToOS<Allocator64>();
+}
+
+#if !SANITIZER_ANDROID
+TEST(SanitizerCommon, SizeClassAllocator64CompactReleaseFreeMemoryToOS) {
+  TestReleaseFreeMemoryToOS<Allocator64Compact>();
+}
+
+TEST(SanitizerCommon, SizeClassAllocator64VeryCompactReleaseFreeMemoryToOS) {
+  TestReleaseFreeMemoryToOS<Allocator64VeryCompact>();
+}
+#endif  // !SANITIZER_ANDROID
+
+#endif  // SANITIZER_CAN_USE_ALLOCATOR64
+
 TEST(SanitizerCommon, TwoLevelByteMap) {
   const u64 kSize1 = 1 << 6, kSize2 = 1 << 12;
   const u64 n = kSize1 * kSize2;
diff --git a/lib/sanitizer_common/tests/sanitizer_bitvector_test.cc b/lib/sanitizer_common/tests/sanitizer_bitvector_test.cc
index dec5459..669365b 100644
--- a/lib/sanitizer_common/tests/sanitizer_bitvector_test.cc
+++ b/lib/sanitizer_common/tests/sanitizer_bitvector_test.cc
@@ -62,14 +62,14 @@
   t.copyFrom(bv);
   while (!t.empty()) {
     uptr idx = t.getAndClearFirstOne();
-    fprintf(stderr, "%zd ", idx);
+    fprintf(stderr, "%lu ", idx);
   }
   fprintf(stderr, "\n");
 }
 
 void Print(const set<uptr> &s) {
   for (set<uptr>::iterator it = s.begin(); it != s.end(); ++it) {
-    fprintf(stderr, "%zd ", *it);
+    fprintf(stderr, "%lu ", *it);
   }
   fprintf(stderr, "\n");
 }
diff --git a/lib/sanitizer_common/tests/sanitizer_common_test.cc b/lib/sanitizer_common/tests/sanitizer_common_test.cc
index ebc885d..576649c 100644
--- a/lib/sanitizer_common/tests/sanitizer_common_test.cc
+++ b/lib/sanitizer_common/tests/sanitizer_common_test.cc
@@ -14,6 +14,7 @@
 
 #include "sanitizer_common/sanitizer_allocator_internal.h"
 #include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_file.h"
 #include "sanitizer_common/sanitizer_flags.h"
 #include "sanitizer_common/sanitizer_libc.h"
 #include "sanitizer_common/sanitizer_platform.h"
@@ -72,12 +73,12 @@
   EXPECT_TRUE(IsSorted(array, 2));
 }
 
-TEST(SanitizerCommon, MmapAlignedOrDie) {
+TEST(SanitizerCommon, MmapAlignedOrDieOnFatalError) {
   uptr PageSize = GetPageSizeCached();
   for (uptr size = 1; size <= 32; size *= 2) {
     for (uptr alignment = 1; alignment <= 32; alignment *= 2) {
       for (int iter = 0; iter < 100; iter++) {
-        uptr res = (uptr)MmapAlignedOrDie(
+        uptr res = (uptr)MmapAlignedOrDieOnFatalError(
             size * PageSize, alignment * PageSize, "MmapAlignedOrDieTest");
         EXPECT_EQ(0U, res % (alignment * PageSize));
         internal_memset((void*)res, 1, size * PageSize);
@@ -300,4 +301,88 @@
   EXPECT_STREQ("012345678", str.data());
 }
 
+#if SANITIZER_LINUX
+TEST(SanitizerCommon, GetRandom) {
+  u8 buffer_1[32], buffer_2[32];
+  for (bool blocking : { false, true }) {
+    EXPECT_FALSE(GetRandom(nullptr, 32, blocking));
+    EXPECT_FALSE(GetRandom(buffer_1, 0, blocking));
+    EXPECT_FALSE(GetRandom(buffer_1, 512, blocking));
+    EXPECT_EQ(ARRAY_SIZE(buffer_1), ARRAY_SIZE(buffer_2));
+    for (uptr size = 4; size <= ARRAY_SIZE(buffer_1); size += 4) {
+      for (uptr i = 0; i < 100; i++) {
+        EXPECT_TRUE(GetRandom(buffer_1, size, blocking));
+        EXPECT_TRUE(GetRandom(buffer_2, size, blocking));
+        EXPECT_NE(internal_memcmp(buffer_1, buffer_2, size), 0);
+      }
+    }
+  }
+}
+#endif
+
+TEST(SanitizerCommon, ReservedAddressRangeInit) {
+  uptr init_size = 0xffff;
+  ReservedAddressRange address_range;
+  uptr res = address_range.Init(init_size);
+  CHECK_NE(res, (void*)-1);
+  UnmapOrDie((void*)res, init_size);
+  // Should be able to map into the same space now.
+  ReservedAddressRange address_range2;
+  uptr res2 = address_range2.Init(init_size, nullptr, res);
+  CHECK_EQ(res, res2);
+
+  // TODO(flowerhack): Once this is switched to the "real" implementation
+  // (rather than passing through to MmapNoAccess*), enforce and test "no
+  // double initializations allowed"
+}
+
+TEST(SanitizerCommon, ReservedAddressRangeMap) {
+  constexpr uptr init_size = 0xffff;
+  ReservedAddressRange address_range;
+  uptr res = address_range.Init(init_size);
+  CHECK_NE(res, (void*) -1);
+
+  // Valid mappings should succeed.
+  CHECK_EQ(res, address_range.Map(res, init_size));
+
+  // Valid mappings should be readable.
+  unsigned char buffer[init_size];
+  memcpy(buffer, reinterpret_cast<void *>(res), init_size);
+
+  // TODO(flowerhack): Once this is switched to the "real" implementation, make
+  // sure you can only mmap into offsets in the Init range.
+}
+
+TEST(SanitizerCommon, ReservedAddressRangeUnmap) {
+  uptr PageSize = GetPageSizeCached();
+  uptr init_size = PageSize * 8;
+  ReservedAddressRange address_range;
+  uptr base_addr = address_range.Init(init_size);
+  CHECK_NE(base_addr, (void*)-1);
+  CHECK_EQ(base_addr, address_range.Map(base_addr, init_size));
+
+  // Unmapping the entire range should succeed.
+  address_range.Unmap(base_addr, init_size);
+
+  // Map a new range.
+  base_addr = address_range.Init(init_size);
+  CHECK_EQ(base_addr, address_range.Map(base_addr, init_size));
+
+  // Windows doesn't allow partial unmappings.
+  #if !SANITIZER_WINDOWS
+
+  // Unmapping at the beginning should succeed.
+  address_range.Unmap(base_addr, PageSize);
+
+  // Unmapping at the end should succeed.
+  uptr new_start = reinterpret_cast<uptr>(address_range.base()) +
+                   address_range.size() - PageSize;
+  address_range.Unmap(new_start, PageSize);
+
+  #endif
+
+  // Unmapping in the middle of the ReservedAddressRange should fail.
+  EXPECT_DEATH(address_range.Unmap(base_addr + (PageSize * 2), PageSize), ".*");
+}
+
 }  // namespace __sanitizer
diff --git a/lib/sanitizer_common/tests/sanitizer_flags_test.cc b/lib/sanitizer_common/tests/sanitizer_flags_test.cc
index 24a3f3d..f3fe139 100644
--- a/lib/sanitizer_common/tests/sanitizer_flags_test.cc
+++ b/lib/sanitizer_common/tests/sanitizer_flags_test.cc
@@ -59,6 +59,38 @@
   TestFlag(true, "flag_name=0", false);
   TestFlag(true, "flag_name=no", false);
   TestFlag(true, "flag_name=false", false);
+
+  EXPECT_DEATH(TestFlag(false, "flag_name", true), "expected '='");
+  EXPECT_DEATH(TestFlag(false, "flag_name=", true),
+               "Invalid value for bool option: ''");
+  EXPECT_DEATH(TestFlag(false, "flag_name=2", true),
+               "Invalid value for bool option: '2'");
+  EXPECT_DEATH(TestFlag(false, "flag_name=-1", true),
+               "Invalid value for bool option: '-1'");
+  EXPECT_DEATH(TestFlag(false, "flag_name=on", true),
+               "Invalid value for bool option: 'on'");
+}
+
+TEST(SanitizerCommon, HandleSignalMode) {
+  TestFlag(kHandleSignalNo, "flag_name=1", kHandleSignalYes);
+  TestFlag(kHandleSignalNo, "flag_name=yes", kHandleSignalYes);
+  TestFlag(kHandleSignalNo, "flag_name=true", kHandleSignalYes);
+  TestFlag(kHandleSignalYes, "flag_name=0", kHandleSignalNo);
+  TestFlag(kHandleSignalYes, "flag_name=no", kHandleSignalNo);
+  TestFlag(kHandleSignalYes, "flag_name=false", kHandleSignalNo);
+  TestFlag(kHandleSignalNo, "flag_name=2", kHandleSignalExclusive);
+  TestFlag(kHandleSignalYes, "flag_name=exclusive", kHandleSignalExclusive);
+
+  EXPECT_DEATH(TestFlag(kHandleSignalNo, "flag_name", kHandleSignalNo),
+               "expected '='");
+  EXPECT_DEATH(TestFlag(kHandleSignalNo, "flag_name=", kHandleSignalNo),
+               "Invalid value for signal handler option: ''");
+  EXPECT_DEATH(TestFlag(kHandleSignalNo, "flag_name=3", kHandleSignalNo),
+               "Invalid value for signal handler option: '3'");
+  EXPECT_DEATH(TestFlag(kHandleSignalNo, "flag_name=-1", kHandleSignalNo),
+               "Invalid value for signal handler option: '-1'");
+  EXPECT_DEATH(TestFlag(kHandleSignalNo, "flag_name=on", kHandleSignalNo),
+               "Invalid value for signal handler option: 'on'");
 }
 
 TEST(SanitizerCommon, IntFlags) {
@@ -135,13 +167,13 @@
 
   cf.symbolize = false;
   cf.coverage = true;
-  cf.coverage_direct = true;
+  cf.heap_profile = true;
   cf.log_path = "path/one";
 
-  parser.ParseString("symbolize=1:coverage_direct=false log_path='path/two'");
+  parser.ParseString("symbolize=1:heap_profile=false log_path='path/two'");
   EXPECT_TRUE(cf.symbolize);
   EXPECT_TRUE(cf.coverage);
-  EXPECT_FALSE(cf.coverage_direct);
+  EXPECT_FALSE(cf.heap_profile);
   EXPECT_STREQ("path/two", cf.log_path);
 }
 
diff --git a/lib/sanitizer_common/tests/sanitizer_libc_test.cc b/lib/sanitizer_common/tests/sanitizer_libc_test.cc
index 6252576..a73c65a 100644
--- a/lib/sanitizer_common/tests/sanitizer_libc_test.cc
+++ b/lib/sanitizer_common/tests/sanitizer_libc_test.cc
@@ -11,6 +11,7 @@
 #include <algorithm>
 
 #include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_file.h"
 #include "sanitizer_common/sanitizer_libc.h"
 #include "sanitizer_common/sanitizer_platform.h"
 #include "gtest/gtest.h"
diff --git a/lib/sanitizer_common/tests/sanitizer_linux_test.cc b/lib/sanitizer_common/tests/sanitizer_linux_test.cc
index fb6b109..8a6afab 100644
--- a/lib/sanitizer_common/tests/sanitizer_linux_test.cc
+++ b/lib/sanitizer_common/tests/sanitizer_linux_test.cc
@@ -17,6 +17,7 @@
 #include "sanitizer_common/sanitizer_linux.h"
 
 #include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_file.h"
 #include "gtest/gtest.h"
 
 #include <pthread.h>
diff --git a/lib/sanitizer_common/tests/sanitizer_test_utils.h b/lib/sanitizer_common/tests/sanitizer_test_utils.h
index 9c162a6..f8821a1 100644
--- a/lib/sanitizer_common/tests/sanitizer_test_utils.h
+++ b/lib/sanitizer_common/tests/sanitizer_test_utils.h
@@ -101,8 +101,8 @@
 # define SANITIZER_TEST_HAS_POSIX_MEMALIGN 0
 #endif
 
-#if !defined(__APPLE__) && !defined(__FreeBSD__) && \
-    !defined(__ANDROID__) && !defined(_WIN32)
+#if !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(__ANDROID__) && \
+    !defined(__NetBSD__) && !defined(_WIN32)
 # define SANITIZER_TEST_HAS_MEMALIGN 1
 # define SANITIZER_TEST_HAS_PVALLOC 1
 # define SANITIZER_TEST_HAS_MALLOC_USABLE_SIZE 1
@@ -118,10 +118,16 @@
 # define SANITIZER_TEST_HAS_STRNLEN 0
 #endif
 
-#if defined(__FreeBSD__)
+#if defined(__FreeBSD__) || defined(__NetBSD__)
 # define SANITIZER_TEST_HAS_PRINTF_L 1
 #else
 # define SANITIZER_TEST_HAS_PRINTF_L 0
 #endif
 
+#if !defined(_MSC_VER)
+# define SANITIZER_TEST_HAS_STRNDUP 1
+#else
+# define SANITIZER_TEST_HAS_STRNDUP 0
+#endif
+
 #endif  // SANITIZER_TEST_UTILS_H
diff --git a/lib/scudo/CMakeLists.txt b/lib/scudo/CMakeLists.txt
index ba5e8ac..4d26a34 100644
--- a/lib/scudo/CMakeLists.txt
+++ b/lib/scudo/CMakeLists.txt
@@ -12,10 +12,14 @@
   scudo_flags.cpp
   scudo_crc32.cpp
   scudo_interceptors.cpp
-  scudo_new_delete.cpp
   scudo_termination.cpp
+  scudo_tsd_exclusive.cpp
+  scudo_tsd_shared.cpp
   scudo_utils.cpp)
 
+set(SCUDO_CXX_SOURCES
+  scudo_new_delete.cpp)
+
 # Enable the SSE 4.2 instruction set for scudo_crc32.cpp, if available.
 if (COMPILER_RT_HAS_MSSE4_2_FLAG)
   set_source_files_properties(scudo_crc32.cpp PROPERTIES COMPILE_FLAGS -msse4.2)
@@ -28,15 +32,41 @@
 endif()
 
 if(COMPILER_RT_HAS_SCUDO)
-  foreach(arch ${SCUDO_SUPPORTED_ARCH})
-    add_compiler_rt_runtime(clang_rt.scudo
-      STATIC
-      ARCHS ${arch}
-      SOURCES ${SCUDO_SOURCES}
-              $<TARGET_OBJECTS:RTInterception.${arch}>
-              $<TARGET_OBJECTS:RTSanitizerCommonNoTermination.${arch}>
-              $<TARGET_OBJECTS:RTSanitizerCommonLibc.${arch}>
-      CFLAGS ${SCUDO_CFLAGS}
-      PARENT_TARGET scudo)
-  endforeach()
+  set(SCUDO_DYNAMIC_LIBS ${SANITIZER_COMMON_LINK_LIBS})
+  append_list_if(COMPILER_RT_HAS_LIBDL dl SCUDO_DYNAMIC_LIBS)
+  append_list_if(COMPILER_RT_HAS_LIBRT rt SCUDO_DYNAMIC_LIBS)
+  append_list_if(COMPILER_RT_HAS_LIBPTHREAD pthread SCUDO_DYNAMIC_LIBS)
+  append_list_if(COMPILER_RT_HAS_LIBLOG log SCUDO_DYNAMIC_LIBS)
+
+  add_compiler_rt_runtime(clang_rt.scudo
+    STATIC
+    ARCHS ${SCUDO_SUPPORTED_ARCH}
+    SOURCES ${SCUDO_SOURCES}
+    OBJECT_LIBS RTSanitizerCommonNoTermination
+                RTSanitizerCommonLibc
+                RTInterception
+                RTUbsan
+    CFLAGS ${SCUDO_CFLAGS}
+    PARENT_TARGET scudo)
+
+  add_compiler_rt_runtime(clang_rt.scudo_cxx
+    STATIC
+    ARCHS ${SCUDO_SUPPORTED_ARCH}
+    SOURCES ${SCUDO_CXX_SOURCES}
+    OBJECT_LIBS RTUbsan_cxx
+    CFLAGS ${SCUDO_CFLAGS}
+    PARENT_TARGET scudo)
+
+  add_compiler_rt_runtime(clang_rt.scudo
+    SHARED
+    ARCHS ${SCUDO_SUPPORTED_ARCH}
+    SOURCES ${SCUDO_SOURCES} ${SCUDO_CXX_SOURCES}
+    OBJECT_LIBS RTSanitizerCommonNoTermination
+                RTSanitizerCommonLibc
+                RTInterception
+                RTUbsan
+                RTUbsan_cxx
+    CFLAGS ${SCUDO_CFLAGS}
+    LINK_LIBS ${SCUDO_DYNAMIC_LIBS}
+    PARENT_TARGET scudo)
 endif()
diff --git a/lib/scudo/scudo_allocator.cpp b/lib/scudo/scudo_allocator.cpp
index dab6abe..472017d 100644
--- a/lib/scudo/scudo_allocator.cpp
+++ b/lib/scudo/scudo_allocator.cpp
@@ -15,56 +15,20 @@
 //===----------------------------------------------------------------------===//
 
 #include "scudo_allocator.h"
+#include "scudo_crc32.h"
+#include "scudo_flags.h"
+#include "scudo_tsd.h"
 #include "scudo_utils.h"
 
+#include "sanitizer_common/sanitizer_allocator_checks.h"
 #include "sanitizer_common/sanitizer_allocator_interface.h"
 #include "sanitizer_common/sanitizer_quarantine.h"
 
-#include <limits.h>
-#include <pthread.h>
-
-#include <cstring>
+#include <errno.h>
+#include <string.h>
 
 namespace __scudo {
 
-#if SANITIZER_CAN_USE_ALLOCATOR64
-const uptr AllocatorSpace = ~0ULL;
-const uptr AllocatorSize = 0x40000000000ULL;
-typedef DefaultSizeClassMap SizeClassMap;
-struct AP {
-  static const uptr kSpaceBeg = AllocatorSpace;
-  static const uptr kSpaceSize = AllocatorSize;
-  static const uptr kMetadataSize = 0;
-  typedef __scudo::SizeClassMap SizeClassMap;
-  typedef NoOpMapUnmapCallback MapUnmapCallback;
-  static const uptr kFlags =
-      SizeClassAllocator64FlagMasks::kRandomShuffleChunks;
-};
-typedef SizeClassAllocator64<AP> PrimaryAllocator;
-#else
-// Currently, the 32-bit Sanitizer allocator has not yet benefited from all the
-// security improvements brought to the 64-bit one. This makes the 32-bit
-// version of Scudo slightly less toughened.
-static const uptr RegionSizeLog = 20;
-static const uptr NumRegions = SANITIZER_MMAP_RANGE_SIZE >> RegionSizeLog;
-# if SANITIZER_WORDSIZE == 32
-typedef FlatByteMap<NumRegions> ByteMap;
-# elif SANITIZER_WORDSIZE == 64
-typedef TwoLevelByteMap<(NumRegions >> 12), 1 << 12> ByteMap;
-# endif  // SANITIZER_WORDSIZE
-typedef DefaultSizeClassMap SizeClassMap;
-typedef SizeClassAllocator32<0, SANITIZER_MMAP_RANGE_SIZE, 0, SizeClassMap,
-    RegionSizeLog, ByteMap> PrimaryAllocator;
-#endif  // SANITIZER_CAN_USE_ALLOCATOR64
-
-typedef SizeClassAllocatorLocalCache<PrimaryAllocator> AllocatorCache;
-typedef ScudoLargeMmapAllocator SecondaryAllocator;
-typedef CombinedAllocator<PrimaryAllocator, AllocatorCache, SecondaryAllocator>
-  ScudoAllocator;
-
-static ScudoAllocator &getAllocator();
-
-static thread_local Xorshift128Plus Prng;
 // Global static cookie, initialized at start-up.
 static uptr Cookie;
 
@@ -72,23 +36,32 @@
 // at compilation or at runtime.
 static atomic_uint8_t HashAlgorithm = { CRC32Software };
 
-SANITIZER_WEAK_ATTRIBUTE u32 computeHardwareCRC32(u32 Crc, uptr Data);
-
-INLINE u32 computeCRC32(u32 Crc, uptr Data, u8 HashType) {
-  // If SSE4.2 is defined here, it was enabled everywhere, as opposed to only
-  // for scudo_crc32.cpp. This means that other SSE instructions were likely
-  // emitted at other places, and as a result there is no reason to not use
-  // the hardware version of the CRC32.
+INLINE u32 computeCRC32(u32 Crc, uptr Value, uptr *Array, uptr ArraySize) {
+  // If the hardware CRC32 feature is defined here, it was enabled everywhere,
+  // as opposed to only for scudo_crc32.cpp. This means that other hardware
+  // specific instructions were likely emitted at other places, and as a
+  // result there is no reason to not use it here.
 #if defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32)
-  return computeHardwareCRC32(Crc, Data);
+  Crc = CRC32_INTRINSIC(Crc, Value);
+  for (uptr i = 0; i < ArraySize; i++)
+    Crc = CRC32_INTRINSIC(Crc, Array[i]);
+  return Crc;
 #else
-  if (computeHardwareCRC32 && HashType == CRC32Hardware)
-    return computeHardwareCRC32(Crc, Data);
-  else
-    return computeSoftwareCRC32(Crc, Data);
-#endif  // defined(__SSE4_2__)
+  if (atomic_load_relaxed(&HashAlgorithm) == CRC32Hardware) {
+    Crc = computeHardwareCRC32(Crc, Value);
+    for (uptr i = 0; i < ArraySize; i++)
+      Crc = computeHardwareCRC32(Crc, Array[i]);
+    return Crc;
+  }
+  Crc = computeSoftwareCRC32(Crc, Value);
+  for (uptr i = 0; i < ArraySize; i++)
+    Crc = computeSoftwareCRC32(Crc, Array[i]);
+  return Crc;
+#endif  // defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32)
 }
 
+static ScudoBackendAllocator &getBackendAllocator();
+
 struct ScudoChunk : UnpackedHeader {
   // We can't use the offset member of the chunk itself, as we would double
   // fetch it without any warranty that it wouldn't have been tampered. To
@@ -101,9 +74,11 @@
   // Returns the usable size for a chunk, meaning the amount of bytes from the
   // beginning of the user data to the end of the backend allocated chunk.
   uptr getUsableSize(UnpackedHeader *Header) {
-    uptr Size = getAllocator().GetActuallyAllocatedSize(getAllocBeg(Header));
+    uptr Size =
+        getBackendAllocator().getActuallyAllocatedSize(getAllocBeg(Header),
+                                                       Header->FromPrimary);
     if (Size == 0)
-      return Size;
+      return 0;
     return Size - AlignedChunkHeaderSize - (Header->Offset << MinAlignmentLog);
   }
 
@@ -113,14 +88,14 @@
     ZeroChecksumHeader.Checksum = 0;
     uptr HeaderHolder[sizeof(UnpackedHeader) / sizeof(uptr)];
     memcpy(&HeaderHolder, &ZeroChecksumHeader, sizeof(HeaderHolder));
-    u8 HashType = atomic_load_relaxed(&HashAlgorithm);
-    u32 Crc = computeCRC32(Cookie, reinterpret_cast<uptr>(this), HashType);
-    for (uptr i = 0; i < ARRAY_SIZE(HeaderHolder); i++)
-      Crc = computeCRC32(Crc, HeaderHolder[i], HashType);
+    u32 Crc = computeCRC32(static_cast<u32>(Cookie),
+                           reinterpret_cast<uptr>(this), HeaderHolder,
+                           ARRAY_SIZE(HeaderHolder));
     return static_cast<u16>(Crc);
   }
 
-  // Checks the validity of a chunk by verifying its checksum.
+  // Checks the validity of a chunk by verifying its checksum. It doesn't
+  // incur termination in the event of an invalid chunk.
   bool isValid() {
     UnpackedHeader NewUnpackedHeader;
     const AtomicPackedHeader *AtomicHeader =
@@ -130,13 +105,27 @@
     return (NewUnpackedHeader.Checksum == computeChecksum(&NewUnpackedHeader));
   }
 
+  // Nulls out a chunk header. When returning the chunk to the backend, there
+  // is no need to store a valid ChunkAvailable header, as this would be
+  // computationally expensive. Zeroing out serves the same purpose by making
+  // the header invalid. In the extremely rare event where 0 would be a valid
+  // checksum for the chunk, the state of the chunk is ChunkAvailable anyway.
+  COMPILER_CHECK(ChunkAvailable == 0);
+  void eraseHeader() {
+    PackedHeader NullPackedHeader = 0;
+    AtomicPackedHeader *AtomicHeader =
+        reinterpret_cast<AtomicPackedHeader *>(this);
+    atomic_store_relaxed(AtomicHeader, NullPackedHeader);
+  }
+
   // Loads and unpacks the header, verifying the checksum in the process.
   void loadHeader(UnpackedHeader *NewUnpackedHeader) const {
     const AtomicPackedHeader *AtomicHeader =
         reinterpret_cast<const AtomicPackedHeader *>(this);
     PackedHeader NewPackedHeader = atomic_load_relaxed(AtomicHeader);
     *NewUnpackedHeader = bit_cast<UnpackedHeader>(NewPackedHeader);
-    if (NewUnpackedHeader->Checksum != computeChecksum(NewUnpackedHeader)) {
+    if (UNLIKELY(NewUnpackedHeader->Checksum !=
+        computeChecksum(NewUnpackedHeader))) {
       dieWithMessage("ERROR: corrupted chunk header at address %p\n", this);
     }
   }
@@ -160,155 +149,134 @@
     PackedHeader OldPackedHeader = bit_cast<PackedHeader>(*OldUnpackedHeader);
     AtomicPackedHeader *AtomicHeader =
         reinterpret_cast<AtomicPackedHeader *>(this);
-    if (!atomic_compare_exchange_strong(AtomicHeader,
-                                        &OldPackedHeader,
-                                        NewPackedHeader,
-                                        memory_order_relaxed)) {
+    if (UNLIKELY(!atomic_compare_exchange_strong(AtomicHeader,
+                                                 &OldPackedHeader,
+                                                 NewPackedHeader,
+                                                 memory_order_relaxed))) {
       dieWithMessage("ERROR: race on chunk header at address %p\n", this);
     }
   }
 };
 
-static bool ScudoInitIsRunning = false;
-
-static pthread_once_t GlobalInited = PTHREAD_ONCE_INIT;
-static pthread_key_t PThreadKey;
-
-static thread_local bool ThreadInited = false;
-static thread_local bool ThreadTornDown = false;
-static thread_local AllocatorCache Cache;
-
-static void teardownThread(void *p) {
-  uptr v = reinterpret_cast<uptr>(p);
-  // The glibc POSIX thread-local-storage deallocation routine calls user
-  // provided destructors in a loop of PTHREAD_DESTRUCTOR_ITERATIONS.
-  // We want to be called last since other destructors might call free and the
-  // like, so we wait until PTHREAD_DESTRUCTOR_ITERATIONS before draining the
-  // quarantine and swallowing the cache.
-  if (v < PTHREAD_DESTRUCTOR_ITERATIONS) {
-    pthread_setspecific(PThreadKey, reinterpret_cast<void *>(v + 1));
-    return;
-  }
-  drainQuarantine();
-  getAllocator().DestroyCache(&Cache);
-  ThreadTornDown = true;
+ScudoChunk *getScudoChunk(uptr UserBeg) {
+  return reinterpret_cast<ScudoChunk *>(UserBeg - AlignedChunkHeaderSize);
 }
 
-static void initInternal() {
+struct AllocatorOptions {
+  u32 QuarantineSizeKb;
+  u32 ThreadLocalQuarantineSizeKb;
+  u32 QuarantineChunksUpToSize;
+  bool MayReturnNull;
+  s32 ReleaseToOSIntervalMs;
+  bool DeallocationTypeMismatch;
+  bool DeleteSizeMismatch;
+  bool ZeroContents;
+
+  void setFrom(const Flags *f, const CommonFlags *cf);
+};
+
+void AllocatorOptions::setFrom(const Flags *f, const CommonFlags *cf) {
+  MayReturnNull = cf->allocator_may_return_null;
+  ReleaseToOSIntervalMs = cf->allocator_release_to_os_interval_ms;
+  QuarantineSizeKb = f->QuarantineSizeKb;
+  ThreadLocalQuarantineSizeKb = f->ThreadLocalQuarantineSizeKb;
+  QuarantineChunksUpToSize = f->QuarantineChunksUpToSize;
+  DeallocationTypeMismatch = f->DeallocationTypeMismatch;
+  DeleteSizeMismatch = f->DeleteSizeMismatch;
+  ZeroContents = f->ZeroContents;
+}
+
+static void initScudoInternal(const AllocatorOptions &Options);
+
+static bool ScudoInitIsRunning = false;
+
+void initScudo() {
   SanitizerToolName = "Scudo";
   CHECK(!ScudoInitIsRunning && "Scudo init calls itself!");
   ScudoInitIsRunning = true;
 
-  // Check is SSE4.2 is supported, if so, opt for the CRC32 hardware version.
-  if (testCPUFeature(CRC32CPUFeature)) {
+  // Check if hardware CRC32 is supported in the binary and by the platform, if
+  // so, opt for the CRC32 hardware version of the checksum.
+  if (computeHardwareCRC32 && testCPUFeature(CRC32CPUFeature))
     atomic_store_relaxed(&HashAlgorithm, CRC32Hardware);
-  }
 
   initFlags();
 
   AllocatorOptions Options;
   Options.setFrom(getFlags(), common_flags());
-  initAllocator(Options);
+  initScudoInternal(Options);
 
-  MaybeStartBackgroudThread();
+  // TODO(kostyak): determine if MaybeStartBackgroudThread could be of some use.
 
   ScudoInitIsRunning = false;
 }
 
-static void initGlobal() {
-  pthread_key_create(&PThreadKey, teardownThread);
-  initInternal();
-}
-
-static void NOINLINE initThread() {
-  pthread_once(&GlobalInited, initGlobal);
-  pthread_setspecific(PThreadKey, reinterpret_cast<void *>(1));
-  getAllocator().InitCache(&Cache);
-  ThreadInited = true;
-}
-
 struct QuarantineCallback {
   explicit QuarantineCallback(AllocatorCache *Cache)
     : Cache_(Cache) {}
 
-  // Chunk recycling function, returns a quarantined chunk to the backend.
+  // Chunk recycling function, returns a quarantined chunk to the backend,
+  // first making sure it hasn't been tampered with.
   void Recycle(ScudoChunk *Chunk) {
     UnpackedHeader Header;
     Chunk->loadHeader(&Header);
-    if (Header.State != ChunkQuarantine) {
+    if (UNLIKELY(Header.State != ChunkQuarantine)) {
       dieWithMessage("ERROR: invalid chunk state when recycling address %p\n",
                      Chunk);
     }
+    Chunk->eraseHeader();
     void *Ptr = Chunk->getAllocBeg(&Header);
-    getAllocator().Deallocate(Cache_, Ptr);
+    if (Header.FromPrimary)
+      getBackendAllocator().deallocatePrimary(Cache_, Ptr);
+    else
+      getBackendAllocator().deallocateSecondary(Ptr);
   }
 
-  /// Internal quarantine allocation and deallocation functions.
+  // Internal quarantine allocation and deallocation functions. We first check
+  // that the batches are indeed serviced by the Primary.
+  // TODO(kostyak): figure out the best way to protect the batches.
+  COMPILER_CHECK(sizeof(QuarantineBatch) < SizeClassMap::kMaxSize);
   void *Allocate(uptr Size) {
-    // The internal quarantine memory cannot be protected by us. But the only
-    // structures allocated are QuarantineBatch, that are 8KB for x64. So we
-    // will use mmap for those, and given that Deallocate doesn't pass a size
-    // in, we enforce the size of the allocation to be sizeof(QuarantineBatch).
-    // TODO(kostyak): switching to mmap impacts greatly performances, we have
-    //                to find another solution
-    // CHECK_EQ(Size, sizeof(QuarantineBatch));
-    // return MmapOrDie(Size, "QuarantineBatch");
-    return getAllocator().Allocate(Cache_, Size, 1, false);
+    return getBackendAllocator().allocatePrimary(Cache_, Size);
   }
 
   void Deallocate(void *Ptr) {
-    // UnmapOrDie(Ptr, sizeof(QuarantineBatch));
-    getAllocator().Deallocate(Cache_, Ptr);
+    getBackendAllocator().deallocatePrimary(Cache_, Ptr);
   }
 
   AllocatorCache *Cache_;
 };
 
 typedef Quarantine<QuarantineCallback, ScudoChunk> ScudoQuarantine;
-typedef ScudoQuarantine::Cache QuarantineCache;
-static thread_local QuarantineCache ThreadQuarantineCache;
+typedef ScudoQuarantine::Cache ScudoQuarantineCache;
+COMPILER_CHECK(sizeof(ScudoQuarantineCache) <=
+               sizeof(ScudoTSD::QuarantineCachePlaceHolder));
 
-void AllocatorOptions::setFrom(const Flags *f, const CommonFlags *cf) {
-  MayReturnNull = cf->allocator_may_return_null;
-  ReleaseToOSIntervalMs = cf->allocator_release_to_os_interval_ms;
-  QuarantineSizeMb = f->QuarantineSizeMb;
-  ThreadLocalQuarantineSizeKb = f->ThreadLocalQuarantineSizeKb;
-  DeallocationTypeMismatch = f->DeallocationTypeMismatch;
-  DeleteSizeMismatch = f->DeleteSizeMismatch;
-  ZeroContents = f->ZeroContents;
+ScudoQuarantineCache *getQuarantineCache(ScudoTSD *TSD) {
+  return reinterpret_cast<ScudoQuarantineCache *>(
+      TSD->QuarantineCachePlaceHolder);
 }
 
-void AllocatorOptions::copyTo(Flags *f, CommonFlags *cf) const {
-  cf->allocator_may_return_null = MayReturnNull;
-  cf->allocator_release_to_os_interval_ms = ReleaseToOSIntervalMs;
-  f->QuarantineSizeMb = QuarantineSizeMb;
-  f->ThreadLocalQuarantineSizeKb = ThreadLocalQuarantineSizeKb;
-  f->DeallocationTypeMismatch = DeallocationTypeMismatch;
-  f->DeleteSizeMismatch = DeleteSizeMismatch;
-  f->ZeroContents = ZeroContents;
-}
-
-struct Allocator {
+struct ScudoAllocator {
   static const uptr MaxAllowedMallocSize =
       FIRST_32_SECOND_64(2UL << 30, 1ULL << 40);
 
-  ScudoAllocator BackendAllocator;
+  typedef ReturnNullOrDieOnFailure FailureHandler;
+
+  ScudoBackendAllocator BackendAllocator;
   ScudoQuarantine AllocatorQuarantine;
 
-  // The fallback caches are used when the thread local caches have been
-  // 'detroyed' on thread tear-down. They are protected by a Mutex as they can
-  // be accessed by different threads.
-  StaticSpinMutex FallbackMutex;
-  AllocatorCache FallbackAllocatorCache;
-  QuarantineCache FallbackQuarantineCache;
+  StaticSpinMutex GlobalPrngMutex;
+  ScudoPrng GlobalPrng;
+
+  u32 QuarantineChunksUpToSize;
 
   bool DeallocationTypeMismatch;
   bool ZeroContents;
   bool DeleteSizeMismatch;
 
-  explicit Allocator(LinkerInitialized)
-    : AllocatorQuarantine(LINKER_INITIALIZED),
-      FallbackQuarantineCache(LINKER_INITIALIZED) {}
+  explicit ScudoAllocator(LinkerInitialized)
+    : AllocatorQuarantine(LINKER_INITIALIZED) {}
 
   void init(const AllocatorOptions &Options) {
     // Verify that the header offset field can hold the maximum offset. In the
@@ -320,23 +288,23 @@
     // result, the maximum offset will be at most the maximum alignment for the
     // last size class minus the header size, in multiples of MinAlignment.
     UnpackedHeader Header = {};
-    uptr MaxPrimaryAlignment = 1 << MostSignificantSetBitIndex(
-        SizeClassMap::kMaxSize - MinAlignment);
-    uptr MaxOffset = (MaxPrimaryAlignment - AlignedChunkHeaderSize) >>
-        MinAlignmentLog;
+    uptr MaxPrimaryAlignment =
+        1 << MostSignificantSetBitIndex(SizeClassMap::kMaxSize - MinAlignment);
+    uptr MaxOffset =
+        (MaxPrimaryAlignment - AlignedChunkHeaderSize) >> MinAlignmentLog;
     Header.Offset = MaxOffset;
     if (Header.Offset != MaxOffset) {
       dieWithMessage("ERROR: the maximum possible offset doesn't fit in the "
                      "header\n");
     }
-    // Verify that we can fit the maximum amount of unused bytes in the header.
-    // Given that the Secondary fits the allocation to a page, the worst case
-    // scenario happens in the Primary. It will depend on the second to last
-    // and last class sizes, as well as the dynamic base for the Primary. The
-    // following is an over-approximation that works for our needs.
-    uptr MaxUnusedBytes = SizeClassMap::kMaxSize - 1 - AlignedChunkHeaderSize;
-    Header.UnusedBytes = MaxUnusedBytes;
-    if (Header.UnusedBytes != MaxUnusedBytes) {
+    // Verify that we can fit the maximum size or amount of unused bytes in the
+    // header. Given that the Secondary fits the allocation to a page, the worst
+    // case scenario happens in the Primary. It will depend on the second to
+    // last and last class sizes, as well as the dynamic base for the Primary.
+    // The following is an over-approximation that works for our needs.
+    uptr MaxSizeOrUnusedBytes = SizeClassMap::kMaxSize - 1;
+    Header.SizeOrUnusedBytes = MaxSizeOrUnusedBytes;
+    if (Header.SizeOrUnusedBytes != MaxSizeOrUnusedBytes) {
       dieWithMessage("ERROR: the maximum possible unused bytes doesn't fit in "
                      "the header\n");
     }
@@ -344,184 +312,221 @@
     DeallocationTypeMismatch = Options.DeallocationTypeMismatch;
     DeleteSizeMismatch = Options.DeleteSizeMismatch;
     ZeroContents = Options.ZeroContents;
-    BackendAllocator.Init(Options.MayReturnNull, Options.ReleaseToOSIntervalMs);
+    SetAllocatorMayReturnNull(Options.MayReturnNull);
+    BackendAllocator.init(Options.ReleaseToOSIntervalMs);
     AllocatorQuarantine.Init(
-        static_cast<uptr>(Options.QuarantineSizeMb) << 20,
+        static_cast<uptr>(Options.QuarantineSizeKb) << 10,
         static_cast<uptr>(Options.ThreadLocalQuarantineSizeKb) << 10);
-    BackendAllocator.InitCache(&FallbackAllocatorCache);
-    Cookie = Prng.Next();
+    QuarantineChunksUpToSize = Options.QuarantineChunksUpToSize;
+    GlobalPrng.init();
+    Cookie = GlobalPrng.getU64();
   }
 
-  // Helper function that checks for a valid Scudo chunk.
+  // Helper function that checks for a valid Scudo chunk. nullptr isn't.
   bool isValidPointer(const void *UserPtr) {
-    if (UNLIKELY(!ThreadInited))
-      initThread();
-    uptr ChunkBeg = reinterpret_cast<uptr>(UserPtr);
-    if (!IsAligned(ChunkBeg, MinAlignment)) {
+    initThreadMaybe();
+    if (UNLIKELY(!UserPtr))
       return false;
-    }
-    ScudoChunk *Chunk =
-        reinterpret_cast<ScudoChunk *>(ChunkBeg - AlignedChunkHeaderSize);
-    return Chunk->isValid();
+    uptr UserBeg = reinterpret_cast<uptr>(UserPtr);
+    if (!IsAligned(UserBeg, MinAlignment))
+      return false;
+    return getScudoChunk(UserBeg)->isValid();
   }
 
   // Allocates a chunk.
-  void *allocate(uptr Size, uptr Alignment, AllocType Type) {
-    if (UNLIKELY(!ThreadInited))
-      initThread();
-    if (!IsPowerOfTwo(Alignment)) {
-      dieWithMessage("ERROR: alignment is not a power of 2\n");
-    }
-    if (Alignment > MaxAlignment)
-      return BackendAllocator.ReturnNullOrDieOnBadRequest();
-    if (Alignment < MinAlignment)
+  void *allocate(uptr Size, uptr Alignment, AllocType Type,
+                 bool ForceZeroContents = false) {
+    initThreadMaybe();
+    if (UNLIKELY(Alignment > MaxAlignment))
+      return FailureHandler::OnBadRequest();
+    if (UNLIKELY(Alignment < MinAlignment))
       Alignment = MinAlignment;
-    if (Size == 0)
+    if (UNLIKELY(Size >= MaxAllowedMallocSize))
+      return FailureHandler::OnBadRequest();
+    if (UNLIKELY(Size == 0))
       Size = 1;
-    if (Size >= MaxAllowedMallocSize)
-      return BackendAllocator.ReturnNullOrDieOnBadRequest();
 
     uptr NeededSize = RoundUpTo(Size, MinAlignment) + AlignedChunkHeaderSize;
-    if (Alignment > MinAlignment)
-      NeededSize += Alignment;
-    if (NeededSize >= MaxAllowedMallocSize)
-      return BackendAllocator.ReturnNullOrDieOnBadRequest();
+    uptr AlignedSize = (Alignment > MinAlignment) ?
+        NeededSize + (Alignment - AlignedChunkHeaderSize) : NeededSize;
+    if (UNLIKELY(AlignedSize >= MaxAllowedMallocSize))
+      return FailureHandler::OnBadRequest();
 
-    // Primary backed and Secondary backed allocations have a different
-    // treatment. We deal with alignment requirements of Primary serviced
-    // allocations here, but the Secondary will take care of its own alignment
-    // needs, which means we also have to work around some limitations of the
-    // combined allocator to accommodate the situation.
-    bool FromPrimary = PrimaryAllocator::CanAllocate(NeededSize, MinAlignment);
+    // Primary and Secondary backed allocations have a different treatment. We
+    // deal with alignment requirements of Primary serviced allocations here,
+    // but the Secondary will take care of its own alignment needs.
+    bool FromPrimary = PrimaryAllocator::CanAllocate(AlignedSize, MinAlignment);
 
     void *Ptr;
-    if (LIKELY(!ThreadTornDown)) {
-      Ptr = BackendAllocator.Allocate(&Cache, NeededSize,
-                                      FromPrimary ? MinAlignment : Alignment);
+    u8 Salt;
+    uptr AllocSize;
+    if (FromPrimary) {
+      AllocSize = AlignedSize;
+      ScudoTSD *TSD = getTSDAndLock();
+      Salt = TSD->Prng.getU8();
+      Ptr = BackendAllocator.allocatePrimary(&TSD->Cache, AllocSize);
+      TSD->unlock();
     } else {
-      SpinMutexLock l(&FallbackMutex);
-      Ptr = BackendAllocator.Allocate(&FallbackAllocatorCache, NeededSize,
-                                      FromPrimary ? MinAlignment : Alignment);
+      {
+        SpinMutexLock l(&GlobalPrngMutex);
+        Salt = GlobalPrng.getU8();
+      }
+      AllocSize = NeededSize;
+      Ptr = BackendAllocator.allocateSecondary(AllocSize, Alignment);
     }
-    if (!Ptr)
-      return BackendAllocator.ReturnNullOrDieOnOOM();
+    if (UNLIKELY(!Ptr))
+      return FailureHandler::OnOOM();
 
-    uptr AllocBeg = reinterpret_cast<uptr>(Ptr);
-    // If the allocation was serviced by the secondary, the returned pointer
-    // accounts for ChunkHeaderSize to pass the alignment check of the combined
-    // allocator. Adjust it here.
-    if (!FromPrimary) {
-      AllocBeg -= AlignedChunkHeaderSize;
-      if (Alignment > MinAlignment)
-        NeededSize -= Alignment;
-    }
-
-    uptr ActuallyAllocatedSize = BackendAllocator.GetActuallyAllocatedSize(
-        reinterpret_cast<void *>(AllocBeg));
     // If requested, we will zero out the entire contents of the returned chunk.
-    if (ZeroContents && FromPrimary)
-       memset(Ptr, 0, ActuallyAllocatedSize);
+    if ((ForceZeroContents || ZeroContents) && FromPrimary)
+      memset(Ptr, 0, BackendAllocator.getActuallyAllocatedSize(
+          Ptr, /*FromPrimary=*/true));
 
-    uptr ChunkBeg = AllocBeg + AlignedChunkHeaderSize;
-    if (!IsAligned(ChunkBeg, Alignment))
-      ChunkBeg = RoundUpTo(ChunkBeg, Alignment);
-    CHECK_LE(ChunkBeg + Size, AllocBeg + NeededSize);
-    ScudoChunk *Chunk =
-        reinterpret_cast<ScudoChunk *>(ChunkBeg - AlignedChunkHeaderSize);
     UnpackedHeader Header = {};
+    uptr AllocBeg = reinterpret_cast<uptr>(Ptr);
+    uptr UserBeg = AllocBeg + AlignedChunkHeaderSize;
+    if (UNLIKELY(!IsAligned(UserBeg, Alignment))) {
+      // Since the Secondary takes care of alignment, a non-aligned pointer
+      // means it is from the Primary. It is also the only case where the offset
+      // field of the header would be non-zero.
+      CHECK(FromPrimary);
+      UserBeg = RoundUpTo(UserBeg, Alignment);
+      uptr Offset = UserBeg - AlignedChunkHeaderSize - AllocBeg;
+      Header.Offset = Offset >> MinAlignmentLog;
+    }
+    CHECK_LE(UserBeg + Size, AllocBeg + AllocSize);
     Header.State = ChunkAllocated;
-    uptr Offset = ChunkBeg - AlignedChunkHeaderSize - AllocBeg;
-    Header.Offset = Offset >> MinAlignmentLog;
     Header.AllocType = Type;
-    Header.UnusedBytes = ActuallyAllocatedSize - Offset -
-        AlignedChunkHeaderSize - Size;
-    Header.Salt = static_cast<u8>(Prng.Next());
-    Chunk->storeHeader(&Header);
-    void *UserPtr = reinterpret_cast<void *>(ChunkBeg);
-    // TODO(kostyak): hooks sound like a terrible idea security wise but might
-    //                be needed for things to work properly?
+    if (FromPrimary) {
+      Header.FromPrimary = 1;
+      Header.SizeOrUnusedBytes = Size;
+    } else {
+      // The secondary fits the allocations to a page, so the amount of unused
+      // bytes is the difference between the end of the user allocation and the
+      // next page boundary.
+      uptr PageSize = GetPageSizeCached();
+      uptr TrailingBytes = (UserBeg + Size) & (PageSize - 1);
+      if (TrailingBytes)
+        Header.SizeOrUnusedBytes = PageSize - TrailingBytes;
+    }
+    Header.Salt = Salt;
+    getScudoChunk(UserBeg)->storeHeader(&Header);
+    void *UserPtr = reinterpret_cast<void *>(UserBeg);
     // if (&__sanitizer_malloc_hook) __sanitizer_malloc_hook(UserPtr, Size);
     return UserPtr;
   }
 
+  // Place a chunk in the quarantine or directly deallocate it in the event of
+  // a zero-sized quarantine, or if the size of the chunk is greater than the
+  // quarantine chunk size threshold.
+  void quarantineOrDeallocateChunk(ScudoChunk *Chunk, UnpackedHeader *Header,
+                                   uptr Size) {
+    const bool BypassQuarantine = (AllocatorQuarantine.GetCacheSize() == 0) ||
+        (Size > QuarantineChunksUpToSize);
+    if (BypassQuarantine) {
+      Chunk->eraseHeader();
+      void *Ptr = Chunk->getAllocBeg(Header);
+      if (Header->FromPrimary) {
+        ScudoTSD *TSD = getTSDAndLock();
+        getBackendAllocator().deallocatePrimary(&TSD->Cache, Ptr);
+        TSD->unlock();
+      } else {
+        getBackendAllocator().deallocateSecondary(Ptr);
+      }
+    } else {
+      // If a small memory amount was allocated with a larger alignment, we want
+      // to take that into account. Otherwise the Quarantine would be filled
+      // with tiny chunks, taking a lot of VA memory. This is an approximation
+      // of the usable size, that allows us to not call
+      // GetActuallyAllocatedSize.
+      uptr EstimatedSize = Size + (Header->Offset << MinAlignmentLog);
+      UnpackedHeader NewHeader = *Header;
+      NewHeader.State = ChunkQuarantine;
+      Chunk->compareExchangeHeader(&NewHeader, Header);
+      ScudoTSD *TSD = getTSDAndLock();
+      AllocatorQuarantine.Put(getQuarantineCache(TSD),
+                              QuarantineCallback(&TSD->Cache),
+                              Chunk, EstimatedSize);
+      TSD->unlock();
+    }
+  }
+
   // Deallocates a Chunk, which means adding it to the delayed free list (or
   // Quarantine).
   void deallocate(void *UserPtr, uptr DeleteSize, AllocType Type) {
-    if (UNLIKELY(!ThreadInited))
-      initThread();
-    // TODO(kostyak): see hook comment above
+    // For a deallocation, we only ensure minimal initialization, meaning thread
+    // local data will be left uninitialized for now (when using ELF TLS). The
+    // fallback cache will be used instead. This is a workaround for a situation
+    // where the only heap operation performed in a thread would be a free past
+    // the TLS destructors, ending up in initialized thread specific data never
+    // being destroyed properly. Any other heap operation will do a full init.
+    initThreadMaybe(/*MinimalInit=*/true);
     // if (&__sanitizer_free_hook) __sanitizer_free_hook(UserPtr);
-    if (!UserPtr)
+    if (UNLIKELY(!UserPtr))
       return;
-    uptr ChunkBeg = reinterpret_cast<uptr>(UserPtr);
-    if (!IsAligned(ChunkBeg, MinAlignment)) {
+    uptr UserBeg = reinterpret_cast<uptr>(UserPtr);
+    if (UNLIKELY(!IsAligned(UserBeg, MinAlignment))) {
       dieWithMessage("ERROR: attempted to deallocate a chunk not properly "
                      "aligned at address %p\n", UserPtr);
     }
-    ScudoChunk *Chunk =
-        reinterpret_cast<ScudoChunk *>(ChunkBeg - AlignedChunkHeaderSize);
-    UnpackedHeader OldHeader;
-    Chunk->loadHeader(&OldHeader);
-    if (OldHeader.State != ChunkAllocated) {
+    ScudoChunk *Chunk = getScudoChunk(UserBeg);
+    UnpackedHeader Header;
+    Chunk->loadHeader(&Header);
+    if (UNLIKELY(Header.State != ChunkAllocated)) {
       dieWithMessage("ERROR: invalid chunk state when deallocating address "
                      "%p\n", UserPtr);
     }
-    uptr UsableSize = Chunk->getUsableSize(&OldHeader);
-    UnpackedHeader NewHeader = OldHeader;
-    NewHeader.State = ChunkQuarantine;
-    Chunk->compareExchangeHeader(&NewHeader, &OldHeader);
     if (DeallocationTypeMismatch) {
       // The deallocation type has to match the allocation one.
-      if (NewHeader.AllocType != Type) {
+      if (Header.AllocType != Type) {
         // With the exception of memalign'd Chunks, that can be still be free'd.
-        if (NewHeader.AllocType != FromMemalign || Type != FromMalloc) {
-          dieWithMessage("ERROR: allocation type mismatch on address %p\n",
-                         Chunk);
+        if (Header.AllocType != FromMemalign || Type != FromMalloc) {
+          dieWithMessage("ERROR: allocation type mismatch when deallocating "
+                         "address %p\n", UserPtr);
         }
       }
     }
-    uptr Size = UsableSize - OldHeader.UnusedBytes;
+    uptr Size = Header.FromPrimary ? Header.SizeOrUnusedBytes :
+        Chunk->getUsableSize(&Header) - Header.SizeOrUnusedBytes;
     if (DeleteSizeMismatch) {
       if (DeleteSize && DeleteSize != Size) {
         dieWithMessage("ERROR: invalid sized delete on chunk at address %p\n",
-                       Chunk);
+                       UserPtr);
       }
     }
-
-    if (LIKELY(!ThreadTornDown)) {
-      AllocatorQuarantine.Put(&ThreadQuarantineCache,
-                              QuarantineCallback(&Cache), Chunk, UsableSize);
-    } else {
-      SpinMutexLock l(&FallbackMutex);
-      AllocatorQuarantine.Put(&FallbackQuarantineCache,
-                              QuarantineCallback(&FallbackAllocatorCache),
-                              Chunk, UsableSize);
-    }
+    quarantineOrDeallocateChunk(Chunk, &Header, Size);
   }
 
   // Reallocates a chunk. We can save on a new allocation if the new requested
   // size still fits in the chunk.
   void *reallocate(void *OldPtr, uptr NewSize) {
-    if (UNLIKELY(!ThreadInited))
-      initThread();
-    uptr ChunkBeg = reinterpret_cast<uptr>(OldPtr);
-    ScudoChunk *Chunk =
-        reinterpret_cast<ScudoChunk *>(ChunkBeg - AlignedChunkHeaderSize);
+    initThreadMaybe();
+    uptr UserBeg = reinterpret_cast<uptr>(OldPtr);
+    if (UNLIKELY(!IsAligned(UserBeg, MinAlignment))) {
+      dieWithMessage("ERROR: attempted to reallocate a chunk not properly "
+                     "aligned at address %p\n", OldPtr);
+    }
+    ScudoChunk *Chunk = getScudoChunk(UserBeg);
     UnpackedHeader OldHeader;
     Chunk->loadHeader(&OldHeader);
-    if (OldHeader.State != ChunkAllocated) {
+    if (UNLIKELY(OldHeader.State != ChunkAllocated)) {
       dieWithMessage("ERROR: invalid chunk state when reallocating address "
                      "%p\n", OldPtr);
     }
-    uptr Size = Chunk->getUsableSize(&OldHeader);
-    if (OldHeader.AllocType != FromMalloc) {
-      dieWithMessage("ERROR: invalid chunk type when reallocating address %p\n",
-                     Chunk);
+    if (DeallocationTypeMismatch) {
+      if (UNLIKELY(OldHeader.AllocType != FromMalloc)) {
+        dieWithMessage("ERROR: allocation type mismatch when reallocating "
+                       "address %p\n", OldPtr);
+      }
     }
-    UnpackedHeader NewHeader = OldHeader;
-    // The new size still fits in the current chunk.
-    if (NewSize <= Size) {
-      NewHeader.UnusedBytes = Size - NewSize;
+    uptr UsableSize = Chunk->getUsableSize(&OldHeader);
+    // The new size still fits in the current chunk, and the size difference
+    // is reasonable.
+    if (NewSize <= UsableSize &&
+        (UsableSize - NewSize) < (SizeClassMap::kMaxSize / 2)) {
+      UnpackedHeader NewHeader = OldHeader;
+      NewHeader.SizeOrUnusedBytes =
+                OldHeader.FromPrimary ? NewSize : UsableSize - NewSize;
       Chunk->compareExchangeHeader(&NewHeader, &OldHeader);
       return OldPtr;
     }
@@ -529,36 +534,25 @@
     // old one.
     void *NewPtr = allocate(NewSize, MinAlignment, FromMalloc);
     if (NewPtr) {
-      uptr OldSize = Size - OldHeader.UnusedBytes;
-      memcpy(NewPtr, OldPtr, Min(NewSize, OldSize));
-      NewHeader.State = ChunkQuarantine;
-      Chunk->compareExchangeHeader(&NewHeader, &OldHeader);
-      if (LIKELY(!ThreadTornDown)) {
-        AllocatorQuarantine.Put(&ThreadQuarantineCache,
-                                QuarantineCallback(&Cache), Chunk, Size);
-      } else {
-        SpinMutexLock l(&FallbackMutex);
-        AllocatorQuarantine.Put(&FallbackQuarantineCache,
-                                QuarantineCallback(&FallbackAllocatorCache),
-                                Chunk, Size);
-      }
+      uptr OldSize = OldHeader.FromPrimary ? OldHeader.SizeOrUnusedBytes :
+          UsableSize - OldHeader.SizeOrUnusedBytes;
+      memcpy(NewPtr, OldPtr, Min(NewSize, UsableSize));
+      quarantineOrDeallocateChunk(Chunk, &OldHeader, OldSize);
     }
     return NewPtr;
   }
 
   // Helper function that returns the actual usable size of a chunk.
   uptr getUsableSize(const void *Ptr) {
-    if (UNLIKELY(!ThreadInited))
-      initThread();
-    if (!Ptr)
+    initThreadMaybe();
+    if (UNLIKELY(!Ptr))
       return 0;
-    uptr ChunkBeg = reinterpret_cast<uptr>(Ptr);
-    ScudoChunk *Chunk =
-        reinterpret_cast<ScudoChunk *>(ChunkBeg - AlignedChunkHeaderSize);
+    uptr UserBeg = reinterpret_cast<uptr>(Ptr);
+    ScudoChunk *Chunk = getScudoChunk(UserBeg);
     UnpackedHeader Header;
     Chunk->loadHeader(&Header);
     // Getting the usable size of a chunk only makes sense if it's allocated.
-    if (Header.State != ChunkAllocated) {
+    if (UNLIKELY(Header.State != ChunkAllocated)) {
       dieWithMessage("ERROR: invalid chunk state when sizing address %p\n",
                      Ptr);
     }
@@ -566,48 +560,54 @@
   }
 
   void *calloc(uptr NMemB, uptr Size) {
-    if (UNLIKELY(!ThreadInited))
-      initThread();
-    uptr Total = NMemB * Size;
-    if (Size != 0 && Total / Size != NMemB) // Overflow check
-      return BackendAllocator.ReturnNullOrDieOnBadRequest();
-    void *Ptr = allocate(Total, MinAlignment, FromMalloc);
-    // If ZeroContents, the content of the chunk has already been zero'd out.
-    if (!ZeroContents && Ptr && BackendAllocator.FromPrimary(Ptr))
-      memset(Ptr, 0, getUsableSize(Ptr));
-    return Ptr;
+    initThreadMaybe();
+    if (UNLIKELY(CheckForCallocOverflow(NMemB, Size)))
+      return FailureHandler::OnBadRequest();
+    return allocate(NMemB * Size, MinAlignment, FromMalloc, true);
   }
 
-  void drainQuarantine() {
-    AllocatorQuarantine.Drain(&ThreadQuarantineCache,
-                              QuarantineCallback(&Cache));
+  void commitBack(ScudoTSD *TSD) {
+    AllocatorQuarantine.Drain(getQuarantineCache(TSD),
+                              QuarantineCallback(&TSD->Cache));
+    BackendAllocator.destroyCache(&TSD->Cache);
   }
 
   uptr getStats(AllocatorStat StatType) {
-    if (UNLIKELY(!ThreadInited))
-      initThread();
+    initThreadMaybe();
     uptr stats[AllocatorStatCount];
-    BackendAllocator.GetStats(stats);
+    BackendAllocator.getStats(stats);
     return stats[StatType];
   }
+
+  void *handleBadRequest() {
+    initThreadMaybe();
+    return FailureHandler::OnBadRequest();
+  }
 };
 
-static Allocator Instance(LINKER_INITIALIZED);
+static ScudoAllocator Instance(LINKER_INITIALIZED);
 
-static ScudoAllocator &getAllocator() {
+static ScudoBackendAllocator &getBackendAllocator() {
   return Instance.BackendAllocator;
 }
 
-void initAllocator(const AllocatorOptions &Options) {
+static void initScudoInternal(const AllocatorOptions &Options) {
   Instance.init(Options);
 }
 
-void drainQuarantine() {
-  Instance.drainQuarantine();
+void ScudoTSD::init(bool Shared) {
+  UnlockRequired = Shared;
+  getBackendAllocator().initCache(&Cache);
+  Prng.init();
+  memset(QuarantineCachePlaceHolder, 0, sizeof(QuarantineCachePlaceHolder));
+}
+
+void ScudoTSD::commitBack() {
+  Instance.commitBack(this);
 }
 
 void *scudoMalloc(uptr Size, AllocType Type) {
-  return Instance.allocate(Size, MinAlignment, Type);
+  return SetErrnoOnNull(Instance.allocate(Size, MinAlignment, Type));
 }
 
 void scudoFree(void *Ptr, AllocType Type) {
@@ -620,47 +620,60 @@
 
 void *scudoRealloc(void *Ptr, uptr Size) {
   if (!Ptr)
-    return Instance.allocate(Size, MinAlignment, FromMalloc);
+    return SetErrnoOnNull(Instance.allocate(Size, MinAlignment, FromMalloc));
   if (Size == 0) {
     Instance.deallocate(Ptr, 0, FromMalloc);
     return nullptr;
   }
-  return Instance.reallocate(Ptr, Size);
+  return SetErrnoOnNull(Instance.reallocate(Ptr, Size));
 }
 
 void *scudoCalloc(uptr NMemB, uptr Size) {
-  return Instance.calloc(NMemB, Size);
+  return SetErrnoOnNull(Instance.calloc(NMemB, Size));
 }
 
 void *scudoValloc(uptr Size) {
-  return Instance.allocate(Size, GetPageSizeCached(), FromMemalign);
-}
-
-void *scudoMemalign(uptr Alignment, uptr Size) {
-  return Instance.allocate(Size, Alignment, FromMemalign);
+  return SetErrnoOnNull(
+      Instance.allocate(Size, GetPageSizeCached(), FromMemalign));
 }
 
 void *scudoPvalloc(uptr Size) {
   uptr PageSize = GetPageSizeCached();
-  Size = RoundUpTo(Size, PageSize);
-  if (Size == 0) {
-    // pvalloc(0) should allocate one page.
-    Size = PageSize;
+  if (UNLIKELY(CheckForPvallocOverflow(Size, PageSize))) {
+    errno = ENOMEM;
+    return Instance.handleBadRequest();
   }
-  return Instance.allocate(Size, PageSize, FromMemalign);
+  // pvalloc(0) should allocate one page.
+  Size = Size ? RoundUpTo(Size, PageSize) : PageSize;
+  return SetErrnoOnNull(Instance.allocate(Size, PageSize, FromMemalign));
+}
+
+void *scudoMemalign(uptr Alignment, uptr Size) {
+  if (UNLIKELY(!IsPowerOfTwo(Alignment))) {
+    errno = EINVAL;
+    return Instance.handleBadRequest();
+  }
+  return SetErrnoOnNull(Instance.allocate(Size, Alignment, FromMemalign));
 }
 
 int scudoPosixMemalign(void **MemPtr, uptr Alignment, uptr Size) {
-  *MemPtr = Instance.allocate(Size, Alignment, FromMemalign);
+  if (UNLIKELY(!CheckPosixMemalignAlignment(Alignment))) {
+    Instance.handleBadRequest();
+    return EINVAL;
+  }
+  void *Ptr = Instance.allocate(Size, Alignment, FromMemalign);
+  if (UNLIKELY(!Ptr))
+    return ENOMEM;
+  *MemPtr = Ptr;
   return 0;
 }
 
 void *scudoAlignedAlloc(uptr Alignment, uptr Size) {
-  // size must be a multiple of the alignment. To avoid a division, we first
-  // make sure that alignment is a power of 2.
-  CHECK(IsPowerOfTwo(Alignment));
-  CHECK_EQ((Size & (Alignment - 1)), 0);
-  return Instance.allocate(Size, Alignment, FromMalloc);
+  if (UNLIKELY(!CheckAlignedAllocAlignmentAndSize(Alignment, Size))) {
+    errno = EINVAL;
+    return Instance.handleBadRequest();
+  }
+  return SetErrnoOnNull(Instance.allocate(Size, Alignment, FromMalloc));
 }
 
 uptr scudoMallocUsableSize(void *Ptr) {
diff --git a/lib/scudo/scudo_allocator.h b/lib/scudo/scudo_allocator.h
index 5f5225b..a517058 100644
--- a/lib/scudo/scudo_allocator.h
+++ b/lib/scudo/scudo_allocator.h
@@ -14,21 +14,15 @@
 #ifndef SCUDO_ALLOCATOR_H_
 #define SCUDO_ALLOCATOR_H_
 
-#include "scudo_flags.h"
-
-#include "sanitizer_common/sanitizer_allocator.h"
-
-#if !SANITIZER_LINUX
-# error "The Scudo hardened allocator is currently only supported on Linux."
-#endif
+#include "scudo_platform.h"
 
 namespace __scudo {
 
 enum AllocType : u8 {
-  FromMalloc    = 0, // Memory block came from malloc, realloc, calloc, etc.
-  FromNew       = 1, // Memory block came from operator new.
-  FromNewArray  = 2, // Memory block came from operator new [].
-  FromMemalign  = 3, // Memory block came from memalign, posix_memalign, etc.
+  FromMalloc    = 0,  // Memory block came from malloc, realloc, calloc, etc.
+  FromNew       = 1,  // Memory block came from operator new.
+  FromNewArray  = 2,  // Memory block came from operator new [].
+  FromMemalign  = 3,  // Memory block came from memalign, posix_memalign, etc.
 };
 
 enum ChunkState : u8 {
@@ -41,19 +35,20 @@
 // using functions such as GetBlockBegin, that is fairly costly. Our first
 // implementation used the MetaData as well, which offers the advantage of
 // being stored away from the chunk itself, but accessing it was costly as
-// well. The header will be atomically loaded and stored using the 16-byte
-// primitives offered by the platform (likely requires cmpxchg16b support).
+// well. The header will be atomically loaded and stored.
 typedef u64 PackedHeader;
 struct UnpackedHeader {
-  u64 Checksum    : 16;
-  u64 UnusedBytes : 20; // Needed for reallocation purposes.
-  u64 State       : 2;  // available, allocated, or quarantined
-  u64 AllocType   : 2;  // malloc, new, new[], or memalign
-  u64 Offset      : 16; // Offset from the beginning of the backend
-                        // allocation to the beginning of the chunk itself,
-                        // in multiples of MinAlignment. See comment about
-                        // its maximum value and test in init().
-  u64 Salt        : 8;
+  u64 Checksum          : 16;
+  u64 SizeOrUnusedBytes : 19;  // Size for Primary backed allocations, amount of
+                               // unused bytes in the chunk for Secondary ones.
+  u64 FromPrimary       : 1;
+  u64 State             : 2;   // available, allocated, or quarantined
+  u64 AllocType         : 2;   // malloc, new, new[], or memalign
+  u64 Offset            : 16;  // Offset from the beginning of the backend
+                               // allocation to the beginning of the chunk
+                               // itself, in multiples of MinAlignment. See
+                               // comment about its maximum value and in init().
+  u64 Salt              : 8;
 };
 
 typedef atomic_uint64_t AtomicPackedHeader;
@@ -61,7 +56,7 @@
 
 // Minimum alignment of 8 bytes for 32-bit, 16 for 64-bit
 const uptr MinAlignmentLog = FIRST_32_SECOND_64(3, 4);
-const uptr MaxAlignmentLog = 24; // 16 MB
+const uptr MaxAlignmentLog = 24;  // 16 MB
 const uptr MinAlignment = 1 << MinAlignmentLog;
 const uptr MaxAlignment = 1 << MaxAlignmentLog;
 
@@ -69,21 +64,54 @@
 const uptr AlignedChunkHeaderSize =
     (ChunkHeaderSize + MinAlignment - 1) & ~(MinAlignment - 1);
 
-struct AllocatorOptions {
-  u32 QuarantineSizeMb;
-  u32 ThreadLocalQuarantineSizeKb;
-  bool MayReturnNull;
-  s32 ReleaseToOSIntervalMs;
-  bool DeallocationTypeMismatch;
-  bool DeleteSizeMismatch;
-  bool ZeroContents;
-
-  void setFrom(const Flags *f, const CommonFlags *cf);
-  void copyTo(Flags *f, CommonFlags *cf) const;
+#if SANITIZER_CAN_USE_ALLOCATOR64
+const uptr AllocatorSpace = ~0ULL;
+struct AP64 {
+  static const uptr kSpaceBeg = AllocatorSpace;
+  static const uptr kSpaceSize = AllocatorSize;
+  static const uptr kMetadataSize = 0;
+  typedef __scudo::SizeClassMap SizeClassMap;
+  typedef NoOpMapUnmapCallback MapUnmapCallback;
+  static const uptr kFlags =
+      SizeClassAllocator64FlagMasks::kRandomShuffleChunks;
 };
+typedef SizeClassAllocator64<AP64> PrimaryAllocator;
+#else
+static const uptr NumRegions = SANITIZER_MMAP_RANGE_SIZE >> RegionSizeLog;
+# if SANITIZER_WORDSIZE == 32
+typedef FlatByteMap<NumRegions> ByteMap;
+# elif SANITIZER_WORDSIZE == 64
+typedef TwoLevelByteMap<(NumRegions >> 12), 1 << 12> ByteMap;
+# endif  // SANITIZER_WORDSIZE
+struct AP32 {
+  static const uptr kSpaceBeg = 0;
+  static const u64 kSpaceSize = SANITIZER_MMAP_RANGE_SIZE;
+  static const uptr kMetadataSize = 0;
+  typedef __scudo::SizeClassMap SizeClassMap;
+  static const uptr kRegionSizeLog = RegionSizeLog;
+  typedef __scudo::ByteMap ByteMap;
+  typedef NoOpMapUnmapCallback MapUnmapCallback;
+  static const uptr kFlags =
+      SizeClassAllocator32FlagMasks::kRandomShuffleChunks |
+      SizeClassAllocator32FlagMasks::kUseSeparateSizeClassForBatch;
+};
+typedef SizeClassAllocator32<AP32> PrimaryAllocator;
+#endif  // SANITIZER_CAN_USE_ALLOCATOR64
 
-void initAllocator(const AllocatorOptions &options);
-void drainQuarantine();
+// __sanitizer::RoundUp has a CHECK that is extraneous for us. Use our own.
+INLINE uptr RoundUpTo(uptr Size, uptr Boundary) {
+  return (Size + Boundary - 1) & ~(Boundary - 1);
+}
+
+#include "scudo_allocator_secondary.h"
+#include "scudo_allocator_combined.h"
+
+typedef SizeClassAllocatorLocalCache<PrimaryAllocator> AllocatorCache;
+typedef ScudoLargeMmapAllocator SecondaryAllocator;
+typedef ScudoCombinedAllocator<PrimaryAllocator, AllocatorCache,
+    SecondaryAllocator> ScudoBackendAllocator;
+
+void initScudo();
 
 void *scudoMalloc(uptr Size, AllocType Type);
 void scudoFree(void *Ptr, AllocType Type);
@@ -97,8 +125,6 @@
 void *scudoAlignedAlloc(uptr Alignment, uptr Size);
 uptr scudoMallocUsableSize(void *Ptr);
 
-#include "scudo_allocator_secondary.h"
-
 }  // namespace __scudo
 
 #endif  // SCUDO_ALLOCATOR_H_
diff --git a/lib/scudo/scudo_allocator_combined.h b/lib/scudo/scudo_allocator_combined.h
new file mode 100644
index 0000000..7599c12
--- /dev/null
+++ b/lib/scudo/scudo_allocator_combined.h
@@ -0,0 +1,76 @@
+//===-- scudo_allocator_combined.h ------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// Scudo Combined Allocator, dispatches allocation & deallocation requests to
+/// the Primary or the Secondary backend allocators.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef SCUDO_ALLOCATOR_COMBINED_H_
+#define SCUDO_ALLOCATOR_COMBINED_H_
+
+#ifndef SCUDO_ALLOCATOR_H_
+#error "This file must be included inside scudo_allocator.h."
+#endif
+
+template <class PrimaryAllocator, class AllocatorCache,
+    class SecondaryAllocator>
+class ScudoCombinedAllocator {
+ public:
+  void init(s32 ReleaseToOSIntervalMs) {
+    Primary.Init(ReleaseToOSIntervalMs);
+    Secondary.Init();
+    Stats.Init();
+  }
+
+  // Primary allocations are always MinAlignment aligned, and as such do not
+  // require an Alignment parameter.
+  void *allocatePrimary(AllocatorCache *Cache, uptr Size) {
+    return Cache->Allocate(&Primary, Primary.ClassID(Size));
+  }
+
+  // Secondary allocations do not require a Cache, but do require an Alignment
+  // parameter.
+  void *allocateSecondary(uptr Size, uptr Alignment) {
+    return Secondary.Allocate(&Stats, Size, Alignment);
+  }
+
+  void deallocatePrimary(AllocatorCache *Cache, void *Ptr) {
+    Cache->Deallocate(&Primary, Primary.GetSizeClass(Ptr), Ptr);
+  }
+
+  void deallocateSecondary(void *Ptr) {
+    Secondary.Deallocate(&Stats, Ptr);
+  }
+
+  uptr getActuallyAllocatedSize(void *Ptr, bool FromPrimary) {
+    if (FromPrimary)
+      return PrimaryAllocator::ClassIdToSize(Primary.GetSizeClass(Ptr));
+    return Secondary.GetActuallyAllocatedSize(Ptr);
+  }
+
+  void initCache(AllocatorCache *Cache) {
+    Cache->Init(&Stats);
+  }
+
+  void destroyCache(AllocatorCache *Cache) {
+    Cache->Destroy(&Primary, &Stats);
+  }
+
+  void getStats(AllocatorStatCounters StatType) const {
+    Stats.Get(StatType);
+  }
+
+ private:
+  PrimaryAllocator Primary;
+  SecondaryAllocator Secondary;
+  AllocatorGlobalStats Stats;
+};
+
+#endif  // SCUDO_ALLOCATOR_COMBINED_H_
diff --git a/lib/scudo/scudo_allocator_secondary.h b/lib/scudo/scudo_allocator_secondary.h
index b984f0d..dbfb225 100644
--- a/lib/scudo/scudo_allocator_secondary.h
+++ b/lib/scudo/scudo_allocator_secondary.h
@@ -24,26 +24,24 @@
 class ScudoLargeMmapAllocator {
  public:
 
-  void Init(bool AllocatorMayReturnNull) {
+  void Init() {
     PageSize = GetPageSizeCached();
-    atomic_store(&MayReturnNull, AllocatorMayReturnNull, memory_order_relaxed);
   }
 
   void *Allocate(AllocatorStats *Stats, uptr Size, uptr Alignment) {
+    uptr UserSize = Size - AlignedChunkHeaderSize;
     // The Scudo frontend prevents us from allocating more than
     // MaxAllowedMallocSize, so integer overflow checks would be superfluous.
     uptr MapSize = Size + SecondaryHeaderSize;
+    if (Alignment > MinAlignment)
+      MapSize += Alignment;
     MapSize = RoundUpTo(MapSize, PageSize);
     // Account for 2 guard pages, one before and one after the chunk.
     MapSize += 2 * PageSize;
-    // The size passed to the Secondary comprises the alignment, if large
-    // enough. Subtract it here to get the requested size, including header.
-    if (Alignment > MinAlignment)
-      Size -= Alignment;
 
     uptr MapBeg = reinterpret_cast<uptr>(MmapNoAccess(MapSize));
     if (MapBeg == ~static_cast<uptr>(0))
-      return ReturnNullOrDieOnOOM();
+      return ReturnNullOrDieOnFailure::OnOOM();
     // A page-aligned pointer is assumed after that, so check it now.
     CHECK(IsAligned(MapBeg, PageSize));
     uptr MapEnd = MapBeg + MapSize;
@@ -51,32 +49,32 @@
     // initial guard page, and both headers. This is the pointer that has to
     // abide by alignment requirements.
     uptr UserBeg = MapBeg + PageSize + HeadersSize;
+    uptr UserEnd = UserBeg + UserSize;
 
     // In the rare event of larger alignments, we will attempt to fit the mmap
     // area better and unmap extraneous memory. This will also ensure that the
     // offset and unused bytes field of the header stay small.
     if (Alignment > MinAlignment) {
-      if (UserBeg & (Alignment - 1))
-        UserBeg += Alignment - (UserBeg & (Alignment - 1));
-      CHECK_GE(UserBeg, MapBeg);
-      uptr NewMapBeg = RoundDownTo(UserBeg - HeadersSize, PageSize) - PageSize;
-      CHECK_GE(NewMapBeg, MapBeg);
-      uptr NewMapEnd = RoundUpTo(UserBeg + (Size - AlignedChunkHeaderSize),
-                                 PageSize) + PageSize;
-      CHECK_LE(NewMapEnd, MapEnd);
-      // Unmap the extra memory if it's large enough, on both sides.
-      uptr Diff = NewMapBeg - MapBeg;
-      if (Diff > PageSize)
-        UnmapOrDie(reinterpret_cast<void *>(MapBeg), Diff);
-      Diff = MapEnd - NewMapEnd;
-      if (Diff > PageSize)
-        UnmapOrDie(reinterpret_cast<void *>(NewMapEnd), Diff);
-      MapBeg = NewMapBeg;
-      MapEnd = NewMapEnd;
-      MapSize = NewMapEnd - NewMapBeg;
+      if (!IsAligned(UserBeg, Alignment)) {
+        UserBeg = RoundUpTo(UserBeg, Alignment);
+        CHECK_GE(UserBeg, MapBeg);
+        uptr NewMapBeg = RoundDownTo(UserBeg - HeadersSize, PageSize) -
+            PageSize;
+        CHECK_GE(NewMapBeg, MapBeg);
+        if (NewMapBeg != MapBeg) {
+          UnmapOrDie(reinterpret_cast<void *>(MapBeg), NewMapBeg - MapBeg);
+          MapBeg = NewMapBeg;
+        }
+        UserEnd = UserBeg + UserSize;
+      }
+      uptr NewMapEnd = RoundUpTo(UserEnd, PageSize) + PageSize;
+      if (NewMapEnd != MapEnd) {
+        UnmapOrDie(reinterpret_cast<void *>(NewMapEnd), MapEnd - NewMapEnd);
+        MapEnd = NewMapEnd;
+      }
+      MapSize = MapEnd - MapBeg;
     }
 
-    uptr UserEnd = UserBeg + (Size - AlignedChunkHeaderSize);
     CHECK_LE(UserEnd, MapEnd - PageSize);
     // Actually mmap the memory, preserving the guard pages on either side.
     CHECK_EQ(MapBeg + PageSize, reinterpret_cast<uptr>(
@@ -88,83 +86,35 @@
     // The primary adds the whole class size to the stats when allocating a
     // chunk, so we will do something similar here. But we will not account for
     // the guard pages.
-    Stats->Add(AllocatorStatAllocated, MapSize - 2 * PageSize);
-    Stats->Add(AllocatorStatMapped, MapSize - 2 * PageSize);
+    {
+      SpinMutexLock l(&StatsMutex);
+      Stats->Add(AllocatorStatAllocated, MapSize - 2 * PageSize);
+      Stats->Add(AllocatorStatMapped, MapSize - 2 * PageSize);
+    }
 
-    return reinterpret_cast<void *>(UserBeg);
-  }
-
-  void *ReturnNullOrDieOnBadRequest() {
-    if (atomic_load(&MayReturnNull, memory_order_acquire))
-      return nullptr;
-    ReportAllocatorCannotReturnNull(false);
-  }
-
-  void *ReturnNullOrDieOnOOM() {
-    if (atomic_load(&MayReturnNull, memory_order_acquire))
-      return nullptr;
-    ReportAllocatorCannotReturnNull(true);
-  }
-
-  void SetMayReturnNull(bool AllocatorMayReturnNull) {
-    atomic_store(&MayReturnNull, AllocatorMayReturnNull, memory_order_release);
+    return reinterpret_cast<void *>(Ptr);
   }
 
   void Deallocate(AllocatorStats *Stats, void *Ptr) {
     SecondaryHeader *Header = getHeader(Ptr);
-    Stats->Sub(AllocatorStatAllocated, Header->MapSize - 2 * PageSize);
-    Stats->Sub(AllocatorStatMapped, Header->MapSize - 2 * PageSize);
+    {
+      SpinMutexLock l(&StatsMutex);
+      Stats->Sub(AllocatorStatAllocated, Header->MapSize - 2 * PageSize);
+      Stats->Sub(AllocatorStatMapped, Header->MapSize - 2 * PageSize);
+    }
     UnmapOrDie(reinterpret_cast<void *>(Header->MapBeg), Header->MapSize);
   }
 
-  uptr TotalMemoryUsed() {
-    UNIMPLEMENTED();
-  }
-
-  bool PointerIsMine(const void *Ptr) {
-    UNIMPLEMENTED();
-  }
-
   uptr GetActuallyAllocatedSize(void *Ptr) {
     SecondaryHeader *Header = getHeader(Ptr);
-    // Deduct PageSize as MapEnd includes the trailing guard page.
+    // Deduct PageSize as MapSize includes the trailing guard page.
     uptr MapEnd = Header->MapBeg + Header->MapSize - PageSize;
     return MapEnd - reinterpret_cast<uptr>(Ptr);
   }
 
-  void *GetMetaData(const void *Ptr) {
-    UNIMPLEMENTED();
-  }
-
-  void *GetBlockBegin(const void *Ptr) {
-    UNIMPLEMENTED();
-  }
-
-  void *GetBlockBeginFastLocked(void *Ptr) {
-    UNIMPLEMENTED();
-  }
-
-  void PrintStats() {
-    UNIMPLEMENTED();
-  }
-
-  void ForceLock() {
-    UNIMPLEMENTED();
-  }
-
-  void ForceUnlock() {
-    UNIMPLEMENTED();
-  }
-
-  void ForEachChunk(ForEachChunkCallback Callback, void *Arg) {
-    UNIMPLEMENTED();
-  }
-
  private:
   // A Secondary allocated chunk header contains the base of the mapping and
-  // its size. Currently, the base is always a page before the header, but
-  // we might want to extend that number in the future based on the size of
-  // the allocation.
+  // its size, which comprises the guard pages.
   struct SecondaryHeader {
     uptr MapBeg;
     uptr MapSize;
@@ -182,7 +132,7 @@
   const uptr SecondaryHeaderSize = sizeof(SecondaryHeader);
   const uptr HeadersSize = SecondaryHeaderSize + AlignedChunkHeaderSize;
   uptr PageSize;
-  atomic_uint8_t MayReturnNull;
+  SpinMutex StatsMutex;
 };
 
 #endif  // SCUDO_ALLOCATOR_SECONDARY_H_
diff --git a/lib/scudo/scudo_crc32.cpp b/lib/scudo/scudo_crc32.cpp
index 56be22f..a267dc4 100644
--- a/lib/scudo/scudo_crc32.cpp
+++ b/lib/scudo/scudo_crc32.cpp
@@ -12,24 +12,7 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "sanitizer_common/sanitizer_internal_defs.h"
-
-// Hardware CRC32 is supported at compilation via the following:
-// - for i386 & x86_64: -msse4.2
-// - for ARM & AArch64: -march=armv8-a+crc or -mcrc
-// An additional check must be performed at runtime as well to make sure the
-// emitted instructions are valid on the target host.
-
-#if defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32)
-# ifdef __SSE4_2__
-#  include <smmintrin.h>
-#  define CRC32_INTRINSIC FIRST_32_SECOND_64(_mm_crc32_u32, _mm_crc32_u64)
-# endif
-# ifdef __ARM_FEATURE_CRC32
-#  include <arm_acle.h>
-#  define CRC32_INTRINSIC FIRST_32_SECOND_64(__crc32cw, __crc32cd)
-# endif
-#endif  // defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32)
+#include "scudo_crc32.h"
 
 namespace __scudo {
 
diff --git a/lib/scudo/scudo_crc32.h b/lib/scudo/scudo_crc32.h
new file mode 100644
index 0000000..5ffcc62
--- /dev/null
+++ b/lib/scudo/scudo_crc32.h
@@ -0,0 +1,101 @@
+//===-- scudo_crc32.h -------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// Scudo chunk header checksum related definitions.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef SCUDO_CRC32_H_
+#define SCUDO_CRC32_H_
+
+#include "sanitizer_common/sanitizer_internal_defs.h"
+
+// Hardware CRC32 is supported at compilation via the following:
+// - for i386 & x86_64: -msse4.2
+// - for ARM & AArch64: -march=armv8-a+crc or -mcrc
+// An additional check must be performed at runtime as well to make sure the
+// emitted instructions are valid on the target host.
+
+#if defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32)
+# ifdef __SSE4_2__
+#  include <smmintrin.h>
+#  define CRC32_INTRINSIC FIRST_32_SECOND_64(_mm_crc32_u32, _mm_crc32_u64)
+# endif
+# ifdef __ARM_FEATURE_CRC32
+#  include <arm_acle.h>
+#  define CRC32_INTRINSIC FIRST_32_SECOND_64(__crc32cw, __crc32cd)
+# endif
+#endif  // defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32)
+
+namespace __scudo {
+
+enum : u8 {
+  CRC32Software = 0,
+  CRC32Hardware = 1,
+};
+
+const static u32 CRC32Table[] = {
+  0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
+  0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
+  0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
+  0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
+  0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
+  0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
+  0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c,
+  0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
+  0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
+  0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
+  0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106,
+  0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
+  0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
+  0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
+  0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
+  0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
+  0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,
+  0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
+  0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa,
+  0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
+  0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
+  0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
+  0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84,
+  0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
+  0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
+  0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
+  0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e,
+  0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
+  0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,
+  0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
+  0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,
+  0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
+  0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,
+  0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
+  0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
+  0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
+  0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,
+  0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
+  0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc,
+  0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
+  0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
+  0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
+  0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
+};
+
+INLINE u32 computeSoftwareCRC32(u32 Crc, uptr Data) {
+  for (uptr i = 0; i < sizeof(Data); i++) {
+    Crc = CRC32Table[(Crc ^ Data) & 0xff] ^ (Crc >> 8);
+    Data >>= 8;
+  }
+  return Crc;
+}
+
+SANITIZER_WEAK_ATTRIBUTE u32 computeHardwareCRC32(u32 Crc, uptr Data);
+
+}  // namespace __scudo
+
+#endif  // SCUDO_CRC32_H_
diff --git a/lib/scudo/scudo_flags.cpp b/lib/scudo/scudo_flags.cpp
index 64da1d9..ab94afa 100644
--- a/lib/scudo/scudo_flags.cpp
+++ b/lib/scudo/scudo_flags.cpp
@@ -67,27 +67,52 @@
 
   // Sanity checks and default settings for the Quarantine parameters.
 
-  if (f->QuarantineSizeMb < 0) {
-    const int DefaultQuarantineSizeMb = FIRST_32_SECOND_64(16, 64);
-    f->QuarantineSizeMb = DefaultQuarantineSizeMb;
+  if (f->QuarantineSizeMb >= 0) {
+    // Backward compatible logic if QuarantineSizeMb is set.
+    if (f->QuarantineSizeKb >= 0) {
+      dieWithMessage("ERROR: please use either QuarantineSizeMb (deprecated) "
+          "or QuarantineSizeKb, but not both\n");
+    }
+    if (f->QuarantineChunksUpToSize >= 0) {
+      dieWithMessage("ERROR: QuarantineChunksUpToSize cannot be used in "
+          " conjunction with the deprecated QuarantineSizeMb option\n");
+    }
+    // If everything is in order, update QuarantineSizeKb accordingly.
+    f->QuarantineSizeKb = f->QuarantineSizeMb * 1024;
+  } else {
+    // Otherwise proceed with the new options.
+    if (f->QuarantineSizeKb < 0) {
+      const int DefaultQuarantineSizeKb = FIRST_32_SECOND_64(64, 256);
+      f->QuarantineSizeKb = DefaultQuarantineSizeKb;
+    }
+    if (f->QuarantineChunksUpToSize < 0) {
+      const int DefaultQuarantineChunksUpToSize = FIRST_32_SECOND_64(512, 2048);
+      f->QuarantineChunksUpToSize = DefaultQuarantineChunksUpToSize;
+    }
   }
-  // We enforce an upper limit for the quarantine size of 4Gb.
-  if (f->QuarantineSizeMb > (4 * 1024)) {
+
+  // We enforce an upper limit for the chunk quarantine threshold of 4Mb.
+  if (f->QuarantineChunksUpToSize > (4 * 1024 * 1024)) {
+    dieWithMessage("ERROR: the chunk quarantine threshold is too large\n");
+  }
+
+  // We enforce an upper limit for the quarantine size of 32Mb.
+  if (f->QuarantineSizeKb > (32 * 1024)) {
     dieWithMessage("ERROR: the quarantine size is too large\n");
   }
+
   if (f->ThreadLocalQuarantineSizeKb < 0) {
-    const int DefaultThreadLocalQuarantineSizeKb =
-        FIRST_32_SECOND_64(256, 1024);
+    const int DefaultThreadLocalQuarantineSizeKb = FIRST_32_SECOND_64(16, 64);
     f->ThreadLocalQuarantineSizeKb = DefaultThreadLocalQuarantineSizeKb;
   }
-  // And an upper limit of 128Mb for the thread quarantine cache.
-  if (f->ThreadLocalQuarantineSizeKb > (128 * 1024)) {
+  // And an upper limit of 8Mb for the thread quarantine cache.
+  if (f->ThreadLocalQuarantineSizeKb > (8 * 1024)) {
     dieWithMessage("ERROR: the per thread quarantine cache size is too "
-                   "large\n");
+        "large\n");
   }
-  if (f->ThreadLocalQuarantineSizeKb == 0 && f->QuarantineSizeMb > 0) {
+  if (f->ThreadLocalQuarantineSizeKb == 0 && f->QuarantineSizeKb > 0) {
     dieWithMessage("ERROR: ThreadLocalQuarantineSizeKb can be set to 0 only "
-                   "when QuarantineSizeMb is set to 0\n");
+        "when QuarantineSizeKb is set to 0\n");
   }
 }
 
diff --git a/lib/scudo/scudo_flags.inc b/lib/scudo/scudo_flags.inc
index 45f9ea8..f180478 100644
--- a/lib/scudo/scudo_flags.inc
+++ b/lib/scudo/scudo_flags.inc
@@ -15,17 +15,27 @@
 # error "Define SCUDO_FLAG prior to including this file!"
 #endif
 
-// Default value is set in scudo_flags.cpp based on architecture.
 SCUDO_FLAG(int, QuarantineSizeMb, -1,
-           "Size (in Mb) of quarantine used to delay the actual deallocation "
-           "of chunks. Lower value may reduce memory usage but decrease the "
-           "effectiveness of the mitigation.")
+           "Deprecated. Please use QuarantineSizeKb.")
+
+// Default value is set in scudo_flags.cpp based on architecture.
+SCUDO_FLAG(int, QuarantineSizeKb, -1,
+           "Size in KB of quarantine used to delay the actual deallocation of "
+           "chunks. Lower value may reduce memory usage but decrease the "
+           "effectiveness of the mitigation. Defaults to 64KB (32-bit) or "
+           "256KB (64-bit)")
 
 // Default value is set in scudo_flags.cpp based on architecture.
 SCUDO_FLAG(int, ThreadLocalQuarantineSizeKb, -1,
-          "Size (in Kb) of per-thread cache used to offload the global "
+          "Size in KB of per-thread cache used to offload the global "
           "quarantine. Lower value may reduce memory usage but might increase "
-          "the contention on the global quarantine.")
+          "the contention on the global quarantine. Defaults to 16KB (32-bit) "
+          "or 64KB (64-bit)")
+
+// Default value is set in scudo_flags.cpp based on architecture.
+SCUDO_FLAG(int, QuarantineChunksUpToSize, -1,
+          "Size in bytes up to which chunks will be quarantined (if lower than"
+          "or equal to). Defaults to 256 (32-bit) or 2048 (64-bit)")
 
 SCUDO_FLAG(bool, DeallocationTypeMismatch, true,
           "Report errors on malloc/delete, new/free, new/delete[], etc.")
diff --git a/lib/scudo/scudo_new_delete.cpp b/lib/scudo/scudo_new_delete.cpp
index c022bd0..c5a1abb 100644
--- a/lib/scudo/scudo_new_delete.cpp
+++ b/lib/scudo/scudo_new_delete.cpp
@@ -15,7 +15,7 @@
 
 #include "interception/interception.h"
 
-#include <cstddef>
+#include <stddef.h>
 
 using namespace __scudo;
 
@@ -26,13 +26,18 @@
 struct nothrow_t {};
 }  // namespace std
 
+// TODO(alekseys): throw std::bad_alloc instead of dying on OOM.
 CXX_OPERATOR_ATTRIBUTE
 void *operator new(size_t size) {
-  return scudoMalloc(size, FromNew);
+  void *res = scudoMalloc(size, FromNew);
+  if (UNLIKELY(!res)) DieOnFailure::OnOOM();
+  return res;
 }
 CXX_OPERATOR_ATTRIBUTE
 void *operator new[](size_t size) {
-  return scudoMalloc(size, FromNewArray);
+  void *res = scudoMalloc(size, FromNewArray);
+  if (UNLIKELY(!res)) DieOnFailure::OnOOM();
+  return res;
 }
 CXX_OPERATOR_ATTRIBUTE
 void *operator new(size_t size, std::nothrow_t const&) {
diff --git a/lib/scudo/scudo_platform.h b/lib/scudo/scudo_platform.h
new file mode 100644
index 0000000..7db1197
--- /dev/null
+++ b/lib/scudo/scudo_platform.h
@@ -0,0 +1,73 @@
+//===-- scudo_platform.h ----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// Scudo platform specific definitions.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef SCUDO_PLATFORM_H_
+#define SCUDO_PLATFORM_H_
+
+#include "sanitizer_common/sanitizer_allocator.h"
+
+#if !SANITIZER_LINUX && !SANITIZER_FUCHSIA
+# error "The Scudo hardened allocator is not supported on this platform."
+#endif
+
+#define SCUDO_TSD_EXCLUSIVE_SUPPORTED (!SANITIZER_ANDROID && !SANITIZER_FUCHSIA)
+
+#ifndef SCUDO_TSD_EXCLUSIVE
+// SCUDO_TSD_EXCLUSIVE wasn't defined, use a default TSD model for the platform.
+# if SANITIZER_ANDROID || SANITIZER_FUCHSIA
+// Android and Fuchsia use a pool of TSDs shared between threads.
+#  define SCUDO_TSD_EXCLUSIVE 0
+# elif SANITIZER_LINUX && !SANITIZER_ANDROID
+// Non-Android Linux use an exclusive TSD per thread.
+#  define SCUDO_TSD_EXCLUSIVE 1
+# else
+#  error "No default TSD model defined for this platform."
+# endif  // SANITIZER_ANDROID || SANITIZER_FUCHSIA
+#endif  // SCUDO_TSD_EXCLUSIVE
+
+// If the exclusive TSD model is chosen, make sure the platform supports it.
+#if SCUDO_TSD_EXCLUSIVE && !SCUDO_TSD_EXCLUSIVE_SUPPORTED
+# error "The exclusive TSD model is not supported on this platform."
+#endif
+
+// Maximum number of TSDs that can be created for the Shared model.
+#ifndef SCUDO_SHARED_TSD_POOL_SIZE
+# define SCUDO_SHARED_TSD_POOL_SIZE 32U
+#endif  // SCUDO_SHARED_TSD_POOL_SIZE
+
+namespace __scudo {
+
+#if SANITIZER_CAN_USE_ALLOCATOR64
+# if defined(__aarch64__) && SANITIZER_ANDROID
+const uptr AllocatorSize = 0x2000000000ULL;  // 128G.
+typedef VeryCompactSizeClassMap SizeClassMap;
+# elif defined(__aarch64__)
+const uptr AllocatorSize = 0x10000000000ULL;  // 1T.
+typedef CompactSizeClassMap SizeClassMap;
+# else
+const uptr AllocatorSize = 0x40000000000ULL;  // 4T.
+typedef CompactSizeClassMap SizeClassMap;
+# endif
+#else
+# if SANITIZER_ANDROID
+static const uptr RegionSizeLog = 19;
+typedef VeryCompactSizeClassMap SizeClassMap;
+# else
+static const uptr RegionSizeLog = 20;
+typedef CompactSizeClassMap SizeClassMap;
+# endif
+#endif  // SANITIZER_CAN_USE_ALLOCATOR64
+
+}  // namespace __scudo
+
+#endif // SCUDO_PLATFORM_H_
diff --git a/lib/scudo/scudo_tsd.h b/lib/scudo/scudo_tsd.h
new file mode 100644
index 0000000..d78eb49
--- /dev/null
+++ b/lib/scudo/scudo_tsd.h
@@ -0,0 +1,73 @@
+//===-- scudo_tsd.h ---------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// Scudo thread specific data definition.
+/// Implementation will differ based on the thread local storage primitives
+/// offered by the underlying platform.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef SCUDO_TSD_H_
+#define SCUDO_TSD_H_
+
+#include "scudo_allocator.h"
+#include "scudo_utils.h"
+
+#include <pthread.h>
+
+namespace __scudo {
+
+struct ALIGNED(64) ScudoTSD {
+  AllocatorCache Cache;
+  ScudoPrng Prng;
+  uptr QuarantineCachePlaceHolder[4];
+
+  void init(bool Shared);
+  void commitBack();
+
+  INLINE bool tryLock() {
+    if (Mutex.TryLock()) {
+      atomic_store_relaxed(&Precedence, 0);
+      return true;
+    }
+    if (atomic_load_relaxed(&Precedence) == 0)
+      atomic_store_relaxed(&Precedence, NanoTime());
+    return false;
+  }
+
+  INLINE void lock() {
+    Mutex.Lock();
+    atomic_store_relaxed(&Precedence, 0);
+  }
+
+  INLINE void unlock() {
+    if (!UnlockRequired)
+      return;
+    Mutex.Unlock();
+  }
+
+  INLINE u64 getPrecedence() {
+    return atomic_load_relaxed(&Precedence);
+  }
+
+ private:
+  bool UnlockRequired;
+  StaticSpinMutex Mutex;
+  atomic_uint64_t Precedence;
+};
+
+void initThread(bool MinimalInit);
+
+// TSD model specific fastpath functions definitions.
+#include "scudo_tsd_exclusive.inc"
+#include "scudo_tsd_shared.inc"
+
+}  // namespace __scudo
+
+#endif  // SCUDO_TSD_H_
diff --git a/lib/scudo/scudo_tsd_exclusive.cpp b/lib/scudo/scudo_tsd_exclusive.cpp
new file mode 100644
index 0000000..1084dfa
--- /dev/null
+++ b/lib/scudo/scudo_tsd_exclusive.cpp
@@ -0,0 +1,68 @@
+//===-- scudo_tsd_exclusive.cpp ---------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// Scudo exclusive TSD implementation.
+///
+//===----------------------------------------------------------------------===//
+
+#include "scudo_tsd.h"
+
+#if SCUDO_TSD_EXCLUSIVE
+
+namespace __scudo {
+
+static pthread_once_t GlobalInitialized = PTHREAD_ONCE_INIT;
+static pthread_key_t PThreadKey;
+
+__attribute__((tls_model("initial-exec")))
+THREADLOCAL ThreadState ScudoThreadState = ThreadNotInitialized;
+__attribute__((tls_model("initial-exec")))
+THREADLOCAL ScudoTSD TSD;
+
+// Fallback TSD for when the thread isn't initialized yet or is torn down. It
+// can be shared between multiple threads and as such must be locked.
+ScudoTSD FallbackTSD;
+
+static void teardownThread(void *Ptr) {
+  uptr I = reinterpret_cast<uptr>(Ptr);
+  // The glibc POSIX thread-local-storage deallocation routine calls user
+  // provided destructors in a loop of PTHREAD_DESTRUCTOR_ITERATIONS.
+  // We want to be called last since other destructors might call free and the
+  // like, so we wait until PTHREAD_DESTRUCTOR_ITERATIONS before draining the
+  // quarantine and swallowing the cache.
+  if (I > 1) {
+    // If pthread_setspecific fails, we will go ahead with the teardown.
+    if (LIKELY(pthread_setspecific(PThreadKey,
+                                   reinterpret_cast<void *>(I - 1)) == 0))
+      return;
+  }
+  TSD.commitBack();
+  ScudoThreadState = ThreadTornDown;
+}
+
+
+static void initOnce() {
+  CHECK_EQ(pthread_key_create(&PThreadKey, teardownThread), 0);
+  initScudo();
+  FallbackTSD.init(/*Shared=*/true);
+}
+
+void initThread(bool MinimalInit) {
+  CHECK_EQ(pthread_once(&GlobalInitialized, initOnce), 0);
+  if (UNLIKELY(MinimalInit))
+    return;
+  CHECK_EQ(pthread_setspecific(PThreadKey, reinterpret_cast<void *>(
+      GetPthreadDestructorIterations())), 0);
+  TSD.init(/*Shared=*/false);
+  ScudoThreadState = ThreadInitialized;
+}
+
+}  // namespace __scudo
+
+#endif  // SCUDO_TSD_EXCLUSIVE
diff --git a/lib/scudo/scudo_tsd_exclusive.inc b/lib/scudo/scudo_tsd_exclusive.inc
new file mode 100644
index 0000000..567b6a1
--- /dev/null
+++ b/lib/scudo/scudo_tsd_exclusive.inc
@@ -0,0 +1,46 @@
+//===-- scudo_tsd_exclusive.inc ---------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// Scudo exclusive TSD fastpath functions implementation.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef SCUDO_TSD_H_
+# error "This file must be included inside scudo_tsd.h."
+#endif  // SCUDO_TSD_H_
+
+#if SCUDO_TSD_EXCLUSIVE
+
+enum ThreadState : u8 {
+  ThreadNotInitialized = 0,
+  ThreadInitialized,
+  ThreadTornDown,
+};
+__attribute__((tls_model("initial-exec")))
+extern THREADLOCAL ThreadState ScudoThreadState;
+__attribute__((tls_model("initial-exec")))
+extern THREADLOCAL ScudoTSD TSD;
+
+extern ScudoTSD FallbackTSD;
+
+ALWAYS_INLINE void initThreadMaybe(bool MinimalInit = false) {
+  if (LIKELY(ScudoThreadState != ThreadNotInitialized))
+    return;
+  initThread(MinimalInit);
+}
+
+ALWAYS_INLINE ScudoTSD *getTSDAndLock() {
+  if (UNLIKELY(ScudoThreadState != ThreadInitialized)) {
+    FallbackTSD.lock();
+    return &FallbackTSD;
+  }
+  return &TSD;
+}
+
+#endif  // SCUDO_TSD_EXCLUSIVE
diff --git a/lib/scudo/scudo_tsd_shared.cpp b/lib/scudo/scudo_tsd_shared.cpp
new file mode 100644
index 0000000..191c9ff
--- /dev/null
+++ b/lib/scudo/scudo_tsd_shared.cpp
@@ -0,0 +1,94 @@
+//===-- scudo_tsd_shared.cpp ------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// Scudo shared TSD implementation.
+///
+//===----------------------------------------------------------------------===//
+
+#include "scudo_tsd.h"
+
+#if !SCUDO_TSD_EXCLUSIVE
+
+namespace __scudo {
+
+static pthread_once_t GlobalInitialized = PTHREAD_ONCE_INIT;
+pthread_key_t PThreadKey;
+
+static atomic_uint32_t CurrentIndex;
+static ScudoTSD *TSDs;
+static u32 NumberOfTSDs;
+
+// sysconf(_SC_NPROCESSORS_{CONF,ONLN}) cannot be used as they allocate memory.
+static u32 getNumberOfCPUs() {
+  cpu_set_t CPUs;
+  CHECK_EQ(sched_getaffinity(0, sizeof(cpu_set_t), &CPUs), 0);
+  return CPU_COUNT(&CPUs);
+}
+
+static void initOnce() {
+  CHECK_EQ(pthread_key_create(&PThreadKey, NULL), 0);
+  initScudo();
+  NumberOfTSDs = Min(Max(1U, getNumberOfCPUs()),
+                     static_cast<u32>(SCUDO_SHARED_TSD_POOL_SIZE));
+  TSDs = reinterpret_cast<ScudoTSD *>(
+      MmapOrDie(sizeof(ScudoTSD) * NumberOfTSDs, "ScudoTSDs"));
+  for (u32 i = 0; i < NumberOfTSDs; i++)
+    TSDs[i].init(/*Shared=*/true);
+}
+
+ALWAYS_INLINE void setCurrentTSD(ScudoTSD *TSD) {
+#if SANITIZER_ANDROID
+  *get_android_tls_ptr() = reinterpret_cast<uptr>(TSD);
+#else
+  CHECK_EQ(pthread_setspecific(PThreadKey, reinterpret_cast<void *>(TSD)), 0);
+#endif  // SANITIZER_ANDROID
+}
+
+void initThread(bool MinimalInit) {
+  pthread_once(&GlobalInitialized, initOnce);
+  // Initial context assignment is done in a plain round-robin fashion.
+  u32 Index = atomic_fetch_add(&CurrentIndex, 1, memory_order_relaxed);
+  setCurrentTSD(&TSDs[Index % NumberOfTSDs]);
+}
+
+ScudoTSD *getTSDAndLockSlow() {
+  ScudoTSD *TSD;
+  if (NumberOfTSDs > 1) {
+    // Go through all the contexts and find the first unlocked one.
+    for (u32 i = 0; i < NumberOfTSDs; i++) {
+      TSD = &TSDs[i];
+      if (TSD->tryLock()) {
+        setCurrentTSD(TSD);
+        return TSD;
+      }
+    }
+    // No luck, find the one with the lowest Precedence, and slow lock it.
+    u64 LowestPrecedence = UINT64_MAX;
+    for (u32 i = 0; i < NumberOfTSDs; i++) {
+      u64 Precedence = TSDs[i].getPrecedence();
+      if (Precedence && Precedence < LowestPrecedence) {
+        TSD = &TSDs[i];
+        LowestPrecedence = Precedence;
+      }
+    }
+    if (LIKELY(LowestPrecedence != UINT64_MAX)) {
+      TSD->lock();
+      setCurrentTSD(TSD);
+      return TSD;
+    }
+  }
+  // Last resort, stick with the current one.
+  TSD = getCurrentTSD();
+  TSD->lock();
+  return TSD;
+}
+
+}  // namespace __scudo
+
+#endif  // !SCUDO_TSD_EXCLUSIVE
diff --git a/lib/scudo/scudo_tsd_shared.inc b/lib/scudo/scudo_tsd_shared.inc
new file mode 100644
index 0000000..79fcd65
--- /dev/null
+++ b/lib/scudo/scudo_tsd_shared.inc
@@ -0,0 +1,48 @@
+//===-- scudo_tsd_shared.inc ------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// Scudo shared TSD fastpath functions implementation.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef SCUDO_TSD_H_
+# error "This file must be included inside scudo_tsd.h."
+#endif  // SCUDO_TSD_H_
+
+#if !SCUDO_TSD_EXCLUSIVE
+
+extern pthread_key_t PThreadKey;
+
+ALWAYS_INLINE ScudoTSD* getCurrentTSD() {
+#if SANITIZER_ANDROID
+  return reinterpret_cast<ScudoTSD *>(*get_android_tls_ptr());
+#else
+  return reinterpret_cast<ScudoTSD *>(pthread_getspecific(PThreadKey));
+#endif  // SANITIZER_ANDROID
+}
+
+ALWAYS_INLINE void initThreadMaybe(bool MinimalInit = false) {
+  if (LIKELY(getCurrentTSD()))
+    return;
+  initThread(MinimalInit);
+}
+
+ScudoTSD *getTSDAndLockSlow();
+
+ALWAYS_INLINE ScudoTSD *getTSDAndLock() {
+  ScudoTSD *TSD = getCurrentTSD();
+  CHECK(TSD && "No TSD associated with the current thread!");
+  // Try to lock the currently associated context.
+  if (TSD->tryLock())
+    return TSD;
+  // If it failed, go the slow path.
+  return getTSDAndLockSlow();
+}
+
+#endif  // !SCUDO_TSD_EXCLUSIVE
diff --git a/lib/scudo/scudo_utils.cpp b/lib/scudo/scudo_utils.cpp
index 4e2f6e0..093eafe 100644
--- a/lib/scudo/scudo_utils.cpp
+++ b/lib/scudo/scudo_utils.cpp
@@ -13,15 +13,47 @@
 
 #include "scudo_utils.h"
 
-#include <errno.h>
-#include <fcntl.h>
 #include <stdarg.h>
-#include <unistd.h>
 #if defined(__x86_64__) || defined(__i386__)
 # include <cpuid.h>
 #endif
 #if defined(__arm__) || defined(__aarch64__)
-# include <sys/auxv.h>
+# if SANITIZER_ANDROID && __ANDROID_API__ < 18
+// getauxval() was introduced with API level 18 on Android. Emulate it using
+// /proc/self/auxv for lower API levels.
+#  include "sanitizer_common/sanitizer_posix.h"
+
+#  include <fcntl.h>
+
+#  define AT_HWCAP 16
+
+namespace __sanitizer {
+
+uptr getauxval(uptr Type) {
+  uptr F = internal_open("/proc/self/auxv", O_RDONLY);
+  if (internal_iserror(F))
+    return 0;
+  struct { uptr Tag; uptr Value; } Entry;
+  uptr Result = 0;
+  for (;;) {
+    uptr N = internal_read(F, &Entry, sizeof(Entry));
+    if (internal_iserror(N))
+      break;
+    if (N == 0 || N != sizeof(Entry) || (Entry.Tag == 0 && Entry.Value == 0))
+      break;
+    if (Entry.Tag == Type) {
+      Result =  Entry.Value;
+      break;
+    }
+  }
+  internal_close(F);
+  return Result;
+}
+
+}  // namespace __sanitizer
+# else
+#  include <sys/auxv.h>
+# endif
 #endif
 
 // TODO(kostyak): remove __sanitizer *Printf uses in favor for our own less
@@ -82,9 +114,9 @@
   return FeaturesRegs;
 }
 
-#ifndef bit_SSE4_2
-# define bit_SSE4_2 bit_SSE42  // clang and gcc have different defines.
-#endif
+# ifndef bit_SSE4_2
+#  define bit_SSE4_2 bit_SSE42  // clang and gcc have different defines.
+# endif
 
 bool testCPUFeature(CPUFeature Feature)
 {
@@ -99,12 +131,12 @@
   return false;
 }
 #elif defined(__arm__) || defined(__aarch64__)
-// For ARM and AArch64, hardware CRC32 support is indicated in the
-// AT_HWVAL auxiliary vector.
+// For ARM and AArch64, hardware CRC32 support is indicated in the AT_HWVAL
+// auxiliary vector.
 
-#ifndef HWCAP_CRC32
-# define HWCAP_CRC32 (1<<7)  // HWCAP_CRC32 is missing on older platforms.
-#endif
+# ifndef HWCAP_CRC32
+#  define HWCAP_CRC32 (1 << 7)  // HWCAP_CRC32 is missing on older platforms.
+# endif
 
 bool testCPUFeature(CPUFeature Feature) {
   uptr HWCap = getauxval(AT_HWCAP);
@@ -123,94 +155,4 @@
 }
 #endif  // defined(__x86_64__) || defined(__i386__)
 
-// readRetry will attempt to read Count bytes from the Fd specified, and if
-// interrupted will retry to read additional bytes to reach Count.
-static ssize_t readRetry(int Fd, u8 *Buffer, size_t Count) {
-  ssize_t AmountRead = 0;
-  while (static_cast<size_t>(AmountRead) < Count) {
-    ssize_t Result = read(Fd, Buffer + AmountRead, Count - AmountRead);
-    if (Result > 0)
-      AmountRead += Result;
-    else if (!Result)
-      break;
-    else if (errno != EINTR) {
-      AmountRead = -1;
-      break;
-    }
-  }
-  return AmountRead;
-}
-
-static void fillRandom(u8 *Data, ssize_t Size) {
-  int Fd = open("/dev/urandom", O_RDONLY);
-  if (Fd < 0) {
-    dieWithMessage("ERROR: failed to open /dev/urandom.\n");
-  }
-  bool Success = readRetry(Fd, Data, Size) == Size;
-  close(Fd);
-  if (!Success) {
-    dieWithMessage("ERROR: failed to read enough data from /dev/urandom.\n");
-  }
-}
-
-// Default constructor for Xorshift128Plus seeds the state with /dev/urandom.
-// TODO(kostyak): investigate using getrandom() if available.
-Xorshift128Plus::Xorshift128Plus() {
-  fillRandom(reinterpret_cast<u8 *>(State), sizeof(State));
-}
-
-const static u32 CRC32Table[] = {
-  0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
-  0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
-  0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
-  0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
-  0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
-  0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
-  0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c,
-  0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
-  0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
-  0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
-  0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106,
-  0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
-  0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
-  0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
-  0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
-  0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
-  0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,
-  0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
-  0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa,
-  0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
-  0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
-  0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
-  0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84,
-  0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
-  0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
-  0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
-  0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e,
-  0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
-  0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,
-  0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
-  0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,
-  0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
-  0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,
-  0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
-  0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
-  0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
-  0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,
-  0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
-  0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc,
-  0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
-  0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
-  0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
-  0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
-};
-
-u32 computeSoftwareCRC32(u32 Crc, uptr Data) {
-  for (uptr i = 0; i < sizeof(Data); i++) {
-    Crc = CRC32Table[(Crc ^ Data) & 0xff] ^ (Crc >> 8);
-    Data >>= 8;
-  }
-  return Crc;
-}
-
 }  // namespace __scudo
diff --git a/lib/scudo/scudo_utils.h b/lib/scudo/scudo_utils.h
index 5082d79..1326919 100644
--- a/lib/scudo/scudo_utils.h
+++ b/lib/scudo/scudo_utils.h
@@ -36,29 +36,60 @@
 };
 bool testCPUFeature(CPUFeature feature);
 
-// Tiny PRNG based on https://en.wikipedia.org/wiki/Xorshift#xorshift.2B
-// The state (128 bits) will be stored in thread local storage.
-struct Xorshift128Plus {
+INLINE u64 rotl(const u64 X, int K) {
+  return (X << K) | (X >> (64 - K));
+}
+
+// XoRoShiRo128+ PRNG (http://xoroshiro.di.unimi.it/).
+struct XoRoShiRo128Plus {
  public:
-  Xorshift128Plus();
-  u64 Next() {
-    u64 x = State[0];
-    const u64 y = State[1];
-    State[0] = y;
-    x ^= x << 23;
-    State[1] = x ^ y ^ (x >> 17) ^ (y >> 26);
-    return State[1] + y;
+  void init() {
+    if (UNLIKELY(!GetRandom(reinterpret_cast<void *>(State), sizeof(State),
+                            /*blocking=*/false))) {
+      // On some platforms, early processes like `init` do not have an
+      // initialized random pool (getrandom blocks and /dev/urandom doesn't
+      // exist yet), but we still have to provide them with some degree of
+      // entropy. Not having a secure seed is not as problematic for them, as
+      // they are less likely to be the target of heap based vulnerabilities
+      // exploitation attempts.
+      State[0] = NanoTime();
+      State[1] = 0;
+    }
+    fillCache();
   }
+  u8 getU8() {
+    if (UNLIKELY(isCacheEmpty()))
+      fillCache();
+    const u8 Result = static_cast<u8>(CachedBytes & 0xff);
+    CachedBytes >>= 8;
+    CachedBytesAvailable--;
+    return Result;
+  }
+  u64 getU64() { return next(); }
+
  private:
+  u8 CachedBytesAvailable;
+  u64 CachedBytes;
   u64 State[2];
+  u64 next() {
+    const u64 S0 = State[0];
+    u64 S1 = State[1];
+    const u64 Result = S0 + S1;
+    S1 ^= S0;
+    State[0] = rotl(S0, 55) ^ S1 ^ (S1 << 14);
+    State[1] = rotl(S1, 36);
+    return Result;
+  }
+  bool isCacheEmpty() {
+    return CachedBytesAvailable == 0;
+  }
+  void fillCache() {
+    CachedBytes = next();
+    CachedBytesAvailable = sizeof(CachedBytes);
+  }
 };
 
-enum : u8 {
-  CRC32Software = 0,
-  CRC32Hardware = 1,
-};
-
-u32 computeSoftwareCRC32(u32 Crc, uptr Data);
+typedef XoRoShiRo128Plus ScudoPrng;
 
 }  // namespace __scudo
 
diff --git a/lib/stats/CMakeLists.txt b/lib/stats/CMakeLists.txt
index 2b3d647..6be36a7 100644
--- a/lib/stats/CMakeLists.txt
+++ b/lib/stats/CMakeLists.txt
@@ -6,6 +6,8 @@
 if(APPLE)
   set(STATS_LIB_FLAVOR SHARED)
 
+  set(STATS_LINK_LIBS ${SANITIZER_COMMON_LINK_LIBS})
+
   add_weak_symbols("asan" WEAK_SYMBOL_LINK_FLAGS)
   add_weak_symbols("ubsan" WEAK_SYMBOL_LINK_FLAGS)
   add_weak_symbols("sanitizer_common" WEAK_SYMBOL_LINK_FLAGS)
@@ -23,7 +25,8 @@
   OBJECT_LIBS RTSanitizerCommon
               RTSanitizerCommonLibc
   CFLAGS ${SANITIZER_COMMON_CFLAGS}
-  LINK_FLAGS ${WEAK_SYMBOL_LINK_FLAGS}
+  LINK_FLAGS ${SANITIZER_COMMON_LINK_FLAGS} ${WEAK_SYMBOL_LINK_FLAGS}
+  LINK_LIBS ${STATS_LINK_LIBS}
   PARENT_TARGET stats)
 
 add_compiler_rt_runtime(clang_rt.stats_client
diff --git a/lib/stats/stats.cc b/lib/stats/stats.cc
index df9845a..6a6eb3a 100644
--- a/lib/stats/stats.cc
+++ b/lib/stats/stats.cc
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_file.h"
 #include "sanitizer_common/sanitizer_internal_defs.h"
 #if SANITIZER_POSIX
 #include "sanitizer_common/sanitizer_posix.h"
diff --git a/lib/tsan/CMakeLists.txt b/lib/tsan/CMakeLists.txt
index 195ecb5..08974a4 100644
--- a/lib/tsan/CMakeLists.txt
+++ b/lib/tsan/CMakeLists.txt
@@ -100,7 +100,7 @@
 add_compiler_rt_component(tsan)
 
 if(APPLE)
-  set(TSAN_ASM_SOURCES rtl/tsan_rtl_amd64.S)
+  set(TSAN_ASM_SOURCES rtl/tsan_rtl_amd64.S rtl/tsan_rtl_aarch64.S)
   # Xcode will try to compile this file as C ('clang -x c'), and that will fail.
   if (${CMAKE_GENERATOR} STREQUAL "Xcode")
     enable_language(ASM)
@@ -109,6 +109,8 @@
     set_source_files_properties(${TSAN_ASM_SOURCES} PROPERTIES LANGUAGE C)
   endif()
 
+  set(TSAN_LINK_LIBS ${SANITIZER_COMMON_LINK_LIBS})
+
   add_weak_symbols("ubsan" WEAK_SYMBOL_LINK_FLAGS)
   add_weak_symbols("sanitizer_common" WEAK_SYMBOL_LINK_FLAGS)
 
@@ -122,7 +124,8 @@
                 RTSanitizerCommonLibc
                 RTUbsan
     CFLAGS ${TSAN_RTL_CFLAGS}
-    LINK_FLAGS ${WEAK_SYMBOL_LINK_FLAGS}
+    LINK_FLAGS ${SANITIZER_COMMON_LINK_FLAGS} ${WEAK_SYMBOL_LINK_FLAGS}
+    LINK_LIBS ${TSAN_LINK_LIBS}
     PARENT_TARGET tsan)
   add_compiler_rt_object_libraries(RTTsan_dynamic
     OS ${TSAN_SUPPORTED_OS}
@@ -219,7 +222,8 @@
 
 # Build libcxx instrumented with TSan.
 if(COMPILER_RT_HAS_LIBCXX_SOURCES AND
-   COMPILER_RT_TEST_COMPILER_ID STREQUAL "Clang")
+   COMPILER_RT_TEST_COMPILER_ID STREQUAL "Clang" AND
+   NOT ANDROID)
   set(libcxx_tsan_deps)
   foreach(arch ${TSAN_SUPPORTED_ARCH})
     get_target_flags_for_arch(${arch} TARGET_CFLAGS)
diff --git a/lib/tsan/check_analyze.sh b/lib/tsan/check_analyze.sh
index a5d3632..9b5abc3 100755
--- a/lib/tsan/check_analyze.sh
+++ b/lib/tsan/check_analyze.sh
@@ -2,6 +2,14 @@
 #
 # Script that checks that critical functions in TSan runtime have correct number
 # of push/pop/rsp instructions to verify that runtime is efficient enough.
+#
+# This test can fail when backend code generation changes the output for various
+# tsan interceptors. When such a change happens, you can ensure that the
+# performance has not regressed by running the following benchmarks before and
+# after the breaking change to verify that the values in this file are safe to
+# update:
+# ./projects/compiler-rt/lib/tsan/tests/rtl/TsanRtlTest
+#   --gtest_also_run_disabled_tests --gtest_filter=DISABLED_BENCH.Mop*
 
 set -u
 
@@ -26,22 +34,16 @@
   fi
 }
 
-for f in write1; do
+for f in write1 write2 write4 write8; do
   check $f rsp 1
   check $f push 2
-  check $f pop 2
-done
-
-for f in write2 write4 write8; do
-  check $f rsp 1
-  check $f push 3
-  check $f pop 3
+  check $f pop 12
 done
 
 for f in read1 read2 read4 read8; do
   check $f rsp 1
-  check $f push 5
-  check $f pop 5
+  check $f push 3
+  check $f pop 18
 done
 
 for f in func_entry func_exit; do
diff --git a/lib/tsan/dd/CMakeLists.txt b/lib/tsan/dd/CMakeLists.txt
index bcff35f..07fc300 100644
--- a/lib/tsan/dd/CMakeLists.txt
+++ b/lib/tsan/dd/CMakeLists.txt
@@ -10,7 +10,8 @@
   dd_interceptors.cc
 )
 
-set(DD_LINKLIBS)
+set(DD_LINKLIBS ${SANITIZER_CXX_ABI_LIBRARY} ${SANITIZER_COMMON_LINK_LIBS})
+
 append_list_if(COMPILER_RT_HAS_LIBDL dl DD_LINKLIBS)
 append_list_if(COMPILER_RT_HAS_LIBRT rt DD_LINKLIBS)
 append_list_if(COMPILER_RT_HAS_LIBPTHREAD pthread DD_LINKLIBS)
@@ -40,6 +41,7 @@
             $<TARGET_OBJECTS:RTInterception.${arch}>
             $<TARGET_OBJECTS:RTSanitizerCommon.${arch}>
             $<TARGET_OBJECTS:RTSanitizerCommonLibc.${arch}>
+    LINK_FLAGS ${SANITIZER_COMMON_LINK_FLAGS}
     LINK_LIBS ${DD_LINKLIBS}
     PARENT_TARGET dd)
 endif()
diff --git a/lib/tsan/dd/dd_interceptors.cc b/lib/tsan/dd/dd_interceptors.cc
index 97c72dd..a39218f 100644
--- a/lib/tsan/dd/dd_interceptors.cc
+++ b/lib/tsan/dd/dd_interceptors.cc
@@ -270,20 +270,19 @@
 
 static void InitDataSeg() {
   MemoryMappingLayout proc_maps(true);
-  uptr start, end, offset;
   char name[128];
+  MemoryMappedSegment segment(name, ARRAY_SIZE(name));
   bool prev_is_data = false;
-  while (proc_maps.Next(&start, &end, &offset, name, ARRAY_SIZE(name),
-                        /*protection*/ 0)) {
-    bool is_data = offset != 0 && name[0] != 0;
+  while (proc_maps.Next(&segment)) {
+    bool is_data = segment.offset != 0 && segment.filename[0] != 0;
     // BSS may get merged with [heap] in /proc/self/maps. This is not very
     // reliable.
-    bool is_bss = offset == 0 &&
-      (name[0] == 0 || internal_strcmp(name, "[heap]") == 0) && prev_is_data;
-    if (g_data_start == 0 && is_data)
-      g_data_start = start;
-    if (is_bss)
-      g_data_end = end;
+    bool is_bss = segment.offset == 0 &&
+                  (segment.filename[0] == 0 ||
+                   internal_strcmp(segment.filename, "[heap]") == 0) &&
+                  prev_is_data;
+    if (g_data_start == 0 && is_data) g_data_start = segment.start;
+    if (is_bss) g_data_end = segment.end;
     prev_is_data = is_data;
   }
   VPrintf(1, "guessed data_start=%p data_end=%p\n",  g_data_start, g_data_end);
diff --git a/lib/tsan/go/buildgo.sh b/lib/tsan/go/buildgo.sh
index 42d4790..62ff0fc 100755
--- a/lib/tsan/go/buildgo.sh
+++ b/lib/tsan/go/buildgo.sh
@@ -5,6 +5,7 @@
 SRCS="
 	tsan_go.cc
 	../rtl/tsan_clock.cc
+	../rtl/tsan_external.cc
 	../rtl/tsan_flags.cc
 	../rtl/tsan_interface_atomic.cc
 	../rtl/tsan_md5.cc
@@ -23,6 +24,7 @@
 	../../sanitizer_common/sanitizer_common.cc
 	../../sanitizer_common/sanitizer_common_libcdep.cc
 	../../sanitizer_common/sanitizer_deadlock_detector2.cc
+	../../sanitizer_common/sanitizer_file.cc
 	../../sanitizer_common/sanitizer_flag_parser.cc
 	../../sanitizer_common/sanitizer_flags.cc
 	../../sanitizer_common/sanitizer_libc.cc
@@ -66,9 +68,24 @@
 		../../sanitizer_common/sanitizer_linux_libcdep.cc
 		../../sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc
 	"
+elif [ "`uname -a | grep NetBSD`" != "" ]; then
+	SUFFIX="netbsd_amd64"
+	OSCFLAGS="-fno-strict-aliasing -fPIC -Werror"
+	OSLDFLAGS="-lpthread -fPIC -fpie"
+	SRCS="
+		$SRCS
+		../rtl/tsan_platform_linux.cc
+		../../sanitizer_common/sanitizer_posix.cc
+		../../sanitizer_common/sanitizer_posix_libcdep.cc
+		../../sanitizer_common/sanitizer_procmaps_common.cc
+		../../sanitizer_common/sanitizer_procmaps_freebsd.cc
+		../../sanitizer_common/sanitizer_linux.cc
+		../../sanitizer_common/sanitizer_linux_libcdep.cc
+		../../sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc
+	"
 elif [ "`uname -a | grep Darwin`" != "" ]; then
 	SUFFIX="darwin_amd64"
-	OSCFLAGS="-fPIC -Wno-unused-const-variable -Wno-unknown-warning-option -mmacosx-version-min=10.7"
+	OSCFLAGS="-fPIC -Wno-unused-const-variable -Wno-unknown-warning-option -isysroot $(xcodebuild -version -sdk macosx Path) -mmacosx-version-min=10.7"
 	OSLDFLAGS="-lpthread -fPIC -fpie -mmacosx-version-min=10.7"
 	SRCS="
 		$SRCS
@@ -125,7 +142,7 @@
 fi
 $CC $DIR/gotsan.cc -c -o $DIR/race_$SUFFIX.syso $FLAGS $CFLAGS
 
-$CC $OSCFLAGS test.c $DIR/race_$SUFFIX.syso -m64 -g -o $DIR/test $OSLDFLAGS
+$CC $OSCFLAGS test.c $DIR/race_$SUFFIX.syso -m64 -g -o $DIR/test $OSLDFLAGS $LDFLAGS
 
 export GORACE="exitcode=0 atexit_sleep_ms=0"
 if [ "$SILENT" != "1" ]; then
diff --git a/lib/tsan/go/tsan_go.cc b/lib/tsan/go/tsan_go.cc
index 7fb4eb2..d7a9e0b 100644
--- a/lib/tsan/go/tsan_go.cc
+++ b/lib/tsan/go/tsan_go.cc
@@ -247,13 +247,17 @@
 }
 
 void __tsan_mutex_before_lock(ThreadState *thr, uptr addr, uptr write) {
+  if (write)
+    MutexPreLock(thr, 0, addr);
+  else
+    MutexPreReadLock(thr, 0, addr);
 }
 
 void __tsan_mutex_after_lock(ThreadState *thr, uptr addr, uptr write) {
   if (write)
-    MutexLock(thr, 0, addr);
+    MutexPostLock(thr, 0, addr);
   else
-    MutexReadLock(thr, 0, addr);
+    MutexPostReadLock(thr, 0, addr);
 }
 
 void __tsan_mutex_before_unlock(ThreadState *thr, uptr addr, uptr write) {
diff --git a/lib/tsan/rtl/tsan.syms.extra b/lib/tsan/rtl/tsan.syms.extra
index 22dfde9..ab5b5a4 100644
--- a/lib/tsan/rtl/tsan.syms.extra
+++ b/lib/tsan/rtl/tsan.syms.extra
@@ -9,6 +9,16 @@
 __tsan_unaligned*
 __tsan_release
 __tsan_acquire
+__tsan_mutex_create
+__tsan_mutex_destroy
+__tsan_mutex_pre_lock
+__tsan_mutex_post_lock
+__tsan_mutex_pre_unlock
+__tsan_mutex_post_unlock
+__tsan_mutex_pre_signal
+__tsan_mutex_post_signal
+__tsan_mutex_pre_divert
+__tsan_mutex_post_divert
 __ubsan_*
 Annotate*
 WTFAnnotate*
diff --git a/lib/tsan/rtl/tsan_clock.cc b/lib/tsan/rtl/tsan_clock.cc
index 32435ad..ef984a4 100644
--- a/lib/tsan/rtl/tsan_clock.cc
+++ b/lib/tsan/rtl/tsan_clock.cc
@@ -61,20 +61,13 @@
 // an exclusive lock; ThreadClock's are private to respective threads and so
 // do not need any protection.
 //
-// Description of ThreadClock state:
-// clk_ - fixed size vector clock.
-// nclk_ - effective size of the vector clock (the rest is zeros).
-// tid_ - index of the thread associated with he clock ("current thread").
-// last_acquire_ - current thread time when it acquired something from
-//   other threads.
-//
 // Description of SyncClock state:
 // clk_ - variable size vector clock, low kClkBits hold timestamp,
 //   the remaining bits hold "acquired" flag (the actual value is thread's
 //   reused counter);
 //   if acquried == thr->reused_, then the respective thread has already
-//   acquired this clock (except possibly dirty_tids_).
-// dirty_tids_ - holds up to two indeces in the vector clock that other threads
+//   acquired this clock (except possibly for dirty elements).
+// dirty_ - holds up to two indeces in the vector clock that other threads
 //   need to acquire regardless of "acquired" flag value;
 // release_store_tid_ - denotes that the clock state is a result of
 //   release-store operation by the thread with release_store_tid_ index.
@@ -90,18 +83,51 @@
 
 namespace __tsan {
 
+static atomic_uint32_t *ref_ptr(ClockBlock *cb) {
+  return reinterpret_cast<atomic_uint32_t *>(&cb->table[ClockBlock::kRefIdx]);
+}
+
+// Drop reference to the first level block idx.
+static void UnrefClockBlock(ClockCache *c, u32 idx, uptr blocks) {
+  ClockBlock *cb = ctx->clock_alloc.Map(idx);
+  atomic_uint32_t *ref = ref_ptr(cb);
+  u32 v = atomic_load(ref, memory_order_acquire);
+  for (;;) {
+    CHECK_GT(v, 0);
+    if (v == 1)
+      break;
+    if (atomic_compare_exchange_strong(ref, &v, v - 1, memory_order_acq_rel))
+      return;
+  }
+  // First level block owns second level blocks, so them as well.
+  for (uptr i = 0; i < blocks; i++)
+    ctx->clock_alloc.Free(c, cb->table[ClockBlock::kBlockIdx - i]);
+  ctx->clock_alloc.Free(c, idx);
+}
+
 ThreadClock::ThreadClock(unsigned tid, unsigned reused)
     : tid_(tid)
-    , reused_(reused + 1) {  // 0 has special meaning
+    , reused_(reused + 1)  // 0 has special meaning
+    , cached_idx_()
+    , cached_size_()
+    , cached_blocks_() {
   CHECK_LT(tid, kMaxTidInClock);
   CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);
   nclk_ = tid_ + 1;
   last_acquire_ = 0;
   internal_memset(clk_, 0, sizeof(clk_));
-  clk_[tid_].reused = reused_;
 }
 
-void ThreadClock::acquire(ClockCache *c, const SyncClock *src) {
+void ThreadClock::ResetCached(ClockCache *c) {
+  if (cached_idx_) {
+    UnrefClockBlock(c, cached_idx_, cached_blocks_);
+    cached_idx_ = 0;
+    cached_size_ = 0;
+    cached_blocks_ = 0;
+  }
+}
+
+void ThreadClock::acquire(ClockCache *c, SyncClock *src) {
   DCHECK_LE(nclk_, kMaxTid);
   DCHECK_LE(src->size_, kMaxTid);
   CPP_STAT_INC(StatClockAcquire);
@@ -113,52 +139,46 @@
     return;
   }
 
-  // Check if we've already acquired src after the last release operation on src
   bool acquired = false;
-  if (nclk > tid_) {
-    CPP_STAT_INC(StatClockAcquireLarge);
-    if (src->elem(tid_).reused == reused_) {
-      CPP_STAT_INC(StatClockAcquireRepeat);
-      for (unsigned i = 0; i < kDirtyTids; i++) {
-        unsigned tid = src->dirty_tids_[i];
-        if (tid != kInvalidTid) {
-          u64 epoch = src->elem(tid).epoch;
-          if (clk_[tid].epoch < epoch) {
-            clk_[tid].epoch = epoch;
-            acquired = true;
-          }
-        }
+  for (unsigned i = 0; i < kDirtyTids; i++) {
+    SyncClock::Dirty dirty = src->dirty_[i];
+    unsigned tid = dirty.tid;
+    if (tid != kInvalidTid) {
+      if (clk_[tid] < dirty.epoch) {
+        clk_[tid] = dirty.epoch;
+        acquired = true;
       }
-      if (acquired) {
-        CPP_STAT_INC(StatClockAcquiredSomething);
-        last_acquire_ = clk_[tid_].epoch;
-      }
-      return;
     }
   }
 
-  // O(N) acquire.
-  CPP_STAT_INC(StatClockAcquireFull);
-  nclk_ = max(nclk_, nclk);
-  for (uptr i = 0; i < nclk; i++) {
-    u64 epoch = src->elem(i).epoch;
-    if (clk_[i].epoch < epoch) {
-      clk_[i].epoch = epoch;
-      acquired = true;
+  // Check if we've already acquired src after the last release operation on src
+  if (tid_ >= nclk || src->elem(tid_).reused != reused_) {
+    // O(N) acquire.
+    CPP_STAT_INC(StatClockAcquireFull);
+    nclk_ = max(nclk_, nclk);
+    u64 *dst_pos = &clk_[0];
+    for (ClockElem &src_elem : *src) {
+      u64 epoch = src_elem.epoch;
+      if (*dst_pos < epoch) {
+        *dst_pos = epoch;
+        acquired = true;
+      }
+      dst_pos++;
     }
-  }
 
-  // Remember that this thread has acquired this clock.
-  if (nclk > tid_)
-    src->elem(tid_).reused = reused_;
+    // Remember that this thread has acquired this clock.
+    if (nclk > tid_)
+      src->elem(tid_).reused = reused_;
+  }
 
   if (acquired) {
     CPP_STAT_INC(StatClockAcquiredSomething);
-    last_acquire_ = clk_[tid_].epoch;
+    last_acquire_ = clk_[tid_];
+    ResetCached(c);
   }
 }
 
-void ThreadClock::release(ClockCache *c, SyncClock *dst) const {
+void ThreadClock::release(ClockCache *c, SyncClock *dst) {
   DCHECK_LE(nclk_, kMaxTid);
   DCHECK_LE(dst->size_, kMaxTid);
 
@@ -178,7 +198,7 @@
   // since the last release on dst. If so, we need to update
   // only dst->elem(tid_).
   if (dst->elem(tid_).epoch > last_acquire_) {
-    UpdateCurrentThread(dst);
+    UpdateCurrentThread(c, dst);
     if (dst->release_store_tid_ != tid_ ||
         dst->release_store_reused_ != reused_)
       dst->release_store_tid_ = kInvalidTid;
@@ -187,23 +207,24 @@
 
   // O(N) release.
   CPP_STAT_INC(StatClockReleaseFull);
+  dst->Unshare(c);
   // First, remember whether we've acquired dst.
   bool acquired = IsAlreadyAcquired(dst);
   if (acquired)
     CPP_STAT_INC(StatClockReleaseAcquired);
   // Update dst->clk_.
-  for (uptr i = 0; i < nclk_; i++) {
-    ClockElem &ce = dst->elem(i);
-    ce.epoch = max(ce.epoch, clk_[i].epoch);
+  dst->FlushDirty();
+  uptr i = 0;
+  for (ClockElem &ce : *dst) {
+    ce.epoch = max(ce.epoch, clk_[i]);
     ce.reused = 0;
+    i++;
   }
   // Clear 'acquired' flag in the remaining elements.
   if (nclk_ < dst->size_)
     CPP_STAT_INC(StatClockReleaseClearTail);
   for (uptr i = nclk_; i < dst->size_; i++)
     dst->elem(i).reused = 0;
-  for (unsigned i = 0; i < kDirtyTids; i++)
-    dst->dirty_tids_[i] = kInvalidTid;
   dst->release_store_tid_ = kInvalidTid;
   dst->release_store_reused_ = 0;
   // If we've acquired dst, remember this fact,
@@ -212,11 +233,37 @@
     dst->elem(tid_).reused = reused_;
 }
 
-void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) const {
+void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) {
   DCHECK_LE(nclk_, kMaxTid);
   DCHECK_LE(dst->size_, kMaxTid);
   CPP_STAT_INC(StatClockStore);
 
+  if (dst->size_ == 0 && cached_idx_ != 0) {
+    // Reuse the cached clock.
+    // Note: we could reuse/cache the cached clock in more cases:
+    // we could update the existing clock and cache it, or replace it with the
+    // currently cached clock and release the old one. And for a shared
+    // existing clock, we could replace it with the currently cached;
+    // or unshare, update and cache. But, for simplicity, we currnetly reuse
+    // cached clock only when the target clock is empty.
+    dst->tab_ = ctx->clock_alloc.Map(cached_idx_);
+    dst->tab_idx_ = cached_idx_;
+    dst->size_ = cached_size_;
+    dst->blocks_ = cached_blocks_;
+    CHECK_EQ(dst->dirty_[0].tid, kInvalidTid);
+    // The cached clock is shared (immutable),
+    // so this is where we store the current clock.
+    dst->dirty_[0].tid = tid_;
+    dst->dirty_[0].epoch = clk_[tid_];
+    dst->release_store_tid_ = tid_;
+    dst->release_store_reused_ = reused_;
+    // Rememeber that we don't need to acquire it in future.
+    dst->elem(tid_).reused = reused_;
+    // Grab a reference.
+    atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed);
+    return;
+  }
+
   // Check if we need to resize dst.
   if (dst->size_ < nclk_)
     dst->Resize(c, nclk_);
@@ -225,32 +272,41 @@
       dst->release_store_reused_ == reused_ &&
       dst->elem(tid_).epoch > last_acquire_) {
     CPP_STAT_INC(StatClockStoreFast);
-    UpdateCurrentThread(dst);
+    UpdateCurrentThread(c, dst);
     return;
   }
 
   // O(N) release-store.
   CPP_STAT_INC(StatClockStoreFull);
-  for (uptr i = 0; i < nclk_; i++) {
-    ClockElem &ce = dst->elem(i);
-    ce.epoch = clk_[i].epoch;
+  dst->Unshare(c);
+  // Note: dst can be larger than this ThreadClock.
+  // This is fine since clk_ beyond size is all zeros.
+  uptr i = 0;
+  for (ClockElem &ce : *dst) {
+    ce.epoch = clk_[i];
     ce.reused = 0;
+    i++;
   }
-  // Clear the tail of dst->clk_.
-  if (nclk_ < dst->size_) {
-    for (uptr i = nclk_; i < dst->size_; i++) {
-      ClockElem &ce = dst->elem(i);
-      ce.epoch = 0;
-      ce.reused = 0;
-    }
-    CPP_STAT_INC(StatClockStoreTail);
-  }
-  for (unsigned i = 0; i < kDirtyTids; i++)
-    dst->dirty_tids_[i] = kInvalidTid;
+  for (uptr i = 0; i < kDirtyTids; i++)
+    dst->dirty_[i].tid = kInvalidTid;
   dst->release_store_tid_ = tid_;
   dst->release_store_reused_ = reused_;
   // Rememeber that we don't need to acquire it in future.
   dst->elem(tid_).reused = reused_;
+
+  // If the resulting clock is cachable, cache it for future release operations.
+  // The clock is always cachable if we released to an empty sync object.
+  if (cached_idx_ == 0 && dst->Cachable()) {
+    // Grab a reference to the ClockBlock.
+    atomic_uint32_t *ref = ref_ptr(dst->tab_);
+    if (atomic_load(ref, memory_order_acquire) == 1)
+      atomic_store_relaxed(ref, 2);
+    else
+      atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed);
+    cached_idx_ = dst->tab_idx_;
+    cached_size_ = dst->size_;
+    cached_blocks_ = dst->blocks_;
+  }
 }
 
 void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) {
@@ -260,157 +316,248 @@
 }
 
 // Updates only single element related to the current thread in dst->clk_.
-void ThreadClock::UpdateCurrentThread(SyncClock *dst) const {
+void ThreadClock::UpdateCurrentThread(ClockCache *c, SyncClock *dst) const {
   // Update the threads time, but preserve 'acquired' flag.
-  dst->elem(tid_).epoch = clk_[tid_].epoch;
-
   for (unsigned i = 0; i < kDirtyTids; i++) {
-    if (dst->dirty_tids_[i] == tid_) {
-      CPP_STAT_INC(StatClockReleaseFast1);
-      return;
-    }
-    if (dst->dirty_tids_[i] == kInvalidTid) {
-      CPP_STAT_INC(StatClockReleaseFast2);
-      dst->dirty_tids_[i] = tid_;
+    SyncClock::Dirty *dirty = &dst->dirty_[i];
+    const unsigned tid = dirty->tid;
+    if (tid == tid_ || tid == kInvalidTid) {
+      CPP_STAT_INC(StatClockReleaseFast);
+      dirty->tid = tid_;
+      dirty->epoch = clk_[tid_];
       return;
     }
   }
   // Reset all 'acquired' flags, O(N).
+  // We are going to touch dst elements, so we need to unshare it.
+  dst->Unshare(c);
   CPP_STAT_INC(StatClockReleaseSlow);
+  dst->elem(tid_).epoch = clk_[tid_];
   for (uptr i = 0; i < dst->size_; i++)
     dst->elem(i).reused = 0;
-  for (unsigned i = 0; i < kDirtyTids; i++)
-    dst->dirty_tids_[i] = kInvalidTid;
+  dst->FlushDirty();
 }
 
-// Checks whether the current threads has already acquired src.
+// Checks whether the current thread has already acquired src.
 bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
   if (src->elem(tid_).reused != reused_)
     return false;
   for (unsigned i = 0; i < kDirtyTids; i++) {
-    unsigned tid = src->dirty_tids_[i];
-    if (tid != kInvalidTid) {
-      if (clk_[tid].epoch < src->elem(tid).epoch)
+    SyncClock::Dirty dirty = src->dirty_[i];
+    if (dirty.tid != kInvalidTid) {
+      if (clk_[dirty.tid] < dirty.epoch)
         return false;
     }
   }
   return true;
 }
 
-void SyncClock::Resize(ClockCache *c, uptr nclk) {
-  CPP_STAT_INC(StatClockReleaseResize);
-  if (RoundUpTo(nclk, ClockBlock::kClockCount) <=
-      RoundUpTo(size_, ClockBlock::kClockCount)) {
-    // Growing within the same block.
-    // Memory is already allocated, just increase the size.
-    size_ = nclk;
-    return;
-  }
-  if (nclk <= ClockBlock::kClockCount) {
-    // Grow from 0 to one-level table.
-    CHECK_EQ(size_, 0);
-    CHECK_EQ(tab_, 0);
-    CHECK_EQ(tab_idx_, 0);
-    size_ = nclk;
-    tab_idx_ = ctx->clock_alloc.Alloc(c);
-    tab_ = ctx->clock_alloc.Map(tab_idx_);
-    internal_memset(tab_, 0, sizeof(*tab_));
-    return;
-  }
-  // Growing two-level table.
-  if (size_ == 0) {
-    // Allocate first level table.
-    tab_idx_ = ctx->clock_alloc.Alloc(c);
-    tab_ = ctx->clock_alloc.Map(tab_idx_);
-    internal_memset(tab_, 0, sizeof(*tab_));
-  } else if (size_ <= ClockBlock::kClockCount) {
-    // Transform one-level table to two-level table.
-    u32 old = tab_idx_;
-    tab_idx_ = ctx->clock_alloc.Alloc(c);
-    tab_ = ctx->clock_alloc.Map(tab_idx_);
-    internal_memset(tab_, 0, sizeof(*tab_));
-    tab_->table[0] = old;
-  }
-  // At this point we have first level table allocated.
-  // Add second level tables as necessary.
-  for (uptr i = RoundUpTo(size_, ClockBlock::kClockCount);
-      i < nclk; i += ClockBlock::kClockCount) {
-    u32 idx = ctx->clock_alloc.Alloc(c);
-    ClockBlock *cb = ctx->clock_alloc.Map(idx);
-    internal_memset(cb, 0, sizeof(*cb));
-    CHECK_EQ(tab_->table[i/ClockBlock::kClockCount], 0);
-    tab_->table[i/ClockBlock::kClockCount] = idx;
-  }
-  size_ = nclk;
-}
-
 // Sets a single element in the vector clock.
 // This function is called only from weird places like AcquireGlobal.
-void ThreadClock::set(unsigned tid, u64 v) {
+void ThreadClock::set(ClockCache *c, unsigned tid, u64 v) {
   DCHECK_LT(tid, kMaxTid);
-  DCHECK_GE(v, clk_[tid].epoch);
-  clk_[tid].epoch = v;
+  DCHECK_GE(v, clk_[tid]);
+  clk_[tid] = v;
   if (nclk_ <= tid)
     nclk_ = tid + 1;
-  last_acquire_ = clk_[tid_].epoch;
+  last_acquire_ = clk_[tid_];
+  ResetCached(c);
 }
 
 void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
   printf("clock=[");
   for (uptr i = 0; i < nclk_; i++)
-    printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);
-  printf("] reused=[");
-  for (uptr i = 0; i < nclk_; i++)
-    printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);
-  printf("] tid=%u/%u last_acq=%llu",
-      tid_, reused_, last_acquire_);
+    printf("%s%llu", i == 0 ? "" : ",", clk_[i]);
+  printf("] tid=%u/%u last_acq=%llu", tid_, reused_, last_acquire_);
 }
 
-SyncClock::SyncClock()
-    : release_store_tid_(kInvalidTid)
-    , release_store_reused_()
-    , tab_()
-    , tab_idx_()
-    , size_() {
-  for (uptr i = 0; i < kDirtyTids; i++)
-    dirty_tids_[i] = kInvalidTid;
+SyncClock::SyncClock() {
+  ResetImpl();
 }
 
 SyncClock::~SyncClock() {
   // Reset must be called before dtor.
   CHECK_EQ(size_, 0);
+  CHECK_EQ(blocks_, 0);
   CHECK_EQ(tab_, 0);
   CHECK_EQ(tab_idx_, 0);
 }
 
 void SyncClock::Reset(ClockCache *c) {
-  if (size_ == 0) {
-    // nothing
-  } else if (size_ <= ClockBlock::kClockCount) {
-    // One-level table.
-    ctx->clock_alloc.Free(c, tab_idx_);
-  } else {
-    // Two-level table.
-    for (uptr i = 0; i < size_; i += ClockBlock::kClockCount)
-      ctx->clock_alloc.Free(c, tab_->table[i / ClockBlock::kClockCount]);
-    ctx->clock_alloc.Free(c, tab_idx_);
-  }
+  if (size_)
+    UnrefClockBlock(c, tab_idx_, blocks_);
+  ResetImpl();
+}
+
+void SyncClock::ResetImpl() {
   tab_ = 0;
   tab_idx_ = 0;
   size_ = 0;
+  blocks_ = 0;
   release_store_tid_ = kInvalidTid;
   release_store_reused_ = 0;
   for (uptr i = 0; i < kDirtyTids; i++)
-    dirty_tids_[i] = kInvalidTid;
+    dirty_[i].tid = kInvalidTid;
 }
 
-ClockElem &SyncClock::elem(unsigned tid) const {
+void SyncClock::Resize(ClockCache *c, uptr nclk) {
+  CPP_STAT_INC(StatClockReleaseResize);
+  Unshare(c);
+  if (nclk <= capacity()) {
+    // Memory is already allocated, just increase the size.
+    size_ = nclk;
+    return;
+  }
+  if (size_ == 0) {
+    // Grow from 0 to one-level table.
+    CHECK_EQ(size_, 0);
+    CHECK_EQ(blocks_, 0);
+    CHECK_EQ(tab_, 0);
+    CHECK_EQ(tab_idx_, 0);
+    tab_idx_ = ctx->clock_alloc.Alloc(c);
+    tab_ = ctx->clock_alloc.Map(tab_idx_);
+    internal_memset(tab_, 0, sizeof(*tab_));
+    atomic_store_relaxed(ref_ptr(tab_), 1);
+    size_ = 1;
+  } else if (size_ > blocks_ * ClockBlock::kClockCount) {
+    u32 idx = ctx->clock_alloc.Alloc(c);
+    ClockBlock *new_cb = ctx->clock_alloc.Map(idx);
+    uptr top = size_ - blocks_ * ClockBlock::kClockCount;
+    CHECK_LT(top, ClockBlock::kClockCount);
+    const uptr move = top * sizeof(tab_->clock[0]);
+    internal_memcpy(&new_cb->clock[0], tab_->clock, move);
+    internal_memset(&new_cb->clock[top], 0, sizeof(*new_cb) - move);
+    internal_memset(tab_->clock, 0, move);
+    append_block(idx);
+  }
+  // At this point we have first level table allocated and all clock elements
+  // are evacuated from it to a second level block.
+  // Add second level tables as necessary.
+  while (nclk > capacity()) {
+    u32 idx = ctx->clock_alloc.Alloc(c);
+    ClockBlock *cb = ctx->clock_alloc.Map(idx);
+    internal_memset(cb, 0, sizeof(*cb));
+    append_block(idx);
+  }
+  size_ = nclk;
+}
+
+// Flushes all dirty elements into the main clock array.
+void SyncClock::FlushDirty() {
+  for (unsigned i = 0; i < kDirtyTids; i++) {
+    Dirty *dirty = &dirty_[i];
+    if (dirty->tid != kInvalidTid) {
+      CHECK_LT(dirty->tid, size_);
+      elem(dirty->tid).epoch = dirty->epoch;
+      dirty->tid = kInvalidTid;
+    }
+  }
+}
+
+bool SyncClock::IsShared() const {
+  if (size_ == 0)
+    return false;
+  atomic_uint32_t *ref = ref_ptr(tab_);
+  u32 v = atomic_load(ref, memory_order_acquire);
+  CHECK_GT(v, 0);
+  return v > 1;
+}
+
+// Unshares the current clock if it's shared.
+// Shared clocks are immutable, so they need to be unshared before any updates.
+// Note: this does not apply to dirty entries as they are not shared.
+void SyncClock::Unshare(ClockCache *c) {
+  if (!IsShared())
+    return;
+  // First, copy current state into old.
+  SyncClock old;
+  old.tab_ = tab_;
+  old.tab_idx_ = tab_idx_;
+  old.size_ = size_;
+  old.blocks_ = blocks_;
+  old.release_store_tid_ = release_store_tid_;
+  old.release_store_reused_ = release_store_reused_;
+  for (unsigned i = 0; i < kDirtyTids; i++)
+    old.dirty_[i] = dirty_[i];
+  // Then, clear current object.
+  ResetImpl();
+  // Allocate brand new clock in the current object.
+  Resize(c, old.size_);
+  // Now copy state back into this object.
+  Iter old_iter(&old);
+  for (ClockElem &ce : *this) {
+    ce = *old_iter;
+    ++old_iter;
+  }
+  release_store_tid_ = old.release_store_tid_;
+  release_store_reused_ = old.release_store_reused_;
+  for (unsigned i = 0; i < kDirtyTids; i++)
+    dirty_[i] = old.dirty_[i];
+  // Drop reference to old and delete if necessary.
+  old.Reset(c);
+}
+
+// Can we cache this clock for future release operations?
+ALWAYS_INLINE bool SyncClock::Cachable() const {
+  if (size_ == 0)
+    return false;
+  for (unsigned i = 0; i < kDirtyTids; i++) {
+    if (dirty_[i].tid != kInvalidTid)
+      return false;
+  }
+  return atomic_load_relaxed(ref_ptr(tab_)) == 1;
+}
+
+// elem linearizes the two-level structure into linear array.
+// Note: this is used only for one time accesses, vector operations use
+// the iterator as it is much faster.
+ALWAYS_INLINE ClockElem &SyncClock::elem(unsigned tid) const {
   DCHECK_LT(tid, size_);
-  if (size_ <= ClockBlock::kClockCount)
+  const uptr block = tid / ClockBlock::kClockCount;
+  DCHECK_LE(block, blocks_);
+  tid %= ClockBlock::kClockCount;
+  if (block == blocks_)
     return tab_->clock[tid];
-  u32 idx = tab_->table[tid / ClockBlock::kClockCount];
+  u32 idx = get_block(block);
   ClockBlock *cb = ctx->clock_alloc.Map(idx);
-  return cb->clock[tid % ClockBlock::kClockCount];
+  return cb->clock[tid];
+}
+
+ALWAYS_INLINE uptr SyncClock::capacity() const {
+  if (size_ == 0)
+    return 0;
+  uptr ratio = sizeof(ClockBlock::clock[0]) / sizeof(ClockBlock::table[0]);
+  // How many clock elements we can fit into the first level block.
+  // +1 for ref counter.
+  uptr top = ClockBlock::kClockCount - RoundUpTo(blocks_ + 1, ratio) / ratio;
+  return blocks_ * ClockBlock::kClockCount + top;
+}
+
+ALWAYS_INLINE u32 SyncClock::get_block(uptr bi) const {
+  DCHECK(size_);
+  DCHECK_LT(bi, blocks_);
+  return tab_->table[ClockBlock::kBlockIdx - bi];
+}
+
+ALWAYS_INLINE void SyncClock::append_block(u32 idx) {
+  uptr bi = blocks_++;
+  CHECK_EQ(get_block(bi), 0);
+  tab_->table[ClockBlock::kBlockIdx - bi] = idx;
+}
+
+// Used only by tests.
+u64 SyncClock::get(unsigned tid) const {
+  for (unsigned i = 0; i < kDirtyTids; i++) {
+    Dirty dirty = dirty_[i];
+    if (dirty.tid == tid)
+      return dirty.epoch;
+  }
+  return elem(tid).epoch;
+}
+
+// Used only by Iter test.
+u64 SyncClock::get_clean(unsigned tid) const {
+  return elem(tid).epoch;
 }
 
 void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
@@ -420,8 +567,32 @@
   printf("] reused=[");
   for (uptr i = 0; i < size_; i++)
     printf("%s%llu", i == 0 ? "" : ",", elem(i).reused);
-  printf("] release_store_tid=%d/%d dirty_tids=%d/%d",
+  printf("] release_store_tid=%d/%d dirty_tids=%d[%llu]/%d[%llu]",
       release_store_tid_, release_store_reused_,
-      dirty_tids_[0], dirty_tids_[1]);
+      dirty_[0].tid, dirty_[0].epoch,
+      dirty_[1].tid, dirty_[1].epoch);
+}
+
+void SyncClock::Iter::Next() {
+  // Finished with the current block, move on to the next one.
+  block_++;
+  if (block_ < parent_->blocks_) {
+    // Iterate over the next second level block.
+    u32 idx = parent_->get_block(block_);
+    ClockBlock *cb = ctx->clock_alloc.Map(idx);
+    pos_ = &cb->clock[0];
+    end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount,
+        ClockBlock::kClockCount);
+    return;
+  }
+  if (block_ == parent_->blocks_ &&
+      parent_->size_ > parent_->blocks_ * ClockBlock::kClockCount) {
+    // Iterate over elements in the first level block.
+    pos_ = &parent_->tab_->clock[0];
+    end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount,
+        ClockBlock::kClockCount);
+    return;
+  }
+  parent_ = nullptr;  // denotes end
 }
 }  // namespace __tsan
diff --git a/lib/tsan/rtl/tsan_clock.h b/lib/tsan/rtl/tsan_clock.h
index 4e352cb..a891d7b 100644
--- a/lib/tsan/rtl/tsan_clock.h
+++ b/lib/tsan/rtl/tsan_clock.h
@@ -18,25 +18,6 @@
 
 namespace __tsan {
 
-struct ClockElem {
-  u64 epoch  : kClkBits;
-  u64 reused : 64 - kClkBits;
-};
-
-struct ClockBlock {
-  static const uptr kSize = 512;
-  static const uptr kTableSize = kSize / sizeof(u32);
-  static const uptr kClockCount = kSize / sizeof(ClockElem);
-
-  union {
-    u32       table[kTableSize];
-    ClockElem clock[kClockCount];
-  };
-
-  ClockBlock() {
-  }
-};
-
 typedef DenseSlabAlloc<ClockBlock, 1<<16, 1<<10> ClockAlloc;
 typedef DenseSlabAllocCache ClockCache;
 
@@ -46,84 +27,200 @@
   SyncClock();
   ~SyncClock();
 
-  uptr size() const {
-    return size_;
-  }
+  uptr size() const;
 
-  u64 get(unsigned tid) const {
-    return elem(tid).epoch;
-  }
+  // These are used only in tests.
+  u64 get(unsigned tid) const;
+  u64 get_clean(unsigned tid) const;
 
   void Resize(ClockCache *c, uptr nclk);
   void Reset(ClockCache *c);
 
   void DebugDump(int(*printf)(const char *s, ...));
 
+  // Clock element iterator.
+  // Note: it iterates only over the table without regard to dirty entries.
+  class Iter {
+   public:
+    explicit Iter(SyncClock* parent);
+    Iter& operator++();
+    bool operator!=(const Iter& other);
+    ClockElem &operator*();
+
+   private:
+    SyncClock *parent_;
+    // [pos_, end_) is the current continuous range of clock elements.
+    ClockElem *pos_;
+    ClockElem *end_;
+    int block_;  // Current number of second level block.
+
+    NOINLINE void Next();
+  };
+
+  Iter begin();
+  Iter end();
+
  private:
-  friend struct ThreadClock;
+  friend class ThreadClock;
+  friend class Iter;
   static const uptr kDirtyTids = 2;
 
+  struct Dirty {
+    u64 epoch  : kClkBits;
+    u64 tid : 64 - kClkBits;  // kInvalidId if not active
+  };
+
   unsigned release_store_tid_;
   unsigned release_store_reused_;
-  unsigned dirty_tids_[kDirtyTids];
-  // tab_ contains indirect pointer to a 512b block using DenseSlabAlloc.
-  // If size_ <= 64, then tab_ points to an array with 64 ClockElem's.
-  // Otherwise, tab_ points to an array with 128 u32 elements,
+  Dirty dirty_[kDirtyTids];
+  // If size_ is 0, tab_ is nullptr.
+  // If size <= 64 (kClockCount), tab_ contains pointer to an array with
+  // 64 ClockElem's (ClockBlock::clock).
+  // Otherwise, tab_ points to an array with up to 127 u32 elements,
   // each pointing to the second-level 512b block with 64 ClockElem's.
+  // Unused space in the first level ClockBlock is used to store additional
+  // clock elements.
+  // The last u32 element in the first level ClockBlock is always used as
+  // reference counter.
+  //
+  // See the following scheme for details.
+  // All memory blocks are 512 bytes (allocated from ClockAlloc).
+  // Clock (clk) elements are 64 bits.
+  // Idx and ref are 32 bits.
+  //
+  // tab_
+  //    |
+  //    \/
+  //    +----------------------------------------------------+
+  //    | clk128 | clk129 | ...unused... | idx1 | idx0 | ref |
+  //    +----------------------------------------------------+
+  //                                        |      |
+  //                                        |      \/
+  //                                        |      +----------------+
+  //                                        |      | clk0 ... clk63 |
+  //                                        |      +----------------+
+  //                                        \/
+  //                                        +------------------+
+  //                                        | clk64 ... clk127 |
+  //                                        +------------------+
+  //
+  // Note: dirty entries, if active, always override what's stored in the clock.
   ClockBlock *tab_;
   u32 tab_idx_;
-  u32 size_;
+  u16 size_;
+  u16 blocks_;  // Number of second level blocks.
 
+  void Unshare(ClockCache *c);
+  bool IsShared() const;
+  bool Cachable() const;
+  void ResetImpl();
+  void FlushDirty();
+  uptr capacity() const;
+  u32 get_block(uptr bi) const;
+  void append_block(u32 idx);
   ClockElem &elem(unsigned tid) const;
 };
 
 // The clock that lives in threads.
-struct ThreadClock {
+class ThreadClock {
  public:
   typedef DenseSlabAllocCache Cache;
 
   explicit ThreadClock(unsigned tid, unsigned reused = 0);
 
-  u64 get(unsigned tid) const {
-    DCHECK_LT(tid, kMaxTidInClock);
-    return clk_[tid].epoch;
-  }
+  u64 get(unsigned tid) const;
+  void set(ClockCache *c, unsigned tid, u64 v);
+  void set(u64 v);
+  void tick();
+  uptr size() const;
 
-  void set(unsigned tid, u64 v);
-
-  void set(u64 v) {
-    DCHECK_GE(v, clk_[tid_].epoch);
-    clk_[tid_].epoch = v;
-  }
-
-  void tick() {
-    clk_[tid_].epoch++;
-  }
-
-  uptr size() const {
-    return nclk_;
-  }
-
-  void acquire(ClockCache *c, const SyncClock *src);
-  void release(ClockCache *c, SyncClock *dst) const;
+  void acquire(ClockCache *c, SyncClock *src);
+  void release(ClockCache *c, SyncClock *dst);
   void acq_rel(ClockCache *c, SyncClock *dst);
-  void ReleaseStore(ClockCache *c, SyncClock *dst) const;
+  void ReleaseStore(ClockCache *c, SyncClock *dst);
+  void ResetCached(ClockCache *c);
 
   void DebugReset();
   void DebugDump(int(*printf)(const char *s, ...));
 
  private:
   static const uptr kDirtyTids = SyncClock::kDirtyTids;
+  // Index of the thread associated with he clock ("current thread").
   const unsigned tid_;
-  const unsigned reused_;
+  const unsigned reused_;  // tid_ reuse count.
+  // Current thread time when it acquired something from other threads.
   u64 last_acquire_;
+
+  // Cached SyncClock (without dirty entries and release_store_tid_).
+  // We reuse it for subsequent store-release operations without intervening
+  // acquire operations. Since it is shared (and thus constant), clock value
+  // for the current thread is then stored in dirty entries in the SyncClock.
+  // We host a refernece to the table while it is cached here.
+  u32 cached_idx_;
+  u16 cached_size_;
+  u16 cached_blocks_;
+
+  // Number of active elements in the clk_ table (the rest is zeros).
   uptr nclk_;
-  ClockElem clk_[kMaxTidInClock];
+  u64 clk_[kMaxTidInClock];  // Fixed size vector clock.
 
   bool IsAlreadyAcquired(const SyncClock *src) const;
-  void UpdateCurrentThread(SyncClock *dst) const;
+  void UpdateCurrentThread(ClockCache *c, SyncClock *dst) const;
 };
 
+ALWAYS_INLINE u64 ThreadClock::get(unsigned tid) const {
+  DCHECK_LT(tid, kMaxTidInClock);
+  return clk_[tid];
+}
+
+ALWAYS_INLINE void ThreadClock::set(u64 v) {
+  DCHECK_GE(v, clk_[tid_]);
+  clk_[tid_] = v;
+}
+
+ALWAYS_INLINE void ThreadClock::tick() {
+  clk_[tid_]++;
+}
+
+ALWAYS_INLINE uptr ThreadClock::size() const {
+  return nclk_;
+}
+
+ALWAYS_INLINE SyncClock::Iter SyncClock::begin() {
+  return Iter(this);
+}
+
+ALWAYS_INLINE SyncClock::Iter SyncClock::end() {
+  return Iter(nullptr);
+}
+
+ALWAYS_INLINE uptr SyncClock::size() const {
+  return size_;
+}
+
+ALWAYS_INLINE SyncClock::Iter::Iter(SyncClock* parent)
+    : parent_(parent)
+    , pos_(nullptr)
+    , end_(nullptr)
+    , block_(-1) {
+  if (parent)
+    Next();
+}
+
+ALWAYS_INLINE SyncClock::Iter& SyncClock::Iter::operator++() {
+  pos_++;
+  if (UNLIKELY(pos_ >= end_))
+    Next();
+  return *this;
+}
+
+ALWAYS_INLINE bool SyncClock::Iter::operator!=(const SyncClock::Iter& other) {
+  return parent_ != other.parent_;
+}
+
+ALWAYS_INLINE ClockElem &SyncClock::Iter::operator*() {
+  return *pos_;
+}
 }  // namespace __tsan
 
 #endif  // TSAN_CLOCK_H
diff --git a/lib/tsan/rtl/tsan_debugging.cc b/lib/tsan/rtl/tsan_debugging.cc
index 06154bc..a44b136 100644
--- a/lib/tsan/rtl/tsan_debugging.cc
+++ b/lib/tsan/rtl/tsan_debugging.cc
@@ -151,7 +151,7 @@
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-int __tsan_get_report_thread(void *report, uptr idx, int *tid, uptr *os_id,
+int __tsan_get_report_thread(void *report, uptr idx, int *tid, tid_t *os_id,
                              int *running, const char **name, int *parent_tid,
                              void **trace, uptr trace_size) {
   const ReportDesc *rep = (ReportDesc *)report;
@@ -228,7 +228,7 @@
 
 SANITIZER_INTERFACE_ATTRIBUTE
 int __tsan_get_alloc_stack(uptr addr, uptr *trace, uptr size, int *thread_id,
-                           uptr *os_id) {
+                           tid_t *os_id) {
   MBlock *b = 0;
   Allocator *a = allocator();
   if (a->PointerIsMine((void *)addr)) {
diff --git a/lib/tsan/rtl/tsan_defs.h b/lib/tsan/rtl/tsan_defs.h
index 8a0381e..3c775de 100644
--- a/lib/tsan/rtl/tsan_defs.h
+++ b/lib/tsan/rtl/tsan_defs.h
@@ -38,15 +38,40 @@
 
 namespace __tsan {
 
+const int kClkBits = 42;
+const unsigned kMaxTidReuse = (1 << (64 - kClkBits)) - 1;
+
+struct ClockElem {
+  u64 epoch  : kClkBits;
+  u64 reused : 64 - kClkBits;  // tid reuse count
+};
+
+struct ClockBlock {
+  static const uptr kSize = 512;
+  static const uptr kTableSize = kSize / sizeof(u32);
+  static const uptr kClockCount = kSize / sizeof(ClockElem);
+  static const uptr kRefIdx = kTableSize - 1;
+  static const uptr kBlockIdx = kTableSize - 2;
+
+  union {
+    u32       table[kTableSize];
+    ClockElem clock[kClockCount];
+  };
+
+  ClockBlock() {
+  }
+};
+
 const int kTidBits = 13;
-const unsigned kMaxTid = 1 << kTidBits;
+// Reduce kMaxTid by kClockCount because one slot in ClockBlock table is
+// occupied by reference counter, so total number of elements we can store
+// in SyncClock is kClockCount * (kTableSize - 1).
+const unsigned kMaxTid = (1 << kTidBits) - ClockBlock::kClockCount;
 #if !SANITIZER_GO
 const unsigned kMaxTidInClock = kMaxTid * 2;  // This includes msb 'freed' bit.
 #else
 const unsigned kMaxTidInClock = kMaxTid;  // Go does not track freed memory.
 #endif
-const int kClkBits = 42;
-const unsigned kMaxTidReuse = (1 << (64 - kClkBits)) - 1;
 const uptr kShadowStackSize = 64 * 1024;
 
 // Count of shadow values in a shadow cell.
@@ -74,7 +99,7 @@
 const bool kCollectHistory = true;
 #endif
 
-const unsigned kInvalidTid = (unsigned)-1;
+const u16 kInvalidTid = kMaxTid + 1;
 
 // The following "build consistency" machinery ensures that all source files
 // are built in the same configuration. Inconsistent builds lead to
@@ -157,6 +182,15 @@
 
 COMPILER_CHECK(sizeof(MBlock) == 16);
 
+enum ExternalTag : uptr {
+  kExternalTagNone = 0,
+  kExternalTagSwiftModifyingAccess = 1,
+  kExternalTagFirstUserAvailable = 2,
+  kExternalTagMax = 1024,
+  // Don't set kExternalTagMax over 65,536, since MBlock only stores tags
+  // as 16-bit values, see tsan_defs.h.
+};
+
 }  // namespace __tsan
 
 #endif  // TSAN_DEFS_H
diff --git a/lib/tsan/rtl/tsan_dense_alloc.h b/lib/tsan/rtl/tsan_dense_alloc.h
index e9815c9..16dbdf3 100644
--- a/lib/tsan/rtl/tsan_dense_alloc.h
+++ b/lib/tsan/rtl/tsan_dense_alloc.h
@@ -39,7 +39,7 @@
   typedef DenseSlabAllocCache Cache;
   typedef typename Cache::IndexT IndexT;
 
-  DenseSlabAlloc() {
+  explicit DenseSlabAlloc(const char *name) {
     // Check that kL1Size and kL2Size are sane.
     CHECK_EQ(kL1Size & (kL1Size - 1), 0);
     CHECK_EQ(kL2Size & (kL2Size - 1), 0);
@@ -49,6 +49,7 @@
     internal_memset(map_, 0, sizeof(map_));
     freelist_ = 0;
     fillpos_ = 0;
+    name_ = name;
   }
 
   ~DenseSlabAlloc() {
@@ -96,15 +97,19 @@
   SpinMutex mtx_;
   IndexT freelist_;
   uptr fillpos_;
+  const char *name_;
 
   void Refill(Cache *c) {
     SpinMutexLock lock(&mtx_);
     if (freelist_ == 0) {
       if (fillpos_ == kL1Size) {
-        Printf("ThreadSanitizer: DenseSlabAllocator overflow. Dying.\n");
+        Printf("ThreadSanitizer: %s overflow (%zu*%zu). Dying.\n",
+            name_, kL1Size, kL2Size);
         Die();
       }
-      T *batch = (T*)MmapOrDie(kL2Size * sizeof(T), "DenseSlabAllocator");
+      VPrintf(2, "ThreadSanitizer: growing %s: %zu out of %zu*%zu\n",
+          name_, fillpos_, kL1Size, kL2Size);
+      T *batch = (T*)MmapOrDie(kL2Size * sizeof(T), name_);
       // Reserve 0 as invalid index.
       IndexT start = fillpos_ == 0 ? 1 : 0;
       for (IndexT i = start; i < kL2Size; i++) {
diff --git a/lib/tsan/rtl/tsan_external.cc b/lib/tsan/rtl/tsan_external.cc
index dc8ec62..6c0e947 100644
--- a/lib/tsan/rtl/tsan_external.cc
+++ b/lib/tsan/rtl/tsan_external.cc
@@ -11,34 +11,91 @@
 //
 //===----------------------------------------------------------------------===//
 #include "tsan_rtl.h"
+#include "tsan_interceptors.h"
 
 namespace __tsan {
 
 #define CALLERPC ((uptr)__builtin_return_address(0))
 
-const uptr kMaxTag = 128;  // Limited to 65,536, since MBlock only stores tags
-                           // as 16-bit values, see tsan_defs.h.
+struct TagData {
+  const char *object_type;
+  const char *header;
+};
 
-const char *registered_tags[kMaxTag];
-static atomic_uint32_t used_tags{1};  // Tag 0 means "no tag". NOLINT
-
-const char *GetObjectTypeFromTag(uptr tag) {
-  if (tag == 0) return nullptr;
+static TagData registered_tags[kExternalTagMax] = {
+  {},
+  {"Swift variable", "Swift access race"},
+};
+static atomic_uint32_t used_tags{kExternalTagFirstUserAvailable};  // NOLINT.
+static TagData *GetTagData(uptr tag) {
   // Invalid/corrupted tag?  Better return NULL and let the caller deal with it.
   if (tag >= atomic_load(&used_tags, memory_order_relaxed)) return nullptr;
-  return registered_tags[tag];
+  return &registered_tags[tag];
+}
+
+const char *GetObjectTypeFromTag(uptr tag) {
+  TagData *tag_data = GetTagData(tag);
+  return tag_data ? tag_data->object_type : nullptr;
+}
+
+const char *GetReportHeaderFromTag(uptr tag) {
+  TagData *tag_data = GetTagData(tag);
+  return tag_data ? tag_data->header : nullptr;
+}
+
+void InsertShadowStackFrameForTag(ThreadState *thr, uptr tag) {
+  FuncEntry(thr, (uptr)&registered_tags[tag]);
+}
+
+uptr TagFromShadowStackFrame(uptr pc) {
+  uptr tag_count = atomic_load(&used_tags, memory_order_relaxed);
+  void *pc_ptr = (void *)pc;
+  if (pc_ptr < GetTagData(0) || pc_ptr > GetTagData(tag_count - 1))
+    return 0;
+  return (TagData *)pc_ptr - GetTagData(0);
+}
+
+#if !SANITIZER_GO
+
+typedef void(*AccessFunc)(ThreadState *, uptr, uptr, int);
+void ExternalAccess(void *addr, void *caller_pc, void *tag, AccessFunc access) {
+  CHECK_LT(tag, atomic_load(&used_tags, memory_order_relaxed));
+  ThreadState *thr = cur_thread();
+  if (caller_pc) FuncEntry(thr, (uptr)caller_pc);
+  InsertShadowStackFrameForTag(thr, (uptr)tag);
+  bool in_ignored_lib;
+  if (!caller_pc || !libignore()->IsIgnored((uptr)caller_pc, &in_ignored_lib)) {
+    access(thr, CALLERPC, (uptr)addr, kSizeLog1);
+  }
+  FuncExit(thr);
+  if (caller_pc) FuncExit(thr);
 }
 
 extern "C" {
 SANITIZER_INTERFACE_ATTRIBUTE
 void *__tsan_external_register_tag(const char *object_type) {
   uptr new_tag = atomic_fetch_add(&used_tags, 1, memory_order_relaxed);
-  CHECK_LT(new_tag, kMaxTag);
-  registered_tags[new_tag] = internal_strdup(object_type);
+  CHECK_LT(new_tag, kExternalTagMax);
+  GetTagData(new_tag)->object_type = internal_strdup(object_type);
+  char header[127] = {0};
+  internal_snprintf(header, sizeof(header), "race on %s", object_type);
+  GetTagData(new_tag)->header = internal_strdup(header);
   return (void *)new_tag;
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_external_register_header(void *tag, const char *header) {
+  CHECK_GE((uptr)tag, kExternalTagFirstUserAvailable);
+  CHECK_LT((uptr)tag, kExternalTagMax);
+  atomic_uintptr_t *header_ptr =
+      (atomic_uintptr_t *)&GetTagData((uptr)tag)->header;
+  header = internal_strdup(header);
+  char *old_header =
+      (char *)atomic_exchange(header_ptr, (uptr)header, memory_order_seq_cst);
+  if (old_header) internal_free(old_header);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
 void __tsan_external_assign_tag(void *addr, void *tag) {
   CHECK_LT(tag, atomic_load(&used_tags, memory_order_relaxed));
   Allocator *a = allocator();
@@ -54,25 +111,15 @@
 
 SANITIZER_INTERFACE_ATTRIBUTE
 void __tsan_external_read(void *addr, void *caller_pc, void *tag) {
-  CHECK_LT(tag, atomic_load(&used_tags, memory_order_relaxed));
-  ThreadState *thr = cur_thread();
-  thr->external_tag = (uptr)tag;
-  FuncEntry(thr, (uptr)caller_pc);
-  MemoryRead(thr, CALLERPC, (uptr)addr, kSizeLog8);
-  FuncExit(thr);
-  thr->external_tag = 0;
+  ExternalAccess(addr, caller_pc, tag, MemoryRead);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
 void __tsan_external_write(void *addr, void *caller_pc, void *tag) {
-  CHECK_LT(tag, atomic_load(&used_tags, memory_order_relaxed));
-  ThreadState *thr = cur_thread();
-  thr->external_tag = (uptr)tag;
-  FuncEntry(thr, (uptr)caller_pc);
-  MemoryWrite(thr, CALLERPC, (uptr)addr, kSizeLog8);
-  FuncExit(thr);
-  thr->external_tag = 0;
+  ExternalAccess(addr, caller_pc, tag, MemoryWrite);
 }
 }  // extern "C"
 
+#endif  // !SANITIZER_GO
+
 }  // namespace __tsan
diff --git a/lib/tsan/rtl/tsan_fd.cc b/lib/tsan/rtl/tsan_fd.cc
index d84df4a..f13a743 100644
--- a/lib/tsan/rtl/tsan_fd.cc
+++ b/lib/tsan/rtl/tsan_fd.cc
@@ -48,8 +48,8 @@
 }
 
 static FdSync *allocsync(ThreadState *thr, uptr pc) {
-  FdSync *s = (FdSync*)user_alloc(thr, pc, sizeof(FdSync), kDefaultAlignment,
-      false);
+  FdSync *s = (FdSync*)user_alloc_internal(thr, pc, sizeof(FdSync),
+      kDefaultAlignment, false);
   atomic_store(&s->rc, 1, memory_order_relaxed);
   return s;
 }
@@ -79,7 +79,7 @@
   if (l1 == 0) {
     uptr size = kTableSizeL2 * sizeof(FdDesc);
     // We need this to reside in user memory to properly catch races on it.
-    void *p = user_alloc(thr, pc, size, kDefaultAlignment, false);
+    void *p = user_alloc_internal(thr, pc, size, kDefaultAlignment, false);
     internal_memset(p, 0, size);
     MemoryResetRange(thr, (uptr)&fddesc, (uptr)p, size);
     if (atomic_compare_exchange_strong(pl1, &l1, (uptr)p, memory_order_acq_rel))
diff --git a/lib/tsan/rtl/tsan_flags.cc b/lib/tsan/rtl/tsan_flags.cc
index d8d4746..89e22a1 100644
--- a/lib/tsan/rtl/tsan_flags.cc
+++ b/lib/tsan/rtl/tsan_flags.cc
@@ -21,10 +21,6 @@
 
 namespace __tsan {
 
-Flags *flags() {
-  return &ctx->flags;
-}
-
 // Can be overriden in frontend.
 #ifdef TSAN_EXTERNAL_HOOKS
 extern "C" const char* __tsan_default_options();
diff --git a/lib/tsan/rtl/tsan_flags.h b/lib/tsan/rtl/tsan_flags.h
index e2f6b3c..66740de 100644
--- a/lib/tsan/rtl/tsan_flags.h
+++ b/lib/tsan/rtl/tsan_flags.h
@@ -28,7 +28,6 @@
   void ParseFromString(const char *str);
 };
 
-Flags *flags();
 void InitializeFlags(Flags *flags, const char *env);
 }  // namespace __tsan
 
diff --git a/lib/tsan/rtl/tsan_interceptors.cc b/lib/tsan/rtl/tsan_interceptors.cc
index 9bf1b28..79e243a 100644
--- a/lib/tsan/rtl/tsan_interceptors.cc
+++ b/lib/tsan/rtl/tsan_interceptors.cc
@@ -14,10 +14,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "sanitizer_common/sanitizer_atomic.h"
+#include "sanitizer_common/sanitizer_errno.h"
 #include "sanitizer_common/sanitizer_libc.h"
 #include "sanitizer_common/sanitizer_linux.h"
+#include "sanitizer_common/sanitizer_platform_limits_netbsd.h"
 #include "sanitizer_common/sanitizer_platform_limits_posix.h"
 #include "sanitizer_common/sanitizer_placement_new.h"
+#include "sanitizer_common/sanitizer_posix.h"
 #include "sanitizer_common/sanitizer_stacktrace.h"
 #include "sanitizer_common/sanitizer_tls_get_addr.h"
 #include "interception/interception.h"
@@ -29,30 +32,33 @@
 #include "tsan_mman.h"
 #include "tsan_fd.h"
 
-#if SANITIZER_POSIX
-#include "sanitizer_common/sanitizer_posix.h"
-#endif
 
 using namespace __tsan;  // NOLINT
 
 #if SANITIZER_FREEBSD || SANITIZER_MAC
-#define __errno_location __error
 #define stdout __stdoutp
 #define stderr __stderrp
 #endif
 
+#if SANITIZER_NETBSD
+#define dirfd(dirp) (*(int *)(dirp))
+#define fileno_unlocked fileno
+
+#if _LP64
+#define __sF_size 152
+#else
+#define __sF_size 88
+#endif
+
+#define stdout ((char*)&__sF + (__sF_size * 1))
+#define stderr ((char*)&__sF + (__sF_size * 2))
+
+#endif
+
 #if SANITIZER_ANDROID
-#define __errno_location __errno
 #define mallopt(a, b)
 #endif
 
-#if SANITIZER_LINUX || SANITIZER_FREEBSD
-#define PTHREAD_CREATE_DETACHED 1
-#elif SANITIZER_MAC
-#define PTHREAD_CREATE_DETACHED 2
-#endif
-
-
 #ifdef __mips__
 const int kSigCount = 129;
 #else
@@ -93,24 +99,26 @@
 DECLARE_REAL_AND_INTERCEPTOR(void, free, void *ptr)
 extern "C" void *pthread_self();
 extern "C" void _exit(int status);
-extern "C" int *__errno_location();
 extern "C" int fileno_unlocked(void *stream);
+#if !SANITIZER_NETBSD
 extern "C" int dirfd(void *dirp);
-#if !SANITIZER_FREEBSD && !SANITIZER_ANDROID
+#endif
+#if !SANITIZER_FREEBSD && !SANITIZER_ANDROID && !SANITIZER_NETBSD
 extern "C" int mallopt(int param, int value);
 #endif
+#if SANITIZER_NETBSD
+extern __sanitizer_FILE __sF[];
+#else
 extern __sanitizer_FILE *stdout, *stderr;
-#if !SANITIZER_FREEBSD && !SANITIZER_MAC
+#endif
+#if !SANITIZER_FREEBSD && !SANITIZER_MAC && !SANITIZER_NETBSD
 const int PTHREAD_MUTEX_RECURSIVE = 1;
 const int PTHREAD_MUTEX_RECURSIVE_NP = 1;
 #else
 const int PTHREAD_MUTEX_RECURSIVE = 2;
 const int PTHREAD_MUTEX_RECURSIVE_NP = 2;
 #endif
-const int EINVAL = 22;
-const int EBUSY = 16;
-const int EOWNERDEAD = 130;
-#if !SANITIZER_FREEBSD && !SANITIZER_MAC
+#if !SANITIZER_FREEBSD && !SANITIZER_MAC && !SANITIZER_NETBSD
 const int EPOLL_CTL_ADD = 1;
 #endif
 const int SIGILL = 4;
@@ -119,7 +127,7 @@
 const int SIGSEGV = 11;
 const int SIGPIPE = 13;
 const int SIGTERM = 15;
-#if defined(__mips__) || SANITIZER_FREEBSD || SANITIZER_MAC
+#if defined(__mips__) || SANITIZER_FREEBSD || SANITIZER_MAC || SANITIZER_NETBSD
 const int SIGBUS = 10;
 const int SIGSYS = 12;
 #else
@@ -127,7 +135,9 @@
 const int SIGSYS = 31;
 #endif
 void *const MAP_FAILED = (void*)-1;
-#if !SANITIZER_MAC
+#if SANITIZER_NETBSD
+const int PTHREAD_BARRIER_SERIAL_THREAD = 1234567;
+#elif !SANITIZER_MAC
 const int PTHREAD_BARRIER_SERIAL_THREAD = -1;
 #endif
 const int MAP_FIXED = 0x10;
@@ -139,8 +149,6 @@
 # define F_TLOCK 2      /* Test and lock a region for exclusive use.  */
 # define F_TEST  3      /* Test a region for other processes locks.  */
 
-#define errno (*__errno_location())
-
 typedef void (*sighandler_t)(int sig);
 typedef void (*sigactionhandler_t)(int sig, my_siginfo_t *siginfo, void *uctx);
 
@@ -154,6 +162,15 @@
   __sanitizer_sigset_t sa_mask;
   void (*sa_restorer)();
 };
+#elif SANITIZER_NETBSD
+struct sigaction_t {
+  union {
+    sighandler_t sa_handler;
+    sigactionhandler_t sa_sigaction;
+  };
+  __sanitizer_sigset_t sa_mask;
+  int sa_flags;
+};
 #else
 struct sigaction_t {
 #ifdef __mips__
@@ -182,7 +199,7 @@
 const sighandler_t SIG_DFL = (sighandler_t)0;
 const sighandler_t SIG_IGN = (sighandler_t)1;
 const sighandler_t SIG_ERR = (sighandler_t)-1;
-#if SANITIZER_FREEBSD || SANITIZER_MAC
+#if SANITIZER_FREEBSD || SANITIZER_MAC || SANITIZER_NETBSD
 const int SA_SIGINFO = 0x40;
 const int SIG_SETMASK = 3;
 #elif defined(__mips__)
@@ -219,7 +236,7 @@
 // The object is 64-byte aligned, because we want hot data to be located in
 // a single cache line if possible (it's accessed in every interceptor).
 static ALIGNED(64) char libignore_placeholder[sizeof(LibIgnore)];
-static LibIgnore *libignore() {
+LibIgnore *libignore() {
   return reinterpret_cast<LibIgnore*>(&libignore_placeholder[0]);
 }
 
@@ -277,7 +294,8 @@
 
 void ScopedInterceptor::EnableIgnores() {
   if (ignoring_) {
-    ThreadIgnoreBegin(thr_, pc_);
+    ThreadIgnoreBegin(thr_, pc_, /*save_stack=*/false);
+    if (flags()->ignore_noninstrumented_modules) thr_->suppress_reports++;
     if (in_ignored_lib_) {
       DCHECK(!thr_->in_ignored_lib);
       thr_->in_ignored_lib = true;
@@ -288,6 +306,7 @@
 void ScopedInterceptor::DisableIgnores() {
   if (ignoring_) {
     ThreadIgnoreEnd(thr_, pc_);
+    if (flags()->ignore_noninstrumented_modules) thr_->suppress_reports--;
     if (in_ignored_lib_) {
       DCHECK(thr_->in_ignored_lib);
       thr_->in_ignored_lib = false;
@@ -296,7 +315,7 @@
 }
 
 #define TSAN_INTERCEPT(func) INTERCEPT_FUNCTION(func)
-#if SANITIZER_FREEBSD
+#if SANITIZER_FREEBSD || SANITIZER_NETBSD
 # define TSAN_INTERCEPT_VER(func, ver) INTERCEPT_FUNCTION(func)
 #else
 # define TSAN_INTERCEPT_VER(func, ver) INTERCEPT_FUNCTION_VER(func, ver)
@@ -360,6 +379,11 @@
   return res;
 }
 
+TSAN_INTERCEPTOR(int, pause, int fake) {
+  SCOPED_TSAN_INTERCEPTOR(pause, fake);
+  return BLOCK_REAL(pause)(fake);
+}
+
 // The sole reason tsan wraps atexit callbacks is to establish synchronization
 // between callback setup and callback execution.
 struct AtExitCtx {
@@ -473,8 +497,14 @@
 static void LongJmp(ThreadState *thr, uptr *env) {
 #ifdef __powerpc__
   uptr mangled_sp = env[0];
-#elif SANITIZER_FREEBSD || SANITIZER_MAC
+#elif SANITIZER_FREEBSD || SANITIZER_NETBSD
   uptr mangled_sp = env[2];
+#elif SANITIZER_MAC
+# ifdef __aarch64__
+    uptr mangled_sp = env[13];
+# else
+    uptr mangled_sp = env[2];
+# endif
 #elif defined(SANITIZER_LINUX)
 # ifdef __aarch64__
   uptr mangled_sp = env[13];
@@ -592,7 +622,7 @@
 
 TSAN_INTERCEPTOR(void*, __libc_memalign, uptr align, uptr sz) {
   SCOPED_TSAN_INTERCEPTOR(__libc_memalign, align, sz);
-  return user_alloc(thr, pc, sz, align);
+  return user_memalign(thr, pc, align, sz);
 }
 
 TSAN_INTERCEPTOR(void*, calloc, uptr size, uptr n) {
@@ -672,7 +702,7 @@
   if (*addr) {
     if (!IsAppMem((uptr)*addr) || !IsAppMem((uptr)*addr + sz - 1)) {
       if (flags & MAP_FIXED) {
-        errno = EINVAL;
+        errno = errno_EINVAL;
         return false;
       } else {
         *addr = 0;
@@ -738,7 +768,7 @@
 #if SANITIZER_LINUX
 TSAN_INTERCEPTOR(void*, memalign, uptr align, uptr sz) {
   SCOPED_INTERCEPTOR_RAW(memalign, align, sz);
-  return user_alloc(thr, pc, sz, align);
+  return user_memalign(thr, pc, align, sz);
 }
 #define TSAN_MAYBE_INTERCEPT_MEMALIGN TSAN_INTERCEPT(memalign)
 #else
@@ -747,21 +777,20 @@
 
 #if !SANITIZER_MAC
 TSAN_INTERCEPTOR(void*, aligned_alloc, uptr align, uptr sz) {
-  SCOPED_INTERCEPTOR_RAW(memalign, align, sz);
-  return user_alloc(thr, pc, sz, align);
+  SCOPED_INTERCEPTOR_RAW(aligned_alloc, align, sz);
+  return user_aligned_alloc(thr, pc, align, sz);
 }
 
 TSAN_INTERCEPTOR(void*, valloc, uptr sz) {
   SCOPED_INTERCEPTOR_RAW(valloc, sz);
-  return user_alloc(thr, pc, sz, GetPageSizeCached());
+  return user_valloc(thr, pc, sz);
 }
 #endif
 
 #if SANITIZER_LINUX
 TSAN_INTERCEPTOR(void*, pvalloc, uptr sz) {
   SCOPED_INTERCEPTOR_RAW(pvalloc, sz);
-  sz = RoundUp(sz, GetPageSizeCached());
-  return user_alloc(thr, pc, sz, GetPageSizeCached());
+  return user_pvalloc(thr, pc, sz);
 }
 #define TSAN_MAYBE_INTERCEPT_PVALLOC TSAN_INTERCEPT(pvalloc)
 #else
@@ -771,8 +800,7 @@
 #if !SANITIZER_MAC
 TSAN_INTERCEPTOR(int, posix_memalign, void **memptr, uptr align, uptr sz) {
   SCOPED_INTERCEPTOR_RAW(posix_memalign, memptr, align, sz);
-  *memptr = user_alloc(thr, pc, sz, align);
-  return 0;
+  return user_posix_memalign(thr, pc, memptr, align, sz);
 }
 #endif
 
@@ -928,8 +956,7 @@
     ThreadIgnoreEnd(thr, pc);
   }
   if (res == 0) {
-    int tid = ThreadCreate(thr, pc, *(uptr*)th,
-                           detached == PTHREAD_CREATE_DETACHED);
+    int tid = ThreadCreate(thr, pc, *(uptr*)th, IsStateDetached(detached));
     CHECK_NE(tid, 0);
     // Synchronization on p.tid serves two purposes:
     // 1. ThreadCreate must finish before the new thread starts.
@@ -1025,7 +1052,7 @@
   ThreadSignalContext *ctx = SigCtx(arg->thr);
   CHECK_EQ(atomic_load(&ctx->in_blocking_func, memory_order_relaxed), 1);
   atomic_store(&ctx->in_blocking_func, 0, memory_order_relaxed);
-  MutexLock(arg->thr, arg->pc, (uptr)arg->m);
+  MutexPostLock(arg->thr, arg->pc, (uptr)arg->m, MutexFlagDoPreLockOnPostLock);
   // Undo BlockingCall ctor effects.
   arg->thr->ignore_interceptors--;
   arg->si->~ScopedInterceptor();
@@ -1054,7 +1081,7 @@
         fn, c, m, t, (void (*)(void *arg))cond_mutex_unlock, &arg);
   }
   if (res == errno_EOWNERDEAD) MutexRepair(thr, pc, (uptr)m);
-  MutexLock(thr, pc, (uptr)m);
+  MutexPostLock(thr, pc, (uptr)m, MutexFlagDoPreLockOnPostLock);
   return res;
 }
 
@@ -1114,14 +1141,15 @@
   SCOPED_TSAN_INTERCEPTOR(pthread_mutex_init, m, a);
   int res = REAL(pthread_mutex_init)(m, a);
   if (res == 0) {
-    bool recursive = false;
+    u32 flagz = 0;
     if (a) {
       int type = 0;
       if (REAL(pthread_mutexattr_gettype)(a, &type) == 0)
-        recursive = (type == PTHREAD_MUTEX_RECURSIVE
-            || type == PTHREAD_MUTEX_RECURSIVE_NP);
+        if (type == PTHREAD_MUTEX_RECURSIVE ||
+            type == PTHREAD_MUTEX_RECURSIVE_NP)
+          flagz |= MutexFlagWriteReentrant;
     }
-    MutexCreate(thr, pc, (uptr)m, false, recursive, false);
+    MutexCreate(thr, pc, (uptr)m, flagz);
   }
   return res;
 }
@@ -1129,7 +1157,7 @@
 TSAN_INTERCEPTOR(int, pthread_mutex_destroy, void *m) {
   SCOPED_TSAN_INTERCEPTOR(pthread_mutex_destroy, m);
   int res = REAL(pthread_mutex_destroy)(m);
-  if (res == 0 || res == EBUSY) {
+  if (res == 0 || res == errno_EBUSY) {
     MutexDestroy(thr, pc, (uptr)m);
   }
   return res;
@@ -1138,10 +1166,10 @@
 TSAN_INTERCEPTOR(int, pthread_mutex_trylock, void *m) {
   SCOPED_TSAN_INTERCEPTOR(pthread_mutex_trylock, m);
   int res = REAL(pthread_mutex_trylock)(m);
-  if (res == EOWNERDEAD)
+  if (res == errno_EOWNERDEAD)
     MutexRepair(thr, pc, (uptr)m);
-  if (res == 0 || res == EOWNERDEAD)
-    MutexLock(thr, pc, (uptr)m, /*rec=*/1, /*try_lock=*/true);
+  if (res == 0 || res == errno_EOWNERDEAD)
+    MutexPostLock(thr, pc, (uptr)m, MutexFlagTryLock);
   return res;
 }
 
@@ -1150,7 +1178,7 @@
   SCOPED_TSAN_INTERCEPTOR(pthread_mutex_timedlock, m, abstime);
   int res = REAL(pthread_mutex_timedlock)(m, abstime);
   if (res == 0) {
-    MutexLock(thr, pc, (uptr)m);
+    MutexPostLock(thr, pc, (uptr)m, MutexFlagTryLock);
   }
   return res;
 }
@@ -1161,7 +1189,7 @@
   SCOPED_TSAN_INTERCEPTOR(pthread_spin_init, m, pshared);
   int res = REAL(pthread_spin_init)(m, pshared);
   if (res == 0) {
-    MutexCreate(thr, pc, (uptr)m, false, false, false);
+    MutexCreate(thr, pc, (uptr)m);
   }
   return res;
 }
@@ -1177,9 +1205,10 @@
 
 TSAN_INTERCEPTOR(int, pthread_spin_lock, void *m) {
   SCOPED_TSAN_INTERCEPTOR(pthread_spin_lock, m);
+  MutexPreLock(thr, pc, (uptr)m);
   int res = REAL(pthread_spin_lock)(m);
   if (res == 0) {
-    MutexLock(thr, pc, (uptr)m);
+    MutexPostLock(thr, pc, (uptr)m);
   }
   return res;
 }
@@ -1188,7 +1217,7 @@
   SCOPED_TSAN_INTERCEPTOR(pthread_spin_trylock, m);
   int res = REAL(pthread_spin_trylock)(m);
   if (res == 0) {
-    MutexLock(thr, pc, (uptr)m, /*rec=*/1, /*try_lock=*/true);
+    MutexPostLock(thr, pc, (uptr)m, MutexFlagTryLock);
   }
   return res;
 }
@@ -1205,7 +1234,7 @@
   SCOPED_TSAN_INTERCEPTOR(pthread_rwlock_init, m, a);
   int res = REAL(pthread_rwlock_init)(m, a);
   if (res == 0) {
-    MutexCreate(thr, pc, (uptr)m, true, false, false);
+    MutexCreate(thr, pc, (uptr)m);
   }
   return res;
 }
@@ -1221,9 +1250,10 @@
 
 TSAN_INTERCEPTOR(int, pthread_rwlock_rdlock, void *m) {
   SCOPED_TSAN_INTERCEPTOR(pthread_rwlock_rdlock, m);
+  MutexPreReadLock(thr, pc, (uptr)m);
   int res = REAL(pthread_rwlock_rdlock)(m);
   if (res == 0) {
-    MutexReadLock(thr, pc, (uptr)m);
+    MutexPostReadLock(thr, pc, (uptr)m);
   }
   return res;
 }
@@ -1232,7 +1262,7 @@
   SCOPED_TSAN_INTERCEPTOR(pthread_rwlock_tryrdlock, m);
   int res = REAL(pthread_rwlock_tryrdlock)(m);
   if (res == 0) {
-    MutexReadLock(thr, pc, (uptr)m, /*try_lock=*/true);
+    MutexPostReadLock(thr, pc, (uptr)m, MutexFlagTryLock);
   }
   return res;
 }
@@ -1242,7 +1272,7 @@
   SCOPED_TSAN_INTERCEPTOR(pthread_rwlock_timedrdlock, m, abstime);
   int res = REAL(pthread_rwlock_timedrdlock)(m, abstime);
   if (res == 0) {
-    MutexReadLock(thr, pc, (uptr)m);
+    MutexPostReadLock(thr, pc, (uptr)m);
   }
   return res;
 }
@@ -1250,9 +1280,10 @@
 
 TSAN_INTERCEPTOR(int, pthread_rwlock_wrlock, void *m) {
   SCOPED_TSAN_INTERCEPTOR(pthread_rwlock_wrlock, m);
+  MutexPreLock(thr, pc, (uptr)m);
   int res = REAL(pthread_rwlock_wrlock)(m);
   if (res == 0) {
-    MutexLock(thr, pc, (uptr)m);
+    MutexPostLock(thr, pc, (uptr)m);
   }
   return res;
 }
@@ -1261,7 +1292,7 @@
   SCOPED_TSAN_INTERCEPTOR(pthread_rwlock_trywrlock, m);
   int res = REAL(pthread_rwlock_trywrlock)(m);
   if (res == 0) {
-    MutexLock(thr, pc, (uptr)m, /*rec=*/1, /*try_lock=*/true);
+    MutexPostLock(thr, pc, (uptr)m, MutexFlagTryLock);
   }
   return res;
 }
@@ -1271,7 +1302,7 @@
   SCOPED_TSAN_INTERCEPTOR(pthread_rwlock_timedwrlock, m, abstime);
   int res = REAL(pthread_rwlock_timedwrlock)(m, abstime);
   if (res == 0) {
-    MutexLock(thr, pc, (uptr)m);
+    MutexPostLock(thr, pc, (uptr)m, MutexFlagTryLock);
   }
   return res;
 }
@@ -1315,7 +1346,7 @@
 TSAN_INTERCEPTOR(int, pthread_once, void *o, void (*f)()) {
   SCOPED_INTERCEPTOR_RAW(pthread_once, o, f);
   if (o == 0 || f == 0)
-    return EINVAL;
+    return errno_EINVAL;
   atomic_uint32_t *a;
   if (!SANITIZER_MAC)
     a = static_cast<atomic_uint32_t*>(o);
@@ -1352,7 +1383,7 @@
 #endif
 
 TSAN_INTERCEPTOR(int, fstat, int fd, void *buf) {
-#if SANITIZER_FREEBSD || SANITIZER_MAC || SANITIZER_ANDROID
+#if SANITIZER_FREEBSD || SANITIZER_MAC || SANITIZER_ANDROID || SANITIZER_NETBSD
   SCOPED_TSAN_INTERCEPTOR(fstat, fd, buf);
   if (fd > 0)
     FdAccess(thr, pc, fd);
@@ -1644,24 +1675,6 @@
 #define TSAN_MAYBE_INTERCEPT_TMPFILE64
 #endif
 
-TSAN_INTERCEPTOR(uptr, fread, void *ptr, uptr size, uptr nmemb, void *f) {
-  // libc file streams can call user-supplied functions, see fopencookie.
-  {
-    SCOPED_TSAN_INTERCEPTOR(fread, ptr, size, nmemb, f);
-    MemoryAccessRange(thr, pc, (uptr)ptr, size * nmemb, true);
-  }
-  return REAL(fread)(ptr, size, nmemb, f);
-}
-
-TSAN_INTERCEPTOR(uptr, fwrite, const void *p, uptr size, uptr nmemb, void *f) {
-  // libc file streams can call user-supplied functions, see fopencookie.
-  {
-    SCOPED_TSAN_INTERCEPTOR(fwrite, p, size, nmemb, f);
-    MemoryAccessRange(thr, pc, (uptr)p, size * nmemb, false);
-  }
-  return REAL(fwrite)(p, size, nmemb, f);
-}
-
 static void FlushStreams() {
   // Flushing all the streams here may freeze the process if a child thread is
   // performing file stream operations at the same time.
@@ -1951,7 +1964,7 @@
   sigactions[sig].sa_flags = *(volatile int*)&act->sa_flags;
   internal_memcpy(&sigactions[sig].sa_mask, &act->sa_mask,
       sizeof(sigactions[sig].sa_mask));
-#if !SANITIZER_FREEBSD && !SANITIZER_MAC
+#if !SANITIZER_FREEBSD && !SANITIZER_MAC && !SANITIZER_NETBSD
   sigactions[sig].sa_restorer = act->sa_restorer;
 #endif
   sigaction_t newact;
@@ -2251,8 +2264,12 @@
 #define COMMON_INTERCEPTOR_ON_EXIT(ctx) \
   OnExit(((TsanInterceptorContext *) ctx)->thr)
 
-#define COMMON_INTERCEPTOR_MUTEX_LOCK(ctx, m) \
-  MutexLock(((TsanInterceptorContext *)ctx)->thr, \
+#define COMMON_INTERCEPTOR_MUTEX_PRE_LOCK(ctx, m) \
+  MutexPreLock(((TsanInterceptorContext *)ctx)->thr, \
+            ((TsanInterceptorContext *)ctx)->pc, (uptr)m)
+
+#define COMMON_INTERCEPTOR_MUTEX_POST_LOCK(ctx, m) \
+  MutexPostLock(((TsanInterceptorContext *)ctx)->thr, \
             ((TsanInterceptorContext *)ctx)->pc, (uptr)m)
 
 #define COMMON_INTERCEPTOR_MUTEX_UNLOCK(ctx, m) \
@@ -2309,7 +2326,7 @@
   }
 };
 
-#if !SANITIZER_FREEBSD && !SANITIZER_MAC
+#if !SANITIZER_FREEBSD && !SANITIZER_MAC && !SANITIZER_NETBSD
 static void syscall_access_range(uptr pc, uptr p, uptr s, bool write) {
   TSAN_SYSCALL();
   MemoryAccessRange(thr, pc, p, s, write);
@@ -2579,6 +2596,7 @@
   TSAN_INTERCEPT(sleep);
   TSAN_INTERCEPT(usleep);
   TSAN_INTERCEPT(nanosleep);
+  TSAN_INTERCEPT(pause);
   TSAN_INTERCEPT(gettimeofday);
   TSAN_INTERCEPT(getaddrinfo);
 
diff --git a/lib/tsan/rtl/tsan_interceptors.h b/lib/tsan/rtl/tsan_interceptors.h
index 72534f4..de47466 100644
--- a/lib/tsan/rtl/tsan_interceptors.h
+++ b/lib/tsan/rtl/tsan_interceptors.h
@@ -19,6 +19,8 @@
   bool ignoring_;
 };
 
+LibIgnore *libignore();
+
 }  // namespace __tsan
 
 #define SCOPED_INTERCEPTOR_RAW(func, ...) \
diff --git a/lib/tsan/rtl/tsan_interceptors_mac.cc b/lib/tsan/rtl/tsan_interceptors_mac.cc
index fc5eb04..4f10794 100644
--- a/lib/tsan/rtl/tsan_interceptors_mac.cc
+++ b/lib/tsan/rtl/tsan_interceptors_mac.cc
@@ -21,7 +21,10 @@
 #include "tsan_interface_ann.h"
 
 #include <libkern/OSAtomic.h>
+
+#if defined(__has_include) && __has_include(<xpc/xpc.h>)
 #include <xpc/xpc.h>
+#endif  // #if defined(__has_include) && __has_include(<xpc/xpc.h>)
 
 typedef long long_t;  // NOLINT
 
@@ -235,6 +238,8 @@
   REAL(os_lock_unlock)(lock);
 }
 
+#if defined(__has_include) && __has_include(<xpc/xpc.h>)
+
 TSAN_INTERCEPTOR(void, xpc_connection_set_event_handler,
                  xpc_connection_t connection, xpc_handler_t handler) {
   SCOPED_TSAN_INTERCEPTOR(xpc_connection_set_event_handler, connection,
@@ -281,6 +286,14 @@
   (connection, message, replyq, new_handler);
 }
 
+TSAN_INTERCEPTOR(void, xpc_connection_cancel, xpc_connection_t connection) {
+  SCOPED_TSAN_INTERCEPTOR(xpc_connection_cancel, connection);
+  Release(thr, pc, (uptr)connection);
+  REAL(xpc_connection_cancel)(connection);
+}
+
+#endif  // #if defined(__has_include) && __has_include(<xpc/xpc.h>)
+
 // On macOS, libc++ is always linked dynamically, so intercepting works the
 // usual way.
 #define STDCXX_INTERCEPTOR TSAN_INTERCEPTOR
diff --git a/lib/tsan/rtl/tsan_interface.h b/lib/tsan/rtl/tsan_interface.h
index 496a871..a80a489 100644
--- a/lib/tsan/rtl/tsan_interface.h
+++ b/lib/tsan/rtl/tsan_interface.h
@@ -18,6 +18,7 @@
 
 #include <sanitizer_common/sanitizer_internal_defs.h>
 using __sanitizer::uptr;
+using __sanitizer::tid_t;
 
 // This header should NOT include any other headers.
 // All functions in this header are extern "C" and start with __tsan_.
@@ -81,6 +82,8 @@
 SANITIZER_INTERFACE_ATTRIBUTE
 void *__tsan_external_register_tag(const char *object_type);
 SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_external_register_header(void *tag, const char *header);
+SANITIZER_INTERFACE_ATTRIBUTE
 void __tsan_external_assign_tag(void *addr, void *tag);
 SANITIZER_INTERFACE_ATTRIBUTE
 void __tsan_external_read(void *addr, void *caller_pc, void *tag);
@@ -143,7 +146,7 @@
 
 // Returns information about threads included in the report.
 SANITIZER_INTERFACE_ATTRIBUTE
-int __tsan_get_report_thread(void *report, uptr idx, int *tid, uptr *os_id,
+int __tsan_get_report_thread(void *report, uptr idx, int *tid, tid_t *os_id,
                              int *running, const char **name, int *parent_tid,
                              void **trace, uptr trace_size);
 
@@ -160,7 +163,7 @@
 // Returns the allocation stack for a heap pointer.
 SANITIZER_INTERFACE_ATTRIBUTE
 int __tsan_get_alloc_stack(uptr addr, uptr *trace, uptr size, int *thread_id,
-                           uptr *os_id);
+                           tid_t *os_id);
 
 #endif  // SANITIZER_GO
 
diff --git a/lib/tsan/rtl/tsan_interface_ann.cc b/lib/tsan/rtl/tsan_interface_ann.cc
index 62db796..f68a046 100644
--- a/lib/tsan/rtl/tsan_interface_ann.cc
+++ b/lib/tsan/rtl/tsan_interface_ann.cc
@@ -31,11 +31,10 @@
 
 class ScopedAnnotation {
  public:
-  ScopedAnnotation(ThreadState *thr, const char *aname, const char *f, int l,
-                   uptr pc)
+  ScopedAnnotation(ThreadState *thr, const char *aname, uptr pc)
       : thr_(thr) {
     FuncEntry(thr_, pc);
-    DPrintf("#%d: annotation %s() %s:%d\n", thr_->tid, aname, f, l);
+    DPrintf("#%d: annotation %s()\n", thr_->tid, aname);
   }
 
   ~ScopedAnnotation() {
@@ -46,18 +45,20 @@
   ThreadState *const thr_;
 };
 
-#define SCOPED_ANNOTATION(typ) \
+#define SCOPED_ANNOTATION_RET(typ, ret) \
     if (!flags()->enable_annotations) \
-      return; \
+      return ret; \
     ThreadState *thr = cur_thread(); \
     const uptr caller_pc = (uptr)__builtin_return_address(0); \
     StatInc(thr, StatAnnotation); \
     StatInc(thr, Stat##typ); \
-    ScopedAnnotation sa(thr, __func__, f, l, caller_pc); \
+    ScopedAnnotation sa(thr, __func__, caller_pc); \
     const uptr pc = StackTrace::GetCurrentPc(); \
     (void)pc; \
 /**/
 
+#define SCOPED_ANNOTATION(typ) SCOPED_ANNOTATION_RET(typ, )
+
 static const int kMaxDescLen = 128;
 
 struct ExpectRace {
@@ -252,12 +253,12 @@
 
 void INTERFACE_ATTRIBUTE AnnotateRWLockCreate(char *f, int l, uptr m) {
   SCOPED_ANNOTATION(AnnotateRWLockCreate);
-  MutexCreate(thr, pc, m, true, true, false);
+  MutexCreate(thr, pc, m, MutexFlagWriteReentrant);
 }
 
 void INTERFACE_ATTRIBUTE AnnotateRWLockCreateStatic(char *f, int l, uptr m) {
   SCOPED_ANNOTATION(AnnotateRWLockCreateStatic);
-  MutexCreate(thr, pc, m, true, true, true);
+  MutexCreate(thr, pc, m, MutexFlagWriteReentrant | MutexFlagLinkerInit);
 }
 
 void INTERFACE_ATTRIBUTE AnnotateRWLockDestroy(char *f, int l, uptr m) {
@@ -269,9 +270,9 @@
                                                 uptr is_w) {
   SCOPED_ANNOTATION(AnnotateRWLockAcquired);
   if (is_w)
-    MutexLock(thr, pc, m);
+    MutexPostLock(thr, pc, m, MutexFlagDoPreLockOnPostLock);
   else
-    MutexReadLock(thr, pc, m);
+    MutexPostReadLock(thr, pc, m, MutexFlagDoPreLockOnPostLock);
 }
 
 void INTERFACE_ATTRIBUTE AnnotateRWLockReleased(char *f, int l, uptr m,
@@ -458,4 +459,95 @@
 AnnotateMemoryIsInitialized(char *f, int l, uptr mem, uptr sz) {}
 void INTERFACE_ATTRIBUTE
 AnnotateMemoryIsUninitialized(char *f, int l, uptr mem, uptr sz) {}
+
+// Note: the parameter is called flagz, because flags is already taken
+// by the global function that returns flags.
+INTERFACE_ATTRIBUTE
+void __tsan_mutex_create(void *m, unsigned flagz) {
+  SCOPED_ANNOTATION(__tsan_mutex_create);
+  MutexCreate(thr, pc, (uptr)m, flagz & MutexCreationFlagMask);
+}
+
+INTERFACE_ATTRIBUTE
+void __tsan_mutex_destroy(void *m, unsigned flagz) {
+  SCOPED_ANNOTATION(__tsan_mutex_destroy);
+  MutexDestroy(thr, pc, (uptr)m, flagz);
+}
+
+INTERFACE_ATTRIBUTE
+void __tsan_mutex_pre_lock(void *m, unsigned flagz) {
+  SCOPED_ANNOTATION(__tsan_mutex_pre_lock);
+  if (!(flagz & MutexFlagTryLock)) {
+    if (flagz & MutexFlagReadLock)
+      MutexPreReadLock(thr, pc, (uptr)m);
+    else
+      MutexPreLock(thr, pc, (uptr)m);
+  }
+  ThreadIgnoreBegin(thr, pc, /*save_stack=*/false);
+  ThreadIgnoreSyncBegin(thr, pc, /*save_stack=*/false);
+}
+
+INTERFACE_ATTRIBUTE
+void __tsan_mutex_post_lock(void *m, unsigned flagz, int rec) {
+  SCOPED_ANNOTATION(__tsan_mutex_post_lock);
+  ThreadIgnoreSyncEnd(thr, pc);
+  ThreadIgnoreEnd(thr, pc);
+  if (!(flagz & MutexFlagTryLockFailed)) {
+    if (flagz & MutexFlagReadLock)
+      MutexPostReadLock(thr, pc, (uptr)m, flagz);
+    else
+      MutexPostLock(thr, pc, (uptr)m, flagz, rec);
+  }
+}
+
+INTERFACE_ATTRIBUTE
+int __tsan_mutex_pre_unlock(void *m, unsigned flagz) {
+  SCOPED_ANNOTATION_RET(__tsan_mutex_pre_unlock, 0);
+  int ret = 0;
+  if (flagz & MutexFlagReadLock) {
+    CHECK(!(flagz & MutexFlagRecursiveUnlock));
+    MutexReadUnlock(thr, pc, (uptr)m);
+  } else {
+    ret = MutexUnlock(thr, pc, (uptr)m, flagz);
+  }
+  ThreadIgnoreBegin(thr, pc, /*save_stack=*/false);
+  ThreadIgnoreSyncBegin(thr, pc, /*save_stack=*/false);
+  return ret;
+}
+
+INTERFACE_ATTRIBUTE
+void __tsan_mutex_post_unlock(void *m, unsigned flagz) {
+  SCOPED_ANNOTATION(__tsan_mutex_post_unlock);
+  ThreadIgnoreSyncEnd(thr, pc);
+  ThreadIgnoreEnd(thr, pc);
+}
+
+INTERFACE_ATTRIBUTE
+void __tsan_mutex_pre_signal(void *addr, unsigned flagz) {
+  SCOPED_ANNOTATION(__tsan_mutex_pre_signal);
+  ThreadIgnoreBegin(thr, pc, /*save_stack=*/false);
+  ThreadIgnoreSyncBegin(thr, pc, /*save_stack=*/false);
+}
+
+INTERFACE_ATTRIBUTE
+void __tsan_mutex_post_signal(void *addr, unsigned flagz) {
+  SCOPED_ANNOTATION(__tsan_mutex_post_signal);
+  ThreadIgnoreSyncEnd(thr, pc);
+  ThreadIgnoreEnd(thr, pc);
+}
+
+INTERFACE_ATTRIBUTE
+void __tsan_mutex_pre_divert(void *addr, unsigned flagz) {
+  SCOPED_ANNOTATION(__tsan_mutex_pre_divert);
+  // Exit from ignore region started in __tsan_mutex_pre_lock/unlock/signal.
+  ThreadIgnoreSyncEnd(thr, pc);
+  ThreadIgnoreEnd(thr, pc);
+}
+
+INTERFACE_ATTRIBUTE
+void __tsan_mutex_post_divert(void *addr, unsigned flagz) {
+  SCOPED_ANNOTATION(__tsan_mutex_post_divert);
+  ThreadIgnoreBegin(thr, pc, /*save_stack=*/false);
+  ThreadIgnoreSyncBegin(thr, pc, /*save_stack=*/false);
+}
 }  // extern "C"
diff --git a/lib/tsan/rtl/tsan_interface_atomic.cc b/lib/tsan/rtl/tsan_interface_atomic.cc
index 5238b66..d334394 100644
--- a/lib/tsan/rtl/tsan_interface_atomic.cc
+++ b/lib/tsan/rtl/tsan_interface_atomic.cc
@@ -220,8 +220,7 @@
 #endif
 
 template<typename T>
-static T AtomicLoad(ThreadState *thr, uptr pc, const volatile T *a,
-    morder mo) {
+static T AtomicLoad(ThreadState *thr, uptr pc, const volatile T *a, morder mo) {
   CHECK(IsLoadOrder(mo));
   // This fast-path is critical for performance.
   // Assume the access is atomic.
@@ -229,10 +228,17 @@
     MemoryReadAtomic(thr, pc, (uptr)a, SizeLog<T>());
     return NoTsanAtomicLoad(a, mo);
   }
-  SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, (uptr)a, false);
-  AcquireImpl(thr, pc, &s->clock);
+  // Don't create sync object if it does not exist yet. For example, an atomic
+  // pointer is initialized to nullptr and then periodically acquire-loaded.
   T v = NoTsanAtomicLoad(a, mo);
-  s->mtx.ReadUnlock();
+  SyncVar *s = ctx->metamap.GetIfExistsAndLock((uptr)a, false);
+  if (s) {
+    AcquireImpl(thr, pc, &s->clock);
+    // Re-read under sync mutex because we need a consistent snapshot
+    // of the value and the clock we acquire.
+    v = NoTsanAtomicLoad(a, mo);
+    s->mtx.ReadUnlock();
+  }
   MemoryReadAtomic(thr, pc, (uptr)a, SizeLog<T>());
   return v;
 }
@@ -450,13 +456,32 @@
 
 // C/C++
 
+static morder convert_morder(morder mo) {
+  if (flags()->force_seq_cst_atomics)
+    return (morder)mo_seq_cst;
+
+  // Filter out additional memory order flags:
+  // MEMMODEL_SYNC        = 1 << 15
+  // __ATOMIC_HLE_ACQUIRE = 1 << 16
+  // __ATOMIC_HLE_RELEASE = 1 << 17
+  //
+  // HLE is an optimization, and we pretend that elision always fails.
+  // MEMMODEL_SYNC is used when lowering __sync_ atomics,
+  // since we use __sync_ atomics for actual atomic operations,
+  // we can safely ignore it as well. It also subtly affects semantics,
+  // but we don't model the difference.
+  return (morder)(mo & 0x7fff);
+}
+
 #define SCOPED_ATOMIC(func, ...) \
+    ThreadState *const thr = cur_thread(); \
+    if (thr->ignore_sync || thr->ignore_interceptors) { \
+      ProcessPendingSignals(thr); \
+      return NoTsanAtomic##func(__VA_ARGS__); \
+    } \
     const uptr callpc = (uptr)__builtin_return_address(0); \
     uptr pc = StackTrace::GetCurrentPc(); \
-    mo = flags()->force_seq_cst_atomics ? (morder)mo_seq_cst : mo; \
-    ThreadState *const thr = cur_thread(); \
-    if (thr->ignore_interceptors) \
-      return NoTsanAtomic##func(__VA_ARGS__); \
+    mo = convert_morder(mo); \
     AtomicStatInc(thr, sizeof(*a), mo, StatAtomic##func); \
     ScopedAtomic sa(thr, callpc, a, mo, __func__); \
     return Atomic##func(thr, pc, __VA_ARGS__); \
diff --git a/lib/tsan/rtl/tsan_interface_java.cc b/lib/tsan/rtl/tsan_interface_java.cc
index 5bdc04f..75e960e 100644
--- a/lib/tsan/rtl/tsan_interface_java.cc
+++ b/lib/tsan/rtl/tsan_interface_java.cc
@@ -180,8 +180,8 @@
   CHECK_GE(addr, jctx->heap_begin);
   CHECK_LT(addr, jctx->heap_begin + jctx->heap_size);
 
-  MutexCreate(thr, pc, addr, true, true, true);
-  MutexLock(thr, pc, addr);
+  MutexPostLock(thr, pc, addr, MutexFlagLinkerInit | MutexFlagWriteReentrant |
+      MutexFlagDoPreLockOnPostLock);
 }
 
 void __tsan_java_mutex_unlock(jptr addr) {
@@ -201,8 +201,8 @@
   CHECK_GE(addr, jctx->heap_begin);
   CHECK_LT(addr, jctx->heap_begin + jctx->heap_size);
 
-  MutexCreate(thr, pc, addr, true, true, true);
-  MutexReadLock(thr, pc, addr);
+  MutexPostReadLock(thr, pc, addr, MutexFlagLinkerInit |
+      MutexFlagWriteReentrant | MutexFlagDoPreLockOnPostLock);
 }
 
 void __tsan_java_mutex_read_unlock(jptr addr) {
@@ -223,8 +223,8 @@
   CHECK_LT(addr, jctx->heap_begin + jctx->heap_size);
   CHECK_GT(rec, 0);
 
-  MutexCreate(thr, pc, addr, true, true, true);
-  MutexLock(thr, pc, addr, rec);
+  MutexPostLock(thr, pc, addr, MutexFlagLinkerInit | MutexFlagWriteReentrant |
+      MutexFlagDoPreLockOnPostLock | MutexFlagRecursiveLock, rec);
 }
 
 int __tsan_java_mutex_unlock_rec(jptr addr) {
@@ -234,7 +234,7 @@
   CHECK_GE(addr, jctx->heap_begin);
   CHECK_LT(addr, jctx->heap_begin + jctx->heap_size);
 
-  return MutexUnlock(thr, pc, addr, true);
+  return MutexUnlock(thr, pc, addr, MutexFlagRecursiveUnlock);
 }
 
 void __tsan_java_acquire(jptr addr) {
diff --git a/lib/tsan/rtl/tsan_libdispatch_mac.cc b/lib/tsan/rtl/tsan_libdispatch_mac.cc
index d8c689e..0bd0107 100644
--- a/lib/tsan/rtl/tsan_libdispatch_mac.cc
+++ b/lib/tsan/rtl/tsan_libdispatch_mac.cc
@@ -86,21 +86,23 @@
                                           void *orig_context,
                                           dispatch_function_t orig_work) {
   tsan_block_context_t *new_context =
-      (tsan_block_context_t *)user_alloc(thr, pc, sizeof(tsan_block_context_t));
+      (tsan_block_context_t *)user_alloc_internal(thr, pc,
+                                                  sizeof(tsan_block_context_t));
   new_context->queue = queue;
   new_context->orig_context = orig_context;
   new_context->orig_work = orig_work;
   new_context->free_context_in_callback = true;
   new_context->submitted_synchronously = false;
   new_context->is_barrier_block = false;
+  new_context->non_queue_sync_object = 0;
   return new_context;
 }
 
-#define GET_QUEUE_SYNC_VARS(context, q)                      \
-  bool is_queue_serial = q && IsQueueSerial(q);              \
-  uptr sync_ptr = (uptr)q ?: context->non_queue_sync_object; \
-  uptr serial_sync = (uptr)sync_ptr;                         \
-  uptr concurrent_sync = ((uptr)sync_ptr) + sizeof(uptr);    \
+#define GET_QUEUE_SYNC_VARS(context, q)                                  \
+  bool is_queue_serial = q && IsQueueSerial(q);                          \
+  uptr sync_ptr = (uptr)q ?: context->non_queue_sync_object;             \
+  uptr serial_sync = (uptr)sync_ptr;                                     \
+  uptr concurrent_sync = sync_ptr ? ((uptr)sync_ptr) + sizeof(uptr) : 0; \
   bool serial_task = context->is_barrier_block || is_queue_serial
 
 static void dispatch_sync_pre_execute(ThreadState *thr, uptr pc,
@@ -111,8 +113,8 @@
   dispatch_queue_t q = context->queue;
   do {
     GET_QUEUE_SYNC_VARS(context, q);
-    Acquire(thr, pc, serial_sync);
-    if (serial_task) Acquire(thr, pc, concurrent_sync);
+    if (serial_sync) Acquire(thr, pc, serial_sync);
+    if (serial_task && concurrent_sync) Acquire(thr, pc, concurrent_sync);
 
     if (q) q = GetTargetQueueFromQueue(q);
   } while (q);
@@ -126,7 +128,8 @@
   dispatch_queue_t q = context->queue;
   do {
     GET_QUEUE_SYNC_VARS(context, q);
-    Release(thr, pc, serial_task ? serial_sync : concurrent_sync);
+    if (serial_task && serial_sync) Release(thr, pc, serial_sync);
+    if (!serial_task && concurrent_sync) Release(thr, pc, concurrent_sync);
 
     if (q) q = GetTargetQueueFromQueue(q);
   } while (q);
@@ -174,7 +177,8 @@
   }
 
 #define DISPATCH_INTERCEPT_SYNC_B(name, barrier)                             \
-  TSAN_INTERCEPTOR(void, name, dispatch_queue_t q, dispatch_block_t block) { \
+  TSAN_INTERCEPTOR(void, name, dispatch_queue_t q,                           \
+                   DISPATCH_NOESCAPE dispatch_block_t block) {               \
     SCOPED_TSAN_INTERCEPTOR(name, q, block);                                 \
     SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_START();                           \
     dispatch_block_t heap_block = Block_copy(block);                         \
@@ -264,7 +268,7 @@
 // need to undefine the macro.
 #undef dispatch_once
 TSAN_INTERCEPTOR(void, dispatch_once, dispatch_once_t *predicate,
-                 dispatch_block_t block) {
+                 DISPATCH_NOESCAPE dispatch_block_t block) {
   SCOPED_INTERCEPTOR_RAW(dispatch_once, predicate, block);
   atomic_uint32_t *a = reinterpret_cast<atomic_uint32_t *>(predicate);
   u32 v = atomic_load(a, memory_order_acquire);
@@ -474,7 +478,8 @@
 }
 
 TSAN_INTERCEPTOR(void, dispatch_apply, size_t iterations,
-                 dispatch_queue_t queue, void (^block)(size_t)) {
+                 dispatch_queue_t queue,
+                 DISPATCH_NOESCAPE void (^block)(size_t)) {
   SCOPED_TSAN_INTERCEPTOR(dispatch_apply, iterations, queue, block);
 
   void *parent_to_child_sync = nullptr;
diff --git a/lib/tsan/rtl/tsan_malloc_mac.cc b/lib/tsan/rtl/tsan_malloc_mac.cc
index 8d31ccb..455c95d 100644
--- a/lib/tsan/rtl/tsan_malloc_mac.cc
+++ b/lib/tsan/rtl/tsan_malloc_mac.cc
@@ -26,7 +26,7 @@
 #define COMMON_MALLOC_FORCE_UNLOCK()
 #define COMMON_MALLOC_MEMALIGN(alignment, size) \
   void *p =                                     \
-      user_alloc(cur_thread(), StackTrace::GetCurrentPc(), size, alignment)
+      user_memalign(cur_thread(), StackTrace::GetCurrentPc(), alignment, size)
 #define COMMON_MALLOC_MALLOC(size)                             \
   if (cur_thread()->in_symbolizer) return InternalAlloc(size); \
   SCOPED_INTERCEPTOR_RAW(malloc, size);                        \
@@ -43,7 +43,7 @@
   if (cur_thread()->in_symbolizer)                            \
     return InternalAlloc(size, nullptr, GetPageSizeCached()); \
   SCOPED_INTERCEPTOR_RAW(valloc, size);                       \
-  void *p = user_alloc(thr, pc, size, GetPageSizeCached())
+  void *p = user_valloc(thr, pc, size)
 #define COMMON_MALLOC_FREE(ptr)                              \
   if (cur_thread()->in_symbolizer) return InternalFree(ptr); \
   SCOPED_INTERCEPTOR_RAW(free, ptr);                         \
diff --git a/lib/tsan/rtl/tsan_mman.cc b/lib/tsan/rtl/tsan_mman.cc
index 2dea249..1968023 100644
--- a/lib/tsan/rtl/tsan_mman.cc
+++ b/lib/tsan/rtl/tsan_mman.cc
@@ -10,8 +10,10 @@
 // This file is a part of ThreadSanitizer (TSan), a race detector.
 //
 //===----------------------------------------------------------------------===//
+#include "sanitizer_common/sanitizer_allocator_checks.h"
 #include "sanitizer_common/sanitizer_allocator_interface.h"
 #include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_errno.h"
 #include "sanitizer_common/sanitizer_placement_new.h"
 #include "tsan_mman.h"
 #include "tsan_rtl.h"
@@ -112,9 +114,8 @@
 }
 
 void InitializeAllocator() {
-  allocator()->Init(
-      common_flags()->allocator_may_return_null,
-      common_flags()->allocator_release_to_os_interval_ms);
+  SetAllocatorMayReturnNull(common_flags()->allocator_may_return_null);
+  allocator()->Init(common_flags()->allocator_release_to_os_interval_ms);
 }
 
 void InitializeAllocatorLate() {
@@ -149,11 +150,12 @@
   OutputReport(thr, rep);
 }
 
-void *user_alloc(ThreadState *thr, uptr pc, uptr sz, uptr align, bool signal) {
+void *user_alloc_internal(ThreadState *thr, uptr pc, uptr sz, uptr align,
+                          bool signal) {
   if ((sz >= (1ull << 40)) || (align >= (1ull << 40)))
-    return allocator()->ReturnNullOrDieOnBadRequest();
+    return Allocator::FailureHandler::OnBadRequest();
   void *p = allocator()->Allocate(&thr->proc()->alloc_cache, sz, align);
-  if (p == 0)
+  if (UNLIKELY(p == 0))
     return 0;
   if (ctx && ctx->initialized)
     OnUserAlloc(thr, pc, (uptr)p, sz, true);
@@ -162,15 +164,6 @@
   return p;
 }
 
-void *user_calloc(ThreadState *thr, uptr pc, uptr size, uptr n) {
-  if (CallocShouldReturnNullDueToOverflow(size, n))
-    return allocator()->ReturnNullOrDieOnBadRequest();
-  void *p = user_alloc(thr, pc, n * size);
-  if (p)
-    internal_memset(p, 0, n * size);
-  return p;
-}
-
 void user_free(ThreadState *thr, uptr pc, void *p, bool signal) {
   ScopedGlobalProcessor sgp;
   if (ctx && ctx->initialized)
@@ -180,6 +173,19 @@
     SignalUnsafeCall(thr, pc);
 }
 
+void *user_alloc(ThreadState *thr, uptr pc, uptr sz) {
+  return SetErrnoOnNull(user_alloc_internal(thr, pc, sz, kDefaultAlignment));
+}
+
+void *user_calloc(ThreadState *thr, uptr pc, uptr size, uptr n) {
+  if (UNLIKELY(CheckForCallocOverflow(size, n)))
+    return SetErrnoOnNull(Allocator::FailureHandler::OnBadRequest());
+  void *p = user_alloc_internal(thr, pc, n * size);
+  if (p)
+    internal_memset(p, 0, n * size);
+  return SetErrnoOnNull(p);
+}
+
 void OnUserAlloc(ThreadState *thr, uptr pc, uptr p, uptr sz, bool write) {
   DPrintf("#%d: alloc(%zu) = %p\n", thr->tid, sz, p);
   ctx->metamap.AllocBlock(thr, pc, p, sz);
@@ -200,15 +206,64 @@
 void *user_realloc(ThreadState *thr, uptr pc, void *p, uptr sz) {
   // FIXME: Handle "shrinking" more efficiently,
   // it seems that some software actually does this.
-  void *p2 = user_alloc(thr, pc, sz);
-  if (p2 == 0)
-    return 0;
-  if (p) {
-    uptr oldsz = user_alloc_usable_size(p);
-    internal_memcpy(p2, p, min(oldsz, sz));
+  if (!p)
+    return SetErrnoOnNull(user_alloc_internal(thr, pc, sz));
+  if (!sz) {
+    user_free(thr, pc, p);
+    return nullptr;
+  }
+  void *new_p = user_alloc_internal(thr, pc, sz);
+  if (new_p) {
+    uptr old_sz = user_alloc_usable_size(p);
+    internal_memcpy(new_p, p, min(old_sz, sz));
     user_free(thr, pc, p);
   }
-  return p2;
+  return SetErrnoOnNull(new_p);
+}
+
+void *user_memalign(ThreadState *thr, uptr pc, uptr align, uptr sz) {
+  if (UNLIKELY(!IsPowerOfTwo(align))) {
+    errno = errno_EINVAL;
+    return Allocator::FailureHandler::OnBadRequest();
+  }
+  return SetErrnoOnNull(user_alloc_internal(thr, pc, sz, align));
+}
+
+int user_posix_memalign(ThreadState *thr, uptr pc, void **memptr, uptr align,
+                        uptr sz) {
+  if (UNLIKELY(!CheckPosixMemalignAlignment(align))) {
+    Allocator::FailureHandler::OnBadRequest();
+    return errno_EINVAL;
+  }
+  void *ptr = user_alloc_internal(thr, pc, sz, align);
+  if (UNLIKELY(!ptr))
+    return errno_ENOMEM;
+  CHECK(IsAligned((uptr)ptr, align));
+  *memptr = ptr;
+  return 0;
+}
+
+void *user_aligned_alloc(ThreadState *thr, uptr pc, uptr align, uptr sz) {
+  if (UNLIKELY(!CheckAlignedAllocAlignmentAndSize(align, sz))) {
+    errno = errno_EINVAL;
+    return Allocator::FailureHandler::OnBadRequest();
+  }
+  return SetErrnoOnNull(user_alloc_internal(thr, pc, sz, align));
+}
+
+void *user_valloc(ThreadState *thr, uptr pc, uptr sz) {
+  return SetErrnoOnNull(user_alloc_internal(thr, pc, sz, GetPageSizeCached()));
+}
+
+void *user_pvalloc(ThreadState *thr, uptr pc, uptr sz) {
+  uptr PageSize = GetPageSizeCached();
+  if (UNLIKELY(CheckForPvallocOverflow(sz, PageSize))) {
+    errno = errno_ENOMEM;
+    return Allocator::FailureHandler::OnBadRequest();
+  }
+  // pvalloc(0) should allocate one page.
+  sz = sz ? RoundUpTo(sz, PageSize) : PageSize;
+  return SetErrnoOnNull(user_alloc_internal(thr, pc, sz, PageSize));
 }
 
 uptr user_alloc_usable_size(const void *p) {
@@ -295,6 +350,8 @@
 
 void __tsan_on_thread_idle() {
   ThreadState *thr = cur_thread();
+  thr->clock.ResetCached(&thr->proc()->clock_cache);
+  thr->last_sleep_clock.ResetCached(&thr->proc()->clock_cache);
   allocator()->SwallowCache(&thr->proc()->alloc_cache);
   internal_allocator()->SwallowCache(&thr->proc()->internal_alloc_cache);
   ctx->metamap.OnProcIdle(thr->proc());
diff --git a/lib/tsan/rtl/tsan_mman.h b/lib/tsan/rtl/tsan_mman.h
index 8cdeeb3..6042c5c 100644
--- a/lib/tsan/rtl/tsan_mman.h
+++ b/lib/tsan/rtl/tsan_mman.h
@@ -27,13 +27,20 @@
 void AllocatorPrintStats();
 
 // For user allocations.
-void *user_alloc(ThreadState *thr, uptr pc, uptr sz,
-                 uptr align = kDefaultAlignment, bool signal = true);
-void *user_calloc(ThreadState *thr, uptr pc, uptr sz, uptr n);
+void *user_alloc_internal(ThreadState *thr, uptr pc, uptr sz,
+                          uptr align = kDefaultAlignment, bool signal = true);
 // Does not accept NULL.
 void user_free(ThreadState *thr, uptr pc, void *p, bool signal = true);
+// Interceptor implementations.
+void *user_alloc(ThreadState *thr, uptr pc, uptr sz);
+void *user_calloc(ThreadState *thr, uptr pc, uptr sz, uptr n);
 void *user_realloc(ThreadState *thr, uptr pc, void *p, uptr sz);
-void *user_alloc_aligned(ThreadState *thr, uptr pc, uptr sz, uptr align);
+void *user_memalign(ThreadState *thr, uptr pc, uptr align, uptr sz);
+int user_posix_memalign(ThreadState *thr, uptr pc, void **memptr, uptr align,
+                        uptr sz);
+void *user_aligned_alloc(ThreadState *thr, uptr pc, uptr align, uptr sz);
+void *user_valloc(ThreadState *thr, uptr pc, uptr sz);
+void *user_pvalloc(ThreadState *thr, uptr pc, uptr sz);
 uptr user_alloc_usable_size(const void *p);
 
 // Invoking malloc/free hooks that may be installed by the user.
diff --git a/lib/tsan/rtl/tsan_new_delete.cc b/lib/tsan/rtl/tsan_new_delete.cc
index b6478bb..4d03145 100644
--- a/lib/tsan/rtl/tsan_new_delete.cc
+++ b/lib/tsan/rtl/tsan_new_delete.cc
@@ -12,6 +12,7 @@
 // Interceptors for operators new and delete.
 //===----------------------------------------------------------------------===//
 #include "interception/interception.h"
+#include "sanitizer_common/sanitizer_allocator.h"
 #include "sanitizer_common/sanitizer_internal_defs.h"
 #include "tsan_interceptors.h"
 
@@ -24,13 +25,15 @@
 DECLARE_REAL(void *, malloc, uptr size)
 DECLARE_REAL(void, free, void *ptr)
 
-#define OPERATOR_NEW_BODY(mangled_name) \
+// TODO(alekseys): throw std::bad_alloc instead of dying on OOM.
+#define OPERATOR_NEW_BODY(mangled_name, nothrow) \
   if (cur_thread()->in_symbolizer) \
     return InternalAlloc(size); \
   void *p = 0; \
   {  \
     SCOPED_INTERCEPTOR_RAW(mangled_name, size); \
     p = user_alloc(thr, pc, size); \
+    if (!nothrow && UNLIKELY(!p)) DieOnFailure::OnOOM(); \
   }  \
   invoke_malloc_hook(p, size);  \
   return p;
@@ -38,25 +41,25 @@
 SANITIZER_INTERFACE_ATTRIBUTE
 void *operator new(__sanitizer::uptr size);
 void *operator new(__sanitizer::uptr size) {
-  OPERATOR_NEW_BODY(_Znwm);
+  OPERATOR_NEW_BODY(_Znwm, false /*nothrow*/);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
 void *operator new[](__sanitizer::uptr size);
 void *operator new[](__sanitizer::uptr size) {
-  OPERATOR_NEW_BODY(_Znam);
+  OPERATOR_NEW_BODY(_Znam, false /*nothrow*/);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
 void *operator new(__sanitizer::uptr size, std::nothrow_t const&);
 void *operator new(__sanitizer::uptr size, std::nothrow_t const&) {
-  OPERATOR_NEW_BODY(_ZnwmRKSt9nothrow_t);
+  OPERATOR_NEW_BODY(_ZnwmRKSt9nothrow_t, true /*nothrow*/);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
 void *operator new[](__sanitizer::uptr size, std::nothrow_t const&);
 void *operator new[](__sanitizer::uptr size, std::nothrow_t const&) {
-  OPERATOR_NEW_BODY(_ZnamRKSt9nothrow_t);
+  OPERATOR_NEW_BODY(_ZnamRKSt9nothrow_t, true /*nothrow*/);
 }
 
 #define OPERATOR_DELETE_BODY(mangled_name) \
diff --git a/lib/tsan/rtl/tsan_platform.h b/lib/tsan/rtl/tsan_platform.h
index 1dd9d91..4b97713 100644
--- a/lib/tsan/rtl/tsan_platform.h
+++ b/lib/tsan/rtl/tsan_platform.h
@@ -42,6 +42,19 @@
 7b00 0000 0000 - 7c00 0000 0000: heap
 7c00 0000 0000 - 7e80 0000 0000: -
 7e80 0000 0000 - 8000 0000 0000: modules and main thread stack
+
+C/C++ on netbsd/amd64 can reuse the same mapping:
+ * The address space starts from 0x1000 (option with 0x0) and ends with
+   0x7f7ffffff000.
+ * LoAppMem-kHeapMemEnd can be reused as it is.
+ * No VDSO support.
+ * No MidAppMem region.
+ * No additional HeapMem region.
+ * HiAppMem contains the stack, loader, shared libraries and heap.
+ * Stack on NetBSD/amd64 has prereserved 128MB.
+ * Heap grows downwards (top-down).
+ * ASLR must be disabled per-process or globally.
+
 */
 struct Mapping {
   static const uptr kMetaShadowBeg = 0x300000000000ull;
@@ -100,6 +113,37 @@
 };
 
 #define TSAN_MID_APP_RANGE 1
+#elif defined(__aarch64__) && defined(__APPLE__)
+/*
+C/C++ on Darwin/iOS/ARM64 (36-bit VMA, 64 GB VM)
+0000 0000 00 - 0100 0000 00: -                                    (4 GB)
+0100 0000 00 - 0200 0000 00: main binary, modules, thread stacks  (4 GB)
+0200 0000 00 - 0300 0000 00: heap                                 (4 GB)
+0300 0000 00 - 0400 0000 00: -                                    (4 GB)
+0400 0000 00 - 0c00 0000 00: shadow memory                       (32 GB)
+0c00 0000 00 - 0d00 0000 00: -                                    (4 GB)
+0d00 0000 00 - 0e00 0000 00: metainfo                             (4 GB)
+0e00 0000 00 - 0f00 0000 00: -                                    (4 GB)
+0f00 0000 00 - 1000 0000 00: traces                               (4 GB)
+*/
+struct Mapping {
+  static const uptr kLoAppMemBeg   = 0x0100000000ull;
+  static const uptr kLoAppMemEnd   = 0x0200000000ull;
+  static const uptr kHeapMemBeg    = 0x0200000000ull;
+  static const uptr kHeapMemEnd    = 0x0300000000ull;
+  static const uptr kShadowBeg     = 0x0400000000ull;
+  static const uptr kShadowEnd     = 0x0c00000000ull;
+  static const uptr kMetaShadowBeg = 0x0d00000000ull;
+  static const uptr kMetaShadowEnd = 0x0e00000000ull;
+  static const uptr kTraceMemBeg   = 0x0f00000000ull;
+  static const uptr kTraceMemEnd   = 0x1000000000ull;
+  static const uptr kHiAppMemBeg   = 0x1000000000ull;
+  static const uptr kHiAppMemEnd   = 0x1000000000ull;
+  static const uptr kAppMemMsk     =          0x0ull;
+  static const uptr kAppMemXor     =          0x0ull;
+  static const uptr kVdsoBeg       = 0x7000000000000000ull;
+};
+
 #elif defined(__aarch64__)
 // AArch64 supports multiple VMA which leads to multiple address transformation
 // functions.  To support these multiple VMAS transformations and mappings TSAN
@@ -389,7 +433,7 @@
 
 template<int Type>
 uptr MappingArchImpl(void) {
-#ifdef __aarch64__
+#if defined(__aarch64__) && !defined(__APPLE__)
   switch (vmaSize) {
     case 39: return MappingImpl<Mapping39, Type>();
     case 42: return MappingImpl<Mapping42, Type>();
@@ -542,7 +586,7 @@
 
 ALWAYS_INLINE
 bool IsAppMem(uptr mem) {
-#ifdef __aarch64__
+#if defined(__aarch64__) && !defined(__APPLE__)
   switch (vmaSize) {
     case 39: return IsAppMemImpl<Mapping39>(mem);
     case 42: return IsAppMemImpl<Mapping42>(mem);
@@ -569,7 +613,7 @@
 
 ALWAYS_INLINE
 bool IsShadowMem(uptr mem) {
-#ifdef __aarch64__
+#if defined(__aarch64__) && !defined(__APPLE__)
   switch (vmaSize) {
     case 39: return IsShadowMemImpl<Mapping39>(mem);
     case 42: return IsShadowMemImpl<Mapping42>(mem);
@@ -596,7 +640,7 @@
 
 ALWAYS_INLINE
 bool IsMetaMem(uptr mem) {
-#ifdef __aarch64__
+#if defined(__aarch64__) && !defined(__APPLE__)
   switch (vmaSize) {
     case 39: return IsMetaMemImpl<Mapping39>(mem);
     case 42: return IsMetaMemImpl<Mapping42>(mem);
@@ -633,7 +677,7 @@
 
 ALWAYS_INLINE
 uptr MemToShadow(uptr x) {
-#ifdef __aarch64__
+#if defined(__aarch64__) && !defined(__APPLE__)
   switch (vmaSize) {
     case 39: return MemToShadowImpl<Mapping39>(x);
     case 42: return MemToShadowImpl<Mapping42>(x);
@@ -672,7 +716,7 @@
 
 ALWAYS_INLINE
 u32 *MemToMeta(uptr x) {
-#ifdef __aarch64__
+#if defined(__aarch64__) && !defined(__APPLE__)
   switch (vmaSize) {
     case 39: return MemToMetaImpl<Mapping39>(x);
     case 42: return MemToMetaImpl<Mapping42>(x);
@@ -724,7 +768,7 @@
 
 ALWAYS_INLINE
 uptr ShadowToMem(uptr s) {
-#ifdef __aarch64__
+#if defined(__aarch64__) && !defined(__APPLE__)
   switch (vmaSize) {
     case 39: return ShadowToMemImpl<Mapping39>(s);
     case 42: return ShadowToMemImpl<Mapping42>(s);
@@ -759,7 +803,7 @@
 
 ALWAYS_INLINE
 uptr GetThreadTrace(int tid) {
-#ifdef __aarch64__
+#if defined(__aarch64__) && !defined(__APPLE__)
   switch (vmaSize) {
     case 39: return GetThreadTraceImpl<Mapping39>(tid);
     case 42: return GetThreadTraceImpl<Mapping42>(tid);
@@ -789,7 +833,7 @@
 
 ALWAYS_INLINE
 uptr GetThreadTraceHeader(int tid) {
-#ifdef __aarch64__
+#if defined(__aarch64__) && !defined(__APPLE__)
   switch (vmaSize) {
     case 39: return GetThreadTraceHeaderImpl<Mapping39>(tid);
     case 42: return GetThreadTraceHeaderImpl<Mapping42>(tid);
@@ -816,6 +860,7 @@
 void WriteMemoryProfile(char *buf, uptr buf_size, uptr nthread, uptr nlive);
 int ExtractResolvFDs(void *state, int *fds, int nfd);
 int ExtractRecvmsgFDs(void *msg, int *fds, int nfd);
+void ImitateTlsWrite(ThreadState *thr, uptr tls_addr, uptr tls_size);
 
 int call_pthread_cancel_with_cleanup(int(*fn)(void *c, void *m,
     void *abstime), void *c, void *m, void *abstime,
diff --git a/lib/tsan/rtl/tsan_platform_linux.cc b/lib/tsan/rtl/tsan_platform_linux.cc
index 3313288..216eef9 100644
--- a/lib/tsan/rtl/tsan_platform_linux.cc
+++ b/lib/tsan/rtl/tsan_platform_linux.cc
@@ -14,11 +14,12 @@
 
 
 #include "sanitizer_common/sanitizer_platform.h"
-#if SANITIZER_LINUX || SANITIZER_FREEBSD
+#if SANITIZER_LINUX || SANITIZER_FREEBSD || SANITIZER_NETBSD
 
 #include "sanitizer_common/sanitizer_common.h"
 #include "sanitizer_common/sanitizer_libc.h"
 #include "sanitizer_common/sanitizer_linux.h"
+#include "sanitizer_common/sanitizer_platform_limits_netbsd.h"
 #include "sanitizer_common/sanitizer_platform_limits_posix.h"
 #include "sanitizer_common/sanitizer_posix.h"
 #include "sanitizer_common/sanitizer_procmaps.h"
@@ -47,7 +48,6 @@
 #include <sys/resource.h>
 #include <sys/stat.h>
 #include <unistd.h>
-#include <errno.h>
 #include <sched.h>
 #include <dlfcn.h>
 #if SANITIZER_LINUX
@@ -182,17 +182,15 @@
   }
   // Map the file into shadow of .rodata sections.
   MemoryMappingLayout proc_maps(/*cache_enabled*/true);
-  uptr start, end, offset, prot;
   // Reusing the buffer 'name'.
-  while (proc_maps.Next(&start, &end, &offset, name, ARRAY_SIZE(name), &prot)) {
-    if (name[0] != 0 && name[0] != '['
-        && (prot & MemoryMappingLayout::kProtectionRead)
-        && (prot & MemoryMappingLayout::kProtectionExecute)
-        && !(prot & MemoryMappingLayout::kProtectionWrite)
-        && IsAppMem(start)) {
+  MemoryMappedSegment segment(name, ARRAY_SIZE(name));
+  while (proc_maps.Next(&segment)) {
+    if (segment.filename[0] != 0 && segment.filename[0] != '[' &&
+        segment.IsReadable() && segment.IsExecutable() &&
+        !segment.IsWritable() && IsAppMem(segment.start)) {
       // Assume it's .rodata
-      char *shadow_start = (char*)MemToShadow(start);
-      char *shadow_end = (char*)MemToShadow(end);
+      char *shadow_start = (char *)MemToShadow(segment.start);
+      char *shadow_end = (char *)MemToShadow(segment.end);
       for (char *p = shadow_start; p < shadow_end; p += marker.size()) {
         internal_mmap(p, Min<uptr>(marker.size(), shadow_end - p),
                       PROT_READ, MAP_PRIVATE | MAP_FIXED, fd, 0);
@@ -289,7 +287,7 @@
 int ExtractResolvFDs(void *state, int *fds, int nfd) {
 #if SANITIZER_LINUX && !SANITIZER_ANDROID
   int cnt = 0;
-  __res_state *statp = (__res_state*)state;
+  struct __res_state *statp = (struct __res_state*)state;
   for (int i = 0; i < MAXNS && cnt < nfd; i++) {
     if (statp->_u._ext.nsaddrs[i] && statp->_u._ext.nssocks[i] != -1)
       fds[cnt++] = statp->_u._ext.nssocks[i];
@@ -320,6 +318,20 @@
   return res;
 }
 
+void ImitateTlsWrite(ThreadState *thr, uptr tls_addr, uptr tls_size) {
+  // Check that the thr object is in tls;
+  const uptr thr_beg = (uptr)thr;
+  const uptr thr_end = (uptr)thr + sizeof(*thr);
+  CHECK_GE(thr_beg, tls_addr);
+  CHECK_LE(thr_beg, tls_addr + tls_size);
+  CHECK_GE(thr_end, tls_addr);
+  CHECK_LE(thr_end, tls_addr + tls_size);
+  // Since the thr object is huge, skip it.
+  MemoryRangeImitateWrite(thr, /*pc=*/2, tls_addr, thr_beg - tls_addr);
+  MemoryRangeImitateWrite(thr, /*pc=*/2, thr_end,
+                          tls_addr + tls_size - thr_end);
+}
+
 // Note: this function runs with async signals enabled,
 // so it must not touch any tsan state.
 int call_pthread_cancel_with_cleanup(int(*fn)(void *c, void *m,
@@ -341,36 +353,22 @@
 
 #if !SANITIZER_GO
 #if SANITIZER_ANDROID
-
-#if defined(__aarch64__)
-# define __get_tls() \
-    ({ void** __val; __asm__("mrs %0, tpidr_el0" : "=r"(__val)); __val; })
-#elif defined(__x86_64__)
-# define __get_tls() \
-    ({ void** __val; __asm__("mov %%fs:0, %0" : "=r"(__val)); __val; })
-#else
-#error unsupported architecture
-#endif
-
-// On Android, __thread is not supported. So we store the pointer to ThreadState
-// in TLS_SLOT_TSAN, which is the tls slot allocated by Android bionic for tsan.
-static const int TLS_SLOT_TSAN = 8;
 // On Android, one thread can call intercepted functions after
 // DestroyThreadState(), so add a fake thread state for "dead" threads.
 static ThreadState *dead_thread_state = nullptr;
 
 ThreadState *cur_thread() {
-  ThreadState* thr = (ThreadState*)__get_tls()[TLS_SLOT_TSAN];
+  ThreadState* thr = reinterpret_cast<ThreadState*>(*get_android_tls_ptr());
   if (thr == nullptr) {
     __sanitizer_sigset_t emptyset;
     internal_sigfillset(&emptyset);
     __sanitizer_sigset_t oldset;
     CHECK_EQ(0, internal_sigprocmask(SIG_SETMASK, &emptyset, &oldset));
-    thr = reinterpret_cast<ThreadState*>(__get_tls()[TLS_SLOT_TSAN]);
+    thr = reinterpret_cast<ThreadState*>(*get_android_tls_ptr());
     if (thr == nullptr) {
       thr = reinterpret_cast<ThreadState*>(MmapOrDie(sizeof(ThreadState),
                                                      "ThreadState"));
-      __get_tls()[TLS_SLOT_TSAN] = thr;
+      *get_android_tls_ptr() = reinterpret_cast<uptr>(thr);
       if (dead_thread_state == nullptr) {
         dead_thread_state = reinterpret_cast<ThreadState*>(
             MmapOrDie(sizeof(ThreadState), "ThreadState"));
@@ -392,9 +390,9 @@
   internal_sigfillset(&emptyset);
   __sanitizer_sigset_t oldset;
   CHECK_EQ(0, internal_sigprocmask(SIG_SETMASK, &emptyset, &oldset));
-  ThreadState* thr = (ThreadState*)__get_tls()[TLS_SLOT_TSAN];
+  ThreadState* thr = reinterpret_cast<ThreadState*>(*get_android_tls_ptr());
   if (thr != dead_thread_state) {
-    __get_tls()[TLS_SLOT_TSAN] = dead_thread_state;
+    *get_android_tls_ptr() = reinterpret_cast<uptr>(dead_thread_state);
     UnmapOrDie(thr, sizeof(ThreadState));
   }
   CHECK_EQ(0, internal_sigprocmask(SIG_SETMASK, &oldset, nullptr));
@@ -404,4 +402,4 @@
 
 }  // namespace __tsan
 
-#endif  // SANITIZER_LINUX || SANITIZER_FREEBSD
+#endif  // SANITIZER_LINUX || SANITIZER_FREEBSD || SANITIZER_NETBSD
diff --git a/lib/tsan/rtl/tsan_platform_mac.cc b/lib/tsan/rtl/tsan_platform_mac.cc
index b8d3d55..4570286 100644
--- a/lib/tsan/rtl/tsan_platform_mac.cc
+++ b/lib/tsan/rtl/tsan_platform_mac.cc
@@ -75,12 +75,18 @@
 static uptr main_thread_identity = 0;
 ALIGNED(64) static char main_thread_state[sizeof(ThreadState)];
 
+ThreadState **cur_thread_location() {
+  ThreadState **thread_identity = (ThreadState **)pthread_self();
+  return ((uptr)thread_identity == main_thread_identity) ? nullptr
+                                                         : thread_identity;
+}
+
 ThreadState *cur_thread() {
-  uptr thread_identity = (uptr)pthread_self();
-  if (thread_identity == main_thread_identity || main_thread_identity == 0) {
+  ThreadState **thr_state_loc = cur_thread_location();
+  if (thr_state_loc == nullptr || main_thread_identity == 0) {
     return (ThreadState *)&main_thread_state;
   }
-  ThreadState **fake_tls = (ThreadState **)MemToShadow(thread_identity);
+  ThreadState **fake_tls = (ThreadState **)MemToShadow((uptr)thr_state_loc);
   ThreadState *thr = (ThreadState *)SignalSafeGetOrAllocate(
       (uptr *)fake_tls, sizeof(ThreadState));
   return thr;
@@ -90,13 +96,13 @@
 // munmap first and then clear `fake_tls`; if we receive a signal in between,
 // handler will try to access the unmapped ThreadState.
 void cur_thread_finalize() {
-  uptr thread_identity = (uptr)pthread_self();
-  if (thread_identity == main_thread_identity) {
+  ThreadState **thr_state_loc = cur_thread_location();
+  if (thr_state_loc == nullptr) {
     // Calling dispatch_main() or xpc_main() actually invokes pthread_exit to
     // exit the main thread. Let's keep the main thread's ThreadState.
     return;
   }
-  ThreadState **fake_tls = (ThreadState **)MemToShadow(thread_identity);
+  ThreadState **fake_tls = (ThreadState **)MemToShadow((uptr)thr_state_loc);
   internal_munmap(*fake_tls, sizeof(ThreadState));
   *fake_tls = nullptr;
 }
@@ -161,8 +167,8 @@
 #else  // !SANITIZER_GO
     "app      (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n"
 #endif
-    "stacks: %ld unique IDs, %ld kB allocated\n"
-    "threads: %ld total, %ld live\n"
+    "stacks: %zd unique IDs, %zd kB allocated\n"
+    "threads: %zd total, %zd live\n"
     "------------------------------\n",
     ShadowBeg(), ShadowEnd(), shadow_res / 1024, shadow_dirty / 1024,
     MetaShadowBeg(), MetaShadowEnd(), meta_res / 1024, meta_dirty / 1024,
@@ -224,6 +230,14 @@
 #endif
 
 void InitializePlatformEarly() {
+#if defined(__aarch64__)
+  uptr max_vm = GetMaxVirtualAddress() + 1;
+  if (max_vm != Mapping::kHiAppMemEnd) {
+    Printf("ThreadSanitizer: unsupported vm address limit %p, expected %p.\n",
+           max_vm, Mapping::kHiAppMemEnd);
+    Die();
+  }
+#endif
 }
 
 void InitializePlatform() {
@@ -240,6 +254,29 @@
 }
 
 #if !SANITIZER_GO
+void ImitateTlsWrite(ThreadState *thr, uptr tls_addr, uptr tls_size) {
+  // The pointer to the ThreadState object is stored in the shadow memory
+  // of the tls.
+  uptr tls_end = tls_addr + tls_size;
+  ThreadState **thr_state_loc = cur_thread_location();
+  if (thr_state_loc == nullptr) {
+    MemoryRangeImitateWrite(thr, /*pc=*/2, tls_addr, tls_size);
+  } else {
+    uptr thr_state_start = (uptr)thr_state_loc;
+    uptr thr_state_end = thr_state_start + sizeof(uptr);
+    CHECK_GE(thr_state_start, tls_addr);
+    CHECK_LE(thr_state_start, tls_addr + tls_size);
+    CHECK_GE(thr_state_end, tls_addr);
+    CHECK_LE(thr_state_end, tls_addr + tls_size);
+    MemoryRangeImitateWrite(thr, /*pc=*/2, tls_addr,
+                            thr_state_start - tls_addr);
+    MemoryRangeImitateWrite(thr, /*pc=*/2, thr_state_end,
+                            tls_end - thr_state_end);
+  }
+}
+#endif
+
+#if !SANITIZER_GO
 // Note: this function runs with async signals enabled,
 // so it must not touch any tsan state.
 int call_pthread_cancel_with_cleanup(int(*fn)(void *c, void *m,
diff --git a/lib/tsan/rtl/tsan_platform_posix.cc b/lib/tsan/rtl/tsan_platform_posix.cc
index 0732c83..e4f90a8 100644
--- a/lib/tsan/rtl/tsan_platform_posix.cc
+++ b/lib/tsan/rtl/tsan_platform_posix.cc
@@ -46,6 +46,9 @@
 #elif defined(__mips64)
   const uptr kMadviseRangeBeg  = 0xff00000000ull;
   const uptr kMadviseRangeSize = 0x0100000000ull;
+#elif defined(__aarch64__) && defined(__APPLE__)
+  uptr kMadviseRangeBeg = LoAppMemBeg();
+  uptr kMadviseRangeSize = LoAppMemEnd() - LoAppMemBeg();
 #elif defined(__aarch64__)
   uptr kMadviseRangeBeg = 0;
   uptr kMadviseRangeSize = 0;
@@ -115,21 +118,24 @@
 void CheckAndProtect() {
   // Ensure that the binary is indeed compiled with -pie.
   MemoryMappingLayout proc_maps(true);
-  uptr p, end, prot;
-  while (proc_maps.Next(&p, &end, 0, 0, 0, &prot)) {
-    if (IsAppMem(p))
+  MemoryMappedSegment segment;
+  while (proc_maps.Next(&segment)) {
+    if (IsAppMem(segment.start)) continue;
+    if (segment.start >= HeapMemEnd() && segment.start < HeapEnd()) continue;
+    if (segment.protection == 0)  // Zero page or mprotected.
       continue;
-    if (p >= HeapMemEnd() &&
-        p < HeapEnd())
-      continue;
-    if (prot == 0)  // Zero page or mprotected.
-      continue;
-    if (p >= VdsoBeg())  // vdso
+    if (segment.start >= VdsoBeg())  // vdso
       break;
-    Printf("FATAL: ThreadSanitizer: unexpected memory mapping %p-%p\n", p, end);
+    Printf("FATAL: ThreadSanitizer: unexpected memory mapping %p-%p\n",
+           segment.start, segment.end);
     Die();
   }
 
+#if defined(__aarch64__) && defined(__APPLE__)
+  ProtectRange(HeapMemEnd(), ShadowBeg());
+  ProtectRange(ShadowEnd(), MetaShadowBeg());
+  ProtectRange(MetaShadowEnd(), TraceMemBeg());
+#else
   ProtectRange(LoAppMemEnd(), ShadowBeg());
   ProtectRange(ShadowEnd(), MetaShadowBeg());
 #ifdef TSAN_MID_APP_RANGE
@@ -143,6 +149,7 @@
   ProtectRange(TraceMemBeg(), TraceMemEnd());
   ProtectRange(TraceMemEnd(), HeapMemBeg());
   ProtectRange(HeapEnd(), HiAppMemBeg());
+#endif
 }
 #endif
 
diff --git a/lib/tsan/rtl/tsan_report.cc b/lib/tsan/rtl/tsan_report.cc
index 7de0084..be5d6b7 100644
--- a/lib/tsan/rtl/tsan_report.cc
+++ b/lib/tsan/rtl/tsan_report.cc
@@ -13,6 +13,7 @@
 #include "tsan_report.h"
 #include "tsan_platform.h"
 #include "tsan_rtl.h"
+#include "sanitizer_common/sanitizer_file.h"
 #include "sanitizer_common/sanitizer_placement_new.h"
 #include "sanitizer_common/sanitizer_report_decorator.h"
 #include "sanitizer_common/sanitizer_stacktrace_printer.h"
@@ -38,22 +39,16 @@
 class Decorator: public __sanitizer::SanitizerCommonDecorator {
  public:
   Decorator() : SanitizerCommonDecorator() { }
-  const char *Warning()    { return Red(); }
-  const char *EndWarning() { return Default(); }
   const char *Access()     { return Blue(); }
-  const char *EndAccess()  { return Default(); }
   const char *ThreadDescription()    { return Cyan(); }
-  const char *EndThreadDescription() { return Default(); }
   const char *Location()   { return Green(); }
-  const char *EndLocation() { return Default(); }
   const char *Sleep()   { return Yellow(); }
-  const char *EndSleep() { return Default(); }
   const char *Mutex()   { return Magenta(); }
-  const char *EndMutex() { return Default(); }
 };
 
 ReportDesc::ReportDesc()
-    : stacks(MBlockReportStack)
+    : tag(kExternalTagNone)
+    , stacks(MBlockReportStack)
     , mops(MBlockReportMop)
     , locs(MBlockReportLoc)
     , mutexes(MBlockReportMutex)
@@ -81,7 +76,7 @@
   return buf;
 }
 
-static const char *ReportTypeString(ReportType typ) {
+static const char *ReportTypeString(ReportType typ, uptr tag) {
   if (typ == ReportTypeRace)
     return "data race";
   if (typ == ReportTypeVptrRace)
@@ -90,8 +85,10 @@
     return "heap-use-after-free";
   if (typ == ReportTypeVptrUseAfterFree)
     return "heap-use-after-free (virtual call vs free)";
-  if (typ == ReportTypeExternalRace)
-    return "race on a library object";
+  if (typ == ReportTypeExternalRace) {
+    const char *str = GetReportHeaderFromTag(tag);
+    return str ? str : "race on external object";
+  }
   if (typ == ReportTypeThreadLeak)
     return "thread leak";
   if (typ == ReportTypeMutexDestroyLocked)
@@ -155,27 +152,29 @@
 }
 
 static const char *ExternalMopDesc(bool first, bool write) {
-  return first ? (write ? "Mutating" : "Read-only")
-               : (write ? "Previous mutating" : "Previous read-only");
+  return first ? (write ? "Modifying" : "Read-only")
+               : (write ? "Previous modifying" : "Previous read-only");
 }
 
 static void PrintMop(const ReportMop *mop, bool first) {
   Decorator d;
   char thrbuf[kThreadBufSize];
   Printf("%s", d.Access());
-  const char *object_type = GetObjectTypeFromTag(mop->external_tag);
-  if (!object_type) {
+  if (mop->external_tag == kExternalTagNone) {
     Printf("  %s of size %d at %p by %s",
            MopDesc(first, mop->write, mop->atomic), mop->size,
            (void *)mop->addr, thread_name(thrbuf, mop->tid));
   } else {
-    Printf("  %s access of object %s at %p by %s",
+    const char *object_type = GetObjectTypeFromTag(mop->external_tag);
+    if (object_type == nullptr)
+        object_type = "external object";
+    Printf("  %s access of %s at %p by %s",
            ExternalMopDesc(first, mop->write), object_type,
            (void *)mop->addr, thread_name(thrbuf, mop->tid));
   }
   PrintMutexSet(mop->mset);
   Printf(":\n");
-  Printf("%s", d.EndAccess());
+  Printf("%s", d.Default());
   PrintStack(mop->stack);
 }
 
@@ -202,7 +201,7 @@
              loc->heap_chunk_size, loc->heap_chunk_start,
              thread_name(thrbuf, loc->tid));
     } else {
-      Printf("  Location is %s object of size %zu at %p allocated by %s:\n",
+      Printf("  Location is %s of size %zu at %p allocated by %s:\n",
              object_type, loc->heap_chunk_size, loc->heap_chunk_start,
              thread_name(thrbuf, loc->tid));
     }
@@ -216,20 +215,20 @@
         loc->fd, thread_name(thrbuf, loc->tid));
     print_stack = true;
   }
-  Printf("%s", d.EndLocation());
+  Printf("%s", d.Default());
   if (print_stack)
     PrintStack(loc->stack);
 }
 
 static void PrintMutexShort(const ReportMutex *rm, const char *after) {
   Decorator d;
-  Printf("%sM%zd%s%s", d.Mutex(), rm->id, d.EndMutex(), after);
+  Printf("%sM%zd%s%s", d.Mutex(), rm->id, d.Default(), after);
 }
 
 static void PrintMutexShortWithAddress(const ReportMutex *rm,
                                        const char *after) {
   Decorator d;
-  Printf("%sM%zd (%p)%s%s", d.Mutex(), rm->id, rm->addr, d.EndMutex(), after);
+  Printf("%sM%zd (%p)%s%s", d.Mutex(), rm->id, rm->addr, d.Default(), after);
 }
 
 static void PrintMutex(const ReportMutex *rm) {
@@ -237,11 +236,11 @@
   if (rm->destroyed) {
     Printf("%s", d.Mutex());
     Printf("  Mutex M%llu is already destroyed.\n\n", rm->id);
-    Printf("%s", d.EndMutex());
+    Printf("%s", d.Default());
   } else {
     Printf("%s", d.Mutex());
     Printf("  Mutex M%llu (%p) created at:\n", rm->id, rm->addr);
-    Printf("%s", d.EndMutex());
+    Printf("%s", d.Default());
     PrintStack(rm->stack);
   }
 }
@@ -259,7 +258,7 @@
   if (rt->workerthread) {
     Printf(" (tid=%zu, %s) is a GCD worker thread\n", rt->os_id, thread_status);
     Printf("\n");
-    Printf("%s", d.EndThreadDescription());
+    Printf("%s", d.Default());
     return;
   }
   Printf(" (tid=%zu, %s) created by %s", rt->os_id, thread_status,
@@ -267,7 +266,7 @@
   if (rt->stack)
     Printf(" at:");
   Printf("\n");
-  Printf("%s", d.EndThreadDescription());
+  Printf("%s", d.Default());
   PrintStack(rt->stack);
 }
 
@@ -275,7 +274,7 @@
   Decorator d;
   Printf("%s", d.Sleep());
   Printf("  As if synchronized via sleep:\n");
-  Printf("%s", d.EndSleep());
+  Printf("%s", d.Default());
   PrintStack(s);
 }
 
@@ -315,11 +314,11 @@
 void PrintReport(const ReportDesc *rep) {
   Decorator d;
   Printf("==================\n");
-  const char *rep_typ_str = ReportTypeString(rep->typ);
+  const char *rep_typ_str = ReportTypeString(rep->typ, rep->tag);
   Printf("%s", d.Warning());
   Printf("WARNING: ThreadSanitizer: %s (pid=%d)\n", rep_typ_str,
          (int)internal_getpid());
-  Printf("%s", d.EndWarning());
+  Printf("%s", d.Default());
 
   if (rep->typ == ReportTypeDeadlock) {
     char thrbuf[kThreadBufSize];
@@ -337,7 +336,7 @@
       PrintMutexShort(rep->mutexes[i], " in ");
       Printf("%s", d.ThreadDescription());
       Printf("%s:\n", thread_name(thrbuf, rep->unique_tids[i]));
-      Printf("%s", d.EndThreadDescription());
+      Printf("%s", d.Default());
       if (flags()->second_deadlock_stack) {
         PrintStack(rep->stacks[2*i]);
         Printf("  Mutex ");
diff --git a/lib/tsan/rtl/tsan_report.h b/lib/tsan/rtl/tsan_report.h
index 8d8ae0f..bc1582f 100644
--- a/lib/tsan/rtl/tsan_report.h
+++ b/lib/tsan/rtl/tsan_report.h
@@ -90,7 +90,7 @@
 
 struct ReportThread {
   int id;
-  uptr os_id;
+  tid_t os_id;
   bool running;
   bool workerthread;
   char *name;
@@ -108,6 +108,7 @@
 class ReportDesc {
  public:
   ReportType typ;
+  uptr tag;
   Vector<ReportStack*> stacks;
   Vector<ReportMop*> mops;
   Vector<ReportLocation*> locs;
diff --git a/lib/tsan/rtl/tsan_rtl.cc b/lib/tsan/rtl/tsan_rtl.cc
index bc5991c..882e067 100644
--- a/lib/tsan/rtl/tsan_rtl.cc
+++ b/lib/tsan/rtl/tsan_rtl.cc
@@ -14,6 +14,7 @@
 
 #include "sanitizer_common/sanitizer_atomic.h"
 #include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_file.h"
 #include "sanitizer_common/sanitizer_libc.h"
 #include "sanitizer_common/sanitizer_stackdepot.h"
 #include "sanitizer_common/sanitizer_placement_new.h"
@@ -104,7 +105,8 @@
   , racy_stacks(MBlockRacyStacks)
   , racy_addresses(MBlockRacyAddresses)
   , fired_suppressions_mtx(MutexTypeFired, StatMtxFired)
-  , fired_suppressions(8) {
+  , fired_suppressions(8)
+  , clock_alloc("clock allocator") {
 }
 
 // The objects are allocated in TLS, so one may rely on zero-initialization.
@@ -866,9 +868,8 @@
   // Don't want to touch lots of shadow memory.
   // If a program maps 10MB stack, there is no need reset the whole range.
   size = (size + (kShadowCell - 1)) & ~(kShadowCell - 1);
-  // UnmapOrDie/MmapFixedNoReserve does not work on Windows,
-  // so we do it only for C/C++.
-  if (SANITIZER_GO || size < common_flags()->clear_shadow_mmap_threshold) {
+  // UnmapOrDie/MmapFixedNoReserve does not work on Windows.
+  if (SANITIZER_WINDOWS || size < common_flags()->clear_shadow_mmap_threshold) {
     u64 *p = (u64*)MemToShadow(addr);
     CHECK(IsShadowMem((uptr)p));
     CHECK(IsShadowMem((uptr)(p + size * kShadowCnt / kShadowCell - 1)));
@@ -980,21 +981,21 @@
   thr->shadow_stack_pos--;
 }
 
-void ThreadIgnoreBegin(ThreadState *thr, uptr pc) {
+void ThreadIgnoreBegin(ThreadState *thr, uptr pc, bool save_stack) {
   DPrintf("#%d: ThreadIgnoreBegin\n", thr->tid);
   thr->ignore_reads_and_writes++;
   CHECK_GT(thr->ignore_reads_and_writes, 0);
   thr->fast_state.SetIgnoreBit();
 #if !SANITIZER_GO
-  if (!ctx->after_multithreaded_fork)
+  if (save_stack && !ctx->after_multithreaded_fork)
     thr->mop_ignore_set.Add(CurrentStackId(thr, pc));
 #endif
 }
 
 void ThreadIgnoreEnd(ThreadState *thr, uptr pc) {
   DPrintf("#%d: ThreadIgnoreEnd\n", thr->tid);
+  CHECK_GT(thr->ignore_reads_and_writes, 0);
   thr->ignore_reads_and_writes--;
-  CHECK_GE(thr->ignore_reads_and_writes, 0);
   if (thr->ignore_reads_and_writes == 0) {
     thr->fast_state.ClearIgnoreBit();
 #if !SANITIZER_GO
@@ -1011,20 +1012,20 @@
 }
 #endif
 
-void ThreadIgnoreSyncBegin(ThreadState *thr, uptr pc) {
+void ThreadIgnoreSyncBegin(ThreadState *thr, uptr pc, bool save_stack) {
   DPrintf("#%d: ThreadIgnoreSyncBegin\n", thr->tid);
   thr->ignore_sync++;
   CHECK_GT(thr->ignore_sync, 0);
 #if !SANITIZER_GO
-  if (!ctx->after_multithreaded_fork)
+  if (save_stack && !ctx->after_multithreaded_fork)
     thr->sync_ignore_set.Add(CurrentStackId(thr, pc));
 #endif
 }
 
 void ThreadIgnoreSyncEnd(ThreadState *thr, uptr pc) {
   DPrintf("#%d: ThreadIgnoreSyncEnd\n", thr->tid);
+  CHECK_GT(thr->ignore_sync, 0);
   thr->ignore_sync--;
-  CHECK_GE(thr->ignore_sync, 0);
 #if !SANITIZER_GO
   if (thr->ignore_sync == 0)
     thr->sync_ignore_set.Reset();
diff --git a/lib/tsan/rtl/tsan_rtl.h b/lib/tsan/rtl/tsan_rtl.h
index 8853941..99c4d25 100644
--- a/lib/tsan/rtl/tsan_rtl.h
+++ b/lib/tsan/rtl/tsan_rtl.h
@@ -55,16 +55,22 @@
 #if !SANITIZER_GO
 struct MapUnmapCallback;
 #if defined(__mips64) || defined(__aarch64__) || defined(__powerpc__)
-static const uptr kAllocatorSpace = 0;
-static const uptr kAllocatorSize = SANITIZER_MMAP_RANGE_SIZE;
 static const uptr kAllocatorRegionSizeLog = 20;
 static const uptr kAllocatorNumRegions =
-    kAllocatorSize >> kAllocatorRegionSizeLog;
+    SANITIZER_MMAP_RANGE_SIZE >> kAllocatorRegionSizeLog;
 typedef TwoLevelByteMap<(kAllocatorNumRegions >> 12), 1 << 12,
     MapUnmapCallback> ByteMap;
-typedef SizeClassAllocator32<kAllocatorSpace, kAllocatorSize, 0,
-    CompactSizeClassMap, kAllocatorRegionSizeLog, ByteMap,
-    MapUnmapCallback> PrimaryAllocator;
+struct AP32 {
+  static const uptr kSpaceBeg = 0;
+  static const u64 kSpaceSize = SANITIZER_MMAP_RANGE_SIZE;
+  static const uptr kMetadataSize = 0;
+  typedef __sanitizer::CompactSizeClassMap SizeClassMap;
+  static const uptr kRegionSizeLog = kAllocatorRegionSizeLog;
+  typedef __tsan::ByteMap ByteMap;
+  typedef __tsan::MapUnmapCallback MapUnmapCallback;
+  static const uptr kFlags = 0;
+};
+typedef SizeClassAllocator32<AP32> PrimaryAllocator;
 #else
 struct AP64 {  // Allocator64 parameters. Deliberately using a short name.
   static const uptr kSpaceBeg = Mapping::kHeapMemBeg;
@@ -381,6 +387,7 @@
   // for better performance.
   int ignore_reads_and_writes;
   int ignore_sync;
+  int suppress_reports;
   // Go does not support ignores.
 #if !SANITIZER_GO
   IgnoreSet mop_ignore_set;
@@ -410,7 +417,6 @@
   bool is_dead;
   bool is_freeing;
   bool is_vptr_access;
-  uptr external_tag;
   const uptr stk_addr;
   const uptr stk_size;
   const uptr tls_addr;
@@ -546,6 +552,10 @@
 
 extern Context *ctx;  // The one and the only global runtime context.
 
+ALWAYS_INLINE Flags *flags() {
+  return &ctx->flags;
+}
+
 struct ScopedIgnoreInterceptors {
   ScopedIgnoreInterceptors() {
 #if !SANITIZER_GO
@@ -560,9 +570,13 @@
   }
 };
 
+const char *GetObjectTypeFromTag(uptr tag);
+const char *GetReportHeaderFromTag(uptr tag);
+uptr TagFromShadowStackFrame(uptr pc);
+
 class ScopedReport {
  public:
-  explicit ScopedReport(ReportType typ);
+  explicit ScopedReport(ReportType typ, uptr tag = kExternalTagNone);
   ~ScopedReport();
 
   void AddMemoryAccess(uptr addr, uptr external_tag, Shadow s, StackTrace stack,
@@ -593,10 +607,26 @@
 
 ThreadContext *IsThreadStackOrTls(uptr addr, bool *is_stack);
 void RestoreStack(int tid, const u64 epoch, VarSizeStackTrace *stk,
-                  MutexSet *mset);
+                  MutexSet *mset, uptr *tag = nullptr);
+
+// The stack could look like:
+//   <start> | <main> | <foo> | tag | <bar>
+// This will extract the tag and keep:
+//   <start> | <main> | <foo> | <bar>
+template<typename StackTraceTy>
+void ExtractTagFromStack(StackTraceTy *stack, uptr *tag = nullptr) {
+  if (stack->size < 2) return;
+  uptr possible_tag_pc = stack->trace[stack->size - 2];
+  uptr possible_tag = TagFromShadowStackFrame(possible_tag_pc);
+  if (possible_tag == kExternalTagNone) return;
+  stack->trace_buffer[stack->size - 2] = stack->trace_buffer[stack->size - 1];
+  stack->size -= 1;
+  if (tag) *tag = possible_tag;
+}
 
 template<typename StackTraceTy>
-void ObtainCurrentStack(ThreadState *thr, uptr toppc, StackTraceTy *stack) {
+void ObtainCurrentStack(ThreadState *thr, uptr toppc, StackTraceTy *stack,
+                        uptr *tag = nullptr) {
   uptr size = thr->shadow_stack_pos - thr->shadow_stack;
   uptr start = 0;
   if (size + !!toppc > kStackTraceMax) {
@@ -604,6 +634,7 @@
     size = kStackTraceMax - !!toppc;
   }
   stack->Init(&thr->shadow_stack[start], size, toppc);
+  ExtractTagFromStack(stack, tag);
 }
 
 
@@ -641,8 +672,6 @@
 bool IsExpectedReport(uptr addr, uptr size);
 void PrintMatchedBenignRaces();
 
-const char *GetObjectTypeFromTag(uptr tag);
-
 #if defined(TSAN_DEBUG_OUTPUT) && TSAN_DEBUG_OUTPUT >= 1
 # define DPrintf Printf
 #else
@@ -707,16 +736,16 @@
 void MemoryRangeFreed(ThreadState *thr, uptr pc, uptr addr, uptr size);
 void MemoryRangeImitateWrite(ThreadState *thr, uptr pc, uptr addr, uptr size);
 
-void ThreadIgnoreBegin(ThreadState *thr, uptr pc);
+void ThreadIgnoreBegin(ThreadState *thr, uptr pc, bool save_stack = true);
 void ThreadIgnoreEnd(ThreadState *thr, uptr pc);
-void ThreadIgnoreSyncBegin(ThreadState *thr, uptr pc);
+void ThreadIgnoreSyncBegin(ThreadState *thr, uptr pc, bool save_stack = true);
 void ThreadIgnoreSyncEnd(ThreadState *thr, uptr pc);
 
 void FuncEntry(ThreadState *thr, uptr pc);
 void FuncExit(ThreadState *thr);
 
 int ThreadCreate(ThreadState *thr, uptr pc, uptr uid, bool detached);
-void ThreadStart(ThreadState *thr, int tid, uptr os_id, bool workerthread);
+void ThreadStart(ThreadState *thr, int tid, tid_t os_id, bool workerthread);
 void ThreadFinish(ThreadState *thr);
 int ThreadTid(ThreadState *thr, uptr pc, uptr uid);
 void ThreadJoin(ThreadState *thr, uptr pc, int tid);
@@ -731,13 +760,16 @@
 void ProcWire(Processor *proc, ThreadState *thr);
 void ProcUnwire(Processor *proc, ThreadState *thr);
 
-void MutexCreate(ThreadState *thr, uptr pc, uptr addr,
-                 bool rw, bool recursive, bool linker_init);
-void MutexDestroy(ThreadState *thr, uptr pc, uptr addr);
-void MutexLock(ThreadState *thr, uptr pc, uptr addr, int rec = 1,
-               bool try_lock = false);
-int  MutexUnlock(ThreadState *thr, uptr pc, uptr addr, bool all = false);
-void MutexReadLock(ThreadState *thr, uptr pc, uptr addr, bool try_lock = false);
+// Note: the parameter is called flagz, because flags is already taken
+// by the global function that returns flags.
+void MutexCreate(ThreadState *thr, uptr pc, uptr addr, u32 flagz = 0);
+void MutexDestroy(ThreadState *thr, uptr pc, uptr addr, u32 flagz = 0);
+void MutexPreLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz = 0);
+void MutexPostLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz = 0,
+    int rec = 1);
+int  MutexUnlock(ThreadState *thr, uptr pc, uptr addr, u32 flagz = 0);
+void MutexPreReadLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz = 0);
+void MutexPostReadLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz = 0);
 void MutexReadUnlock(ThreadState *thr, uptr pc, uptr addr);
 void MutexReadOrWriteUnlock(ThreadState *thr, uptr pc, uptr addr);
 void MutexRepair(ThreadState *thr, uptr pc, uptr addr);  // call on EOWNERDEAD
@@ -793,7 +825,7 @@
     return;
   DCHECK_GE((int)typ, 0);
   DCHECK_LE((int)typ, 7);
-  DCHECK_EQ(GetLsb(addr, 61), addr);
+  DCHECK_EQ(GetLsb(addr, kEventPCBits), addr);
   StatInc(thr, StatEvents);
   u64 pos = fs.GetTracePos();
   if (UNLIKELY((pos % kTracePartSize) == 0)) {
@@ -805,7 +837,7 @@
   }
   Event *trace = (Event*)GetThreadTrace(fs.tid());
   Event *evp = &trace[pos];
-  Event ev = (u64)addr | ((u64)typ << 61);
+  Event ev = (u64)addr | ((u64)typ << kEventPCBits);
   *evp = ev;
 }
 
diff --git a/lib/tsan/rtl/tsan_rtl_aarch64.S b/lib/tsan/rtl/tsan_rtl_aarch64.S
index ef06f04..61171d6 100644
--- a/lib/tsan/rtl/tsan_rtl_aarch64.S
+++ b/lib/tsan/rtl/tsan_rtl_aarch64.S
@@ -1,13 +1,46 @@
+// The content of this file is AArch64-only:
+#if defined(__aarch64__)
+
 #include "sanitizer_common/sanitizer_asm.h"
 
+#if !defined(__APPLE__)
 .section .bss
 .type	__tsan_pointer_chk_guard, %object
-.size	__tsan_pointer_chk_guard, 8
+ASM_SIZE(ASM_TSAN_SYMBOL_INTERCEPTOR(__tsan_pointer_chk_guard))
 __tsan_pointer_chk_guard:
 .zero	8
+#endif
 
+#if defined(__APPLE__)
+.align  2
+
+.section  __DATA,__nl_symbol_ptr,non_lazy_symbol_pointers
+.long _setjmp$non_lazy_ptr
+_setjmp$non_lazy_ptr:
+.indirect_symbol _setjmp
+.long 0
+
+.section  __DATA,__nl_symbol_ptr,non_lazy_symbol_pointers
+.long __setjmp$non_lazy_ptr
+__setjmp$non_lazy_ptr:
+.indirect_symbol __setjmp
+.long 0
+
+.section  __DATA,__nl_symbol_ptr,non_lazy_symbol_pointers
+.long _sigsetjmp$non_lazy_ptr
+_sigsetjmp$non_lazy_ptr:
+.indirect_symbol _sigsetjmp
+.long 0
+#endif
+
+#if !defined(__APPLE__)
 .section .text
+#else
+.section __TEXT,__text
+.align 3
+#endif
 
+#if !defined(__APPLE__)
 // GLIBC mangles the function pointers in jmp_buf (used in {set,long}*jmp
 // functions) by XORing them with a random guard pointer.  For AArch64 it is a
 // global variable rather than a TCB one (as for x86_64/powerpc) and althought
@@ -16,9 +49,9 @@
 // not stable). So InitializeGuardPtr obtains the pointer guard value by
 // issuing a setjmp and checking the resulting pointers values against the
 // original ones.
-.hidden _Z18InitializeGuardPtrv
+ASM_HIDDEN(_Z18InitializeGuardPtrv)
 .global _Z18InitializeGuardPtrv
-.type _Z18InitializeGuardPtrv, @function
+ASM_TYPE_FUNCTION(ASM_TSAN_SYMBOL_INTERCEPTOR(_Z18InitializeGuardPtrv))
 _Z18InitializeGuardPtrv:
   CFI_STARTPROC
   // Allocates a jmp_buf for the setjmp call.
@@ -55,12 +88,14 @@
   CFI_DEF_CFA (31, 0)
   ret
   CFI_ENDPROC
-.size _Z18InitializeGuardPtrv, .-_Z18InitializeGuardPtrv
+ASM_SIZE(ASM_TSAN_SYMBOL_INTERCEPTOR(_Z18InitializeGuardPtrv))
+#endif
 
-.hidden __tsan_setjmp
+ASM_HIDDEN(__tsan_setjmp)
 .comm _ZN14__interception11real_setjmpE,8,8
-.type setjmp, @function
-setjmp:
+.globl ASM_TSAN_SYMBOL_INTERCEPTOR(setjmp)
+ASM_TYPE_FUNCTION(ASM_TSAN_SYMBOL_INTERCEPTOR(setjmp))
+ASM_TSAN_SYMBOL_INTERCEPTOR(setjmp):
   CFI_STARTPROC
 
   // save env parameters for function call
@@ -78,14 +113,19 @@
   CFI_OFFSET (19, -16)
   mov     x19, x0
 
+#if !defined(__APPLE__)
   // SP pointer mangling (see glibc setjmp)
   adrp    x2, __tsan_pointer_chk_guard
   ldr     x2, [x2, #:lo12:__tsan_pointer_chk_guard]
   add     x0, x29, 32
   eor     x1, x2, x0
+#else
+  add     x0, x29, 32
+  mov     x1, x0
+#endif
 
   // call tsan interceptor
-  bl      __tsan_setjmp
+  bl      ASM_TSAN_SYMBOL(__tsan_setjmp)
 
   // restore env parameter
   mov     x0, x19
@@ -96,18 +136,24 @@
   CFI_DEF_CFA (31, 0)
 
   // tail jump to libc setjmp
+#if !defined(__APPLE__)
   adrp    x1, :got:_ZN14__interception11real_setjmpE
   ldr     x1, [x1, #:got_lo12:_ZN14__interception11real_setjmpE]
   ldr     x1, [x1]
+#else
+  adrp    x1, _setjmp$non_lazy_ptr@page
+  add     x1, x1, _setjmp$non_lazy_ptr@pageoff
+  ldr     x1, [x1]
+#endif
   br      x1
 
   CFI_ENDPROC
-.size setjmp, .-setjmp
+ASM_SIZE(ASM_TSAN_SYMBOL_INTERCEPTOR(setjmp))
 
 .comm _ZN14__interception12real__setjmpE,8,8
-.globl _setjmp
-.type _setjmp, @function
-_setjmp:
+.globl ASM_TSAN_SYMBOL_INTERCEPTOR(_setjmp)
+ASM_TYPE_FUNCTION(ASM_TSAN_SYMBOL_INTERCEPTOR(_setjmp))
+ASM_TSAN_SYMBOL_INTERCEPTOR(_setjmp):
   CFI_STARTPROC
 
   // save env parameters for function call
@@ -125,14 +171,19 @@
   CFI_OFFSET (19, -16)
   mov     x19, x0
 
+#if !defined(__APPLE__)
   // SP pointer mangling (see glibc setjmp)
   adrp    x2, __tsan_pointer_chk_guard
   ldr     x2, [x2, #:lo12:__tsan_pointer_chk_guard]
   add     x0, x29, 32
   eor     x1, x2, x0
+#else
+  add     x0, x29, 32
+  mov     x1, x0
+#endif
 
   // call tsan interceptor
-  bl      __tsan_setjmp
+  bl      ASM_TSAN_SYMBOL(__tsan_setjmp)
 
   // Restore jmp_buf parameter
   mov     x0, x19
@@ -143,18 +194,24 @@
   CFI_DEF_CFA (31, 0)
 
   // tail jump to libc setjmp
+#if !defined(__APPLE__)
   adrp    x1, :got:_ZN14__interception12real__setjmpE
   ldr     x1, [x1, #:got_lo12:_ZN14__interception12real__setjmpE]
   ldr     x1, [x1]
+#else
+  adrp    x1, __setjmp$non_lazy_ptr@page
+  add     x1, x1, __setjmp$non_lazy_ptr@pageoff
+  ldr     x1, [x1]
+#endif
   br      x1
 
   CFI_ENDPROC
-.size _setjmp, .-_setjmp
+ASM_SIZE(ASM_TSAN_SYMBOL_INTERCEPTOR(_setjmp))
 
 .comm _ZN14__interception14real_sigsetjmpE,8,8
-.globl sigsetjmp
-.type sigsetjmp, @function
-sigsetjmp:
+.globl ASM_TSAN_SYMBOL_INTERCEPTOR(sigsetjmp)
+ASM_TYPE_FUNCTION(ASM_TSAN_SYMBOL_INTERCEPTOR(sigsetjmp))
+ASM_TSAN_SYMBOL_INTERCEPTOR(sigsetjmp):
   CFI_STARTPROC
 
   // save env parameters for function call
@@ -174,14 +231,19 @@
   mov     w20, w1
   mov     x19, x0
 
+#if !defined(__APPLE__)
   // SP pointer mangling (see glibc setjmp)
   adrp    x2, __tsan_pointer_chk_guard
   ldr     x2, [x2, #:lo12:__tsan_pointer_chk_guard]
   add     x0, x29, 32
   eor     x1, x2, x0
+#else
+  add     x0, x29, 32
+  mov     x1, x0
+#endif
 
   // call tsan interceptor
-  bl      __tsan_setjmp
+  bl      ASM_TSAN_SYMBOL(__tsan_setjmp)
 
   // restore env parameter
   mov     w1, w20
@@ -195,17 +257,24 @@
   CFI_DEF_CFA (31, 0)
 
   // tail jump to libc sigsetjmp
+#if !defined(__APPLE__)
   adrp    x2, :got:_ZN14__interception14real_sigsetjmpE
   ldr     x2, [x2, #:got_lo12:_ZN14__interception14real_sigsetjmpE]
   ldr     x2, [x2]
+#else
+  adrp    x2, _sigsetjmp$non_lazy_ptr@page
+  add     x2, x2, _sigsetjmp$non_lazy_ptr@pageoff
+  ldr     x2, [x2]
+#endif
   br      x2
   CFI_ENDPROC
-.size sigsetjmp, .-sigsetjmp
+ASM_SIZE(ASM_TSAN_SYMBOL_INTERCEPTOR(sigsetjmp))
 
+#if !defined(__APPLE__)
 .comm _ZN14__interception16real___sigsetjmpE,8,8
-.globl __sigsetjmp
-.type __sigsetjmp, @function
-__sigsetjmp:
+.globl ASM_TSAN_SYMBOL_INTERCEPTOR(__sigsetjmp)
+ASM_TYPE_FUNCTION(ASM_TSAN_SYMBOL_INTERCEPTOR(__sigsetjmp))
+ASM_TSAN_SYMBOL_INTERCEPTOR(__sigsetjmp):
   CFI_STARTPROC
 
   // save env parameters for function call
@@ -225,14 +294,16 @@
   mov     w20, w1
   mov     x19, x0
 
+#if !defined(__APPLE__)
   // SP pointer mangling (see glibc setjmp)
   adrp    x2, __tsan_pointer_chk_guard
   ldr     x2, [x2, #:lo12:__tsan_pointer_chk_guard]
   add     x0, x29, 32
   eor     x1, x2, x0
+#endif
 
   // call tsan interceptor
-  bl      __tsan_setjmp
+  bl      ASM_TSAN_SYMBOL(__tsan_setjmp)
 
   mov     w1, w20
   mov     x0, x19
@@ -245,14 +316,22 @@
   CFI_DEF_CFA (31, 0)
 
   // tail jump to libc __sigsetjmp
+#if !defined(__APPLE__)
   adrp    x2, :got:_ZN14__interception16real___sigsetjmpE
   ldr     x2, [x2, #:got_lo12:_ZN14__interception16real___sigsetjmpE]
   ldr     x2, [x2]
+#else
+  adrp    x2, ASM_TSAN_SYMBOL(__sigsetjmp)@page
+  add     x2, x2, ASM_TSAN_SYMBOL(__sigsetjmp)@pageoff
+#endif
   br      x2
   CFI_ENDPROC
-.size __sigsetjmp, .-__sigsetjmp
+ASM_SIZE(ASM_TSAN_SYMBOL_INTERCEPTOR(__sigsetjmp))
+#endif
 
 #if defined(__linux__)
 /* We do not need executable stack.  */
 .section        .note.GNU-stack,"",@progbits
 #endif
+
+#endif
diff --git a/lib/tsan/rtl/tsan_rtl_amd64.S b/lib/tsan/rtl/tsan_rtl_amd64.S
index caa8323..98947fd 100644
--- a/lib/tsan/rtl/tsan_rtl_amd64.S
+++ b/lib/tsan/rtl/tsan_rtl_amd64.S
@@ -1,4 +1,8 @@
+// The content of this file is x86_64-only:
+#if defined(__x86_64__)
+
 #include "sanitizer_common/sanitizer_asm.h"
+
 #if !defined(__APPLE__)
 .section .text
 #else
@@ -357,3 +361,5 @@
 /* We do not need executable stack.  */
 .section        .note.GNU-stack,"",@progbits
 #endif
+
+#endif
diff --git a/lib/tsan/rtl/tsan_rtl_mutex.cc b/lib/tsan/rtl/tsan_rtl_mutex.cc
index f3b51c3..152b965 100644
--- a/lib/tsan/rtl/tsan_rtl_mutex.cc
+++ b/lib/tsan/rtl/tsan_rtl_mutex.cc
@@ -62,32 +62,31 @@
   OutputReport(thr, rep);
 }
 
-void MutexCreate(ThreadState *thr, uptr pc, uptr addr,
-                 bool rw, bool recursive, bool linker_init) {
-  DPrintf("#%d: MutexCreate %zx\n", thr->tid, addr);
+void MutexCreate(ThreadState *thr, uptr pc, uptr addr, u32 flagz) {
+  DPrintf("#%d: MutexCreate %zx flagz=0x%x\n", thr->tid, addr, flagz);
   StatInc(thr, StatMutexCreate);
-  if (!linker_init && IsAppMem(addr)) {
+  if (!(flagz & MutexFlagLinkerInit) && IsAppMem(addr)) {
     CHECK(!thr->is_freeing);
     thr->is_freeing = true;
     MemoryWrite(thr, pc, addr, kSizeLog1);
     thr->is_freeing = false;
   }
   SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, addr, true);
-  s->is_rw = rw;
-  s->is_recursive = recursive;
-  s->is_linker_init = linker_init;
+  s->SetFlags(flagz & MutexCreationFlagMask);
   if (!SANITIZER_GO && s->creation_stack_id == 0)
     s->creation_stack_id = CurrentStackId(thr, pc);
   s->mtx.Unlock();
 }
 
-void MutexDestroy(ThreadState *thr, uptr pc, uptr addr) {
+void MutexDestroy(ThreadState *thr, uptr pc, uptr addr, u32 flagz) {
   DPrintf("#%d: MutexDestroy %zx\n", thr->tid, addr);
   StatInc(thr, StatMutexDestroy);
   SyncVar *s = ctx->metamap.GetIfExistsAndLock(addr, true);
   if (s == 0)
     return;
-  if (s->is_linker_init) {
+  if ((flagz & MutexFlagLinkerInit)
+      || s->IsFlagSet(MutexFlagLinkerInit)
+      || ((flagz & MutexFlagNotStatic) && !s->IsFlagSet(MutexFlagNotStatic))) {
     // Destroy is no-op for linker-initialized mutexes.
     s->mtx.Unlock();
     return;
@@ -100,8 +99,8 @@
   bool unlock_locked = false;
   if (flags()->report_destroy_locked
       && s->owner_tid != SyncVar::kInvalidTid
-      && !s->is_broken) {
-    s->is_broken = true;
+      && !s->IsFlagSet(MutexFlagBroken)) {
+    s->SetFlags(MutexFlagBroken);
     unlock_locked = true;
   }
   u64 mid = s->GetId();
@@ -141,12 +140,33 @@
   // s will be destroyed and freed in MetaMap::FreeBlock.
 }
 
-void MutexLock(ThreadState *thr, uptr pc, uptr addr, int rec, bool try_lock) {
-  DPrintf("#%d: MutexLock %zx rec=%d\n", thr->tid, addr, rec);
-  CHECK_GT(rec, 0);
+void MutexPreLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) {
+  DPrintf("#%d: MutexPreLock %zx flagz=0x%x\n", thr->tid, addr, flagz);
+  if (!(flagz & MutexFlagTryLock) && common_flags()->detect_deadlocks) {
+    SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, addr, false);
+    s->UpdateFlags(flagz);
+    if (s->owner_tid != thr->tid) {
+      Callback cb(thr, pc);
+      ctx->dd->MutexBeforeLock(&cb, &s->dd, true);
+      s->mtx.ReadUnlock();
+      ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb));
+    } else {
+      s->mtx.ReadUnlock();
+    }
+  }
+}
+
+void MutexPostLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz, int rec) {
+  DPrintf("#%d: MutexPostLock %zx flag=0x%x rec=%d\n",
+      thr->tid, addr, flagz, rec);
+  if (flagz & MutexFlagRecursiveLock)
+    CHECK_GT(rec, 0);
+  else
+    rec = 1;
   if (IsAppMem(addr))
     MemoryReadAtomic(thr, pc, addr, kSizeLog1);
   SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, addr, true);
+  s->UpdateFlags(flagz);
   thr->fast_state.IncrementEpoch();
   TraceAddEvent(thr, thr->fast_state, EventTypeLock, s->GetId());
   bool report_double_lock = false;
@@ -156,38 +176,43 @@
     s->last_lock = thr->fast_state.raw();
   } else if (s->owner_tid == thr->tid) {
     CHECK_GT(s->recursion, 0);
-  } else if (flags()->report_mutex_bugs && !s->is_broken) {
-    s->is_broken = true;
+  } else if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) {
+    s->SetFlags(MutexFlagBroken);
     report_double_lock = true;
   }
-  if (s->recursion == 0) {
+  const bool first = s->recursion == 0;
+  s->recursion += rec;
+  if (first) {
     StatInc(thr, StatMutexLock);
     AcquireImpl(thr, pc, &s->clock);
     AcquireImpl(thr, pc, &s->read_clock);
-  } else if (!s->is_recursive) {
+  } else if (!s->IsFlagSet(MutexFlagWriteReentrant)) {
     StatInc(thr, StatMutexRecLock);
   }
-  s->recursion += rec;
   thr->mset.Add(s->GetId(), true, thr->fast_state.epoch());
-  if (common_flags()->detect_deadlocks && (s->recursion - rec) == 0) {
+  bool pre_lock = false;
+  if (first && common_flags()->detect_deadlocks) {
+    pre_lock = (flagz & MutexFlagDoPreLockOnPostLock) &&
+        !(flagz & MutexFlagTryLock);
     Callback cb(thr, pc);
-    if (!try_lock)
+    if (pre_lock)
       ctx->dd->MutexBeforeLock(&cb, &s->dd, true);
-    ctx->dd->MutexAfterLock(&cb, &s->dd, true, try_lock);
+    ctx->dd->MutexAfterLock(&cb, &s->dd, true, flagz & MutexFlagTryLock);
   }
   u64 mid = s->GetId();
   s->mtx.Unlock();
   // Can't touch s after this point.
+  s = 0;
   if (report_double_lock)
     ReportMutexMisuse(thr, pc, ReportTypeMutexDoubleLock, addr, mid);
-  if (common_flags()->detect_deadlocks) {
+  if (first && pre_lock && common_flags()->detect_deadlocks) {
     Callback cb(thr, pc);
     ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb));
   }
 }
 
-int MutexUnlock(ThreadState *thr, uptr pc, uptr addr, bool all) {
-  DPrintf("#%d: MutexUnlock %zx all=%d\n", thr->tid, addr, all);
+int MutexUnlock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) {
+  DPrintf("#%d: MutexUnlock %zx flagz=0x%x\n", thr->tid, addr, flagz);
   if (IsAppMem(addr))
     MemoryReadAtomic(thr, pc, addr, kSizeLog1);
   SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, addr, true);
@@ -196,12 +221,12 @@
   int rec = 0;
   bool report_bad_unlock = false;
   if (!SANITIZER_GO && (s->recursion == 0 || s->owner_tid != thr->tid)) {
-    if (flags()->report_mutex_bugs && !s->is_broken) {
-      s->is_broken = true;
+    if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) {
+      s->SetFlags(MutexFlagBroken);
       report_bad_unlock = true;
     }
   } else {
-    rec = all ? s->recursion : 1;
+    rec = (flagz & MutexFlagRecursiveUnlock) ? s->recursion : 1;
     s->recursion -= rec;
     if (s->recursion == 0) {
       StatInc(thr, StatMutexUnlock);
@@ -229,36 +254,53 @@
   return rec;
 }
 
-void MutexReadLock(ThreadState *thr, uptr pc, uptr addr, bool trylock) {
-  DPrintf("#%d: MutexReadLock %zx\n", thr->tid, addr);
+void MutexPreReadLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) {
+  DPrintf("#%d: MutexPreReadLock %zx flagz=0x%x\n", thr->tid, addr, flagz);
+  if (!(flagz & MutexFlagTryLock) && common_flags()->detect_deadlocks) {
+    SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, addr, false);
+    s->UpdateFlags(flagz);
+    Callback cb(thr, pc);
+    ctx->dd->MutexBeforeLock(&cb, &s->dd, false);
+    s->mtx.ReadUnlock();
+    ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb));
+  }
+}
+
+void MutexPostReadLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) {
+  DPrintf("#%d: MutexPostReadLock %zx flagz=0x%x\n", thr->tid, addr, flagz);
   StatInc(thr, StatMutexReadLock);
   if (IsAppMem(addr))
     MemoryReadAtomic(thr, pc, addr, kSizeLog1);
   SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, addr, false);
+  s->UpdateFlags(flagz);
   thr->fast_state.IncrementEpoch();
   TraceAddEvent(thr, thr->fast_state, EventTypeRLock, s->GetId());
   bool report_bad_lock = false;
   if (s->owner_tid != SyncVar::kInvalidTid) {
-    if (flags()->report_mutex_bugs && !s->is_broken) {
-      s->is_broken = true;
+    if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) {
+      s->SetFlags(MutexFlagBroken);
       report_bad_lock = true;
     }
   }
   AcquireImpl(thr, pc, &s->clock);
   s->last_lock = thr->fast_state.raw();
   thr->mset.Add(s->GetId(), false, thr->fast_state.epoch());
-  if (common_flags()->detect_deadlocks && s->recursion == 0) {
+  bool pre_lock = false;
+  if (common_flags()->detect_deadlocks) {
+    pre_lock = (flagz & MutexFlagDoPreLockOnPostLock) &&
+        !(flagz & MutexFlagTryLock);
     Callback cb(thr, pc);
-    if (!trylock)
+    if (pre_lock)
       ctx->dd->MutexBeforeLock(&cb, &s->dd, false);
-    ctx->dd->MutexAfterLock(&cb, &s->dd, false, trylock);
+    ctx->dd->MutexAfterLock(&cb, &s->dd, false, flagz & MutexFlagTryLock);
   }
   u64 mid = s->GetId();
   s->mtx.ReadUnlock();
   // Can't touch s after this point.
+  s = 0;
   if (report_bad_lock)
     ReportMutexMisuse(thr, pc, ReportTypeMutexBadReadLock, addr, mid);
-  if (common_flags()->detect_deadlocks) {
+  if (pre_lock  && common_flags()->detect_deadlocks) {
     Callback cb(thr, pc);
     ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb));
   }
@@ -274,8 +316,8 @@
   TraceAddEvent(thr, thr->fast_state, EventTypeRUnlock, s->GetId());
   bool report_bad_unlock = false;
   if (s->owner_tid != SyncVar::kInvalidTid) {
-    if (flags()->report_mutex_bugs && !s->is_broken) {
-      s->is_broken = true;
+    if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) {
+      s->SetFlags(MutexFlagBroken);
       report_bad_unlock = true;
     }
   }
@@ -323,8 +365,8 @@
     } else {
       StatInc(thr, StatMutexRecUnlock);
     }
-  } else if (!s->is_broken) {
-    s->is_broken = true;
+  } else if (!s->IsFlagSet(MutexFlagBroken)) {
+    s->SetFlags(MutexFlagBroken);
     report_bad_unlock = true;
   }
   thr->mset.Del(s->GetId(), write);
@@ -373,10 +415,10 @@
 static void UpdateClockCallback(ThreadContextBase *tctx_base, void *arg) {
   ThreadState *thr = reinterpret_cast<ThreadState*>(arg);
   ThreadContext *tctx = static_cast<ThreadContext*>(tctx_base);
+  u64 epoch = tctx->epoch1;
   if (tctx->status == ThreadStatusRunning)
-    thr->clock.set(tctx->tid, tctx->thr->fast_state.epoch());
-  else
-    thr->clock.set(tctx->tid, tctx->epoch1);
+    epoch = tctx->thr->fast_state.epoch();
+  thr->clock.set(&thr->proc()->clock_cache, tctx->tid, epoch);
 }
 
 void AcquireGlobal(ThreadState *thr, uptr pc) {
@@ -416,10 +458,10 @@
 static void UpdateSleepClockCallback(ThreadContextBase *tctx_base, void *arg) {
   ThreadState *thr = reinterpret_cast<ThreadState*>(arg);
   ThreadContext *tctx = static_cast<ThreadContext*>(tctx_base);
+  u64 epoch = tctx->epoch1;
   if (tctx->status == ThreadStatusRunning)
-    thr->last_sleep_clock.set(tctx->tid, tctx->thr->fast_state.epoch());
-  else
-    thr->last_sleep_clock.set(tctx->tid, tctx->epoch1);
+    epoch = tctx->thr->fast_state.epoch();
+  thr->last_sleep_clock.set(&thr->proc()->clock_cache, tctx->tid, epoch);
 }
 
 void AfterSleep(ThreadState *thr, uptr pc) {
diff --git a/lib/tsan/rtl/tsan_rtl_report.cc b/lib/tsan/rtl/tsan_rtl_report.cc
index 31b9e97..c1d2cc4 100644
--- a/lib/tsan/rtl/tsan_rtl_report.cc
+++ b/lib/tsan/rtl/tsan_rtl_report.cc
@@ -143,11 +143,12 @@
   return stack;
 }
 
-ScopedReport::ScopedReport(ReportType typ) {
+ScopedReport::ScopedReport(ReportType typ, uptr tag) {
   ctx->thread_registry->CheckLocked();
   void *mem = internal_alloc(MBlockReport, sizeof(ReportDesc));
   rep_ = new(mem) ReportDesc;
   rep_->typ = typ;
+  rep_->tag = tag;
   ctx->report_mtx.Lock();
   CommonSanitizerReportMutex.Lock();
 }
@@ -313,7 +314,7 @@
     return;
 #if !SANITIZER_GO
   int fd = -1;
-  int creat_tid = -1;
+  int creat_tid = kInvalidTid;
   u32 creat_stack = 0;
   if (FdLocation(addr, &fd, &creat_tid, &creat_stack)) {
     ReportLocation *loc = ReportLocation::New(ReportLocationFD);
@@ -377,7 +378,7 @@
 }
 
 void RestoreStack(int tid, const u64 epoch, VarSizeStackTrace *stk,
-                  MutexSet *mset) {
+                  MutexSet *mset, uptr *tag) {
   // This function restores stack trace and mutex set for the thread/epoch.
   // It does so by getting stack trace and mutex set at the beginning of
   // trace part, and then replaying the trace till the given epoch.
@@ -405,8 +406,8 @@
   Event *events = (Event*)GetThreadTrace(tid);
   for (uptr i = ebegin; i <= eend; i++) {
     Event ev = events[i];
-    EventType typ = (EventType)(ev >> 61);
-    uptr pc = (uptr)(ev & ((1ull << 61) - 1));
+    EventType typ = (EventType)(ev >> kEventPCBits);
+    uptr pc = (uptr)(ev & ((1ull << kEventPCBits) - 1));
     DPrintf2("  %zu typ=%d pc=%zx\n", i, typ, pc);
     if (typ == EventTypeMop) {
       stack[pos] = pc;
@@ -436,6 +437,7 @@
     return;
   pos++;
   stk->Init(&stack[0], pos);
+  ExtractTagFromStack(stk, tag);
 }
 
 static bool HandleRacyStacks(ThreadState *thr, VarSizeStackTrace traces[2],
@@ -500,7 +502,7 @@
 }
 
 bool OutputReport(ThreadState *thr, const ScopedReport &srep) {
-  if (!flags()->report_bugs)
+  if (!flags()->report_bugs || thr->suppress_reports)
     return false;
   atomic_store_relaxed(&ctx->last_symbolize_time_ns, NanoTime());
   const ReportDesc *rep = srep.GetReport();
@@ -625,16 +627,35 @@
     typ = ReportTypeVptrRace;
   else if (freed)
     typ = ReportTypeUseAfterFree;
-  else if (thr->external_tag > 0)
-    typ = ReportTypeExternalRace;
 
   if (IsFiredSuppression(ctx, typ, addr))
     return;
 
   const uptr kMop = 2;
   VarSizeStackTrace traces[kMop];
-  const uptr toppc = TraceTopPC(thr);
-  ObtainCurrentStack(thr, toppc, &traces[0]);
+  uptr tags[kMop] = {kExternalTagNone};
+  uptr toppc = TraceTopPC(thr);
+  if (toppc >> kEventPCBits) {
+    // This is a work-around for a known issue.
+    // The scenario where this happens is rather elaborate and requires
+    // an instrumented __sanitizer_report_error_summary callback and
+    // a __tsan_symbolize_external callback and a race during a range memory
+    // access larger than 8 bytes. MemoryAccessRange adds the current PC to
+    // the trace and starts processing memory accesses. A first memory access
+    // triggers a race, we report it and call the instrumented
+    // __sanitizer_report_error_summary, which adds more stuff to the trace
+    // since it is intrumented. Then a second memory access in MemoryAccessRange
+    // also triggers a race and we get here and call TraceTopPC to get the
+    // current PC, however now it contains some unrelated events from the
+    // callback. Most likely, TraceTopPC will now return a EventTypeFuncExit
+    // event. Later we subtract -1 from it (in GetPreviousInstructionPc)
+    // and the resulting PC has kExternalPCBit set, so we pass it to
+    // __tsan_symbolize_external. __tsan_symbolize_external is within its rights
+    // to crash since the PC is completely bogus.
+    // test/tsan/double_race.cc contains a test case for this.
+    toppc = 0;
+  }
+  ObtainCurrentStack(thr, toppc, &traces[0], &tags[0]);
   if (IsFiredSuppression(ctx, typ, traces[0]))
     return;
 
@@ -644,18 +665,28 @@
   MutexSet *mset2 = new(&mset_buffer[0]) MutexSet();
 
   Shadow s2(thr->racy_state[1]);
-  RestoreStack(s2.tid(), s2.epoch(), &traces[1], mset2);
+  RestoreStack(s2.tid(), s2.epoch(), &traces[1], mset2, &tags[1]);
   if (IsFiredSuppression(ctx, typ, traces[1]))
     return;
 
   if (HandleRacyStacks(thr, traces, addr_min, addr_max))
     return;
 
+  // If any of the accesses has a tag, treat this as an "external" race.
+  uptr tag = kExternalTagNone;
+  for (uptr i = 0; i < kMop; i++) {
+    if (tags[i] != kExternalTagNone) {
+      typ = ReportTypeExternalRace;
+      tag = tags[i];
+      break;
+    }
+  }
+
   ThreadRegistryLock l0(ctx->thread_registry);
-  ScopedReport rep(typ);
+  ScopedReport rep(typ, tag);
   for (uptr i = 0; i < kMop; i++) {
     Shadow s(thr->racy_state[i]);
-    rep.AddMemoryAccess(addr, thr->external_tag, s, traces[i],
+    rep.AddMemoryAccess(addr, tags[i], s, traces[i],
                         i == 0 ? &thr->mset : mset2);
   }
 
diff --git a/lib/tsan/rtl/tsan_rtl_thread.cc b/lib/tsan/rtl/tsan_rtl_thread.cc
index 7357d97..83fab08 100644
--- a/lib/tsan/rtl/tsan_rtl_thread.cc
+++ b/lib/tsan/rtl/tsan_rtl_thread.cc
@@ -142,6 +142,10 @@
 
   if (common_flags()->detect_deadlocks)
     ctx->dd->DestroyLogicalThread(thr->dd_lt);
+  thr->clock.ResetCached(&thr->proc()->clock_cache);
+#if !SANITIZER_GO
+  thr->last_sleep_clock.ResetCached(&thr->proc()->clock_cache);
+#endif
   thr->~ThreadState();
 #if TSAN_COLLECT_STATS
   StatAggregate(ctx->stat, thr->stat);
@@ -236,7 +240,7 @@
   return tid;
 }
 
-void ThreadStart(ThreadState *thr, int tid, uptr os_id, bool workerthread) {
+void ThreadStart(ThreadState *thr, int tid, tid_t os_id, bool workerthread) {
   uptr stk_addr = 0;
   uptr stk_size = 0;
   uptr tls_addr = 0;
@@ -248,19 +252,7 @@
     if (stk_addr && stk_size)
       MemoryRangeImitateWrite(thr, /*pc=*/ 1, stk_addr, stk_size);
 
-    if (tls_addr && tls_size) {
-      // Check that the thr object is in tls;
-      const uptr thr_beg = (uptr)thr;
-      const uptr thr_end = (uptr)thr + sizeof(*thr);
-      CHECK_GE(thr_beg, tls_addr);
-      CHECK_LE(thr_beg, tls_addr + tls_size);
-      CHECK_GE(thr_end, tls_addr);
-      CHECK_LE(thr_end, tls_addr + tls_size);
-      // Since the thr object is huge, skip it.
-      MemoryRangeImitateWrite(thr, /*pc=*/ 2, tls_addr, thr_beg - tls_addr);
-      MemoryRangeImitateWrite(thr, /*pc=*/ 2,
-          thr_end, tls_addr + tls_size - thr_end);
-    }
+    if (tls_addr && tls_size) ImitateTlsWrite(thr, tls_addr, tls_size);
   }
 #endif
 
@@ -357,6 +349,7 @@
   StatInc(thr, StatMopRange);
 
   if (*shadow_mem == kShadowRodata) {
+    DCHECK(!is_write);
     // Access to .rodata section, no races here.
     // Measurements show that it can be 10-20% of all memory accesses.
     StatInc(thr, StatMopRangeRodata);
diff --git a/lib/tsan/rtl/tsan_stat.cc b/lib/tsan/rtl/tsan_stat.cc
index d1d6ed2..18c83d5 100644
--- a/lib/tsan/rtl/tsan_stat.cc
+++ b/lib/tsan/rtl/tsan_stat.cc
@@ -75,14 +75,11 @@
   name[StatClockAcquire]                 = "Clock acquire                     ";
   name[StatClockAcquireEmpty]            = "  empty clock                     ";
   name[StatClockAcquireFastRelease]      = "  fast from release-store         ";
-  name[StatClockAcquireLarge]            = "  contains my tid                 ";
-  name[StatClockAcquireRepeat]           = "  repeated (fast)                 ";
   name[StatClockAcquireFull]             = "  full (slow)                     ";
   name[StatClockAcquiredSomething]       = "  acquired something              ";
   name[StatClockRelease]                 = "Clock release                     ";
   name[StatClockReleaseResize]           = "  resize                          ";
-  name[StatClockReleaseFast1]            = "  fast1                           ";
-  name[StatClockReleaseFast2]            = "  fast2                           ";
+  name[StatClockReleaseFast]             = "  fast                            ";
   name[StatClockReleaseSlow]             = "  dirty overflow (slow)           ";
   name[StatClockReleaseFull]             = "  full (slow)                     ";
   name[StatClockReleaseAcquired]         = "  was acquired                    ";
@@ -153,6 +150,16 @@
   name[StatAnnotatePublishMemoryRange]   = "  PublishMemoryRange              ";
   name[StatAnnotateUnpublishMemoryRange] = "  UnpublishMemoryRange            ";
   name[StatAnnotateThreadName]           = "  ThreadName                      ";
+  name[Stat__tsan_mutex_create]          = "  __tsan_mutex_create             ";
+  name[Stat__tsan_mutex_destroy]         = "  __tsan_mutex_destroy            ";
+  name[Stat__tsan_mutex_pre_lock]        = "  __tsan_mutex_pre_lock           ";
+  name[Stat__tsan_mutex_post_lock]       = "  __tsan_mutex_post_lock          ";
+  name[Stat__tsan_mutex_pre_unlock]      = "  __tsan_mutex_pre_unlock         ";
+  name[Stat__tsan_mutex_post_unlock]     = "  __tsan_mutex_post_unlock        ";
+  name[Stat__tsan_mutex_pre_signal]      = "  __tsan_mutex_pre_signal         ";
+  name[Stat__tsan_mutex_post_signal]     = "  __tsan_mutex_post_signal        ";
+  name[Stat__tsan_mutex_pre_divert]      = "  __tsan_mutex_pre_divert         ";
+  name[Stat__tsan_mutex_post_divert]     = "  __tsan_mutex_post_divert        ";
 
   name[StatMtxTotal]                     = "Contentionz                       ";
   name[StatMtxTrace]                     = "  Trace                           ";
diff --git a/lib/tsan/rtl/tsan_stat.h b/lib/tsan/rtl/tsan_stat.h
index 8447dd8..42d6a2b 100644
--- a/lib/tsan/rtl/tsan_stat.h
+++ b/lib/tsan/rtl/tsan_stat.h
@@ -74,15 +74,12 @@
   StatClockAcquire,
   StatClockAcquireEmpty,
   StatClockAcquireFastRelease,
-  StatClockAcquireLarge,
-  StatClockAcquireRepeat,
   StatClockAcquireFull,
   StatClockAcquiredSomething,
   // Clocks - release.
   StatClockRelease,
   StatClockReleaseResize,
-  StatClockReleaseFast1,
-  StatClockReleaseFast2,
+  StatClockReleaseFast,
   StatClockReleaseSlow,
   StatClockReleaseFull,
   StatClockReleaseAcquired,
@@ -157,6 +154,16 @@
   StatAnnotatePublishMemoryRange,
   StatAnnotateUnpublishMemoryRange,
   StatAnnotateThreadName,
+  Stat__tsan_mutex_create,
+  Stat__tsan_mutex_destroy,
+  Stat__tsan_mutex_pre_lock,
+  Stat__tsan_mutex_post_lock,
+  Stat__tsan_mutex_pre_unlock,
+  Stat__tsan_mutex_post_unlock,
+  Stat__tsan_mutex_pre_signal,
+  Stat__tsan_mutex_post_signal,
+  Stat__tsan_mutex_pre_divert,
+  Stat__tsan_mutex_post_divert,
 
   // Internal mutex contentionz.
   StatMtxTotal,
diff --git a/lib/tsan/rtl/tsan_sync.cc b/lib/tsan/rtl/tsan_sync.cc
index 2be0474..44ae558 100644
--- a/lib/tsan/rtl/tsan_sync.cc
+++ b/lib/tsan/rtl/tsan_sync.cc
@@ -42,10 +42,7 @@
   owner_tid = kInvalidTid;
   last_lock = 0;
   recursion = 0;
-  is_rw = 0;
-  is_recursive = 0;
-  is_broken = 0;
-  is_linker_init = 0;
+  atomic_store_relaxed(&flags, 0);
 
   if (proc == 0) {
     CHECK_EQ(clock.size(), 0);
@@ -56,7 +53,9 @@
   }
 }
 
-MetaMap::MetaMap() {
+MetaMap::MetaMap()
+    : block_alloc_("heap block allocator")
+    , sync_alloc_("sync allocator") {
   atomic_store(&uid_gen_, 0, memory_order_relaxed);
 }
 
diff --git a/lib/tsan/rtl/tsan_sync.h b/lib/tsan/rtl/tsan_sync.h
index 86e6bbd..9039970 100644
--- a/lib/tsan/rtl/tsan_sync.h
+++ b/lib/tsan/rtl/tsan_sync.h
@@ -23,6 +23,31 @@
 
 namespace __tsan {
 
+// These need to match __tsan_mutex_* flags defined in tsan_interface.h.
+// See documentation there as well.
+enum MutexFlags {
+  MutexFlagLinkerInit          = 1 << 0, // __tsan_mutex_linker_init
+  MutexFlagWriteReentrant      = 1 << 1, // __tsan_mutex_write_reentrant
+  MutexFlagReadReentrant       = 1 << 2, // __tsan_mutex_read_reentrant
+  MutexFlagReadLock            = 1 << 3, // __tsan_mutex_read_lock
+  MutexFlagTryLock             = 1 << 4, // __tsan_mutex_try_lock
+  MutexFlagTryLockFailed       = 1 << 5, // __tsan_mutex_try_lock_failed
+  MutexFlagRecursiveLock       = 1 << 6, // __tsan_mutex_recursive_lock
+  MutexFlagRecursiveUnlock     = 1 << 7, // __tsan_mutex_recursive_unlock
+  MutexFlagNotStatic           = 1 << 8, // __tsan_mutex_not_static
+
+  // The following flags are runtime private.
+  // Mutex API misuse was detected, so don't report any more.
+  MutexFlagBroken              = 1 << 30,
+  // We did not intercept pre lock event, so handle it on post lock.
+  MutexFlagDoPreLockOnPostLock = 1 << 29,
+  // Must list all mutex creation flags.
+  MutexCreationFlagMask        = MutexFlagLinkerInit |
+                                 MutexFlagWriteReentrant |
+                                 MutexFlagReadReentrant |
+                                 MutexFlagNotStatic,
+};
+
 struct SyncVar {
   SyncVar();
 
@@ -35,10 +60,7 @@
   int owner_tid;  // Set only by exclusive owners.
   u64 last_lock;
   int recursion;
-  bool is_rw;
-  bool is_recursive;
-  bool is_broken;
-  bool is_linker_init;
+  atomic_uint32_t flags;
   u32 next;  // in MetaMap
   DDMutex dd;
   SyncClock read_clock;  // Used for rw mutexes only.
@@ -61,6 +83,26 @@
     *uid = id >> 48;
     return (uptr)GetLsb(id, 48);
   }
+
+  bool IsFlagSet(u32 f) const {
+    return atomic_load_relaxed(&flags) & f;
+  }
+
+  void SetFlags(u32 f) {
+    atomic_store_relaxed(&flags, atomic_load_relaxed(&flags) | f);
+  }
+
+  void UpdateFlags(u32 flagz) {
+    // Filter out operation flags.
+    if (!(flagz & MutexCreationFlagMask))
+      return;
+    u32 current = atomic_load_relaxed(&flags);
+    if (current & MutexCreationFlagMask)
+      return;
+    // Note: this can be called from MutexPostReadLock which holds only read
+    // lock on the SyncVar.
+    atomic_store_relaxed(&flags, current | (flagz & MutexCreationFlagMask));
+  }
 };
 
 /* MetaMap allows to map arbitrary user pointers onto various descriptors.
diff --git a/lib/tsan/rtl/tsan_trace.h b/lib/tsan/rtl/tsan_trace.h
index 96a18ac..9aef375 100644
--- a/lib/tsan/rtl/tsan_trace.h
+++ b/lib/tsan/rtl/tsan_trace.h
@@ -41,6 +41,8 @@
 // u64 addr : 61;  // Associated pc.
 typedef u64 Event;
 
+const uptr kEventPCBits = 61;
+
 struct TraceHeader {
 #if !SANITIZER_GO
   BufferedStackTrace stack0;  // Start stack for the trace.
diff --git a/lib/tsan/tests/CMakeLists.txt b/lib/tsan/tests/CMakeLists.txt
index 87e1417..ad8d02e 100644
--- a/lib/tsan/tests/CMakeLists.txt
+++ b/lib/tsan/tests/CMakeLists.txt
@@ -2,18 +2,40 @@
 
 add_custom_target(TsanUnitTests)
 set_target_properties(TsanUnitTests PROPERTIES
-  FOLDER "TSan unittests")
+  FOLDER "Compiler-RT Tests")
 
 set(TSAN_UNITTEST_CFLAGS
   ${TSAN_CFLAGS}
   ${COMPILER_RT_UNITTEST_CFLAGS}
   ${COMPILER_RT_GTEST_CFLAGS}
+  -I${COMPILER_RT_SOURCE_DIR}/include
   -I${COMPILER_RT_SOURCE_DIR}/lib
   -I${COMPILER_RT_SOURCE_DIR}/lib/tsan/rtl
   -DGTEST_HAS_RTTI=0)
 
+set(TSAN_TEST_ARCH ${TSAN_SUPPORTED_ARCH})
 if(APPLE)
+
+  # Create a static library for test dependencies.
+  set(TSAN_TEST_RUNTIME_OBJECTS
+    $<TARGET_OBJECTS:RTTsan_dynamic.osx>
+    $<TARGET_OBJECTS:RTInterception.osx>
+    $<TARGET_OBJECTS:RTSanitizerCommon.osx>
+    $<TARGET_OBJECTS:RTSanitizerCommonLibc.osx>
+    $<TARGET_OBJECTS:RTUbsan.osx>)
+  set(TSAN_TEST_RUNTIME RTTsanTest)
+  add_library(${TSAN_TEST_RUNTIME} STATIC ${TSAN_TEST_RUNTIME_OBJECTS})
+  set_target_properties(${TSAN_TEST_RUNTIME} PROPERTIES
+    ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
+
+  darwin_filter_host_archs(TSAN_SUPPORTED_ARCH TSAN_TEST_ARCH)
   list(APPEND TSAN_UNITTEST_CFLAGS ${DARWIN_osx_CFLAGS})
+
+  set(LINK_FLAGS "-lc++")
+  add_weak_symbols("ubsan" LINK_FLAGS)
+  add_weak_symbols("sanitizer_common" LINK_FLAGS)
+else()
+  set(LINK_FLAGS "-fsanitize=thread;-lstdc++;-lm")
 endif()
 
 set(TSAN_RTL_HEADERS)
@@ -21,79 +43,27 @@
   list(APPEND TSAN_RTL_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/../${header})
 endforeach()
 
-# tsan_compile(obj_list, source, arch, {headers})
-macro(tsan_compile obj_list source arch)
-  get_filename_component(basename ${source} NAME)
-  set(output_obj "${basename}.${arch}.o")
-  get_target_flags_for_arch(${arch} TARGET_CFLAGS)
-  set(COMPILE_DEPS ${TSAN_RTL_HEADERS} ${ARGN})
-  if(NOT COMPILER_RT_STANDALONE_BUILD)
-    list(APPEND COMPILE_DEPS gtest tsan)
-  endif()
-  clang_compile(${output_obj} ${source}
-          CFLAGS ${TSAN_UNITTEST_CFLAGS} ${TARGET_CFLAGS}
-          DEPS ${COMPILE_DEPS})
-  list(APPEND ${obj_list} ${output_obj})
-endmacro()
-
+# add_tsan_unittest(<name>
+#                   SOURCES <sources list>
+#                   HEADERS <extra headers list>)
 macro(add_tsan_unittest testname)
-  set(TSAN_TEST_ARCH ${TSAN_SUPPORTED_ARCH})
-  if(APPLE)
-    darwin_filter_host_archs(TSAN_SUPPORTED_ARCH TSAN_TEST_ARCH)
-  endif()
+  cmake_parse_arguments(TEST "" "" "SOURCES;HEADERS" ${ARGN})
   if(UNIX)
     foreach(arch ${TSAN_TEST_ARCH})
-      cmake_parse_arguments(TEST "" "" "SOURCES;HEADERS" ${ARGN})
-      set(TEST_OBJECTS)
-      foreach(SOURCE ${TEST_SOURCES} ${COMPILER_RT_GTEST_SOURCE})
-        tsan_compile(TEST_OBJECTS ${SOURCE} ${arch} ${TEST_HEADERS})
-      endforeach()
-      get_target_flags_for_arch(${arch} TARGET_LINK_FLAGS)
-      set(TEST_DEPS ${TEST_OBJECTS})
-      if(NOT COMPILER_RT_STANDALONE_BUILD)
-        list(APPEND TEST_DEPS tsan)
-      endif()
-      if(NOT APPLE)
-        # FIXME: Looks like we should link TSan with just-built runtime,
-        # and not rely on -fsanitize=thread, as these tests are essentially
-        # unit tests.
-        add_compiler_rt_test(TsanUnitTests ${testname}
-                OBJECTS ${TEST_OBJECTS}
-                DEPS ${TEST_DEPS}
-                LINK_FLAGS ${TARGET_LINK_FLAGS}
-                           -fsanitize=thread
-                           -lstdc++ -lm)
-      else()
-        set(TSAN_TEST_RUNTIME_OBJECTS
-          $<TARGET_OBJECTS:RTTsan_dynamic.osx>
-          $<TARGET_OBJECTS:RTInterception.osx>
-          $<TARGET_OBJECTS:RTSanitizerCommon.osx>
-          $<TARGET_OBJECTS:RTSanitizerCommonLibc.osx>
-          $<TARGET_OBJECTS:RTUbsan.osx>)
-        set(TSAN_TEST_RUNTIME RTTsanTest.${testname}.${arch})
-        add_library(${TSAN_TEST_RUNTIME} STATIC ${TSAN_TEST_RUNTIME_OBJECTS})
-        set_target_properties(${TSAN_TEST_RUNTIME} PROPERTIES
-          ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
-        list(APPEND TEST_OBJECTS lib${TSAN_TEST_RUNTIME}.a)
-        list(APPEND TEST_DEPS ${TSAN_TEST_RUNTIME})
-
-        add_weak_symbols("ubsan" WEAK_SYMBOL_LINK_FLAGS)
-        add_weak_symbols("sanitizer_common" WEAK_SYMBOL_LINK_FLAGS)
-
-        # Intentionally do *not* link with `-fsanitize=thread`. We already link
-        # against a static version of the runtime, and we don't want the dynamic
-        # one.
-        add_compiler_rt_test(TsanUnitTests "${testname}-${arch}-Test"
-                OBJECTS ${TEST_OBJECTS}
-                DEPS ${TEST_DEPS}
-                LINK_FLAGS ${TARGET_LINK_FLAGS} ${DARWIN_osx_LINK_FLAGS}
-                           ${WEAK_SYMBOL_LINK_FLAGS} -lc++)
-      endif()
+      set(TsanUnitTestsObjects)
+      generate_compiler_rt_tests(TsanUnitTestsObjects TsanUnitTests
+        "${testname}-${arch}-Test" ${arch}
+        SOURCES ${TEST_SOURCES} ${COMPILER_RT_GTEST_SOURCE}
+        RUNTIME ${TSAN_TEST_RUNTIME}
+        COMPILE_DEPS ${TEST_HEADERS} ${TSAN_RTL_HEADERS}
+        DEPS gtest tsan
+        CFLAGS ${TSAN_UNITTEST_CFLAGS}
+        LINK_FLAGS ${LINK_FLAGS})
     endforeach()
   endif()
 endmacro()
 
-if(COMPILER_RT_CAN_EXECUTE_TESTS)
+if(COMPILER_RT_CAN_EXECUTE_TESTS AND NOT ANDROID)
   add_subdirectory(rtl)
   add_subdirectory(unit)
 endif()
diff --git a/lib/tsan/tests/rtl/tsan_posix.cc b/lib/tsan/tests/rtl/tsan_posix.cc
index 9c0e013..e66dab6 100644
--- a/lib/tsan/tests/rtl/tsan_posix.cc
+++ b/lib/tsan/tests/rtl/tsan_posix.cc
@@ -94,8 +94,9 @@
 // The test is failing with high thread count for aarch64.
 // FIXME: track down the issue and re-enable the test.
 // On Darwin, we're running unit tests without interceptors and __thread is
-// using malloc and free, which causes false data race reports.
-#if !defined(__aarch64__) && !defined(__APPLE__)
+// using malloc and free, which causes false data race reports.  On rare
+// occasions on powerpc64le this test also fails.
+#if !defined(__aarch64__) && !defined(__APPLE__) && !defined(powerpc64le)
   local_thread((void*)2);
 #endif
 }
diff --git a/lib/tsan/tests/rtl/tsan_test_util_posix.cc b/lib/tsan/tests/rtl/tsan_test_util_posix.cc
index 834a271..d00e26d 100644
--- a/lib/tsan/tests/rtl/tsan_test_util_posix.cc
+++ b/lib/tsan/tests/rtl/tsan_test_util_posix.cc
@@ -9,7 +9,7 @@
 //
 // This file is a part of ThreadSanitizer (TSan), a race detector.
 //
-// Test utils, Linux, FreeBSD and Darwin implementation.
+// Test utils, Linux, FreeBSD, NetBSD and Darwin implementation.
 //===----------------------------------------------------------------------===//
 
 #include "sanitizer_common/sanitizer_atomic.h"
@@ -270,7 +270,7 @@
       }
     }
     CHECK_NE(tsan_mop, 0);
-#if defined(__FreeBSD__) || defined(__APPLE__)
+#if defined(__FreeBSD__) || defined(__APPLE__) || defined(__NetBSD__)
     const int ErrCode = ESOCKTNOSUPPORT;
 #else
     const int ErrCode = ECHRNG;
diff --git a/lib/tsan/tests/unit/tsan_clock_test.cc b/lib/tsan/tests/unit/tsan_clock_test.cc
index 83e25fb..f6230e1 100644
--- a/lib/tsan/tests/unit/tsan_clock_test.cc
+++ b/lib/tsan/tests/unit/tsan_clock_test.cc
@@ -26,13 +26,13 @@
   clk.tick();
   ASSERT_EQ(clk.size(), 1U);
   ASSERT_EQ(clk.get(0), 1U);
-  clk.set(3, clk.get(3) + 1);
+  clk.set(&cache, 3, clk.get(3) + 1);
   ASSERT_EQ(clk.size(), 4U);
   ASSERT_EQ(clk.get(0), 1U);
   ASSERT_EQ(clk.get(1), 0U);
   ASSERT_EQ(clk.get(2), 0U);
   ASSERT_EQ(clk.get(3), 1U);
-  clk.set(3, clk.get(3) + 1);
+  clk.set(&cache, 3, clk.get(3) + 1);
   ASSERT_EQ(clk.get(3), 2U);
 }
 
@@ -53,6 +53,31 @@
   chunked.Reset(&cache);
 }
 
+static const uptr interesting_sizes[] = {0, 1, 2, 30, 61, 62, 63, 64, 65, 66,
+    100, 124, 125, 126, 127, 128, 129, 130, 188, 189, 190, 191, 192, 193, 254,
+    255};
+
+TEST(Clock, Iter) {
+  const uptr n = ARRAY_SIZE(interesting_sizes);
+  for (uptr fi = 0; fi < n; fi++) {
+    const uptr size = interesting_sizes[fi];
+    SyncClock sync;
+    ThreadClock vector(0);
+    for (uptr i = 0; i < size; i++)
+      vector.set(&cache, i, i + 1);
+    if (size != 0)
+      vector.release(&cache, &sync);
+    uptr i = 0;
+    for (ClockElem &ce : sync) {
+      ASSERT_LT(i, size);
+      ASSERT_EQ(sync.get_clean(i), ce.epoch);
+      i++;
+    }
+    ASSERT_EQ(i, size);
+    sync.Reset(&cache);
+  }
+}
+
 TEST(Clock, AcquireRelease) {
   ThreadClock vector1(100);
   vector1.tick();
@@ -86,24 +111,26 @@
 
 TEST(Clock, ManyThreads) {
   SyncClock chunked;
-  for (unsigned i = 0; i < 100; i++) {
+  for (unsigned i = 0; i < 200; i++) {
     ThreadClock vector(0);
     vector.tick();
-    vector.set(i, 1);
+    vector.set(&cache, i, i + 1);
     vector.release(&cache, &chunked);
     ASSERT_EQ(i + 1, chunked.size());
     vector.acquire(&cache, &chunked);
     ASSERT_EQ(i + 1, vector.size());
   }
 
-  for (unsigned i = 0; i < 100; i++)
-    ASSERT_EQ(1U, chunked.get(i));
+  for (unsigned i = 0; i < 200; i++) {
+    printf("i=%d\n", i);
+    ASSERT_EQ(i + 1, chunked.get(i));
+  }
 
   ThreadClock vector(1);
   vector.acquire(&cache, &chunked);
-  ASSERT_EQ(100U, vector.size());
-  for (unsigned i = 0; i < 100; i++)
-    ASSERT_EQ(1U, vector.get(i));
+  ASSERT_EQ(200U, vector.size());
+  for (unsigned i = 0; i < 200; i++)
+    ASSERT_EQ(i + 1, vector.get(i));
 
   chunked.Reset(&cache);
 }
@@ -151,7 +178,7 @@
   {
     ThreadClock vector(10);
     vector.tick();
-    vector.set(5, 42);
+    vector.set(&cache, 5, 42);
     SyncClock sync;
     vector.release(&cache, &sync);
     ASSERT_EQ(sync.size(), 11U);
@@ -180,8 +207,8 @@
   {
     ThreadClock vector(100);
     vector.tick();
-    vector.set(5, 42);
-    vector.set(90, 84);
+    vector.set(&cache, 5, 42);
+    vector.set(&cache, 90, 84);
     SyncClock sync;
     vector.release(&cache, &sync);
     ASSERT_EQ(sync.size(), 101U);
@@ -212,6 +239,40 @@
   }
 }
 
+TEST(Clock, Growth2) {
+  // Test clock growth for every pair of sizes:
+  const uptr n = ARRAY_SIZE(interesting_sizes);
+  for (uptr fi = 0; fi < n; fi++) {
+    for (uptr ti = fi + 1; ti < n; ti++) {
+      const uptr from = interesting_sizes[fi];
+      const uptr to = interesting_sizes[ti];
+      SyncClock sync;
+      ThreadClock vector(0);
+      for (uptr i = 0; i < from; i++)
+        vector.set(&cache, i, i + 1);
+      if (from != 0)
+        vector.release(&cache, &sync);
+      ASSERT_EQ(sync.size(), from);
+      for (uptr i = 0; i < from; i++)
+        ASSERT_EQ(sync.get(i), i + 1);
+      for (uptr i = 0; i < to; i++)
+        vector.set(&cache, i, i + 1);
+      vector.release(&cache, &sync);
+      ASSERT_EQ(sync.size(), to);
+      for (uptr i = 0; i < to; i++)
+        ASSERT_EQ(sync.get(i), i + 1);
+      vector.set(&cache, to + 1, to + 1);
+      vector.release(&cache, &sync);
+      ASSERT_EQ(sync.size(), to + 2);
+      for (uptr i = 0; i < to; i++)
+        ASSERT_EQ(sync.get(i), i + 1);
+      ASSERT_EQ(sync.get(to), 0U);
+      ASSERT_EQ(sync.get(to + 1), to + 1);
+      sync.Reset(&cache);
+    }
+  }
+}
+
 const uptr kThreads = 4;
 const uptr kClocks = 4;
 
diff --git a/lib/tsan/tests/unit/tsan_mman_test.cc b/lib/tsan/tests/unit/tsan_mman_test.cc
index 60dea3d..05ae428 100644
--- a/lib/tsan/tests/unit/tsan_mman_test.cc
+++ b/lib/tsan/tests/unit/tsan_mman_test.cc
@@ -56,6 +56,7 @@
     // Realloc(NULL, N) is equivalent to malloc(N), thus must return
     // non-NULL pointer.
     EXPECT_NE(p, (void*)0);
+    user_free(thr, pc, p);
   }
   {
     void *p = user_realloc(thr, pc, 0, 100);
@@ -67,8 +68,9 @@
     void *p = user_alloc(thr, pc, 100);
     EXPECT_NE(p, (void*)0);
     memset(p, 0xde, 100);
+    // Realloc(P, 0) is equivalent to free(P) and returns NULL.
     void *p2 = user_realloc(thr, pc, p, 0);
-    EXPECT_NE(p2, (void*)0);
+    EXPECT_EQ(p2, (void*)0);
   }
   {
     void *p = user_realloc(thr, pc, 0, 100);
@@ -135,12 +137,34 @@
   EXPECT_EQ(unmapped0, __sanitizer_get_unmapped_bytes());
 }
 
+TEST(Mman, Valloc) {
+  ThreadState *thr = cur_thread();
+  uptr page_size = GetPageSizeCached();
+
+  void *p = user_valloc(thr, 0, 100);
+  EXPECT_NE(p, (void*)0);
+  user_free(thr, 0, p);
+
+  p = user_pvalloc(thr, 0, 100);
+  EXPECT_NE(p, (void*)0);
+  user_free(thr, 0, p);
+
+  p = user_pvalloc(thr, 0, 0);
+  EXPECT_NE(p, (void*)0);
+  EXPECT_EQ(page_size, __sanitizer_get_allocated_size(p));
+  user_free(thr, 0, p);
+
+  EXPECT_DEATH(p = user_pvalloc(thr, 0, (uptr)-(page_size - 1)),
+               "allocator is terminating the process instead of returning 0");
+  EXPECT_DEATH(p = user_pvalloc(thr, 0, (uptr)-1),
+               "allocator is terminating the process instead of returning 0");
+}
+
+#if !SANITIZER_DEBUG
+// EXPECT_DEATH clones a thread with 4K stack,
+// which is overflown by tsan memory accesses functions in debug mode.
+
 TEST(Mman, CallocOverflow) {
-#if SANITIZER_DEBUG
-  // EXPECT_DEATH clones a thread with 4K stack,
-  // which is overflown by tsan memory accesses functions in debug mode.
-  return;
-#endif
   ThreadState *thr = cur_thread();
   uptr pc = 0;
   size_t kArraySize = 4096;
@@ -152,4 +176,57 @@
   EXPECT_EQ(0L, p);
 }
 
+TEST(Mman, Memalign) {
+  ThreadState *thr = cur_thread();
+
+  void *p = user_memalign(thr, 0, 8, 100);
+  EXPECT_NE(p, (void*)0);
+  user_free(thr, 0, p);
+
+  p = NULL;
+  EXPECT_DEATH(p = user_memalign(thr, 0, 7, 100),
+               "allocator is terminating the process instead of returning 0");
+  EXPECT_EQ(0L, p);
+}
+
+TEST(Mman, PosixMemalign) {
+  ThreadState *thr = cur_thread();
+
+  void *p = NULL;
+  int res = user_posix_memalign(thr, 0, &p, 8, 100);
+  EXPECT_NE(p, (void*)0);
+  EXPECT_EQ(res, 0);
+  user_free(thr, 0, p);
+
+  p = NULL;
+  // Alignment is not a power of two, although is a multiple of sizeof(void*).
+  EXPECT_DEATH(res = user_posix_memalign(thr, 0, &p, 3 * sizeof(p), 100),
+               "allocator is terminating the process instead of returning 0");
+  EXPECT_EQ(0L, p);
+  // Alignment is not a multiple of sizeof(void*), although is a power of 2.
+  EXPECT_DEATH(res = user_posix_memalign(thr, 0, &p, 2, 100),
+               "allocator is terminating the process instead of returning 0");
+  EXPECT_EQ(0L, p);
+}
+
+TEST(Mman, AlignedAlloc) {
+  ThreadState *thr = cur_thread();
+
+  void *p = user_aligned_alloc(thr, 0, 8, 64);
+  EXPECT_NE(p, (void*)0);
+  user_free(thr, 0, p);
+
+  p = NULL;
+  // Alignement is not a power of 2.
+  EXPECT_DEATH(p = user_aligned_alloc(thr, 0, 7, 100),
+               "allocator is terminating the process instead of returning 0");
+  EXPECT_EQ(0L, p);
+  // Size is not a multiple of alignment.
+  EXPECT_DEATH(p = user_aligned_alloc(thr, 0, 8, 100),
+               "allocator is terminating the process instead of returning 0");
+  EXPECT_EQ(0L, p);
+}
+
+#endif
+
 }  // namespace __tsan
diff --git a/lib/ubsan/CMakeLists.txt b/lib/ubsan/CMakeLists.txt
index f35b40f..911bd3e 100644
--- a/lib/ubsan/CMakeLists.txt
+++ b/lib/ubsan/CMakeLists.txt
@@ -9,7 +9,9 @@
   )
 
 set(UBSAN_STANDALONE_SOURCES
+  ubsan_diag_standalone.cc
   ubsan_init_standalone.cc
+  ubsan_signals_standalone.cc
   )
 
 set(UBSAN_CXXABI_SOURCES
@@ -33,6 +35,13 @@
 append_rtti_flag(ON UBSAN_CXXFLAGS)
 append_list_if(SANITIZER_CAN_USE_CXXABI -DUBSAN_CAN_USE_CXXABI UBSAN_CXXFLAGS)
 
+set(UBSAN_DYNAMIC_LIBS ${SANITIZER_CXX_ABI_LIBRARY} ${SANITIZER_COMMON_LINK_LIBS})
+
+append_list_if(COMPILER_RT_HAS_LIBDL dl UBSAN_DYNAMIC_LIBS)
+append_list_if(COMPILER_RT_HAS_LIBLOG log UBSAN_DYNAMIC_LIBS)
+append_list_if(COMPILER_RT_HAS_LIBRT rt UBSAN_DYNAMIC_LIBS)
+append_list_if(COMPILER_RT_HAS_LIBPTHREAD pthread UBSAN_DYNAMIC_LIBS)
+
 add_compiler_rt_component(ubsan)
 
 if(APPLE)
@@ -67,6 +76,19 @@
                   RTUbsan_standalone
                   RTSanitizerCommon
                   RTSanitizerCommonLibc
+                  RTInterception
+      LINK_FLAGS ${WEAK_SYMBOL_LINK_FLAGS}
+      PARENT_TARGET ubsan)
+
+    add_compiler_rt_runtime(clang_rt.ubsan
+      STATIC
+      OS ${SANITIZER_COMMON_SUPPORTED_OS}
+      ARCHS ${UBSAN_SUPPORTED_ARCH}
+      OBJECT_LIBS RTUbsan
+                  RTUbsan_standalone
+                  RTSanitizerCommonNoHooks
+                  RTSanitizerCommonLibcNoHooks
+                  RTInterception
       LINK_FLAGS ${WEAK_SYMBOL_LINK_FLAGS}
       PARENT_TARGET ubsan)
   endif()
@@ -120,22 +142,24 @@
   endif()
 
   if(COMPILER_RT_HAS_UBSAN)
-    # Initializer of standalone UBSan runtime.
     add_compiler_rt_object_libraries(RTUbsan_standalone
       ARCHS ${UBSAN_SUPPORTED_ARCH}
-      SOURCES ${UBSAN_STANDALONE_SOURCES} CFLAGS ${UBSAN_STANDALONE_CFLAGS})
-    
+      SOURCES ${UBSAN_STANDALONE_SOURCES}
+      CFLAGS ${UBSAN_STANDALONE_CFLAGS})
+
     # Standalone UBSan runtimes.
     add_compiler_rt_runtime(clang_rt.ubsan_standalone
       STATIC
       ARCHS ${UBSAN_SUPPORTED_ARCH}
+      SOURCES ubsan_init_standalone_preinit.cc
       OBJECT_LIBS RTSanitizerCommon
               RTSanitizerCommonLibc
               RTUbsan
               RTUbsan_standalone
+              RTInterception
       CFLAGS ${UBSAN_CFLAGS}
       PARENT_TARGET ubsan)
-    
+
     add_compiler_rt_runtime(clang_rt.ubsan_standalone_cxx
       STATIC
       ARCHS ${UBSAN_SUPPORTED_ARCH}
@@ -144,8 +168,22 @@
       PARENT_TARGET ubsan)
 
     if (UNIX)
+      add_compiler_rt_runtime(clang_rt.ubsan_standalone
+        SHARED
+        ARCHS ${UBSAN_SUPPORTED_ARCH}
+        OBJECT_LIBS RTSanitizerCommon
+              RTSanitizerCommonLibc
+              RTUbsan
+              RTUbsan_cxx
+              RTUbsan_standalone
+              RTInterception
+        CFLAGS ${UBSAN_CFLAGS}
+        LINK_FLAGS ${SANITIZER_COMMON_LINK_FLAGS}
+        LINK_LIBS ${UBSAN_DYNAMIC_LIBS}
+        PARENT_TARGET ubsan)
+
       set(ARCHS_FOR_SYMBOLS ${UBSAN_SUPPORTED_ARCH})
-      list(REMOVE_ITEM ARCHS_FOR_SYMBOLS i386 i686)
+      list(REMOVE_ITEM ARCHS_FOR_SYMBOLS i386)
       add_sanitizer_rt_symbols(clang_rt.ubsan_standalone
         ARCHS ${ARCHS_FOR_SYMBOLS}
         PARENT_TARGET ubsan
diff --git a/lib/ubsan/ubsan_checks.inc b/lib/ubsan/ubsan_checks.inc
index 6e08641..73c6936 100644
--- a/lib/ubsan/ubsan_checks.inc
+++ b/lib/ubsan/ubsan_checks.inc
@@ -19,6 +19,7 @@
 
 UBSAN_CHECK(GenericUB, "undefined-behavior", "undefined")
 UBSAN_CHECK(NullPointerUse, "null-pointer-use", "null")
+UBSAN_CHECK(PointerOverflow, "pointer-overflow", "pointer-overflow")
 UBSAN_CHECK(MisalignedPointerUse, "misaligned-pointer-use", "alignment")
 UBSAN_CHECK(InsufficientObjectSize, "insufficient-object-size", "object-size")
 UBSAN_CHECK(SignedIntegerOverflow, "signed-integer-overflow",
@@ -28,6 +29,7 @@
 UBSAN_CHECK(IntegerDivideByZero, "integer-divide-by-zero",
             "integer-divide-by-zero")
 UBSAN_CHECK(FloatDivideByZero, "float-divide-by-zero", "float-divide-by-zero")
+UBSAN_CHECK(InvalidBuiltin, "invalid-builtin-use", "invalid-builtin-use")
 UBSAN_CHECK(InvalidShiftBase, "invalid-shift-base", "shift-base")
 UBSAN_CHECK(InvalidShiftExponent, "invalid-shift-exponent", "shift-exponent")
 UBSAN_CHECK(OutOfBoundsIndex, "out-of-bounds-index", "bounds")
diff --git a/lib/ubsan/ubsan_diag.cc b/lib/ubsan/ubsan_diag.cc
index c531c5f..bc17066 100644
--- a/lib/ubsan/ubsan_diag.cc
+++ b/lib/ubsan/ubsan_diag.cc
@@ -26,20 +26,25 @@
 
 using namespace __ubsan;
 
+void __ubsan::GetStackTraceWithPcBpAndContext(BufferedStackTrace *stack,
+                                              uptr max_depth, uptr pc, uptr bp,
+                                              void *context, bool fast) {
+  uptr top = 0;
+  uptr bottom = 0;
+  if (fast)
+    GetThreadStackTopAndBottom(false, &top, &bottom);
+  stack->Unwind(max_depth, pc, bp, context, top, bottom, fast);
+}
+
 static void MaybePrintStackTrace(uptr pc, uptr bp) {
   // We assume that flags are already parsed, as UBSan runtime
   // will definitely be called when we print the first diagnostics message.
   if (!flags()->print_stacktrace)
     return;
-  // We can only use slow unwind, as we don't have any information about stack
-  // top/bottom.
-  // FIXME: It's better to respect "fast_unwind_on_fatal" runtime flag and
-  // fetch stack top/bottom information if we have it (e.g. if we're running
-  // under ASan).
-  if (StackTrace::WillUseFastUnwind(false))
-    return;
+
   BufferedStackTrace stack;
-  stack.Unwind(kStackTraceMax, pc, bp, 0, 0, 0, false);
+  GetStackTraceWithPcBpAndContext(&stack, kStackTraceMax, pc, bp, nullptr,
+                                  common_flags()->fast_unwind_on_fatal);
   stack.Print();
 }
 
@@ -79,16 +84,16 @@
       AI.line = SLoc.getLine();
       AI.column = SLoc.getColumn();
       AI.function = internal_strdup("");  // Avoid printing ?? as function name.
-      ReportErrorSummary(ErrorKind, AI);
+      ReportErrorSummary(ErrorKind, AI, GetSanititizerToolName());
       AI.Clear();
       return;
     }
   } else if (Loc.isSymbolizedStack()) {
     const AddressInfo &AI = Loc.getSymbolizedStack()->info;
-    ReportErrorSummary(ErrorKind, AI);
+    ReportErrorSummary(ErrorKind, AI, GetSanititizerToolName());
     return;
   }
-  ReportErrorSummary(ErrorKind);
+  ReportErrorSummary(ErrorKind, GetSanititizerToolName());
 }
 
 namespace {
@@ -96,9 +101,7 @@
  public:
   Decorator() : SanitizerCommonDecorator() {}
   const char *Highlight() const { return Green(); }
-  const char *EndHighlight() const { return Default(); }
   const char *Note() const { return Black(); }
-  const char *EndNote() const { return Default(); }
 };
 }
 
@@ -294,7 +297,7 @@
     Buffer.append("%c", P == Loc ? '^' : Byte);
     Buffer.append("%c", Byte);
   }
-  Buffer.append("%s\n", Decor.EndHighlight());
+  Buffer.append("%s\n", Decor.Default());
 
   // Go over the line again, and print names for the ranges.
   InRange = 0;
@@ -334,7 +337,7 @@
 
 Diag::~Diag() {
   // All diagnostics should be printed under report mutex.
-  CommonSanitizerReportMutex.CheckLocked();
+  ScopedReport::CheckLocked();
   Decorator Decor;
   InternalScopedString Buffer(1024);
 
@@ -344,12 +347,12 @@
 
   switch (Level) {
   case DL_Error:
-    Buffer.append("%s runtime error: %s%s", Decor.Warning(), Decor.EndWarning(),
+    Buffer.append("%s runtime error: %s%s", Decor.Warning(), Decor.Default(),
                   Decor.Bold());
     break;
 
   case DL_Note:
-    Buffer.append("%s note: %s", Decor.Note(), Decor.EndNote());
+    Buffer.append("%s note: %s", Decor.Note(), Decor.Default());
     break;
   }
 
@@ -362,17 +365,15 @@
     PrintMemorySnippet(Decor, Loc.getMemoryLocation(), Ranges, NumRanges, Args);
 }
 
+ScopedReport::Initializer::Initializer() { InitAsStandaloneIfNecessary(); }
+
 ScopedReport::ScopedReport(ReportOptions Opts, Location SummaryLoc,
                            ErrorType Type)
-    : Opts(Opts), SummaryLoc(SummaryLoc), Type(Type) {
-  InitAsStandaloneIfNecessary();
-  CommonSanitizerReportMutex.Lock();
-}
+    : Opts(Opts), SummaryLoc(SummaryLoc), Type(Type) {}
 
 ScopedReport::~ScopedReport() {
   MaybePrintStackTrace(Opts.pc, Opts.bp);
   MaybeReportErrorSummary(SummaryLoc, Type);
-  CommonSanitizerReportMutex.Unlock();
   if (flags()->halt_on_error)
     Die();
 }
diff --git a/lib/ubsan/ubsan_diag.h b/lib/ubsan/ubsan_diag.h
index 3edb67a..72b2586 100644
--- a/lib/ubsan/ubsan_diag.h
+++ b/lib/ubsan/ubsan_diag.h
@@ -231,10 +231,20 @@
     GET_CALLER_PC_BP; \
     ReportOptions Opts = {unrecoverable_handler, pc, bp}
 
+void GetStackTraceWithPcBpAndContext(BufferedStackTrace *stack, uptr max_depth,
+                                     uptr pc, uptr bp, void *context,
+                                     bool fast);
+
 /// \brief Instantiate this class before printing diagnostics in the error
 /// report. This class ensures that reports from different threads and from
 /// different sanitizers won't be mixed.
 class ScopedReport {
+  struct Initializer {
+    Initializer();
+  };
+  Initializer initializer_;
+  ScopedErrorReportLock report_lock_;
+
   ReportOptions Opts;
   Location SummaryLoc;
   ErrorType Type;
@@ -242,6 +252,8 @@
 public:
   ScopedReport(ReportOptions Opts, Location SummaryLoc, ErrorType Type);
   ~ScopedReport();
+
+  static void CheckLocked() { ScopedErrorReportLock::CheckLocked(); }
 };
 
 void InitializeSuppressions();
diff --git a/lib/ubsan/ubsan_diag_standalone.cc b/lib/ubsan/ubsan_diag_standalone.cc
new file mode 100644
index 0000000..1f4a5bd
--- /dev/null
+++ b/lib/ubsan/ubsan_diag_standalone.cc
@@ -0,0 +1,38 @@
+//===-- ubsan_diag_standalone.cc ------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Diagnostic reporting for the standalone UBSan runtime.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ubsan_platform.h"
+#if CAN_SANITIZE_UB
+#include "ubsan_diag.h"
+
+using namespace __ubsan;
+
+extern "C" {
+SANITIZER_INTERFACE_ATTRIBUTE
+void __sanitizer_print_stack_trace() {
+  uptr top = 0;
+  uptr bottom = 0;
+  bool request_fast_unwind = common_flags()->fast_unwind_on_fatal;
+  if (request_fast_unwind)
+    __sanitizer::GetThreadStackTopAndBottom(false, &top, &bottom);
+
+  GET_CURRENT_PC_BP_SP;
+  (void)sp;
+  BufferedStackTrace stack;
+  stack.Unwind(kStackTraceMax, pc, bp, nullptr, top, bottom,
+               request_fast_unwind);
+  stack.Print();
+}
+} // extern "C"
+
+#endif  // CAN_SANITIZE_UB
diff --git a/lib/ubsan/ubsan_flags.cc b/lib/ubsan/ubsan_flags.cc
index 3d404c1..8e1f408 100644
--- a/lib/ubsan/ubsan_flags.cc
+++ b/lib/ubsan/ubsan_flags.cc
@@ -45,6 +45,7 @@
     CommonFlags cf;
     cf.CopyFrom(*common_flags());
     cf.print_summary = false;
+    cf.external_symbolizer_path = GetEnv("UBSAN_SYMBOLIZER_PATH");
     OverrideCommonFlags(cf);
   }
 
diff --git a/lib/ubsan/ubsan_handlers.cc b/lib/ubsan/ubsan_handlers.cc
index 6ffffae..1112ce1 100644
--- a/lib/ubsan/ubsan_handlers.cc
+++ b/lib/ubsan/ubsan_handlers.cc
@@ -38,7 +38,7 @@
 const char *TypeCheckKinds[] = {
     "load of", "store to", "reference binding to", "member access within",
     "member call on", "constructor call on", "downcast of", "downcast of",
-    "upcast of", "cast to virtual base of"};
+    "upcast of", "cast to virtual base of", "_Nonnull binding to"};
 }
 
 static void handleTypeMismatchImpl(TypeMismatchData *Data, ValueHandle Pointer,
@@ -390,7 +390,7 @@
   ScopedReport R(Opts, Loc, ET);
 
   Diag(Loc, DL_Error,
-       "value %0 is outside the range of representable values of type %2")
+       "%0 is outside the range of representable values of type %2")
       << Value(*FromType, From) << *FromType << *ToType;
 }
 
@@ -410,7 +410,8 @@
   SourceLocation Loc = Data->Loc.acquire();
   // This check could be more precise if we used different handlers for
   // -fsanitize=bool and -fsanitize=enum.
-  bool IsBool = (0 == internal_strcmp(Data->Type.getTypeName(), "'bool'"));
+  bool IsBool = (0 == internal_strcmp(Data->Type.getTypeName(), "'bool'")) ||
+                (0 == internal_strncmp(Data->Type.getTypeName(), "'BOOL'", 6));
   ErrorType ET =
       IsBool ? ErrorType::InvalidBoolLoad : ErrorType::InvalidEnumLoad;
 
@@ -436,6 +437,30 @@
   Die();
 }
 
+static void handleInvalidBuiltin(InvalidBuiltinData *Data, ReportOptions Opts) {
+  SourceLocation Loc = Data->Loc.acquire();
+  ErrorType ET = ErrorType::InvalidBuiltin;
+
+  if (ignoreReport(Loc, Opts, ET))
+    return;
+
+  ScopedReport R(Opts, Loc, ET);
+
+  Diag(Loc, DL_Error,
+       "passing zero to %0, which is not a valid argument")
+    << ((Data->Kind == BCK_CTZPassedZero) ? "ctz()" : "clz()");
+}
+
+void __ubsan::__ubsan_handle_invalid_builtin(InvalidBuiltinData *Data) {
+  GET_REPORT_OPTIONS(true);
+  handleInvalidBuiltin(Data, Opts);
+}
+void __ubsan::__ubsan_handle_invalid_builtin_abort(InvalidBuiltinData *Data) {
+  GET_REPORT_OPTIONS(true);
+  handleInvalidBuiltin(Data, Opts);
+  Die();
+}
+
 static void handleFunctionTypeMismatch(FunctionTypeMismatchData *Data,
                                        ValueHandle Function,
                                        ReportOptions Opts) {
@@ -472,8 +497,12 @@
   Die();
 }
 
-static void handleNonNullReturn(NonNullReturnData *Data, ReportOptions Opts) {
-  SourceLocation Loc = Data->Loc.acquire();
+static void handleNonNullReturn(NonNullReturnData *Data, SourceLocation *LocPtr,
+                                ReportOptions Opts, bool IsAttr) {
+  if (!LocPtr)
+    UNREACHABLE("source location pointer is null!");
+
+  SourceLocation Loc = LocPtr->acquire();
   ErrorType ET = ErrorType::InvalidNullReturn;
 
   if (ignoreReport(Loc, Opts, ET))
@@ -484,21 +513,39 @@
   Diag(Loc, DL_Error, "null pointer returned from function declared to never "
                       "return null");
   if (!Data->AttrLoc.isInvalid())
-    Diag(Data->AttrLoc, DL_Note, "returns_nonnull attribute specified here");
+    Diag(Data->AttrLoc, DL_Note, "%0 specified here")
+        << (IsAttr ? "returns_nonnull attribute"
+                   : "_Nonnull return type annotation");
 }
 
-void __ubsan::__ubsan_handle_nonnull_return(NonNullReturnData *Data) {
+void __ubsan::__ubsan_handle_nonnull_return_v1(NonNullReturnData *Data,
+                                               SourceLocation *LocPtr) {
   GET_REPORT_OPTIONS(false);
-  handleNonNullReturn(Data, Opts);
+  handleNonNullReturn(Data, LocPtr, Opts, true);
 }
 
-void __ubsan::__ubsan_handle_nonnull_return_abort(NonNullReturnData *Data) {
+void __ubsan::__ubsan_handle_nonnull_return_v1_abort(NonNullReturnData *Data,
+                                                     SourceLocation *LocPtr) {
   GET_REPORT_OPTIONS(true);
-  handleNonNullReturn(Data, Opts);
+  handleNonNullReturn(Data, LocPtr, Opts, true);
   Die();
 }
 
-static void handleNonNullArg(NonNullArgData *Data, ReportOptions Opts) {
+void __ubsan::__ubsan_handle_nullability_return_v1(NonNullReturnData *Data,
+                                                   SourceLocation *LocPtr) {
+  GET_REPORT_OPTIONS(false);
+  handleNonNullReturn(Data, LocPtr, Opts, false);
+}
+
+void __ubsan::__ubsan_handle_nullability_return_v1_abort(
+    NonNullReturnData *Data, SourceLocation *LocPtr) {
+  GET_REPORT_OPTIONS(true);
+  handleNonNullReturn(Data, LocPtr, Opts, false);
+  Die();
+}
+
+static void handleNonNullArg(NonNullArgData *Data, ReportOptions Opts,
+                             bool IsAttr) {
   SourceLocation Loc = Data->Loc.acquire();
   ErrorType ET = ErrorType::InvalidNullArgument;
 
@@ -507,20 +554,76 @@
 
   ScopedReport R(Opts, Loc, ET);
 
-  Diag(Loc, DL_Error, "null pointer passed as argument %0, which is declared to "
-       "never be null") << Data->ArgIndex;
+  Diag(Loc, DL_Error,
+       "null pointer passed as argument %0, which is declared to "
+       "never be null")
+      << Data->ArgIndex;
   if (!Data->AttrLoc.isInvalid())
-    Diag(Data->AttrLoc, DL_Note, "nonnull attribute specified here");
+    Diag(Data->AttrLoc, DL_Note, "%0 specified here")
+        << (IsAttr ? "nonnull attribute" : "_Nonnull type annotation");
 }
 
 void __ubsan::__ubsan_handle_nonnull_arg(NonNullArgData *Data) {
   GET_REPORT_OPTIONS(false);
-  handleNonNullArg(Data, Opts);
+  handleNonNullArg(Data, Opts, true);
 }
 
 void __ubsan::__ubsan_handle_nonnull_arg_abort(NonNullArgData *Data) {
   GET_REPORT_OPTIONS(true);
-  handleNonNullArg(Data, Opts);
+  handleNonNullArg(Data, Opts, true);
+  Die();
+}
+
+void __ubsan::__ubsan_handle_nullability_arg(NonNullArgData *Data) {
+  GET_REPORT_OPTIONS(false);
+  handleNonNullArg(Data, Opts, false);
+}
+
+void __ubsan::__ubsan_handle_nullability_arg_abort(NonNullArgData *Data) {
+  GET_REPORT_OPTIONS(true);
+  handleNonNullArg(Data, Opts, false);
+  Die();
+}
+
+static void handlePointerOverflowImpl(PointerOverflowData *Data,
+                                      ValueHandle Base,
+                                      ValueHandle Result,
+                                      ReportOptions Opts) {
+  SourceLocation Loc = Data->Loc.acquire();
+  ErrorType ET = ErrorType::PointerOverflow;
+
+  if (ignoreReport(Loc, Opts, ET))
+    return;
+
+  ScopedReport R(Opts, Loc, ET);
+
+  if ((sptr(Base) >= 0) == (sptr(Result) >= 0)) {
+    if (Base > Result)
+      Diag(Loc, DL_Error, "addition of unsigned offset to %0 overflowed to %1")
+          << (void *)Base << (void *)Result;
+    else
+      Diag(Loc, DL_Error,
+           "subtraction of unsigned offset from %0 overflowed to %1")
+          << (void *)Base << (void *)Result;
+  } else {
+    Diag(Loc, DL_Error,
+         "pointer index expression with base %0 overflowed to %1")
+        << (void *)Base << (void *)Result;
+  }
+}
+
+void __ubsan::__ubsan_handle_pointer_overflow(PointerOverflowData *Data,
+                                              ValueHandle Base,
+                                              ValueHandle Result) {
+  GET_REPORT_OPTIONS(false);
+  handlePointerOverflowImpl(Data, Base, Result, Opts);
+}
+
+void __ubsan::__ubsan_handle_pointer_overflow_abort(PointerOverflowData *Data,
+                                                    ValueHandle Base,
+                                                    ValueHandle Result) {
+  GET_REPORT_OPTIONS(true);
+  handlePointerOverflowImpl(Data, Base, Result, Opts);
   Die();
 }
 
@@ -549,16 +652,32 @@
 }
 
 namespace __ubsan {
+
 #ifdef UBSAN_CAN_USE_CXXABI
-SANITIZER_WEAK_ATTRIBUTE
-void HandleCFIBadType(CFICheckFailData *Data, ValueHandle Vtable,
-                      bool ValidVtable, ReportOptions Opts);
+
+#ifdef _WIN32
+
+extern "C" void __ubsan_handle_cfi_bad_type_default(CFICheckFailData *Data,
+                                                    ValueHandle Vtable,
+                                                    bool ValidVtable,
+                                                    ReportOptions Opts) {
+  Die();
+}
+
+WIN_WEAK_ALIAS(__ubsan_handle_cfi_bad_type, __ubsan_handle_cfi_bad_type_default)
 #else
-static void HandleCFIBadType(CFICheckFailData *Data, ValueHandle Vtable,
-                             bool ValidVtable, ReportOptions Opts) {
+SANITIZER_WEAK_ATTRIBUTE
+#endif
+void __ubsan_handle_cfi_bad_type(CFICheckFailData *Data, ValueHandle Vtable,
+                                 bool ValidVtable, ReportOptions Opts);
+
+#else
+void __ubsan_handle_cfi_bad_type(CFICheckFailData *Data, ValueHandle Vtable,
+                                 bool ValidVtable, ReportOptions Opts) {
   Die();
 }
 #endif
+
 }  // namespace __ubsan
 
 void __ubsan::__ubsan_handle_cfi_check_fail(CFICheckFailData *Data,
@@ -568,7 +687,7 @@
   if (Data->CheckKind == CFITCK_ICall)
     handleCFIBadIcall(Data, Value, Opts);
   else
-    HandleCFIBadType(Data, Value, ValidVtable, Opts);
+    __ubsan_handle_cfi_bad_type(Data, Value, ValidVtable, Opts);
 }
 
 void __ubsan::__ubsan_handle_cfi_check_fail_abort(CFICheckFailData *Data,
@@ -578,7 +697,7 @@
   if (Data->CheckKind == CFITCK_ICall)
     handleCFIBadIcall(Data, Value, Opts);
   else
-    HandleCFIBadType(Data, Value, ValidVtable, Opts);
+    __ubsan_handle_cfi_bad_type(Data, Value, ValidVtable, Opts);
   Die();
 }
 
diff --git a/lib/ubsan/ubsan_handlers.h b/lib/ubsan/ubsan_handlers.h
index 350eb91..311776b 100644
--- a/lib/ubsan/ubsan_handlers.h
+++ b/lib/ubsan/ubsan_handlers.h
@@ -122,6 +122,21 @@
 /// \brief Handle a load of an invalid value for the type.
 RECOVERABLE(load_invalid_value, InvalidValueData *Data, ValueHandle Val)
 
+/// Known builtin check kinds.
+/// Keep in sync with the enum of the same name in CodeGenFunction.h
+enum BuiltinCheckKind : unsigned char {
+  BCK_CTZPassedZero,
+  BCK_CLZPassedZero,
+};
+
+struct InvalidBuiltinData {
+  SourceLocation Loc;
+  unsigned char Kind;
+};
+
+/// Handle a builtin called in an invalid way.
+RECOVERABLE(invalid_builtin, InvalidBuiltinData *Data)
+
 struct FunctionTypeMismatchData {
   SourceLocation Loc;
   const TypeDescriptor &Type;
@@ -132,12 +147,13 @@
             ValueHandle Val)
 
 struct NonNullReturnData {
-  SourceLocation Loc;
   SourceLocation AttrLoc;
 };
 
-/// \brief Handle returning null from function with returns_nonnull attribute.
-RECOVERABLE(nonnull_return, NonNullReturnData *Data)
+/// \brief Handle returning null from function with the returns_nonnull
+/// attribute, or a return type annotated with _Nonnull.
+RECOVERABLE(nonnull_return_v1, NonNullReturnData *Data, SourceLocation *Loc)
+RECOVERABLE(nullability_return_v1, NonNullReturnData *Data, SourceLocation *Loc)
 
 struct NonNullArgData {
   SourceLocation Loc;
@@ -145,8 +161,17 @@
   int ArgIndex;
 };
 
-/// \brief Handle passing null pointer to function with nonnull attribute.
+/// \brief Handle passing null pointer to a function parameter with the nonnull
+/// attribute, or a _Nonnull type annotation.
 RECOVERABLE(nonnull_arg, NonNullArgData *Data)
+RECOVERABLE(nullability_arg, NonNullArgData *Data)
+
+struct PointerOverflowData {
+  SourceLocation Loc;
+};
+
+RECOVERABLE(pointer_overflow, PointerOverflowData *Data, ValueHandle Base,
+            ValueHandle Result)
 
 /// \brief Known CFI check kinds.
 /// Keep in sync with the enum of the same name in CodeGenFunction.h
@@ -167,6 +192,13 @@
 /// \brief Handle control flow integrity failures.
 RECOVERABLE(cfi_check_fail, CFICheckFailData *Data, ValueHandle Function,
             uptr VtableIsValid)
+
+struct ReportOptions;
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __ubsan_handle_cfi_bad_type(
+    CFICheckFailData *Data, ValueHandle Vtable, bool ValidVtable,
+    ReportOptions Opts);
+
 }
 
 #endif // UBSAN_HANDLERS_H
diff --git a/lib/ubsan/ubsan_handlers_cxx.cc b/lib/ubsan/ubsan_handlers_cxx.cc
index d97ec48..e15abc6 100644
--- a/lib/ubsan/ubsan_handlers_cxx.cc
+++ b/lib/ubsan/ubsan_handlers_cxx.cc
@@ -95,8 +95,8 @@
 }
 
 namespace __ubsan {
-void HandleCFIBadType(CFICheckFailData *Data, ValueHandle Vtable,
-                      bool ValidVtable, ReportOptions Opts) {
+void __ubsan_handle_cfi_bad_type(CFICheckFailData *Data, ValueHandle Vtable,
+                                 bool ValidVtable, ReportOptions Opts) {
   SourceLocation Loc = Data->Loc.acquire();
   ErrorType ET = ErrorType::CFIBadType;
 
diff --git a/lib/ubsan/ubsan_init.cc b/lib/ubsan/ubsan_init.cc
index b4f42c4..32fc434 100644
--- a/lib/ubsan/ubsan_init.cc
+++ b/lib/ubsan/ubsan_init.cc
@@ -23,11 +23,11 @@
 
 using namespace __ubsan;
 
-static enum {
-  UBSAN_MODE_UNKNOWN = 0,
-  UBSAN_MODE_STANDALONE,
-  UBSAN_MODE_PLUGIN
-} ubsan_mode;
+const char *__ubsan::GetSanititizerToolName() {
+  return "UndefinedBehaviorSanitizer";
+}
+
+static bool ubsan_initialized;
 static StaticSpinMutex ubsan_init_mu;
 
 static void CommonInit() {
@@ -35,45 +35,31 @@
 }
 
 static void CommonStandaloneInit() {
-  SanitizerToolName = "UndefinedBehaviorSanitizer";
-  InitializeFlags();
+  SanitizerToolName = GetSanititizerToolName();
   CacheBinaryName();
+  InitializeFlags();
   __sanitizer_set_report_path(common_flags()->log_path);
   AndroidLogInit();
   InitializeCoverage(common_flags()->coverage, common_flags()->coverage_dir);
   CommonInit();
-  ubsan_mode = UBSAN_MODE_STANDALONE;
 }
 
 void __ubsan::InitAsStandalone() {
-  if (SANITIZER_CAN_USE_PREINIT_ARRAY) {
-    CHECK_EQ(UBSAN_MODE_UNKNOWN, ubsan_mode);
-    CommonStandaloneInit();
-    return;
-  }
   SpinMutexLock l(&ubsan_init_mu);
-  CHECK_NE(UBSAN_MODE_PLUGIN, ubsan_mode);
-  if (ubsan_mode == UBSAN_MODE_UNKNOWN)
+  if (!ubsan_initialized) {
     CommonStandaloneInit();
+    ubsan_initialized = true;
+  }
 }
 
-void __ubsan::InitAsStandaloneIfNecessary() {
-  if (SANITIZER_CAN_USE_PREINIT_ARRAY) {
-    CHECK_NE(UBSAN_MODE_UNKNOWN, ubsan_mode);
-    return;
-  }
-  SpinMutexLock l(&ubsan_init_mu);
-  if (ubsan_mode == UBSAN_MODE_UNKNOWN)
-    CommonStandaloneInit();
-}
+void __ubsan::InitAsStandaloneIfNecessary() { return InitAsStandalone(); }
 
 void __ubsan::InitAsPlugin() {
-#if !SANITIZER_CAN_USE_PREINIT_ARRAY
   SpinMutexLock l(&ubsan_init_mu);
-#endif
-  CHECK_EQ(UBSAN_MODE_UNKNOWN, ubsan_mode);
-  CommonInit();
-  ubsan_mode = UBSAN_MODE_PLUGIN;
+  if (!ubsan_initialized) {
+    CommonInit();
+    ubsan_initialized = true;
+  }
 }
 
 #endif  // CAN_SANITIZE_UB
diff --git a/lib/ubsan/ubsan_init.h b/lib/ubsan/ubsan_init.h
index 103ae24..f12fc2c 100644
--- a/lib/ubsan/ubsan_init.h
+++ b/lib/ubsan/ubsan_init.h
@@ -15,6 +15,9 @@
 
 namespace __ubsan {
 
+// Get the full tool name for UBSan.
+const char *GetSanititizerToolName();
+
 // Initialize UBSan as a standalone tool. Typically should be called early
 // during initialization.
 void InitAsStandalone();
diff --git a/lib/ubsan/ubsan_init_standalone.cc b/lib/ubsan/ubsan_init_standalone.cc
index ff1a20e..8bd5000 100644
--- a/lib/ubsan/ubsan_init_standalone.cc
+++ b/lib/ubsan/ubsan_init_standalone.cc
@@ -18,18 +18,17 @@
 
 #include "sanitizer_common/sanitizer_internal_defs.h"
 #include "ubsan_init.h"
+#include "ubsan_signals_standalone.h"
 
-#if SANITIZER_CAN_USE_PREINIT_ARRAY
-__attribute__((section(".preinit_array"), used))
-void (*__local_ubsan_preinit)(void) = __ubsan::InitAsStandalone;
-#else
-// Use a dynamic initializer.
+namespace __ubsan {
+
 class UbsanStandaloneInitializer {
  public:
   UbsanStandaloneInitializer() {
-    __ubsan::InitAsStandalone();
+    InitAsStandalone();
+    InitializeDeadlySignals();
   }
 };
 static UbsanStandaloneInitializer ubsan_standalone_initializer;
-#endif  // SANITIZER_CAN_USE_PREINIT_ARRAY
 
+} // namespace __ubsan
diff --git a/lib/ubsan/ubsan_init_standalone_preinit.cc b/lib/ubsan/ubsan_init_standalone_preinit.cc
new file mode 100644
index 0000000..9a8c688
--- /dev/null
+++ b/lib/ubsan/ubsan_init_standalone_preinit.cc
@@ -0,0 +1,37 @@
+//===-- ubsan_init_standalone_preinit.cc
+//------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Initialization of standalone UBSan runtime.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ubsan_platform.h"
+#if !CAN_SANITIZE_UB
+#error "UBSan is not supported on this platform!"
+#endif
+
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "ubsan_init.h"
+#include "ubsan_signals_standalone.h"
+
+#if SANITIZER_CAN_USE_PREINIT_ARRAY
+
+namespace __ubsan {
+
+static void PreInitAsStandalone() {
+  InitAsStandalone();
+  InitializeDeadlySignals();
+}
+
+} // namespace __ubsan
+
+__attribute__((section(".preinit_array"), used)) void (*__local_ubsan_preinit)(
+    void) = __ubsan::PreInitAsStandalone;
+#endif // SANITIZER_CAN_USE_PREINIT_ARRAY
diff --git a/lib/ubsan/ubsan_interface.inc b/lib/ubsan/ubsan_interface.inc
index 75f080d..9beb3e2 100644
--- a/lib/ubsan/ubsan_interface.inc
+++ b/lib/ubsan/ubsan_interface.inc
@@ -11,14 +11,19 @@
 INTERFACE_FUNCTION(__ubsan_handle_add_overflow)
 INTERFACE_FUNCTION(__ubsan_handle_add_overflow_abort)
 INTERFACE_FUNCTION(__ubsan_handle_builtin_unreachable)
+INTERFACE_FUNCTION(__ubsan_handle_cfi_bad_type)
 INTERFACE_FUNCTION(__ubsan_handle_cfi_check_fail)
 INTERFACE_FUNCTION(__ubsan_handle_cfi_check_fail_abort)
 INTERFACE_FUNCTION(__ubsan_handle_divrem_overflow)
 INTERFACE_FUNCTION(__ubsan_handle_divrem_overflow_abort)
+INTERFACE_FUNCTION(__ubsan_handle_dynamic_type_cache_miss)
+INTERFACE_FUNCTION(__ubsan_handle_dynamic_type_cache_miss_abort)
 INTERFACE_FUNCTION(__ubsan_handle_float_cast_overflow)
 INTERFACE_FUNCTION(__ubsan_handle_float_cast_overflow_abort)
 INTERFACE_FUNCTION(__ubsan_handle_function_type_mismatch)
 INTERFACE_FUNCTION(__ubsan_handle_function_type_mismatch_abort)
+INTERFACE_FUNCTION(__ubsan_handle_invalid_builtin)
+INTERFACE_FUNCTION(__ubsan_handle_invalid_builtin_abort)
 INTERFACE_FUNCTION(__ubsan_handle_load_invalid_value)
 INTERFACE_FUNCTION(__ubsan_handle_load_invalid_value_abort)
 INTERFACE_FUNCTION(__ubsan_handle_missing_return)
@@ -28,10 +33,16 @@
 INTERFACE_FUNCTION(__ubsan_handle_negate_overflow_abort)
 INTERFACE_FUNCTION(__ubsan_handle_nonnull_arg)
 INTERFACE_FUNCTION(__ubsan_handle_nonnull_arg_abort)
-INTERFACE_FUNCTION(__ubsan_handle_nonnull_return)
-INTERFACE_FUNCTION(__ubsan_handle_nonnull_return_abort)
+INTERFACE_FUNCTION(__ubsan_handle_nonnull_return_v1)
+INTERFACE_FUNCTION(__ubsan_handle_nonnull_return_v1_abort)
+INTERFACE_FUNCTION(__ubsan_handle_nullability_arg)
+INTERFACE_FUNCTION(__ubsan_handle_nullability_arg_abort)
+INTERFACE_FUNCTION(__ubsan_handle_nullability_return_v1)
+INTERFACE_FUNCTION(__ubsan_handle_nullability_return_v1_abort)
 INTERFACE_FUNCTION(__ubsan_handle_out_of_bounds)
 INTERFACE_FUNCTION(__ubsan_handle_out_of_bounds_abort)
+INTERFACE_FUNCTION(__ubsan_handle_pointer_overflow)
+INTERFACE_FUNCTION(__ubsan_handle_pointer_overflow_abort)
 INTERFACE_FUNCTION(__ubsan_handle_shift_out_of_bounds)
 INTERFACE_FUNCTION(__ubsan_handle_shift_out_of_bounds_abort)
 INTERFACE_FUNCTION(__ubsan_handle_sub_overflow)
diff --git a/lib/ubsan/ubsan_platform.h b/lib/ubsan/ubsan_platform.h
index 1a3bfd6..a26c375 100644
--- a/lib/ubsan/ubsan_platform.h
+++ b/lib/ubsan/ubsan_platform.h
@@ -14,12 +14,13 @@
 #define UBSAN_PLATFORM_H
 
 // Other platforms should be easy to add, and probably work as-is.
-#if (defined(__linux__) || defined(__FreeBSD__) || defined(__APPLE__)) && \
-    (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || \
-     defined(__aarch64__) || defined(__mips__) || defined(__powerpc64__) || \
+#if (defined(__linux__) || defined(__FreeBSD__) || defined(__APPLE__) ||       \
+     defined(__NetBSD__)) &&                                                   \
+    (defined(__x86_64__) || defined(__i386__) || defined(__arm__) ||           \
+     defined(__aarch64__) || defined(__mips__) || defined(__powerpc64__) ||    \
      defined(__s390__))
 # define CAN_SANITIZE_UB 1
-#elif defined(_WIN32)
+#elif defined(_WIN32) || defined(__Fuchsia__)
 # define CAN_SANITIZE_UB 1
 #else
 # define CAN_SANITIZE_UB 0
diff --git a/lib/ubsan/ubsan_signals_standalone.cc b/lib/ubsan/ubsan_signals_standalone.cc
new file mode 100644
index 0000000..60527f8
--- /dev/null
+++ b/lib/ubsan/ubsan_signals_standalone.cc
@@ -0,0 +1,54 @@
+//=-- ubsan_signals_standalone.cc
+//------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Installs signal handlers and related interceptors for UBSan standalone.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ubsan_platform.h"
+#if CAN_SANITIZE_UB
+#include "interception/interception.h"
+#include "sanitizer_common/sanitizer_stacktrace.h"
+#include "ubsan_diag.h"
+#include "ubsan_init.h"
+
+#define COMMON_INTERCEPT_FUNCTION(name) INTERCEPT_FUNCTION(name)
+#include "sanitizer_common/sanitizer_signal_interceptors.inc"
+
+namespace __ubsan {
+
+#if SANITIZER_FUCHSIA
+void InitializeDeadlySignals() {}
+#else
+static void OnStackUnwind(const SignalContext &sig, const void *,
+                          BufferedStackTrace *stack) {
+  GetStackTraceWithPcBpAndContext(stack, kStackTraceMax, sig.pc, sig.bp,
+                                  sig.context,
+                                  common_flags()->fast_unwind_on_fatal);
+}
+
+static void UBsanOnDeadlySignal(int signo, void *siginfo, void *context) {
+  HandleDeadlySignal(siginfo, context, GetTid(), &OnStackUnwind, nullptr);
+}
+
+static bool is_initialized = false;
+
+void InitializeDeadlySignals() {
+  if (is_initialized)
+    return;
+  is_initialized = true;
+  InitializeSignalInterceptors();
+  InstallDeadlySignalHandlers(&UBsanOnDeadlySignal);
+}
+#endif
+
+} // namespace __ubsan
+
+#endif // CAN_SANITIZE_UB
diff --git a/lib/ubsan/ubsan_signals_standalone.h b/lib/ubsan/ubsan_signals_standalone.h
new file mode 100644
index 0000000..b29c294
--- /dev/null
+++ b/lib/ubsan/ubsan_signals_standalone.h
@@ -0,0 +1,25 @@
+//=-- ubsan_signals_standalone.h
+//------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Installs signal handlers and related interceptors for UBSan standalone.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef UBSAN_SIGNALS_STANDALONE_H
+#define UBSAN_SIGNALS_STANDALONE_H
+
+namespace __ubsan {
+
+// Initializes signal handlers and interceptors.
+void InitializeDeadlySignals();
+
+} // namespace __ubsan
+
+#endif // UBSAN_SIGNALS_STANDALONE_H
diff --git a/lib/ubsan/ubsan_type_hash_itanium.cc b/lib/ubsan/ubsan_type_hash_itanium.cc
index 5ae5ae0..dcce0dd 100644
--- a/lib/ubsan/ubsan_type_hash_itanium.cc
+++ b/lib/ubsan/ubsan_type_hash_itanium.cc
@@ -197,9 +197,9 @@
 };
 VtablePrefix *getVtablePrefix(void *Vtable) {
   VtablePrefix *Vptr = reinterpret_cast<VtablePrefix*>(Vtable);
-  if (!Vptr)
-    return nullptr;
   VtablePrefix *Prefix = Vptr - 1;
+  if (!IsAccessibleMemoryRange((uptr)Prefix, sizeof(VtablePrefix)))
+    return nullptr;
   if (!Prefix->TypeInfo)
     // This can't possibly be a valid vtable.
     return nullptr;
diff --git a/lib/ubsan_minimal/CMakeLists.txt b/lib/ubsan_minimal/CMakeLists.txt
new file mode 100644
index 0000000..e26fc34
--- /dev/null
+++ b/lib/ubsan_minimal/CMakeLists.txt
@@ -0,0 +1,53 @@
+# Build for the undefined behavior sanitizer runtime support library.
+
+set(UBSAN_MINIMAL_SOURCES
+  ubsan_minimal_handlers.cc
+  )
+
+include_directories(..)
+
+set(UBSAN_CFLAGS ${SANITIZER_COMMON_CFLAGS})
+append_rtti_flag(OFF UBSAN_CFLAGS)
+
+set(UBSAN_STANDALONE_CFLAGS ${SANITIZER_COMMON_CFLAGS})
+append_rtti_flag(OFF UBSAN_STANDALONE_CFLAGS)
+
+add_compiler_rt_component(ubsan-minimal)
+
+# Common parts of UBSan runtime.
+add_compiler_rt_object_libraries(RTUbsan_minimal
+  OS ${SANITIZER_COMMON_SUPPORTED_OS}
+  ARCHS ${UBSAN_COMMON_SUPPORTED_ARCH}
+  SOURCES ${UBSAN_MINIMAL_SOURCES} CFLAGS ${UBSAN_CFLAGS})
+
+
+if(COMPILER_RT_HAS_UBSAN_MINIMAL)
+  # Initializer of standalone UBSan runtime.
+
+  # Standalone UBSan runtimes.
+  add_compiler_rt_runtime(clang_rt.ubsan_minimal
+    STATIC
+    OS ${SANITIZER_COMMON_SUPPORTED_OS}
+    ARCHS ${UBSAN_SUPPORTED_ARCH}
+    OBJECT_LIBS RTUbsan_minimal
+    CFLAGS ${UBSAN_CFLAGS}
+    PARENT_TARGET ubsan-minimal)
+
+  add_compiler_rt_runtime(clang_rt.ubsan_minimal
+    SHARED
+    OS ${SANITIZER_COMMON_SUPPORTED_OS}
+    ARCHS ${UBSAN_SUPPORTED_ARCH}
+    OBJECT_LIBS RTUbsan_minimal
+    CFLAGS ${UBSAN_CFLAGS}
+    LINK_LIBS ${UBSAN_DYNAMIC_LIBS}
+    PARENT_TARGET ubsan-minimal)
+
+  if (UNIX AND NOT APPLE)
+    set(ARCHS_FOR_SYMBOLS ${UBSAN_SUPPORTED_ARCH})
+    list(REMOVE_ITEM ARCHS_FOR_SYMBOLS i386 i686)
+    add_sanitizer_rt_symbols(clang_rt.ubsan_minimal
+      ARCHS ${ARCHS_FOR_SYMBOLS}
+      PARENT_TARGET ubsan-minimal
+      EXTRA ubsan.syms.extra)
+  endif()
+endif()
diff --git a/lib/ubsan_minimal/ubsan.syms.extra b/lib/ubsan_minimal/ubsan.syms.extra
new file mode 100644
index 0000000..7f8be69
--- /dev/null
+++ b/lib/ubsan_minimal/ubsan.syms.extra
@@ -0,0 +1 @@
+__ubsan_*
diff --git a/lib/ubsan_minimal/ubsan_minimal_handlers.cc b/lib/ubsan_minimal/ubsan_minimal_handlers.cc
new file mode 100644
index 0000000..dac127b
--- /dev/null
+++ b/lib/ubsan_minimal/ubsan_minimal_handlers.cc
@@ -0,0 +1,96 @@
+#include <atomic>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+static void message(const char *msg) {
+  write(2, msg, strlen(msg));
+}
+
+static const int kMaxCallerPcs = 20;
+static std::atomic<void *> caller_pcs[kMaxCallerPcs];
+// Number of elements in caller_pcs. A special value of kMaxCallerPcs + 1 means
+// that "too many errors" has already been reported.
+static std::atomic<int> caller_pcs_sz;
+
+__attribute__((noinline))
+static bool report_this_error(void *caller) {
+  if (caller == nullptr) return false;
+  while (true) {
+    int sz = caller_pcs_sz.load(std::memory_order_relaxed);
+    if (sz > kMaxCallerPcs) return false; // early exit
+    // when sz==kMaxCallerPcs print "too many errors", but only when cmpxchg
+    // succeeds in order to not print it multiple times.
+    if (sz > 0 && sz < kMaxCallerPcs) {
+      void *p;
+      for (int i = 0; i < sz; ++i) {
+        p = caller_pcs[i].load(std::memory_order_relaxed);
+        if (p == nullptr) break; // Concurrent update.
+        if (p == caller) return false;
+      }
+      if (p == nullptr) continue; // FIXME: yield?
+    }
+
+    if (!caller_pcs_sz.compare_exchange_strong(sz, sz + 1))
+      continue; // Concurrent update! Try again from the start.
+
+    if (sz == kMaxCallerPcs) {
+      message("ubsan: too many errors\n");
+      return false;
+    }
+    caller_pcs[sz].store(caller, std::memory_order_relaxed);
+    return true;
+  }
+}
+
+#if defined(__ANDROID__)
+extern "C" __attribute__((weak)) void android_set_abort_message(const char *);
+static void abort_with_message(const char *msg) {
+  if (&android_set_abort_message) android_set_abort_message(msg);
+  abort();
+}
+#else
+static void abort_with_message(const char *) { abort(); }
+#endif
+
+#define INTERFACE extern "C" __attribute__((visibility("default")))
+
+// FIXME: add caller pc to the error message (possibly as "ubsan: error-type
+// @1234ABCD").
+#define HANDLER_RECOVER(name, msg)                               \
+  INTERFACE void __ubsan_handle_##name##_minimal() {             \
+    if (!report_this_error(__builtin_return_address(0))) return; \
+    message("ubsan: " msg "\n");                                 \
+  }
+
+#define HANDLER_NORECOVER(name, msg)                             \
+  INTERFACE void __ubsan_handle_##name##_minimal_abort() {       \
+    message("ubsan: " msg "\n");                                 \
+    abort_with_message("ubsan: " msg);                           \
+  }
+
+#define HANDLER(name, msg)                                       \
+  HANDLER_RECOVER(name, msg)                                     \
+  HANDLER_NORECOVER(name, msg)
+
+HANDLER(type_mismatch, "type-mismatch")
+HANDLER(add_overflow, "add-overflow")
+HANDLER(sub_overflow, "sub-overflow")
+HANDLER(mul_overflow, "mul-overflow")
+HANDLER(negate_overflow, "negate-overflow")
+HANDLER(divrem_overflow, "divrem-overflow")
+HANDLER(shift_out_of_bounds, "shift-out-of-bounds")
+HANDLER(out_of_bounds, "out-of-bounds")
+HANDLER_RECOVER(builtin_unreachable, "builtin-unreachable")
+HANDLER_RECOVER(missing_return, "missing-return")
+HANDLER(vla_bound_not_positive, "vla-bound-not-positive")
+HANDLER(float_cast_overflow, "float-cast-overflow")
+HANDLER(load_invalid_value, "load-invalid-value")
+HANDLER(invalid_builtin, "invalid-builtin")
+HANDLER(function_type_mismatch, "function-type-mismatch")
+HANDLER(nonnull_arg, "nonnull-arg")
+HANDLER(nonnull_return, "nonnull-return")
+HANDLER(nullability_arg, "nullability-arg")
+HANDLER(nullability_return, "nullability-return")
+HANDLER(pointer_overflow, "pointer-overflow")
+HANDLER(cfi_check_fail, "cfi-check-fail")
diff --git a/lib/xray/CMakeLists.txt b/lib/xray/CMakeLists.txt
index 72caa9f..6d24ba8 100644
--- a/lib/xray/CMakeLists.txt
+++ b/lib/xray/CMakeLists.txt
@@ -62,6 +62,8 @@
 set(XRAY_COMMON_DEFINITIONS XRAY_HAS_EXCEPTIONS=1)
 append_list_if(
   COMPILER_RT_HAS_XRAY_COMPILER_FLAG XRAY_SUPPORTED=1 XRAY_COMMON_DEFINITIONS)
+append_list_if(
+  COMPILER_RT_BUILD_XRAY_NO_PREINIT XRAY_NO_PREINIT XRAY_COMMON_DEFINITIONS)
 
 add_compiler_rt_object_libraries(RTXray
   ARCHS ${XRAY_SUPPORTED_ARCH}
diff --git a/lib/xray/tests/CMakeLists.txt b/lib/xray/tests/CMakeLists.txt
index 43878cb..e54e63f 100644
--- a/lib/xray/tests/CMakeLists.txt
+++ b/lib/xray/tests/CMakeLists.txt
@@ -11,47 +11,23 @@
   -I${COMPILER_RT_SOURCE_DIR}/lib/xray
   -I${COMPILER_RT_SOURCE_DIR}/lib)
 
-macro(xray_compile obj_list source arch)
-  get_filename_component(basename ${source} NAME)
-  set(output_obj "${basename}.${arch}.o")
-  get_target_flags_for_arch(${arch} TARGET_CFLAGS)
-  if(NOT COMPILER_RT_STANDALONE_BUILD)
-    list(APPEND COMPILE_DEPS gtest_main xray)
-  endif()
-  clang_compile(${output_obj} ${source}
-    CFLAGS ${XRAY_UNITTEST_CFLAGS} ${TARGET_CFLAGS}
-    DEPS ${COMPILE_DEPS})
-  list(APPEND ${obj_list} ${output_obj})
-endmacro()
-
+set(XRAY_TEST_ARCH ${XRAY_SUPPORTED_ARCH})
 macro(add_xray_unittest testname)
-  set(XRAY_TEST_ARCH ${XRAY_SUPPORTED_ARCH})
-  if (APPLE)
-    darwin_filter_host_archs(XRAY_SUPPORTED_ARCH)
-  endif()
-  if(UNIX)
+  cmake_parse_arguments(TEST "" "" "SOURCES;HEADERS" ${ARGN})
+  if(UNIX AND NOT APPLE)
     foreach(arch ${XRAY_TEST_ARCH})
-      cmake_parse_arguments(TEST "" "" "SOURCES;HEADERS" ${ARGN})
       set(TEST_OBJECTS)
-      foreach(SOURCE ${TEST_SOURCES} ${COMPILER_RT_GTEST_SOURCE})
-        xray_compile(TEST_OBJECTS ${SOURCE} ${arch} ${TEST_HEADERS})
-      endforeach()
-      get_target_flags_for_arch(${arch} TARGET_LINK_FLAGS)
-      set(TEST_DEPS ${TEST_OBJECTS})
-      if(NOT COMPILER_RT_STANDALONE_BUILD)
-        list(APPEND TEST_DEPS gtest_main xray)
-      endif()
-      if(NOT APPLE)
-        add_compiler_rt_test(XRayUnitTests ${testname}-${arch}
-          OBJECTS ${TEST_OBJECTS}
-          DEPS ${TEST_DEPS}
-          LINK_FLAGS ${TARGET_LINK_FLAGS}
+      generate_compiler_rt_tests(TEST_OBJECTS
+        XRayUnitTests "${testname}-${arch}-Test" "${arch}"
+        SOURCES ${TEST_SOURCES} ${COMPILER_RT_GTEST_SOURCE}
+        DEPS gtest xray llvm-xray
+        CFLAGS ${XRAY_UNITTEST_CFLAGS}
+        LINK_FLAGS -fxray-instrument
+          ${TARGET_LINK_FLAGS}
           -lstdc++ -lm ${CMAKE_THREAD_LIBS_INIT}
           -lpthread
-          -L${COMPILER_RT_LIBRARY_OUTPUT_DIR} -lclang_rt.xray-${arch}
-          -latomic -ldl -lrt)
-      endif()
-      # FIXME: Figure out how to run even just the unit tests on APPLE.
+          -ldl -lrt)
+      set_target_properties(XRayUnitTests PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
     endforeach()
   endif()
 endmacro()
diff --git a/lib/xray/tests/unit/buffer_queue_test.cc b/lib/xray/tests/unit/buffer_queue_test.cc
index 4db762d..1ec7469 100644
--- a/lib/xray/tests/unit/buffer_queue_test.cc
+++ b/lib/xray/tests/unit/buffer_queue_test.cc
@@ -14,7 +14,6 @@
 #include "gtest/gtest.h"
 
 #include <future>
-#include <system_error>
 #include <unistd.h>
 
 namespace __xray {
@@ -32,9 +31,9 @@
   BufferQueue Buffers(kSize, 1, Success);
   ASSERT_TRUE(Success);
   BufferQueue::Buffer Buf;
-  ASSERT_EQ(Buffers.getBuffer(Buf), std::error_code());
+  ASSERT_EQ(Buffers.getBuffer(Buf), BufferQueue::ErrorCode::Ok);
   ASSERT_NE(nullptr, Buf.Buffer);
-  ASSERT_EQ(Buffers.releaseBuffer(Buf), std::error_code());
+  ASSERT_EQ(Buffers.releaseBuffer(Buf), BufferQueue::ErrorCode::Ok);
   ASSERT_EQ(nullptr, Buf.Buffer);
 }
 
@@ -43,11 +42,10 @@
   BufferQueue Buffers(kSize, 1, Success);
   ASSERT_TRUE(Success);
   BufferQueue::Buffer Buf0;
-  EXPECT_EQ(Buffers.getBuffer(Buf0), std::error_code());
+  EXPECT_EQ(Buffers.getBuffer(Buf0), BufferQueue::ErrorCode::Ok);
   BufferQueue::Buffer Buf1;
-  EXPECT_EQ(std::make_error_code(std::errc::not_enough_memory),
-            Buffers.getBuffer(Buf1));
-  EXPECT_EQ(Buffers.releaseBuffer(Buf0), std::error_code());
+  EXPECT_EQ(BufferQueue::ErrorCode::NotEnoughMemory, Buffers.getBuffer(Buf1));
+  EXPECT_EQ(Buffers.releaseBuffer(Buf0), BufferQueue::ErrorCode::Ok);
 }
 
 TEST(BufferQueueTest, ReleaseUnknown) {
@@ -57,7 +55,7 @@
   BufferQueue::Buffer Buf;
   Buf.Buffer = reinterpret_cast<void *>(0xdeadbeef);
   Buf.Size = kSize;
-  EXPECT_EQ(std::make_error_code(std::errc::argument_out_of_domain),
+  EXPECT_EQ(BufferQueue::ErrorCode::UnrecognizedBuffer,
             Buffers.releaseBuffer(Buf));
 }
 
@@ -66,15 +64,15 @@
   BufferQueue Buffers(kSize, 2, Success);
   ASSERT_TRUE(Success);
   BufferQueue::Buffer Buf;
-  ASSERT_EQ(Buffers.getBuffer(Buf), std::error_code());
+  ASSERT_EQ(Buffers.getBuffer(Buf), BufferQueue::ErrorCode::Ok);
   ASSERT_NE(nullptr, Buf.Buffer);
-  ASSERT_EQ(Buffers.finalize(), std::error_code());
+  ASSERT_EQ(Buffers.finalize(), BufferQueue::ErrorCode::Ok);
   BufferQueue::Buffer OtherBuf;
-  ASSERT_EQ(std::make_error_code(std::errc::state_not_recoverable),
+  ASSERT_EQ(BufferQueue::ErrorCode::QueueFinalizing,
             Buffers.getBuffer(OtherBuf));
-  ASSERT_EQ(std::make_error_code(std::errc::state_not_recoverable),
+  ASSERT_EQ(BufferQueue::ErrorCode::QueueFinalizing,
             Buffers.finalize());
-  ASSERT_EQ(Buffers.releaseBuffer(Buf), std::error_code());
+  ASSERT_EQ(Buffers.releaseBuffer(Buf), BufferQueue::ErrorCode::Ok);
 }
 
 TEST(BufferQueueTest, MultiThreaded) {
@@ -83,14 +81,17 @@
   ASSERT_TRUE(Success);
   auto F = [&] {
     BufferQueue::Buffer B;
-    while (!Buffers.getBuffer(B)) {
+    while (true) {
+      auto EC = Buffers.getBuffer(B);
+      if (EC != BufferQueue::ErrorCode::Ok)
+        return;
       Buffers.releaseBuffer(B);
     }
   };
   auto T0 = std::async(std::launch::async, F);
   auto T1 = std::async(std::launch::async, F);
   auto T2 = std::async(std::launch::async, [&] {
-    while (!Buffers.finalize())
+    while (Buffers.finalize() != BufferQueue::ErrorCode::Ok)
       ;
   });
   F();
@@ -103,8 +104,8 @@
   auto Count = 0;
   BufferQueue::Buffer B;
   for (int I = 0; I < 10; ++I) {
-    ASSERT_FALSE(Buffers.getBuffer(B));
-    ASSERT_FALSE(Buffers.releaseBuffer(B));
+    ASSERT_EQ(Buffers.getBuffer(B), BufferQueue::ErrorCode::Ok);
+    ASSERT_EQ(Buffers.releaseBuffer(B), BufferQueue::ErrorCode::Ok);
   }
   Buffers.apply([&](const BufferQueue::Buffer &B) { ++Count; });
   ASSERT_EQ(Count, 10);
diff --git a/lib/xray/tests/unit/fdr_logging_test.cc b/lib/xray/tests/unit/fdr_logging_test.cc
index 0d5e99a..edc5e3c 100644
--- a/lib/xray/tests/unit/fdr_logging_test.cc
+++ b/lib/xray/tests/unit/fdr_logging_test.cc
@@ -57,7 +57,6 @@
   fdrLoggingHandleArg0(1, XRayEntryType::EXIT);
   ASSERT_EQ(fdrLoggingFinalize(), XRayLogInitStatus::XRAY_LOG_FINALIZED);
   ASSERT_EQ(fdrLoggingFlush(), XRayLogFlushStatus::XRAY_LOG_FLUSHED);
-  ASSERT_EQ(fdrLoggingReset(), XRayLogInitStatus::XRAY_LOG_UNINITIALIZED);
 
   // To do this properly, we have to close the file descriptor then re-open the
   // file for reading this time.
@@ -98,7 +97,6 @@
   }
   ASSERT_EQ(fdrLoggingFinalize(), XRayLogInitStatus::XRAY_LOG_FINALIZED);
   ASSERT_EQ(fdrLoggingFlush(), XRayLogFlushStatus::XRAY_LOG_FLUSHED);
-  ASSERT_EQ(fdrLoggingReset(), XRayLogInitStatus::XRAY_LOG_UNINITIALIZED);
 
   // To do this properly, we have to close the file descriptor then re-open the
   // file for reading this time.
diff --git a/lib/xray/xray_AArch64.cc b/lib/xray/xray_AArch64.cc
index 8d1c7c5..f26e77d 100644
--- a/lib/xray/xray_AArch64.cc
+++ b/lib/xray/xray_AArch64.cc
@@ -18,8 +18,7 @@
 #include <atomic>
 #include <cassert>
 
-
-extern "C" void __clear_cache(void* start, void* end);
+extern "C" void __clear_cache(void *start, void *end);
 
 namespace __xray {
 
@@ -86,8 +85,8 @@
         reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
         uint32_t(PatchOpcodes::PO_B32), std::memory_order_release);
   }
-  __clear_cache(reinterpret_cast<char*>(FirstAddress),
-      reinterpret_cast<char*>(CurAddress));
+  __clear_cache(reinterpret_cast<char *>(FirstAddress),
+                reinterpret_cast<char *>(CurAddress));
   return true;
 }
 
@@ -107,6 +106,12 @@
   return patchSled(Enable, FuncId, Sled, __xray_FunctionTailExit);
 }
 
+bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
+                      const XRaySledEntry &Sled)
+    XRAY_NEVER_INSTRUMENT { // FIXME: Implement in aarch64?
+  return false;
+}
+
 // FIXME: Maybe implement this better?
 bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; }
 
diff --git a/lib/xray/xray_always_instrument.txt b/lib/xray/xray_always_instrument.txt
new file mode 100644
index 0000000..151ed70
--- /dev/null
+++ b/lib/xray/xray_always_instrument.txt
@@ -0,0 +1,6 @@
+# List of function matchers common to C/C++ applications that make sense to
+# always instrument. You can use this as an argument to
+# -fxray-always-instrument=<path> along with your project-specific lists.
+
+# Always instrument the main function.
+fun:main
diff --git a/lib/xray/xray_arm.cc b/lib/xray/xray_arm.cc
index 26d673e..da4efcd 100644
--- a/lib/xray/xray_arm.cc
+++ b/lib/xray/xray_arm.cc
@@ -18,7 +18,7 @@
 #include <atomic>
 #include <cassert>
 
-extern "C" void __clear_cache(void* start, void* end);
+extern "C" void __clear_cache(void *start, void *end);
 
 namespace __xray {
 
@@ -122,8 +122,8 @@
         reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
         uint32_t(PatchOpcodes::PO_B20), std::memory_order_release);
   }
-  __clear_cache(reinterpret_cast<char*>(FirstAddress),
-      reinterpret_cast<char*>(CurAddress));
+  __clear_cache(reinterpret_cast<char *>(FirstAddress),
+                reinterpret_cast<char *>(CurAddress));
   return true;
 }
 
@@ -143,6 +143,12 @@
   return patchSled(Enable, FuncId, Sled, __xray_FunctionTailExit);
 }
 
+bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
+                      const XRaySledEntry &Sled)
+    XRAY_NEVER_INSTRUMENT { // FIXME: Implement in arm?
+  return false;
+}
+
 // FIXME: Maybe implement this better?
 bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; }
 
diff --git a/lib/xray/xray_buffer_queue.cc b/lib/xray/xray_buffer_queue.cc
index bd8f496..34bf3cf 100644
--- a/lib/xray/xray_buffer_queue.cc
+++ b/lib/xray/xray_buffer_queue.cc
@@ -13,64 +13,94 @@
 //
 //===----------------------------------------------------------------------===//
 #include "xray_buffer_queue.h"
-#include <cassert>
-#include <cstdlib>
+#include "sanitizer_common/sanitizer_allocator_internal.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_libc.h"
 
 using namespace __xray;
+using namespace __sanitizer;
 
-BufferQueue::BufferQueue(std::size_t B, std::size_t N, bool &Success)
-    : BufferSize(B), Buffers(N), Mutex(), OwnedBuffers(), Finalizing(false) {
-  for (auto &T : Buffers) {
-    void *Tmp = malloc(BufferSize);
+BufferQueue::BufferQueue(size_t B, size_t N, bool &Success)
+    : BufferSize(B),
+      Buffers(new BufferRep[N]()),
+      BufferCount(N),
+      Finalizing{0},
+      OwnedBuffers(new void *[N]()),
+      Next(Buffers),
+      First(Buffers),
+      LiveBuffers(0) {
+  for (size_t i = 0; i < N; ++i) {
+    auto &T = Buffers[i];
+    void *Tmp = InternalAlloc(BufferSize);
     if (Tmp == nullptr) {
       Success = false;
       return;
     }
-
-    auto &Buf = std::get<0>(T);
+    auto &Buf = T.Buff;
     Buf.Buffer = Tmp;
     Buf.Size = B;
-    OwnedBuffers.emplace(Tmp);
+    OwnedBuffers[i] = Tmp;
   }
   Success = true;
 }
 
-std::error_code BufferQueue::getBuffer(Buffer &Buf) {
-  if (Finalizing.load(std::memory_order_acquire))
-    return std::make_error_code(std::errc::state_not_recoverable);
-  std::lock_guard<std::mutex> Guard(Mutex);
-  if (Buffers.empty())
-    return std::make_error_code(std::errc::not_enough_memory);
-  auto &T = Buffers.front();
-  auto &B = std::get<0>(T);
+BufferQueue::ErrorCode BufferQueue::getBuffer(Buffer &Buf) {
+  if (__sanitizer::atomic_load(&Finalizing, __sanitizer::memory_order_acquire))
+    return ErrorCode::QueueFinalizing;
+  __sanitizer::SpinMutexLock Guard(&Mutex);
+  if (LiveBuffers == BufferCount) return ErrorCode::NotEnoughMemory;
+
+  auto &T = *Next;
+  auto &B = T.Buff;
   Buf = B;
-  B.Buffer = nullptr;
-  B.Size = 0;
-  Buffers.pop_front();
-  return {};
+  ++LiveBuffers;
+
+  if (++Next == (Buffers + BufferCount)) Next = Buffers;
+
+  return ErrorCode::Ok;
 }
 
-std::error_code BufferQueue::releaseBuffer(Buffer &Buf) {
-  if (OwnedBuffers.count(Buf.Buffer) == 0)
-    return std::make_error_code(std::errc::argument_out_of_domain);
-  std::lock_guard<std::mutex> Guard(Mutex);
+BufferQueue::ErrorCode BufferQueue::releaseBuffer(Buffer &Buf) {
+  // Blitz through the buffers array to find the buffer.
+  bool Found = false;
+  for (auto I = OwnedBuffers, E = OwnedBuffers + BufferCount; I != E; ++I) {
+    if (*I == Buf.Buffer) {
+      Found = true;
+      break;
+    }
+  }
+  if (!Found) return ErrorCode::UnrecognizedBuffer;
+
+  __sanitizer::SpinMutexLock Guard(&Mutex);
+
+  // This points to a semantic bug, we really ought to not be releasing more
+  // buffers than we actually get.
+  if (LiveBuffers == 0) return ErrorCode::NotEnoughMemory;
 
   // Now that the buffer has been released, we mark it as "used".
-  Buffers.emplace(Buffers.end(), Buf, true /* used */);
+  First->Buff = Buf;
+  First->Used = true;
   Buf.Buffer = nullptr;
   Buf.Size = 0;
-  return {};
+  --LiveBuffers;
+  if (++First == (Buffers + BufferCount)) First = Buffers;
+
+  return ErrorCode::Ok;
 }
 
-std::error_code BufferQueue::finalize() {
-  if (Finalizing.exchange(true, std::memory_order_acq_rel))
-    return std::make_error_code(std::errc::state_not_recoverable);
-  return {};
+BufferQueue::ErrorCode BufferQueue::finalize() {
+  if (__sanitizer::atomic_exchange(&Finalizing, 1,
+                                   __sanitizer::memory_order_acq_rel))
+    return ErrorCode::QueueFinalizing;
+  return ErrorCode::Ok;
 }
 
 BufferQueue::~BufferQueue() {
-  for (auto &T : Buffers) {
-    auto &Buf = std::get<0>(T);
-    free(Buf.Buffer);
+  for (auto I = Buffers, E = Buffers + BufferCount; I != E; ++I) {
+    auto &T = *I;
+    auto &Buf = T.Buff;
+    InternalFree(Buf.Buffer);
   }
+  delete[] Buffers;
+  delete[] OwnedBuffers;
 }
diff --git a/lib/xray/xray_buffer_queue.h b/lib/xray/xray_buffer_queue.h
index 3898437..9257657 100644
--- a/lib/xray/xray_buffer_queue.h
+++ b/lib/xray/xray_buffer_queue.h
@@ -15,13 +15,9 @@
 #ifndef XRAY_BUFFER_QUEUE_H
 #define XRAY_BUFFER_QUEUE_H
 
-#include <atomic>
-#include <cstdint>
-#include <deque>
-#include <mutex>
-#include <system_error>
-#include <unordered_set>
-#include <utility>
+#include <cstddef>
+#include "sanitizer_common/sanitizer_atomic.h"
+#include "sanitizer_common/sanitizer_mutex.h"
 
 namespace __xray {
 
@@ -31,26 +27,72 @@
 /// the "flight data recorder" (FDR) mode to support ongoing XRay function call
 /// trace collection.
 class BufferQueue {
-public:
+ public:
   struct Buffer {
     void *Buffer = nullptr;
-    std::size_t Size = 0;
+    size_t Size = 0;
   };
 
-private:
-  std::size_t BufferSize;
+ private:
+  struct BufferRep {
+    // The managed buffer.
+    Buffer Buff;
 
-  // We use a bool to indicate whether the Buffer has been used in this
-  // freelist implementation.
-  std::deque<std::tuple<Buffer, bool>> Buffers;
-  std::mutex Mutex;
-  std::unordered_set<void *> OwnedBuffers;
-  std::atomic<bool> Finalizing;
+    // This is true if the buffer has been returned to the available queue, and
+    // is considered "used" by another thread.
+    bool Used = false;
+  };
 
-public:
+  // Size of each individual Buffer.
+  size_t BufferSize;
+
+  BufferRep *Buffers;
+  size_t BufferCount;
+
+  __sanitizer::SpinMutex Mutex;
+  __sanitizer::atomic_uint8_t Finalizing;
+
+  // Pointers to buffers managed/owned by the BufferQueue.
+  void **OwnedBuffers;
+
+  // Pointer to the next buffer to be handed out.
+  BufferRep *Next;
+
+  // Pointer to the entry in the array where the next released buffer will be
+  // placed.
+  BufferRep *First;
+
+  // Count of buffers that have been handed out through 'getBuffer'.
+  size_t LiveBuffers;
+
+ public:
+  enum class ErrorCode : unsigned {
+    Ok,
+    NotEnoughMemory,
+    QueueFinalizing,
+    UnrecognizedBuffer,
+    AlreadyFinalized,
+  };
+
+  static const char *getErrorString(ErrorCode E) {
+    switch (E) {
+      case ErrorCode::Ok:
+        return "(none)";
+      case ErrorCode::NotEnoughMemory:
+        return "no available buffers in the queue";
+      case ErrorCode::QueueFinalizing:
+        return "queue already finalizing";
+      case ErrorCode::UnrecognizedBuffer:
+        return "buffer being returned not owned by buffer queue";
+      case ErrorCode::AlreadyFinalized:
+        return "queue already finalized";
+    }
+    return "unknown error";
+  }
+
   /// Initialise a queue of size |N| with buffers of size |B|. We report success
   /// through |Success|.
-  BufferQueue(std::size_t B, std::size_t N, bool &Success);
+  BufferQueue(size_t B, size_t N, bool &Success);
 
   /// Updates |Buf| to contain the pointer to an appropriate buffer. Returns an
   /// error in case there are no available buffers to return when we will run
@@ -60,18 +102,27 @@
   ///   - BufferQueue is not finalising.
   ///
   /// Returns:
-  ///   - std::errc::not_enough_memory on exceeding MaxSize.
-  ///   - no error when we find a Buffer.
-  ///   - std::errc::state_not_recoverable on finalising BufferQueue.
-  std::error_code getBuffer(Buffer &Buf);
+  ///   - ErrorCode::NotEnoughMemory on exceeding MaxSize.
+  ///   - ErrorCode::Ok when we find a Buffer.
+  ///   - ErrorCode::QueueFinalizing or ErrorCode::AlreadyFinalized on
+  ///     a finalizing/finalized BufferQueue.
+  ErrorCode getBuffer(Buffer &Buf);
 
   /// Updates |Buf| to point to nullptr, with size 0.
   ///
   /// Returns:
-  ///   - ...
-  std::error_code releaseBuffer(Buffer &Buf);
+  ///   - ErrorCode::Ok when we successfully release the buffer.
+  ///   - ErrorCode::UnrecognizedBuffer for when this BufferQueue does not own
+  ///     the buffer being released.
+  ErrorCode releaseBuffer(Buffer &Buf);
 
-  bool finalizing() const { return Finalizing.load(std::memory_order_acquire); }
+  bool finalizing() const {
+    return __sanitizer::atomic_load(&Finalizing,
+                                    __sanitizer::memory_order_acquire);
+  }
+
+  /// Returns the configured size of the buffers in the buffer queue.
+  size_t ConfiguredBufferSize() const { return BufferSize; }
 
   /// Sets the state of the BufferQueue to finalizing, which ensures that:
   ///
@@ -79,17 +130,18 @@
   ///   - All releaseBuffer operations will not fail.
   ///
   /// After a call to finalize succeeds, all subsequent calls to finalize will
-  /// fail with std::errc::state_not_recoverable.
-  std::error_code finalize();
+  /// fail with ErrorCode::QueueFinalizing.
+  ErrorCode finalize();
 
   /// Applies the provided function F to each Buffer in the queue, only if the
   /// Buffer is marked 'used' (i.e. has been the result of getBuffer(...) and a
-  /// releaseBuffer(...) operation.
-  template <class F> void apply(F Fn) {
-    std::lock_guard<std::mutex> G(Mutex);
-    for (const auto &T : Buffers) {
-      if (std::get<1>(T))
-        Fn(std::get<0>(T));
+  /// releaseBuffer(...) operation).
+  template <class F>
+  void apply(F Fn) {
+    __sanitizer::SpinMutexLock G(&Mutex);
+    for (auto I = Buffers, E = Buffers + BufferCount; I != E; ++I) {
+      const auto &T = *I;
+      if (T.Used) Fn(T.Buff);
     }
   }
 
@@ -97,6 +149,6 @@
   ~BufferQueue();
 };
 
-} // namespace __xray
+}  // namespace __xray
 
-#endif // XRAY_BUFFER_QUEUE_H
+#endif  // XRAY_BUFFER_QUEUE_H
diff --git a/lib/xray/xray_fdr_log_records.h b/lib/xray/xray_fdr_log_records.h
new file mode 100644
index 0000000..f475e81
--- /dev/null
+++ b/lib/xray/xray_fdr_log_records.h
@@ -0,0 +1,67 @@
+//===-- xray_fdr_log_records.h  -------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a function call tracing system.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_XRAY_FDR_LOG_RECORDS_H
+#define XRAY_XRAY_FDR_LOG_RECORDS_H
+
+enum class RecordType : uint8_t { Function, Metadata };
+
+// A MetadataRecord encodes the kind of record in its first byte, and have 15
+// additional bytes in the end to hold free-form data.
+struct alignas(16) MetadataRecord {
+  // A MetadataRecord must always have a type of 1.
+  /* RecordType */ uint8_t Type : 1;
+
+  // Each kind of record is represented as a 7-bit value (even though we use an
+  // unsigned 8-bit enum class to do so).
+  enum class RecordKinds : uint8_t {
+    NewBuffer,
+    EndOfBuffer,
+    NewCPUId,
+    TSCWrap,
+    WalltimeMarker,
+    CustomEventMarker,
+    CallArgument,
+  };
+  // Use 7 bits to identify this record type.
+  /* RecordKinds */ uint8_t RecordKind : 7;
+  char Data[15];
+} __attribute__((packed));
+
+static_assert(sizeof(MetadataRecord) == 16, "Wrong size for MetadataRecord.");
+
+struct alignas(8) FunctionRecord {
+  // A FunctionRecord must always have a type of 0.
+  /* RecordType */ uint8_t Type : 1;
+  enum class RecordKinds {
+    FunctionEnter = 0x00,
+    FunctionExit = 0x01,
+    FunctionTailExit = 0x02,
+  };
+  /* RecordKinds */ uint8_t RecordKind : 3;
+
+  // We only use 28 bits of the function ID, so that we can use as few bytes as
+  // possible. This means we only support 2^28 (268,435,456) unique function ids
+  // in a single binary.
+  int FuncId : 28;
+
+  // We use another 4 bytes to hold the delta between the previous entry's TSC.
+  // In case we've found that the distance is greater than the allowable 32 bits
+  // (either because we are running in a different CPU and the TSC might be
+  // different then), we should use a MetadataRecord before this FunctionRecord
+  // that will contain the full TSC for that CPU, and keep this to 0.
+  uint32_t TSCDelta;
+} __attribute__((packed));
+
+static_assert(sizeof(FunctionRecord) == 8, "Wrong size for FunctionRecord.");
+
+#endif // XRAY_XRAY_FDR_LOG_RECORDS_H
diff --git a/lib/xray/xray_fdr_logging.cc b/lib/xray/xray_fdr_logging.cc
index bae7d4c..cf27acc 100644
--- a/lib/xray/xray_fdr_logging.cc
+++ b/lib/xray/xray_fdr_logging.cc
@@ -7,7 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file is a part of XRay, a dynamic runtime instruementation system.
+// This file is a part of XRay, a dynamic runtime instrumentation system.
 //
 // Here we implement the Flight Data Recorder mode for XRay, where we use
 // compact structures to store records in memory as well as when writing out the
@@ -15,22 +15,19 @@
 //
 //===----------------------------------------------------------------------===//
 #include "xray_fdr_logging.h"
-#include <algorithm>
-#include <bitset>
-#include <cassert>
-#include <cstring>
-#include <memory>
 #include <sys/syscall.h>
 #include <sys/time.h>
+#include <errno.h>
 #include <time.h>
 #include <unistd.h>
-#include <unordered_map>
 
+#include "sanitizer_common/sanitizer_atomic.h"
 #include "sanitizer_common/sanitizer_common.h"
 #include "xray/xray_interface.h"
 #include "xray/xray_records.h"
 #include "xray_buffer_queue.h"
 #include "xray_defs.h"
+#include "xray_fdr_logging_impl.h"
 #include "xray_flags.h"
 #include "xray_tsc.h"
 #include "xray_utils.h"
@@ -38,62 +35,35 @@
 namespace __xray {
 
 // Global BufferQueue.
-std::shared_ptr<BufferQueue> BQ;
+// NOTE: This is a pointer to avoid having to do atomic operations at
+// initialization time. This is OK to leak as there will only be one bufferqueue
+// for the runtime, initialized once through the fdrInit(...) sequence.
+std::shared_ptr<BufferQueue> *BQ = nullptr;
 
-std::atomic<XRayLogInitStatus> LoggingStatus{
-    XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
-
-std::atomic<XRayLogFlushStatus> LogFlushStatus{
+__sanitizer::atomic_sint32_t LogFlushStatus = {
     XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};
 
-std::unique_ptr<FDRLoggingOptions> FDROptions;
+FDRLoggingOptions FDROptions;
 
-XRayLogInitStatus fdrLoggingInit(std::size_t BufferSize, std::size_t BufferMax,
-                                  void *Options,
-                                  size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
-  assert(OptionsSize == sizeof(FDRLoggingOptions));
-  XRayLogInitStatus CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
-  if (!LoggingStatus.compare_exchange_strong(
-          CurrentStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZING,
-          std::memory_order_release, std::memory_order_relaxed))
-    return CurrentStatus;
-
-  FDROptions.reset(new FDRLoggingOptions());
-  *FDROptions = *reinterpret_cast<FDRLoggingOptions *>(Options);
-  if (FDROptions->ReportErrors)
-    SetPrintfAndReportCallback(printToStdErr);
-
-  bool Success = false;
-  BQ = std::make_shared<BufferQueue>(BufferSize, BufferMax, Success);
-  if (!Success) {
-    Report("BufferQueue init failed.\n");
-    return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
-  }
-
-  // Install the actual handleArg0 handler after initialising the buffers.
-  __xray_set_handler(fdrLoggingHandleArg0);
-
-  LoggingStatus.store(XRayLogInitStatus::XRAY_LOG_INITIALIZED,
-                      std::memory_order_release);
-  return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
-}
+__sanitizer::SpinMutex FDROptionsMutex;
 
 // Must finalize before flushing.
 XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT {
-  if (LoggingStatus.load(std::memory_order_acquire) !=
+  if (__sanitizer::atomic_load(&LoggingStatus,
+                               __sanitizer::memory_order_acquire) !=
       XRayLogInitStatus::XRAY_LOG_FINALIZED)
     return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
 
-  XRayLogFlushStatus Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
-  if (!LogFlushStatus.compare_exchange_strong(
-          Result, XRayLogFlushStatus::XRAY_LOG_FLUSHING,
-          std::memory_order_release, std::memory_order_relaxed))
-    return Result;
+  s32 Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
+  if (!__sanitizer::atomic_compare_exchange_strong(
+          &LogFlushStatus, &Result, XRayLogFlushStatus::XRAY_LOG_FLUSHING,
+          __sanitizer::memory_order_release))
+    return static_cast<XRayLogFlushStatus>(Result);
 
   // Make a copy of the BufferQueue pointer to prevent other threads that may be
   // resetting it from blowing away the queue prematurely while we're dealing
   // with it.
-  auto LocalBQ = BQ;
+  auto LocalBQ = *BQ;
 
   // We write out the file in the following format:
   //
@@ -104,67 +74,95 @@
   //      (fixed-sized) and let the tools reading the buffers deal with the data
   //      afterwards.
   //
-  int Fd = FDROptions->Fd;
+  int Fd = -1;
+  {
+    __sanitizer::SpinMutexLock Guard(&FDROptionsMutex);
+    Fd = FDROptions.Fd;
+  }
   if (Fd == -1)
     Fd = getLogFD();
   if (Fd == -1) {
     auto Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
-    LogFlushStatus.store(Result, std::memory_order_release);
+    __sanitizer::atomic_store(&LogFlushStatus, Result,
+                              __sanitizer::memory_order_release);
     return Result;
   }
 
+  // Test for required CPU features and cache the cycle frequency
+  static bool TSCSupported = probeRequiredCPUFeatures();
+  static uint64_t CycleFrequency =
+      TSCSupported ? getTSCFrequency() : __xray::NanosecondsPerSecond;
+
   XRayFileHeader Header;
   Header.Version = 1;
   Header.Type = FileTypes::FDR_LOG;
-  Header.CycleFrequency = getTSCFrequency();
+  Header.CycleFrequency = CycleFrequency;
   // FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc'
   // before setting the values in the header.
   Header.ConstantTSC = 1;
   Header.NonstopTSC = 1;
-  clock_gettime(CLOCK_REALTIME, &Header.TS);
+  Header.FdrData = FdrAdditionalHeaderData{LocalBQ->ConfiguredBufferSize()};
   retryingWriteAll(Fd, reinterpret_cast<char *>(&Header),
                    reinterpret_cast<char *>(&Header) + sizeof(Header));
+
   LocalBQ->apply([&](const BufferQueue::Buffer &B) {
-    retryingWriteAll(Fd, reinterpret_cast<char *>(B.Buffer),
-                     reinterpret_cast<char *>(B.Buffer) + B.Size);
+    uint64_t BufferSize = B.Size;
+    if (BufferSize > 0) {
+      retryingWriteAll(Fd, reinterpret_cast<char *>(B.Buffer),
+                       reinterpret_cast<char *>(B.Buffer) + B.Size);
+    }
   });
-  LogFlushStatus.store(XRayLogFlushStatus::XRAY_LOG_FLUSHED,
-                       std::memory_order_release);
+
+  // The buffer for this particular thread would have been finalised after
+  // we've written everything to disk, and we'd lose the thread's trace.
+  auto &TLD = __xray::__xray_fdr_internal::getThreadLocalData();
+  if (TLD.Buffer.Buffer != nullptr) {
+    __xray::__xray_fdr_internal::writeEOBMetadata();
+    auto Start = reinterpret_cast<char *>(TLD.Buffer.Buffer);
+    retryingWriteAll(Fd, Start, Start + TLD.Buffer.Size);
+  }
+
+  __sanitizer::atomic_store(&LogFlushStatus,
+                            XRayLogFlushStatus::XRAY_LOG_FLUSHED,
+                            __sanitizer::memory_order_release);
   return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
 }
 
 XRayLogInitStatus fdrLoggingFinalize() XRAY_NEVER_INSTRUMENT {
-  XRayLogInitStatus CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED;
-  if (!LoggingStatus.compare_exchange_strong(
-          CurrentStatus, XRayLogInitStatus::XRAY_LOG_FINALIZING,
-          std::memory_order_release, std::memory_order_relaxed))
-    return CurrentStatus;
+  s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED;
+  if (!__sanitizer::atomic_compare_exchange_strong(
+          &LoggingStatus, &CurrentStatus,
+          XRayLogInitStatus::XRAY_LOG_FINALIZING,
+          __sanitizer::memory_order_release))
+    return static_cast<XRayLogInitStatus>(CurrentStatus);
 
   // Do special things to make the log finalize itself, and not allow any more
   // operations to be performed until re-initialized.
-  BQ->finalize();
+  (*BQ)->finalize();
 
-  LoggingStatus.store(XRayLogInitStatus::XRAY_LOG_FINALIZED,
-                      std::memory_order_release);
+  __sanitizer::atomic_store(&LoggingStatus,
+                            XRayLogInitStatus::XRAY_LOG_FINALIZED,
+                            __sanitizer::memory_order_release);
   return XRayLogInitStatus::XRAY_LOG_FINALIZED;
 }
 
 XRayLogInitStatus fdrLoggingReset() XRAY_NEVER_INSTRUMENT {
-  XRayLogInitStatus CurrentStatus = XRayLogInitStatus::XRAY_LOG_FINALIZED;
-  if (!LoggingStatus.compare_exchange_strong(
-          CurrentStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
-          std::memory_order_release, std::memory_order_relaxed))
-    return CurrentStatus;
+  s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_FINALIZED;
+  if (__sanitizer::atomic_compare_exchange_strong(
+          &LoggingStatus, &CurrentStatus,
+          XRayLogInitStatus::XRAY_LOG_INITIALIZED,
+          __sanitizer::memory_order_release))
+    return static_cast<XRayLogInitStatus>(CurrentStatus);
 
   // Release the in-memory buffer queue.
-  BQ.reset();
+  BQ->reset();
 
   // Spin until the flushing status is flushed.
-  XRayLogFlushStatus CurrentFlushingStatus =
-      XRayLogFlushStatus::XRAY_LOG_FLUSHED;
-  while (!LogFlushStatus.compare_exchange_weak(
-      CurrentFlushingStatus, XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING,
-      std::memory_order_release, std::memory_order_relaxed)) {
+  s32 CurrentFlushingStatus = XRayLogFlushStatus::XRAY_LOG_FLUSHED;
+  while (__sanitizer::atomic_compare_exchange_weak(
+      &LogFlushStatus, &CurrentFlushingStatus,
+      XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING,
+      __sanitizer::memory_order_release)) {
     if (CurrentFlushingStatus == XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING)
       break;
     CurrentFlushingStatus = XRayLogFlushStatus::XRAY_LOG_FLUSHED;
@@ -174,352 +172,141 @@
   return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
 }
 
-namespace {
-thread_local BufferQueue::Buffer Buffer;
-thread_local char *RecordPtr = nullptr;
-
-void setupNewBuffer(const BufferQueue::Buffer &Buffer) XRAY_NEVER_INSTRUMENT {
-  RecordPtr = static_cast<char *>(Buffer.Buffer);
-
-  static constexpr int InitRecordsCount = 2;
-  std::aligned_storage<sizeof(MetadataRecord)>::type Records[InitRecordsCount];
-  {
-    // Write out a MetadataRecord to signify that this is the start of a new
-    // buffer, associated with a particular thread, with a new CPU.  For the
-    // data, we have 15 bytes to squeeze as much information as we can.  At this
-    // point we only write down the following bytes:
-    //   - Thread ID (pid_t, 4 bytes)
-    auto &NewBuffer = *reinterpret_cast<MetadataRecord *>(&Records[0]);
-    NewBuffer.Type = uint8_t(RecordType::Metadata);
-    NewBuffer.RecordKind = uint8_t(MetadataRecord::RecordKinds::NewBuffer);
-    pid_t Tid = syscall(SYS_gettid);
-    std::memcpy(&NewBuffer.Data, &Tid, sizeof(pid_t));
-  }
-
-  // Also write the WalltimeMarker record.
-  {
-    static_assert(sizeof(time_t) <= 8, "time_t needs to be at most 8 bytes");
-    auto &WalltimeMarker = *reinterpret_cast<MetadataRecord *>(&Records[1]);
-    WalltimeMarker.Type = uint8_t(RecordType::Metadata);
-    WalltimeMarker.RecordKind =
-        uint8_t(MetadataRecord::RecordKinds::WalltimeMarker);
-    timespec TS{0, 0};
-    clock_gettime(CLOCK_MONOTONIC, &TS);
-
-    // We only really need microsecond precision here, and enforce across
-    // platforms that we need 64-bit seconds and 32-bit microseconds encoded in
-    // the Metadata record.
-    int32_t Micros = TS.tv_nsec / 1000;
-    int64_t Seconds = TS.tv_sec;
-    std::memcpy(WalltimeMarker.Data, &Seconds, sizeof(Seconds));
-    std::memcpy(WalltimeMarker.Data + sizeof(Seconds), &Micros, sizeof(Micros));
-  }
-  std::memcpy(RecordPtr, Records, sizeof(MetadataRecord) * InitRecordsCount);
-  RecordPtr += sizeof(MetadataRecord) * InitRecordsCount;
-}
-
-void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC) XRAY_NEVER_INSTRUMENT {
-  MetadataRecord NewCPUId;
-  NewCPUId.Type = uint8_t(RecordType::Metadata);
-  NewCPUId.RecordKind = uint8_t(MetadataRecord::RecordKinds::NewCPUId);
-
-  // The data for the New CPU will contain the following bytes:
-  //   - CPU ID (uint16_t, 2 bytes)
-  //   - Full TSC (uint64_t, 8 bytes)
-  // Total = 12 bytes.
-  std::memcpy(&NewCPUId.Data, &CPU, sizeof(CPU));
-  std::memcpy(&NewCPUId.Data[sizeof(CPU)], &TSC, sizeof(TSC));
-  std::memcpy(RecordPtr, &NewCPUId, sizeof(MetadataRecord));
-  RecordPtr += sizeof(MetadataRecord);
-}
-
-void writeEOBMetadata() XRAY_NEVER_INSTRUMENT {
-  MetadataRecord EOBMeta;
-  EOBMeta.Type = uint8_t(RecordType::Metadata);
-  EOBMeta.RecordKind = uint8_t(MetadataRecord::RecordKinds::EndOfBuffer);
-  // For now we don't write any bytes into the Data field.
-  std::memcpy(RecordPtr, &EOBMeta, sizeof(MetadataRecord));
-  RecordPtr += sizeof(MetadataRecord);
-}
-
-void writeTSCWrapMetadata(uint64_t TSC) XRAY_NEVER_INSTRUMENT {
-  MetadataRecord TSCWrap;
-  TSCWrap.Type = uint8_t(RecordType::Metadata);
-  TSCWrap.RecordKind = uint8_t(MetadataRecord::RecordKinds::TSCWrap);
-
-  // The data for the TSCWrap record contains the following bytes:
-  //   - Full TSC (uint64_t, 8 bytes)
-  // Total = 8 bytes.
-  std::memcpy(&TSCWrap.Data, &TSC, sizeof(TSC));
-  std::memcpy(RecordPtr, &TSCWrap, sizeof(MetadataRecord));
-  RecordPtr += sizeof(MetadataRecord);
-}
-
-constexpr auto MetadataRecSize = sizeof(MetadataRecord);
-constexpr auto FunctionRecSize = sizeof(FunctionRecord);
-
-class ThreadExitBufferCleanup {
-  std::weak_ptr<BufferQueue> Buffers;
-  BufferQueue::Buffer &Buffer;
-
-public:
-  explicit ThreadExitBufferCleanup(std::weak_ptr<BufferQueue> BQ,
-                                   BufferQueue::Buffer &Buffer)
-      XRAY_NEVER_INSTRUMENT : Buffers(BQ),
-                              Buffer(Buffer) {}
-
-  ~ThreadExitBufferCleanup() noexcept XRAY_NEVER_INSTRUMENT {
-    if (RecordPtr == nullptr)
-      return;
-
-    // We make sure that upon exit, a thread will write out the EOB
-    // MetadataRecord in the thread-local log, and also release the buffer to
-    // the queue.
-    assert((RecordPtr + MetadataRecSize) - static_cast<char *>(Buffer.Buffer) >=
-           static_cast<ptrdiff_t>(MetadataRecSize));
-    if (auto BQ = Buffers.lock()) {
-      writeEOBMetadata();
-      if (auto EC = BQ->releaseBuffer(Buffer))
-        Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer,
-               EC.message().c_str());
-      return;
-    }
-  }
+struct TSCAndCPU {
+  uint64_t TSC;
+  unsigned char CPU;
 };
 
-class RecursionGuard {
-  bool &Running;
-  const bool Valid;
-
-public:
-  explicit RecursionGuard(bool &R) : Running(R), Valid(!R) {
-    if (Valid)
-      Running = true;
-  }
-
-  RecursionGuard(const RecursionGuard &) = delete;
-  RecursionGuard(RecursionGuard &&) = delete;
-  RecursionGuard &operator=(const RecursionGuard &) = delete;
-  RecursionGuard &operator=(RecursionGuard &&) = delete;
-
-  explicit operator bool() const { return Valid; }
-
-  ~RecursionGuard() noexcept {
-    if (Valid)
-      Running = false;
-  }
-};
-
-inline bool loggingInitialized() {
-  return LoggingStatus.load(std::memory_order_acquire) ==
-         XRayLogInitStatus::XRAY_LOG_INITIALIZED;
-}
-
-} // namespace
-
-void fdrLoggingHandleArg0(int32_t FuncId,
-                           XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
+static TSCAndCPU getTimestamp() XRAY_NEVER_INSTRUMENT {
   // We want to get the TSC as early as possible, so that we can check whether
   // we've seen this CPU before. We also do it before we load anything else, to
   // allow for forward progress with the scheduling.
-  unsigned char CPU;
-  uint64_t TSC = __xray::readTSC(CPU);
+  TSCAndCPU Result;
 
-  // Bail out right away if logging is not initialized yet.
-  if (LoggingStatus.load(std::memory_order_acquire) !=
-      XRayLogInitStatus::XRAY_LOG_INITIALIZED)
-    return;
+  // Test once for required CPU features
+  static bool TSCSupported = probeRequiredCPUFeatures();
 
-  // We use a thread_local variable to keep track of which CPUs we've already
-  // run, and the TSC times for these CPUs. This allows us to stop repeating the
-  // CPU field in the function records.
-  //
-  // We assume that we'll support only 65536 CPUs for x86_64.
-  thread_local uint16_t CurrentCPU = std::numeric_limits<uint16_t>::max();
-  thread_local uint64_t LastTSC = 0;
+  if (TSCSupported) {
+    Result.TSC = __xray::readTSC(Result.CPU);
+  } else {
+    // FIXME: This code needs refactoring as it appears in multiple locations
+    timespec TS;
+    int result = clock_gettime(CLOCK_REALTIME, &TS);
+    if (result != 0) {
+      Report("clock_gettime(2) return %d, errno=%d", result, int(errno));
+      TS = {0, 0};
+    }
+    Result.CPU = 0;
+    Result.TSC = TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec;
+  }
+  return Result;
+}
 
-  // Make sure a thread that's ever called handleArg0 has a thread-local
-  // live reference to the buffer queue for this particular instance of
-  // FDRLogging, and that we're going to clean it up when the thread exits.
-  thread_local auto LocalBQ = BQ;
-  thread_local ThreadExitBufferCleanup Cleanup(LocalBQ, Buffer);
+void fdrLoggingHandleArg0(int32_t FuncId,
+                          XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
+  auto TC = getTimestamp();
+  __xray_fdr_internal::processFunctionHook(FuncId, Entry, TC.TSC,
+                                           TC.CPU, 0, clock_gettime, *BQ);
+}
 
-  // Prevent signal handler recursion, so in case we're already in a log writing
-  // mode and the signal handler comes in (and is also instrumented) then we
-  // don't want to be clobbering potentially partial writes already happening in
-  // the thread. We use a simple thread_local latch to only allow one on-going
-  // handleArg0 to happen at any given time.
-  thread_local bool Running = false;
+void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry,
+                          uint64_t Arg) XRAY_NEVER_INSTRUMENT {
+  auto TC = getTimestamp();
+  __xray_fdr_internal::processFunctionHook(
+      FuncId, Entry, TC.TSC, TC.CPU, Arg, clock_gettime, *BQ);
+}
+
+void fdrLoggingHandleCustomEvent(void *Event,
+                                 std::size_t EventSize) XRAY_NEVER_INSTRUMENT {
+  using namespace __xray_fdr_internal;
+  auto TC = getTimestamp();
+  auto &TSC = TC.TSC;
+  auto &CPU = TC.CPU;
   RecursionGuard Guard{Running};
   if (!Guard) {
-    assert(Running == true && "RecursionGuard is buggy!");
+    assert(Running && "RecursionGuard is buggy!");
+    return;
+  }
+  if (EventSize > std::numeric_limits<int32_t>::max()) {
+    using Empty = struct {};
+    static Empty Once = [&] {
+      Report("Event size too large = %zu ; > max = %d\n", EventSize,
+             std::numeric_limits<int32_t>::max());
+      return Empty();
+    }();
+    (void)Once;
+  }
+  int32_t ReducedEventSize = static_cast<int32_t>(EventSize);
+  auto &TLD = getThreadLocalData();
+  if (!isLogInitializedAndReady(TLD.LocalBQ, TSC, CPU, clock_gettime))
+    return;
+
+  // Here we need to prepare the log to handle:
+  //   - The metadata record we're going to write. (16 bytes)
+  //   - The additional data we're going to write. Currently, that's the size of
+  //   the event we're going to dump into the log as free-form bytes.
+  if (!prepareBuffer(TSC, CPU, clock_gettime, MetadataRecSize + EventSize)) {
+    TLD.LocalBQ = nullptr;
     return;
   }
 
-  if (!loggingInitialized() || LocalBQ->finalizing()) {
-    writeEOBMetadata();
-    if (auto EC = BQ->releaseBuffer(Buffer)) {
-      Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer,
-             EC.message().c_str());
-      return;
-    }
-    RecordPtr = nullptr;
+  // Write the custom event metadata record, which consists of the following
+  // information:
+  //   - 8 bytes (64-bits) for the full TSC when the event started.
+  //   - 4 bytes (32-bits) for the length of the data.
+  MetadataRecord CustomEvent;
+  CustomEvent.Type = uint8_t(RecordType::Metadata);
+  CustomEvent.RecordKind =
+      uint8_t(MetadataRecord::RecordKinds::CustomEventMarker);
+  constexpr auto TSCSize = sizeof(TC.TSC);
+  std::memcpy(&CustomEvent.Data, &ReducedEventSize, sizeof(int32_t));
+  std::memcpy(&CustomEvent.Data[sizeof(int32_t)], &TSC, TSCSize);
+  std::memcpy(TLD.RecordPtr, &CustomEvent, sizeof(CustomEvent));
+  TLD.RecordPtr += sizeof(CustomEvent);
+  std::memcpy(TLD.RecordPtr, Event, ReducedEventSize);
+  endBufferIfFull();
+}
+
+XRayLogInitStatus fdrLoggingInit(std::size_t BufferSize, std::size_t BufferMax,
+                                 void *Options,
+                                 size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
+  if (OptionsSize != sizeof(FDRLoggingOptions))
+    return static_cast<XRayLogInitStatus>(__sanitizer::atomic_load(
+        &LoggingStatus, __sanitizer::memory_order_acquire));
+  s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+  if (!__sanitizer::atomic_compare_exchange_strong(
+          &LoggingStatus, &CurrentStatus,
+          XRayLogInitStatus::XRAY_LOG_INITIALIZING,
+          __sanitizer::memory_order_release))
+    return static_cast<XRayLogInitStatus>(CurrentStatus);
+
+  {
+    __sanitizer::SpinMutexLock Guard(&FDROptionsMutex);
+    memcpy(&FDROptions, Options, OptionsSize);
   }
 
-  if (Buffer.Buffer == nullptr) {
-    if (auto EC = LocalBQ->getBuffer(Buffer)) {
-      auto LS = LoggingStatus.load(std::memory_order_acquire);
-      if (LS != XRayLogInitStatus::XRAY_LOG_FINALIZING &&
-          LS != XRayLogInitStatus::XRAY_LOG_FINALIZED)
-        Report("Failed to acquire a buffer; error=%s\n", EC.message().c_str());
-      return;
-    }
+  bool Success = false;
+  if (BQ == nullptr)
+    BQ = new std::shared_ptr<BufferQueue>();
 
-    setupNewBuffer(Buffer);
+  *BQ = std::make_shared<BufferQueue>(BufferSize, BufferMax, Success);
+  if (!Success) {
+    Report("BufferQueue init failed.\n");
+    return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
   }
 
-  if (CurrentCPU == std::numeric_limits<uint16_t>::max()) {
-    // This means this is the first CPU this thread has ever run on. We set the
-    // current CPU and record this as the first TSC we've seen.
-    CurrentCPU = CPU;
-    writeNewCPUIdMetadata(CPU, TSC);
-  }
+  // Arg1 handler should go in first to avoid concurrent code accidentally
+  // falling back to arg0 when it should have ran arg1.
+  __xray_set_handler_arg1(fdrLoggingHandleArg1);
+  // Install the actual handleArg0 handler after initialising the buffers.
+  __xray_set_handler(fdrLoggingHandleArg0);
+  __xray_set_customevent_handler(fdrLoggingHandleCustomEvent);
 
-  // Before we go setting up writing new function entries, we need to be really
-  // careful about the pointer math we're doing. This means we need to ensure
-  // that the record we are about to write is going to fit into the buffer,
-  // without overflowing the buffer.
-  //
-  // To do this properly, we use the following assumptions:
-  //
-  //   - The least number of bytes we will ever write is 8
-  //     (sizeof(FunctionRecord)) only if the delta between the previous entry
-  //     and this entry is within 32 bits.
-  //   - The most number of bytes we will ever write is 8 + 16 = 24. This is
-  //     computed by:
-  //
-  //       sizeof(FunctionRecord) + sizeof(MetadataRecord)
-  //
-  //     These arise in the following cases:
-  //
-  //       1. When the delta between the TSC we get and the previous TSC for the
-  //          same CPU is outside of the uint32_t range, we end up having to
-  //          write a MetadataRecord to indicate a "tsc wrap" before the actual
-  //          FunctionRecord.
-  //       2. When we learn that we've moved CPUs, we need to write a
-  //          MetadataRecord to indicate a "cpu change", and thus write out the
-  //          current TSC for that CPU before writing out the actual
-  //          FunctionRecord.
-  //       3. When we learn about a new CPU ID, we need to write down a "new cpu
-  //          id" MetadataRecord before writing out the actual FunctionRecord.
-  //
-  //   - An End-of-Buffer (EOB) MetadataRecord is 16 bytes.
-  //
-  // So the math we need to do is to determine whether writing 24 bytes past the
-  // current pointer leaves us with enough bytes to write the EOB
-  // MetadataRecord. If we don't have enough space after writing as much as 24
-  // bytes in the end of the buffer, we need to write out the EOB, get a new
-  // Buffer, set it up properly before doing any further writing.
-  //
-  char *BufferStart = static_cast<char *>(Buffer.Buffer);
-  if ((RecordPtr + (MetadataRecSize + FunctionRecSize)) - BufferStart <
-      static_cast<ptrdiff_t>(MetadataRecSize)) {
-    writeEOBMetadata();
-    if (auto EC = LocalBQ->releaseBuffer(Buffer)) {
-      Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer,
-             EC.message().c_str());
-      return;
-    }
-    if (auto EC = LocalBQ->getBuffer(Buffer)) {
-      Report("Failed to acquire a buffer; error=%s\n", EC.message().c_str());
-      return;
-    }
-    setupNewBuffer(Buffer);
-  }
-
-  // By this point, we are now ready to write at most 24 bytes (one metadata
-  // record and one function record).
-  BufferStart = static_cast<char *>(Buffer.Buffer);
-  assert((RecordPtr + (MetadataRecSize + FunctionRecSize)) - BufferStart >=
-             static_cast<ptrdiff_t>(MetadataRecSize) &&
-         "Misconfigured BufferQueue provided; Buffer size not large enough.");
-
-  std::aligned_storage<sizeof(FunctionRecord), alignof(FunctionRecord)>::type
-      AlignedFuncRecordBuffer;
-  auto &FuncRecord =
-      *reinterpret_cast<FunctionRecord *>(&AlignedFuncRecordBuffer);
-  FuncRecord.Type = uint8_t(RecordType::Function);
-
-  // Only get the lower 28 bits of the function id.
-  FuncRecord.FuncId = FuncId & ~(0x0F << 28);
-
-  // Here we compute the TSC Delta. There are a few interesting situations we
-  // need to account for:
-  //
-  //   - The thread has migrated to a different CPU. If this is the case, then
-  //     we write down the following records:
-  //
-  //       1. A 'NewCPUId' Metadata record.
-  //       2. A FunctionRecord with a 0 for the TSCDelta field.
-  //
-  //   - The TSC delta is greater than the 32 bits we can store in a
-  //     FunctionRecord. In this case we write down the following records:
-  //
-  //       1. A 'TSCWrap' Metadata record.
-  //       2. A FunctionRecord with a 0 for the TSCDelta field.
-  //
-  //   - The TSC delta is representable within the 32 bits we can store in a
-  //     FunctionRecord. In this case we write down just a FunctionRecord with
-  //     the correct TSC delta.
-  //
-  FuncRecord.TSCDelta = 0;
-  if (CPU != CurrentCPU) {
-    // We've moved to a new CPU.
-    writeNewCPUIdMetadata(CPU, TSC);
-  } else {
-    // If the delta is greater than the range for a uint32_t, then we write out
-    // the TSC wrap metadata entry with the full TSC, and the TSC for the
-    // function record be 0.
-    auto Delta = LastTSC - TSC;
-    if (Delta > (1ULL << 32) - 1)
-      writeTSCWrapMetadata(TSC);
-    else
-      FuncRecord.TSCDelta = Delta;
-  }
-
-  // We then update our "LastTSC" and "CurrentCPU" thread-local variables to aid
-  // us in future computations of this TSC delta value.
-  LastTSC = TSC;
-  CurrentCPU = CPU;
-
-  switch (Entry) {
-  case XRayEntryType::ENTRY:
-  case XRayEntryType::LOG_ARGS_ENTRY:
-    FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionEnter);
-    break;
-  case XRayEntryType::EXIT:
-    FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionExit);
-    break;
-  case XRayEntryType::TAIL:
-    FuncRecord.RecordKind =
-        uint8_t(FunctionRecord::RecordKinds::FunctionTailExit);
-    break;
-  }
-
-  std::memcpy(RecordPtr, &AlignedFuncRecordBuffer, sizeof(FunctionRecord));
-  RecordPtr += sizeof(FunctionRecord);
-
-  // If we've exhausted the buffer by this time, we then release the buffer to
-  // make sure that other threads may start using this buffer.
-  if ((RecordPtr + MetadataRecSize) - BufferStart == MetadataRecSize) {
-    writeEOBMetadata();
-    if (auto EC = LocalBQ->releaseBuffer(Buffer)) {
-      Report("Failed releasing buffer at %p; error=%s\n", Buffer.Buffer,
-             EC.message().c_str());
-      return;
-    }
-    RecordPtr = nullptr;
-  }
+  __sanitizer::atomic_store(&LoggingStatus,
+                            XRayLogInitStatus::XRAY_LOG_INITIALIZED,
+                            __sanitizer::memory_order_release);
+  Report("XRay FDR init successful.\n");
+  return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
 }
 
 } // namespace __xray
@@ -528,7 +315,9 @@
   using namespace __xray;
   if (flags()->xray_fdr_log) {
     XRayLogImpl Impl{
-        fdrLoggingInit, fdrLoggingFinalize, fdrLoggingHandleArg0,
+        fdrLoggingInit,
+        fdrLoggingFinalize,
+        fdrLoggingHandleArg0,
         fdrLoggingFlush,
     };
     __xray_set_log_impl(Impl);
diff --git a/lib/xray/xray_fdr_logging.h b/lib/xray/xray_fdr_logging.h
index 28c8296..1639d55 100644
--- a/lib/xray/xray_fdr_logging.h
+++ b/lib/xray/xray_fdr_logging.h
@@ -14,6 +14,7 @@
 #define XRAY_XRAY_FDR_LOGGING_H
 
 #include "xray/xray_log_interface.h"
+#include "xray_fdr_log_records.h"
 
 // FDR (Flight Data Recorder) Mode
 // ===============================
@@ -25,69 +26,11 @@
 // default mode of always writing fixed-size records.
 
 namespace __xray {
-
-enum class RecordType : uint8_t {
-  Function, Metadata
-};
-
-// A MetadataRecord encodes the kind of record in its first byte, and have 15
-// additional bytes in the end to hold free-form data.
-struct alignas(16) MetadataRecord {
-  // A MetadataRecord must always have a type of 1.
-  /* RecordType */ uint8_t Type : 1;
-
-  // Each kind of record is represented as a 7-bit value (even though we use an
-  // unsigned 8-bit enum class to do so).
-  enum class RecordKinds : uint8_t {
-    NewBuffer,
-    EndOfBuffer,
-    NewCPUId,
-    TSCWrap,
-    WalltimeMarker,
-  };
-  // Use 7 bits to identify this record type.
-  /* RecordKinds */ uint8_t RecordKind : 7;
-  char Data[15];
-} __attribute__((packed));
-
-static_assert(sizeof(MetadataRecord) == 16, "Wrong size for MetadataRecord.");
-
-struct alignas(8) FunctionRecord {
-  // A FunctionRecord must always have a type of 0.
-  /* RecordType */ uint8_t Type : 1;
-  enum class RecordKinds {
-    FunctionEnter = 0x00,
-    FunctionExit = 0x01,
-    FunctionTailExit = 0x02,
-  };
-  /* RecordKinds */ uint8_t RecordKind : 3;
-
-  // We only use 28 bits of the function ID, so that we can use as few bytes as
-  // possible. This means we only support 2^28 (268,435,456) unique function ids
-  // in a single binary.
-  int FuncId : 28;
-
-  // We use another 4 bytes to hold the delta between the previous entry's TSC.
-  // In case we've found that the distance is greater than the allowable 32 bits
-  // (either because we are running in a different CPU and the TSC might be
-  // different then), we should use a MetadataRecord before this FunctionRecord
-  // that will contain the full TSC for that CPU, and keep this to 0.
-  uint32_t TSCDelta;
-} __attribute__((packed));
-
-static_assert(sizeof(FunctionRecord) == 8, "Wrong size for FunctionRecord.");
-
-// Options used by the FDR implementation.
-struct FDRLoggingOptions {
-  bool ReportErrors = false;
-  int Fd = -1;
-};
-
-// Flight Data Recorder mode implementation interfaces.
 XRayLogInitStatus fdrLoggingInit(size_t BufferSize, size_t BufferMax,
-                                  void *Options, size_t OptionsSize);
+                                 void *Options, size_t OptionsSize);
 XRayLogInitStatus fdrLoggingFinalize();
 void fdrLoggingHandleArg0(int32_t FuncId, XRayEntryType Entry);
+void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry, uint64_t Arg1);
 XRayLogFlushStatus fdrLoggingFlush();
 XRayLogInitStatus fdrLoggingReset();
 
diff --git a/lib/xray/xray_fdr_logging_impl.h b/lib/xray/xray_fdr_logging_impl.h
new file mode 100644
index 0000000..c109d13
--- /dev/null
+++ b/lib/xray/xray_fdr_logging_impl.h
@@ -0,0 +1,783 @@
+//===-- xray_fdr_logging_impl.h ---------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Here we implement the thread local state management and record i/o for Flight
+// Data Recorder mode for XRay, where we use compact structures to store records
+// in memory as well as when writing out the data to files.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_XRAY_FDR_LOGGING_IMPL_H
+#define XRAY_XRAY_FDR_LOGGING_IMPL_H
+
+#include <cassert>
+#include <cstddef>
+#include <cstring>
+#include <limits>
+#include <pthread.h>
+#include <sys/syscall.h>
+#include <time.h>
+#include <unistd.h>
+
+// FIXME: Implement analogues to std::shared_ptr and std::weak_ptr
+#include <memory>
+
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray/xray_log_interface.h"
+#include "xray_buffer_queue.h"
+#include "xray_defs.h"
+#include "xray_fdr_log_records.h"
+#include "xray_flags.h"
+#include "xray_tsc.h"
+
+namespace __xray {
+
+__sanitizer::atomic_sint32_t LoggingStatus = {
+    XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
+
+/// We expose some of the state transitions when FDR logging mode is operating
+/// such that we can simulate a series of log events that may occur without
+/// and test with determinism without worrying about the real CPU time.
+///
+/// Because the code uses thread_local allocation extensively as part of its
+/// design, callers that wish to test events occuring on different threads
+/// will actually have to run them on different threads.
+///
+/// This also means that it is possible to break invariants maintained by
+/// cooperation with xray_fdr_logging class, so be careful and think twice.
+namespace __xray_fdr_internal {
+
+/// Writes the new buffer record and wallclock time that begin a buffer for a
+/// thread to MemPtr and increments MemPtr. Bypasses the thread local state
+/// machine and writes directly to memory without checks.
+static void writeNewBufferPreamble(pid_t Tid, timespec TS, char *&MemPtr);
+
+/// Write a metadata record to switch to a new CPU to MemPtr and increments
+/// MemPtr. Bypasses the thread local state machine and writes directly to
+/// memory without checks.
+static void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC, char *&MemPtr);
+
+/// Writes an EOB metadata record to MemPtr and increments MemPtr. Bypasses the
+/// thread local state machine and writes directly to memory without checks.
+static void writeEOBMetadata(char *&MemPtr);
+
+/// Writes a TSC Wrap metadata record to MemPtr and increments MemPtr. Bypasses
+/// the thread local state machine and directly writes to memory without checks.
+static void writeTSCWrapMetadata(uint64_t TSC, char *&MemPtr);
+
+/// Writes a Function Record to MemPtr and increments MemPtr. Bypasses the
+/// thread local state machine and writes the function record directly to
+/// memory.
+static void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
+                                XRayEntryType EntryType, char *&MemPtr);
+
+/// Sets up a new buffer in thread_local storage and writes a preamble. The
+/// wall_clock_reader function is used to populate the WallTimeRecord entry.
+static void setupNewBuffer(int (*wall_clock_reader)(clockid_t,
+                                                    struct timespec *));
+
+/// Called to record CPU time for a new CPU within the current thread.
+static void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC);
+
+/// Called to close the buffer when the thread exhausts the buffer or when the
+/// thread exits (via a thread local variable destructor).
+static void writeEOBMetadata();
+
+/// TSC Wrap records are written when a TSC delta encoding scheme overflows.
+static void writeTSCWrapMetadata(uint64_t TSC);
+
+// Group together thread-local-data in a struct, then hide it behind a function
+// call so that it can be initialized on first use instead of as a global. We
+// force the alignment to 64-bytes for x86 cache line alignment, as this
+// structure is used in the hot path of implementation.
+struct alignas(64) ThreadLocalData {
+  BufferQueue::Buffer Buffer;
+  char *RecordPtr = nullptr;
+  // The number of FunctionEntry records immediately preceding RecordPtr.
+  uint8_t NumConsecutiveFnEnters = 0;
+
+  // The number of adjacent, consecutive pairs of FunctionEntry, Tail Exit
+  // records preceding RecordPtr.
+  uint8_t NumTailCalls = 0;
+
+  // We use a thread_local variable to keep track of which CPUs we've already
+  // run, and the TSC times for these CPUs. This allows us to stop repeating the
+  // CPU field in the function records.
+  //
+  // We assume that we'll support only 65536 CPUs for x86_64.
+  uint16_t CurrentCPU = std::numeric_limits<uint16_t>::max();
+  uint64_t LastTSC = 0;
+  uint64_t LastFunctionEntryTSC = 0;
+
+  // Make sure a thread that's ever called handleArg0 has a thread-local
+  // live reference to the buffer queue for this particular instance of
+  // FDRLogging, and that we're going to clean it up when the thread exits.
+  std::shared_ptr<BufferQueue> LocalBQ = nullptr;
+};
+
+// Forward-declare, defined later.
+static ThreadLocalData &getThreadLocalData();
+
+static constexpr auto MetadataRecSize = sizeof(MetadataRecord);
+static constexpr auto FunctionRecSize = sizeof(FunctionRecord);
+
+// This function will initialize the thread-local data structure used by the FDR
+// logging implementation and return a reference to it. The implementation
+// details require a bit of care to maintain.
+//
+// First, some requirements on the implementation in general:
+//
+//   - XRay handlers should not call any memory allocation routines that may
+//     delegate to an instrumented implementation. This means functions like
+//     malloc() and free() should not be called while instrumenting.
+//
+//   - We would like to use some thread-local data initialized on first-use of
+//     the XRay instrumentation. These allow us to implement unsynchronized
+//     routines that access resources associated with the thread.
+//
+// The implementation here uses a few mechanisms that allow us to provide both
+// the requirements listed above. We do this by:
+//
+//   1. Using a thread-local aligned storage buffer for representing the
+//      ThreadLocalData struct. This data will be uninitialized memory by
+//      design.
+//
+//   2. Using pthread_once(...) to initialize the thread-local data structures
+//      on first use, for every thread. We don't use std::call_once so we don't
+//      have a reliance on the C++ runtime library.
+//
+//   3. Registering a cleanup function that gets run at the end of a thread's
+//      lifetime through pthread_create_key(...). The cleanup function would
+//      allow us to release the thread-local resources in a manner that would
+//      let the rest of the XRay runtime implementation handle the records
+//      written for this thread's active buffer.
+//
+// We're doing this to avoid using a `thread_local` object that has a
+// non-trivial destructor, because the C++ runtime might call std::malloc(...)
+// to register calls to destructors. Deadlocks may arise when, for example, an
+// externally provided malloc implementation is XRay instrumented, and
+// initializing the thread-locals involves calling into malloc. A malloc
+// implementation that does global synchronization might be holding a lock for a
+// critical section, calling a function that might be XRay instrumented (and
+// thus in turn calling into malloc by virtue of registration of the
+// thread_local's destructor).
+//
+// With the approach taken where, we attempt to avoid the potential for
+// deadlocks by relying instead on pthread's memory management routines.
+static ThreadLocalData &getThreadLocalData() {
+  thread_local pthread_key_t key;
+
+  // We need aligned, uninitialized storage for the TLS object which is
+  // trivially destructible. We're going to use this as raw storage and
+  // placement-new the ThreadLocalData object into it later.
+  alignas(alignof(ThreadLocalData)) thread_local unsigned char
+      TLSBuffer[sizeof(ThreadLocalData)];
+
+  // Ensure that we only actually ever do the pthread initialization once.
+  thread_local bool UNUSED Unused = [] {
+    new (&TLSBuffer) ThreadLocalData();
+    auto result = pthread_key_create(&key, +[](void *) {
+      auto &TLD = *reinterpret_cast<ThreadLocalData *>(&TLSBuffer);
+      auto &RecordPtr = TLD.RecordPtr;
+      auto &Buffers = TLD.LocalBQ;
+      auto &Buffer = TLD.Buffer;
+      if (RecordPtr == nullptr)
+        return;
+
+      // We make sure that upon exit, a thread will write out the EOB
+      // MetadataRecord in the thread-local log, and also release the buffer
+      // to the queue.
+      assert((RecordPtr + MetadataRecSize) -
+                 static_cast<char *>(Buffer.Buffer) >=
+             static_cast<ptrdiff_t>(MetadataRecSize));
+      if (Buffers) {
+        writeEOBMetadata();
+        auto EC = Buffers->releaseBuffer(Buffer);
+        if (EC != BufferQueue::ErrorCode::Ok)
+          Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer,
+                 BufferQueue::getErrorString(EC));
+        Buffers = nullptr;
+        return;
+      }
+    });
+    if (result != 0) {
+      Report("Failed to allocate thread-local data through pthread; error=%d",
+             result);
+      return false;
+    }
+    pthread_setspecific(key, &TLSBuffer);
+    return true;
+  }();
+
+  return *reinterpret_cast<ThreadLocalData *>(TLSBuffer);
+}
+
+//-----------------------------------------------------------------------------|
+// The rest of the file is implementation.                                     |
+//-----------------------------------------------------------------------------|
+// Functions are implemented in the header for inlining since we don't want    |
+// to grow the stack when we've hijacked the binary for logging.               |
+//-----------------------------------------------------------------------------|
+
+namespace {
+
+class RecursionGuard {
+  volatile bool &Running;
+  const bool Valid;
+
+public:
+  explicit RecursionGuard(volatile bool &R) : Running(R), Valid(!R) {
+    if (Valid)
+      Running = true;
+  }
+
+  RecursionGuard(const RecursionGuard &) = delete;
+  RecursionGuard(RecursionGuard &&) = delete;
+  RecursionGuard &operator=(const RecursionGuard &) = delete;
+  RecursionGuard &operator=(RecursionGuard &&) = delete;
+
+  explicit operator bool() const { return Valid; }
+
+  ~RecursionGuard() noexcept {
+    if (Valid)
+      Running = false;
+  }
+};
+
+} // namespace
+
+inline void writeNewBufferPreamble(pid_t Tid, timespec TS,
+                                   char *&MemPtr) XRAY_NEVER_INSTRUMENT {
+  static constexpr int InitRecordsCount = 2;
+  alignas(alignof(MetadataRecord)) unsigned char
+      Records[InitRecordsCount * MetadataRecSize];
+  {
+    // Write out a MetadataRecord to signify that this is the start of a new
+    // buffer, associated with a particular thread, with a new CPU.  For the
+    // data, we have 15 bytes to squeeze as much information as we can.  At this
+    // point we only write down the following bytes:
+    //   - Thread ID (pid_t, 4 bytes)
+    auto &NewBuffer = *reinterpret_cast<MetadataRecord *>(Records);
+    NewBuffer.Type = uint8_t(RecordType::Metadata);
+    NewBuffer.RecordKind = uint8_t(MetadataRecord::RecordKinds::NewBuffer);
+    std::memcpy(&NewBuffer.Data, &Tid, sizeof(pid_t));
+  }
+  // Also write the WalltimeMarker record.
+  {
+    static_assert(sizeof(time_t) <= 8, "time_t needs to be at most 8 bytes");
+    auto &WalltimeMarker =
+        *reinterpret_cast<MetadataRecord *>(Records + MetadataRecSize);
+    WalltimeMarker.Type = uint8_t(RecordType::Metadata);
+    WalltimeMarker.RecordKind =
+        uint8_t(MetadataRecord::RecordKinds::WalltimeMarker);
+
+    // We only really need microsecond precision here, and enforce across
+    // platforms that we need 64-bit seconds and 32-bit microseconds encoded in
+    // the Metadata record.
+    int32_t Micros = TS.tv_nsec / 1000;
+    int64_t Seconds = TS.tv_sec;
+    std::memcpy(WalltimeMarker.Data, &Seconds, sizeof(Seconds));
+    std::memcpy(WalltimeMarker.Data + sizeof(Seconds), &Micros, sizeof(Micros));
+  }
+  std::memcpy(MemPtr, Records, sizeof(MetadataRecord) * InitRecordsCount);
+  MemPtr += sizeof(MetadataRecord) * InitRecordsCount;
+  auto &TLD = getThreadLocalData();
+  TLD.NumConsecutiveFnEnters = 0;
+  TLD.NumTailCalls = 0;
+}
+
+inline void setupNewBuffer(int (*wall_clock_reader)(
+    clockid_t, struct timespec *)) XRAY_NEVER_INSTRUMENT {
+  auto &TLD = getThreadLocalData();
+  auto &Buffer = TLD.Buffer;
+  auto &RecordPtr = TLD.RecordPtr;
+  RecordPtr = static_cast<char *>(Buffer.Buffer);
+  pid_t Tid = syscall(SYS_gettid);
+  timespec TS{0, 0};
+  // This is typically clock_gettime, but callers have injection ability.
+  wall_clock_reader(CLOCK_MONOTONIC, &TS);
+  writeNewBufferPreamble(Tid, TS, RecordPtr);
+  TLD.NumConsecutiveFnEnters = 0;
+  TLD.NumTailCalls = 0;
+}
+
+inline void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC,
+                                  char *&MemPtr) XRAY_NEVER_INSTRUMENT {
+  auto &TLD = getThreadLocalData();
+  MetadataRecord NewCPUId;
+  NewCPUId.Type = uint8_t(RecordType::Metadata);
+  NewCPUId.RecordKind = uint8_t(MetadataRecord::RecordKinds::NewCPUId);
+
+  // The data for the New CPU will contain the following bytes:
+  //   - CPU ID (uint16_t, 2 bytes)
+  //   - Full TSC (uint64_t, 8 bytes)
+  // Total = 10 bytes.
+  std::memcpy(&NewCPUId.Data, &CPU, sizeof(CPU));
+  std::memcpy(&NewCPUId.Data[sizeof(CPU)], &TSC, sizeof(TSC));
+  std::memcpy(MemPtr, &NewCPUId, sizeof(MetadataRecord));
+  MemPtr += sizeof(MetadataRecord);
+  TLD.NumConsecutiveFnEnters = 0;
+  TLD.NumTailCalls = 0;
+}
+
+inline void writeNewCPUIdMetadata(uint16_t CPU,
+                                  uint64_t TSC) XRAY_NEVER_INSTRUMENT {
+  writeNewCPUIdMetadata(CPU, TSC, getThreadLocalData().RecordPtr);
+}
+
+inline void writeEOBMetadata(char *&MemPtr) XRAY_NEVER_INSTRUMENT {
+  auto &TLD = getThreadLocalData();
+  MetadataRecord EOBMeta;
+  EOBMeta.Type = uint8_t(RecordType::Metadata);
+  EOBMeta.RecordKind = uint8_t(MetadataRecord::RecordKinds::EndOfBuffer);
+  // For now we don't write any bytes into the Data field.
+  std::memcpy(MemPtr, &EOBMeta, sizeof(MetadataRecord));
+  MemPtr += sizeof(MetadataRecord);
+  TLD.NumConsecutiveFnEnters = 0;
+  TLD.NumTailCalls = 0;
+}
+
+inline void writeEOBMetadata() XRAY_NEVER_INSTRUMENT {
+  writeEOBMetadata(getThreadLocalData().RecordPtr);
+}
+
+inline void writeTSCWrapMetadata(uint64_t TSC,
+                                 char *&MemPtr) XRAY_NEVER_INSTRUMENT {
+  auto &TLD = getThreadLocalData();
+  MetadataRecord TSCWrap;
+  TSCWrap.Type = uint8_t(RecordType::Metadata);
+  TSCWrap.RecordKind = uint8_t(MetadataRecord::RecordKinds::TSCWrap);
+
+  // The data for the TSCWrap record contains the following bytes:
+  //   - Full TSC (uint64_t, 8 bytes)
+  // Total = 8 bytes.
+  std::memcpy(&TSCWrap.Data, &TSC, sizeof(TSC));
+  std::memcpy(MemPtr, &TSCWrap, sizeof(MetadataRecord));
+  MemPtr += sizeof(MetadataRecord);
+  TLD.NumConsecutiveFnEnters = 0;
+  TLD.NumTailCalls = 0;
+}
+
+inline void writeTSCWrapMetadata(uint64_t TSC) XRAY_NEVER_INSTRUMENT {
+  writeTSCWrapMetadata(TSC, getThreadLocalData().RecordPtr);
+}
+
+// Call Argument metadata records store the arguments to a function in the
+// order of their appearance; holes are not supported by the buffer format.
+static inline void writeCallArgumentMetadata(uint64_t A) XRAY_NEVER_INSTRUMENT {
+  auto &TLD = getThreadLocalData();
+  MetadataRecord CallArg;
+  CallArg.Type = uint8_t(RecordType::Metadata);
+  CallArg.RecordKind = uint8_t(MetadataRecord::RecordKinds::CallArgument);
+
+  std::memcpy(CallArg.Data, &A, sizeof(A));
+  std::memcpy(TLD.RecordPtr, &CallArg, sizeof(MetadataRecord));
+  TLD.RecordPtr += sizeof(MetadataRecord);
+}
+
+static inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
+                                       XRayEntryType EntryType,
+                                       char *&MemPtr) XRAY_NEVER_INSTRUMENT {
+  FunctionRecord FuncRecord;
+  FuncRecord.Type = uint8_t(RecordType::Function);
+  // Only take 28 bits of the function id.
+  FuncRecord.FuncId = FuncId & ~(0x0F << 28);
+  FuncRecord.TSCDelta = TSCDelta;
+
+  auto &TLD = getThreadLocalData();
+  switch (EntryType) {
+  case XRayEntryType::ENTRY:
+    ++TLD.NumConsecutiveFnEnters;
+    FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionEnter);
+    break;
+  case XRayEntryType::LOG_ARGS_ENTRY:
+    // We should not rewind functions with logged args.
+    TLD.NumConsecutiveFnEnters = 0;
+    TLD.NumTailCalls = 0;
+    FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionEnter);
+    break;
+  case XRayEntryType::EXIT:
+    // If we've decided to log the function exit, we will never erase the log
+    // before it.
+    TLD.NumConsecutiveFnEnters = 0;
+    TLD.NumTailCalls = 0;
+    FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionExit);
+    break;
+  case XRayEntryType::TAIL:
+    // If we just entered the function we're tail exiting from or erased every
+    // invocation since then, this function entry tail pair is a candidate to
+    // be erased when the child function exits.
+    if (TLD.NumConsecutiveFnEnters > 0) {
+      ++TLD.NumTailCalls;
+      TLD.NumConsecutiveFnEnters = 0;
+    } else {
+      // We will never be able to erase this tail call since we have logged
+      // something in between the function entry and tail exit.
+      TLD.NumTailCalls = 0;
+      TLD.NumConsecutiveFnEnters = 0;
+    }
+    FuncRecord.RecordKind =
+        uint8_t(FunctionRecord::RecordKinds::FunctionTailExit);
+    break;
+  case XRayEntryType::CUSTOM_EVENT: {
+    // This is a bug in patching, so we'll report it once and move on.
+    static bool Once = [&] {
+      Report("Internal error: patched an XRay custom event call as a function; "
+             "func id = %d\n",
+             FuncId);
+      return true;
+    }();
+    (void)Once;
+    return;
+  }
+  }
+
+  std::memcpy(MemPtr, &FuncRecord, sizeof(FunctionRecord));
+  MemPtr += sizeof(FunctionRecord);
+}
+
+static uint64_t thresholdTicks() {
+  static uint64_t TicksPerSec = probeRequiredCPUFeatures()
+                                    ? getTSCFrequency()
+                                    : __xray::NanosecondsPerSecond;
+  static const uint64_t ThresholdTicks =
+      TicksPerSec * flags()->xray_fdr_log_func_duration_threshold_us / 1000000;
+  return ThresholdTicks;
+}
+
+// Re-point the thread local pointer into this thread's Buffer before the recent
+// "Function Entry" record and any "Tail Call Exit" records after that.
+static void rewindRecentCall(uint64_t TSC, uint64_t &LastTSC,
+                             uint64_t &LastFunctionEntryTSC, int32_t FuncId) {
+  auto &TLD = getThreadLocalData();
+  TLD.RecordPtr -= FunctionRecSize;
+  FunctionRecord FuncRecord;
+  std::memcpy(&FuncRecord, TLD.RecordPtr, FunctionRecSize);
+  assert(FuncRecord.RecordKind ==
+             uint8_t(FunctionRecord::RecordKinds::FunctionEnter) &&
+         "Expected to find function entry recording when rewinding.");
+  assert(FuncRecord.FuncId == (FuncId & ~(0x0F << 28)) &&
+         "Expected matching function id when rewinding Exit");
+  --TLD.NumConsecutiveFnEnters;
+  LastTSC -= FuncRecord.TSCDelta;
+
+  // We unwound one call. Update the state and return without writing a log.
+  if (TLD.NumConsecutiveFnEnters != 0) {
+    LastFunctionEntryTSC -= FuncRecord.TSCDelta;
+    return;
+  }
+
+  // Otherwise we've rewound the stack of all function entries, we might be
+  // able to rewind further by erasing tail call functions that are being
+  // exited from via this exit.
+  LastFunctionEntryTSC = 0;
+  auto RewindingTSC = LastTSC;
+  auto RewindingRecordPtr = TLD.RecordPtr - FunctionRecSize;
+  while (TLD.NumTailCalls > 0) {
+    // Rewind the TSC back over the TAIL EXIT record.
+    FunctionRecord ExpectedTailExit;
+    std::memcpy(&ExpectedTailExit, RewindingRecordPtr, FunctionRecSize);
+
+    assert(ExpectedTailExit.RecordKind ==
+               uint8_t(FunctionRecord::RecordKinds::FunctionTailExit) &&
+           "Expected to find tail exit when rewinding.");
+    RewindingRecordPtr -= FunctionRecSize;
+    RewindingTSC -= ExpectedTailExit.TSCDelta;
+    FunctionRecord ExpectedFunctionEntry;
+    std::memcpy(&ExpectedFunctionEntry, RewindingRecordPtr, FunctionRecSize);
+    assert(ExpectedFunctionEntry.RecordKind ==
+               uint8_t(FunctionRecord::RecordKinds::FunctionEnter) &&
+           "Expected to find function entry when rewinding tail call.");
+    assert(ExpectedFunctionEntry.FuncId == ExpectedTailExit.FuncId &&
+           "Expected funcids to match when rewinding tail call.");
+
+    // This tail call exceeded the threshold duration. It will not be erased.
+    if ((TSC - RewindingTSC) >= thresholdTicks()) {
+      TLD.NumTailCalls = 0;
+      return;
+    }
+
+    // We can erase a tail exit pair that we're exiting through since
+    // its duration is under threshold.
+    --TLD.NumTailCalls;
+    RewindingRecordPtr -= FunctionRecSize;
+    RewindingTSC -= ExpectedFunctionEntry.TSCDelta;
+    TLD.RecordPtr -= 2 * FunctionRecSize;
+    LastTSC = RewindingTSC;
+  }
+}
+
+inline bool releaseThreadLocalBuffer(BufferQueue &BQArg) {
+  auto &TLD = getThreadLocalData();
+  auto EC = BQArg.releaseBuffer(TLD.Buffer);
+  if (EC != BufferQueue::ErrorCode::Ok) {
+    Report("Failed to release buffer at %p; error=%s\n", TLD.Buffer.Buffer,
+           BufferQueue::getErrorString(EC));
+    return false;
+  }
+  return true;
+}
+
+inline bool prepareBuffer(uint64_t TSC, unsigned char CPU,
+                          int (*wall_clock_reader)(clockid_t,
+                                                   struct timespec *),
+                          size_t MaxSize) XRAY_NEVER_INSTRUMENT {
+  auto &TLD = getThreadLocalData();
+  char *BufferStart = static_cast<char *>(TLD.Buffer.Buffer);
+  if ((TLD.RecordPtr + MaxSize) >
+      (BufferStart + TLD.Buffer.Size - MetadataRecSize)) {
+    writeEOBMetadata();
+    if (!releaseThreadLocalBuffer(*TLD.LocalBQ))
+      return false;
+    auto EC = TLD.LocalBQ->getBuffer(TLD.Buffer);
+    if (EC != BufferQueue::ErrorCode::Ok) {
+      Report("Failed to acquire a buffer; error=%s\n",
+             BufferQueue::getErrorString(EC));
+      return false;
+    }
+    setupNewBuffer(wall_clock_reader);
+
+    // Always write the CPU metadata as the first record in the buffer.
+    writeNewCPUIdMetadata(CPU, TSC);
+  }
+  return true;
+}
+
+inline bool isLogInitializedAndReady(
+    std::shared_ptr<BufferQueue> &LBQ, uint64_t TSC, unsigned char CPU,
+    int (*wall_clock_reader)(clockid_t,
+                             struct timespec *)) XRAY_NEVER_INSTRUMENT {
+  // Bail out right away if logging is not initialized yet.
+  // We should take the opportunity to release the buffer though.
+  auto Status = __sanitizer::atomic_load(&LoggingStatus,
+                                         __sanitizer::memory_order_acquire);
+  auto &TLD = getThreadLocalData();
+  if (Status != XRayLogInitStatus::XRAY_LOG_INITIALIZED) {
+    if (TLD.RecordPtr != nullptr &&
+        (Status == XRayLogInitStatus::XRAY_LOG_FINALIZING ||
+         Status == XRayLogInitStatus::XRAY_LOG_FINALIZED)) {
+      writeEOBMetadata();
+      if (!releaseThreadLocalBuffer(*LBQ))
+        return false;
+      TLD.RecordPtr = nullptr;
+      LBQ = nullptr;
+      return false;
+    }
+    return false;
+  }
+
+  if (__sanitizer::atomic_load(&LoggingStatus,
+                               __sanitizer::memory_order_acquire) !=
+          XRayLogInitStatus::XRAY_LOG_INITIALIZED ||
+      LBQ->finalizing()) {
+    writeEOBMetadata();
+    if (!releaseThreadLocalBuffer(*LBQ))
+      return false;
+    TLD.RecordPtr = nullptr;
+  }
+
+  if (TLD.Buffer.Buffer == nullptr) {
+    auto EC = LBQ->getBuffer(TLD.Buffer);
+    if (EC != BufferQueue::ErrorCode::Ok) {
+      auto LS = __sanitizer::atomic_load(&LoggingStatus,
+                                         __sanitizer::memory_order_acquire);
+      if (LS != XRayLogInitStatus::XRAY_LOG_FINALIZING &&
+          LS != XRayLogInitStatus::XRAY_LOG_FINALIZED)
+        Report("Failed to acquire a buffer; error=%s\n",
+               BufferQueue::getErrorString(EC));
+      return false;
+    }
+
+    setupNewBuffer(wall_clock_reader);
+
+    // Always write the CPU metadata as the first record in the buffer.
+    writeNewCPUIdMetadata(CPU, TSC);
+  }
+
+  if (TLD.CurrentCPU == std::numeric_limits<uint16_t>::max()) {
+    // This means this is the first CPU this thread has ever run on. We set
+    // the current CPU and record this as the first TSC we've seen.
+    TLD.CurrentCPU = CPU;
+    writeNewCPUIdMetadata(CPU, TSC);
+  }
+
+  return true;
+} // namespace __xray_fdr_internal
+
+// Compute the TSC difference between the time of measurement and the previous
+// event. There are a few interesting situations we need to account for:
+//
+//   - The thread has migrated to a different CPU. If this is the case, then
+//     we write down the following records:
+//
+//       1. A 'NewCPUId' Metadata record.
+//       2. A FunctionRecord with a 0 for the TSCDelta field.
+//
+//   - The TSC delta is greater than the 32 bits we can store in a
+//     FunctionRecord. In this case we write down the following records:
+//
+//       1. A 'TSCWrap' Metadata record.
+//       2. A FunctionRecord with a 0 for the TSCDelta field.
+//
+//   - The TSC delta is representable within the 32 bits we can store in a
+//     FunctionRecord. In this case we write down just a FunctionRecord with
+//     the correct TSC delta.
+inline uint32_t writeCurrentCPUTSC(ThreadLocalData &TLD, uint64_t TSC,
+                                   uint8_t CPU) {
+  if (CPU != TLD.CurrentCPU) {
+    // We've moved to a new CPU.
+    writeNewCPUIdMetadata(CPU, TSC);
+    return 0;
+  }
+  // If the delta is greater than the range for a uint32_t, then we write out
+  // the TSC wrap metadata entry with the full TSC, and the TSC for the
+  // function record be 0.
+  uint64_t Delta = TSC - TLD.LastTSC;
+  if (Delta <= std::numeric_limits<uint32_t>::max())
+    return Delta;
+
+  writeTSCWrapMetadata(TSC);
+  return 0;
+}
+
+inline void endBufferIfFull() XRAY_NEVER_INSTRUMENT {
+  auto &TLD = getThreadLocalData();
+  auto BufferStart = static_cast<char *>(TLD.Buffer.Buffer);
+  if ((TLD.RecordPtr + MetadataRecSize) - BufferStart == MetadataRecSize) {
+    writeEOBMetadata();
+    if (!releaseThreadLocalBuffer(*TLD.LocalBQ))
+      return;
+    TLD.RecordPtr = nullptr;
+  }
+}
+
+thread_local volatile bool Running = false;
+
+/// Here's where the meat of the processing happens. The writer captures
+/// function entry, exit and tail exit points with a time and will create
+/// TSCWrap, NewCPUId and Function records as necessary. The writer might
+/// walk backward through its buffer and erase trivial functions to avoid
+/// polluting the log and may use the buffer queue to obtain or release a
+/// buffer.
+inline void processFunctionHook(
+    int32_t FuncId, XRayEntryType Entry, uint64_t TSC, unsigned char CPU,
+    uint64_t Arg1, int (*wall_clock_reader)(clockid_t, struct timespec *),
+    const std::shared_ptr<BufferQueue> &BQ) XRAY_NEVER_INSTRUMENT {
+  // Prevent signal handler recursion, so in case we're already in a log writing
+  // mode and the signal handler comes in (and is also instrumented) then we
+  // don't want to be clobbering potentially partial writes already happening in
+  // the thread. We use a simple thread_local latch to only allow one on-going
+  // handleArg0 to happen at any given time.
+  RecursionGuard Guard{Running};
+  if (!Guard) {
+    assert(Running == true && "RecursionGuard is buggy!");
+    return;
+  }
+
+  auto &TLD = getThreadLocalData();
+
+  // In case the reference has been cleaned up before, we make sure we
+  // initialize it to the provided BufferQueue.
+  if (TLD.LocalBQ == nullptr)
+    TLD.LocalBQ = BQ;
+
+  if (!isLogInitializedAndReady(TLD.LocalBQ, TSC, CPU, wall_clock_reader))
+    return;
+
+  // Before we go setting up writing new function entries, we need to be really
+  // careful about the pointer math we're doing. This means we need to ensure
+  // that the record we are about to write is going to fit into the buffer,
+  // without overflowing the buffer.
+  //
+  // To do this properly, we use the following assumptions:
+  //
+  //   - The least number of bytes we will ever write is 8
+  //     (sizeof(FunctionRecord)) only if the delta between the previous entry
+  //     and this entry is within 32 bits.
+  //   - The most number of bytes we will ever write is 8 + 16 + 16 = 40.
+  //     This is computed by:
+  //
+  //       MaxSize = sizeof(FunctionRecord) + 2 * sizeof(MetadataRecord)
+  //
+  //     These arise in the following cases:
+  //
+  //       1. When the delta between the TSC we get and the previous TSC for the
+  //          same CPU is outside of the uint32_t range, we end up having to
+  //          write a MetadataRecord to indicate a "tsc wrap" before the actual
+  //          FunctionRecord.
+  //       2. When we learn that we've moved CPUs, we need to write a
+  //          MetadataRecord to indicate a "cpu change", and thus write out the
+  //          current TSC for that CPU before writing out the actual
+  //          FunctionRecord.
+  //       3. When we learn about a new CPU ID, we need to write down a "new cpu
+  //          id" MetadataRecord before writing out the actual FunctionRecord.
+  //       4. The second MetadataRecord is the optional function call argument.
+  //
+  //   - An End-of-Buffer (EOB) MetadataRecord is 16 bytes.
+  //
+  // So the math we need to do is to determine whether writing 24 bytes past the
+  // current pointer leaves us with enough bytes to write the EOB
+  // MetadataRecord. If we don't have enough space after writing as much as 24
+  // bytes in the end of the buffer, we need to write out the EOB, get a new
+  // Buffer, set it up properly before doing any further writing.
+  size_t MaxSize = FunctionRecSize + 2 * MetadataRecSize;
+  if (!prepareBuffer(TSC, CPU, wall_clock_reader, MaxSize)) {
+    TLD.LocalBQ = nullptr;
+    return;
+  }
+
+  // By this point, we are now ready to write up to 40 bytes (explained above).
+  assert((TLD.RecordPtr + MaxSize) - static_cast<char *>(TLD.Buffer.Buffer) >=
+             static_cast<ptrdiff_t>(MetadataRecSize) &&
+         "Misconfigured BufferQueue provided; Buffer size not large enough.");
+
+  auto RecordTSCDelta = writeCurrentCPUTSC(TLD, TSC, CPU);
+  TLD.LastTSC = TSC;
+  TLD.CurrentCPU = CPU;
+  switch (Entry) {
+  case XRayEntryType::ENTRY:
+  case XRayEntryType::LOG_ARGS_ENTRY:
+    // Update the thread local state for the next invocation.
+    TLD.LastFunctionEntryTSC = TSC;
+    break;
+  case XRayEntryType::TAIL:
+  case XRayEntryType::EXIT:
+    // Break out and write the exit record if we can't erase any functions.
+    if (TLD.NumConsecutiveFnEnters == 0 ||
+        (TSC - TLD.LastFunctionEntryTSC) >= thresholdTicks())
+      break;
+    rewindRecentCall(TSC, TLD.LastTSC, TLD.LastFunctionEntryTSC, FuncId);
+    return; // without writing log.
+  case XRayEntryType::CUSTOM_EVENT: {
+    // This is a bug in patching, so we'll report it once and move on.
+    static bool Once = [&] {
+      Report("Internal error: patched an XRay custom event call as a function; "
+             "func id = %d",
+             FuncId);
+      return true;
+    }();
+    (void)Once;
+    return;
+  }
+  }
+
+  writeFunctionRecord(FuncId, RecordTSCDelta, Entry, TLD.RecordPtr);
+  if (Entry == XRayEntryType::LOG_ARGS_ENTRY)
+    writeCallArgumentMetadata(Arg1);
+
+  // If we've exhausted the buffer by this time, we then release the buffer to
+  // make sure that other threads may start using this buffer.
+  endBufferIfFull();
+}
+
+} // namespace __xray_fdr_internal
+} // namespace __xray
+
+#endif // XRAY_XRAY_FDR_LOGGING_IMPL_H
diff --git a/lib/xray/xray_flags.inc b/lib/xray/xray_flags.inc
index 7a16c41..7ddce78 100644
--- a/lib/xray/xray_flags.inc
+++ b/lib/xray/xray_flags.inc
@@ -22,3 +22,6 @@
           "Filename base for the xray logfile.")
 XRAY_FLAG(bool, xray_fdr_log, false,
           "Whether to install the flight data recorder logging implementation.")
+XRAY_FLAG(int, xray_fdr_log_func_duration_threshold_us, 5,
+          "FDR logging will try to skip functions that execute for fewer "
+          "microseconds than this threshold.")
diff --git a/lib/xray/xray_init.cc b/lib/xray/xray_init.cc
index 9d012e9..07f6924 100644
--- a/lib/xray/xray_init.cc
+++ b/lib/xray/xray_init.cc
@@ -12,7 +12,6 @@
 // XRay initialisation logic.
 //===----------------------------------------------------------------------===//
 
-#include <atomic>
 #include <fcntl.h>
 #include <strings.h>
 #include <unistd.h>
@@ -26,9 +25,10 @@
 void __xray_init();
 extern const XRaySledEntry __start_xray_instr_map[] __attribute__((weak));
 extern const XRaySledEntry __stop_xray_instr_map[] __attribute__((weak));
+extern const XRayFunctionSledIndex __start_xray_fn_idx[] __attribute__((weak));
+extern const XRayFunctionSledIndex __stop_xray_fn_idx[] __attribute__((weak));
 }
 
-using namespace __sanitizer;
 using namespace __xray;
 
 // When set to 'true' this means the XRay runtime has been initialised. We use
@@ -38,33 +38,57 @@
 //
 // FIXME: Support DSO instrumentation maps too. The current solution only works
 // for statically linked executables.
-std::atomic<bool> XRayInitialized{false};
+__sanitizer::atomic_uint8_t XRayInitialized{0};
 
 // This should always be updated before XRayInitialized is updated.
-std::atomic<__xray::XRaySledMap> XRayInstrMap{};
+__sanitizer::SpinMutex XRayInstrMapMutex;
+XRaySledMap XRayInstrMap;
+
+// Global flag to determine whether the flags have been initialized.
+__sanitizer::atomic_uint8_t XRayFlagsInitialized{0};
+
+// A mutex to allow only one thread to initialize the XRay data structures.
+__sanitizer::SpinMutex XRayInitMutex;
 
 // __xray_init() will do the actual loading of the current process' memory map
 // and then proceed to look for the .xray_instr_map section/segment.
 void __xray_init() XRAY_NEVER_INSTRUMENT {
-  initializeFlags();
+  __sanitizer::SpinMutexLock Guard(&XRayInitMutex);
+  // Short-circuit if we've already initialized XRay before.
+  if (__sanitizer::atomic_load(&XRayInitialized,
+                               __sanitizer::memory_order_acquire))
+    return;
+
+  if (!__sanitizer::atomic_load(&XRayFlagsInitialized,
+                                __sanitizer::memory_order_acquire)) {
+    initializeFlags();
+    __sanitizer::atomic_store(&XRayFlagsInitialized, true,
+                              __sanitizer::memory_order_release);
+  }
+
   if (__start_xray_instr_map == nullptr) {
-    Report("XRay instrumentation map missing. Not initializing XRay.\n");
+    if (Verbosity())
+      Report("XRay instrumentation map missing. Not initializing XRay.\n");
     return;
   }
 
-  // Now initialize the XRayInstrMap global struct with the address of the
-  // entries, reinterpreted as an array of XRaySledEntry objects. We use the
-  // virtual pointer we have from the section to provide us the correct
-  // information.
-  __xray::XRaySledMap SledMap{};
-  SledMap.Sleds = __start_xray_instr_map;
-  SledMap.Entries = __stop_xray_instr_map - __start_xray_instr_map;
-  XRayInstrMap.store(SledMap, std::memory_order_release);
-  XRayInitialized.store(true, std::memory_order_release);
+  {
+    __sanitizer::SpinMutexLock Guard(&XRayInstrMapMutex);
+    XRayInstrMap.Sleds = __start_xray_instr_map;
+    XRayInstrMap.Entries = __stop_xray_instr_map - __start_xray_instr_map;
+    XRayInstrMap.SledsIndex = __start_xray_fn_idx;
+    XRayInstrMap.Functions = __stop_xray_fn_idx - __start_xray_fn_idx;
+  }
+  __sanitizer::atomic_store(&XRayInitialized, true,
+                            __sanitizer::memory_order_release);
 
+#ifndef XRAY_NO_PREINIT
   if (flags()->patch_premain)
     __xray_patch();
+#endif
 }
 
+#ifndef XRAY_NO_PREINIT
 __attribute__((section(".preinit_array"),
                used)) void (*__local_xray_preinit)(void) = __xray_init;
+#endif
diff --git a/lib/xray/xray_inmemory_log.cc b/lib/xray/xray_inmemory_log.cc
index fd4f549..188bae6 100644
--- a/lib/xray/xray_inmemory_log.cc
+++ b/lib/xray/xray_inmemory_log.cc
@@ -16,12 +16,13 @@
 //===----------------------------------------------------------------------===//
 
 #include <cassert>
+#include <cstring>
+#include <errno.h>
 #include <fcntl.h>
-#include <mutex>
 #include <sys/stat.h>
 #include <sys/syscall.h>
 #include <sys/types.h>
-#include <thread>
+#include <time.h>
 #include <unistd.h>
 
 #include "sanitizer_common/sanitizer_libc.h"
@@ -43,7 +44,7 @@
 
 namespace __xray {
 
-std::mutex LogMutex;
+__sanitizer::SpinMutex LogMutex;
 
 class ThreadExitFlusher {
   int Fd;
@@ -58,7 +59,7 @@
         Offset(Offset) {}
 
   ~ThreadExitFlusher() XRAY_NEVER_INSTRUMENT {
-    std::lock_guard<std::mutex> L(LogMutex);
+    __sanitizer::SpinMutexLock L(&LogMutex);
     if (Fd > 0 && Start != nullptr) {
       retryingWriteAll(Fd, reinterpret_cast<char *>(Start),
                        reinterpret_cast<char *>(Start + Offset));
@@ -77,16 +78,21 @@
 
 static int __xray_OpenLogFile() XRAY_NEVER_INSTRUMENT {
   int F = getLogFD();
-  auto TSCFrequency = getTSCFrequency();
   if (F == -1)
     return -1;
+
+  // Test for required CPU features and cache the cycle frequency
+  static bool TSCSupported = probeRequiredCPUFeatures();
+  static uint64_t CycleFrequency =
+      TSCSupported ? getTSCFrequency() : __xray::NanosecondsPerSecond;
+
   // Since we're here, we get to write the header. We set it up so that the
   // header will only be written once, at the start, and let the threads
   // logging do writes which just append.
   XRayFileHeader Header;
-  Header.Version = 1;
+  Header.Version = 2; // Version 2 includes tail exit records.
   Header.Type = FileTypes::NAIVE_LOG;
-  Header.CycleFrequency = TSCFrequency;
+  Header.CycleFrequency = CycleFrequency;
 
   // FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc'
   // before setting the values in the header.
@@ -97,45 +103,165 @@
   return F;
 }
 
-void __xray_InMemoryRawLog(int32_t FuncId,
-                           XRayEntryType Type) XRAY_NEVER_INSTRUMENT {
-  using Buffer =
-      std::aligned_storage<sizeof(XRayRecord), alignof(XRayRecord)>::type;
-  static constexpr size_t BuffLen = 1024;
+using Buffer =
+    std::aligned_storage<sizeof(XRayRecord), alignof(XRayRecord)>::type;
+
+static constexpr size_t BuffLen = 1024;
+thread_local size_t Offset = 0;
+
+Buffer (&getThreadLocalBuffer())[BuffLen] XRAY_NEVER_INSTRUMENT {
   thread_local static Buffer InMemoryBuffer[BuffLen] = {};
-  thread_local static size_t Offset = 0;
+  return InMemoryBuffer;
+}
+
+pid_t getTId() XRAY_NEVER_INSTRUMENT {
+  thread_local pid_t TId = syscall(SYS_gettid);
+  return TId;
+}
+
+int getGlobalFd() XRAY_NEVER_INSTRUMENT {
   static int Fd = __xray_OpenLogFile();
+  return Fd;
+}
+
+thread_local volatile bool RecusionGuard = false;
+template <class RDTSC>
+void __xray_InMemoryRawLog(int32_t FuncId, XRayEntryType Type,
+                           RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT {
+  auto &InMemoryBuffer = getThreadLocalBuffer();
+  int Fd = getGlobalFd();
   if (Fd == -1)
     return;
   thread_local __xray::ThreadExitFlusher Flusher(
       Fd, reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer), Offset);
-  thread_local pid_t TId = syscall(SYS_gettid);
+
+  // Use a simple recursion guard, to handle cases where we're already logging
+  // and for one reason or another, this function gets called again in the same
+  // thread.
+  if (RecusionGuard)
+    return;
+  RecusionGuard = true;
 
   // First we get the useful data, and stuff it into the already aligned buffer
   // through a pointer offset.
   auto &R = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer)[Offset];
   R.RecordType = RecordTypes::NORMAL;
-  R.TSC = __xray::readTSC(R.CPU);
-  R.TId = TId;
+  R.TSC = ReadTSC(R.CPU);
+  R.TId = getTId();
   R.Type = Type;
   R.FuncId = FuncId;
   ++Offset;
   if (Offset == BuffLen) {
-    std::lock_guard<std::mutex> L(LogMutex);
+    __sanitizer::SpinMutexLock L(&LogMutex);
     auto RecordBuffer = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer);
     retryingWriteAll(Fd, reinterpret_cast<char *>(RecordBuffer),
                      reinterpret_cast<char *>(RecordBuffer + Offset));
     Offset = 0;
   }
+
+  RecusionGuard = false;
+}
+
+template <class RDTSC>
+void __xray_InMemoryRawLogWithArg(int32_t FuncId, XRayEntryType Type,
+                                  uint64_t Arg1,
+                                  RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT {
+  auto &InMemoryBuffer = getThreadLocalBuffer();
+  int Fd = getGlobalFd();
+  if (Fd == -1)
+    return;
+
+  // First we check whether there's enough space to write the data consecutively
+  // in the thread-local buffer. If not, we first flush the buffer before
+  // attempting to write the two records that must be consecutive.
+  if (Offset + 2 > BuffLen) {
+    __sanitizer::SpinMutexLock L(&LogMutex);
+    auto RecordBuffer = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer);
+    retryingWriteAll(Fd, reinterpret_cast<char *>(RecordBuffer),
+                     reinterpret_cast<char *>(RecordBuffer + Offset));
+    Offset = 0;
+  }
+
+  // Then we write the "we have an argument" record.
+  __xray_InMemoryRawLog(FuncId, Type, ReadTSC);
+
+  if (RecusionGuard)
+    return;
+
+  RecusionGuard = true;
+
+  // And from here on write the arg payload.
+  __xray::XRayArgPayload R;
+  R.RecordType = RecordTypes::ARG_PAYLOAD;
+  R.FuncId = FuncId;
+  R.TId = getTId();
+  R.Arg = Arg1;
+  auto EntryPtr =
+      &reinterpret_cast<__xray::XRayArgPayload *>(&InMemoryBuffer)[Offset];
+  std::memcpy(EntryPtr, &R, sizeof(R));
+  ++Offset;
+  if (Offset == BuffLen) {
+    __sanitizer::SpinMutexLock L(&LogMutex);
+    auto RecordBuffer = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer);
+    retryingWriteAll(Fd, reinterpret_cast<char *>(RecordBuffer),
+                     reinterpret_cast<char *>(RecordBuffer + Offset));
+    Offset = 0;
+  }
+
+  RecusionGuard = false;
+}
+
+void __xray_InMemoryRawLogRealTSC(int32_t FuncId,
+                                  XRayEntryType Type) XRAY_NEVER_INSTRUMENT {
+  __xray_InMemoryRawLog(FuncId, Type, __xray::readTSC);
+}
+
+void __xray_InMemoryEmulateTSC(int32_t FuncId,
+                               XRayEntryType Type) XRAY_NEVER_INSTRUMENT {
+  __xray_InMemoryRawLog(FuncId, Type, [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
+    timespec TS;
+    int result = clock_gettime(CLOCK_REALTIME, &TS);
+    if (result != 0) {
+      Report("clock_gettimg(2) return %d, errno=%d.", result, int(errno));
+      TS = {0, 0};
+    }
+    CPU = 0;
+    return TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec;
+  });
+}
+
+void __xray_InMemoryRawLogWithArgRealTSC(int32_t FuncId, XRayEntryType Type,
+                                         uint64_t Arg1) XRAY_NEVER_INSTRUMENT {
+  __xray_InMemoryRawLogWithArg(FuncId, Type, Arg1, __xray::readTSC);
+}
+
+void __xray_InMemoryRawLogWithArgEmulateTSC(
+    int32_t FuncId, XRayEntryType Type, uint64_t Arg1) XRAY_NEVER_INSTRUMENT {
+  __xray_InMemoryRawLogWithArg(
+      FuncId, Type, Arg1, [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
+        timespec TS;
+        int result = clock_gettime(CLOCK_REALTIME, &TS);
+        if (result != 0) {
+          Report("clock_gettimg(2) return %d, errno=%d.", result, int(errno));
+          TS = {0, 0};
+        }
+        CPU = 0;
+        return TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec;
+      });
 }
 
 static auto UNUSED Unused = [] {
-  if (!probeRequiredCPUFeatures()) {
-    Report("Required CPU features missing for XRay instrumentation, not "
-           "installing instrumentation hooks.\n");
-    return false;
+  auto UseRealTSC = probeRequiredCPUFeatures();
+  if (!UseRealTSC)
+    Report("WARNING: Required CPU features missing for XRay instrumentation, "
+           "using emulation instead.\n");
+  if (flags()->xray_naive_log) {
+    __xray_set_handler_arg1(UseRealTSC
+                                ? __xray_InMemoryRawLogWithArgRealTSC
+                                : __xray_InMemoryRawLogWithArgEmulateTSC);
+    __xray_set_handler(UseRealTSC ? __xray_InMemoryRawLogRealTSC
+                                  : __xray_InMemoryEmulateTSC);
   }
-  if (flags()->xray_naive_log)
-    __xray_set_handler(__xray_InMemoryRawLog);
+
   return true;
 }();
diff --git a/lib/xray/xray_interface.cc b/lib/xray/xray_interface.cc
index 39cf8ef..7ad6a9b 100644
--- a/lib/xray/xray_interface.cc
+++ b/lib/xray/xray_interface.cc
@@ -15,7 +15,6 @@
 
 #include "xray_interface_internal.h"
 
-#include <atomic>
 #include <cstdint>
 #include <cstdio>
 #include <errno.h>
@@ -46,10 +45,13 @@
 #endif /* CPU architecture */
 
 // This is the function to call when we encounter the entry or exit sleds.
-std::atomic<void (*)(int32_t, XRayEntryType)> XRayPatchedFunction{nullptr};
+__sanitizer::atomic_uintptr_t XRayPatchedFunction{0};
 
 // This is the function to call from the arg1-enabled sleds/trampolines.
-std::atomic<void (*)(int32_t, XRayEntryType, uint64_t)> XRayArgLogger{nullptr};
+__sanitizer::atomic_uintptr_t XRayArgLogger{0};
+
+// This is the function to call when we encounter a custom event log call.
+__sanitizer::atomic_uintptr_t XRayPatchedCustomEvent{0};
 
 // MProtectHelper is an RAII wrapper for calls to mprotect(...) that will undo
 // any successful mprotect(...) changes. This is used to make a page writeable
@@ -88,23 +90,45 @@
 
 } // namespace __xray
 
-extern std::atomic<bool> XRayInitialized;
-extern std::atomic<__xray::XRaySledMap> XRayInstrMap;
+extern __sanitizer::SpinMutex XRayInstrMapMutex;
+extern __sanitizer::atomic_uint8_t XRayInitialized;
+extern __xray::XRaySledMap XRayInstrMap;
 
 int __xray_set_handler(void (*entry)(int32_t,
                                      XRayEntryType)) XRAY_NEVER_INSTRUMENT {
-  if (XRayInitialized.load(std::memory_order_acquire)) {
-    __xray::XRayPatchedFunction.store(entry, std::memory_order_release);
+  if (__sanitizer::atomic_load(&XRayInitialized,
+                               __sanitizer::memory_order_acquire)) {
+
+    __sanitizer::atomic_store(&__xray::XRayPatchedFunction,
+                              reinterpret_cast<uintptr_t>(entry),
+                              __sanitizer::memory_order_release);
     return 1;
   }
   return 0;
 }
 
+int __xray_set_customevent_handler(void (*entry)(void *, size_t))
+    XRAY_NEVER_INSTRUMENT {
+  if (__sanitizer::atomic_load(&XRayInitialized,
+                               __sanitizer::memory_order_acquire)) {
+    __sanitizer::atomic_store(&__xray::XRayPatchedCustomEvent,
+                              reinterpret_cast<uintptr_t>(entry),
+                              __sanitizer::memory_order_release);
+    return 1;
+  }
+  return 0;
+}
+
+
 int __xray_remove_handler() XRAY_NEVER_INSTRUMENT {
   return __xray_set_handler(nullptr);
 }
 
-std::atomic<bool> XRayPatching{false};
+int __xray_remove_customevent_handler() XRAY_NEVER_INSTRUMENT {
+  return __xray_set_customevent_handler(nullptr);
+}
+
+__sanitizer::atomic_uint8_t XRayPatching{0};
 
 using namespace __xray;
 
@@ -128,30 +152,72 @@
   return CleanupInvoker<Function>{Fn};
 }
 
+inline bool patchSled(const XRaySledEntry &Sled, bool Enable,
+                      int32_t FuncId) XRAY_NEVER_INSTRUMENT {
+  // While we're here, we should patch the nop sled. To do that we mprotect
+  // the page containing the function to be writeable.
+  const uint64_t PageSize = GetPageSizeCached();
+  void *PageAlignedAddr =
+      reinterpret_cast<void *>(Sled.Address & ~(PageSize - 1));
+  std::size_t MProtectLen = (Sled.Address + cSledLength) -
+                            reinterpret_cast<uint64_t>(PageAlignedAddr);
+  MProtectHelper Protector(PageAlignedAddr, MProtectLen);
+  if (Protector.MakeWriteable() == -1) {
+    printf("Failed mprotect: %d\n", errno);
+    return XRayPatchingStatus::FAILED;
+  }
+
+  bool Success = false;
+  switch (Sled.Kind) {
+  case XRayEntryType::ENTRY:
+    Success = patchFunctionEntry(Enable, FuncId, Sled, __xray_FunctionEntry);
+    break;
+  case XRayEntryType::EXIT:
+    Success = patchFunctionExit(Enable, FuncId, Sled);
+    break;
+  case XRayEntryType::TAIL:
+    Success = patchFunctionTailExit(Enable, FuncId, Sled);
+    break;
+  case XRayEntryType::LOG_ARGS_ENTRY:
+    Success = patchFunctionEntry(Enable, FuncId, Sled, __xray_ArgLoggerEntry);
+    break;
+  case XRayEntryType::CUSTOM_EVENT:
+    Success = patchCustomEvent(Enable, FuncId, Sled);
+    break;
+  default:
+    Report("Unsupported sled kind '%d' @%04x\n", Sled.Address, int(Sled.Kind));
+    return false;
+  }
+  return Success;
+}
+
 // controlPatching implements the common internals of the patching/unpatching
 // implementation. |Enable| defines whether we're enabling or disabling the
 // runtime XRay instrumentation.
 XRayPatchingStatus controlPatching(bool Enable) XRAY_NEVER_INSTRUMENT {
-  if (!XRayInitialized.load(std::memory_order_acquire))
+  if (!__sanitizer::atomic_load(&XRayInitialized,
+                                __sanitizer::memory_order_acquire))
     return XRayPatchingStatus::NOT_INITIALIZED; // Not initialized.
 
-  static bool NotPatching = false;
-  if (!XRayPatching.compare_exchange_strong(NotPatching, true,
-                                            std::memory_order_acq_rel,
-                                            std::memory_order_acquire)) {
+  uint8_t NotPatching = false;
+  if (!__sanitizer::atomic_compare_exchange_strong(
+          &XRayPatching, &NotPatching, true, __sanitizer::memory_order_acq_rel))
     return XRayPatchingStatus::ONGOING; // Already patching.
-  }
 
-  bool PatchingSuccess = false;
+  uint8_t PatchingSuccess = false;
   auto XRayPatchingStatusResetter = scopeCleanup([&PatchingSuccess] {
-    if (!PatchingSuccess) {
-      XRayPatching.store(false, std::memory_order_release);
-    }
+    if (!PatchingSuccess)
+      __sanitizer::atomic_store(&XRayPatching, false,
+                                __sanitizer::memory_order_release);
   });
 
   // Step 1: Compute the function id, as a unique identifier per function in the
   // instrumentation map.
-  XRaySledMap InstrMap = XRayInstrMap.load(std::memory_order_acquire);
+  XRaySledMap InstrMap;
+  {
+    __sanitizer::SpinMutexLock Guard(&XRayInstrMapMutex);
+    InstrMap = XRayInstrMap;
+  }
   if (InstrMap.Entries == 0)
     return XRayPatchingStatus::NOT_INITIALIZED;
 
@@ -172,40 +238,10 @@
       ++FuncId;
       CurFun = F;
     }
-
-    // While we're here, we should patch the nop sled. To do that we mprotect
-    // the page containing the function to be writeable.
-    void *PageAlignedAddr =
-        reinterpret_cast<void *>(Sled.Address & ~(PageSize - 1));
-    std::size_t MProtectLen = (Sled.Address + cSledLength) -
-                              reinterpret_cast<uint64_t>(PageAlignedAddr);
-    MProtectHelper Protector(PageAlignedAddr, MProtectLen);
-    if (Protector.MakeWriteable() == -1) {
-      printf("Failed mprotect: %d\n", errno);
-      return XRayPatchingStatus::FAILED;
-    }
-
-    bool Success = false;
-    switch (Sled.Kind) {
-    case XRayEntryType::ENTRY:
-      Success = patchFunctionEntry(Enable, FuncId, Sled, __xray_FunctionEntry);
-      break;
-    case XRayEntryType::EXIT:
-      Success = patchFunctionExit(Enable, FuncId, Sled);
-      break;
-    case XRayEntryType::TAIL:
-      Success = patchFunctionTailExit(Enable, FuncId, Sled);
-      break;
-    case XRayEntryType::LOG_ARGS_ENTRY:
-      Success = patchFunctionEntry(Enable, FuncId, Sled, __xray_ArgLoggerEntry);
-      break;
-    default:
-      Report("Unsupported sled kind: %d\n", int(Sled.Kind));
-      continue;
-    }
-    (void)Success;
+    patchSled(Sled, Enable, FuncId);
   }
-  XRayPatching.store(false, std::memory_order_release);
+  __sanitizer::atomic_store(&XRayPatching, false,
+                            __sanitizer::memory_order_release);
   PatchingSuccess = true;
   return XRayPatchingStatus::SUCCESS;
 }
@@ -218,15 +254,94 @@
   return controlPatching(false);
 }
 
-int __xray_set_handler_arg1(void (*Handler)(int32_t, XRayEntryType, uint64_t))
-{
-  if (!XRayInitialized.load(std::memory_order_acquire)) {
-    return 0;
+XRayPatchingStatus patchFunction(int32_t FuncId,
+                                 bool Enable) XRAY_NEVER_INSTRUMENT {
+  if (!__sanitizer::atomic_load(&XRayInitialized,
+                                __sanitizer::memory_order_acquire))
+    return XRayPatchingStatus::NOT_INITIALIZED; // Not initialized.
+
+  uint8_t NotPatching = false;
+  if (!__sanitizer::atomic_compare_exchange_strong(
+          &XRayPatching, &NotPatching, true, __sanitizer::memory_order_acq_rel))
+    return XRayPatchingStatus::ONGOING; // Already patching.
+
+  // Next, we look for the function index.
+  XRaySledMap InstrMap;
+  {
+    __sanitizer::SpinMutexLock Guard(&XRayInstrMapMutex);
+    InstrMap = XRayInstrMap;
   }
+
+  // If we don't have an index, we can't patch individual functions.
+  if (InstrMap.Functions == 0)
+    return XRayPatchingStatus::NOT_INITIALIZED;
+
+  // FuncId must be a positive number, less than the number of functions
+  // instrumented.
+  if (FuncId <= 0 || static_cast<size_t>(FuncId) > InstrMap.Functions) {
+    Report("Invalid function id provided: %d\n", FuncId);
+    return XRayPatchingStatus::FAILED;
+  }
+
+  // Now we patch ths sleds for this specific function.
+  auto SledRange = InstrMap.SledsIndex[FuncId - 1];
+  auto *f = SledRange.Begin;
+  auto *e = SledRange.End;
+
+  bool SucceedOnce = false;
+  while (f != e)
+    SucceedOnce |= patchSled(*f++, Enable, FuncId);
+
+  __sanitizer::atomic_store(&XRayPatching, false,
+                            __sanitizer::memory_order_release);
+
+  if (!SucceedOnce) {
+    Report("Failed patching any sled for function '%d'.", FuncId);
+    return XRayPatchingStatus::FAILED;
+  }
+
+  return XRayPatchingStatus::SUCCESS;
+}
+
+XRayPatchingStatus __xray_patch_function(int32_t FuncId) XRAY_NEVER_INSTRUMENT {
+  return patchFunction(FuncId, true);
+}
+
+XRayPatchingStatus
+__xray_unpatch_function(int32_t FuncId) XRAY_NEVER_INSTRUMENT {
+  return patchFunction(FuncId, false);
+}
+
+int __xray_set_handler_arg1(void (*entry)(int32_t, XRayEntryType, uint64_t)) {
+  if (!__sanitizer::atomic_load(&XRayInitialized,
+                                __sanitizer::memory_order_acquire))
+    return 0;
+
   // A relaxed write might not be visible even if the current thread gets
   // scheduled on a different CPU/NUMA node.  We need to wait for everyone to
   // have this handler installed for consistency of collected data across CPUs.
-  XRayArgLogger.store(Handler, std::memory_order_release);
+  __sanitizer::atomic_store(&XRayArgLogger, reinterpret_cast<uint64_t>(entry),
+                            __sanitizer::memory_order_release);
   return 1;
 }
+
 int __xray_remove_handler_arg1() { return __xray_set_handler_arg1(nullptr); }
+
+uintptr_t __xray_function_address(int32_t FuncId) XRAY_NEVER_INSTRUMENT {
+  __sanitizer::SpinMutexLock Guard(&XRayInstrMapMutex);
+  if (FuncId <= 0 || static_cast<size_t>(FuncId) > XRayInstrMap.Functions)
+    return 0;
+  return XRayInstrMap.SledsIndex[FuncId - 1].Begin->Function
+// On PPC, function entries are always aligned to 16 bytes. The beginning of a
+// sled might be a local entry, which is always +8 based on the global entry.
+// Always return the global entry.
+#ifdef __PPC__
+         & ~0xf
+#endif
+      ;
+}
+
+size_t __xray_max_function_id() XRAY_NEVER_INSTRUMENT {
+  __sanitizer::SpinMutexLock Guard(&XRayInstrMapMutex);
+  return XRayInstrMap.Functions;
+}
diff --git a/lib/xray/xray_interface_internal.h b/lib/xray/xray_interface_internal.h
index 0e3a251..5811e2b 100644
--- a/lib/xray/xray_interface_internal.h
+++ b/lib/xray/xray_interface_internal.h
@@ -28,17 +28,24 @@
   uint64_t Function;
   unsigned char Kind;
   unsigned char AlwaysInstrument;
-  unsigned char Padding[14]; // Need 32 bytes
+  unsigned char Version;
+  unsigned char Padding[13]; // Need 32 bytes
 #elif SANITIZER_WORDSIZE == 32
   uint32_t Address;
   uint32_t Function;
   unsigned char Kind;
   unsigned char AlwaysInstrument;
-  unsigned char Padding[6]; // Need 16 bytes
+  unsigned char Version;
+  unsigned char Padding[5]; // Need 16 bytes
 #else
 #error "Unsupported word size."
 #endif
 };
+
+struct XRayFunctionSledIndex {
+  const XRaySledEntry* Begin;
+  const XRaySledEntry* End;
+};
 }
 
 namespace __xray {
@@ -46,6 +53,8 @@
 struct XRaySledMap {
   const XRaySledEntry *Sleds;
   size_t Entries;
+  const XRayFunctionSledIndex *SledsIndex;
+  size_t Functions;
 };
 
 bool patchFunctionEntry(bool Enable, uint32_t FuncId,
@@ -53,6 +62,7 @@
 bool patchFunctionExit(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled);
 bool patchFunctionTailExit(bool Enable, uint32_t FuncId,
                            const XRaySledEntry &Sled);
+bool patchCustomEvent(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled);
 
 } // namespace __xray
 
@@ -63,6 +73,7 @@
 extern void __xray_FunctionExit();
 extern void __xray_FunctionTailExit();
 extern void __xray_ArgLoggerEntry();
+extern void __xray_CustomEvent();
 }
 
 #endif
diff --git a/lib/xray/xray_log_interface.cc b/lib/xray/xray_log_interface.cc
index c7cfe50..5cc6ade 100644
--- a/lib/xray/xray_log_interface.cc
+++ b/lib/xray/xray_log_interface.cc
@@ -12,45 +12,57 @@
 //===----------------------------------------------------------------------===//
 #include "xray/xray_log_interface.h"
 
+#include "sanitizer_common/sanitizer_atomic.h"
+#include "sanitizer_common/sanitizer_mutex.h"
 #include "xray/xray_interface.h"
 #include "xray_defs.h"
 
-#include <memory>
-#include <mutex>
-
-std::mutex XRayImplMutex;
-std::unique_ptr<XRayLogImpl> GlobalXRayImpl;
+__sanitizer::SpinMutex XRayImplMutex;
+XRayLogImpl *GlobalXRayImpl = nullptr;
 
 void __xray_set_log_impl(XRayLogImpl Impl) XRAY_NEVER_INSTRUMENT {
   if (Impl.log_init == nullptr || Impl.log_finalize == nullptr ||
       Impl.handle_arg0 == nullptr || Impl.flush_log == nullptr) {
-    std::lock_guard<std::mutex> Guard(XRayImplMutex);
-    GlobalXRayImpl.reset();
+    __sanitizer::SpinMutexLock Guard(&XRayImplMutex);
+    delete GlobalXRayImpl;
+    GlobalXRayImpl = nullptr;
+    __xray_remove_handler();
+    __xray_remove_handler_arg1();
     return;
   }
 
-  std::lock_guard<std::mutex> Guard(XRayImplMutex);
-  GlobalXRayImpl.reset(new XRayLogImpl);
+  __sanitizer::SpinMutexLock Guard(&XRayImplMutex);
+  GlobalXRayImpl = new XRayLogImpl();
   *GlobalXRayImpl = Impl;
+  __xray_set_handler(Impl.handle_arg0);
 }
 
-XRayLogInitStatus __xray_init(size_t BufferSize, size_t MaxBuffers, void *Args,
-                              size_t ArgsSize) XRAY_NEVER_INSTRUMENT {
-  std::lock_guard<std::mutex> Guard(XRayImplMutex);
+void __xray_remove_log_impl() XRAY_NEVER_INSTRUMENT {
+  __sanitizer::SpinMutexLock Guard(&XRayImplMutex);
+  delete GlobalXRayImpl;
+  GlobalXRayImpl = nullptr;
+  __xray_remove_handler();
+  __xray_remove_handler_arg1();
+}
+
+XRayLogInitStatus __xray_log_init(size_t BufferSize, size_t MaxBuffers,
+                                  void *Args,
+                                  size_t ArgsSize) XRAY_NEVER_INSTRUMENT {
+  __sanitizer::SpinMutexLock Guard(&XRayImplMutex);
   if (!GlobalXRayImpl)
     return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
   return GlobalXRayImpl->log_init(BufferSize, MaxBuffers, Args, ArgsSize);
 }
 
 XRayLogInitStatus __xray_log_finalize() XRAY_NEVER_INSTRUMENT {
-  std::lock_guard<std::mutex> Guard(XRayImplMutex);
+  __sanitizer::SpinMutexLock Guard(&XRayImplMutex);
   if (!GlobalXRayImpl)
     return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
   return GlobalXRayImpl->log_finalize();
 }
 
 XRayLogFlushStatus __xray_log_flushLog() XRAY_NEVER_INSTRUMENT {
-  std::lock_guard<std::mutex> Guard(XRayImplMutex);
+  __sanitizer::SpinMutexLock Guard(&XRayImplMutex);
   if (!GlobalXRayImpl)
     return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
   return GlobalXRayImpl->flush_log();
diff --git a/lib/xray/xray_mips.cc b/lib/xray/xray_mips.cc
index c8ff399..cd86330 100644
--- a/lib/xray/xray_mips.cc
+++ b/lib/xray/xray_mips.cc
@@ -95,7 +95,8 @@
   //   B #44
 
   if (Enable) {
-    uint32_t LoTracingHookAddr = reinterpret_cast<int32_t>(TracingHook) & 0xffff;
+    uint32_t LoTracingHookAddr =
+        reinterpret_cast<int32_t>(TracingHook) & 0xffff;
     uint32_t HiTracingHookAddr =
         (reinterpret_cast<int32_t>(TracingHook) >> 16) & 0xffff;
     uint32_t LoFunctionID = FuncId & 0xffff;
@@ -151,6 +152,12 @@
   return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
 }
 
+bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
+                      const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+  // FIXME: Implement in mips?
+  return false;
+}
+
 } // namespace __xray
 
 extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT {
diff --git a/lib/xray/xray_mips64.cc b/lib/xray/xray_mips64.cc
index 2113684..fa8fdd5 100644
--- a/lib/xray/xray_mips64.cc
+++ b/lib/xray/xray_mips64.cc
@@ -93,7 +93,8 @@
   if (Enable) {
     uint32_t LoTracingHookAddr =
         reinterpret_cast<int64_t>(TracingHook) & 0xffff;
-    uint32_t HiTracingHookAddr = (reinterpret_cast<int64_t>(TracingHook) >> 16) & 0xffff;
+    uint32_t HiTracingHookAddr =
+        (reinterpret_cast<int64_t>(TracingHook) >> 16) & 0xffff;
     uint32_t HigherTracingHookAddr =
         (reinterpret_cast<int64_t>(TracingHook) >> 32) & 0xffff;
     uint32_t HighestTracingHookAddr =
@@ -160,6 +161,11 @@
   return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
 }
 
+bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
+                      const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+  // FIXME: Implement in mips64?
+  return false;
+}
 } // namespace __xray
 
 extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT {
diff --git a/lib/xray/xray_never_instrument.txt b/lib/xray/xray_never_instrument.txt
new file mode 100644
index 0000000..7fa48dd
--- /dev/null
+++ b/lib/xray/xray_never_instrument.txt
@@ -0,0 +1,6 @@
+# List of function matchers common to C/C++ applications that make sense to
+# never instrument. You can use this as an argument to
+# -fxray-never-instrument=<path> along with your project-specific lists.
+
+# Never instrument any function whose symbol starts with __xray.
+fun:__xray*
diff --git a/lib/xray/xray_powerpc64.cc b/lib/xray/xray_powerpc64.cc
index 6a7554c..ab03cb1 100644
--- a/lib/xray/xray_powerpc64.cc
+++ b/lib/xray/xray_powerpc64.cc
@@ -93,6 +93,12 @@
 // FIXME: Maybe implement this better?
 bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; }
 
+bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
+                      const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+  // FIXME: Implement in powerpc64?
+  return false;
+}
+
 } // namespace __xray
 
 extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT {
diff --git a/lib/xray/xray_trampoline_AArch64.S b/lib/xray/xray_trampoline_AArch64.S
index b8c89e4..4d1b04f 100644
--- a/lib/xray/xray_trampoline_AArch64.S
+++ b/lib/xray/xray_trampoline_AArch64.S
@@ -1,3 +1,5 @@
+#include "../builtins/assembly.h"
+
     .text
     /* The variable containing the handler function pointer */
     .global _ZN6__xray19XRayPatchedFunctionE
@@ -138,3 +140,5 @@
     LDP X3, X4, [SP], #16
     LDP X1, X2, [SP], #16
     RET
+
+NO_EXEC_STACK_DIRECTIVE
diff --git a/lib/xray/xray_trampoline_arm.S b/lib/xray/xray_trampoline_arm.S
index ee6763e..71dbee6 100644
--- a/lib/xray/xray_trampoline_arm.S
+++ b/lib/xray/xray_trampoline_arm.S
@@ -1,3 +1,5 @@
+#include "../builtins/assembly.h"
+
     .syntax unified
     .arch armv6t2
     .fpu vfpv2
@@ -96,3 +98,5 @@
     @ Restore floating-point parameters of the instrumented function
     VPOP {d0-d7}
     POP {r1-r3,pc}
+
+NO_EXEC_STACK_DIRECTIVE
diff --git a/lib/xray/xray_trampoline_powerpc64_asm.S b/lib/xray/xray_trampoline_powerpc64_asm.S
index d43231e..250e2e5 100644
--- a/lib/xray/xray_trampoline_powerpc64_asm.S
+++ b/lib/xray/xray_trampoline_powerpc64_asm.S
@@ -145,27 +145,91 @@
 	.p2align	4
 __xray_FunctionExit:
 	std 0, 16(1)
-	ld 0, -8(1) # FuncId
-	stdu 1, -72(1)
-# Spill r3, f1, and vsr34, the return value registers.
+	stdu 1, -256(1)
+# Spill r3-r4, f1-f8, and vsr34-vsr41, which are return registers.
+# If this appears to be slow, the caller needs to pass in number of generic,
+# floating point, and vector parameters, so that we only spill those live ones.
 	std 3, 32(1)
-	mr 3, 0
-	addi 4, 1, 40
-	stxsdx 1, 0, 4
+	ld 3, 248(1) # FuncId
+	std 4, 40(1)
 	addi 4, 1, 48
+	stxsdx 1, 0, 4
+	addi 4, 1, 56
+	stxsdx 2, 0, 4
+	addi 4, 1, 64
+	stxsdx 3, 0, 4
+	addi 4, 1, 72
+	stxsdx 4, 0, 4
+	addi 4, 1, 80
+	stxsdx 5, 0, 4
+	addi 4, 1, 88
+	stxsdx 6, 0, 4
+	addi 4, 1, 96
+	stxsdx 7, 0, 4
+	addi 4, 1, 104
+	stxsdx 8, 0, 4
+	addi 4, 1, 112
 	stxvd2x 34, 0, 4
+	addi 4, 1, 128
+	stxvd2x 35, 0, 4
+	addi 4, 1, 144
+	stxvd2x 36, 0, 4
+	addi 4, 1, 160
+	stxvd2x 37, 0, 4
+	addi 4, 1, 176
+	stxvd2x 38, 0, 4
+	addi 4, 1, 192
+	stxvd2x 39, 0, 4
+	addi 4, 1, 208
+	stxvd2x 40, 0, 4
+	addi 4, 1, 224
+	stxvd2x 41, 0, 4
+	std 2, 240(1)
 	mflr 0
-	std 0, 64(1)
+	std 0, 248(1)
+
 	li 4, 1
 	bl _ZN6__xray23CallXRayPatchedFunctionEi13XRayEntryType
 	nop
-	ld 0, 64(1)
-	mtlr 0
-	ld 3, 32(1)
-	addi 4, 1, 40
-	lxsdx 1, 0, 4
+
 	addi 4, 1, 48
+	lxsdx 1, 0, 4
+	addi 4, 1, 56
+	lxsdx 2, 0, 4
+	addi 4, 1, 64
+	lxsdx 3, 0, 4
+	addi 4, 1, 72
+	lxsdx 4, 0, 4
+	addi 4, 1, 80
+	lxsdx 5, 0, 4
+	addi 4, 1, 88
+	lxsdx 6, 0, 4
+	addi 4, 1, 96
+	lxsdx 7, 0, 4
+	addi 4, 1, 104
+	lxsdx 8, 0, 4
+	addi 4, 1, 112
 	lxvd2x 34, 0, 4
-	addi 1, 1, 72
+	addi 4, 1, 128
+	lxvd2x 35, 0, 4
+	addi 4, 1, 144
+	lxvd2x 36, 0, 4
+	addi 4, 1, 160
+	lxvd2x 37, 0, 4
+	addi 4, 1, 176
+	lxvd2x 38, 0, 4
+	addi 4, 1, 192
+	lxvd2x 39, 0, 4
+	addi 4, 1, 208
+	lxvd2x 40, 0, 4
+	addi 4, 1, 224
+	lxvd2x 41, 0, 4
+	ld 0, 248(1)
+	mtlr 0
+	ld 2, 240(1)
+	ld 3, 32(1)
+	ld 4, 40(1)
+
+	addi 1, 1, 256
 	ld 0, 16(1)
 	blr
diff --git a/lib/xray/xray_trampoline_x86_64.S b/lib/xray/xray_trampoline_x86_64.S
index b9fef6d..ffbfb5c 100644
--- a/lib/xray/xray_trampoline_x86_64.S
+++ b/lib/xray/xray_trampoline_x86_64.S
@@ -13,42 +13,51 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "../builtins/assembly.h"
+
 .macro SAVE_REGISTERS
-	subq $200, %rsp
-	movupd	%xmm0, 184(%rsp)
-	movupd	%xmm1, 168(%rsp)
-	movupd	%xmm2, 152(%rsp)
-	movupd	%xmm3, 136(%rsp)
-	movupd	%xmm4, 120(%rsp)
-	movupd	%xmm5, 104(%rsp)
-	movupd	%xmm6, 88(%rsp)
-	movupd	%xmm7, 72(%rsp)
-	movq	%rdi, 64(%rsp)
-	movq	%rax, 56(%rsp)
-	movq	%rdx, 48(%rsp)
-	movq	%rsi, 40(%rsp)
-	movq	%rcx, 32(%rsp)
-	movq	%r8, 24(%rsp)
-	movq	%r9, 16(%rsp)
+	subq $192, %rsp
+	.cfi_def_cfa_offset 200
+	// At this point, the stack pointer should be aligned to an 8-byte boundary,
+	// because any call instructions that come after this will add another 8
+	// bytes and therefore align it to 16-bytes.
+	movq %rbp, 184(%rsp)
+	movupd	%xmm0, 168(%rsp)
+	movupd	%xmm1, 152(%rsp)
+	movupd	%xmm2, 136(%rsp)
+	movupd	%xmm3, 120(%rsp)
+	movupd	%xmm4, 104(%rsp)
+	movupd	%xmm5, 88(%rsp)
+	movupd	%xmm6, 72(%rsp)
+	movupd	%xmm7, 56(%rsp)
+	movq	%rdi, 48(%rsp)
+	movq	%rax, 40(%rsp)
+	movq	%rdx, 32(%rsp)
+	movq	%rsi, 24(%rsp)
+	movq	%rcx, 16(%rsp)
+	movq	%r8, 8(%rsp)
+	movq	%r9, 0(%rsp)
 .endm
 
 .macro RESTORE_REGISTERS
-	movupd	184(%rsp), %xmm0
-	movupd	168(%rsp), %xmm1
-	movupd	152(%rsp), %xmm2
-	movupd	136(%rsp), %xmm3
-	movupd	120(%rsp), %xmm4
-	movupd	104(%rsp), %xmm5
-	movupd	88(%rsp) , %xmm6
-	movupd	72(%rsp) , %xmm7
-	movq	64(%rsp), %rdi
-	movq	56(%rsp), %rax
-	movq	48(%rsp), %rdx
-	movq	40(%rsp), %rsi
-	movq	32(%rsp), %rcx
-	movq	24(%rsp), %r8
-	movq	16(%rsp), %r9
-	addq	$200, %rsp
+	movq  184(%rsp), %rbp
+	movupd	168(%rsp), %xmm0
+	movupd	152(%rsp), %xmm1
+	movupd	136(%rsp), %xmm2
+	movupd	120(%rsp), %xmm3
+	movupd	104(%rsp), %xmm4
+	movupd	88(%rsp), %xmm5
+	movupd	72(%rsp) , %xmm6
+	movupd	56(%rsp) , %xmm7
+	movq	48(%rsp), %rdi
+	movq	40(%rsp), %rax
+	movq	32(%rsp), %rdx
+	movq	24(%rsp), %rsi
+	movq	16(%rsp), %rcx
+	movq	8(%rsp), %r8
+	movq	0(%rsp), %r9
+	addq	$192, %rsp
+	.cfi_def_cfa_offset 8
 .endm
 
 	.text
@@ -62,8 +71,6 @@
 
 __xray_FunctionEntry:
 	.cfi_startproc
-	pushq %rbp
-	.cfi_def_cfa_offset 16
 	SAVE_REGISTERS
 
 	// This load has to be atomic, it's concurrent with __xray_patch().
@@ -78,7 +85,6 @@
 	callq	*%rax
 .Ltmp0:
 	RESTORE_REGISTERS
-	popq	%rbp
 	retq
 .Ltmp1:
 	.size __xray_FunctionEntry, .Ltmp1-__xray_FunctionEntry
@@ -94,14 +100,13 @@
 	// Save the important registers first. Since we're assuming that this
 	// function is only jumped into, we only preserve the registers for
 	// returning.
-	pushq	%rbp
-	.cfi_def_cfa_offset 16
 	subq	$56, %rsp
-	.cfi_def_cfa_offset 32
-	movupd	%xmm0, 40(%rsp)
-	movupd	%xmm1, 24(%rsp)
-	movq	%rax, 16(%rsp)
-	movq	%rdx, 8(%rsp)
+	.cfi_def_cfa_offset 64
+	movq  %rbp, 48(%rsp)
+	movupd	%xmm0, 32(%rsp)
+	movupd	%xmm1, 16(%rsp)
+	movq	%rax, 8(%rsp)
+	movq	%rdx, 0(%rsp)
 	movq	_ZN6__xray19XRayPatchedFunctionE(%rip), %rax
 	testq %rax,%rax
 	je	.Ltmp2
@@ -111,12 +116,13 @@
 	callq	*%rax
 .Ltmp2:
 	// Restore the important registers.
-	movupd	40(%rsp), %xmm0
-	movupd	24(%rsp), %xmm1
-	movq	16(%rsp), %rax
-	movq	8(%rsp), %rdx
+	movq  48(%rsp), %rbp
+	movupd	32(%rsp), %xmm0
+	movupd	16(%rsp), %xmm1
+	movq	8(%rsp), %rax
+	movq	0(%rsp), %rdx
 	addq	$56, %rsp
-	popq	%rbp
+	.cfi_def_cfa_offset 8
 	retq
 .Ltmp3:
 	.size __xray_FunctionExit, .Ltmp3-__xray_FunctionExit
@@ -129,12 +135,6 @@
 	.type __xray_FunctionTailExit,@function
 __xray_FunctionTailExit:
 	.cfi_startproc
-	// Save the important registers as in the entry trampoline, but indicate that
-	// this is an exit. In the future, we will introduce a new entry type that
-	// differentiates between a normal exit and a tail exit, but we'd have to do
-	// this and increment the version number for the header.
-	pushq %rbp
-	.cfi_def_cfa_offset 16
 	SAVE_REGISTERS
 
 	movq	_ZN6__xray19XRayPatchedFunctionE(%rip), %rax
@@ -142,12 +142,11 @@
 	je	.Ltmp4
 
 	movl	%r10d, %edi
-	movl	$1, %esi
+	movl	$2, %esi
 	callq	*%rax
 
 .Ltmp4:
 	RESTORE_REGISTERS
-	popq	%rbp
 	retq
 .Ltmp5:
 	.size __xray_FunctionTailExit, .Ltmp5-__xray_FunctionTailExit
@@ -160,8 +159,6 @@
 	.type __xray_ArgLoggerEntry,@function
 __xray_ArgLoggerEntry:
 	.cfi_startproc
-	pushq	%rbp
-	.cfi_def_cfa_offset 16
 	SAVE_REGISTERS
 
 	// Again, these function pointer loads must be atomic; MOV is fine.
@@ -175,16 +172,60 @@
 	je	.Larg1entryFail
 
 .Larg1entryLog:
-	movq	%rdi, %rdx	// first argument will become the third
-	xorq	%rsi, %rsi	// XRayEntryType::ENTRY into the second
-	movl	%r10d, %edi	// 32-bit function ID becomes the first
+
+	// First argument will become the third
+	movq	%rdi, %rdx
+
+	// XRayEntryType::LOG_ARGS_ENTRY into the second
+	mov	$0x3, %esi
+
+	// 32-bit function ID becomes the first
+	movl	%r10d, %edi
 	callq	*%rax
 
 .Larg1entryFail:
 	RESTORE_REGISTERS
-	popq	%rbp
 	retq
 
 .Larg1entryEnd:
 	.size __xray_ArgLoggerEntry, .Larg1entryEnd-__xray_ArgLoggerEntry
 	.cfi_endproc
+
+//===----------------------------------------------------------------------===//
+
+	.global __xray_CustomEvent
+	.align 16, 0x90
+	.type __xray_CustomEvent,@function
+__xray_CustomEvent:
+  .cfi_startproc
+	SAVE_REGISTERS
+
+	// We take two arguments to this trampoline, which should be in rdi	and rsi
+	// already. We also make sure that we stash %rax because we use that register
+	// to call the logging handler.
+	movq _ZN6__xray22XRayPatchedCustomEventE(%rip), %rax
+	testq %rax,%rax
+	je .LcustomEventCleanup
+
+	// At this point we know that rcx and rdx already has the data, so we just
+	// call the logging handler, after aligning the stack to a 16-byte boundary.
+	// The approach we're taking here uses additional stack space to stash the
+	// stack pointer twice before aligning the pointer to 16-bytes. If the stack
+	// was 8-byte aligned, it will become 16-byte aligned -- when restoring the
+	// pointer, we can always look -8 bytes from the current position to get
+	// either of the values we've stashed in the first place.
+	pushq %rsp
+	pushq (%rsp)
+	andq $-0x10, %rsp
+  callq *%rax
+	movq 8(%rsp), %rsp
+
+.LcustomEventCleanup:
+	RESTORE_REGISTERS
+	retq
+
+.Ltmp8:
+	.size __xray_CustomEvent, .Ltmp8-__xray_CustomEvent
+	.cfi_endproc
+
+NO_EXEC_STACK_DIRECTIVE
diff --git a/lib/xray/xray_tsc.h b/lib/xray/xray_tsc.h
index e9ed7c7..4507564 100644
--- a/lib/xray/xray_tsc.h
+++ b/lib/xray/xray_tsc.h
@@ -13,6 +13,10 @@
 #ifndef XRAY_EMULATE_TSC_H
 #define XRAY_EMULATE_TSC_H
 
+namespace __xray {
+static constexpr uint64_t NanosecondsPerSecond = 1000ULL * 1000 * 1000;
+}
+
 #if defined(__x86_64__)
 #include "xray_x86_64.inc"
 #elif defined(__powerpc64__)
@@ -37,8 +41,6 @@
 
 namespace __xray {
 
-static constexpr uint64_t NanosecondsPerSecond = 1000ULL * 1000 * 1000;
-
 inline bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; }
 
 ALWAYS_INLINE uint64_t readTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
@@ -60,7 +62,7 @@
 } // namespace __xray
 
 #else
-"Unsupported CPU Architecture"
+#error Target architecture is not supported.
 #endif // CPU architecture
 
 #endif // XRAY_EMULATE_TSC_H
diff --git a/lib/xray/xray_utils.cc b/lib/xray/xray_utils.cc
index 69e7b98..b9a38d1 100644
--- a/lib/xray/xray_utils.cc
+++ b/lib/xray/xray_utils.cc
@@ -15,6 +15,7 @@
 #include "sanitizer_common/sanitizer_common.h"
 #include "xray_defs.h"
 #include "xray_flags.h"
+#include <stdlib.h>
 #include <cstdio>
 #include <errno.h>
 #include <fcntl.h>
@@ -92,8 +93,6 @@
 }
 
 int getLogFD() XRAY_NEVER_INSTRUMENT {
-  // FIXME: Figure out how to make this less stderr-dependent.
-  SetPrintfAndReportCallback(printToStdErr);
   // Open a temporary file once for the log.
   static char TmpFilename[256] = {};
   static char TmpWildcardPattern[] = "XXXXXX";
@@ -118,8 +117,7 @@
            TmpFilename);
     return -1;
   }
-  if (Verbosity())
-    fprintf(stderr, "XRay: Log file in '%s'\n", TmpFilename);
+  Report("XRay: Log file in '%s'\n", TmpFilename);
 
   return Fd;
 }
diff --git a/lib/xray/xray_x86_64.cc b/lib/xray/xray_x86_64.cc
index 1b91313..e17f00a 100644
--- a/lib/xray/xray_x86_64.cc
+++ b/lib/xray/xray_x86_64.cc
@@ -44,9 +44,9 @@
   ssize_t BytesRead;
   bool Success;
   std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize);
+  close(Fd);
   if (!Success)
     return false;
-  close(Fd);
   char *End = nullptr;
   long long Tmp = internal_simple_strtoll(Line, &End, 10);
   bool Result = false;
@@ -75,8 +75,11 @@
 static constexpr uint8_t CallOpCode = 0xe8;
 static constexpr uint16_t MovR10Seq = 0xba41;
 static constexpr uint16_t Jmp9Seq = 0x09eb;
+static constexpr uint16_t Jmp20Seq = 0x14eb;
+static constexpr uint16_t Jmp15Seq = 0x0feb;
 static constexpr uint8_t JmpOpCode = 0xe9;
 static constexpr uint8_t RetOpCode = 0xc3;
+static constexpr uint16_t NopwSeq = 0x9066;
 
 static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()};
 static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()};
@@ -201,6 +204,53 @@
   return true;
 }
 
+bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
+                      const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+  // Here we do the dance of replacing the following sled:
+  //
+  // In Version 0:
+  //
+  // xray_sled_n:
+  //   jmp +20          // 2 bytes
+  //   ...
+  //
+  // With the following:
+  //
+  //   nopw             // 2 bytes*
+  //   ...
+  //
+  //
+  // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
+  //
+  // ---
+  //
+  // In Version 1:
+  //
+  //   The jump offset is now 15 bytes (0x0f), so when restoring the nopw back
+  //   to a jmp, use 15 bytes instead.
+  //
+  if (Enable) {
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), NopwSeq,
+        std::memory_order_release);
+  } else {
+    switch (Sled.Version) {
+    case 1:
+      std::atomic_store_explicit(
+          reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp15Seq,
+          std::memory_order_release);
+      break;
+    case 0:
+    default:
+      std::atomic_store_explicit(
+          reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp20Seq,
+          std::memory_order_release);
+      break;
+    }
+    }
+  return false;
+}
+
 // We determine whether the CPU we're running on has the correct features we
 // need. In x86_64 this will be rdtscp support.
 bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT {
@@ -208,12 +258,18 @@
 
   // We check whether rdtscp support is enabled. According to the x86_64 manual,
   // level should be set at 0x80000001, and we should have a look at bit 27 in
-  // EDX. That's 0x8000000 (or 1u << 26).
+  // EDX. That's 0x8000000 (or 1u << 27).
   __get_cpuid(0x80000001, &EAX, &EBX, &ECX, &EDX);
-  if (!(EDX & (1u << 26))) {
+  if (!(EDX & (1u << 27))) {
     Report("Missing rdtscp support.\n");
     return false;
   }
+  // Also check whether we can determine the CPU frequency, since if we cannot,
+  // we should use the emulated TSC instead.
+  if (!getTSCFrequency()) {
+    Report("Unable to determine CPU frequency.\n");
+    return false;
+  }
   return true;
 }
 
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index addc579..0acf87b 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -9,6 +9,11 @@
 # add_subdirectory(BlocksRuntime)
 
 set(SANITIZER_COMMON_LIT_TEST_DEPS)
+
+if(COMPILER_RT_BUILD_PROFILE AND COMPILER_RT_HAS_PROFILE)
+  list(APPEND SANITIZER_COMMON_LIT_TEST_DEPS profile)
+endif()
+
 if(COMPILER_RT_STANDALONE_BUILD)
   add_executable(FileCheck IMPORTED GLOBAL)
   set_property(TARGET FileCheck PROPERTY IMPORTED_LOCATION ${LLVM_TOOLS_BINARY_DIR}/FileCheck)
@@ -23,9 +28,6 @@
     list(APPEND SANITIZER_COMMON_LIT_TEST_DEPS
       clang clang-headers FileCheck count not llvm-config llvm-nm llvm-objdump
       llvm-readobj llvm-symbolizer compiler-rt-headers sancov)
-    if (COMPILER_RT_HAS_PROFILE)
-      list(APPEND SANITIZER_COMMON_LIT_TEST_DEPS profile)
-    endif()
     if (WIN32)
       list(APPEND SANITIZER_COMMON_LIT_TEST_DEPS KillTheDoctor)
     endif()
@@ -35,54 +37,46 @@
   endif()
 endif()
 
+function(compiler_rt_test_runtime runtime)
+  string(TOUPPER ${runtime} runtime_uppercase)
+  if(COMPILER_RT_HAS_${runtime_uppercase})
+    add_subdirectory(${runtime})
+    foreach(directory ${ARGN})
+      add_subdirectory(${directory})
+    endforeach()
+  endif()
+endfunction()
+
 # Run sanitizer tests only if we're sure that clang would produce
 # working binaries.
 if(COMPILER_RT_CAN_EXECUTE_TESTS)
   if(COMPILER_RT_BUILD_BUILTINS)
     add_subdirectory(builtins)
   endif()
-  if(COMPILER_RT_HAS_ASAN)
-    add_subdirectory(asan)
+  if(COMPILER_RT_BUILD_SANITIZERS)
+    compiler_rt_test_runtime(interception)
+
+    compiler_rt_test_runtime(lsan)
+    # CFI tests require diagnostic mode, which is implemented in UBSan.
+    compiler_rt_test_runtime(ubsan cfi)
+    compiler_rt_test_runtime(sanitizer_common)
+
+    if(COMPILER_RT_BUILD_LIBFUZZER)
+      compiler_rt_test_runtime(fuzzer)
+    endif()
+
+    foreach(sanitizer ${COMPILER_RT_SANITIZERS_TO_BUILD})
+      # cfi testing is gated on ubsan
+      if(NOT ${sanitizer} STREQUAL cfi)
+        compiler_rt_test_runtime(${sanitizer})
+      endif()
+    endforeach()
   endif()
-  if(COMPILER_RT_HAS_DFSAN)
-    add_subdirectory(dfsan)
+  if(COMPILER_RT_BUILD_PROFILE AND COMPILER_RT_HAS_PROFILE)
+    compiler_rt_test_runtime(profile)
   endif()
-  if (COMPILER_RT_HAS_INTERCEPTION)
-    add_subdirectory(interception)
-  endif()
-  if(COMPILER_RT_HAS_LSAN)
-    add_subdirectory(lsan)
-  endif()
-  if(COMPILER_RT_HAS_MSAN)
-    add_subdirectory(msan)
-  endif()
-  if(COMPILER_RT_HAS_PROFILE)
-    add_subdirectory(profile)
-  endif()
-  if(COMPILER_RT_HAS_SANITIZER_COMMON)
-    add_subdirectory(sanitizer_common)
-  endif()
-  if(COMPILER_RT_HAS_TSAN)
-    add_subdirectory(tsan)
-  endif()
-  if(COMPILER_RT_HAS_UBSAN)
-    add_subdirectory(ubsan)
-  endif()
-  # CFI tests require diagnostic mode, which is implemented in UBSan.
-  if(COMPILER_RT_HAS_UBSAN)
-    add_subdirectory(cfi)
-  endif()
-  if(COMPILER_RT_HAS_SAFESTACK)
-    add_subdirectory(safestack)
-  endif()
-  if(COMPILER_RT_HAS_ESAN)
-    add_subdirectory(esan)
-  endif()
-  if(COMPILER_RT_HAS_SCUDO)
-    add_subdirectory(scudo)
-  endif()
-  if(COMPILER_RT_HAS_XRAY)
-    add_subdirectory(xray)
+  if(COMPILER_RT_BUILD_XRAY)
+    compiler_rt_test_runtime(xray)
   endif()
 endif()
 
diff --git a/test/asan/CMakeLists.txt b/test/asan/CMakeLists.txt
index 8932767..739ae56 100644
--- a/test/asan/CMakeLists.txt
+++ b/test/asan/CMakeLists.txt
@@ -3,8 +3,22 @@
 set(ASAN_TESTSUITES)
 set(ASAN_DYNAMIC_TESTSUITES)
 
+# Before Windows 8 (CMAKE_SYSTEM_VERSION 6.2), reserving large regions of shadow
+# memory allocated physical memory for page tables, which made it very
+# unreliable. Remove the asan tests from check-all in this configuration.
+set(SHADOW_MAPPING_UNRELIABLE FALSE)
+if(OS_NAME MATCHES "Windows" AND CMAKE_SIZEOF_VOID_P EQUAL 8 AND
+    ${CMAKE_SYSTEM_VERSION} LESS 6.2)
+  set(SHADOW_MAPPING_UNRELIABLE TRUE)
+  message(WARNING "Disabling ASan tests because they are unreliable on Windows 7 and earlier")
+endif()
+
+if (SHADOW_MAPPING_UNRELIABLE)
+  set(EXCLUDE_FROM_ALL TRUE)
+endif()
+
 macro(get_bits_for_arch arch bits)
-  if (${arch} MATCHES "i386|i686|arm|mips|mipsel")
+  if (${arch} MATCHES "i386|arm|mips|mipsel")
     set(${bits} 32)
   elseif (${arch} MATCHES "x86_64|powerpc64|powerpc64le|aarch64|mips64|mips64el|s390x")
     set(${bits} 64)
@@ -16,10 +30,8 @@
 set(ASAN_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS})
 if(NOT COMPILER_RT_STANDALONE_BUILD)
   list(APPEND ASAN_TEST_DEPS asan)
-  if(WIN32 AND COMPILER_RT_HAS_LLD_SOURCES)
-    list(APPEND ASAN_TEST_DEPS
-      lld
-    )
+  if(NOT APPLE AND COMPILER_RT_HAS_LLD)
+    list(APPEND ASAN_TEST_DEPS lld)
   endif()
 endif()
 set(ASAN_DYNAMIC_TEST_DEPS ${ASAN_TEST_DEPS})
@@ -35,6 +47,12 @@
   else()
     set(ASAN_TEST_TARGET_ARCH ${arch})
   endif()
+
+  set(ASAN_TEST_IOS "0")
+  pythonize_bool(ASAN_TEST_IOS)
+  set(ASAN_TEST_IOSSIM "0")
+  pythonize_bool(ASAN_TEST_IOSSIM)
+
   string(TOLOWER "-${arch}-${OS_NAME}" ASAN_TEST_CONFIG_SUFFIX)
   get_bits_for_arch(${arch} ASAN_TEST_BITS)
   get_test_cc_for_arch(${arch} ASAN_TEST_TARGET_CC ASAN_TEST_TARGET_CFLAGS)
@@ -63,6 +81,58 @@
   endif()
 endforeach()
 
+# iOS and iOS simulator test suites
+# These are not added into "check-all", in order to run these tests, use
+# "check-asan-iossim-x86_64" and similar. They also require that an extra env
+# variable to select which iOS device or simulator to use, e.g.:
+# SANITIZER_IOSSIM_TEST_DEVICE_IDENTIFIER="iPhone 6"
+if(APPLE)
+  set(EXCLUDE_FROM_ALL ON)
+
+  set(ASAN_TEST_TARGET_CC ${COMPILER_RT_TEST_COMPILER})
+  set(ASAN_TEST_IOS "1")
+  pythonize_bool(ASAN_TEST_IOS)
+  set(ASAN_TEST_DYNAMIC True)
+
+  foreach(arch ${DARWIN_iossim_ARCHS})
+    set(ASAN_TEST_IOSSIM "1")
+    pythonize_bool(ASAN_TEST_IOSSIM)
+    set(ASAN_TEST_TARGET_ARCH ${arch})
+    set(ASAN_TEST_TARGET_CFLAGS "-arch ${arch} -isysroot ${DARWIN_iossim_SYSROOT} ${COMPILER_RT_TEST_COMPILER_CFLAGS}")
+    set(ASAN_TEST_CONFIG_SUFFIX "-${arch}-iossim")
+    get_bits_for_arch(${arch} ASAN_TEST_BITS)
+    string(TOUPPER ${arch} ARCH_UPPER_CASE)
+    set(CONFIG_NAME "IOSSim${ARCH_UPPER_CASE}Config")
+    configure_lit_site_cfg(
+      ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
+      ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg
+      )
+    add_lit_testsuite(check-asan-iossim-${arch} "AddressSanitizer iOS Simulator ${arch} tests"
+      ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/
+      DEPENDS ${ASAN_TEST_DEPS})
+  endforeach()
+
+  foreach (arch ${DARWIN_ios_ARCHS})
+    set(ASAN_TEST_IOSSIM "0")
+    pythonize_bool(ASAN_TEST_IOSSIM)
+    set(ASAN_TEST_TARGET_ARCH ${arch})
+    set(ASAN_TEST_TARGET_CFLAGS "-arch ${arch} -isysroot ${DARWIN_ios_SYSROOT} ${COMPILER_RT_TEST_COMPILER_CFLAGS}")
+    set(ASAN_TEST_CONFIG_SUFFIX "-${arch}-ios")
+    get_bits_for_arch(${arch} ASAN_TEST_BITS)
+    string(TOUPPER ${arch} ARCH_UPPER_CASE)
+    set(CONFIG_NAME "IOS${ARCH_UPPER_CASE}Config")
+    configure_lit_site_cfg(
+      ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
+      ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg
+      )
+    add_lit_testsuite(check-asan-ios-${arch} "AddressSanitizer iOS ${arch} tests"
+      ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/
+      DEPENDS ${ASAN_TEST_DEPS})
+  endforeach()
+
+  set(EXCLUDE_FROM_ALL OFF)
+endif()
+
 # Add unit tests.
 if(COMPILER_RT_INCLUDE_TESTS)
   set(ASAN_TEST_DYNAMIC False)
@@ -92,19 +162,15 @@
 set_target_properties(check-asan PROPERTIES FOLDER "Compiler-RT Misc")
 
 if(COMPILER_RT_ASAN_HAS_STATIC_RUNTIME)
-  # Add check-dynamic-asan target. It is a part of check-all only on Windows,
-  # where we want to always test both dynamic and static runtime.
-
-  if(NOT OS_NAME MATCHES "Windows")
-    set(EXCLUDE_FROM_ALL TRUE)
-  endif()
   add_lit_testsuite(check-asan-dynamic
                     "Running the AddressSanitizer tests with dynamic runtime"
                     ${ASAN_DYNAMIC_TESTSUITES}
                     DEPENDS ${ASAN_DYNAMIC_TEST_DEPS})
   set_target_properties(check-asan-dynamic
                         PROPERTIES FOLDER "Compiler-RT Misc")
-  if(NOT OS_NAME MATCHES "Windows")
-    set(EXCLUDE_FROM_ALL FALSE)
-  endif()
+endif()
+
+# Reset EXCLUDE_FROM_ALL to its initial value.
+if (SHADOW_MAPPING_UNRELIABLE)
+  set(EXCLUDE_FROM_ALL FALSE)
 endif()
diff --git a/test/asan/TestCases/Android/coverage-android.cc b/test/asan/TestCases/Android/coverage-android.cc
deleted file mode 100644
index cf4f33e..0000000
--- a/test/asan/TestCases/Android/coverage-android.cc
+++ /dev/null
@@ -1,147 +0,0 @@
-// Test for direct coverage writing with dlopen.
-
-// Test normal exit, coverage level 1.
-// RUN: %clangxx_asan -fsanitize-coverage=func -DSHARED %s -shared -o %T/libcoverage_android_test_1.so -fPIC
-// RUN: %clangxx_asan -fsanitize-coverage=func -DSO_DIR=\"%device\" %s -o %t
-
-// RUN: adb shell rm -rf %device/coverage-android
-// RUN: rm -rf %T/coverage-android
-
-// RUN: adb shell mkdir -p %device/coverage-android/direct
-// RUN: mkdir -p %T/coverage-android/direct
-// RUN: %env_asan_opts=coverage=1:coverage_direct=1:coverage_dir=%device/coverage-android/direct:verbosity=1 %run %t
-// RUN: adb pull %device/coverage-android/direct %T/coverage-android/direct
-// RUN: ls; pwd
-// RUN: cd %T/coverage-android/direct
-// RUN: %sancov rawunpack *.sancov.raw
-// RUN: %sancov print *.sancov |& FileCheck --check-prefix=CHECK1 %s
-
-
-// Test sudden death, coverage level 1.
-// RUN: %clangxx_asan -fsanitize-coverage=func -DSHARED -DKILL %s -shared -o %T/libcoverage_android_test_1.so -fPIC
-// RUN: %clangxx_asan -fsanitize-coverage=func -DSO_DIR=\"%device\" %s -o %t
-
-// RUN: adb shell rm -rf %device/coverage-android-kill
-// RUN: rm -rf %T/coverage-android-kill
-
-// RUN: adb shell mkdir -p %device/coverage-android-kill/direct
-// RUN: mkdir -p %T/coverage-android-kill/direct
-// RUN: %env_asan_opts=coverage=1:coverage_direct=1:coverage_dir=%device/coverage-android-kill/direct:verbosity=1 not %run %t
-// RUN: adb pull %device/coverage-android-kill/direct %T/coverage-android-kill/direct
-// RUN: ls; pwd
-// RUN: cd %T/coverage-android-kill/direct
-// RUN: %sancov rawunpack *.sancov.raw
-// RUN: %sancov print *.sancov |& FileCheck --check-prefix=CHECK1 %s
-
-
-// Test normal exit, coverage level 2.
-// RUN: %clangxx_asan -fsanitize-coverage=bb -DSHARED %s -shared -o %T/libcoverage_android_test_1.so -fPIC
-// RUN: %clangxx_asan -fsanitize-coverage=bb -DSO_DIR=\"%device\" %s -o %t
-
-// RUN: adb shell rm -rf %device/coverage-android
-// RUN: rm -rf %T/coverage-android
-
-// RUN: adb shell mkdir -p %device/coverage-android/direct
-// RUN: mkdir -p %T/coverage-android/direct
-// RUN: %env_asan_opts=coverage=1:coverage_direct=1:coverage_dir=%device/coverage-android/direct:verbosity=1 %run %t
-// RUN: adb pull %device/coverage-android/direct %T/coverage-android/direct
-// RUN: ls; pwd
-// RUN: cd %T/coverage-android/direct
-// RUN: %sancov rawunpack *.sancov.raw
-// RUN: %sancov print *.sancov |& FileCheck --check-prefix=CHECK2 %s
-
-
-// Test sudden death, coverage level 2.
-// RUN: %clangxx_asan -fsanitize-coverage=bb -DSHARED -DKILL %s -shared -o %T/libcoverage_android_test_1.so -fPIC
-// RUN: %clangxx_asan -fsanitize-coverage=bb -DSO_DIR=\"%device\" %s -o %t
-
-// RUN: adb shell rm -rf %device/coverage-android-kill
-// RUN: rm -rf %T/coverage-android-kill
-
-// RUN: adb shell mkdir -p %device/coverage-android-kill/direct
-// RUN: mkdir -p %T/coverage-android-kill/direct
-// RUN: %env_asan_opts=coverage=1:coverage_direct=1:coverage_dir=%device/coverage-android-kill/direct:verbosity=1 not %run %t
-// RUN: adb pull %device/coverage-android-kill/direct %T/coverage-android-kill/direct
-// RUN: ls; pwd
-// RUN: cd %T/coverage-android-kill/direct
-// RUN: %sancov rawunpack *.sancov.raw
-// RUN: %sancov print *.sancov |& FileCheck --check-prefix=CHECK2 %s
-
-
-// Test normal exit, coverage level 3.
-// RUN: %clangxx_asan -fsanitize-coverage=edge -DSHARED %s -shared -o %T/libcoverage_android_test_1.so -fPIC
-// RUN: %clangxx_asan -fsanitize-coverage=edge -DSO_DIR=\"%device\" %s -o %t
-
-// RUN: adb shell rm -rf %device/coverage-android
-// RUN: rm -rf %T/coverage-android
-
-// RUN: adb shell mkdir -p %device/coverage-android/direct
-// RUN: mkdir -p %T/coverage-android/direct
-// RUN: %env_asan_opts=coverage=1:coverage_direct=1:coverage_dir=%device/coverage-android/direct:verbosity=1 %run %t
-// RUN: adb pull %device/coverage-android/direct %T/coverage-android/direct
-// RUN: ls; pwd
-// RUN: cd %T/coverage-android/direct
-// RUN: %sancov rawunpack *.sancov.raw
-// RUN: %sancov print *.sancov |& FileCheck --check-prefix=CHECK3 %s
-
-
-// Test sudden death, coverage level 3.
-// RUN: %clangxx_asan -fsanitize-coverage=edge -DSHARED -DKILL %s -shared -o %T/libcoverage_android_test_1.so -fPIC
-// RUN: %clangxx_asan -fsanitize-coverage=edge -DSO_DIR=\"%device\" %s -o %t
-
-// RUN: adb shell rm -rf %device/coverage-android-kill
-// RUN: rm -rf %T/coverage-android-kill
-
-// RUN: adb shell mkdir -p %device/coverage-android-kill/direct
-// RUN: mkdir -p %T/coverage-android-kill/direct
-// RUN: %env_asan_opts=coverage=1:coverage_direct=1:coverage_dir=%device/coverage-android-kill/direct:verbosity=1 not %run %t
-// RUN: adb pull %device/coverage-android-kill/direct %T/coverage-android-kill/direct
-// RUN: ls; pwd
-// RUN: cd %T/coverage-android-kill/direct
-// RUN: %sancov rawunpack *.sancov.raw
-// RUN: %sancov print *.sancov |& FileCheck --check-prefix=CHECK3 %s
-
-// PC counts in CHECK lines are platform dependent and match arm32 at the moment.
-// sancov tool does not support Android well enough to match function names
-// REQUIRES: arm
-
-#include <assert.h>
-#include <dlfcn.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <signal.h>
-
-#ifdef SHARED
-extern "C" {
-void bar() {
-  printf("bar\n");
-#ifdef KILL
-  kill(getpid(), SIGKILL);
-#endif
-}
-}
-#else
-
-volatile int sink;
-
-int main(int argc, char **argv) {
-  fprintf(stderr, "PID: %d\n", getpid());
-  void *handle1 =
-      dlopen(SO_DIR "/libcoverage_android_test_1.so", RTLD_LAZY);
-  assert(handle1);
-
-  if (argc == 0)
-    sink = 0;
-
-  void (*bar1)() = (void (*)())dlsym(handle1, "bar");
-  assert(bar1);
-  bar1();
-
-  return 0;
-}
-#endif
-
-// CHECK1: 2 PCs total
-// CHECK2: 4 PCs total
-// CHECK3: 5 PCs total
diff --git a/test/asan/TestCases/Darwin/abort_on_error.cc b/test/asan/TestCases/Darwin/abort_on_error.cc
index 295afb8..0aa1234 100644
--- a/test/asan/TestCases/Darwin/abort_on_error.cc
+++ b/test/asan/TestCases/Darwin/abort_on_error.cc
@@ -8,6 +8,8 @@
 // When we use lit's default ASAN_OPTIONS, we shouldn't crash.
 // RUN: not %run %t 2>&1 | FileCheck %s
 
+// UNSUPPORTED: ios
+
 #include <stdlib.h>
 int main() {
   char *x = (char*)malloc(10 * sizeof(char));
diff --git a/test/asan/TestCases/Darwin/address-range-limit.mm b/test/asan/TestCases/Darwin/address-range-limit.mm
index ba9175a..5f0fd89 100644
--- a/test/asan/TestCases/Darwin/address-range-limit.mm
+++ b/test/asan/TestCases/Darwin/address-range-limit.mm
@@ -3,6 +3,9 @@
 // RUN: %clangxx_asan %s -Wno-deprecated-declarations -flat_namespace -bundle -undefined suppress -o %t.bundle
 // RUN: %clangxx_asan %s -Wno-deprecated-declarations -o %t -framework Foundation && not %run %t 2>&1 | FileCheck %s
 
+// NSCreateObjectFileImageFromFile/NSLinkModule isn't available on iOS
+// UNSUPPORTED: ios
+
 #import <Foundation/Foundation.h>
 #import <mach-o/dyld.h>
 
diff --git a/test/asan/TestCases/Darwin/asan_gen_prefixes.cc b/test/asan/TestCases/Darwin/asan_gen_prefixes.cc
index 13363ac..9f3a66a 100644
--- a/test/asan/TestCases/Darwin/asan_gen_prefixes.cc
+++ b/test/asan/TestCases/Darwin/asan_gen_prefixes.cc
@@ -4,6 +4,8 @@
 // RUN: %clang_asan %s -S -o %t.s
 // RUN: cat %t.s | FileCheck %s || exit 1
 
+// UNSUPPORTED: ios
+
 int x, y, z;
 int main() { return 0; }
 // CHECK: .section{{.*}}__TEXT,__const
diff --git a/test/asan/TestCases/Darwin/atos-symbolizer-dyld-root-path.cc b/test/asan/TestCases/Darwin/atos-symbolizer-dyld-root-path.cc
index d2facd6..fc3d0dd 100644
--- a/test/asan/TestCases/Darwin/atos-symbolizer-dyld-root-path.cc
+++ b/test/asan/TestCases/Darwin/atos-symbolizer-dyld-root-path.cc
@@ -6,6 +6,9 @@
 // Due to a bug in atos, this only works on x86_64.
 // REQUIRES: asan-64-bits
 
+// Path returned by `which atos` is invalid on iOS.
+// UNSUPPORTED: ios
+
 #include <stdlib.h>
 #include <string.h>
 int main(int argc, char **argv) {
diff --git a/test/asan/TestCases/Darwin/atos-symbolizer.cc b/test/asan/TestCases/Darwin/atos-symbolizer.cc
index b4a868e..e7d7e7a 100644
--- a/test/asan/TestCases/Darwin/atos-symbolizer.cc
+++ b/test/asan/TestCases/Darwin/atos-symbolizer.cc
@@ -3,6 +3,9 @@
 // RUN: %clangxx_asan -O0 %s -o %t
 // RUN: %env_asan_opts=verbosity=2 ASAN_SYMBOLIZER_PATH=$(which atos) not %run %t 2>&1 | FileCheck %s
 
+// Path returned by `which atos` is invalid on iOS.
+// UNSUPPORTED: ios, i386-darwin
+
 #include <stdlib.h>
 #include <string.h>
 int main(int argc, char **argv) {
diff --git a/test/asan/TestCases/Darwin/dead-strip.c b/test/asan/TestCases/Darwin/dead-strip.c
index f87a5e5..8165fcd 100644
--- a/test/asan/TestCases/Darwin/dead-strip.c
+++ b/test/asan/TestCases/Darwin/dead-strip.c
@@ -6,6 +6,7 @@
 // runtime is able to register globals in the __DATA,__asan_globals section.
 
 // REQUIRES: osx-ld64-live_support
+// UNSUPPORTED: ios
 // RUN: %clang_asan -mmacosx-version-min=10.11 -Xlinker -dead_strip -o %t %s
 // RUN: llvm-nm -format=posix %t | FileCheck --check-prefix NM-CHECK %s
 // RUN: not %run %t 2>&1 | FileCheck --check-prefix ASAN-CHECK %s
diff --git a/test/asan/TestCases/Darwin/dladdr-demangling.cc b/test/asan/TestCases/Darwin/dladdr-demangling.cc
index 6f52b93..fb6f6d7 100644
--- a/test/asan/TestCases/Darwin/dladdr-demangling.cc
+++ b/test/asan/TestCases/Darwin/dladdr-demangling.cc
@@ -5,6 +5,9 @@
 // RUN: not %run %t 2>&1 | FileCheck %s
 // RUN: %env_asan_opts=verbosity=2 not %run sandbox-exec -p '(version 1)(allow default)(deny process-fork)' %t 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-DLADDR
 
+// sandbox-exec isn't available on iOS
+// UNSUPPORTED: ios
+
 #include <stdlib.h>
 
 class MyClass {
diff --git a/test/asan/TestCases/Darwin/dump_registers.cc b/test/asan/TestCases/Darwin/dump_registers.cc
index 884ad2e..cc2710f 100644
--- a/test/asan/TestCases/Darwin/dump_registers.cc
+++ b/test/asan/TestCases/Darwin/dump_registers.cc
@@ -5,22 +5,22 @@
 
 #include <assert.h>
 #include <stdio.h>
+#include <sys/mman.h>
 
 int main() {
   fprintf(stderr, "Hello\n");
   char *ptr;
 
-  if (sizeof(void *) == 8)
-    ptr = (char *)0x6666666666666666;
-  else if (sizeof(void *) == 4)
-    ptr = (char *)0x55555555;
-  else
-    assert(0 && "Your computer is weird.");
+  ptr = (char *)mmap(NULL, 0x10000, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0);
+  assert(ptr && "failed to mmap");
+
+  fprintf(stderr, sizeof(uintptr_t) == 8 ? "p = 0x%016lx\n" : "p = 0x%08lx\n", (uintptr_t)ptr);
+  // CHECK: p = [[ADDR:0x[0-9]+]]
 
   char c = *ptr;  // BOOM
-  // CHECK: ERROR: AddressSanitizer: SEGV
+  // CHECK: ERROR: AddressSanitizer: {{SEGV|BUS}}
   // CHECK: Register values:
-  // CHECK: {{0x55555555|0x6666666666666666}}
+  // CHECK: [[ADDR]]
   fprintf(stderr, "World\n");
   return c;
 }
diff --git a/test/asan/TestCases/Darwin/dyld_insert_libraries_reexec.cc b/test/asan/TestCases/Darwin/dyld_insert_libraries_reexec.cc
index b22036a..d195258 100644
--- a/test/asan/TestCases/Darwin/dyld_insert_libraries_reexec.cc
+++ b/test/asan/TestCases/Darwin/dyld_insert_libraries_reexec.cc
@@ -1,30 +1,30 @@
 // When DYLD-inserting the ASan dylib from a different location than the
 // original, make sure we don't try to reexec.
 
-// RUN: mkdir -p %T/dyld_insert_libraries_reexec
-// RUN: cp `%clang_asan %s -fsanitize=address -### 2>&1 \
-// RUN:   | grep "libclang_rt.asan_osx_dynamic.dylib" \
-// RUN:   | sed -e 's/.*"\(.*libclang_rt.asan_osx_dynamic.dylib\)".*/\1/'` \
-// RUN:   %T/dyld_insert_libraries_reexec/libclang_rt.asan_osx_dynamic.dylib
-// RUN: %clangxx_asan %s -o %T/dyld_insert_libraries_reexec/a.out
+// UNSUPPORTED: ios
+
+// RUN: rm -rf %t && mkdir -p %t
+// RUN: cp `%clang_asan -print-file-name=lib`/darwin/libclang_rt.asan_osx_dynamic.dylib \
+// RUN:   %t/libclang_rt.asan_osx_dynamic.dylib
+// RUN: %clangxx_asan %s -o %t/a.out
 
 // RUN:   %env_asan_opts=verbosity=1 \
 // RUN:       DYLD_INSERT_LIBRARIES=@executable_path/libclang_rt.asan_osx_dynamic.dylib \
-// RUN:       %run %T/dyld_insert_libraries_reexec/a.out 2>&1 \
+// RUN:       %run %t/a.out 2>&1 \
 // RUN:   | FileCheck %s
 
 // RUN: IS_OSX_10_11_OR_HIGHER=$([ `sw_vers -productVersion | cut -d'.' -f2` -lt 11 ]; echo $?)
 
 // On OS X 10.10 and lower, if the dylib is not DYLD-inserted, ASan will re-exec.
 // RUN: if [ $IS_OSX_10_11_OR_HIGHER == 0 ]; then \
-// RUN:   %env_asan_opts=verbosity=1 %run %T/dyld_insert_libraries_reexec/a.out 2>&1 \
+// RUN:   %env_asan_opts=verbosity=1 %run %t/a.out 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-NOINSERT %s; \
 // RUN:   fi
 
 // On OS X 10.11 and higher, we don't need to DYLD-insert anymore, and the interceptors
 // still installed correctly. Let's just check that things work and we don't try to re-exec.
 // RUN: if [ $IS_OSX_10_11_OR_HIGHER == 1 ]; then \
-// RUN:   %env_asan_opts=verbosity=1 %run %T/dyld_insert_libraries_reexec/a.out 2>&1 \
+// RUN:   %env_asan_opts=verbosity=1 %run %t/a.out 2>&1 \
 // RUN:   | FileCheck %s; \
 // RUN:   fi
 
diff --git a/test/asan/TestCases/Darwin/dyld_insert_libraries_remove.cc b/test/asan/TestCases/Darwin/dyld_insert_libraries_remove.cc
index a3af8c1..8a02105 100644
--- a/test/asan/TestCases/Darwin/dyld_insert_libraries_remove.cc
+++ b/test/asan/TestCases/Darwin/dyld_insert_libraries_remove.cc
@@ -2,26 +2,26 @@
 // the ASan dylib from the environment variable (both when using an absolute
 // or relative path) and also that the other dylibs are left untouched.
 
-// RUN: mkdir -p %T/dyld_insert_libraries_remove
-// RUN: cp `%clang_asan %s -fsanitize=address -### 2>&1 \
-// RUN:   | grep "libclang_rt.asan_osx_dynamic.dylib" \
-// RUN:   | sed -e 's/.*"\(.*libclang_rt.asan_osx_dynamic.dylib\)".*/\1/'` \
-// RUN:   %T/dyld_insert_libraries_remove/libclang_rt.asan_osx_dynamic.dylib
+// UNSUPPORTED: ios
 
-// RUN: %clangxx_asan %s -o %T/dyld_insert_libraries_remove/a.out
+// RUN: rm -rf %t && mkdir -p %t
+// RUN: cp `%clang_asan -print-file-name=lib`/darwin/libclang_rt.asan_osx_dynamic.dylib \
+// RUN:   %t/libclang_rt.asan_osx_dynamic.dylib
+
+// RUN: %clangxx_asan %s -o %t/a.out
 // RUN: %clangxx -DSHARED_LIB %s \
-// RUN:     -dynamiclib -o %T/dyld_insert_libraries_remove/dummy-so.dylib
+// RUN:     -dynamiclib -o %t/dummy-so.dylib
 
-// RUN: ( cd %T/dyld_insert_libraries_remove && \
+// RUN: ( cd %t && \
 // RUN:   DYLD_INSERT_LIBRARIES=@executable_path/libclang_rt.asan_osx_dynamic.dylib:dummy-so.dylib \
 // RUN:   %run ./a.out 2>&1 ) | FileCheck %s || exit 1
 
-// RUN: ( cd %T/dyld_insert_libraries_remove && \
+// RUN: ( cd %t && \
 // RUN:   DYLD_INSERT_LIBRARIES=libclang_rt.asan_osx_dynamic.dylib:dummy-so.dylib \
 // RUN:   %run ./a.out 2>&1 ) | FileCheck %s || exit 1
 
-// RUN: ( cd %T/dyld_insert_libraries_remove && \
-// RUN:   DYLD_INSERT_LIBRARIES=%T/dyld_insert_libraries_remove/libclang_rt.asan_osx_dynamic.dylib:dummy-so.dylib \
+// RUN: ( cd %t && \
+// RUN:   DYLD_INSERT_LIBRARIES=%t/libclang_rt.asan_osx_dynamic.dylib:dummy-so.dylib \
 // RUN:   %run ./a.out 2>&1 ) | FileCheck %s || exit 1
 
 #if !defined(SHARED_LIB)
diff --git a/test/asan/TestCases/Darwin/haswell-symbolication.cc b/test/asan/TestCases/Darwin/haswell-symbolication.cc
index 59c938c..7856c4d 100644
--- a/test/asan/TestCases/Darwin/haswell-symbolication.cc
+++ b/test/asan/TestCases/Darwin/haswell-symbolication.cc
@@ -48,6 +48,7 @@
 
 // REQUIRES: x86-target-arch
 // REQUIRES: x86_64h
+// UNSUPPORTED: ios
 
 #include <sanitizer/common_interface_defs.h>
 #include <stdio.h>
diff --git a/test/asan/TestCases/Darwin/interface_symbols_darwin.c b/test/asan/TestCases/Darwin/interface_symbols_darwin.cc
similarity index 92%
rename from test/asan/TestCases/Darwin/interface_symbols_darwin.c
rename to test/asan/TestCases/Darwin/interface_symbols_darwin.cc
index 9450575..a8e2bcb 100644
--- a/test/asan/TestCases/Darwin/interface_symbols_darwin.c
+++ b/test/asan/TestCases/Darwin/interface_symbols_darwin.cc
@@ -2,7 +2,7 @@
 // If you're changing this file, please also change
 // ../Linux/interface_symbols.c
 
-// RUN: %clang_asan -dead_strip -O2 %s -o %t.exe
+// RUN: %clangxx_asan -dead_strip -O2 %s -o %t.exe
 //
 // note: we can not use -D on Darwin.
 // RUN: nm -g `%clang_asan %s -fsanitize=address -### 2>&1 | grep "libclang_rt.asan_osx_dynamic.dylib" | sed -e 's/.*"\(.*libclang_rt.asan_osx_dynamic.dylib\)".*/\1/'` \
@@ -11,7 +11,7 @@
 // RUN:  | grep -v "__sanitizer_syscall"                                       \
 // RUN:  | grep -v "__sanitizer_weak_hook"                                     \
 // RUN:  | grep -v "__sanitizer_mz"                                            \
-// RUN:  | grep -v "__ubsan_handle_dynamic_type_cache_miss"                    \
+// RUN:  | grep -v "__sancov_lowest_stack"                                     \
 // RUN:  | sed -e "s/__asan_version_mismatch_check_v[0-9]+/__asan_version_mismatch_check/" \
 // RUN:  > %t.exports
 //
@@ -31,4 +31,6 @@
 // RUN: echo "=== NOTE === If you see a mismatch below, please update sanitizer_interface.inc files."
 // RUN: diff %t.imports-sorted %t.exports-sorted
 
+// UNSUPPORTED: ios
+
 int main() { return 0; }
diff --git a/test/asan/TestCases/Darwin/malloc_set_zone_name-mprotect.cc b/test/asan/TestCases/Darwin/malloc_set_zone_name-mprotect.cc
index 2c643bc..b9b96ef 100644
--- a/test/asan/TestCases/Darwin/malloc_set_zone_name-mprotect.cc
+++ b/test/asan/TestCases/Darwin/malloc_set_zone_name-mprotect.cc
@@ -47,5 +47,6 @@
     memset(mem[i], 'a', 8 * (i % kNumIter));
     free(mem[i]);
   }
+  malloc_destroy_zone(zone);
   return 0;
 }
diff --git a/test/asan/TestCases/Darwin/nil-return-struct.mm b/test/asan/TestCases/Darwin/nil-return-struct.mm
new file mode 100644
index 0000000..9fb77e7
--- /dev/null
+++ b/test/asan/TestCases/Darwin/nil-return-struct.mm
@@ -0,0 +1,31 @@
+// RUN: %clang_asan %s -o %t -framework Foundation
+// RUN: %run %t 2>&1 | FileCheck %s
+
+#import <Foundation/Foundation.h>
+
+struct MyStruct {
+  long a, b, c, d;
+};
+
+@interface MyClass: NSObject
+- (MyStruct)methodWhichReturnsARect;
+@end
+@implementation MyClass
+- (MyStruct)methodWhichReturnsARect {
+  MyStruct s;
+  s.a = 10;
+  s.b = 20;
+  s.c = 30;
+  s.d = 40;
+  return s;
+}
+@end
+
+int main() {
+  MyClass *myNil = nil;  // intentionally nil
+  [myNil methodWhichReturnsARect];
+  fprintf(stderr, "Hello world");
+}
+
+// CHECK-NOT: AddressSanitizer: stack-use-after-scope
+// CHECK: Hello world
diff --git a/test/asan/TestCases/Darwin/reexec-insert-libraries-env.cc b/test/asan/TestCases/Darwin/reexec-insert-libraries-env.cc
index aa4d92b..bee23f2 100644
--- a/test/asan/TestCases/Darwin/reexec-insert-libraries-env.cc
+++ b/test/asan/TestCases/Darwin/reexec-insert-libraries-env.cc
@@ -7,9 +7,11 @@
 // RUN:     -dynamiclib -o darwin-dummy-shared-lib-so.dylib
 
 // FIXME: the following command line may hang in the case of a regression.
-// RUN: env DYLD_INSERT_LIBRARIES=darwin-dummy-shared-lib-so.dylib \
+// RUN: %env DYLD_INSERT_LIBRARIES=darwin-dummy-shared-lib-so.dylib \
 // RUN:     %run %t 2>&1 | FileCheck %s || exit 1
 
+// UNSUPPORTED: ios
+
 #if !defined(SHARED_LIB)
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/test/asan/TestCases/Darwin/sandbox-symbolizer.cc b/test/asan/TestCases/Darwin/sandbox-symbolizer.cc
index 4310f9c..b36c4fa 100644
--- a/test/asan/TestCases/Darwin/sandbox-symbolizer.cc
+++ b/test/asan/TestCases/Darwin/sandbox-symbolizer.cc
@@ -12,6 +12,9 @@
 // RUN: not %run sandbox-exec -p '(version 1)(allow default)(deny mach-priv-task-port)' %t 2>&1 | FileCheck %s
 // RUN: env ASAN_SYMBOLIZER_PATH="" not %run sandbox-exec -p '(version 1)(allow default)(deny mach-priv-task-port)' %t 2>&1 | FileCheck %s
 
+// sandbox-exec isn't available on iOS
+// UNSUPPORTED: ios
+
 #include <stdlib.h>
 int main() {
   char *x = (char*)malloc(10 * sizeof(char));
diff --git a/test/asan/TestCases/Darwin/scribble.cc b/test/asan/TestCases/Darwin/scribble.cc
new file mode 100644
index 0000000..8303cf3
--- /dev/null
+++ b/test/asan/TestCases/Darwin/scribble.cc
@@ -0,0 +1,58 @@
+// RUN: %clang_asan -O2 %s -o %t
+// RUN: %run %t 2>&1 | FileCheck --check-prefix=CHECK-NOSCRIBBLE %s
+// RUN: %env MallocScribble=1 MallocPreScribble=1 %run %t 2>&1 | FileCheck --check-prefix=CHECK-SCRIBBLE %s
+// RUN: %env_asan_opts=max_free_fill_size=4096 %run %t 2>&1 | FileCheck --check-prefix=CHECK-SCRIBBLE %s
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+struct Isa {
+  const char *class_name;
+};
+
+struct MyClass {
+  long padding;
+  Isa *isa;
+  long data;
+
+  void print_my_class_name();
+};
+
+__attribute__((no_sanitize("address")))
+void MyClass::print_my_class_name() {
+  fprintf(stderr, "this = %p\n", this);
+  fprintf(stderr, "padding = 0x%lx\n", this->padding);
+  fprintf(stderr, "isa = %p\n", this->isa);
+
+  if ((uint32_t)(uintptr_t)this->isa != 0x55555555) {
+    fprintf(stderr, "class name: %s\n", this->isa->class_name);
+  }
+}
+
+int main() {
+  Isa *my_class_isa = (Isa *)malloc(sizeof(Isa));
+  memset(my_class_isa, 0x77, sizeof(Isa));
+  my_class_isa->class_name = "MyClass";
+
+  MyClass *my_object = (MyClass *)malloc(sizeof(MyClass));
+  memset(my_object, 0x88, sizeof(MyClass));
+  my_object->isa = my_class_isa;
+  my_object->data = 42;
+
+  my_object->print_my_class_name();
+  // CHECK-SCRIBBLE: class name: MyClass
+  // CHECK-NOSCRIBBLE: class name: MyClass
+
+  free(my_object);
+
+  my_object->print_my_class_name();
+  // CHECK-NOSCRIBBLE: class name: MyClass
+  // CHECK-SCRIBBLE: isa = {{(0x)?}}{{5555555555555555|55555555}}
+
+  fprintf(stderr, "okthxbai!\n");
+  // CHECK-SCRIBBLE: okthxbai!
+  // CHECK-NOSCRIBBLE: okthxbai!
+  free(my_class_isa);
+}
diff --git a/test/asan/TestCases/Darwin/suppressions-darwin.cc b/test/asan/TestCases/Darwin/suppressions-darwin.cc
index a177c4e..8c207b1 100644
--- a/test/asan/TestCases/Darwin/suppressions-darwin.cc
+++ b/test/asan/TestCases/Darwin/suppressions-darwin.cc
@@ -27,6 +27,7 @@
                               kCFStringEncodingUTF8, FALSE); // BOOM
   fprintf(stderr, "Ignored.\n");
   free(a);
+  CFRelease(str);
 }
 
 // CHECK-CRASH: AddressSanitizer: heap-buffer-overflow
diff --git a/test/asan/TestCases/Darwin/suppressions-function.cc b/test/asan/TestCases/Darwin/suppressions-function.cc
new file mode 100644
index 0000000..f58796f
--- /dev/null
+++ b/test/asan/TestCases/Darwin/suppressions-function.cc
@@ -0,0 +1,28 @@
+// Check that without suppressions, we catch the issue.
+// RUN: %clangxx_asan -O0 %s -o %t
+// RUN: not %run %t 2>&1 | FileCheck --check-prefix=CHECK-CRASH %s
+
+// RUN: echo "interceptor_via_fun:crash_function" > %t.supp
+// RUN: %clangxx_asan -O0 %s -o %t && %env_asan_opts=suppressions='"%t.supp"' %run %t 2>&1 | FileCheck --check-prefix=CHECK-IGNORE %s
+
+// UNSUPPORTED: ios
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+void crash_function() {
+  char *a = (char *)malloc(6);
+  free(a);
+  size_t len = strlen(a); // BOOM
+  fprintf(stderr, "strlen ignored, len = %zu\n", len);
+}
+
+int main() {
+  crash_function();
+}
+
+// CHECK-CRASH: AddressSanitizer: heap-use-after-free
+// CHECK-CRASH-NOT: strlen ignored
+// CHECK-IGNORE-NOT: AddressSanitizer: heap-use-after-free
+// CHECK-IGNORE: strlen ignored
diff --git a/test/asan/TestCases/Darwin/suppressions-sandbox.cc b/test/asan/TestCases/Darwin/suppressions-sandbox.cc
index ddbad46..966f213 100644
--- a/test/asan/TestCases/Darwin/suppressions-sandbox.cc
+++ b/test/asan/TestCases/Darwin/suppressions-sandbox.cc
@@ -8,6 +8,9 @@
 // RUN:   sandbox-exec -p '(version 1)(allow default)(deny process-fork)' \
 // RUN:   %run %t 2>&1 | FileCheck --check-prefix=CHECK-IGNORE %s
 
+// sandbox-exec isn't available on iOS
+// UNSUPPORTED: ios
+
 #include <CoreFoundation/CoreFoundation.h>
 
 int main() {
@@ -18,6 +21,7 @@
                               kCFStringEncodingUTF8, FALSE);  // BOOM
   fprintf(stderr, "Ignored.\n");
   free(a);
+  CFRelease(str);
 }
 
 // CHECK-CRASH: AddressSanitizer: heap-buffer-overflow
diff --git a/test/asan/TestCases/Darwin/unset-insert-libraries-on-exec.cc b/test/asan/TestCases/Darwin/unset-insert-libraries-on-exec.cc
index f8a330a..38fb3aa 100644
--- a/test/asan/TestCases/Darwin/unset-insert-libraries-on-exec.cc
+++ b/test/asan/TestCases/Darwin/unset-insert-libraries-on-exec.cc
@@ -2,16 +2,18 @@
 // executing other programs.
 
 // RUN: %clangxx_asan %s -o %t
-// RUN: %clangxx %p/../Helpers/echo-env.cc -o %T/echo-env
+// RUN: %clangxx %p/../Helpers/echo-env.cc -o %t-echo-env
 // RUN: %clangxx -DSHARED_LIB %s \
 // RUN:     -dynamiclib -o %t-darwin-dummy-shared-lib-so.dylib
 
 // Make sure DYLD_INSERT_LIBRARIES doesn't contain the runtime library before
 // execl().
 
-// RUN: %run %t %T/echo-env >/dev/null 2>&1
-// RUN: env DYLD_INSERT_LIBRARIES=%t-darwin-dummy-shared-lib-so.dylib \
-// RUN:     %run %t %T/echo-env 2>&1 | FileCheck %s || exit 1
+// RUN: %run %t %t-echo-env >/dev/null 2>&1
+// RUN: %env DYLD_INSERT_LIBRARIES=%t-darwin-dummy-shared-lib-so.dylib \
+// RUN:     %run %t %t-echo-env 2>&1 | FileCheck %s || exit 1
+
+// UNSUPPORTED: ios
 
 #if !defined(SHARED_LIB)
 #include <unistd.h>
diff --git a/test/asan/TestCases/Darwin/uuid.cc b/test/asan/TestCases/Darwin/uuid.cc
index 3f50272..3363858 100644
--- a/test/asan/TestCases/Darwin/uuid.cc
+++ b/test/asan/TestCases/Darwin/uuid.cc
@@ -4,6 +4,9 @@
 // RUN: %clangxx_asan %s -o %t -fsanitize-recover=address
 // RUN: %env_asan_opts=print_module_map=2:halt_on_error=0     %run %t 2>&1 | FileCheck %s
 
+// We can't run system("otool") in the simulator.
+// UNSUPPORTED: ios
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
diff --git a/test/asan/TestCases/Helpers/underflow.cc b/test/asan/TestCases/Helpers/underflow.cc
new file mode 100644
index 0000000..2697948
--- /dev/null
+++ b/test/asan/TestCases/Helpers/underflow.cc
@@ -0,0 +1 @@
+int YYY[3]={1,2,3};
diff --git a/test/asan/TestCases/Linux/abort_on_error.cc b/test/asan/TestCases/Linux/abort_on_error.cc
index 3f70613..3fe9899 100644
--- a/test/asan/TestCases/Linux/abort_on_error.cc
+++ b/test/asan/TestCases/Linux/abort_on_error.cc
@@ -9,6 +9,7 @@
 // lit doesn't set ASAN_OPTIONS anyway.
 // RUN: not %run %t 2>&1 | FileCheck %s
 
+// Android runs with abort_on_error=0
 // UNSUPPORTED: android
 
 #include <stdlib.h>
diff --git a/test/asan/TestCases/Linux/aligned_delete_test.cc b/test/asan/TestCases/Linux/aligned_delete_test.cc
new file mode 100644
index 0000000..5b9455e
--- /dev/null
+++ b/test/asan/TestCases/Linux/aligned_delete_test.cc
@@ -0,0 +1,168 @@
+// RUN: %clangxx_asan -std=c++1z -faligned-allocation -fsanitize-recover=address -O0 %s -o %t
+// RUN: %env_asan_opts=new_delete_type_mismatch=1:halt_on_error=false:detect_leaks=false %run %t 2>&1 | FileCheck %s
+// RUN: %env_asan_opts=new_delete_type_mismatch=0                                        %run %t
+
+// RUN: %clangxx_asan -std=c++1z -faligned-allocation -fsized-deallocation -fsanitize-recover=address -O0 %s -o %t
+// RUN: %env_asan_opts=new_delete_type_mismatch=1:halt_on_error=false:detect_leaks=false %run %t 2>&1 | FileCheck %s
+// RUN: %env_asan_opts=new_delete_type_mismatch=0                                        %run %t
+
+// REQUIRES: asan-static-runtime
+
+#include <stdio.h>
+
+// Define all new/delete to do not depend on the version provided by the
+// plaform. The implementation is provided by ASan anyway.
+
+namespace std {
+struct nothrow_t {};
+static const nothrow_t nothrow;
+enum class align_val_t : size_t {};
+}  // namespace std
+
+void *operator new(size_t);
+void *operator new[](size_t);
+void *operator new(size_t, std::nothrow_t const&);
+void *operator new[](size_t, std::nothrow_t const&);
+void *operator new(size_t, std::align_val_t);
+void *operator new[](size_t, std::align_val_t);
+void *operator new(size_t, std::align_val_t, std::nothrow_t const&);
+void *operator new[](size_t, std::align_val_t, std::nothrow_t const&);
+
+void operator delete(void*) throw();
+void operator delete[](void*) throw();
+void operator delete(void*, std::nothrow_t const&);
+void operator delete[](void*, std::nothrow_t const&);
+void operator delete(void*, size_t) throw();
+void operator delete[](void*, size_t) throw();
+void operator delete(void*, std::align_val_t) throw();
+void operator delete[](void*, std::align_val_t) throw();
+void operator delete(void*, std::align_val_t, std::nothrow_t const&);
+void operator delete[](void*, std::align_val_t, std::nothrow_t const&);
+void operator delete(void*, size_t, std::align_val_t) throw();
+void operator delete[](void*, size_t, std::align_val_t) throw();
+
+
+template<typename T>
+inline T* break_optimization(T *arg) {
+  __asm__ __volatile__("" : : "r" (arg) : "memory");
+  return arg;
+}
+
+
+struct S12 { int a, b, c; };
+struct alignas(128) S12_128 { int a, b, c; };
+struct alignas(256) S12_256 { int a, b, c; };
+struct alignas(512) S1024_512 { char a[1024]; };
+struct alignas(1024) S1024_1024 { char a[1024]; };
+
+
+int main(int argc, char **argv) {
+  fprintf(stderr, "Testing valid cases\n");
+
+  delete break_optimization(new S12);
+  operator delete(break_optimization(new S12), std::nothrow);
+  delete [] break_optimization(new S12[100]);
+  operator delete[](break_optimization(new S12[100]), std::nothrow);
+
+  delete break_optimization(new S12_128);
+  operator delete(break_optimization(new S12_128),
+                  std::align_val_t(alignof(S12_128)));
+  operator delete(break_optimization(new S12_128),
+                  std::align_val_t(alignof(S12_128)), std::nothrow);
+  operator delete(break_optimization(new S12_128), sizeof(S12_128),
+                  std::align_val_t(alignof(S12_128)));
+
+  delete [] break_optimization(new S12_128[100]);
+  operator delete[](break_optimization(new S12_128[100]),
+                    std::align_val_t(alignof(S12_128)));
+  operator delete[](break_optimization(new S12_128[100]),
+                    std::align_val_t(alignof(S12_128)), std::nothrow);
+  operator delete[](break_optimization(new S12_128[100]), sizeof(S12_128[100]),
+                    std::align_val_t(alignof(S12_128)));
+
+  fprintf(stderr, "Done\n");
+  // CHECK: Testing valid cases
+  // CHECK-NEXT: Done
+
+  // Explicit mismatched calls.
+
+  operator delete(break_optimization(new S12_128), std::nothrow);
+  // CHECK: AddressSanitizer: new-delete-type-mismatch
+  // CHECK:  object passed to delete has wrong type:
+  // CHECK:  alignment of the allocated type:   128 bytes;
+  // CHECK:  alignment of the deallocated type: default-aligned.
+  // CHECK: SUMMARY: AddressSanitizer: new-delete-type-mismatch
+
+  operator delete(break_optimization(new S12_128), sizeof(S12_128));
+  // CHECK: AddressSanitizer: new-delete-type-mismatch
+  // CHECK:  object passed to delete has wrong type:
+  // CHECK:  alignment of the allocated type:   128 bytes;
+  // CHECK:  alignment of the deallocated type: default-aligned.
+  // CHECK: SUMMARY: AddressSanitizer: new-delete-type-mismatch
+
+  operator delete[](break_optimization(new S12_128[100]), std::nothrow);
+  // CHECK: AddressSanitizer: new-delete-type-mismatch
+  // CHECK:  object passed to delete has wrong type:
+  // CHECK:  alignment of the allocated type:   128 bytes;
+  // CHECK:  alignment of the deallocated type: default-aligned.
+  // CHECK: SUMMARY: AddressSanitizer: new-delete-type-mismatch
+
+  operator delete[](break_optimization(new S12_128[100]), sizeof(S12_128[100]));
+  // CHECK: AddressSanitizer: new-delete-type-mismatch
+  // CHECK:  object passed to delete has wrong type:
+  // CHECK:  alignment of the allocated type:   128 bytes;
+  // CHECK:  alignment of the deallocated type: default-aligned.
+  // CHECK: SUMMARY: AddressSanitizer: new-delete-type-mismatch
+
+  // Various mismatched alignments.
+
+  delete break_optimization(reinterpret_cast<S12*>(new S12_256));
+  // CHECK: AddressSanitizer: new-delete-type-mismatch
+  // CHECK:  object passed to delete has wrong type:
+  // CHECK:  alignment of the allocated type:   256 bytes;
+  // CHECK:  alignment of the deallocated type: default-aligned.
+  // CHECK: SUMMARY: AddressSanitizer: new-delete-type-mismatch
+
+  delete break_optimization(reinterpret_cast<S12_256*>(new S12));
+  // CHECK: AddressSanitizer: new-delete-type-mismatch
+  // CHECK:  object passed to delete has wrong type:
+  // CHECK:  alignment of the allocated type:   default-aligned;
+  // CHECK:  alignment of the deallocated type: 256 bytes.
+  // CHECK: SUMMARY: AddressSanitizer: new-delete-type-mismatch
+
+  delete break_optimization(reinterpret_cast<S12_128*>(new S12_256));
+  // CHECK: AddressSanitizer: new-delete-type-mismatch
+  // CHECK:  object passed to delete has wrong type:
+  // CHECK:  alignment of the allocated type:   256 bytes;
+  // CHECK:  alignment of the deallocated type: 128 bytes.
+  // CHECK: SUMMARY: AddressSanitizer: new-delete-type-mismatch
+
+  delete [] break_optimization(reinterpret_cast<S12*>(new S12_256[100]));
+  // CHECK: AddressSanitizer: new-delete-type-mismatch
+  // CHECK:  object passed to delete has wrong type:
+  // CHECK:  alignment of the allocated type:   256 bytes;
+  // CHECK:  alignment of the deallocated type: default-aligned.
+  // CHECK: SUMMARY: AddressSanitizer: new-delete-type-mismatch
+
+  delete [] break_optimization(reinterpret_cast<S12_256*>(new S12[100]));
+  // CHECK: AddressSanitizer: new-delete-type-mismatch
+  // CHECK:  object passed to delete has wrong type:
+  // CHECK:  alignment of the allocated type:   default-aligned;
+  // CHECK:  alignment of the deallocated type: 256 bytes.
+  // CHECK: SUMMARY: AddressSanitizer: new-delete-type-mismatch
+
+  delete [] break_optimization(reinterpret_cast<S12_128*>(new S12_256[100]));
+  // CHECK: AddressSanitizer: new-delete-type-mismatch
+  // CHECK:  object passed to delete has wrong type:
+  // CHECK:  alignment of the allocated type:   256 bytes;
+  // CHECK:  alignment of the deallocated type: 128 bytes.
+  // CHECK: SUMMARY: AddressSanitizer: new-delete-type-mismatch
+
+  // Push ASan limits, the current limitation is that it cannot differentiate
+  // alignments above 512 bytes.
+  fprintf(stderr, "Checking alignments >= 512 bytes\n");
+  delete break_optimization(reinterpret_cast<S1024_512*>(new S1024_1024));
+  fprintf(stderr, "Done\n");
+  // CHECK: Checking alignments >= 512 bytes
+  // CHECK-NEXT: Done
+}
diff --git a/test/asan/TestCases/Linux/allocator_oom_test.cc b/test/asan/TestCases/Linux/allocator_oom_test.cc
new file mode 100644
index 0000000..f94475f
--- /dev/null
+++ b/test/asan/TestCases/Linux/allocator_oom_test.cc
@@ -0,0 +1,87 @@
+// Test the behavior of malloc/calloc/realloc when the allocation causes OOM
+// in the secondary allocator.
+// By default (allocator_may_return_null=0) the process should crash.
+// With allocator_may_return_null=1 the allocator should return 0.
+// Set the limit to 20.5T on 64 bits to account for ASan shadow memory,
+// allocator buffers etc. so that the test allocation of ~1T will trigger OOM.
+// Limit this test to Linux since we're relying on allocator internal
+// limits (shadow memory size, allocation limits etc.)
+
+// RUN: %clangxx_asan -O0 %s -o %t
+// RUN: ulimit -v 22024290304
+// RUN: not %run %t malloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefixes=CHECK-MALLOC,CHECK-CRASH
+// RUN: %env_asan_opts=allocator_may_return_null=0 not %run %t malloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefixes=CHECK-MALLOC,CHECK-CRASH
+// RUN: %env_asan_opts=allocator_may_return_null=1     %run %t malloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefixes=CHECK-MALLOC,CHECK-NULL
+// RUN: %env_asan_opts=allocator_may_return_null=0 not %run %t calloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefixes=CHECK-CALLOC,CHECK-CRASH
+// RUN: %env_asan_opts=allocator_may_return_null=1     %run %t calloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefixes=CHECK-CALLOC,CHECK-NULL
+// RUN: %env_asan_opts=allocator_may_return_null=0 not %run %t realloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefixes=CHECK-REALLOC,CHECK-CRASH
+// RUN: %env_asan_opts=allocator_may_return_null=1     %run %t realloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefixes=CHECK-REALLOC,CHECK-NULL
+// RUN: %env_asan_opts=allocator_may_return_null=0 not %run %t realloc-after-malloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefixes=CHECK-MALLOC-REALLOC,CHECK-CRASH
+// RUN: %env_asan_opts=allocator_may_return_null=1     %run %t realloc-after-malloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefixes=CHECK-MALLOC-REALLOC,CHECK-NULL
+
+// ASan shadow memory on s390 is too large for this test.
+// AArch64 bots fail on this test.
+// TODO(alekseys): Android lit do not run ulimit on device.
+// UNSUPPORTED: s390,android,arm,aarch64
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <assert.h>
+
+int main(int argc, char **argv) {
+  assert(argc == 2);
+  const char *action = argv[1];
+  fprintf(stderr, "%s:\n", action);
+
+  // Allocate just a bit less than max allocation size enforced by ASan's
+  // allocator (currently 1T and 3G).
+  const size_t size =
+#if __LP64__
+      (1ULL << 40) - (1ULL << 30);
+#else
+      (3ULL << 30) - (1ULL << 20);
+#endif
+
+  void *x = 0;
+
+  if (!strcmp(action, "malloc")) {
+    x = malloc(size);
+  } else if (!strcmp(action, "calloc")) {
+    x = calloc(size / 4, 4);
+  } else if (!strcmp(action, "realloc")) {
+    x = realloc(0, size);
+  } else if (!strcmp(action, "realloc-after-malloc")) {
+    char *t = (char*)malloc(100);
+    *t = 42;
+    x = realloc(t, size);
+    assert(*t == 42);
+    free(t);
+  } else {
+    assert(0);
+  }
+
+  // The NULL pointer is printed differently on different systems, while (long)0
+  // is always the same.
+  fprintf(stderr, "x: %lx\n", (long)x);
+  free(x);
+
+  return x != 0;
+}
+
+// CHECK-MALLOC: malloc:
+// CHECK-CALLOC: calloc:
+// CHECK-REALLOC: realloc:
+// CHECK-MALLOC-REALLOC: realloc-after-malloc:
+
+// CHECK-CRASH: AddressSanitizer's allocator is terminating the process
+// CHECK-NULL: x: 0
diff --git a/test/asan/TestCases/Linux/asan_dlopen_test.cc b/test/asan/TestCases/Linux/asan_dlopen_test.cc
index f1e31b0..5081b77 100644
--- a/test/asan/TestCases/Linux/asan_dlopen_test.cc
+++ b/test/asan/TestCases/Linux/asan_dlopen_test.cc
@@ -2,6 +2,8 @@
 //
 // RUN: %clangxx %s -DRT=\"%shared_libasan\" -o %t -ldl
 // RUN: not %run %t 2>&1 | FileCheck %s
+// RUN: %env_asan_opts=verify_asan_link_order=true not %run %t 2>&1 | FileCheck %s
+// RUN: %env_asan_opts=verify_asan_link_order=false %run %t 2>&1
 // REQUIRES: asan-dynamic-runtime
 // XFAIL: android
 
diff --git a/test/asan/TestCases/Linux/asan_preload_test-1.cc b/test/asan/TestCases/Linux/asan_preload_test-1.cc
index 4e365b5..e11bd62 100644
--- a/test/asan/TestCases/Linux/asan_preload_test-1.cc
+++ b/test/asan/TestCases/Linux/asan_preload_test-1.cc
@@ -10,7 +10,7 @@
 // REQUIRES: asan-dynamic-runtime
 
 // This way of setting LD_PRELOAD does not work with Android test runner.
-// REQUIRES: not-android
+// REQUIRES: !android
 
 #if BUILD_SO
 char dummy;
diff --git a/test/asan/TestCases/Linux/asan_preload_test-2.cc b/test/asan/TestCases/Linux/asan_preload_test-2.cc
index 488fd52..817c560 100644
--- a/test/asan/TestCases/Linux/asan_preload_test-2.cc
+++ b/test/asan/TestCases/Linux/asan_preload_test-2.cc
@@ -6,7 +6,7 @@
 // REQUIRES: asan-dynamic-runtime
 
 // This way of setting LD_PRELOAD does not work with Android test runner.
-// REQUIRES: not-android
+// REQUIRES: !android
 
 #include <stdlib.h>
 
diff --git a/test/asan/TestCases/Linux/asan_preload_test-3.cc b/test/asan/TestCases/Linux/asan_preload_test-3.cc
new file mode 100644
index 0000000..e998ff6
--- /dev/null
+++ b/test/asan/TestCases/Linux/asan_preload_test-3.cc
@@ -0,0 +1,36 @@
+// Regression test for PR33206
+//
+// RUN: %clang -DDYN=1 -DMALLOC=1 -fPIC -shared %s -o %t-dso1.so
+// RUN: %clang -DDYN=1 -DMALLOC=1 -fPIC -shared %s -o %t-dso2.so %t-dso1.so
+// RUN: %clang %s -o %t-1 %t-dso2.so
+// RUN: env LD_PRELOAD=%shared_libasan %run %t-1 2>&1 | FileCheck %s
+// RUN: %clang -DDYN=1 -DREALLOC=1 -fPIC -shared %s -o %t-dso3.so
+// RUN: %clang -DDYN=1 -DREALLOC=1 -fPIC -shared %s -o %t-dso4.so %t-dso3.so
+// RUN: %clang %s -o %t-2 %t-dso4.so
+// RUN: env LD_PRELOAD=%shared_libasan %run %t-2 2>&1 | FileCheck %s
+// REQUIRES: asan-dynamic-runtime
+
+// FIXME: Test regressed while android bot was disabled. Needs investigation.
+// UNSUPPORTED: android
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#ifdef DYN
+__attribute__((constructor)) void foo() {
+  void *p;
+#ifdef MALLOC
+  p = malloc(1 << 20);
+#endif
+#ifdef REALLOC
+  p = realloc (0, 1 << 20);
+#endif
+  free(p);
+}
+#else
+int main() {
+  // CHECK: Success
+  printf("Success\n");
+  return 0;
+}
+#endif
diff --git a/test/asan/TestCases/Linux/calloc-preload.c b/test/asan/TestCases/Linux/calloc-preload.c
index eb1c673..e1f3319 100644
--- a/test/asan/TestCases/Linux/calloc-preload.c
+++ b/test/asan/TestCases/Linux/calloc-preload.c
@@ -7,7 +7,7 @@
 // REQUIRES: asan-dynamic-runtime
 //
 // This way of setting LD_PRELOAD does not work with Android test runner.
-// REQUIRES: not-android
+// REQUIRES: !android
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/test/asan/TestCases/Linux/clang_gcc_abi.cc b/test/asan/TestCases/Linux/clang_gcc_abi.cc
index 845f412..79710dc 100644
--- a/test/asan/TestCases/Linux/clang_gcc_abi.cc
+++ b/test/asan/TestCases/Linux/clang_gcc_abi.cc
@@ -3,7 +3,7 @@
 // RUN: %clangxx_asan -O2 -x c %s -o %t && not %run %t 2>&1 | FileCheck %s
 // RUN: %clangxx_asan -O3 -x c %s -o %t && not %run %t 2>&1 | FileCheck %s
 
-// REQUIRES: arm-target-arch
+// REQUIRES: arm-target-arch, fast-unwinder-works
 // XFAIL: armv7l-unknown-linux-gnueabihf
 
 #include <stdlib.h>
diff --git a/test/asan/TestCases/Linux/coverage-missing.cc b/test/asan/TestCases/Linux/coverage-missing.cc
index 49487d3..585aee6 100644
--- a/test/asan/TestCases/Linux/coverage-missing.cc
+++ b/test/asan/TestCases/Linux/coverage-missing.cc
@@ -1,47 +1,49 @@
 // Test for "sancov.py missing ...".
 
 // First case: coverage from executable. main() is called on every code path.
-// RUN: %clangxx_asan -fsanitize-coverage=func %s -o %t -DFOOBAR -DMAIN
+// RUN: %clangxx_asan -fsanitize-coverage=func,trace-pc-guard %s -o %t -DFOOBAR -DMAIN
 // RUN: rm -rf %T/coverage-missing
 // RUN: mkdir -p %T/coverage-missing
 // RUN: cd %T/coverage-missing
 // RUN: %env_asan_opts=coverage=1:coverage_dir=%T/coverage-missing %run %t
 // RUN: %sancov print *.sancov > main.txt
 // RUN: rm *.sancov
-// RUN: [ $(cat main.txt | wc -l) == 1 ]
+// RUN: count 1 < main.txt
 // RUN: %env_asan_opts=coverage=1:coverage_dir=%T/coverage-missing %run %t x
 // RUN: %sancov print *.sancov > foo.txt
 // RUN: rm *.sancov
-// RUN: [ $(cat foo.txt | wc -l) == 3 ]
+// RUN: count 3 < foo.txt
 // RUN: %env_asan_opts=coverage=1:coverage_dir=%T/coverage-missing %run %t x x
 // RUN: %sancov print *.sancov > bar.txt
 // RUN: rm *.sancov
-// RUN: [ $(cat bar.txt | wc -l) == 4 ]
+// RUN: count 4 < bar.txt
 // RUN: %sancov missing %t < foo.txt > foo-missing.txt
 // RUN: sort main.txt foo-missing.txt -o foo-missing-with-main.txt
 // The "missing from foo" set may contain a few bogus PCs from the sanitizer
 // runtime, but it must include the entire "bar" code path as a subset. Sorted
 // lists can be tested for set inclusion with diff + grep.
-// RUN: ( diff bar.txt foo-missing-with-main.txt || true ) | not grep "^<"
+// RUN: diff bar.txt foo-missing-with-main.txt > %t.log || true
+// RUN: not grep "^<" %t.log
 
 // Second case: coverage from DSO.
 // cd %T
-// RUN: %clangxx_asan -fsanitize-coverage=func %s -o %dynamiclib -DFOOBAR -shared -fPIC
-// RUN: %clangxx_asan -fsanitize-coverage=func %s %dynamiclib -o %t -DMAIN
-// RUN: export LIBNAME=`basename %dynamiclib`
+// RUN: %clangxx_asan -fsanitize-coverage=func,trace-pc-guard %s -o %dynamiclib -DFOOBAR -shared -fPIC
+// RUN: %clangxx_asan -fsanitize-coverage=func,trace-pc-guard %s %dynamiclib -o %t -DMAIN
+// RUN: cd ..
 // RUN: rm -rf %T/coverage-missing
 // RUN: mkdir -p %T/coverage-missing
 // RUN: cd %T/coverage-missing
 // RUN: %env_asan_opts=coverage=1:coverage_dir=%T/coverage-missing %run %t x
-// RUN: %sancov print $LIBNAME.*.sancov > foo.txt
+// RUN: %sancov print %xdynamiclib_filename.*.sancov > foo.txt
 // RUN: rm *.sancov
-// RUN: [ $(cat foo.txt | wc -l) == 2 ]
+// RUN: count 2 < foo.txt
 // RUN: %env_asan_opts=coverage=1:coverage_dir=%T/coverage-missing %run %t x x
-// RUN: %sancov print $LIBNAME.*.sancov > bar.txt
+// RUN: %sancov print %xdynamiclib_filename.*.sancov > bar.txt
 // RUN: rm *.sancov
-// RUN: [ $(cat bar.txt | wc -l) == 3 ]
+// RUN: count 3 < bar.txt
 // RUN: %sancov missing %dynamiclib < foo.txt > foo-missing.txt
-// RUN: ( diff bar.txt foo-missing.txt || true ) | not grep "^<"
+// RUN: diff bar.txt foo-missing.txt > %t.log || true
+// RUN: not grep "^<" %t.log
 
 // REQUIRES: x86-target-arch
 // XFAIL: android
diff --git a/test/asan/TestCases/Linux/global-overflow-bfd.cc b/test/asan/TestCases/Linux/global-overflow-bfd.cc
new file mode 100644
index 0000000..117a761
--- /dev/null
+++ b/test/asan/TestCases/Linux/global-overflow-bfd.cc
@@ -0,0 +1,18 @@
+// Test that gc-sections-friendly instrumentation of globals does not introduce
+// false negatives with the BFD linker.
+// RUN: %clangxx_asan -fuse-ld=bfd -Wl,-gc-sections -ffunction-sections -fdata-sections -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s
+
+#include <string.h>
+int main(int argc, char **argv) {
+  static char XXX[10];
+  static char YYY[10];
+  static char ZZZ[10];
+  memset(XXX, 0, 10);
+  memset(YYY, 0, 10);
+  memset(ZZZ, 0, 10);
+  int res = YYY[argc * 10];  // BOOOM
+  // CHECK: {{READ of size 1 at}}
+  // CHECK: {{located 0 bytes to the right of global variable}}
+  res += XXX[argc] + ZZZ[argc];
+  return res;
+}
diff --git a/test/asan/TestCases/Linux/global-overflow-lld.cc b/test/asan/TestCases/Linux/global-overflow-lld.cc
new file mode 100644
index 0000000..f4d0bc9
--- /dev/null
+++ b/test/asan/TestCases/Linux/global-overflow-lld.cc
@@ -0,0 +1,19 @@
+// Test that gc-sections-friendly instrumentation of globals does not introduce
+// false negatives with the LLD linker.
+// RUN: %clangxx_asan -fuse-ld=lld -Wl,-gc-sections -ffunction-sections -fdata-sections -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// REQUIRES: lld
+
+#include <string.h>
+int main(int argc, char **argv) {
+  static char XXX[10];
+  static char YYY[10];
+  static char ZZZ[10];
+  memset(XXX, 0, 10);
+  memset(YYY, 0, 10);
+  memset(ZZZ, 0, 10);
+  int res = YYY[argc * 10];  // BOOOM
+  // CHECK: {{READ of size 1 at}}
+  // CHECK: {{located 0 bytes to the right of global variable}}
+  res += XXX[argc] + ZZZ[argc];
+  return res;
+}
diff --git a/test/asan/TestCases/Linux/globals-gc-sections-lld.cc b/test/asan/TestCases/Linux/globals-gc-sections-lld.cc
new file mode 100644
index 0000000..0d8bcdd
--- /dev/null
+++ b/test/asan/TestCases/Linux/globals-gc-sections-lld.cc
@@ -0,0 +1,15 @@
+// RUN: %clangxx_asan %s -o %t -Wl,--gc-sections -fuse-ld=lld -ffunction-sections -fdata-sections -mllvm -asan-globals=0
+// RUN: %clangxx_asan %s -o %t -Wl,--gc-sections -fuse-ld=lld -ffunction-sections -fdata-sections -mllvm -asan-globals=1
+
+// https://code.google.com/p/address-sanitizer/issues/detail?id=260
+// REQUIRES: lld
+
+int undefined();
+
+// On i386 clang adds --export-dynamic when linking with ASan, which adds all
+// non-hidden globals to GC roots.
+__attribute__((visibility("hidden"))) int (*unused)() = undefined;
+
+int main() {
+        return 0;
+}
diff --git a/test/asan/TestCases/Linux/globals-gc-sections.cc b/test/asan/TestCases/Linux/globals-gc-sections.cc
deleted file mode 100644
index 72a9e94..0000000
--- a/test/asan/TestCases/Linux/globals-gc-sections.cc
+++ /dev/null
@@ -1,13 +0,0 @@
-// RUN: %clangxx_asan %s -o %t -Wl,--gc-sections -ffunction-sections -mllvm -asan-globals=0
-// RUN: %clangxx_asan %s -o %t -Wl,--gc-sections -ffunction-sections -mllvm -asan-globals=1
-
-// https://code.google.com/p/address-sanitizer/issues/detail?id=260
-// XFAIL: *
-
-int undefined();
-
-int (*unused)() = undefined;
-
-int main() {
-        return 0;
-}
diff --git a/test/asan/TestCases/Linux/init_fini_sections.cc b/test/asan/TestCases/Linux/init_fini_sections.cc
index c7234ee..3037b23 100644
--- a/test/asan/TestCases/Linux/init_fini_sections.cc
+++ b/test/asan/TestCases/Linux/init_fini_sections.cc
@@ -2,11 +2,18 @@
 
 #include <stdio.h>
 
+int c = 0;
+
 static void foo() {
-  printf("foo\n");
+  ++c;
+}
+
+static void fini() {
+  printf("fini\n");
 }
 
 int main() {
+  printf("c=%d\n", c);
   return 0;
 }
 
@@ -17,8 +24,7 @@
 void (*call_foo_2)(void) = &foo;
 
 __attribute__((section(".fini_array")))
-void (*call_foo_3)(void) = &foo;
+void (*call_foo_3)(void) = &fini;
 
-// CHECK: foo
-// CHECK: foo
-// CHECK: foo
+// CHECK: c=2
+// CHECK: fini
diff --git a/test/asan/TestCases/Linux/interface_symbols_linux.c b/test/asan/TestCases/Linux/interface_symbols_linux.cc
similarity index 93%
rename from test/asan/TestCases/Linux/interface_symbols_linux.c
rename to test/asan/TestCases/Linux/interface_symbols_linux.cc
index 33fdd5c..8c22976 100644
--- a/test/asan/TestCases/Linux/interface_symbols_linux.c
+++ b/test/asan/TestCases/Linux/interface_symbols_linux.cc
@@ -1,11 +1,11 @@
 // Check the presence of interface symbols in compiled file.
 
-// RUN: %clang_asan -O2 %s -o %t.exe
+// RUN: %clangxx_asan -O2 %s -o %t.exe
 // RUN: nm -D %t.exe | grep " [TWw] "                                          \
 // RUN:  | grep -o "\(__asan_\|__ubsan_\|__sancov_\|__sanitizer_\)[^ ]*"       \
 // RUN:  | grep -v "__sanitizer_syscall"                                       \
 // RUN:  | grep -v "__sanitizer_weak_hook"                                     \
-// RUN:  | grep -v "__ubsan_handle_dynamic_type_cache_miss"                    \
+// RUN:  | grep -v "__sancov_lowest_stack"                                     \
 // RUN:  | sed -e "s/__asan_version_mismatch_check_v[0-9]+/__asan_version_mismatch_check/" \
 // RUN:  > %t.exports
 //
diff --git a/test/asan/TestCases/Linux/longjmp_chk.c b/test/asan/TestCases/Linux/longjmp_chk.c
new file mode 100644
index 0000000..99a4a16
--- /dev/null
+++ b/test/asan/TestCases/Linux/longjmp_chk.c
@@ -0,0 +1,51 @@
+// Verify that use of longjmp() in a _FORTIFY_SOURCE'd library (without ASAN)
+// is correctly intercepted such that the stack is unpoisoned.
+// Note: it is essential that the external library is not built with ASAN,
+// otherwise it would be able to unpoison the stack before use.
+//
+// RUN: %clang -DIS_LIBRARY -D_FORTIFY_SOURCE=2 -O2 %s -c -o %t.o
+// RUN: %clang_asan -O2 %s %t.o -o %t
+// RUN: %run %t
+
+#ifdef IS_LIBRARY
+/* the library */
+#include <setjmp.h>
+#include <assert.h>
+#include <sanitizer/asan_interface.h>
+
+static jmp_buf jenv;
+
+void external_callme(void (*callback)(void)) {
+  if (setjmp(jenv) == 0) {
+    callback();
+  }
+}
+
+void external_longjmp(char *msg) {
+  longjmp(jenv, 1);
+}
+
+void external_check_stack(void) {
+  char buf[256] = "";
+  for (int i = 0; i < 256; i++) {
+    assert(!__asan_address_is_poisoned(buf + i));
+  }
+}
+#else
+/* main program */
+extern void external_callme(void (*callback)(void));
+extern void external_longjmp(char *msg);
+extern void external_check_stack(void);
+
+static void callback(void) {
+  char msg[16];   /* Note: this triggers addition of a redzone. */
+  /* Note: msg is passed to prevent compiler optimization from removing it. */
+  external_longjmp(msg);
+}
+
+int main() {
+  external_callme(callback);
+  external_check_stack();
+  return 0;
+}
+#endif
diff --git a/test/asan/TestCases/Linux/memmem_test.cc b/test/asan/TestCases/Linux/memmem_test.cc
index 661381c..a838cb5 100644
--- a/test/asan/TestCases/Linux/memmem_test.cc
+++ b/test/asan/TestCases/Linux/memmem_test.cc
@@ -15,10 +15,10 @@
   // A1: AddressSanitizer: stack-buffer-overflow
   // A1: {{#0.*memmem}}
   // A1-NEXT: {{#1.*main}}
-  // A1: 'a1' <== Memory access at offset
+  // A1: 'a1'{{.*}} <== Memory access at offset
   //
   // A2: AddressSanitizer: stack-buffer-overflow
   // A2: {{#0.*memmem}}
-  // A2: 'a2' <== Memory access at offset
+  // A2: 'a2'{{.*}} <== Memory access at offset
   return res == NULL;
 }
diff --git a/test/asan/TestCases/Linux/preinstalled_signal.cc b/test/asan/TestCases/Linux/preinstalled_signal.cc
new file mode 100644
index 0000000..ac4ea93
--- /dev/null
+++ b/test/asan/TestCases/Linux/preinstalled_signal.cc
@@ -0,0 +1,113 @@
+// clang-format off
+// RUN: %clangxx -std=c++11 %s -o %t
+// RUN: env LD_PRELOAD=%shared_libasan %env_asan_opts=handle_segv=1 not %run %t 2>&1 | FileCheck %s
+// RUN: env LD_PRELOAD=%shared_libasan %env_asan_opts=handle_segv=2 not %run %t 2>&1 | FileCheck %s
+
+// RUN: %clangxx -std=c++11 -DTEST_INSTALL_SIG_HANDLER %s -o %t
+// RUN: env LD_PRELOAD=%shared_libasan %env_asan_opts=handle_segv=0 not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-HANDLER
+// RUN: env LD_PRELOAD=%shared_libasan %env_asan_opts=handle_segv=1 not %run %t 2>&1 | FileCheck %s
+// RUN: env LD_PRELOAD=%shared_libasan %env_asan_opts=handle_segv=2 not %run %t 2>&1 | FileCheck %s
+
+// RUN: %clangxx -std=c++11 -DTEST_INSTALL_SIG_ACTION %s -o %t
+// RUN: env LD_PRELOAD=%shared_libasan %env_asan_opts=handle_segv=0 not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-ACTION
+// RUN: env LD_PRELOAD=%shared_libasan %env_asan_opts=handle_segv=1 not %run %t 2>&1 | FileCheck %s
+// RUN: env LD_PRELOAD=%shared_libasan %env_asan_opts=handle_segv=2 not %run %t 2>&1 | FileCheck %s
+
+// REQUIRES: asan-dynamic-runtime
+
+// This way of setting LD_PRELOAD does not work with Android test runner.
+// REQUIRES: !android
+// clang-format on
+
+#include <assert.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+const char *handler = nullptr;
+void SigHandler(int signum) { handler = "TestSigHandler"; }
+void SigAction(int, siginfo_t *, void *) { handler = "TestSigAction"; }
+
+struct KernelSigaction {
+
+#if defined(__mips__)
+  unsigned long flags;
+  __sighandler_t handler;
+#else
+  __sighandler_t handler;
+  unsigned long flags;
+#endif
+  void (*restorer)();
+  char unused[1024];
+};
+
+#if defined(__x86_64__)
+extern "C" void restorer();
+asm("restorer:mov $15,%rax\nsyscall");
+#endif
+
+int InternalSigaction(int sig, KernelSigaction *act, KernelSigaction *oact) {
+  if (act) {
+#if defined(__x86_64__)
+    act->flags |= 0x04000000;
+    act->restorer = &restorer;
+#endif
+  }
+  return syscall(__NR_rt_sigaction, sig, act, oact, NSIG / 8);
+}
+
+struct KernelSigaction pre_asan = {};
+
+static void Init() {
+  int res = InternalSigaction(SIGSEGV, nullptr, &pre_asan);
+  assert(res >= 0);
+  assert(pre_asan.handler == SIG_DFL || pre_asan.handler == SIG_IGN);
+#if defined(TEST_INSTALL_SIG_HANDLER)
+  pre_asan = {};
+  pre_asan.handler = &SigHandler;
+  res = InternalSigaction(SIGSEGV, &pre_asan, nullptr);
+  assert(res >= 0);
+#elif defined(TEST_INSTALL_SIG_ACTION)
+  pre_asan = {};
+  pre_asan.flags = SA_SIGINFO | SA_NODEFER;
+  pre_asan.handler = (__sighandler_t)&SigAction;
+  res = InternalSigaction(SIGSEGV, &pre_asan, nullptr);
+  assert(res >= 0);
+#endif
+}
+
+__attribute__((section(".preinit_array"), used))
+void (*__local_test_preinit)(void) = Init;
+
+bool ExpectUserHandler() {
+#if defined(TEST_INSTALL_SIG_HANDLER) || defined(TEST_INSTALL_SIG_ACTION)
+  return !strcmp(getenv("ASAN_OPTIONS"), "handle_segv=0");
+#endif
+  return false;
+}
+
+int main(int argc, char *argv[]) {
+  KernelSigaction post_asan = {};
+  InternalSigaction(SIGSEGV, nullptr, &post_asan);
+
+  assert(post_asan.handler != SIG_DFL);
+  assert(post_asan.handler != SIG_IGN);
+  assert(ExpectUserHandler() ==
+         (post_asan.handler == pre_asan.handler));
+
+  raise(SIGSEGV);
+  printf("%s\n", handler);
+  return 1;
+}
+
+// CHECK-NOT: TestSig
+// CHECK: AddressSanitizer:DEADLYSIGNAL
+
+// CHECK-HANDLER-NOT: AddressSanitizer:DEADLYSIGNAL
+// CHECK-HANDLER: TestSigHandler
+
+// CHECK-ACTION-NOT: AddressSanitizer:DEADLYSIGNAL
+// CHECK-ACTION: TestSigAction
diff --git a/test/asan/TestCases/Linux/print_memory_profile_test.cc b/test/asan/TestCases/Linux/print_memory_profile_test.cc
index 8909cca..e7896be 100644
--- a/test/asan/TestCases/Linux/print_memory_profile_test.cc
+++ b/test/asan/TestCases/Linux/print_memory_profile_test.cc
@@ -3,8 +3,9 @@
 // REQUIRES: leak-detection
 //
 // RUN: %clangxx_asan %s -o %t
-// RUN: %run %t 100 2>&1 | FileCheck %s --check-prefix=CHECK-100
-// RUN: %run %t 50 2>&1 | FileCheck %s --check-prefix=CHECK-50
+// RUN: %run %t 100 10 2>&1 | FileCheck %s --check-prefix=CHECK-100-10
+// RUN: %run %t 100 1  2>&1 | FileCheck %s --check-prefix=CHECK-100-1
+// RUN: %run %t 50  10 2>&1 | FileCheck %s --check-prefix=CHECK-50-10
 #include <sanitizer/common_interface_defs.h>
 
 #include <stdio.h>
@@ -13,7 +14,7 @@
 char *sink[1000];
 
 int main(int argc, char **argv) {
-  if (argc < 2)
+  if (argc < 3)
     return 1;
 
   int idx = 0;
@@ -22,12 +23,17 @@
   for (int i = 0; i < 28; i++)
     sink[idx++] = new char[24000];
 
-  __sanitizer_print_memory_profile(atoi(argv[1]));
+  __sanitizer_print_memory_profile(atoi(argv[1]), atoi(argv[2]));
 }
 
-// CHECK-100: Live Heap Allocations: {{.*}}; showing top 100%
-// CHECK-100: 2227000 byte(s) ({{.*}}%) in 17 allocation(s)
-// CHECK-100: 672000 byte(s) ({{.*}}%) in 28 allocation(s)
-// CHECK-50: Live Heap Allocations: {{.*}}; showing top 50%
-// CHECK-50: 2227000 byte(s) ({{.*}}%) in 17 allocation(s)
-// CHECK-50-NOT: allocation
+// CHECK-100-10: Live Heap Allocations: {{.*}}; showing top 100% (at most 10 unique contexts)
+// CHECK-100-10: 2227000 byte(s) ({{.*}}%) in 17 allocation(s)
+// CHECK-100-10: 672000 byte(s) ({{.*}}%) in 28 allocation(s)
+
+// CHECK-100-1: Live Heap Allocations: {{.*}}; showing top 100% (at most 1 unique contexts)
+// CHECK-100-1: 2227000 byte(s) ({{.*}}%) in 17 allocation(s)
+// CHECK-100-1-NOT: allocation
+
+// CHECK-50-10: Live Heap Allocations: {{.*}}; showing top 50% (at most 10 unique contexts)
+// CHECK-50-10: 2227000 byte(s) ({{.*}}%) in 17 allocation(s)
+// CHECK-50-10-NOT: allocation
diff --git a/test/asan/TestCases/Linux/pvalloc-overflow.cc b/test/asan/TestCases/Linux/pvalloc-overflow.cc
new file mode 100644
index 0000000..b47c626
--- /dev/null
+++ b/test/asan/TestCases/Linux/pvalloc-overflow.cc
@@ -0,0 +1,41 @@
+// RUN: %clangxx_asan  %s -o %t
+// RUN: ASAN_OPTIONS=allocator_may_return_null=0 not %run %t m1 2>&1 | FileCheck %s
+// RUN: ASAN_OPTIONS=allocator_may_return_null=1     %run %t m1 2>&1
+// RUN: ASAN_OPTIONS=allocator_may_return_null=0 not %run %t psm1 2>&1 | FileCheck %s
+// RUN: ASAN_OPTIONS=allocator_may_return_null=1     %run %t psm1 2>&1
+
+// UNSUPPORTED: freebsd, android
+
+// Checks that pvalloc overflows are caught. If the allocator is allowed to
+// return null, the errno should be set to ENOMEM.
+
+#include <assert.h>
+#include <errno.h>
+#include <malloc.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+
+int main(int argc, char *argv[]) {
+  void *p;
+  size_t page_size;
+
+  assert(argc == 2);
+
+  page_size = sysconf(_SC_PAGESIZE);
+
+  if (!strcmp(argv[1], "m1")) {
+    p = pvalloc((uintptr_t)-1);
+    assert(!p);
+    assert(errno == ENOMEM);
+  }
+  if (!strcmp(argv[1], "psm1")) {
+    p = pvalloc((uintptr_t)-(page_size - 1));
+    assert(!p);
+    assert(errno == ENOMEM);
+  }
+
+  return 0;
+}
+
+// CHECK: AddressSanitizer's allocator is terminating the process
diff --git a/test/asan/TestCases/Linux/read_binary_name_regtest.c b/test/asan/TestCases/Linux/read_binary_name_regtest.c
index b09096c..4130256 100644
--- a/test/asan/TestCases/Linux/read_binary_name_regtest.c
+++ b/test/asan/TestCases/Linux/read_binary_name_regtest.c
@@ -3,6 +3,7 @@
 // This test uses seccomp-BPF to restrict the readlink() system call and makes
 // sure ASan is still able to
 // RUN: not ls /usr/include/linux/seccomp.h || ( %clang_asan %s -o %t && not %run %t 2>&1 | FileCheck %s )
+// REQUIRES: shell
 // UNSUPPORTED: android
 
 #include <errno.h>
diff --git a/test/asan/TestCases/Linux/recoverable-lsan.cc b/test/asan/TestCases/Linux/recoverable-lsan.cc
new file mode 100644
index 0000000..9356453
--- /dev/null
+++ b/test/asan/TestCases/Linux/recoverable-lsan.cc
@@ -0,0 +1,22 @@
+// Ensure that output is the same but exit code depends on halt_on_error value
+// RUN: %clangxx_asan %s -o %t
+// RUN: %env_asan_opts="halt_on_error=0" %run %t 2>&1 | FileCheck %s
+// RUN: %env_asan_opts="halt_on_error=1" not %run %t 2>&1 | FileCheck %s
+// RUN: not %run %t 2>&1 | FileCheck %s
+// REQUIRES: leak-detection
+// UNSUPPORTED: android 
+
+#include <stdlib.h>
+
+int f() {
+  volatile int *a = (int *)malloc(20);
+  a[0] = 1;
+  return a[0];
+}
+
+int main() {
+  f();
+  f();
+}
+
+// CHECK: LeakSanitizer: detected memory leaks
diff --git a/test/asan/TestCases/Linux/release_to_os_test.cc b/test/asan/TestCases/Linux/release_to_os_test.cc
index c85bcbb..3e28ffd 100644
--- a/test/asan/TestCases/Linux/release_to_os_test.cc
+++ b/test/asan/TestCases/Linux/release_to_os_test.cc
@@ -1,18 +1,21 @@
 // Tests ASAN_OPTIONS=allocator_release_to_os=1
-//
 
 // RUN: %clangxx_asan -std=c++11 %s -o %t
 // RUN: %env_asan_opts=allocator_release_to_os_interval_ms=0 %run %t 2>&1 | FileCheck %s --check-prefix=RELEASE
 // RUN: %env_asan_opts=allocator_release_to_os_interval_ms=-1 %run %t 2>&1 | FileCheck %s --check-prefix=NO_RELEASE
-//
+// RUN: %env_asan_opts=allocator_release_to_os_interval_ms=-1 %run %t force 2>&1 | FileCheck %s --check-prefix=FORCE_RELEASE
+
 // REQUIRES: x86_64-target-arch
-#include <stdlib.h>
-#include <stdio.h>
+
 #include <algorithm>
-#include <stdint.h>
 #include <assert.h>
 #include <random>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
 
+#include <sanitizer/allocator_interface.h>
 #include <sanitizer/asan_interface.h>
 
 void MallocReleaseStress() {
@@ -39,10 +42,13 @@
     delete[] p;
 }
 
-int main() {
+int main(int argc, char **argv) {
   MallocReleaseStress();
+  if (argc > 1 && !strcmp("force", argv[1]))
+    __sanitizer_purge_allocator();
   __asan_print_accumulated_stats();
 }
 
 // RELEASE: mapped:{{.*}}releases: {{[1-9]}}
 // NO_RELEASE: mapped:{{.*}}releases: 0
+// FORCE_RELEASE: mapped:{{.*}}releases: {{[1-9]}}
diff --git a/test/asan/TestCases/Linux/sanbox_read_proc_self_maps_test.cc b/test/asan/TestCases/Linux/sanbox_read_proc_self_maps_test.cc
new file mode 100644
index 0000000..d9099ed
--- /dev/null
+++ b/test/asan/TestCases/Linux/sanbox_read_proc_self_maps_test.cc
@@ -0,0 +1,28 @@
+// REQUIRES: x86_64-target-arch
+// RUN: %clangxx_asan  %s -o %t
+// RUN: not %run %t 2>&1 | FileCheck %s
+#include <sanitizer/common_interface_defs.h>
+#include <sched.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+int main() {
+  __sanitizer_sandbox_arguments args = {0};
+  // should cache /proc/self/maps
+  __sanitizer_sandbox_on_notify(&args);
+
+  if (unshare(CLONE_NEWUSER)) {
+    printf("unshare failed\n");
+    return 1;
+  }
+
+  // remove access to /proc/self/maps
+  if (chroot("/tmp")) {
+    printf("chroot failed\n");
+    return 2;
+  }
+
+  *(volatile int*)0x42 = 0;
+// CHECK-NOT: CHECK failed
+}
diff --git a/test/asan/TestCases/Linux/swapcontext_annotation.cc b/test/asan/TestCases/Linux/swapcontext_annotation.cc
index 56e8119..44189c0 100644
--- a/test/asan/TestCases/Linux/swapcontext_annotation.cc
+++ b/test/asan/TestCases/Linux/swapcontext_annotation.cc
@@ -4,10 +4,11 @@
 // RUN: %clangxx_asan -std=c++11 -lpthread -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
 // RUN: %clangxx_asan -std=c++11 -lpthread -O2 %s -o %t && %run %t 2>&1 | FileCheck %s
 // RUN: %clangxx_asan -std=c++11 -lpthread -O3 %s -o %t && %run %t 2>&1 | FileCheck %s
-// RUN: %clangxx_asan -std=c++11 -lpthread -O0 %s -o %t && %run %t 2>&1 | FileCheck <( seq 60 | xargs -i -- grep LOOPCHECK %s ) --check-prefix LOOPCHECK
-// RUN: %clangxx_asan -std=c++11 -lpthread -O1 %s -o %t && %run %t 2>&1 | FileCheck <( seq 60 | xargs -i -- grep LOOPCHECK %s ) --check-prefix LOOPCHECK
-// RUN: %clangxx_asan -std=c++11 -lpthread -O2 %s -o %t && %run %t 2>&1 | FileCheck <( seq 60 | xargs -i -- grep LOOPCHECK %s ) --check-prefix LOOPCHECK
-// RUN: %clangxx_asan -std=c++11 -lpthread -O3 %s -o %t && %run %t 2>&1 | FileCheck <( seq 60 | xargs -i -- grep LOOPCHECK %s ) --check-prefix LOOPCHECK
+// RUN: seq 60 | xargs -i -- grep LOOPCHECK %s > %t.checks
+// RUN: %clangxx_asan -std=c++11 -lpthread -O0 %s -o %t && %run %t 2>&1 | FileCheck %t.checks --check-prefix LOOPCHECK
+// RUN: %clangxx_asan -std=c++11 -lpthread -O1 %s -o %t && %run %t 2>&1 | FileCheck %t.checks --check-prefix LOOPCHECK
+// RUN: %clangxx_asan -std=c++11 -lpthread -O2 %s -o %t && %run %t 2>&1 | FileCheck %t.checks --check-prefix LOOPCHECK
+// RUN: %clangxx_asan -std=c++11 -lpthread -O3 %s -o %t && %run %t 2>&1 | FileCheck %t.checks --check-prefix LOOPCHECK
 
 //
 // This test is too subtle to try on non-x86 arch for now.
diff --git a/test/asan/TestCases/Linux/textdomain.c b/test/asan/TestCases/Linux/textdomain.c
new file mode 100644
index 0000000..9e249d9
--- /dev/null
+++ b/test/asan/TestCases/Linux/textdomain.c
@@ -0,0 +1,13 @@
+// RUN: %clang_asan -O0 -g %s -o %t
+// RUN: %env_asan_opts=strict_string_checks=1 %run %t
+
+// Android NDK does not have libintl.h
+// UNSUPPORTED: android
+
+#include <stdlib.h>
+#include <libintl.h>
+
+int main() {
+  textdomain(NULL);
+  return 0;
+}
diff --git a/test/asan/TestCases/Linux/uar_signals.cc b/test/asan/TestCases/Linux/uar_signals.cc
index f42c3f6..f96a2fe 100644
--- a/test/asan/TestCases/Linux/uar_signals.cc
+++ b/test/asan/TestCases/Linux/uar_signals.cc
@@ -1,12 +1,13 @@
 // This test checks that the implementation of use-after-return
 // is async-signal-safe.
-// RUN: %clangxx_asan -O1 %s -o %t -pthread && %run %t
+// RUN: %clangxx_asan -std=c++11 -O1 %s -o %t -pthread && %run %t
 // REQUIRES: stable-runtime
 #include <signal.h>
 #include <stdlib.h>
 #include <stdio.h>
 #include <sys/time.h>
 #include <pthread.h>
+#include <initializer_list>
 
 int *g;
 int n_signals;
@@ -17,7 +18,6 @@
   int local;
   g = &local;
   n_signals++;
-  // printf("s: %p\n", &local);
 }
 
 static void EnableSigprof(Sigaction SignalHandler) {
@@ -49,22 +49,29 @@
   RecursiveFunction(depth - 1);
 }
 
-void *Thread(void *) {
-  RecursiveFunction(18);
+void *FastThread(void *) {
+  RecursiveFunction(1);
+  return NULL;
+}
+
+void *SlowThread(void *) {
+  RecursiveFunction(1);
   return NULL;
 }
 
 int main(int argc, char **argv) {
   EnableSigprof(SignalHandler);
 
-  for (int i = 0; i < 4; i++) {
-    fprintf(stderr, ".");
-    const int kNumThread = sizeof(void*) == 8 ? 16 : 8;
-    pthread_t t[kNumThread];
-    for (int i = 0; i < kNumThread; i++)
-      pthread_create(&t[i], 0, Thread, 0);
-    for (int i = 0; i < kNumThread; i++)
-      pthread_join(t[i], 0);
+  for (auto Thread : {&FastThread, &SlowThread}) {
+    for (int i = 0; i < 1000; i++) {
+      fprintf(stderr, ".");
+      const int kNumThread = sizeof(void*) == 8 ? 32 : 8;
+      pthread_t t[kNumThread];
+      for (int i = 0; i < kNumThread; i++)
+        pthread_create(&t[i], 0, Thread, 0);
+      for (int i = 0; i < kNumThread; i++)
+        pthread_join(t[i], 0);
+    }
+    fprintf(stderr, "\n");
   }
-  fprintf(stderr, "\n");
 }
diff --git a/test/asan/TestCases/Posix/allow_user_segv.cc b/test/asan/TestCases/Posix/allow_user_segv.cc
deleted file mode 100644
index 69c1df9..0000000
--- a/test/asan/TestCases/Posix/allow_user_segv.cc
+++ /dev/null
@@ -1,59 +0,0 @@
-// Regression test for
-// https://code.google.com/p/address-sanitizer/issues/detail?id=180
-
-// RUN: %clangxx_asan -O0 %s -o %t && %env_asan_opts=allow_user_segv_handler=true not %run %t 2>&1 | FileCheck %s
-// RUN: %clangxx_asan -O2 %s -o %t && %env_asan_opts=allow_user_segv_handler=true not %run %t 2>&1 | FileCheck %s
-
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-struct sigaction original_sigaction_sigbus;
-struct sigaction original_sigaction_sigsegv;
-
-void User_OnSIGSEGV(int signum, siginfo_t *siginfo, void *context) {
-  fprintf(stderr, "User sigaction called\n");
-  struct sigaction original_sigaction;
-  if (signum == SIGBUS)
-    original_sigaction = original_sigaction_sigbus;
-  else if (signum == SIGSEGV)
-    original_sigaction = original_sigaction_sigsegv;
-  else {
-    printf("Invalid signum");
-    exit(1);
-  }
-  if (original_sigaction.sa_flags | SA_SIGINFO)
-    original_sigaction.sa_sigaction(signum, siginfo, context);
-  else
-    original_sigaction.sa_handler(signum);
-}
-
-int DoSEGV() {
-  volatile int *x = 0;
-  return *x;
-}
-
-int InstallHandler(int signum, struct sigaction *original_sigaction) {
-  struct sigaction user_sigaction;
-  user_sigaction.sa_sigaction = User_OnSIGSEGV;
-  user_sigaction.sa_flags = SA_SIGINFO;
-  if (sigaction(signum, &user_sigaction, original_sigaction)) {
-    perror("sigaction");
-    return 1;
-  }
-  return 0;
-}
-
-int main() {
-  // Let's install handlers for both SIGSEGV and SIGBUS, since pre-Yosemite
-  // 32-bit Darwin triggers SIGBUS instead.
-  if (InstallHandler(SIGSEGV, &original_sigaction_sigsegv)) return 1;
-  if (InstallHandler(SIGBUS, &original_sigaction_sigbus)) return 1;
-  fprintf(stderr, "User sigaction installed\n");
-  return DoSEGV();
-}
-
-// CHECK: User sigaction installed
-// CHECK-NEXT: User sigaction called
-// CHECK-NEXT: ASAN:DEADLYSIGNAL
-// CHECK: AddressSanitizer: SEGV on unknown address
diff --git a/test/asan/TestCases/Posix/asan-sigbus.cpp b/test/asan/TestCases/Posix/asan-sigbus.cpp
index e07392b..c91ecbd 100644
--- a/test/asan/TestCases/Posix/asan-sigbus.cpp
+++ b/test/asan/TestCases/Posix/asan-sigbus.cpp
@@ -1,8 +1,10 @@
 // Check handle_bus flag
 // Defaults to true
 // RUN: %clangxx_asan -std=c++11 %s -o %t
-// RUN: not %run %t %T/file 2>&1 | FileCheck %s -check-prefix=CHECK-BUS
-// RUN: %env_asan_opts=handle_sigbus=false not --crash %run %t %T/file 2>&1 | FileCheck %s
+// RUN: not %run %t 2>&1 | FileCheck %s -check-prefix=CHECK-BUS
+// RUN: %env_asan_opts=handle_sigbus=0 not --crash %run %t 2>&1 | FileCheck %s
+
+// UNSUPPORTED: ios
 
 #include <assert.h>
 #include <fcntl.h>
@@ -10,11 +12,11 @@
 #include <stdlib.h>
 #include <sys/mman.h>
 #include <unistd.h>
+#include <string>
 
 char array[4096];
 int main(int argc, char **argv) {
-  assert(argc > 1);
-  int fd = open(argv[1], O_RDWR | O_CREAT, 0700);
+  int fd = open((std::string(argv[0]) + ".m").c_str(), O_RDWR | O_CREAT, 0700);
   if (fd < 0) {
     perror("open");
     exit(1);
diff --git a/test/asan/TestCases/Posix/asan-symbolize-sanity-test.cc b/test/asan/TestCases/Posix/asan-symbolize-sanity-test.cc
index 2c4c133..11d5928 100644
--- a/test/asan/TestCases/Posix/asan-symbolize-sanity-test.cc
+++ b/test/asan/TestCases/Posix/asan-symbolize-sanity-test.cc
@@ -9,6 +9,8 @@
 // RUN: %env_asan_opts=symbolize=0 not %run %t 2>&1 | %asan_symbolize | FileCheck %s
 // REQUIRES: stable-runtime
 
+// UNSUPPORTED: ios
+
 #if !defined(SHARED_LIB)
 #include <dlfcn.h>
 #include <stdio.h>
diff --git a/test/asan/TestCases/Posix/closed-fds.cc b/test/asan/TestCases/Posix/closed-fds.cc
index b7bca26..b2604bb 100644
--- a/test/asan/TestCases/Posix/closed-fds.cc
+++ b/test/asan/TestCases/Posix/closed-fds.cc
@@ -2,11 +2,13 @@
 // symbolizer still works.
 
 // RUN: rm -f %t.log.*
-// RUN: %clangxx_asan -O0 %s -o %t 2>&1 && %env_asan_opts=log_path='"%t.log"':verbosity=2 not %run %t 2>&1
+// RUN: %clangxx_asan -O0 %s -o %t
+// RUN: %env_asan_opts=log_path='"%t.log"':verbosity=2 not %run %t
 // RUN: FileCheck %s --check-prefix=CHECK-FILE < %t.log.*
 
 // FIXME: copy %t.log back from the device and re-enable on Android.
 // UNSUPPORTED: android
+// UNSUPPORTED: ios
 
 #include <assert.h>
 #include <stdio.h>
diff --git a/test/asan/TestCases/Posix/concurrent_overflow.cc b/test/asan/TestCases/Posix/concurrent_overflow.cc
new file mode 100644
index 0000000..e9b9899
--- /dev/null
+++ b/test/asan/TestCases/Posix/concurrent_overflow.cc
@@ -0,0 +1,33 @@
+// RUN: %clangxx_asan -O0 -w %s -o %t && not %run %t 2>&1 | FileCheck %s
+
+// Checks that concurrent reports will not trigger false "nested bug" reports.
+// Regression test for https://github.com/google/sanitizers/issues/858
+
+#include <pthread.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+static void *start_routine(void *arg) {
+  volatile int *counter = (volatile int *)arg;
+  char buf[8];
+  __atomic_sub_fetch(counter, 1, __ATOMIC_SEQ_CST);
+  while (*counter)
+    ;
+  buf[0] = buf[9];
+  return 0;
+}
+
+int main(void) {
+  const int n_threads = 8;
+  int i, counter = n_threads;
+  pthread_t thread;
+
+  for (i = 0; i < n_threads; ++i)
+    pthread_create(&thread, NULL, &start_routine, (void *)&counter);
+  sleep(5);
+  return 0;
+}
+
+// CHECK-NOT: nested bug
+// CHECK: ERROR: AddressSanitizer: stack-buffer-overflow on address
+// CHECK: SUMMARY: AddressSanitizer: stack-buffer-overflow
diff --git a/test/asan/TestCases/Posix/coverage-caller-callee.cc b/test/asan/TestCases/Posix/coverage-caller-callee.cc
deleted file mode 100644
index fb8b9bf..0000000
--- a/test/asan/TestCases/Posix/coverage-caller-callee.cc
+++ /dev/null
@@ -1,75 +0,0 @@
-// Test caller-callee coverage with large number of threads
-// and various numbers of callers and callees.
-
-// RUN: %clangxx_asan -fsanitize-coverage=edge,indirect-calls %s -o %t
-// RUN: %env_asan_opts=coverage=1:verbosity=1 %run %t 10 1 2>&1 | FileCheck %s --check-prefix=CHECK-10-1
-// RUN: %env_asan_opts=coverage=1:verbosity=1 %run %t 9  2 2>&1 | FileCheck %s --check-prefix=CHECK-9-2
-// RUN: %env_asan_opts=coverage=1:verbosity=1 %run %t 7  3 2>&1 | FileCheck %s --check-prefix=CHECK-7-3
-// RUN: %env_asan_opts=coverage=1:verbosity=1 %run %t 17 1 2>&1 | FileCheck %s --check-prefix=CHECK-17-1
-// RUN: %env_asan_opts=coverage=1:verbosity=1 %run %t 15 2 2>&1 | FileCheck %s --check-prefix=CHECK-15-2
-// RUN: %env_asan_opts=coverage=1:verbosity=1 %run %t 18 3 2>&1 | FileCheck %s --check-prefix=CHECK-18-3
-// RUN: rm -f caller-callee*.sancov
-//
-// REQUIRES: asan-64-bits
-// UNSUPPORTED: android
-//
-// CHECK-10-1: CovDump: 10 caller-callee pairs written
-// CHECK-9-2: CovDump: 18 caller-callee pairs written
-// CHECK-7-3: CovDump: 21 caller-callee pairs written
-// CHECK-17-1: CovDump: 14 caller-callee pairs written
-// CHECK-15-2: CovDump: 28 caller-callee pairs written
-// CHECK-18-3: CovDump: 42 caller-callee pairs written
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <pthread.h>
-int P = 0;
-struct Foo {virtual void f() {if (P) printf("Foo::f()\n");}};
-struct Foo1 : Foo {virtual void f() {if (P) printf("%d\n", __LINE__);}};
-struct Foo2 : Foo {virtual void f() {if (P) printf("%d\n", __LINE__);}};
-struct Foo3 : Foo {virtual void f() {if (P) printf("%d\n", __LINE__);}};
-struct Foo4 : Foo {virtual void f() {if (P) printf("%d\n", __LINE__);}};
-struct Foo5 : Foo {virtual void f() {if (P) printf("%d\n", __LINE__);}};
-struct Foo6 : Foo {virtual void f() {if (P) printf("%d\n", __LINE__);}};
-struct Foo7 : Foo {virtual void f() {if (P) printf("%d\n", __LINE__);}};
-struct Foo8 : Foo {virtual void f() {if (P) printf("%d\n", __LINE__);}};
-struct Foo9 : Foo {virtual void f() {if (P) printf("%d\n", __LINE__);}};
-struct Foo10 : Foo {virtual void f() {if (P) printf("%d\n", __LINE__);}};
-struct Foo11 : Foo {virtual void f() {if (P) printf("%d\n", __LINE__);}};
-struct Foo12 : Foo {virtual void f() {if (P) printf("%d\n", __LINE__);}};
-struct Foo13 : Foo {virtual void f() {if (P) printf("%d\n", __LINE__);}};
-struct Foo14 : Foo {virtual void f() {if (P) printf("%d\n", __LINE__);}};
-struct Foo15 : Foo {virtual void f() {if (P) printf("%d\n", __LINE__);}};
-struct Foo16 : Foo {virtual void f() {if (P) printf("%d\n", __LINE__);}};
-struct Foo17 : Foo {virtual void f() {if (P) printf("%d\n", __LINE__);}};
-struct Foo18 : Foo {virtual void f() {if (P) printf("%d\n", __LINE__);}};
-struct Foo19 : Foo {virtual void f() {if (P) printf("%d\n", __LINE__);}};
-
-Foo *foo[20] = {
-    new Foo,   new Foo1,  new Foo2,  new Foo3,  new Foo4,  new Foo5,  new Foo6,
-    new Foo7,  new Foo8,  new Foo9,  new Foo10, new Foo11, new Foo12, new Foo13,
-    new Foo14, new Foo15, new Foo16, new Foo17, new Foo18, new Foo19,
-};
-
-int n_functions = 10;
-int n_callers = 2;
-
-void *Thread(void *arg) {
-  if (n_callers >= 1) for (int i = 0; i < 2000; i++) foo[i % n_functions]->f();
-  if (n_callers >= 2) for (int i = 0; i < 2000; i++) foo[i % n_functions]->f();
-  if (n_callers >= 3) for (int i = 0; i < 2000; i++) foo[i % n_functions]->f();
-  return arg;
-}
-
-int main(int argc, char **argv) {
-  if (argc >= 2)
-    n_functions = atoi(argv[1]);
-  if (argc >= 3)
-    n_callers = atoi(argv[2]);
-  const int kNumThreads = 16;
-  pthread_t t[kNumThreads];
-  for (int i = 0; i < kNumThreads; i++)
-    pthread_create(&t[i], 0, Thread, 0);
-  for (int i = 0; i < kNumThreads; i++)
-    pthread_join(t[i], 0);
-}
diff --git a/test/asan/TestCases/Posix/coverage-direct-activation.cc b/test/asan/TestCases/Posix/coverage-direct-activation.cc
deleted file mode 100644
index 0af3296..0000000
--- a/test/asan/TestCases/Posix/coverage-direct-activation.cc
+++ /dev/null
@@ -1,59 +0,0 @@
-// Test for direct coverage writing enabled at activation time.
-
-// RUN: %clangxx_asan -fsanitize-coverage=func -DSHARED %s -shared -o %dynamiclib -fPIC
-// RUN: %clangxx -c -DSO_DIR=\"%T\" %s -o %t.o
-// RUN: %clangxx_asan -fsanitize-coverage=func %t.o %libdl -o %t
-
-// RUN: rm -rf %T/coverage-direct-activation
-
-// RUN: mkdir -p %T/coverage-direct-activation/normal
-// RUN: %env_asan_opts=coverage=1,coverage_direct=0,coverage_dir=%T/coverage-direct-activation/normal:verbosity=1 %run %t %dynamiclib
-// RUN: %sancov print %T/coverage-direct-activation/normal/*.sancov >%T/coverage-direct-activation/normal/out.txt
-
-// RUN: mkdir -p %T/coverage-direct-activation/direct
-// RUN: %env_asan_opts=start_deactivated=1,coverage_direct=1,verbosity=1 \
-// RUN:   ASAN_ACTIVATION_OPTIONS=coverage=1,coverage_dir=%T/coverage-direct-activation/direct %run %t %dynamiclib
-// RUN: cd %T/coverage-direct-activation/direct
-// RUN: %sancov rawunpack *.sancov.raw
-// RUN: %sancov print *.sancov >out.txt
-// RUN: cd ../..
-
-// Test start_deactivated=1,coverage=1 in ASAN_OPTIONS.
-
-// RUN: diff -u coverage-direct-activation/normal/out.txt coverage-direct-activation/direct/out.txt
-
-// RUN: mkdir -p %T/coverage-direct-activation/direct2
-// RUN: %env_asan_opts=start_deactivated=1,coverage=1,coverage_direct=1,verbosity=1 \
-// RUN:   ASAN_ACTIVATION_OPTIONS=coverage_dir=%T/coverage-direct-activation/direct2 %run %t %dynamiclib
-// RUN: cd %T/coverage-direct-activation/direct2
-// RUN: %sancov rawunpack *.sancov.raw
-// RUN: %sancov print *.sancov >out.txt
-// RUN: cd ../..
-
-// RUN: diff -u coverage-direct-activation/normal/out.txt coverage-direct-activation/direct2/out.txt
-
-// XFAIL: android
-
-#include <assert.h>
-#include <dlfcn.h>
-#include <stdio.h>
-#include <unistd.h>
-
-#ifdef SHARED
-extern "C" {
-void bar() { printf("bar\n"); }
-}
-#else
-
-int main(int argc, char **argv) {
-  fprintf(stderr, "PID: %d\n", getpid());
-  assert(argc > 1);
-  void *handle1 = dlopen(argv[1], RTLD_LAZY);  // %dynamiclib
-  assert(handle1);
-  void (*bar1)() = (void (*)())dlsym(handle1, "bar");
-  assert(bar1);
-  bar1();
-
-  return 0;
-}
-#endif
diff --git a/test/asan/TestCases/Posix/coverage-direct-large.cc b/test/asan/TestCases/Posix/coverage-direct-large.cc
deleted file mode 100644
index 367a566..0000000
--- a/test/asan/TestCases/Posix/coverage-direct-large.cc
+++ /dev/null
@@ -1,65 +0,0 @@
-// Test for direct coverage writing with lots of data.
-// Current implementation maps output file in chunks of 64K. This test overflows
-// 1 chunk.
-
-// RUN: %clangxx_asan -fsanitize-coverage=func -O0 -DSHARED %s -shared -o %dynamiclib -fPIC
-// RUN: %clangxx_asan -fsanitize-coverage=func -O0 %s %libdl -o %t
-
-// RUN: rm -rf %T/coverage-direct-large
-
-// RUN: mkdir -p %T/coverage-direct-large/normal && cd %T/coverage-direct-large/normal
-// RUN: %env_asan_opts=coverage=1:coverage_direct=0:verbosity=1 %run %t %dynamiclib
-// RUN: %sancov print *.sancov >out.txt
-// RUN: cd ../..
-
-// RUN: mkdir -p %T/coverage-direct-large/direct && cd %T/coverage-direct-large/direct
-// RUN: %env_asan_opts=coverage=1:coverage_direct=1:verbosity=1 %run %t %dynamiclib
-// RUN: %sancov rawunpack *.sancov.raw
-// RUN: %sancov print *.sancov >out.txt
-// RUN: cd ../..
-
-// RUN: diff -u coverage-direct-large/normal/out.txt coverage-direct-large/direct/out.txt
-//
-// XFAIL: android
-
-#define F0(Q, x) Q(x)
-#define F1(Q, x)                                                          \
-  F0(Q, x##0) F0(Q, x##1) F0(Q, x##2) F0(Q, x##3) F0(Q, x##4) F0(Q, x##5) \
-      F0(Q, x##6) F0(Q, x##7) F0(Q, x##8) F0(Q, x##9)
-#define F2(Q, x)                                                          \
-  F1(Q, x##0) F1(Q, x##1) F1(Q, x##2) F1(Q, x##3) F1(Q, x##4) F1(Q, x##5) \
-      F1(Q, x##6) F1(Q, x##7) F1(Q, x##8) F1(Q, x##9)
-#define F3(Q, x)                                                          \
-  F2(Q, x##0) F2(Q, x##1) F2(Q, x##2) F2(Q, x##3) F2(Q, x##4) F2(Q, x##5) \
-      F2(Q, x##6) F2(Q, x##7) F2(Q, x##8) F2(Q, x##9)
-#define F4(Q, x)                                                          \
-  F3(Q, x##0) F3(Q, x##1) F3(Q, x##2) F3(Q, x##3) F3(Q, x##4) F3(Q, x##5) \
-      F3(Q, x##6) F3(Q, x##7) F3(Q, x##8) F3(Q, x##9)
-
-#define DECL(x) __attribute__((noinline)) static void x() {}
-#define CALL(x) x();
-
-F4(DECL, f)
-
-#ifdef SHARED
-extern "C" void so_entry() {
-  F4(CALL, f)
-}  
-#else
-
-#include <assert.h>
-#include <dlfcn.h>
-#include <stdio.h>
-int main(int argc, char **argv) {
-  F4(CALL, f)
-  assert(argc > 1);
-  void *handle1 = dlopen(argv[1], RTLD_LAZY);  // %dynamiclib
-  assert(handle1);
-  void (*so_entry)() = (void (*)())dlsym(handle1, "so_entry");
-  assert(so_entry);
-  so_entry();
-
-  return 0;
-}
-
-#endif // SHARED
diff --git a/test/asan/TestCases/Posix/coverage-direct.cc b/test/asan/TestCases/Posix/coverage-direct.cc
deleted file mode 100644
index 8caa9c5..0000000
--- a/test/asan/TestCases/Posix/coverage-direct.cc
+++ /dev/null
@@ -1,83 +0,0 @@
-// Test for direct coverage writing with dlopen at coverage level 1 to 3.
-
-// RUN: %clangxx_asan -fsanitize-coverage=func -DSHARED %s -shared -o %dynamiclib -fPIC
-// RUN: %clangxx_asan -fsanitize-coverage=func %s %libdl -o %t
-
-// RUN: rm -rf %T/coverage-direct
-
-// RUN: mkdir -p %T/coverage-direct/normal
-// RUN: %env_asan_opts=coverage=1:coverage_direct=0:coverage_dir=%T/coverage-direct/normal:verbosity=1 %run %t %dynamiclib
-// RUN: %sancov print %T/coverage-direct/normal/*.sancov >%T/coverage-direct/normal/out.txt
-
-// RUN: mkdir -p %T/coverage-direct/direct
-// RUN: %env_asan_opts=coverage=1:coverage_direct=1:coverage_dir=%T/coverage-direct/direct:verbosity=1 %run %t %dynamiclib
-// RUN: cd %T/coverage-direct/direct
-// RUN: %sancov rawunpack *.sancov.raw
-// RUN: %sancov print *.sancov >out.txt
-// RUN: cd ../..
-
-// RUN: diff -u coverage-direct/normal/out.txt coverage-direct/direct/out.txt
-
-
-// RUN: %clangxx_asan -fsanitize-coverage=bb -DSHARED %s -shared -o %dynamiclib -fPIC
-// RUN: %clangxx_asan -fsanitize-coverage=bb -DSO_DIR=\"%T\" %s %libdl -o %t
-
-// RUN: rm -rf %T/coverage-direct
-
-// RUN: mkdir -p %T/coverage-direct/normal
-// RUN: %env_asan_opts=coverage=1:coverage_direct=0:coverage_dir=%T/coverage-direct/normal:verbosity=1 %run %t %dynamiclib
-// RUN: %sancov print %T/coverage-direct/normal/*.sancov >%T/coverage-direct/normal/out.txt
-
-// RUN: mkdir -p %T/coverage-direct/direct
-// RUN: %env_asan_opts=coverage=1:coverage_direct=1:coverage_dir=%T/coverage-direct/direct:verbosity=1 %run %t %dynamiclib
-// RUN: cd %T/coverage-direct/direct
-// RUN: %sancov rawunpack *.sancov.raw
-// RUN: %sancov print *.sancov >out.txt
-// RUN: cd ../..
-
-// RUN: diff -u coverage-direct/normal/out.txt coverage-direct/direct/out.txt
-
-
-// RUN: %clangxx_asan -fsanitize-coverage=edge -DSHARED %s -shared -o %dynamiclib -fPIC
-// RUN: %clangxx_asan -fsanitize-coverage=edge -DSO_DIR=\"%T\" %s %libdl -o %t
-
-// RUN: rm -rf %T/coverage-direct
-
-// RUN: mkdir -p %T/coverage-direct/normal
-// RUN: %env_asan_opts=coverage=1:coverage_direct=0:coverage_dir=%T/coverage-direct/normal:verbosity=1 %run %t %dynamiclib
-// RUN: %sancov print %T/coverage-direct/normal/*.sancov >%T/coverage-direct/normal/out.txt
-
-// RUN: mkdir -p %T/coverage-direct/direct
-// RUN: %env_asan_opts=coverage=1:coverage_direct=1:coverage_dir=%T/coverage-direct/direct:verbosity=1 %run %t %dynamiclib
-// RUN: cd %T/coverage-direct/direct
-// RUN: %sancov rawunpack *.sancov.raw
-// RUN: %sancov print *.sancov >out.txt
-// RUN: cd ../..
-
-// RUN: diff -u coverage-direct/normal/out.txt coverage-direct/direct/out.txt
-
-// XFAIL: android
-
-#include <assert.h>
-#include <dlfcn.h>
-#include <stdio.h>
-#include <unistd.h>
-
-#ifdef SHARED
-extern "C" {
-void bar() { printf("bar\n"); }
-}
-#else
-
-int main(int argc, char **argv) {
-  fprintf(stderr, "PID: %d\n", getpid());
-  assert(argc > 1);
-  void *handle1 = dlopen(argv[1], RTLD_LAZY);
-  assert(handle1);
-  void (*bar1)() = (void (*)())dlsym(handle1, "bar");
-  assert(bar1);
-  bar1();
-
-  return 0;
-}
-#endif
diff --git a/test/asan/TestCases/Posix/coverage-fork-direct.cc b/test/asan/TestCases/Posix/coverage-fork-direct.cc
deleted file mode 100644
index c196719..0000000
--- a/test/asan/TestCases/Posix/coverage-fork-direct.cc
+++ /dev/null
@@ -1,38 +0,0 @@
-// RUN: %clangxx_asan -fsanitize-coverage=func %s -o %t
-// RUN: rm -rf %T/coverage-fork-direct
-// RUN: mkdir -p %T/coverage-fork-direct && cd %T/coverage-fork-direct
-// RUN: (%env_asan_opts=coverage=1:coverage_direct=1:verbosity=1 %run %t; \
-// RUN:  %sancov rawunpack *.sancov.raw; %sancov print *.sancov) 2>&1
-//
-// XFAIL: android
-
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-
-__attribute__((noinline))
-void foo() { printf("foo\n"); }
-
-__attribute__((noinline))
-void bar() { printf("bar\n"); }
-
-__attribute__((noinline))
-void baz() { printf("baz\n"); }
-
-int main(int argc, char **argv) {
-  pid_t child_pid = fork();
-  if (child_pid == 0) {
-    fprintf(stderr, "Child PID: %d\n", getpid());
-    baz();
-  } else {
-    fprintf(stderr, "Parent PID: %d\n", getpid());
-    foo();
-    bar();
-  }
-  return 0;
-}
-
-// CHECK-DAG: Child PID: [[ChildPID:[0-9]+]]
-// CHECK-DAG: Parent PID: [[ParentPID:[0-9]+]]
-// CHECK-DAG: read 3 PCs from {{.*}}.[[ParentPID]].sancov
-// CHECK-DAG: read 1 PCs from {{.*}}.[[ChildPID]].sancov
diff --git a/test/asan/TestCases/Posix/coverage-fork.cc b/test/asan/TestCases/Posix/coverage-fork.cc
index 40ce72e..da6e3c2 100644
--- a/test/asan/TestCases/Posix/coverage-fork.cc
+++ b/test/asan/TestCases/Posix/coverage-fork.cc
@@ -1,9 +1,13 @@
-// RUN: %clangxx_asan -fsanitize-coverage=func %s -o %t
+// RUN: %clangxx_asan -fsanitize-coverage=func,trace-pc-guard %s -o %t
 // RUN: rm -rf %T/coverage-fork
 // RUN: mkdir -p %T/coverage-fork && cd %T/coverage-fork
-// RUN: %env_asan_opts=coverage=1:coverage_direct=0:verbosity=1 %run %t 2>&1 | FileCheck %s
+// RUN: %env_asan_opts=coverage=1:verbosity=1 %run %t 2>&1 | FileCheck %s
 //
 // UNSUPPORTED: android
+//
+// Ideally a forked-subprocess should only report it's own coverage,
+// not parent's one. But trace-pc-guard currently does nothing special for fork,
+// and thus this test is relaxed.
 
 #include <stdio.h>
 #include <string.h>
@@ -32,6 +36,6 @@
 }
 
 // CHECK-DAG: Child PID: [[ChildPID:[0-9]+]]
-// CHECK-DAG: [[ChildPID]].sancov: 1 PCs written
+// CHECK-DAG: [[ChildPID]].sancov: {{.*}} PCs written
 // CHECK-DAG: Parent PID: [[ParentPID:[0-9]+]]
 // CHECK-DAG: [[ParentPID]].sancov: 3 PCs written
diff --git a/test/asan/TestCases/Posix/coverage-maybe-open-file.cc b/test/asan/TestCases/Posix/coverage-maybe-open-file.cc
deleted file mode 100644
index cab3d57..0000000
--- a/test/asan/TestCases/Posix/coverage-maybe-open-file.cc
+++ /dev/null
@@ -1,32 +0,0 @@
-// FIXME: https://code.google.com/p/address-sanitizer/issues/detail?id=316
-// XFAIL: android
-//
-// RUN: %clangxx_asan -fsanitize-coverage=func %s -o %t
-// RUN: rm -rf %T/coverage-maybe-open-file
-// RUN: mkdir -p %T/coverage-maybe-open-file && cd %T/coverage-maybe-open-file
-// RUN: %env_asan_opts=coverage=1 %run %t | FileCheck %s --check-prefix=CHECK-success
-// RUN: %env_asan_opts=coverage=0 %run %t | FileCheck %s --check-prefix=CHECK-fail
-// RUN: [ "$(cat test.sancov.packed)" == "test" ]
-// RUN: cd .. && rm -rf %T/coverage-maybe-open-file
-
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-
-#include <sanitizer/coverage_interface.h>
-
-// FIXME: the code below might not work on Windows.
-int main(int argc, char **argv) {
-  int fd = __sanitizer_maybe_open_cov_file("test");
-  if (fd > 0) {
-    printf("SUCCESS\n");
-    const char s[] = "test\n";
-    write(fd, s, strlen(s));
-    close(fd);
-  } else {
-    printf("FAIL\n");
-  }
-}
-
-// CHECK-success: SUCCESS
-// CHECK-fail: FAIL
diff --git a/test/asan/TestCases/Posix/coverage-module-unloaded.cc b/test/asan/TestCases/Posix/coverage-module-unloaded.cc
index d492af6..322a1ba 100644
--- a/test/asan/TestCases/Posix/coverage-module-unloaded.cc
+++ b/test/asan/TestCases/Posix/coverage-module-unloaded.cc
@@ -1,15 +1,15 @@
 // Check that unloading a module doesn't break coverage dumping for remaining
 // modules.
-// RUN: %clangxx_asan -fsanitize-coverage=func -DSHARED %s -shared -o %dynamiclib1 -fPIC
-// RUN: %clangxx_asan -fsanitize-coverage=func -DSHARED %s -shared -o %dynamiclib2 -fPIC
-// RUN: %clangxx_asan -fsanitize-coverage=func %s %libdl -o %t
-// RUN: mkdir -p %T/coverage-module-unloaded && cd %T/coverage-module-unloaded
-// RUN: %env_asan_opts=coverage=1:verbosity=1 %run %t %dynamiclib1 %dynamiclib2 2>&1        | FileCheck %s
-// RUN: %env_asan_opts=coverage=1:verbosity=1 %run %t %dynamiclib1 %dynamiclib2 foo 2>&1    | FileCheck %s
-// RUN: rm -r %T/coverage-module-unloaded
+// RUN: %clangxx_asan -fsanitize-coverage=func,trace-pc-guard -DSHARED %s -shared -o %dynamiclib1 -fPIC
+// RUN: %clangxx_asan -fsanitize-coverage=func,trace-pc-guard -DSHARED %s -shared -o %dynamiclib2 -fPIC
+// RUN: %clangxx_asan -fsanitize-coverage=func,trace-pc-guard %s %libdl -o %t.exe
+// RUN: mkdir -p %t.tmp/coverage-module-unloaded && cd %t.tmp/coverage-module-unloaded
+// RUN: %env_asan_opts=coverage=1:verbosity=1 %run %t.exe %dynamiclib1 %dynamiclib2 2>&1        | FileCheck %s
+// RUN: %env_asan_opts=coverage=1:verbosity=1 %run %t.exe %dynamiclib1 %dynamiclib2 foo 2>&1    | FileCheck %s
 //
 // https://code.google.com/p/address-sanitizer/issues/detail?id=263
 // XFAIL: android
+// UNSUPPORTED: ios
 
 #include <assert.h>
 #include <dlfcn.h>
@@ -47,8 +47,5 @@
 #endif
 
 // CHECK: PID: [[PID:[0-9]+]]
-// CHECK: [[PID]].sancov: 1 PCs written
-// CHECK: coverage-module-unloaded{{.*}}1.[[PID]]
-// CHECK: coverage-module-unloaded{{.*}}2.[[PID]]
-// Even though we've unloaded one of the libs we still dump the coverage file
-// for that lib (although the data will be inaccurate, if at all useful)
+// CHECK-DAG: exe{{.*}}[[PID]].sancov: {{.*}}PCs written
+// CHECK-DAG: dynamic{{.*}}[[PID]].sancov: {{.*}}PCs written
diff --git a/test/asan/TestCases/Posix/coverage-reset.cc b/test/asan/TestCases/Posix/coverage-reset.cc
new file mode 100644
index 0000000..201bf8e
--- /dev/null
+++ b/test/asan/TestCases/Posix/coverage-reset.cc
@@ -0,0 +1,65 @@
+// RUN: %clangxx_asan -fsanitize-coverage=func,trace-pc-guard -DSHARED %s -shared -o %dynamiclib -fPIC %ld_flags_rpath_so
+// RUN: %clangxx_asan -fsanitize-coverage=func,trace-pc-guard %s %ld_flags_rpath_exe -o %t
+// RUN: rm -rf %T/coverage-reset && mkdir -p %T/coverage-reset && cd %T/coverage-reset
+// RUN: %env_asan_opts=coverage=1:verbosity=1 %run %t 2>&1 | FileCheck %s
+//
+// UNSUPPORTED: ios
+
+#include <stdio.h>
+
+#include <sanitizer/coverage_interface.h>
+
+#ifdef SHARED
+void bar1() { printf("bar1\n"); }
+void bar2() { printf("bar2\n"); }
+#else
+__attribute__((noinline)) void foo1() { printf("foo1\n"); }
+__attribute__((noinline)) void foo2() { printf("foo2\n"); }
+void bar1();
+void bar2();
+
+int main(int argc, char **argv) {
+  fprintf(stderr, "RESET\n");
+  __sanitizer_cov_reset();
+  foo1();
+  foo2();
+  bar1();
+  bar2();
+  __sanitizer_cov_dump();
+// CHECK: RESET
+// CHECK-DAG: SanitizerCoverage: ./coverage-reset.cc{{.*}}.sancov: 2 PCs written
+// CHECK-DAG: SanitizerCoverage: ./libcoverage-reset.cc{{.*}}.sancov: 2 PCs written
+
+  fprintf(stderr, "RESET\n");
+  __sanitizer_cov_reset();
+  foo1();
+  bar1();
+  __sanitizer_cov_dump();
+// CHECK: RESET
+// CHECK-DAG: SanitizerCoverage: ./coverage-reset.cc{{.*}}.sancov: 1 PCs written
+// CHECK-DAG: SanitizerCoverage: ./libcoverage-reset.cc{{.*}}.sancov: 1 PCs written
+
+  fprintf(stderr, "RESET\n");
+  __sanitizer_cov_reset();
+  foo1();
+  foo2();
+  __sanitizer_cov_dump();
+// CHECK: RESET
+// CHECK: SanitizerCoverage: ./coverage-reset.cc{{.*}}.sancov: 2 PCs written
+
+  fprintf(stderr, "RESET\n");
+  __sanitizer_cov_reset();
+  bar1();
+  bar2();
+  __sanitizer_cov_dump();
+// CHECK: RESET
+// CHECK: SanitizerCoverage: ./libcoverage-reset.cc{{.*}}.sancov: 2 PCs written
+
+  fprintf(stderr, "RESET\n");
+  __sanitizer_cov_reset();
+// CHECK: RESET
+
+  bar2();
+// CHECK: SanitizerCoverage: ./libcoverage-reset.cc{{.*}}.sancov: 1 PCs written
+}
+#endif
diff --git a/test/asan/TestCases/Posix/coverage-sandboxing.cc b/test/asan/TestCases/Posix/coverage-sandboxing.cc
deleted file mode 100644
index c4e6bc7..0000000
--- a/test/asan/TestCases/Posix/coverage-sandboxing.cc
+++ /dev/null
@@ -1,86 +0,0 @@
-// RUN: %clangxx_asan -fsanitize-coverage=bb -DSHARED %s -shared -o %dynamiclib -fPIC %ld_flags_rpath_so
-// RUN: %clangxx_asan -fsanitize-coverage=func %s -o %t %ld_flags_rpath_exe
-
-// RUN: rm -rf %T/coverage_sandboxing_test
-// RUN: mkdir %T/coverage_sandboxing_test && cd %T/coverage_sandboxing_test
-// RUN: mkdir vanilla && cd vanilla
-// RUN: %env_asan_opts=coverage=1:verbosity=1 %run %t 2>&1  | FileCheck %s --check-prefix=CHECK-vanilla
-// RUN: mkdir ../sandbox1 && cd ../sandbox1
-// RUN: %env_asan_opts=coverage=1:verbosity=1 %run %t a 2>&1 | FileCheck %s --check-prefix=CHECK-sandbox
-// RUN: %sancov unpack coverage_sandboxing_test.sancov.packed
-// RUN: mkdir ../sandbox2 && cd ../sandbox2
-// RUN: %env_asan_opts=coverage=1:verbosity=1 %run %t a b 2>&1 | FileCheck %s --check-prefix=CHECK-sandbox
-// RUN: %sancov unpack coverage_sandboxing_test.sancov.packed
-// RUN: cd ..
-// RUN: %sancov print vanilla/`basename %dynamiclib`*.sancov > vanilla.txt
-// RUN: %sancov print sandbox1/`basename %dynamiclib`*.sancov > sandbox1.txt
-// RUN: %sancov print sandbox2/`basename %dynamiclib`*.sancov > sandbox2.txt
-// RUN: diff vanilla.txt sandbox1.txt
-// RUN: diff vanilla.txt sandbox2.txt
-// RUN: rm -r %T/coverage_sandboxing_test
-
-// https://code.google.com/p/address-sanitizer/issues/detail?id=263
-// XFAIL: android
-
-#include <assert.h>
-#include <fcntl.h>
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-
-#include <sanitizer/coverage_interface.h>
-
-#define bb0(n)                        \
-  case n:                             \
-    fprintf(stderr, "foo: %d\n", n);  \
-    break;
-
-#define bb1(n) bb0(n) bb0(n + 1)
-#define bb2(n) bb1(n) bb1(n + 2)
-#define bb3(n) bb2(n) bb2(n + 4)
-#define bb4(n) bb3(n) bb3(n + 8)
-#define bb5(n) bb4(n) bb4(n + 16)
-#define bb6(n) bb5(n) bb5(n + 32)
-#define bb7(n) bb6(n) bb6(n + 64)
-#define bb8(n) bb7(n) bb7(n + 128)
-
-#ifdef SHARED
-void foo(int i) {
-  switch(i) {
-    // 256 basic blocks
-    bb8(0)
-  }
-}
-#else
-extern void foo(int i);
-
-int main(int argc, char **argv) {
-  assert(argc <= 3);
-  for (int i = 0; i < 256; i++) foo(i);
-  fprintf(stderr, "PID: %d\n", getpid());
-  if (argc == 1) {
-    // Vanilla mode, dump to individual files.
-    return 0;
-  }
-  // Dump to packed file.
-  int fd = creat("coverage_sandboxing_test.sancov.packed", 0660);
-  __sanitizer_sandbox_arguments args = {0};
-  args.coverage_sandboxed = 1;
-  args.coverage_fd = fd;
-  if (argc == 2)
-    // Write to packed file, do not split into blocks.
-    args.coverage_max_block_size = 0;
-  else if (argc == 3)
-    // Write to packed file, split into blocks (as if writing to a socket).
-    args.coverage_max_block_size = 100;
-  __sanitizer_sandbox_on_notify(&args);
-  return 0;
-}
-#endif
-
-// CHECK-vanilla: PID: [[PID:[0-9]+]]
-// CHECK-vanilla: .so.[[PID]].sancov: 257 PCs written
-// CHECK-vanilla: [[PID]].sancov: 1 PCs written
-
-// CHECK-sandbox: PID: [[PID:[0-9]+]]
-// CHECK-sandbox: 257 PCs written to packed file
diff --git a/test/asan/TestCases/Posix/coverage.cc b/test/asan/TestCases/Posix/coverage.cc
index 7c1c494..a78560a 100644
--- a/test/asan/TestCases/Posix/coverage.cc
+++ b/test/asan/TestCases/Posix/coverage.cc
@@ -1,16 +1,16 @@
-// RUN: %clangxx_asan -fsanitize-coverage=func -DSHARED %s -shared -o %dynamiclib -fPIC %ld_flags_rpath_so
-// RUN: %clangxx_asan -fsanitize-coverage=func %s %ld_flags_rpath_exe -o %t
+// RUN: %clangxx_asan -fsanitize-coverage=func,trace-pc-guard -DSHARED %s -shared -o %dynamiclib -fPIC %ld_flags_rpath_so
+// RUN: %clangxx_asan -fsanitize-coverage=func,trace-pc-guard %s %ld_flags_rpath_exe -o %t
 // RUN: rm -rf %T/coverage && mkdir -p %T/coverage && cd %T/coverage
 // RUN: %env_asan_opts=coverage=1:verbosity=1 %run %t 2>&1         | FileCheck %s --check-prefix=CHECK-main
-// RUN: %sancov print `ls coverage.*sancov | grep -v '.so'` 2>&1 | FileCheck %s --check-prefix=CHECK-SANCOV1
+// RUN: %sancov print coverage.*sancov 2>&1 | FileCheck %s --check-prefix=CHECK-SANCOV1
 // RUN: %env_asan_opts=coverage=1:verbosity=1 %run %t foo 2>&1     | FileCheck %s --check-prefix=CHECK-foo
-// RUN: %sancov print `ls coverage.*sancov | grep -v '.so'` 2>&1 | FileCheck %s --check-prefix=CHECK-SANCOV2
+// RUN: %sancov print coverage.*sancov 2>&1 | FileCheck %s --check-prefix=CHECK-SANCOV2
 // RUN: %env_asan_opts=coverage=1:verbosity=1 %run %t bar 2>&1     | FileCheck %s --check-prefix=CHECK-bar
-// RUN: %sancov print `ls *coverage.*sancov | grep -v '.so'` 2>&1 | FileCheck %s --check-prefix=CHECK-SANCOV2
+// RUN: %sancov print coverage.*sancov 2>&1 | FileCheck %s --check-prefix=CHECK-SANCOV2
 // RUN: %env_asan_opts=coverage=1:verbosity=1 %run %t foo bar 2>&1 | FileCheck %s --check-prefix=CHECK-foo-bar
-// RUN: %sancov print `ls *coverage.*sancov | grep -v '.so'` 2>&1 | FileCheck %s --check-prefix=CHECK-SANCOV2
-// RUN: %sancov print `ls *coverage.*sancov | grep '.so'` 2>&1 | FileCheck %s --check-prefix=CHECK-SANCOV1
-// RUN: %sancov merge `ls *coverage.*sancov | grep -v '.so'` > merged-cov
+// RUN: %sancov print coverage.*sancov 2>&1 | FileCheck %s --check-prefix=CHECK-SANCOV2
+// RUN: %sancov print libcoverage.*sancov 2>&1 | FileCheck %s --check-prefix=CHECK-SANCOV1
+// RUN: %sancov merge coverage.*sancov > merged-cov
 // RUN: %sancov print merged-cov 2>&1 | FileCheck %s --check-prefix=CHECK-SANCOV2
 // RUN: %env_asan_opts=coverage=1:verbosity=1 not %run %t foo bar 4    2>&1 | FileCheck %s --check-prefix=CHECK-report
 // RUN: %env_asan_opts=coverage=1:verbosity=1 not %run %t foo bar 4 5  2>&1 | FileCheck %s --check-prefix=CHECK-segv
@@ -18,8 +18,8 @@
 //
 // https://code.google.com/p/address-sanitizer/issues/detail?id=263
 // XFAIL: android
+// UNSUPPORTED: ios
 
-#include <sanitizer/coverage_interface.h>
 #include <assert.h>
 #include <stdio.h>
 #include <string.h>
@@ -37,12 +37,8 @@
 int main(int argc, char **argv) {
   fprintf(stderr, "PID: %d\n", getpid());
   for (int i = 1; i < argc; i++) {
-    if (!strcmp(argv[i], "foo")) {
-      uintptr_t old_coverage = __sanitizer_get_total_unique_coverage();
+    if (!strcmp(argv[i], "foo"))
       foo();
-      uintptr_t new_coverage = __sanitizer_get_total_unique_coverage();
-      assert(new_coverage > old_coverage);
-    }
     if (!strcmp(argv[i], "bar"))
       bar();
   }
@@ -63,12 +59,12 @@
 // CHECK-foo-NOT: .so.[[PID]]
 //
 // CHECK-bar: PID: [[PID:[0-9]+]]
-// CHECK-bar: .so.[[PID]].sancov: 1 PCs written
-// CHECK-bar: [[PID]].sancov: 1 PCs written
+// CHECK-bar-DAG: .so.[[PID]].sancov: 1 PCs written
+// CHECK-bar-DAG: [[PID]].sancov: 1 PCs written
 //
 // CHECK-foo-bar: PID: [[PID:[0-9]+]]
-// CHECK-foo-bar: so.[[PID]].sancov: 1 PCs written
-// CHECK-foo-bar: [[PID]].sancov: 2 PCs written
+// CHECK-foo-bar-DAG: so.[[PID]].sancov: 1 PCs written
+// CHECK-foo-bar-DAG: [[PID]].sancov: 2 PCs written
 //
 // CHECK-report: AddressSanitizer: global-buffer-overflow
 // CHECK-report: PCs written
diff --git a/test/asan/TestCases/Posix/deep_call_stack.cc b/test/asan/TestCases/Posix/deep_call_stack.cc
index 18ba563..e6e82a4 100644
--- a/test/asan/TestCases/Posix/deep_call_stack.cc
+++ b/test/asan/TestCases/Posix/deep_call_stack.cc
@@ -1,9 +1,13 @@
 // Check that UAR mode can handle very deep recusrion.
-// RUN: %clangxx_asan -O2 %s -o %t && \
-// RUN:   (ulimit -s 4096; %env_asan_opts=detect_stack_use_after_return=1 %run %t) 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -O2 %s -o %t
+// RUN: ulimit -s 4096
+// RUN: %env_asan_opts=detect_stack_use_after_return=1 %run %t 2>&1 | FileCheck %s
 
 // Also check that use_sigaltstack+verbosity doesn't crash.
 // RUN: %env_asan_opts=verbosity=1:use_sigaltstack=1:detect_stack_use_after_return=1 %run %t  | FileCheck %s
+
+// UNSUPPORTED: ios
+
 #include <stdio.h>
 
 __attribute__((noinline))
diff --git a/test/asan/TestCases/Posix/fread_fwrite.cc b/test/asan/TestCases/Posix/fread_fwrite.cc
new file mode 100644
index 0000000..c062926
--- /dev/null
+++ b/test/asan/TestCases/Posix/fread_fwrite.cc
@@ -0,0 +1,34 @@
+// RUN: %clangxx_asan -g %s -o %t
+// RUN: not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-FWRITE
+// RUN: not %run %t 1 2>&1 | FileCheck %s --check-prefix=CHECK-FREAD
+
+#include <stdio.h>
+#include <stdlib.h>
+
+int test_fread() {
+  FILE *f = fopen("/dev/zero", "r");
+  char buf[2];
+  fread(buf, sizeof(buf), 2, f); // BOOM
+  fclose(f);
+  return 0;
+}
+
+int test_fwrite() {
+  FILE *f = fopen("/dev/null", "w");
+  char buf[2];
+  fwrite(buf, sizeof(buf), 2, f); // BOOM
+  return fclose(f);
+}
+
+int main(int argc, char *argv[]) {
+  if (argc > 1)
+    test_fread();
+  else
+    test_fwrite();
+  return 0;
+}
+
+// CHECK-FREAD: {{.*ERROR: AddressSanitizer: stack-buffer-overflow}}
+// CHECK-FREAD: #{{.*}} in {{(wrap_|__interceptor_)?}}fread
+// CHECK-FWRITE: {{.*ERROR: AddressSanitizer: stack-buffer-overflow}}
+// CHECK-FWRITE: #{{.*}} in {{(wrap_|__interceptor_)?}}fwrite
diff --git a/test/asan/TestCases/Posix/glob.cc b/test/asan/TestCases/Posix/glob.cc
index e0eeb33..46d4a0d 100644
--- a/test/asan/TestCases/Posix/glob.cc
+++ b/test/asan/TestCases/Posix/glob.cc
@@ -1,5 +1,6 @@
 // FIXME: https://code.google.com/p/address-sanitizer/issues/detail?id=316
 // XFAIL: android
+// UNSUPPORTED: ios
 //
 // RUN: %clangxx_asan -O0 %s -o %t && %run %t %p 2>&1 | FileCheck %s
 // RUN: %clangxx_asan -O3 %s -o %t && %run %t %p 2>&1 | FileCheck %s
diff --git a/test/asan/TestCases/Posix/halt_on_error-signals.c b/test/asan/TestCases/Posix/halt_on_error-signals.c
index 6bdf30b..931ab86 100644
--- a/test/asan/TestCases/Posix/halt_on_error-signals.c
+++ b/test/asan/TestCases/Posix/halt_on_error-signals.c
@@ -2,10 +2,9 @@
 //
 // RUN: %clang_asan -fsanitize-recover=address -pthread %s -o %t
 //
-// RUN: rm -f %t.log
-// RUN: %env_asan_opts=halt_on_error=false:suppress_equal_pcs=false %run %t 100 >>%t.log 2>&1 || true
+// RUN: %env_asan_opts=halt_on_error=false:suppress_equal_pcs=false %run %t 100 >%t.log 2>&1 || true
 // Collision will almost always get triggered but we still need to check the unlikely case:
-// RUN: FileCheck --check-prefix=CHECK-COLLISION %s < %t.log || FileCheck --check-prefix=CHECK-NO-COLLISION %s < %t.log
+// RUN: FileCheck --check-prefix=CHECK-COLLISION %s <%t.log || FileCheck --check-prefix=CHECK-NO-COLLISION %s <%t.log
 
 #define _SVID_SOURCE 1  // SA_NODEFER
 
diff --git a/test/asan/TestCases/Posix/halt_on_error-torture.cc b/test/asan/TestCases/Posix/halt_on_error-torture.cc
index 5d7eff0..d4b1235 100644
--- a/test/asan/TestCases/Posix/halt_on_error-torture.cc
+++ b/test/asan/TestCases/Posix/halt_on_error-torture.cc
@@ -2,21 +2,17 @@
 //
 // RUN: %clangxx_asan -fsanitize-recover=address -pthread %s -o %t
 //
-// RUN: rm -f 1.txt
-// RUN: %env_asan_opts=halt_on_error=false:suppress_equal_pcs=false %run %t 1 10 >>1.txt 2>&1
-// RUN: FileCheck %s < 1.txt
-// RUN: [ $(grep -c 'ERROR: AddressSanitizer: use-after-poison' 1.txt) -eq 10 ]
-// RUN: FileCheck --check-prefix=CHECK-NO-COLLISION %s < 1.txt
+// RUN: %env_asan_opts=halt_on_error=false:suppress_equal_pcs=false %run %t 1 10 >%t.log 2>&1
+// RUN: grep 'ERROR: AddressSanitizer: use-after-poison' %t.log | count 10
+// RUN: FileCheck %s <%t.log
 //
-// Collisions are unlikely but still possible so we need the ||.
-// RUN: rm -f 10.txt
-// RUN: %env_asan_opts=halt_on_error=false:suppress_equal_pcs=false %run %t 10 20 >>10.txt 2>&1 || true
-// RUN: FileCheck --check-prefix=CHECK-COLLISION %s < 10.txt || FileCheck --check-prefix=CHECK-NO-COLLISION %s < 10.txt
+// RUN: %env_asan_opts=halt_on_error=false:suppress_equal_pcs=false:exitcode=0 %run %t 10 20 >%t.log 2>&1
+// RUN: grep 'ERROR: AddressSanitizer: use-after-poison' %t.log | count 200
+// RUN: FileCheck %s <%t.log
 //
-// Collisions are unlikely but still possible so we need the ||.
-// RUN: rm -f 20.txt
-// RUN: %env_asan_opts=halt_on_error=false %run %t 10 20 >>20.txt 2>&1 || true
-// RUN: FileCheck --check-prefix=CHECK-COLLISION %s < 20.txt || FileCheck --check-prefix=CHECK-NO-COLLISION %s < 20.txt
+// RUN: %env_asan_opts=halt_on_error=false:exitcode=0 %run %t 10 20 >%t.log 2>&1
+// RUN: grep 'ERROR: AddressSanitizer: use-after-poison' %t.log | count 1
+// RUN: FileCheck %s <%t.log
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -38,11 +34,10 @@
   unsigned seed = (unsigned)(size_t)arg;
 
   volatile char tmp[2];
-  __asan_poison_memory_region(&tmp, sizeof(tmp)); 
+  __asan_poison_memory_region(&tmp, sizeof(tmp));
 
   for (size_t i = 0; i < niter; ++i) {
     random_delay(&seed);
-    // Expect error collisions here
     // CHECK: ERROR: AddressSanitizer: use-after-poison
     volatile int idx = 0;
     tmp[idx] = 0;
@@ -76,8 +71,7 @@
     }
   }
 
-  // CHECK-COLLISION: AddressSanitizer: nested bug in the same thread, aborting
-  // CHECK-NO-COLLISION: All threads terminated
+  // CHECK: All threads terminated
   printf("All threads terminated\n");
 
   delete [] tids;
diff --git a/test/asan/TestCases/Posix/halt_on_error_suppress_equal_pcs.cc b/test/asan/TestCases/Posix/halt_on_error_suppress_equal_pcs.cc
index 98ef851..6b92618 100644
--- a/test/asan/TestCases/Posix/halt_on_error_suppress_equal_pcs.cc
+++ b/test/asan/TestCases/Posix/halt_on_error_suppress_equal_pcs.cc
@@ -6,17 +6,16 @@
 // RUN: %env_asan_opts=halt_on_error=false %run %t 2>&1 | FileCheck %s
 //
 // Check that we die after reaching different reports number threshold.
-// RUN: rm -f %t1.log
-// RUN: %env_asan_opts=halt_on_error=false not %run %t 1 >> %t1.log 2>&1
-// RUN: [ $(grep -c 'ERROR: AddressSanitizer: stack-buffer-overflow' %t1.log) -eq 25 ]
+// RUN: %env_asan_opts=halt_on_error=false not %run %t 1 >%t1.log 2>&1
+// RUN: grep 'ERROR: AddressSanitizer: stack-buffer-overflow' %t1.log | count 25
 //
 // Check suppress_equal_pcs=true behavior is equal to default one.
 // RUN: %env_asan_opts=halt_on_error=false:suppress_equal_pcs=true %run %t 2>&1 | FileCheck %s
 //
 // Check suppress_equal_pcs=false behavior isn't equal to default one.
 // RUN: rm -f %t2.log
-// RUN: %env_asan_opts=halt_on_error=false:suppress_equal_pcs=false %run %t >> %t2.log 2>&1
-// RUN: [ $(grep -c 'ERROR: AddressSanitizer: stack-buffer-overflow' %t2.log) -eq 30 ]
+// RUN: %env_asan_opts=halt_on_error=false:suppress_equal_pcs=false %run %t >%t2.log 2>&1
+// RUN: grep 'ERROR: AddressSanitizer: stack-buffer-overflow' %t2.log | count 30
 
 #define ACCESS_ARRAY_FIVE_ELEMENTS(array, i)     \
   array[i] = i;                                  \
diff --git a/test/asan/TestCases/Posix/handle_abort_on_error.cc b/test/asan/TestCases/Posix/handle_abort_on_error.cc
index fa8cdd4..1be060e 100644
--- a/test/asan/TestCases/Posix/handle_abort_on_error.cc
+++ b/test/asan/TestCases/Posix/handle_abort_on_error.cc
@@ -1,6 +1,8 @@
 // Regression test: this used to abort() in SIGABRT handler in an infinite loop.
 // RUN: %clangxx_asan -O0 %s -o %t && %env_asan_opts=handle_abort=1,abort_on_error=1 not --crash %run %t 2>&1 | FileCheck %s
 
+// UNSUPPORTED: ios
+
 #include <stdlib.h>
 
 int main() {
diff --git a/test/asan/TestCases/Posix/new_array_cookie_test.cc b/test/asan/TestCases/Posix/new_array_cookie_test.cc
index dd50bf7..40a9b78 100644
--- a/test/asan/TestCases/Posix/new_array_cookie_test.cc
+++ b/test/asan/TestCases/Posix/new_array_cookie_test.cc
@@ -3,6 +3,9 @@
 // RUN:                                    not %run %t 2>&1  | FileCheck %s
 // RUN: %env_asan_opts=poison_array_cookie=1 not %run %t 2>&1  | FileCheck %s
 // RUN: %env_asan_opts=poison_array_cookie=0 not %run %t 2>&1  | FileCheck %s --check-prefix=NO_COOKIE
+
+// UNSUPPORTED: ios
+
 #include <stdio.h>
 #include <stdlib.h>
 struct C {
diff --git a/test/asan/TestCases/Posix/new_array_cookie_uaf_test.cc b/test/asan/TestCases/Posix/new_array_cookie_uaf_test.cc
index f36da2b..335a567 100644
--- a/test/asan/TestCases/Posix/new_array_cookie_uaf_test.cc
+++ b/test/asan/TestCases/Posix/new_array_cookie_uaf_test.cc
@@ -2,6 +2,9 @@
 // RUN: %clangxx_asan -O3 %s -o %t
 // RUN: %env_asan_opts=poison_array_cookie=1 not %run %t 2>&1  | FileCheck %s --check-prefix=COOKIE
 // RUN: %env_asan_opts=poison_array_cookie=0 not %run %t 2>&1  | FileCheck %s --check-prefix=NO_COOKIE
+
+// UNSUPPORTED: ios
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <assert.h>
diff --git a/test/asan/TestCases/Posix/new_array_cookie_with_new_from_class.cc b/test/asan/TestCases/Posix/new_array_cookie_with_new_from_class.cc
index 0683e39..e7f7746 100644
--- a/test/asan/TestCases/Posix/new_array_cookie_with_new_from_class.cc
+++ b/test/asan/TestCases/Posix/new_array_cookie_with_new_from_class.cc
@@ -3,6 +3,9 @@
 // RUN: %clangxx_asan  %s -o %t && %run %t
 //
 // XFAIL: arm
+
+// UNSUPPORTED: ios
+
 #include <new>
 #include <stdlib.h>
 #include <stdint.h>
diff --git a/test/asan/TestCases/Posix/stack-overflow.cc b/test/asan/TestCases/Posix/stack-overflow.cc
index 8ef1618..d6b062e 100644
--- a/test/asan/TestCases/Posix/stack-overflow.cc
+++ b/test/asan/TestCases/Posix/stack-overflow.cc
@@ -16,6 +16,8 @@
 // RUN: not %run %t 2>&1 | FileCheck %s
 // REQUIRES: stable-runtime
 
+// UNSUPPORTED: ios
+
 #include <assert.h>
 #include <stdlib.h>
 #include <pthread.h>
diff --git a/test/asan/TestCases/Posix/stack-use-after-return.cc b/test/asan/TestCases/Posix/stack-use-after-return.cc
index cf114be..2da1a05 100644
--- a/test/asan/TestCases/Posix/stack-use-after-return.cc
+++ b/test/asan/TestCases/Posix/stack-use-after-return.cc
@@ -4,7 +4,7 @@
 // RUN: %clangxx_asan  -O3 %s -pthread -o %t && %env_asan_opts=detect_stack_use_after_return=1 not %run %t 2>&1 | FileCheck %s
 // RUN: %env_asan_opts=detect_stack_use_after_return=0 %run %t
 // Regression test for a CHECK failure with small stack size and large frame.
-// RUN: %clangxx_asan  -O3 %s -pthread -o %t -DkSize=10000 -DUseThread -DkStackSize=65536 && %env_asan_opts=detect_stack_use_after_return=1 not %run %t 2>&1 | FileCheck --check-prefix=THREAD %s
+// RUN: %clangxx_asan  -O3 %s -pthread -o %t -DkSize=10000 -DUseThread -DkStackSize=131072 && %env_asan_opts=detect_stack_use_after_return=1 not %run %t 2>&1 | FileCheck --check-prefix=THREAD %s
 //
 // Test that we can find UAR in a thread other than main:
 // RUN: %clangxx_asan  -DUseThread -O2 %s -pthread -o %t && %env_asan_opts=detect_stack_use_after_return=1 not %run %t 2>&1 | FileCheck --check-prefix=THREAD %s
@@ -14,8 +14,16 @@
 // RUN: %env_asan_opts=detect_stack_use_after_return=1:max_uar_stack_size_log=20:verbosity=1 not %run %t 2>&1 | FileCheck --check-prefix=CHECK-20 %s
 // RUN: %env_asan_opts=detect_stack_use_after_return=1:min_uar_stack_size_log=24:max_uar_stack_size_log=24:verbosity=1 not %run %t 2>&1 | FileCheck --check-prefix=CHECK-24 %s
 
-#include <stdio.h>
+// This test runs out of stack on AArch64.
+// UNSUPPORTED: aarch64
+
+// FIXME: Fix this test for dynamic runtime on armhf-linux.
+// UNSUPPORTED: armhf-linux && asan-dynamic-runtime
+
+#include <limits.h>
 #include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
 
 #ifndef kSize
 # define kSize 1
@@ -48,11 +56,11 @@
   // CHECK: WRITE of size 1 {{.*}} thread T0
   // CHECK:     #0{{.*}}Func2{{.*}}stack-use-after-return.cc:[[@LINE-2]]
   // CHECK: is located in stack of thread T0 at offset
-  // CHECK: 'local' <== Memory access at offset {{16|32}} is inside this variable
+  // CHECK: 'local'{{.*}} <== Memory access at offset {{16|32}} is inside this variable
   // THREAD: WRITE of size 1 {{.*}} thread T{{[1-9]}}
   // THREAD:     #0{{.*}}Func2{{.*}}stack-use-after-return.cc:[[@LINE-6]]
   // THREAD: is located in stack of thread T{{[1-9]}} at offset
-  // THREAD: 'local' <== Memory access at offset {{16|32}} is inside this variable
+  // THREAD: 'local'{{.*}} <== Memory access at offset {{16|32}} is inside this variable
   // CHECK-20: T0: FakeStack created:{{.*}} stack_size_log: 20
   // CHECK-24: T0: FakeStack created:{{.*}} stack_size_log: 24
 }
@@ -66,8 +74,31 @@
 #if UseThread
   pthread_attr_t attr;
   pthread_attr_init(&attr);
-  if (kStackSize > 0)
-    pthread_attr_setstacksize(&attr, kStackSize);
+  if (kStackSize > 0) {
+    size_t desired_stack_size = kStackSize;
+    if (desired_stack_size < PTHREAD_STACK_MIN) {
+      desired_stack_size = PTHREAD_STACK_MIN;
+    }
+
+    int ret = pthread_attr_setstacksize(&attr, desired_stack_size);
+    if (ret != 0) {
+      fprintf(stderr, "pthread_attr_setstacksize returned %d\n", ret);
+      abort();
+    }
+    
+    size_t stacksize_check;
+    ret = pthread_attr_getstacksize(&attr, &stacksize_check);
+    if (ret != 0) {
+      fprintf(stderr, "pthread_attr_getstacksize returned %d\n", ret);
+      abort();
+    }
+
+    if (stacksize_check != desired_stack_size) {
+      fprintf(stderr, "Unable to set stack size to %d, the stack size is %d.\n",
+              desired_stack_size, stacksize_check);
+      abort(); 
+    }
+  }
   pthread_t t;
   pthread_create(&t, &attr, Thread, 0);
   pthread_attr_destroy(&attr);
diff --git a/test/asan/TestCases/Posix/start-deactivated.cc b/test/asan/TestCases/Posix/start-deactivated.cc
index 2a2aa67..2870ffb 100644
--- a/test/asan/TestCases/Posix/start-deactivated.cc
+++ b/test/asan/TestCases/Posix/start-deactivated.cc
@@ -19,6 +19,7 @@
 // RUN:   ASAN_ACTIVATION_OPTIONS=help=1,handle_segv=0,verbosity=1 not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-UNSUPPORTED
 
 // XFAIL: arm-linux-gnueabi
+// UNSUPPORTED: ios
 
 // END.
 
diff --git a/test/asan/TestCases/Posix/strchr.c b/test/asan/TestCases/Posix/strchr.c
new file mode 100644
index 0000000..7086e13
--- /dev/null
+++ b/test/asan/TestCases/Posix/strchr.c
@@ -0,0 +1,34 @@
+// Test strchr for strict_string_checks=false does not look beyond necessary
+// char.
+// RUN: %clang_asan %s -o %t
+// RUN: %env_asan_opts=strict_string_checks=false %run %t 2>&1
+// RUN: %env_asan_opts=strict_string_checks=true not %run %t 2>&1 | FileCheck %s
+
+#include <assert.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+int main(int argc, char **argv) {
+  size_t page_size = sysconf(_SC_PAGE_SIZE);
+  size_t size = 2 * page_size;
+  char *s = (char *)mmap(0, size, PROT_READ | PROT_WRITE,
+                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  assert(s);
+  assert(((uintptr_t)s & (page_size - 1)) == 0);
+  memset(s, 'o', size);
+  s[size - 1] = 0;
+
+  char *p = s + page_size - 1;
+  *p = 'x';
+
+  if (mprotect(p + 1, 1, PROT_NONE))
+    return 1;
+  char *r = strchr(s, 'x');
+  // CHECK: AddressSanitizer: {{SEGV|BUS}} on unknown address
+  assert(r == p);
+
+  return 0;
+}
diff --git a/test/asan/TestCases/Posix/strndup_oob_test.cc b/test/asan/TestCases/Posix/strndup_oob_test.cc
new file mode 100644
index 0000000..7ea0b7a
--- /dev/null
+++ b/test/asan/TestCases/Posix/strndup_oob_test.cc
@@ -0,0 +1,27 @@
+// RUN: %clangxx_asan -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -O2 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -O3 %s -o %t && not %run %t 2>&1 | FileCheck %s
+
+// When built as C on Linux, strndup is transformed to __strndup.
+// RUN: %clangxx_asan -O3 -xc %s -o %t && not %run %t 2>&1 | FileCheck %s
+
+// Unwind problem on arm: "main" is missing from the allocation stack trace.
+// UNSUPPORTED: win32,s390,armv7l-unknown-linux-gnueabihf
+
+#include <string.h>
+
+char kString[] = "foo";
+
+int main(int argc, char **argv) {
+  char *copy = strndup(kString, 2);
+  int x = copy[2 + argc];  // BOOM
+  // CHECK: AddressSanitizer: heap-buffer-overflow
+  // CHECK: #0 {{.*}}main {{.*}}strndup_oob_test.cc:[[@LINE-2]]
+  // CHECK-LABEL: allocated by thread T{{.*}} here:
+  // CHECK: #{{[01]}} {{.*}}strndup
+  // CHECK: #{{.*}}main {{.*}}strndup_oob_test.cc:[[@LINE-6]]
+  // CHECK-LABEL: SUMMARY
+  // CHECK: strndup_oob_test.cc:[[@LINE-7]]
+  return x;
+}
diff --git a/test/asan/TestCases/Posix/strndup_oob_test2.cc b/test/asan/TestCases/Posix/strndup_oob_test2.cc
new file mode 100644
index 0000000..0a1afe2
--- /dev/null
+++ b/test/asan/TestCases/Posix/strndup_oob_test2.cc
@@ -0,0 +1,22 @@
+// RUN: %clang_asan -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s

+// RUN: %clang_asan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s

+// RUN: %clang_asan -O2 %s -o %t && not %run %t 2>&1 | FileCheck %s

+// RUN: %clang_asan -O3 %s -o %t && not %run %t 2>&1 | FileCheck %s

+

+// When built as C on Linux, strndup is transformed to __strndup.

+// RUN: %clang_asan -O3 -xc %s -o %t && not %run %t 2>&1 | FileCheck %s

+

+// Unwind problem on arm: "main" is missing from the allocation stack trace.

+// UNSUPPORTED: win32,s390,armv7l-unknown-linux-gnueabihf

+

+#include <string.h>

+

+char kChars[] = { 'f', 'o', 'o' };

+

+int main(int argc, char **argv) {

+  char *copy = strndup(kChars, 3);

+  copy = strndup(kChars, 10);

+  // CHECK: AddressSanitizer: global-buffer-overflow

+  // CHECK: {{.*}}main {{.*}}.cc:[[@LINE-2]]

+  return *copy;

+}
\ No newline at end of file
diff --git a/test/asan/TestCases/Posix/wait.cc b/test/asan/TestCases/Posix/wait.cc
index ed6f326..85e8193 100644
--- a/test/asan/TestCases/Posix/wait.cc
+++ b/test/asan/TestCases/Posix/wait.cc
@@ -4,6 +4,7 @@
 // RUN: %clangxx_asan -DWAITPID -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s
 // RUN: %clangxx_asan -DWAITPID -O3 %s -o %t && not %run %t 2>&1 | FileCheck %s
 
+// UNSUPPORTED: darwin
 
 #include <assert.h>
 #include <sys/wait.h>
diff --git a/test/asan/TestCases/Posix/wait3.cc b/test/asan/TestCases/Posix/wait3.cc
index 2da816f..081a73e 100644
--- a/test/asan/TestCases/Posix/wait3.cc
+++ b/test/asan/TestCases/Posix/wait3.cc
@@ -4,7 +4,7 @@
 // RUN: %clangxx_asan -DWAIT3_RUSAGE -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s
 // RUN: %clangxx_asan -DWAIT3_RUSAGE -O3 %s -o %t && not %run %t 2>&1 | FileCheck %s
 
-// UNSUPPORTED: android
+// UNSUPPORTED: android,darwin
 
 #include <assert.h>
 #include <sys/wait.h>
diff --git a/test/asan/TestCases/Posix/wait4.cc b/test/asan/TestCases/Posix/wait4.cc
index b95246e..aee5570 100644
--- a/test/asan/TestCases/Posix/wait4.cc
+++ b/test/asan/TestCases/Posix/wait4.cc
@@ -5,6 +5,7 @@
 // RUN: %clangxx_asan -DWAIT4_RUSAGE -O3 %s -o %t && not %run %t 2>&1 | FileCheck %s
 
 // XFAIL: android
+// UNSUPPORTED: darwin
 
 #include <assert.h>
 #include <sys/wait.h>
diff --git a/test/asan/TestCases/Posix/waitid.cc b/test/asan/TestCases/Posix/waitid.cc
index 8b516dc..20fb0c6 100644
--- a/test/asan/TestCases/Posix/waitid.cc
+++ b/test/asan/TestCases/Posix/waitid.cc
@@ -1,6 +1,8 @@
 // RUN: %clangxx_asan -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s
 // RUN: %clangxx_asan -O3 %s -o %t && not %run %t 2>&1 | FileCheck %s
 
+// UNSUPPORTED: darwin
+
 #include <assert.h>
 #include <sys/wait.h>
 #include <unistd.h>
diff --git a/test/asan/TestCases/Windows/coverage-dll-stdio.cc b/test/asan/TestCases/Windows/coverage-dll-stdio.cc
index 5e12e38..92cd0a7 100644
--- a/test/asan/TestCases/Windows/coverage-dll-stdio.cc
+++ b/test/asan/TestCases/Windows/coverage-dll-stdio.cc
@@ -2,8 +2,8 @@
 // __local_stdio_printf_options function isn't instrumented for coverage.
 
 // RUN: rm -rf %t && mkdir %t && cd %t
-// RUN: %clang_cl_asan -fsanitize-coverage=func -O0 %p/dll_host.cc -Fet.exe
-// RUN: %clang_cl_asan -fsanitize-coverage=func -LD -O0 %s -Fet.dll
+// RUN: %clang_cl_asan -fsanitize-coverage=func,trace-pc-guard -O0 %p/dll_host.cc -Fet.exe
+// RUN: %clang_cl_asan -fsanitize-coverage=func,trace-pc-guard -LD -O0 %s -Fet.dll
 // RUN: %run ./t.exe t.dll 2>&1 | FileCheck %s
 
 #include <stdio.h>
diff --git a/test/asan/TestCases/Windows/dll_global_dead_strip.c b/test/asan/TestCases/Windows/dll_global_dead_strip.c
index 2664f5b..15cfd5a 100644
--- a/test/asan/TestCases/Windows/dll_global_dead_strip.c
+++ b/test/asan/TestCases/Windows/dll_global_dead_strip.c
@@ -1,8 +1,8 @@
 // RUN: %clang_cl_asan -O0 %p/dll_host.cc -Fe%t
 //
-// RUN: %clang_cl_asan -LD -O0 %s -Fe%t.dll
+// RUN: %clang_cl_asan /Gw -LD -O0 %s -Fe%t.dll
 // RUN: %env_asan_opts=report_globals=2 %run %t %t.dll 2>&1 | FileCheck %s --check-prefix=NOSTRIP
-// RUN: %clang_cl_asan -LD -O2 %s -Fe%t.dll -link -opt:ref
+// RUN: %clang_cl_asan /Gw -LD -O2 %s -Fe%t.dll -link -opt:ref
 // RUN: %env_asan_opts=report_globals=2 %run %t %t.dll 2>&1 | FileCheck %s --check-prefix=STRIP
 
 #include <stdio.h>
diff --git a/test/asan/TestCases/Windows/dll_intercept_memchr.cc b/test/asan/TestCases/Windows/dll_intercept_memchr.cc
index 4f794a2..6360cec 100644
--- a/test/asan/TestCases/Windows/dll_intercept_memchr.cc
+++ b/test/asan/TestCases/Windows/dll_intercept_memchr.cc
@@ -22,6 +22,6 @@
 // CHECK-NEXT:  test_function {{.*}}dll_intercept_memchr.cc:[[@LINE-5]]
 // CHECK: Address [[ADDR]] is located in stack of thread T0 at offset {{.*}} in frame
 // CHECK-NEXT:  test_function {{.*}}dll_intercept_memchr.cc
-// CHECK: 'buff' <== Memory access at offset {{.*}} overflows this variable
+// CHECK: 'buff'{{.*}} <== Memory access at offset {{.*}} overflows this variable
   return 0;
 }
diff --git a/test/asan/TestCases/Windows/dll_intercept_memcpy.cc b/test/asan/TestCases/Windows/dll_intercept_memcpy.cc
index 736e696..a5981fa 100644
--- a/test/asan/TestCases/Windows/dll_intercept_memcpy.cc
+++ b/test/asan/TestCases/Windows/dll_intercept_memcpy.cc
@@ -27,6 +27,6 @@
 // CHECK-NEXT:  test_function {{.*}}dll_intercept_memcpy.cc:[[@LINE-4]]
 // CHECK: Address [[ADDR]] is located in stack of thread T0 at offset {{.*}} in frame
 // CHECK-NEXT:  test_function {{.*}}dll_intercept_memcpy.cc
-// CHECK: 'buff2' <== Memory access at offset {{.*}} overflows this variable
+// CHECK: 'buff2'{{.*}} <== Memory access at offset {{.*}} overflows this variable
   return 0;
 }
diff --git a/test/asan/TestCases/Windows/dll_intercept_memcpy_indirect.cc b/test/asan/TestCases/Windows/dll_intercept_memcpy_indirect.cc
index 4e28905..f05ee21 100644
--- a/test/asan/TestCases/Windows/dll_intercept_memcpy_indirect.cc
+++ b/test/asan/TestCases/Windows/dll_intercept_memcpy_indirect.cc
@@ -29,6 +29,6 @@
 // CHECK-NEXT:  test_function {{.*}}dll_intercept_memcpy_indirect.cc:[[@LINE-5]]
 // CHECK: Address [[ADDR]] is located in stack of thread T0 at offset {{.*}} in frame
 // CHECK-NEXT:  test_function {{.*}}dll_intercept_memcpy_indirect.cc
-// CHECK: 'buff2' <== Memory access at offset {{.*}} overflows this variable
+// CHECK: 'buff2'{{.*}} <== Memory access at offset {{.*}} overflows this variable
   return 0;
 }
diff --git a/test/asan/TestCases/Windows/dll_intercept_memset.cc b/test/asan/TestCases/Windows/dll_intercept_memset.cc
index d4be376..4baa0a1 100644
--- a/test/asan/TestCases/Windows/dll_intercept_memset.cc
+++ b/test/asan/TestCases/Windows/dll_intercept_memset.cc
@@ -27,6 +27,6 @@
 // CHECK-NEXT:  test_function {{.*}}dll_intercept_memset.cc:[[@LINE-4]]
 // CHECK: Address [[ADDR]] is located in stack of thread T0 at offset {{.*}} in frame
 // CHECK-NEXT:  test_function {{.*}}dll_intercept_memset.cc
-// CHECK: 'buff' <== Memory access at offset {{.*}} overflows this variable
+// CHECK: 'buff'{{.*}} <== Memory access at offset {{.*}} overflows this variable
   return 0;
 }
diff --git a/test/asan/TestCases/Windows/dll_noreturn.cc b/test/asan/TestCases/Windows/dll_noreturn.cc
index 8b5e3d0..2f6f0c7 100644
--- a/test/asan/TestCases/Windows/dll_noreturn.cc
+++ b/test/asan/TestCases/Windows/dll_noreturn.cc
@@ -17,7 +17,7 @@
 //
 // CHECK: Address [[ADDR]] is located in stack of thread T0 at offset [[OFFSET:.*]] in frame
 // CHECK-NEXT:  noreturn_f{{.*}}dll_noreturn.cc
-// CHECK: 'buffer' <== Memory access at offset [[OFFSET]] underflows this variable
+// CHECK: 'buffer'{{.*}} <== Memory access at offset [[OFFSET]] underflows this variable
 // CHECK-LABEL: SUMMARY
 }
 
diff --git a/test/asan/TestCases/Windows/dll_poison_unpoison.cc b/test/asan/TestCases/Windows/dll_poison_unpoison.cc
index 9b25a12..6bd58ec 100644
--- a/test/asan/TestCases/Windows/dll_poison_unpoison.cc
+++ b/test/asan/TestCases/Windows/dll_poison_unpoison.cc
@@ -30,6 +30,6 @@
 //
 // CHECK: [[ADDR]] is located in stack of thread T0 at offset [[OFFSET:.*]] in frame
 // CHECK-NEXT: test_function{{.*}}\dll_poison_unpoison.cc
-// CHECK: 'buffer' <== Memory access at offset [[OFFSET]] is inside this variable
+// CHECK: 'buffer'{{.*}} <== Memory access at offset [[OFFSET]] is inside this variable
   return 0;
 }
diff --git a/test/asan/TestCases/Windows/dll_stack_use_after_return.cc b/test/asan/TestCases/Windows/dll_stack_use_after_return.cc
index 6428718..b6166d6 100644
--- a/test/asan/TestCases/Windows/dll_stack_use_after_return.cc
+++ b/test/asan/TestCases/Windows/dll_stack_use_after_return.cc
@@ -22,7 +22,7 @@
 //
 // CHECK: Address [[ADDR]] is located in stack of thread T0 at offset [[OFFSET:.*]] in frame
 // CHECK-NEXT: #0 {{.*}} foo{{.*}}dll_stack_use_after_return.cc
-// CHECK: 'stack_buffer' <== Memory access at offset [[OFFSET]] is inside this variable
+// CHECK: 'stack_buffer'{{.*}} <== Memory access at offset [[OFFSET]] is inside this variable
   return 0;
 }
 
diff --git a/test/asan/TestCases/Windows/dll_thread_stack_array_left_oob.cc b/test/asan/TestCases/Windows/dll_thread_stack_array_left_oob.cc
index dc7c7c6..75a094e 100644
--- a/test/asan/TestCases/Windows/dll_thread_stack_array_left_oob.cc
+++ b/test/asan/TestCases/Windows/dll_thread_stack_array_left_oob.cc
@@ -16,7 +16,7 @@
 // CHECK: Address [[ADDR]] is located in stack of thread T1 at offset [[OFFSET:.*]] in frame
 // CHECK-NEXT:  thread_proc{{.*}}dll_thread_stack_array_left_oob.cc
 //
-// CHECK: 'stack_buffer' <== Memory access at offset [[OFFSET]] underflows this variable
+// CHECK: 'stack_buffer'{{.*}} <== Memory access at offset [[OFFSET]] underflows this variable
 
   return 0;
 }
diff --git a/test/asan/TestCases/Windows/fuse-lld.cc b/test/asan/TestCases/Windows/fuse-lld.cc
index 76c36d8..c20e5ff 100644
--- a/test/asan/TestCases/Windows/fuse-lld.cc
+++ b/test/asan/TestCases/Windows/fuse-lld.cc
@@ -1,12 +1,12 @@
 // If we have LLD, see that things more or less work.
 //
-// REQUIRES: lld
+// REQUIRES: lld-available
 //
 // FIXME: Use -fuse-ld=lld after the old COFF linker is removed.
 // FIXME: Test will fail until we add flags for requesting dwarf or cv.
 // RUNX: %clangxx_asan -O2 %s -o %t.exe -fuse-ld=lld -Wl,-debug
-// RUN: %clangxx_asan -c -O2 %s -o %t.o -gdwarf
-// RUN: lld-link %t.o -out:%t.exe -debug -defaultlib:libcmt %asan_lib %asan_cxx_lib
+// RUN: %clangxx_asan -c -O2 %s -o %t.o -g -gdwarf
+// RUN: lld-link %t.o -out:%t.exe -debug -nopdb -defaultlib:libcmt %asan_lib %asan_cxx_lib
 // RUN: not %run %t.exe 2>&1 | FileCheck %s
 
 #include <stdlib.h>
diff --git a/test/asan/TestCases/Windows/global_dead_strip.c b/test/asan/TestCases/Windows/global_dead_strip.c
index e685490..2121392 100644
--- a/test/asan/TestCases/Windows/global_dead_strip.c
+++ b/test/asan/TestCases/Windows/global_dead_strip.c
@@ -1,6 +1,6 @@
-// RUN: %clang_cl_asan /O0 %s /Fe%t.exe
+// RUN: %clang_cl_asan /Gw /O0 %s /Fe%t.exe
 // RUN: %env_asan_opts=report_globals=2 %t.exe 2>&1 | FileCheck %s --check-prefix=NOSTRIP
-// RUN: %clang_cl_asan /O2 %s /Fe%t.exe -link -opt:ref
+// RUN: %clang_cl_asan /Gw /O2 %s /Fe%t.exe -link -opt:ref
 // RUN: %env_asan_opts=report_globals=2 %t.exe 2>&1 | FileCheck %s --check-prefix=STRIP
 
 #include <stdio.h>
diff --git a/test/asan/TestCases/Windows/intercept_memcpy.cc b/test/asan/TestCases/Windows/intercept_memcpy.cc
index 6e45e7f..d71333d 100644
--- a/test/asan/TestCases/Windows/intercept_memcpy.cc
+++ b/test/asan/TestCases/Windows/intercept_memcpy.cc
@@ -27,5 +27,5 @@
 // CHECK-NEXT:  main {{.*}}intercept_memcpy.cc:[[@LINE-5]]
 // CHECK: Address [[ADDR]] is located in stack of thread T0 at offset {{.*}} in frame
 // CHECK-NEXT:   #0 {{.*}} main
-// CHECK: 'buff2' <== Memory access at offset {{.*}} overflows this variable
+// CHECK: 'buff2'{{.*}} <== Memory access at offset {{.*}} overflows this variable
 }
diff --git a/test/asan/TestCases/Windows/intercept_strlen.cc b/test/asan/TestCases/Windows/intercept_strlen.cc
index 928a286..938e6c9 100644
--- a/test/asan/TestCases/Windows/intercept_strlen.cc
+++ b/test/asan/TestCases/Windows/intercept_strlen.cc
@@ -22,6 +22,6 @@
 // CHECK-NEXT: main {{.*}}intercept_strlen.cc:[[@LINE-5]]
 // CHECK: Address [[ADDR]] is located in stack of thread T0 at offset {{.*}} in frame
 // CHECK-NEXT: main {{.*}}intercept_strlen.cc
-// CHECK: 'str' <== Memory access at offset {{.*}} overflows this variable
+// CHECK: 'str'{{.*}} <== Memory access at offset {{.*}} overflows this variable
   return len < 6;
 }
diff --git a/test/asan/TestCases/Windows/interface_symbols_windows.c b/test/asan/TestCases/Windows/interface_symbols_windows.cc
similarity index 96%
rename from test/asan/TestCases/Windows/interface_symbols_windows.c
rename to test/asan/TestCases/Windows/interface_symbols_windows.cc
index a08f358..4a59dba 100644
--- a/test/asan/TestCases/Windows/interface_symbols_windows.c
+++ b/test/asan/TestCases/Windows/interface_symbols_windows.cc
@@ -38,6 +38,8 @@
 // IMPORT: __asan_set_seh_filter
 // IMPORT: __asan_unhandled_exception_filter
 // IMPORT: __asan_test_only_reported_buggy_pointer
+// IMPORT: __sancov_lowest_stack
+// IMPORT: __ubsan_vptr_type_cache
 //
 // RUN: cat %t.imports1 %t.imports2 %t.imports3 | sort | uniq > %t.imports-sorted
 // RUN: cat %t.exports | sort | uniq > %t.exports-sorted
diff --git a/test/asan/TestCases/Windows/oom.cc b/test/asan/TestCases/Windows/oom.cc
index 59cc7ed..71a9c2a 100644
--- a/test/asan/TestCases/Windows/oom.cc
+++ b/test/asan/TestCases/Windows/oom.cc
@@ -8,5 +8,5 @@
   while (true) {
     void *ptr = malloc(200 * 1024 * 1024);  // 200MB
   }
-// CHECK: failed to allocate
+// CHECK: allocator is terminating the process instead of returning 0
 }
diff --git a/test/asan/TestCases/Windows/shadow_conflict_32.cc b/test/asan/TestCases/Windows/shadow_conflict_32.cc
index 7c6d94b..a2b6b46 100644
--- a/test/asan/TestCases/Windows/shadow_conflict_32.cc
+++ b/test/asan/TestCases/Windows/shadow_conflict_32.cc
@@ -20,9 +20,9 @@
 extern "C" __declspec(dllexport) int test_function() { return 0; }
 #endif
 
-// CHECK: =={{[0-9]+}}==Shadow memory range interleaves with an existing memory mapping. ASan cannot proceed correctly. ABORTING.
-// CHECK: =={{[0-9]+}}==ASan shadow was supposed to be located in the [0x2fff0000-0x3fffffff] range.
-// CHECK: =={{[0-9]+}}==Dumping process modules
+// CHECK: =={{[0-9:]+}}==Shadow memory range interleaves with an existing memory mapping. ASan cannot proceed correctly. ABORTING.
+// CHECK: =={{[0-9:]+}}==ASan shadow was supposed to be located in the [0x2fff0000-0x3fffffff] range.
+// CHECK: =={{[0-9:]+}}==Dumping process modules
 
 // CHECK-DAG: {{0x30000000-0x300.....}} {{.*}}\shadow_conflict_32.cc.tmp_dll.dll
 // CHECK-DAG: {{0x........-0x........}} {{.*}}\shadow_conflict_32.cc.tmp.exe
diff --git a/test/asan/TestCases/Windows/stack_array_left_oob.cc b/test/asan/TestCases/Windows/stack_array_left_oob.cc
index 845a1f3..8d601fc 100644
--- a/test/asan/TestCases/Windows/stack_array_left_oob.cc
+++ b/test/asan/TestCases/Windows/stack_array_left_oob.cc
@@ -12,5 +12,5 @@
 // CHECK-NEXT: {{#0 .* main .*stack_array_left_oob.cc}}:[[@LINE-3]]
 // CHECK: Address [[ADDR]] is located in stack of thread T0 at offset [[OFFSET:.*]] in frame
 // CHECK-NEXT: {{#0 .* main .*stack_array_left_oob.cc}}
-// CHECK: 'buffer' <== Memory access at offset [[OFFSET]] underflows this variable
+// CHECK: 'buffer'{{.*}} <== Memory access at offset [[OFFSET]] underflows this variable
 }
diff --git a/test/asan/TestCases/Windows/stack_array_right_oob.cc b/test/asan/TestCases/Windows/stack_array_right_oob.cc
index a370246..721834d 100644
--- a/test/asan/TestCases/Windows/stack_array_right_oob.cc
+++ b/test/asan/TestCases/Windows/stack_array_right_oob.cc
@@ -12,5 +12,5 @@
 // CHECK-NEXT: {{#0 .* main .*stack_array_right_oob.cc}}:[[@LINE-3]]
 // CHECK: Address [[ADDR]] is located in stack of thread T0 at offset [[OFFSET:.*]] in frame
 // CHECK-NEXT: {{#0 .* main .*stack_array_right_oob.cc}}
-// CHECK: 'buffer' <== Memory access at offset [[OFFSET]] overflows this variable
+// CHECK: 'buffer'{{.*}} <== Memory access at offset [[OFFSET]] overflows this variable
 }
diff --git a/test/asan/TestCases/Windows/stack_use_after_return.cc b/test/asan/TestCases/Windows/stack_use_after_return.cc
index 9c31922..ca1c142 100644
--- a/test/asan/TestCases/Windows/stack_use_after_return.cc
+++ b/test/asan/TestCases/Windows/stack_use_after_return.cc
@@ -18,5 +18,5 @@
 // CHECK: is located in stack of thread T0 at offset [[OFFSET:.*]] in frame
 // CHECK-NEXT: {{#0 0x.* in foo.*stack_use_after_return.cc}}
 //
-// CHECK: 'stack_buffer' <== Memory access at offset [[OFFSET]] is inside this variable
+// CHECK: 'stack_buffer'{{.*}} <== Memory access at offset [[OFFSET]] is inside this variable
 }
diff --git a/test/asan/TestCases/Windows/wrong_downcast_on_stack.cc b/test/asan/TestCases/Windows/wrong_downcast_on_stack.cc
index 2859ecc..7848cf3 100644
--- a/test/asan/TestCases/Windows/wrong_downcast_on_stack.cc
+++ b/test/asan/TestCases/Windows/wrong_downcast_on_stack.cc
@@ -20,7 +20,7 @@
 // CHECK-NEXT:  {{#0 0x[0-9a-f]* in main .*wrong_downcast_on_stack.cc}}:[[@LINE-3]]
 // CHECK: [[ADDR]] is located in stack of thread T0 at offset [[OFFSET:[0-9]+]] in frame
 // CHECK-NEXT:  {{#0 0x[0-9a-f]* in main }}
-// CHECK:  'p' <== Memory access at offset [[OFFSET]] overflows this variable
+// CHECK:  'p'{{.*}} <== Memory access at offset [[OFFSET]] overflows this variable
   return 0;
 }
 
diff --git a/test/asan/TestCases/alloca_constant_size.cc b/test/asan/TestCases/alloca_constant_size.cc
index a766ae7..57aa315 100644
--- a/test/asan/TestCases/alloca_constant_size.cc
+++ b/test/asan/TestCases/alloca_constant_size.cc
@@ -10,7 +10,7 @@
 // MSVC provides _alloca instead of alloca.
 #if defined(_MSC_VER) && !defined(alloca)
 # define alloca _alloca
-#elif defined(__FreeBSD__)
+#elif defined(__FreeBSD__) || defined(__NetBSD__)
 #include <stdlib.h>
 #else
 #include <alloca.h>
diff --git a/test/asan/TestCases/allocator_returns_null.cc b/test/asan/TestCases/allocator_returns_null.cc
index cdfcd90..8ce002f 100644
--- a/test/asan/TestCases/allocator_returns_null.cc
+++ b/test/asan/TestCases/allocator_returns_null.cc
@@ -1,68 +1,102 @@
-// Test the behavior of malloc/calloc/realloc when the allocation size is huge.
+// Test the behavior of malloc/calloc/realloc/new when the allocation size is
+// more than ASan allocator's max allowed one.
 // By default (allocator_may_return_null=0) the process should crash.
-// With allocator_may_return_null=1 the allocator should return 0.
+// With allocator_may_return_null=1 the allocator should return 0, except the
+// operator new(), which should crash anyway (operator new(std::nothrow) should
+// return nullptr, indeed).
 //
 // RUN: %clangxx_asan -O0 %s -o %t
 // RUN: not %run %t malloc 2>&1 | FileCheck %s --check-prefix=CHECK-mCRASH
-// RUN: %env_asan_opts=allocator_may_return_null=0 not %run %t malloc 2>&1 | FileCheck %s --check-prefix=CHECK-mCRASH
-// RUN: %env_asan_opts=allocator_may_return_null=1     %run %t malloc 2>&1 | FileCheck %s --check-prefix=CHECK-mNULL
-// RUN: %env_asan_opts=allocator_may_return_null=0 not %run %t calloc 2>&1 | FileCheck %s --check-prefix=CHECK-cCRASH
-// RUN: %env_asan_opts=allocator_may_return_null=1     %run %t calloc 2>&1 | FileCheck %s --check-prefix=CHECK-cNULL
-// RUN: %env_asan_opts=allocator_may_return_null=0 not %run %t calloc-overflow 2>&1 | FileCheck %s --check-prefix=CHECK-coCRASH
-// RUN: %env_asan_opts=allocator_may_return_null=1     %run %t calloc-overflow 2>&1 | FileCheck %s --check-prefix=CHECK-coNULL
-// RUN: %env_asan_opts=allocator_may_return_null=0 not %run %t realloc 2>&1 | FileCheck %s --check-prefix=CHECK-rCRASH
-// RUN: %env_asan_opts=allocator_may_return_null=1     %run %t realloc 2>&1 | FileCheck %s --check-prefix=CHECK-rNULL
-// RUN: %env_asan_opts=allocator_may_return_null=0 not %run %t realloc-after-malloc 2>&1 | FileCheck %s --check-prefix=CHECK-mrCRASH
-// RUN: %env_asan_opts=allocator_may_return_null=1     %run %t realloc-after-malloc 2>&1 | FileCheck %s --check-prefix=CHECK-mrNULL
+// RUN: %env_asan_opts=allocator_may_return_null=0 not %run %t malloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-mCRASH
+// RUN: %env_asan_opts=allocator_may_return_null=1     %run %t malloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-mNULL
+// RUN: %env_asan_opts=allocator_may_return_null=0 not %run %t calloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-cCRASH
+// RUN: %env_asan_opts=allocator_may_return_null=1     %run %t calloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-cNULL
+// RUN: %env_asan_opts=allocator_may_return_null=0 not %run %t calloc-overflow 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-coCRASH
+// RUN: %env_asan_opts=allocator_may_return_null=1     %run %t calloc-overflow 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-coNULL
+// RUN: %env_asan_opts=allocator_may_return_null=0 not %run %t realloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-rCRASH
+// RUN: %env_asan_opts=allocator_may_return_null=1     %run %t realloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-rNULL
+// RUN: %env_asan_opts=allocator_may_return_null=0 not %run %t realloc-after-malloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-mrCRASH
+// RUN: %env_asan_opts=allocator_may_return_null=1     %run %t realloc-after-malloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-mrNULL
+// RUN: %env_asan_opts=allocator_may_return_null=0 not %run %t new 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-nCRASH
+// RUN: %env_asan_opts=allocator_may_return_null=1 not %run %t new 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-nCRASH
+// RUN: %env_asan_opts=allocator_may_return_null=0 not %run %t new-nothrow 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-nnCRASH
+// RUN: %env_asan_opts=allocator_may_return_null=1     %run %t new-nothrow 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-nnNULL
 
-#include <limits.h>
+// UNSUPPORTED: win32
+
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <stdio.h>
-#include <assert.h>
 #include <limits>
+#include <new>
+
 int main(int argc, char **argv) {
   // Disable stderr buffering. Needed on Windows.
   setvbuf(stderr, NULL, _IONBF, 0);
 
-  volatile size_t size = std::numeric_limits<size_t>::max() - 10000;
   assert(argc == 2);
-  void *x = 0;
-  if (!strcmp(argv[1], "malloc")) {
-    fprintf(stderr, "malloc:\n");
-    x = malloc(size);
-  }
-  if (!strcmp(argv[1], "calloc")) {
-    fprintf(stderr, "calloc:\n");
-    x = calloc(size / 4, 4);
-  }
+  const char *action = argv[1];
+  fprintf(stderr, "%s:\n", action);
 
-  if (!strcmp(argv[1], "calloc-overflow")) {
-    fprintf(stderr, "calloc-overflow:\n");
+  static const size_t kMaxAllowedMallocSizePlusOne =
+#if __LP64__ || defined(_WIN64)
+      (1ULL << 40) + 1;
+#else
+      (3UL << 30) + 1;
+#endif
+
+  void *x = 0;
+  if (!strcmp(action, "malloc")) {
+    x = malloc(kMaxAllowedMallocSizePlusOne);
+  } else if (!strcmp(action, "calloc")) {
+    x = calloc((kMaxAllowedMallocSizePlusOne / 4) + 1, 4);
+  } else if (!strcmp(action, "calloc-overflow")) {
     volatile size_t kMaxSizeT = std::numeric_limits<size_t>::max();
     size_t kArraySize = 4096;
     volatile size_t kArraySize2 = kMaxSizeT / kArraySize + 10;
     x = calloc(kArraySize, kArraySize2);
-  }
-
-  if (!strcmp(argv[1], "realloc")) {
-    fprintf(stderr, "realloc:\n");
-    x = realloc(0, size);
-  }
-  if (!strcmp(argv[1], "realloc-after-malloc")) {
-    fprintf(stderr, "realloc-after-malloc:\n");
+  } else if (!strcmp(action, "realloc")) {
+    x = realloc(0, kMaxAllowedMallocSizePlusOne);
+  } else if (!strcmp(action, "realloc-after-malloc")) {
     char *t = (char*)malloc(100);
     *t = 42;
-    x = realloc(t, size);
+    x = realloc(t, kMaxAllowedMallocSizePlusOne);
     assert(*t == 42);
     free(t);
+  } else if (!strcmp(action, "new")) {
+    x = operator new(kMaxAllowedMallocSizePlusOne);
+  } else if (!strcmp(action, "new-nothrow")) {
+    x = operator new(kMaxAllowedMallocSizePlusOne, std::nothrow);
+  } else {
+    assert(0);
   }
+
+  fprintf(stderr, "errno: %d\n", errno);
+
   // The NULL pointer is printed differently on different systems, while (long)0
   // is always the same.
   fprintf(stderr, "x: %lx\n", (long)x);
   free(x);
+
   return x != 0;
 }
+
 // CHECK-mCRASH: malloc:
 // CHECK-mCRASH: AddressSanitizer's allocator is terminating the process
 // CHECK-cCRASH: calloc:
@@ -73,14 +107,25 @@
 // CHECK-rCRASH: AddressSanitizer's allocator is terminating the process
 // CHECK-mrCRASH: realloc-after-malloc:
 // CHECK-mrCRASH: AddressSanitizer's allocator is terminating the process
+// CHECK-nCRASH: new:
+// CHECK-nCRASH: AddressSanitizer's allocator is terminating the process
+// CHECK-nnCRASH: new-nothrow:
+// CHECK-nnCRASH: AddressSanitizer's allocator is terminating the process
 
 // CHECK-mNULL: malloc:
+// CHECK-mNULL: errno: 12
 // CHECK-mNULL: x: 0
 // CHECK-cNULL: calloc:
+// CHECK-cNULL: errno: 12
 // CHECK-cNULL: x: 0
 // CHECK-coNULL: calloc-overflow:
+// CHECK-coNULL: errno: 12
 // CHECK-coNULL: x: 0
 // CHECK-rNULL: realloc:
+// CHECK-rNULL: errno: 12
 // CHECK-rNULL: x: 0
 // CHECK-mrNULL: realloc-after-malloc:
+// CHECK-mrNULL: errno: 12
 // CHECK-mrNULL: x: 0
+// CHECK-nnNULL: new-nothrow:
+// CHECK-nnNULL: x: 0
diff --git a/test/asan/TestCases/asan_and_llvm_coverage_test.cc b/test/asan/TestCases/asan_and_llvm_coverage_test.cc
index d53deb4..1574a34 100644
--- a/test/asan/TestCases/asan_and_llvm_coverage_test.cc
+++ b/test/asan/TestCases/asan_and_llvm_coverage_test.cc
@@ -1,6 +1,6 @@
 // RUN: %clangxx_asan -coverage -O0 %s -o %t
 // RUN: %env_asan_opts=check_initialization_order=1 %run %t 2>&1 | FileCheck %s
-// XFAIL: android
+
 // We don't really support running tests using profile runtime on Windows.
 // UNSUPPORTED: win32
 #include <stdio.h>
diff --git a/test/asan/TestCases/atexit_stats.cc b/test/asan/TestCases/atexit_stats.cc
index f0b5830..c8d97da 100644
--- a/test/asan/TestCases/atexit_stats.cc
+++ b/test/asan/TestCases/atexit_stats.cc
@@ -7,7 +7,7 @@
 // UNSUPPORTED: android
 
 #include <stdlib.h>
-#if !defined(__APPLE__) && !defined(__FreeBSD__)
+#if !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(__NetBSD__)
 #include <malloc.h>
 #endif
 int *p1 = (int*)malloc(900);
diff --git a/test/asan/TestCases/coverage-and-lsan.cc b/test/asan/TestCases/coverage-and-lsan.cc
index 081f493..591b4e9 100644
--- a/test/asan/TestCases/coverage-and-lsan.cc
+++ b/test/asan/TestCases/coverage-and-lsan.cc
@@ -1,6 +1,6 @@
 // Make sure coverage is dumped even if there are reported leaks.
 //
-// RUN: %clangxx_asan -fsanitize-coverage=func %s -o %t
+// RUN: %clangxx_asan -fsanitize-coverage=func,trace-pc-guard %s -o %t
 //
 // RUN: rm -rf %T/coverage-and-lsan
 //
@@ -17,4 +17,4 @@
 }
 
 // CHECK: LeakSanitizer: detected memory leaks
-// CHECK: CovDump:
+// CHECK: SanitizerCoverage: {{.*}}PCs written
diff --git a/test/asan/TestCases/coverage-caller-callee-total-count.cc b/test/asan/TestCases/coverage-caller-callee-total-count.cc
deleted file mode 100644
index 955ffe5..0000000
--- a/test/asan/TestCases/coverage-caller-callee-total-count.cc
+++ /dev/null
@@ -1,42 +0,0 @@
-// Test __sanitizer_get_total_unique_coverage for caller-callee coverage
-
-// RUN: %clangxx_asan -fsanitize-coverage=edge,indirect-calls %s -o %t
-// RUN: %env_asan_opts=coverage=1 %run %t
-// RUN: rm -f caller-callee*.sancov
-//
-// REQUIRES: asan-64-bits
-
-#include <sanitizer/coverage_interface.h>
-#include <stdio.h>
-#include <assert.h>
-int P = 0;
-struct Foo {virtual void f() {if (P) printf("Foo::f()\n");}};
-struct Foo1 : Foo {virtual void f() {if (P) printf("%d\n", __LINE__);}};
-struct Foo2 : Foo {virtual void f() {if (P) printf("%d\n", __LINE__);}};
-
-Foo *foo[3] = {new Foo, new Foo1, new Foo2};
-
-uintptr_t CheckNewTotalUniqueCoverageIsLargerAndReturnIt(uintptr_t old_total) {
-  uintptr_t new_total = __sanitizer_get_total_unique_caller_callee_pairs();
-  fprintf(stderr, "Caller-Callee: old %zd new %zd\n", old_total, new_total);
-  assert(new_total > old_total);
-  return new_total;
-}
-
-int main(int argc, char **argv) {
-  uintptr_t total = __sanitizer_get_total_unique_caller_callee_pairs();
-  foo[0]->f();
-  total = CheckNewTotalUniqueCoverageIsLargerAndReturnIt(total);
-  foo[1]->f();
-  total = CheckNewTotalUniqueCoverageIsLargerAndReturnIt(total);
-  foo[2]->f();
-  total = CheckNewTotalUniqueCoverageIsLargerAndReturnIt(total);
-  // Ok, called every function once.
-  // Now call them again from another call site. Should get new coverage.
-  foo[0]->f();
-  total = CheckNewTotalUniqueCoverageIsLargerAndReturnIt(total);
-  foo[1]->f();
-  total = CheckNewTotalUniqueCoverageIsLargerAndReturnIt(total);
-  foo[2]->f();
-  total = CheckNewTotalUniqueCoverageIsLargerAndReturnIt(total);
-}
diff --git a/test/asan/TestCases/coverage-disabled.cc b/test/asan/TestCases/coverage-disabled.cc
index 490f2b2..b225035 100644
--- a/test/asan/TestCases/coverage-disabled.cc
+++ b/test/asan/TestCases/coverage-disabled.cc
@@ -8,11 +8,6 @@
 // RUN: %env_asan_opts=coverage_direct=0:coverage_dir='"%T/coverage-disabled/normal"':verbosity=1 %run %t
 // RUN: not %sancov print %T/coverage-disabled/normal/*.sancov 2>&1
 //
-// RUN: mkdir -p %T/coverage-disabled/direct
-// RUN: %env_asan_opts=coverage_direct=1:coverage_dir='"%T/coverage-disabled/direct"':verbosity=1 %run %t
-// RUN: cd %T/coverage-disabled/direct
-// RUN: not %sancov rawunpack *.sancov
-//
 // UNSUPPORTED: android
 
 int main(int argc, char **argv) {
diff --git a/test/asan/TestCases/coverage-levels.cc b/test/asan/TestCases/coverage-levels.cc
deleted file mode 100644
index 83f7cf6..0000000
--- a/test/asan/TestCases/coverage-levels.cc
+++ /dev/null
@@ -1,34 +0,0 @@
-// Test various levels of coverage
-//
-// RUN: %clangxx_asan -O1 -fsanitize-coverage=func  %s -o %t
-// RUN: %env_asan_opts=coverage=1:coverage_bitset=1:verbosity=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK1
-// RUN: %clangxx_asan -O1 -fsanitize-coverage=bb  %s -o %t
-// RUN: %env_asan_opts=coverage=1:coverage_bitset=1:verbosity=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK2
-// RUN: %clangxx_asan -O1 -fsanitize-coverage=edge  %s -o %t
-// RUN: %env_asan_opts=coverage=1:coverage_bitset=1:verbosity=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK3
-// RUN: %clangxx_asan -O1 -fsanitize-coverage=edge -mllvm -sanitizer-coverage-block-threshold=0 %s -o %t
-// RUN: %env_asan_opts=coverage=1:coverage_bitset=1:verbosity=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK3
-// RUN: %clangxx_asan -O1 -fsanitize-coverage=edge,8bit-counters %s -o %t
-// RUN: %env_asan_opts=coverage=1:coverage_counters=1:verbosity=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK_COUNTERS
-
-// RUN: %env_asan_opts=coverage=1:coverage_bitset=0:verbosity=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK3_NOBITSET
-// RUN: %env_asan_opts=coverage=1:verbosity=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK3_NOBITSET
-// RUN: %env_asan_opts=coverage=1:coverage_pcs=0:verbosity=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK3_NOPCS
-//
-// REQUIRES: asan-64-bits
-// UNSUPPORTED: android
-volatile int sink;
-int main(int argc, char **argv) {
-  if (argc == 0)
-    sink = 0;
-}
-
-// CHECK1: CovDump: bitset of 1 bits written for '{{.*}}', 1 bits are set
-// CHECK1:  1 PCs written
-// CHECK2: CovDump: bitset of 2 bits written for '{{.*}}', 1 bits are set
-// CHECK2:  1 PCs written
-// CHECK3: CovDump: bitset of 3 bits written for '{{.*}}', 2 bits are set
-// CHECK3:  2 PCs written
-// CHECK3_NOBITSET-NOT: bitset of
-// CHECK3_NOPCS-NOT: PCs written
-// CHECK_COUNTERS: CovDump: 3 counters written for
diff --git a/test/asan/TestCases/coverage-order-pcs.cc b/test/asan/TestCases/coverage-order-pcs.cc
deleted file mode 100644
index e81c910..0000000
--- a/test/asan/TestCases/coverage-order-pcs.cc
+++ /dev/null
@@ -1,57 +0,0 @@
-// Test coverage_order_pcs=1 flag which orders the PCs by their appearance.
-// RUN: DIR=%T/coverage-order-pcs
-// RUN: rm -rf $DIR
-// RUN: mkdir $DIR
-// RUN: %clangxx_asan -fsanitize-coverage=func %s -o %t
-// RUN: %env_asan_opts=coverage_dir=$DIR:coverage=1:coverage_order_pcs=0 %run %t
-// RUN: mv $DIR/*sancov $DIR/A
-
-// RUN: %env_asan_opts=coverage_dir=$DIR:coverage=1:coverage_order_pcs=0 %run %t 1
-// RUN: mv $DIR/*sancov $DIR/B
-
-// RUN: %env_asan_opts=coverage_dir=$DIR:coverage=1:coverage_order_pcs=1 %run %t
-// RUN: mv $DIR/*sancov $DIR/C
-
-// RUN: %env_asan_opts=coverage_dir=$DIR:coverage=1:coverage_order_pcs=1 %run %t 1
-// RUN: mv $DIR/*sancov $DIR/D
-//
-// RUN: (%sancov print $DIR/A; %sancov print $DIR/B; %sancov print $DIR/C; %sancov print $DIR/D) | FileCheck %s
-//
-// RUN: rm -rf $DIR
-// Ordering works only in 64-bit mode for now.
-// REQUIRES: asan-64-bits, shell
-// UNSUPPORTED: android
-#include <stdio.h>
-
-void foo() { fprintf(stderr, "FOO\n"); }
-void bar() { fprintf(stderr, "BAR\n"); }
-
-int main(int argc, char **argv) {
-  if (argc == 2) {
-    foo();
-    bar();
-  } else {
-    bar();
-    foo();
-  }
-}
-
-// Run A: no ordering
-// CHECK: [[FOO:0x[0-9a-f]*]]
-// CHECK-NEXT: [[BAR:0x[0-9a-f]*]]
-// CHECK-NEXT: [[MAIN:0x[0-9a-f]*]]
-//
-// Run B: still no ordering
-// CHECK-NEXT: [[FOO]]
-// CHECK-NEXT: [[BAR]]
-// CHECK-NEXT: [[MAIN]]
-//
-// Run C: MAIN, BAR, FOO
-// CHECK-NEXT: [[MAIN]]
-// CHECK-NEXT: [[BAR]]
-// CHECK-NEXT: [[FOO]]
-//
-// Run D: MAIN, FOO, BAR
-// CHECK-NEXT: [[MAIN]]
-// CHECK-NEXT: [[FOO]]
-// CHECK-NEXT: [[BAR]]
diff --git a/test/asan/TestCases/coverage-reset.cc b/test/asan/TestCases/coverage-reset.cc
deleted file mode 100644
index 11c5ef6..0000000
--- a/test/asan/TestCases/coverage-reset.cc
+++ /dev/null
@@ -1,63 +0,0 @@
-// Test __sanitizer_reset_coverage().
-
-// RUN: %clangxx_asan -fsanitize-coverage=func %s -o %t
-// RUN: %env_asan_opts=coverage=1 %run %t
-
-// https://github.com/google/sanitizers/issues/618
-// UNSUPPORTED: android
-
-#include <sanitizer/coverage_interface.h>
-#include <stdio.h>
-#include <assert.h>
-static volatile int sink;
-__attribute__((noinline)) void bar() { sink = 2; }
-__attribute__((noinline)) void foo() { sink = 1; }
-
-// In MSVC 2015, printf is an inline function, which causes this test to fail as
-// it introduces an extra coverage point. Define away printf on that platform to
-// avoid the issue.
-#if _MSC_VER >= 1900
-# define printf(arg, ...)
-#endif
-
-#define GET_AND_PRINT_COVERAGE()                                       \
-  bitset = 0;                                                  \
-  for (size_t i = 0; i < n_guards; i++)                        \
-    if (guards[i]) bitset |= 1U << i;                          \
-  printf("line %d: bitset %zd total: %zd\n", __LINE__, bitset, \
-         __sanitizer_get_total_unique_coverage());
-
-#define IS_POWER_OF_TWO(a) ((a & ((a) - 1)) == 0)
-
-int main() {
-  size_t *guards = 0;
-  size_t bitset;
-  size_t n_guards = __sanitizer_get_coverage_guards(&guards);
-
-  GET_AND_PRINT_COVERAGE();
-  size_t main_bit = bitset;
-  assert(IS_POWER_OF_TWO(main_bit));
-
-  foo();
-  GET_AND_PRINT_COVERAGE();
-  size_t foo_bit = bitset & ~main_bit;
-  assert(IS_POWER_OF_TWO(foo_bit));
-
-  bar();
-  GET_AND_PRINT_COVERAGE();
-  size_t bar_bit = bitset & ~(main_bit | foo_bit);
-  assert(IS_POWER_OF_TWO(bar_bit));
-
-  __sanitizer_reset_coverage();
-  assert(__sanitizer_get_total_unique_coverage() == 0);
-  GET_AND_PRINT_COVERAGE();
-  assert(bitset == 0);
-
-  foo();
-  GET_AND_PRINT_COVERAGE();
-  assert(bitset == foo_bit);
-
-  bar();
-  GET_AND_PRINT_COVERAGE();
-  assert(bitset == (foo_bit | bar_bit));
-}
diff --git a/test/asan/TestCases/coverage-tracing.cc b/test/asan/TestCases/coverage-tracing.cc
deleted file mode 100644
index 278cfb1..0000000
--- a/test/asan/TestCases/coverage-tracing.cc
+++ /dev/null
@@ -1,51 +0,0 @@
-// Test -fsanitize-coverage=trace-bb
-//
-// RUN: %clangxx_asan -O1 -fsanitize-coverage=func,trace-bb %s -o %t
-// RUN: rm -rf   %T/coverage-tracing
-// RUN: mkdir %T/coverage-tracing
-// RUN: cd %T/coverage-tracing
-// RUN:  A=x;   %env_asan_opts=coverage=1:verbosity=1 %run %t $A 1   2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK1; mv trace-points.*.sancov $A.points
-// RUN:  A=f;   %env_asan_opts=coverage=1:verbosity=1 %run %t $A 1   2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK2; mv trace-points.*.sancov $A.points
-// RUN:  A=b;   %env_asan_opts=coverage=1:verbosity=1 %run %t $A 1   2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK2; mv trace-points.*.sancov $A.points
-// RUN:  A=bf;  %env_asan_opts=coverage=1:verbosity=1 %run %t $A 1   2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK3; mv trace-points.*.sancov $A.points
-// RUN:  A=fb;  %env_asan_opts=coverage=1:verbosity=1 %run %t $A 1   2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK3; mv trace-points.*.sancov $A.points
-// RUN:  A=ffb; %env_asan_opts=coverage=1:verbosity=1 %run %t $A 1   2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK4; mv trace-points.*.sancov $A.points
-// RUN:  A=fff; %env_asan_opts=coverage=1:verbosity=1 %run %t $A 1   2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK4; mv trace-points.*.sancov $A.points
-// RUN:  A=bbf; %env_asan_opts=coverage=1:verbosity=1 %run %t $A 100 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK301; mv trace-points.*.sancov $A.points
-// RUN: diff f.points fff.points
-// RUN: diff bf.points fb.points
-// RUN: diff bf.points ffb.points
-// RUN: diff bf.points bbf.points
-// RUN: not diff x.points f.points
-// RUN: not diff x.points b.points
-// RUN: not diff x.points bf.points
-// RUN: not diff f.points b.points
-// RUN: not diff f.points bf.points
-// RUN: not diff b.points bf.points
-// RUN: rm -rf   %T/coverage-tracing
-//
-// REQUIRES: asan-64-bits, shell
-// UNSUPPORTED: android
-
-#include <stdlib.h>
-volatile int sink;
-__attribute__((noinline)) void foo() { sink++; }
-__attribute__((noinline)) void bar() { sink++; }
-
-int main(int argc, char **argv) {
-  if (argc != 3) return 0;
-  int n = strtol(argv[2], 0, 10);
-  while (n-- > 0) {
-    for (int i = 0; argv[1][i]; i++) {
-      if (argv[1][i] == 'f') foo();
-      else if (argv[1][i] == 'b') bar();
-    }
-  }
-}
-
-// CHECK: CovDump: Trace: 3 PCs written
-// CHECK1: CovDump: Trace: 1 Events written
-// CHECK2: CovDump: Trace: 2 Events written
-// CHECK3: CovDump: Trace: 3 Events written
-// CHECK4: CovDump: Trace: 4 Events written
-// CHECK301: CovDump: Trace: 301 Events written
diff --git a/test/asan/TestCases/default_blacklist.cc b/test/asan/TestCases/default_blacklist.cc
index 9358cc4..84c0438 100644
--- a/test/asan/TestCases/default_blacklist.cc
+++ b/test/asan/TestCases/default_blacklist.cc
@@ -1,5 +1,6 @@
 // FIXME: https://code.google.com/p/address-sanitizer/issues/detail?id=316
 // XFAIL: android
+// UNSUPPORTED: ios
 //
 // Test that ASan uses the default blacklist from resource directory.
 // RUN: %clangxx_asan -### %s 2>&1 | FileCheck %s
diff --git a/test/asan/TestCases/error_report_callback.cc b/test/asan/TestCases/error_report_callback.cc
new file mode 100644
index 0000000..8c5bbe4
--- /dev/null
+++ b/test/asan/TestCases/error_report_callback.cc
@@ -0,0 +1,21 @@
+// RUN: %clangxx_asan -O0 %s -o %t
+// RUN: not %run %t 0 2>&1 | FileCheck %s
+
+#include <sanitizer/asan_interface.h>
+#include <stdio.h>
+
+static void ErrorReportCallbackOneToZ(const char *report) {
+  fprintf(stderr, "ABCDEF%sGHIJKL", report);
+  fflush(stderr);
+}
+
+int main(int argc, char **argv) {
+  __asan_set_error_report_callback(ErrorReportCallbackOneToZ);
+  __asan_report_error(
+      (void *)__builtin_extract_return_addr(__builtin_return_address(0)), 0, 0,
+      0, true, 1);
+  // CHECK: ABCDEF
+  // CHECK: ERROR: AddressSanitizer
+  // CHECK: GHIJKL
+  return 0;
+}
diff --git a/test/asan/TestCases/global-address.cpp b/test/asan/TestCases/global-address.cpp
index 0e56ca1..81f0230 100644
--- a/test/asan/TestCases/global-address.cpp
+++ b/test/asan/TestCases/global-address.cpp
@@ -5,8 +5,8 @@
 int g_i = 42;
 int main() {
   // CHECK: AddressSanitizer: attempting to call __sanitizer_get_allocated_size() for pointer which is not owned
-  // CHECK-NOT: ASAN:DEADLYSIGNAL
+  // CHECK-NOT: AddressSanitizer:DEADLYSIGNAL
   // CHECK: SUMMARY: AddressSanitizer: bad-__sanitizer_get_allocated_size
-  // CHECK-NOT: ASAN:DEADLYSIGNAL
+  // CHECK-NOT: AddressSanitizer:DEADLYSIGNAL
   return (int)__sanitizer_get_allocated_size(&g_i);
 }
diff --git a/test/asan/TestCases/global-underflow.cc b/test/asan/TestCases/global-underflow.cc
new file mode 100644
index 0000000..4a03513
--- /dev/null
+++ b/test/asan/TestCases/global-underflow.cc
@@ -0,0 +1,17 @@
+// RUN: %clangxx_asan -O0 %s %p/Helpers/underflow.cc -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -O1 %s %p/Helpers/underflow.cc -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -O2 %s %p/Helpers/underflow.cc -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -O3 %s %p/Helpers/underflow.cc -o %t && not %run %t 2>&1 | FileCheck %s
+
+int XXX[2] = {2, 3};
+extern int YYY[];
+#include <string.h>
+int main(int argc, char **argv) {
+  memset(XXX, 0, 2*sizeof(int));
+  // CHECK: {{READ of size 4 at 0x.* thread T0}}
+  // CHECK: {{    #0 0x.* in main .*global-underflow.cc:}}[[@LINE+3]]
+  // CHECK: {{0x.* is located 4 bytes to the left of global variable}}
+  // CHECK:   {{.*YYY.* of size 12}}
+  int res = YYY[-1];
+  return res;
+}
diff --git a/test/asan/TestCases/heavy_uar_test.cc b/test/asan/TestCases/heavy_uar_test.cc
index 8338f80..9ad29f0 100644
--- a/test/asan/TestCases/heavy_uar_test.cc
+++ b/test/asan/TestCases/heavy_uar_test.cc
@@ -5,6 +5,11 @@
 // FIXME: Fix this test under GCC.
 // REQUIRES: Clang
 
+// FIXME: Fix this test for dynamic runtime on armhf-linux.
+// UNSUPPORTED: armhf-linux && asan-dynamic-runtime
+
+// UNSUPPORTED: ios
+
 #include <stdio.h>
 #include <string.h>
 #include <stdlib.h>
diff --git a/test/asan/TestCases/initialization-bug.cc b/test/asan/TestCases/initialization-bug.cc
index b28174f..6ecc6c8 100644
--- a/test/asan/TestCases/initialization-bug.cc
+++ b/test/asan/TestCases/initialization-bug.cc
@@ -10,6 +10,7 @@
 
 // The test is expected to fail on OS X Yosemite and older
 // UNSUPPORTED: osx-no-ld64-live_support
+// UNSUPPORTED: ios
 
 #include <cstdio>
 
diff --git a/test/asan/TestCases/invalid-pointer-pairs.cc b/test/asan/TestCases/invalid-pointer-pairs.cc
index b36e6cd..e1df151 100644
--- a/test/asan/TestCases/invalid-pointer-pairs.cc
+++ b/test/asan/TestCases/invalid-pointer-pairs.cc
@@ -13,10 +13,10 @@
   // [[PTR1:0x[0-9a-f]+]] [[PTR2:0x[0-9a-f]+]]
   switch (c) {
   case 'g':
-    // CMP: #{{[0-9]+ .*}} in f({{char, char\*, char\*|char,char \*,char \*}}) {{.*}}invalid-pointer-pairs.cc:[[@LINE+1]]:14
+    // CMP: #{{[0-9]+ .*}} in f({{char, char\*, char\*|char,char \*,char \*}}) {{.*}}invalid-pointer-pairs.cc:[[@LINE+1]]
     return p > q;
   case 's':
-    // SUB: #{{[0-9]+ .*}} in f({{char, char\*, char\*|char,char \*,char \*}}) {{.*}}invalid-pointer-pairs.cc:[[@LINE+1]]:14
+    // SUB: #{{[0-9]+ .*}} in f({{char, char\*, char\*|char,char \*,char \*}}) {{.*}}invalid-pointer-pairs.cc:[[@LINE+1]]
     return p - q;
   case 'k': {
     // OK-NOT: ERROR
@@ -26,7 +26,7 @@
   case 'f': {
     char *p3 = p + 20;
     free(p);
-    // FREE: #{{[0-9]+ .*}} in f({{char, char\*, char\*|char,char \*,char \*}}) {{.*}}invalid-pointer-pairs.cc:[[@LINE+2]]:14
+    // FREE: #{{[0-9]+ .*}} in f({{char, char\*, char\*|char,char \*,char \*}}) {{.*}}invalid-pointer-pairs.cc:[[@LINE+2]]
     // FREE: freed by thread
     return p < p3;
   }
diff --git a/test/asan/TestCases/log-path_test.cc b/test/asan/TestCases/log-path_test.cc
index b4218ad..710d220 100644
--- a/test/asan/TestCases/log-path_test.cc
+++ b/test/asan/TestCases/log-path_test.cc
@@ -1,5 +1,6 @@
 // FIXME: https://code.google.com/p/address-sanitizer/issues/detail?id=316
 // XFAIL: android
+// UNSUPPORTED: ios
 //
 // The for loop in the backticks below requires bash.
 // REQUIRES: shell
diff --git a/test/asan/TestCases/malloc-no-intercept.c b/test/asan/TestCases/malloc-no-intercept.c
index 563f2ab..c1442e6 100644
--- a/test/asan/TestCases/malloc-no-intercept.c
+++ b/test/asan/TestCases/malloc-no-intercept.c
@@ -7,6 +7,9 @@
 // RUN: not %clang_asan -Dtestfunc=pvalloc  %s -o %t
 // RUN: not %clang_asan -Dtestfunc=cfree    %s -o %t
 
+// Conflicts with BIONIC declarations.
+// UNSUPPORTED: android
+
 #include <stdlib.h>
 
 // For glibc, cause link failures by referencing a nonexistent function.
diff --git a/test/asan/TestCases/non-executable-pc.cpp b/test/asan/TestCases/non-executable-pc.cpp
index f8adee6..6ef4054 100644
--- a/test/asan/TestCases/non-executable-pc.cpp
+++ b/test/asan/TestCases/non-executable-pc.cpp
@@ -2,8 +2,8 @@
 // RUN: not %run %t 0 2>&1 | FileCheck %s
 // RUN: not %run %t n 2>&1 | FileCheck %s -check-prefix=CHECK -check-prefix=NON_EXEC
 
-// Only Linux and FreeBSD list every memory region in MemoryMappingLayout, for now.
-// REQUIRES: linux || freebsd
+// Not every OS lists every memory region in MemoryMappingLayout.
+// REQUIRES: linux || freebsd || netbsd
 
 #include <assert.h>
 
diff --git a/test/asan/TestCases/pass-object-byval.cc b/test/asan/TestCases/pass-object-byval.cc
new file mode 100644
index 0000000..b99360f
--- /dev/null
+++ b/test/asan/TestCases/pass-object-byval.cc
@@ -0,0 +1,40 @@
+// Verify that objects passed by value get red zones and that the copy
+// constructor is called.
+// RUN: %clangxx_asan -O0 %s -o %t
+// RUN: not %run %t 2>&1 | FileCheck %s --implicit-check-not \
+// RUN:     Assertion{{.*}}failed
+
+// ASan instrumentation can't insert red-zones around inalloca parameters.
+// XFAIL: win32 && asan-32-bits
+
+#include <cassert>
+
+class A {
+ public:
+  A() : me(this) {}
+  A(const A &other) : me(this) {
+    for (int i = 0; i < 8; ++i) a[i] = other.a[i];
+  }
+
+  int a[8];
+  A *me;
+};
+
+int bar(A *a) {
+  int *volatile ptr = &a->a[0];
+  return *(ptr - 1);
+}
+
+void foo(A a) {
+  assert(a.me == &a);
+  bar(&a);
+}
+
+int main() {
+  A a;
+  foo(a);
+}
+
+// CHECK: ERROR: AddressSanitizer: stack-buffer-overflow
+// CHECK: READ of size 4 at
+// CHECK: is located in stack of thread
diff --git a/test/asan/TestCases/pass-struct-byval-uar.cc b/test/asan/TestCases/pass-struct-byval-uar.cc
new file mode 100644
index 0000000..aa6fa57
--- /dev/null
+++ b/test/asan/TestCases/pass-struct-byval-uar.cc
@@ -0,0 +1,38 @@
+// Test that use-after-return works with arguments passed by value.
+// RUN: %clangxx_asan -O0 %s -o %t
+// RUN: %env_asan_opts=detect_stack_use_after_return=0 %run %t 2>&1 | \
+// RUN:     FileCheck --check-prefix=CHECK-NO-UAR %s
+// RUN: not %env_asan_opts=detect_stack_use_after_return=1 %run %t 2>&1 | \
+// RUN:     FileCheck --check-prefix=CHECK-UAR %s
+//
+// On several architectures, the IR does not use byval arguments for foo() and
+// instead creates a copy in main() and gives foo() a pointer to the copy.  In
+// that case, ASAN has nothing to poison on return from foo() and will not
+// detect the UAR.
+// REQUIRES: x86_64-target-arch, linux, !android
+
+#include <cstdio>
+
+struct A {
+  int a[8];
+};
+
+A *foo(A a) {
+  return &a;
+}
+
+int main() {
+  A *a = foo(A());
+  a->a[0] = 7;
+  std::fprintf(stderr, "\n");  // Ensures some output is generated for FileCheck
+                               // to verify in the case where UAR is not
+                               // detected.
+}
+
+// CHECK-NO-UAR-NOT: ERROR: AddressSanitizer: stack-use-after-return
+// CHECK-NO-UAR-NOT: WRITE of size 4 at
+// CHECK-NO-UAR-NOT: Memory access at offset {{[0-9]+}} is inside this variable
+//
+// CHECK-UAR: ERROR: AddressSanitizer: stack-use-after-return
+// CHECK-UAR: WRITE of size 4 at
+// CHECK-UAR: Memory access at offset {{[0-9]+}} is inside this variable
diff --git a/test/asan/TestCases/pass-struct-byval.cc b/test/asan/TestCases/pass-struct-byval.cc
new file mode 100644
index 0000000..ba49ecc
--- /dev/null
+++ b/test/asan/TestCases/pass-struct-byval.cc
@@ -0,0 +1,23 @@
+// RUN: %clangxx_asan -O0 %s -o %t
+// RUN: not %run %t 2>&1 | FileCheck %s
+
+struct A {
+  int a[8];
+};
+
+int bar(A *a) {
+  int *volatile ptr = &a->a[0];
+  return *(ptr - 1);
+}
+
+void foo(A a) {
+  bar(&a);
+}
+
+int main() {
+  foo(A());
+}
+
+// CHECK: ERROR: AddressSanitizer: stack-buffer-underflow
+// CHECK: READ of size 4 at
+// CHECK: is located in stack of thread
diff --git a/test/asan/TestCases/pr33372.cc b/test/asan/TestCases/pr33372.cc
new file mode 100644
index 0000000..a4b606e
--- /dev/null
+++ b/test/asan/TestCases/pr33372.cc
@@ -0,0 +1,39 @@
+// RUN: %clangxx_asan -O0 -std=c++11 %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -O1 -std=c++11 %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -O2 -std=c++11 %s -o %t && %run %t 2>&1 | FileCheck %s
+
+// Test that we do not detect false buffer overflows cased by optimization when
+// when local variable replaced by a smaller global constant.
+// https://bugs.llvm.org/show_bug.cgi?id=33372
+
+#include <stdio.h>
+#include <string.h>
+
+struct A { int x, y, z; };
+struct B { A a; /*gap*/ long b; };
+B *bb;
+
+void test1() {
+  A a1 = {1, 1, 2};
+  B b1 = {a1, 6};
+  bb = new B(b1);
+}
+
+const char KKK[] = {1, 1, 2};
+char bbb[100000];
+
+void test2() {
+  char cc[sizeof(bbb)];
+  memcpy(cc, KKK , sizeof(KKK));
+  memcpy(bbb, cc, sizeof(bbb));
+}
+
+int main(int argc, char *argv[]) {
+  test1();
+  test2();
+  printf("PASSED");
+  return 0;
+}
+
+// CHECK-NOT: ERROR: AddressSanitizer
+// CHECK: PASSED
diff --git a/test/asan/TestCases/realloc.cc b/test/asan/TestCases/realloc.cc
new file mode 100644
index 0000000..fcf383b
--- /dev/null
+++ b/test/asan/TestCases/realloc.cc
@@ -0,0 +1,21 @@
+// RUN: %clangxx_asan -O0 %s -o %t
+// Default is true (free on realloc to 0 size)
+// RUN: %run %t 2>&1 | FileCheck %s
+// RUN: %env_asan_opts=allocator_frees_and_returns_null_on_realloc_zero=true %run %t 2>&1 | FileCheck %s
+// RUN: %env_asan_opts=allocator_frees_and_returns_null_on_realloc_zero=false %run %t 2>&1 | FileCheck %s --check-prefix=NO-FREE
+
+#include <stdio.h>
+#include <stdlib.h>
+
+int main() {
+  void *p = malloc(42);
+  p = realloc(p, 0);
+  if (p) {
+    // NO-FREE: Allocated something on realloc(p, 0)
+    fprintf(stderr, "Allocated something on realloc(p, 0)\n");
+  } else {
+    // CHECK: realloc(p, 0) returned nullptr
+    fprintf(stderr, "realloc(p, 0) returned nullptr\n");
+  }
+  free(p);
+}
diff --git a/test/asan/TestCases/sleep_after_init.c b/test/asan/TestCases/sleep_after_init.c
new file mode 100644
index 0000000..147af67
--- /dev/null
+++ b/test/asan/TestCases/sleep_after_init.c
@@ -0,0 +1,10 @@
+// RUN: %clang_asan -O2 %s -o %t
+// RUN: %env_asan_opts=sleep_after_init=1 not %run %t 2>&1 | FileCheck %s
+
+#include <stdlib.h>
+int main() {
+  // CHECK: Sleeping for 1 second
+  char *x = (char*)malloc(10 * sizeof(char));
+  free(x);
+  return x[5];
+}
diff --git a/test/asan/TestCases/small_memcpy_test.cc b/test/asan/TestCases/small_memcpy_test.cc
new file mode 100644
index 0000000..2d6dea6
--- /dev/null
+++ b/test/asan/TestCases/small_memcpy_test.cc
@@ -0,0 +1,28 @@
+// Test that small memcpy works correctly.
+
+// RUN: %clangxx_asan %s -o %t
+// RUN: not %run %t 8 24 2>&1 | FileCheck %s --check-prefix=CHECK
+// RUN: not %run %t 16 32 2>&1 | FileCheck %s --check-prefix=CHECK
+// RUN: not %run %t 24 40 2>&1 | FileCheck %s --check-prefix=CHECK
+// RUN: not %run %t 32 48 2>&1 | FileCheck %s --check-prefix=CHECK
+// RUN: not %run %t 40 56 2>&1 | FileCheck %s --check-prefix=CHECK
+// RUN: not %run %t 48 64 2>&1 | FileCheck %s --check-prefix=CHECK
+#include <assert.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <sanitizer/asan_interface.h>
+
+int main(int argc, char **argv) {
+  assert(argc == 3);
+  size_t poison_from = atoi(argv[1]);
+  size_t poison_to = atoi(argv[2]);
+  assert(poison_from <= poison_to);
+  char A1[64], A2[64];
+  fprintf(stderr, "%zd %zd\n", poison_from, poison_to - poison_from);
+  __asan_poison_memory_region(&A1[0] + poison_from, poison_to - poison_from);
+  memcpy(A1, A2, sizeof(A1));
+// CHECK: AddressSanitizer: use-after-poison
+  return 0;
+}
diff --git a/test/asan/TestCases/stack-buffer-overflow-with-position.cc b/test/asan/TestCases/stack-buffer-overflow-with-position.cc
index 88f5825..2a575f7 100644
--- a/test/asan/TestCases/stack-buffer-overflow-with-position.cc
+++ b/test/asan/TestCases/stack-buffer-overflow-with-position.cc
@@ -30,15 +30,15 @@
   // make sure BBB and CCC are not removed;
   return *(short*)(p) + BBB[argc % 2] + CCC[argc % 2];
 }
-// CHECK-m2: 'AAA' <== {{.*}}underflows this variable
-// CHECK-m1: 'AAA' <== {{.*}}partially underflows this variable
-// CHECK-9:  'AAA' <== {{.*}}partially overflows this variable
-// CHECK-10: 'AAA' <== {{.*}}overflows this variable
-// CHECK-30: 'BBB' <== {{.*}}underflows this variable
-// CHECK-31: 'BBB' <== {{.*}}partially underflows this variable
-// CHECK-41: 'BBB' <== {{.*}}partially overflows this variable
-// CHECK-42: 'BBB' <== {{.*}}overflows this variable
-// CHECK-62: 'CCC' <== {{.*}}underflows this variable
-// CHECK-63: 'CCC' <== {{.*}}partially underflows this variable
-// CHECK-73: 'CCC' <== {{.*}}partially overflows this variable
-// CHECK-74: 'CCC' <== {{.*}}overflows this variable
+// CHECK-m2: 'AAA'{{.*}} <== {{.*}}underflows this variable
+// CHECK-m1: 'AAA'{{.*}} <== {{.*}}partially underflows this variable
+// CHECK-9:  'AAA'{{.*}} <== {{.*}}partially overflows this variable
+// CHECK-10: 'AAA'{{.*}} <== {{.*}}overflows this variable
+// CHECK-30: 'BBB'{{.*}} <== {{.*}}underflows this variable
+// CHECK-31: 'BBB'{{.*}} <== {{.*}}partially underflows this variable
+// CHECK-41: 'BBB'{{.*}} <== {{.*}}partially overflows this variable
+// CHECK-42: 'BBB'{{.*}} <== {{.*}}overflows this variable
+// CHECK-62: 'CCC'{{.*}} <== {{.*}}underflows this variable
+// CHECK-63: 'CCC'{{.*}} <== {{.*}}partially underflows this variable
+// CHECK-73: 'CCC'{{.*}} <== {{.*}}partially overflows this variable
+// CHECK-74: 'CCC'{{.*}} <== {{.*}}overflows this variable
diff --git a/test/asan/TestCases/strcasestr-1.c b/test/asan/TestCases/strcasestr-1.c
index c38871e..dccfbcd 100644
--- a/test/asan/TestCases/strcasestr-1.c
+++ b/test/asan/TestCases/strcasestr-1.c
@@ -19,7 +19,7 @@
   char s1[4] = "abC";
   __asan_poison_memory_region ((char *)&s1[2], 2);
   r = strcasestr(s1, s2);
-  // CHECK:'s1' <== Memory access at offset {{[0-9]+}} partially overflows this variable
+  // CHECK:'s1'{{.*}} <== Memory access at offset {{[0-9]+}} partially overflows this variable
   assert(r == s1 + 2);
   return 0;
 }
diff --git a/test/asan/TestCases/strcasestr-2.c b/test/asan/TestCases/strcasestr-2.c
index 47fd692..70de2dd 100644
--- a/test/asan/TestCases/strcasestr-2.c
+++ b/test/asan/TestCases/strcasestr-2.c
@@ -20,6 +20,6 @@
   __asan_poison_memory_region ((char *)&s2[2], 2);
   r = strcasestr(s1, s2);
   assert(r == 0);
-  // CHECK:'s2' <== Memory access at offset {{[0-9]+}} partially overflows this variable
+  // CHECK:'s2'{{.*}} <== Memory access at offset {{[0-9]+}} partially overflows this variable
   return 0;
 }
diff --git a/test/asan/TestCases/strcspn-1.c b/test/asan/TestCases/strcspn-1.c
index 6cda2e2..2a9f7d7 100644
--- a/test/asan/TestCases/strcspn-1.c
+++ b/test/asan/TestCases/strcspn-1.c
@@ -14,7 +14,7 @@
   char s1[4] = "caB";
   __asan_poison_memory_region ((char *)&s1[2], 2);
   r = strcspn(s1, s2);
-  // CHECK:'s1' <== Memory access at offset {{[0-9]+}} partially overflows this variable
+  // CHECK:'s1'{{.*}} <== Memory access at offset {{[0-9]+}} partially overflows this variable
   assert(r == 1);
   return 0;
 }
diff --git a/test/asan/TestCases/strcspn-2.c b/test/asan/TestCases/strcspn-2.c
index 8bb4b8a..a51fb91 100644
--- a/test/asan/TestCases/strcspn-2.c
+++ b/test/asan/TestCases/strcspn-2.c
@@ -14,7 +14,7 @@
   char s2[4] = "abc";
   __asan_poison_memory_region ((char *)&s2[2], 2);
   r = strcspn(s1, s2);
-  // CHECK:'s2' <== Memory access at offset {{[0-9]+}} partially overflows this variable
+  // CHECK:'s2'{{.*}} <== Memory access at offset {{[0-9]+}} partially overflows this variable
   assert(r == 0);
   return 0;
 }
diff --git a/test/asan/TestCases/strpbrk-1.c b/test/asan/TestCases/strpbrk-1.c
index 626e877..eb32326 100644
--- a/test/asan/TestCases/strpbrk-1.c
+++ b/test/asan/TestCases/strpbrk-1.c
@@ -14,7 +14,7 @@
   char s1[4] = "cab";
   __asan_poison_memory_region ((char *)&s1[2], 2);
   r = strpbrk(s1, s2);
-  // CHECK:'s1' <== Memory access at offset {{[0-9]+}} partially overflows this variable
+  // CHECK:'s1'{{.*}} <== Memory access at offset {{[0-9]+}} partially overflows this variable
   assert(r == s1 + 1);
   return 0;
 }
diff --git a/test/asan/TestCases/strpbrk-2.c b/test/asan/TestCases/strpbrk-2.c
index 29f3150..1f24656 100644
--- a/test/asan/TestCases/strpbrk-2.c
+++ b/test/asan/TestCases/strpbrk-2.c
@@ -14,7 +14,7 @@
   char s2[4] = "bca";
   __asan_poison_memory_region ((char *)&s2[2], 2);
   r = strpbrk(s1, s2);
-  // CHECK:'s2' <== Memory access at offset {{[0-9]+}} partially overflows this variable
+  // CHECK:'s2'{{.*}} <== Memory access at offset {{[0-9]+}} partially overflows this variable
   assert(r == s1);
   return 0;
 }
diff --git a/test/asan/TestCases/strspn-1.c b/test/asan/TestCases/strspn-1.c
index b0c40ea..5ddb14f 100644
--- a/test/asan/TestCases/strspn-1.c
+++ b/test/asan/TestCases/strspn-1.c
@@ -14,7 +14,7 @@
   char s1[4] = "acb";
   __asan_poison_memory_region ((char *)&s1[2], 2);
   r = strspn(s1, s2);
-  // CHECK:'s1' <== Memory access at offset {{[0-9]+}} partially overflows this variable
+  // CHECK:'s1'{{.*}} <== Memory access at offset {{[0-9]+}} partially overflows this variable
   assert(r == 1);
   return 0;
 }
diff --git a/test/asan/TestCases/strspn-2.c b/test/asan/TestCases/strspn-2.c
index 4c89910..d564ef8 100644
--- a/test/asan/TestCases/strspn-2.c
+++ b/test/asan/TestCases/strspn-2.c
@@ -14,7 +14,7 @@
   char s2[5] = "abcd";
   __asan_poison_memory_region ((char *)&s2[3], 2);
   r = strspn(s1, s2);
-  // CHECK:'s2' <== Memory access at offset {{[0-9]+}} partially overflows this variable
+  // CHECK:'s2'{{.*}} <== Memory access at offset {{[0-9]+}} partially overflows this variable
   assert(r >= 2);
   return 0;
 }
diff --git a/test/asan/TestCases/strstr-1.c b/test/asan/TestCases/strstr-1.c
index 06a8a8a..319cff5 100644
--- a/test/asan/TestCases/strstr-1.c
+++ b/test/asan/TestCases/strstr-1.c
@@ -15,7 +15,7 @@
   char s1[4] = "acb";
   __asan_poison_memory_region ((char *)&s1[2], 2);
   r = strstr(s1, s2);
-  // CHECK:'s1' <== Memory access at offset {{[0-9]+}} {{partially overflows this variable|is inside this variable}}
+  // CHECK:'s1'{{.*}} <== Memory access at offset {{[0-9]+}} {{partially overflows this variable|is inside this variable}}
   assert(r == s1 + 1);
   return 0;
 }
diff --git a/test/asan/TestCases/strstr-2.c b/test/asan/TestCases/strstr-2.c
index 8bc6e99..4d00c6e 100644
--- a/test/asan/TestCases/strstr-2.c
+++ b/test/asan/TestCases/strstr-2.c
@@ -15,7 +15,7 @@
   char s2[4] = "cab";
   __asan_poison_memory_region ((char *)&s2[2], 2);
   r = strstr(s1, s2);
-  // CHECK:'s2' <== Memory access at offset {{[0-9]+}} partially overflows this variable
+  // CHECK:'s2'{{.*}} <== Memory access at offset {{[0-9]+}} partially overflows this variable
   assert(r == 0);
   return 0;
 }
diff --git a/test/asan/TestCases/strtok.c b/test/asan/TestCases/strtok.c
new file mode 100644
index 0000000..c7b2617
--- /dev/null
+++ b/test/asan/TestCases/strtok.c
@@ -0,0 +1,103 @@
+// RUN: %clang_asan %s -o %t
+
+// Test overflows with strict_string_checks
+
+// RUN: %env_asan_opts=strict_string_checks=true not %run %t test1 2>&1 | \
+// RUN:    FileCheck %s --check-prefix=CHECK1
+// RUN: %env_asan_opts=intercept_strtok=false %run %t test1 2>&1
+// RUN: %env_asan_opts=strict_string_checks=true not %run %t test2 2>&1 | \
+// RUN:    FileCheck %s --check-prefix=CHECK2
+// RUN: %env_asan_opts=intercept_strtok=false %run %t test2 2>&1
+// RUN: %env_asan_opts=strict_string_checks=true not %run %t test3 2>&1 | \
+// RUN:    FileCheck %s --check-prefix=CHECK3
+// RUN: %env_asan_opts=intercept_strtok=false %run %t test3 2>&1
+// RUN: %env_asan_opts=strict_string_checks=true %run %t test4 2>&1
+// RUN: %env_asan_opts=intercept_strtok=false %run %t test4 2>&1
+
+// Test overflows with !strict_string_checks
+// RUN: %env_asan_opts=strict_string_checks=false not %run %t test5 2>&1 | \
+// RUN:    FileCheck %s --check-prefix=CHECK5
+// RUN: %env_asan_opts=intercept_strtok=false %run %t test5 2>&1
+// RUN: %env_asan_opts=strict_string_checks=false not %run %t test6 2>&1 | \
+// RUN:    FileCheck %s --check-prefix=CHECK6
+// RUN: %env_asan_opts=intercept_strtok=false %run %t test6 2>&1
+
+
+#include <assert.h>
+#include <string.h>
+#include <sanitizer/asan_interface.h>
+
+// Check that we find overflows in the delimiters on the first call
+// with strict_string_checks.
+void test1() {
+  char *token;
+  char s[4] = "abc";
+  char token_delimiter[2] = "b";
+  __asan_poison_memory_region ((char *)&token_delimiter[1], 2);
+  token = strtok(s, token_delimiter);
+  // CHECK1: 'token_delimiter'{{.*}} <== Memory access at offset {{[0-9]+}} partially overflows this variable
+}
+
+// Check that we find overflows in the delimiters on the second call (str == NULL)
+// with strict_string_checks.
+void test2() {
+  char *token;
+  char s[4] = "abc";
+  char token_delimiter[2] = "b";
+  token = strtok(s, token_delimiter);
+  assert(strcmp(token, "a") == 0);
+  __asan_poison_memory_region ((char *)&token_delimiter[1], 2);
+  token = strtok(NULL, token_delimiter);
+  // CHECK2: 'token_delimiter'{{.*}} <== Memory access at offset {{[0-9]+}} partially overflows this variable
+}
+
+// Check that we find overflows in the string (only on the first call) with strict_string_checks.
+void test3() {
+  char *token;
+  char s[4] = "abc";
+  char token_delimiter[2] = "b";
+  __asan_poison_memory_region ((char *)&s[3], 2);
+  token = strtok(s, token_delimiter);
+  // CHECK3: 's'{{.*}} <== Memory access at offset {{[0-9]+}} partially overflows this variable
+}
+
+// Check that we do not crash when strtok returns NULL with strict_string_checks.
+void test4() {
+  char *token;
+  char s[] = "";
+  char token_delimiter[] = "a";
+  token = strtok(s, token_delimiter);
+  assert(token == NULL);
+}
+
+// Check that we find overflows in the string (only on the first call) with !strict_string_checks.
+void test5() {
+  char *token;
+  char s[4] = "abc";
+  char token_delimiter[2] = "d";
+  __asan_poison_memory_region ((char *)&s[2], 2);
+  __asan_poison_memory_region ((char *)&token_delimiter[1], 2);
+  token = strtok(s, token_delimiter);
+  // CHECK5: 's'{{.*}} <== Memory access at offset {{[0-9]+}} partially overflows this variable
+}
+
+// Check that we find overflows in the delimiters (only on the first call) with !strict_string_checks.
+void test6() {
+  char *token;
+  char s[4] = "abc";
+  char token_delimiter[1] = {'d'};
+  __asan_poison_memory_region ((char *)&token_delimiter[1], 2);
+  token = strtok(s, &token_delimiter[1]);
+  // CHECK6: 'token_delimiter'{{.*}} <== Memory access at offset {{[0-9]+}} overflows this variable
+}
+
+int main(int argc, char **argv) {
+  if (argc != 2) return 1;
+  if (!strcmp(argv[1], "test1")) test1();
+  if (!strcmp(argv[1], "test2")) test2();
+  if (!strcmp(argv[1], "test3")) test3();
+  if (!strcmp(argv[1], "test4")) test4();
+  if (!strcmp(argv[1], "test5")) test5();
+  if (!strcmp(argv[1], "test6")) test6();
+  return 0;
+}
diff --git a/test/asan/TestCases/suppressions-exec-relative-location.cc b/test/asan/TestCases/suppressions-exec-relative-location.cc
index 740cece..d4e214d 100644
--- a/test/asan/TestCases/suppressions-exec-relative-location.cc
+++ b/test/asan/TestCases/suppressions-exec-relative-location.cc
@@ -25,6 +25,7 @@
 
 // XFAIL: android
 // XFAIL: win32
+// UNSUPPORTED: ios
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/test/asan/TestCases/suppressions-function.cc b/test/asan/TestCases/suppressions-function.cc
index d5ac9f7..becefa2 100644
--- a/test/asan/TestCases/suppressions-function.cc
+++ b/test/asan/TestCases/suppressions-function.cc
@@ -8,6 +8,11 @@
 
 // FIXME: Windows symbolizer needs work to make this pass.
 // XFAIL: android,win32
+// UNSUPPORTED: ios
+
+// FIXME: atos does not work for inlined functions, yet llvm-symbolizer
+// does not always work with debug info on Darwin.
+// UNSUPPORTED: darwin
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/test/asan/TestCases/throw_call_test.cc b/test/asan/TestCases/throw_call_test.cc
index 5a8204a..20a7c0b 100644
--- a/test/asan/TestCases/throw_call_test.cc
+++ b/test/asan/TestCases/throw_call_test.cc
@@ -1,9 +1,6 @@
 // RUN: %clangxx_asan %s -o %t && %run %t
 // http://code.google.com/p/address-sanitizer/issues/detail?id=147 (not fixed).
 // BROKEN: %clangxx_asan %s -o %t -static-libstdc++ && %run %t
-//
-// Android builds with static libstdc++ by default.
-// XFAIL: android
 
 #include <stdio.h>
 static volatile int zero = 0;
diff --git a/test/asan/TestCases/use-after-scope-conversion.cc b/test/asan/TestCases/use-after-scope-conversion.cc
new file mode 100644
index 0000000..99b845d
--- /dev/null
+++ b/test/asan/TestCases/use-after-scope-conversion.cc
@@ -0,0 +1,50 @@
+// RUN: %clangxx_asan -O0 -fsanitize-address-use-after-scope %s -o %t
+
+// RUN: not %run %t 'A' 2>&1 | FileCheck %s
+// RUN: not %run %t 'B' 2>&1 | FileCheck %s
+
+// Missing lifetime markers in test_a
+// https://bugs.llvm.org/show_bug.cgi?id=34353
+// XFAIL: *
+
+struct B {
+  B() : p('B') {}
+  char p;
+};
+
+struct C {
+  const char *p;
+  explicit C(const char *c) : p(c) {}
+  C(const B &b) : p(&b.p) {} // NOLINT
+};
+
+struct A {
+  char p;
+  explicit A() : p('C') {}
+  const operator C() const { return C(&p); }
+};
+
+volatile char r;
+void test_a() {
+  C s = A();
+  r = *s.p;
+}
+
+void test_b() {
+  C s = B();
+  r = *s.p;
+}
+
+int main(int argc, char **argv) {
+  switch (argv[1][0]) {
+  case 'A':
+    test_a();
+    return 0;
+  case 'B':
+    test_b();
+    return 0;
+  }
+  return 1;
+}
+
+// CHECK: ERROR: AddressSanitizer: stack-use-after-scope
diff --git a/test/asan/TestCases/use-after-scope-inlined.cc b/test/asan/TestCases/use-after-scope-inlined.cc
index 98a455c..bed9814 100644
--- a/test/asan/TestCases/use-after-scope-inlined.cc
+++ b/test/asan/TestCases/use-after-scope-inlined.cc
@@ -21,8 +21,8 @@
   // CHECK: READ of size 4 at 0x{{.*}} thread T0
   // CHECK:   #0 0x{{.*}} in main
   // CHECK:      {{.*}}use-after-scope-inlined.cc:[[@LINE-4]]
-  // CHECK: Address 0x{{.*}} is located in stack of thread T0 at offset
-  // CHECK:      [[OFFSET:[^ ]*]] in frame
-  // CHECK: main
-  // CHECK:   {{\[}}[[OFFSET]], {{.*}}) 'x.i:[[@LINE-15]]'
+  // CHECK: Address 0x{{.*}} is located in stack of thread T0 at offset [[OFFSET:[^ ]*]] in frame
+  // CHECK:      {{.*}} in main
+  // CHECK:   This frame has
+  // CHECK:     {{\[}}[[OFFSET]], {{.*}}) 'x.i' (line [[@LINE-15]])
 }
diff --git a/test/asan/TestCases/use-after-scope.cc b/test/asan/TestCases/use-after-scope.cc
index d92dae6..4c5998a 100644
--- a/test/asan/TestCases/use-after-scope.cc
+++ b/test/asan/TestCases/use-after-scope.cc
@@ -1,6 +1,10 @@
 // RUN: %clangxx_asan -O1 -fsanitize-address-use-after-scope %s -o %t && \
 // RUN:     not %run %t 2>&1 | FileCheck %s
 
+// -fsanitize-address-use-after-scope is now on by default:
+// RUN: %clangxx_asan -O1 %s -o %t && \
+// RUN:     not %run %t 2>&1 | FileCheck %s
+
 volatile int *p = 0;
 
 int main() {
diff --git a/test/asan/TestCases/verbose-log-path_test.cc b/test/asan/TestCases/verbose-log-path_test.cc
index 47a5c22..3c3db08 100644
--- a/test/asan/TestCases/verbose-log-path_test.cc
+++ b/test/asan/TestCases/verbose-log-path_test.cc
@@ -8,8 +8,9 @@
 // RUN: %env_asan_opts=log_path=%T/asan.log:log_exe_name=1 not %run %T/verbose-log-path_test-binary 2> %t.out
 // RUN: FileCheck %s --check-prefix=CHECK-ERROR < %T/asan.log.verbose-log-path_test-binary.*
 
-// FIXME: only FreeBSD and Linux have verbose log paths now.
+// FIXME: only FreeBSD, NetBSD and Linux have verbose log paths now.
 // XFAIL: win32,android
+// UNSUPPORTED: ios
 
 #include <stdlib.h>
 #include <string.h>
diff --git a/test/asan/lit.cfg b/test/asan/lit.cfg
index 7765a24..6a4044a 100644
--- a/test/asan/lit.cfg
+++ b/test/asan/lit.cfg
@@ -2,6 +2,7 @@
 
 import os
 import platform
+import re
 
 import lit.formats
 
@@ -30,24 +31,19 @@
 config.name = 'AddressSanitizer' + config.name_suffix
 
 # Platform-specific default ASAN_OPTIONS for lit tests.
-default_asan_opts = ''
-if config.host_os == 'Darwin':
-  # On Darwin, we default to `abort_on_error=1`, which would make tests run
-  # much slower. Let's override this and run lit tests with 'abort_on_error=0'.
-  # Also, make sure we do not overwhelm the syslog while testing.
-  default_asan_opts = 'abort_on_error=0'
-  default_asan_opts += ':log_to_syslog=0'
-elif config.android:
-  # The same as on Darwin, we default to "abort_on_error=1" which slows down
-  # testing. Also, all existing tests are using "not" instead of "not --crash"
-  # which does not work for abort()-terminated programs.
-  default_asan_opts = 'abort_on_error=0'
+default_asan_opts = list(config.default_sanitizer_opts)
 
-if default_asan_opts:
-  config.environment['ASAN_OPTIONS'] = default_asan_opts
-  default_asan_opts += ':'
+# On Darwin, leak checking is not enabled by default. Enable for x86_64
+# tests to prevent regressions
+if config.host_os == 'Darwin' and config.target_arch == 'x86_64':
+  default_asan_opts += ['detect_leaks=1']
+
+default_asan_opts_str = ':'.join(default_asan_opts)
+if default_asan_opts_str:
+  config.environment['ASAN_OPTIONS'] = default_asan_opts_str
+  default_asan_opts_str += ':'
 config.substitutions.append(('%env_asan_opts=',
-                             'env ASAN_OPTIONS=' + default_asan_opts))
+                             'env ASAN_OPTIONS=' + default_asan_opts_str))
 
 # Setup source root.
 config.test_source_root = os.path.dirname(__file__)
@@ -65,11 +61,6 @@
 else:
   extra_link_flags = []
 
-# BFD linker in 64-bit android toolchains fails to find libm.so, which is a
-# transitive shared library dependency (via asan runtime).
-if config.android:
-  extra_link_flags += ["-lm"]
-
 # Setup default compiler flags used with -fsanitize=address option.
 # FIXME: Review the set of required flags and check if it can be reduced.
 target_cflags = [get_required_attr(config, "target_cflags")] + extra_link_flags
@@ -104,28 +95,14 @@
     win_runtime_feature = "win32-static-asan"
   config.available_features.add(win_runtime_feature)
 
-asan_lit_source_dir = get_required_attr(config, "asan_lit_source_dir")
-if config.android == "1":
-  config.available_features.add('android')
-  clang_wrapper = os.path.join(asan_lit_source_dir,
-                               "android_commands", "android_compile.py") + " "
-else:
-  config.available_features.add('not-android')
-  clang_wrapper = ""
-
 def build_invocation(compile_flags):
-  return " " + " ".join([clang_wrapper, config.clang] + compile_flags) + " "
-
-# Clang driver link 'x86' (i686) architecture to 'i386'.
-target_arch = config.target_arch
-if (target_arch == "i686"):
-  target_arch = "i386"
+  return " " + " ".join([config.compile_wrapper, config.clang] + compile_flags) + " "
 
 config.substitutions.append( ("%clang ", build_invocation(target_cflags)) )
 config.substitutions.append( ("%clangxx ", build_invocation(target_cxxflags)) )
 config.substitutions.append( ("%clang_asan ", build_invocation(clang_asan_cflags)) )
 config.substitutions.append( ("%clangxx_asan ", build_invocation(clang_asan_cxxflags)) )
-config.substitutions.append( ("%shared_libasan", "libclang_rt.asan-%s.so" % target_arch))
+config.substitutions.append( ("%shared_libasan", "libclang_rt.asan-%s.so" % config.target_arch))
 if config.asan_dynamic:
   config.substitutions.append( ("%clang_asan_static ", build_invocation(clang_asan_static_cflags)) )
   config.substitutions.append( ("%clangxx_asan_static ", build_invocation(clang_asan_static_cxxflags)) )
@@ -180,7 +157,7 @@
 config.substitutions.append( ("%sancov ", python_exec + " " + sancov + " ") )
 
 # Determine kernel bitness
-if config.host_arch.find('64') != -1 and config.android != "1":
+if config.host_arch.find('64') != -1 and not config.android:
   kernel_bits = '64'
 else:
   kernel_bits = '32'
@@ -191,27 +168,14 @@
 
 config.available_features.add("asan-" + config.bits + "-bits")
 
-if config.host_os == 'Darwin':
-  config.substitutions.append( ("%ld_flags_rpath_exe", '-Wl,-rpath,@executable_path/ %dynamiclib') )
-  config.substitutions.append( ("%ld_flags_rpath_so", '-install_name @rpath/`basename %dynamiclib`') )
-elif config.host_os == 'FreeBSD':
-  config.substitutions.append( ("%ld_flags_rpath_exe", "-Wl,-z,origin -Wl,-rpath,\$ORIGIN -L%T -l%xdynamiclib_namespec") )
-  config.substitutions.append( ("%ld_flags_rpath_so", '') )
-elif config.host_os == 'Linux':
-  config.substitutions.append( ("%ld_flags_rpath_exe", "-Wl,-rpath,\$ORIGIN -L%T -l%xdynamiclib_namespec") )
-  config.substitutions.append( ("%ld_flags_rpath_so", '') )
-
-# Must be defined after the substitutions that use %dynamiclib.
-config.substitutions.append( ("%dynamiclib", '%T/lib%xdynamiclib_namespec.so') )
-config.substitutions.append( ("%xdynamiclib_namespec", '$(basename %t).dynamic') )
-
-# Allow tests to use REQUIRES=stable-runtime.  For use when you cannot use XFAIL
-# because the test hangs. Adding armhf as we now have two modes.
-if config.target_arch != 'arm' and config.target_arch != 'armhf' and config.target_arch != 'aarch64':
-  config.available_features.add('stable-runtime')
+# Fast unwinder doesn't work with Thumb
+if re.search('mthumb', config.target_cflags) is not None:
+  config.available_features.add('fast-unwinder-works')
 
 # Turn on leak detection on 64-bit Linux.
-if config.host_os == 'Linux' and (config.target_arch == 'x86_64' or config.target_arch == 'i386'):
+leak_detection_linux = (config.host_os == 'Linux') and (config.target_arch == 'x86_64' or config.target_arch == 'i386')
+leak_detection_mac = (config.host_os == 'Darwin') and (config.target_arch == 'x86_64')
+if leak_detection_linux or leak_detection_mac:
   config.available_features.add('leak-detection')
 
 # Set LD_LIBRARY_PATH to pick dynamic runtime up properly.
diff --git a/test/asan/lit.site.cfg.in b/test/asan/lit.site.cfg.in
index 1b6fed2..6c8f882 100644
--- a/test/asan/lit.site.cfg.in
+++ b/test/asan/lit.site.cfg.in
@@ -2,11 +2,11 @@
 
 # Tool-specific config options.
 config.name_suffix = "@ASAN_TEST_CONFIG_SUFFIX@"
-config.asan_lit_source_dir = "@ASAN_LIT_SOURCE_DIR@"
 config.target_cflags = "@ASAN_TEST_TARGET_CFLAGS@"
 config.clang = "@ASAN_TEST_TARGET_CC@"
 config.bits = "@ASAN_TEST_BITS@"
-config.android = "@ANDROID@"
+config.ios = @ASAN_TEST_IOS_PYBOOL@
+config.iossim = @ASAN_TEST_IOSSIM_PYBOOL@
 config.asan_dynamic = @ASAN_TEST_DYNAMIC@
 config.target_arch = "@ASAN_TEST_TARGET_ARCH@"
 
diff --git a/test/builtins/CMakeLists.txt b/test/builtins/CMakeLists.txt
index 443e552..cabf767 100644
--- a/test/builtins/CMakeLists.txt
+++ b/test/builtins/CMakeLists.txt
@@ -9,6 +9,33 @@
   ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
 )
 
+#Unit tests.
+
+include(builtin-config-ix)
+
+pythonize_bool(MSVC)
+
+#TODO: Add support for Apple.
+if (NOT APPLE)
+foreach(arch ${BUILTIN_SUPPORTED_ARCH})
+  set(BUILTINS_TEST_TARGET_ARCH ${arch})
+  string(TOLOWER "-${arch}-${OS_NAME}" BUILTINS_TEST_CONFIG_SUFFIX)
+  get_test_cc_for_arch(${arch} BUILTINS_TEST_TARGET_CC BUILTINS_TEST_TARGET_CFLAGS)
+  if (${arch} STREQUAL "armhf")
+    list(APPEND BUILTINS_TEST_TARGET_CFLAGS -fomit-frame-pointer -DCOMPILER_RT_ARMHF_TARGET)
+    string(REPLACE ";" " " BUILTINS_TEST_TARGET_CFLAGS "${BUILTINS_TEST_TARGET_CFLAGS}")
+  endif()
+
+  string(TOUPPER ${arch} ARCH_UPPER_CASE)
+  set(CONFIG_NAME ${ARCH_UPPER_CASE}${OS_NAME}Config)
+  configure_lit_site_cfg(
+    ${CMAKE_CURRENT_SOURCE_DIR}/Unit/lit.site.cfg.in
+    ${CMAKE_CURRENT_BINARY_DIR}/Unit/${CONFIG_NAME}/lit.site.cfg
+    )
+  list(APPEND BUILTINS_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/Unit/${CONFIG_NAME})
+endforeach()
+endif()
+
 add_lit_testsuite(check-builtins "Running the Builtins tests"
   ${BUILTINS_TESTSUITES}
   DEPENDS ${BUILTINS_TEST_DEPS})
diff --git a/test/builtins/TestCases/Darwin/os_version_check_test_no_core_foundation.c b/test/builtins/TestCases/Darwin/os_version_check_test_no_core_foundation.c
new file mode 100644
index 0000000..4e0da35
--- /dev/null
+++ b/test/builtins/TestCases/Darwin/os_version_check_test_no_core_foundation.c
@@ -0,0 +1,12 @@
+// RUN: %clang %s -o %t -mmacosx-version-min=10.5
+// RUN: %run %t
+
+int __isOSVersionAtLeast(int Major, int Minor, int Subminor);
+
+int main() {
+  // When CoreFoundation isn't linked, we expect the system version to be 0, 0,
+  // 0.
+  if (__isOSVersionAtLeast(1, 0, 0))
+    return 1;
+  return 0;
+}
diff --git a/test/builtins/Unit/absvdi2_test.c b/test/builtins/Unit/absvdi2_test.c
index f69ae41..dd9dfd1 100644
--- a/test/builtins/Unit/absvdi2_test.c
+++ b/test/builtins/Unit/absvdi2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- absvdi2_test.c - Test __absvdi2 -----------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/absvsi2_test.c b/test/builtins/Unit/absvsi2_test.c
index c395cca..bae306b 100644
--- a/test/builtins/Unit/absvsi2_test.c
+++ b/test/builtins/Unit/absvsi2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- absvsi2_test.c - Test __absvsi2 -----------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/absvti2_test.c b/test/builtins/Unit/absvti2_test.c
index 6c626e9..0c0117d 100644
--- a/test/builtins/Unit/absvti2_test.c
+++ b/test/builtins/Unit/absvti2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- absvti2_test.c - Test __absvti2 -----------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/adddf3vfp_test.c b/test/builtins/Unit/adddf3vfp_test.c
index c1b9884..e0da08b 100644
--- a/test/builtins/Unit/adddf3vfp_test.c
+++ b/test/builtins/Unit/adddf3vfp_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- adddf3vfp_test.c - Test __adddf3vfp -------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/addsf3vfp_test.c b/test/builtins/Unit/addsf3vfp_test.c
index 958865d..ed18de3 100644
--- a/test/builtins/Unit/addsf3vfp_test.c
+++ b/test/builtins/Unit/addsf3vfp_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- addsf3vfp_test.c - Test __addsf3vfp -------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/addtf3_test.c b/test/builtins/Unit/addtf3_test.c
index 7b92cce..57a4729 100644
--- a/test/builtins/Unit/addtf3_test.c
+++ b/test/builtins/Unit/addtf3_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===--------------- addtf3_test.c - Test __addtf3 ------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/addvdi3_test.c b/test/builtins/Unit/addvdi3_test.c
index 5f8729a..99e7040 100644
--- a/test/builtins/Unit/addvdi3_test.c
+++ b/test/builtins/Unit/addvdi3_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- addvdi3_test.c - Test __addvdi3 -----------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/addvsi3_test.c b/test/builtins/Unit/addvsi3_test.c
index b5358d0..11fdbc3 100644
--- a/test/builtins/Unit/addvsi3_test.c
+++ b/test/builtins/Unit/addvsi3_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- addvsi3_test.c - Test __addvsi3 -----------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/addvti3_test.c b/test/builtins/Unit/addvti3_test.c
index e2f75cf..3ffcf4b 100644
--- a/test/builtins/Unit/addvti3_test.c
+++ b/test/builtins/Unit/addvti3_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- addvti3_test.c - Test __addvti3 -----------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/arm/aeabi_cdcmpeq_test.c b/test/builtins/Unit/arm/aeabi_cdcmpeq_test.c
index ce8b0a4..0d9e006 100644
--- a/test/builtins/Unit/arm/aeabi_cdcmpeq_test.c
+++ b/test/builtins/Unit/arm/aeabi_cdcmpeq_test.c
@@ -1,3 +1,6 @@
+// REQUIRES-ANY: arm-target-arch,armv6m-target-arch
+// RUN: %arm_call_apsr -o %t.aspr.o
+// RUN: %clang_builtins %s %t.aspr.o %librt -o %t && %run %t
 //===-- aeabi_cdcmpeq.c - Test __aeabi_cdcmpeq ----------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/arm/aeabi_cdcmple_test.c b/test/builtins/Unit/arm/aeabi_cdcmple_test.c
index afc7014..e499bf8 100644
--- a/test/builtins/Unit/arm/aeabi_cdcmple_test.c
+++ b/test/builtins/Unit/arm/aeabi_cdcmple_test.c
@@ -1,3 +1,7 @@
+// REQUIRES-ANY: arm-target-arch,armv6m-target-arch
+// RUN: %arm_call_apsr -o %t.aspr.o
+// RUN: %clang_builtins %s  %t.aspr.o %librt -o %t && %run %t
+
 //===-- aeabi_cdcmple.c - Test __aeabi_cdcmple and __aeabi_cdrcmple -------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/arm/aeabi_cfcmpeq_test.c b/test/builtins/Unit/arm/aeabi_cfcmpeq_test.c
index fe01664..72a556c 100644
--- a/test/builtins/Unit/arm/aeabi_cfcmpeq_test.c
+++ b/test/builtins/Unit/arm/aeabi_cfcmpeq_test.c
@@ -1,3 +1,6 @@
+// REQUIRES-ANY: arm-target-arch,armv6m-target-arch
+// RUN: %arm_call_apsr -o %t.aspr.o
+// RUN: %clang_builtins %s  %t.aspr.o %librt -o %t && %run %t
 //===-- aeabi_cfcmpeq.c - Test __aeabi_cfcmpeq ----------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/arm/aeabi_cfcmple_test.c b/test/builtins/Unit/arm/aeabi_cfcmple_test.c
index aebe425..a09aead 100644
--- a/test/builtins/Unit/arm/aeabi_cfcmple_test.c
+++ b/test/builtins/Unit/arm/aeabi_cfcmple_test.c
@@ -1,3 +1,7 @@
+// REQUIRES-ANY: arm-target-arch,armv6m-target-arch
+// RUN: %arm_call_apsr -o %t.aspr.o
+// RUN: %clang_builtins %s  %t.aspr.o %librt -o %t && %run %t
+
 //===-- aeabi_cfcmple.c - Test __aeabi_cfcmple and __aeabi_cfrcmple -------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/arm/aeabi_drsub_test.c b/test/builtins/Unit/arm/aeabi_drsub_test.c
index 7d867ef..8bd04a9 100644
--- a/test/builtins/Unit/arm/aeabi_drsub_test.c
+++ b/test/builtins/Unit/arm/aeabi_drsub_test.c
@@ -1,3 +1,5 @@
+// REQUIRES-ANY: arm-target-arch,armv6m-target-arch
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- aeabi_drsub.c - Test __aeabi_drsub --------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/arm/aeabi_frsub_test.c b/test/builtins/Unit/arm/aeabi_frsub_test.c
index b8b21b9..3d30161 100644
--- a/test/builtins/Unit/arm/aeabi_frsub_test.c
+++ b/test/builtins/Unit/arm/aeabi_frsub_test.c
@@ -1,3 +1,5 @@
+// REQUIRES-ANY: arm-target-arch,armv6m-target-arch
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- aeabi_frsub.c - Test __aeabi_frsub --------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/arm/aeabi_idivmod_test.c b/test/builtins/Unit/arm/aeabi_idivmod_test.c
index c732e4b..ac18046 100644
--- a/test/builtins/Unit/arm/aeabi_idivmod_test.c
+++ b/test/builtins/Unit/arm/aeabi_idivmod_test.c
@@ -1,3 +1,5 @@
+// REQUIRES-ANY: arm-target-arch,armv6m-target-arch
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- aeabi_idivmod_test.c - Test __aeabi_idivmod -----------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/arm/aeabi_uidivmod_test.c b/test/builtins/Unit/arm/aeabi_uidivmod_test.c
index 81d5e0e..9ac51da 100644
--- a/test/builtins/Unit/arm/aeabi_uidivmod_test.c
+++ b/test/builtins/Unit/arm/aeabi_uidivmod_test.c
@@ -1,3 +1,5 @@
+// REQUIRES-ANY: arm-target-arch,armv6m-target-arch
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- aeabi_uidivmod_test.c - Test __aeabi_uidivmod ---------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/arm/aeabi_uldivmod_test.c b/test/builtins/Unit/arm/aeabi_uldivmod_test.c
index f629d6b..a40f006 100644
--- a/test/builtins/Unit/arm/aeabi_uldivmod_test.c
+++ b/test/builtins/Unit/arm/aeabi_uldivmod_test.c
@@ -1,3 +1,5 @@
+// REQUIRES-ANY: arm-target-arch,armv6m-target-arch
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- aeabi_uldivmod_test.c - Test aeabi_uldivmod -----------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/arm/call_apsr.S b/test/builtins/Unit/arm/call_apsr.S
index b5e154c..2656f8d 100644
--- a/test/builtins/Unit/arm/call_apsr.S
+++ b/test/builtins/Unit/arm/call_apsr.S
@@ -22,11 +22,11 @@
 // }
 
 DEFINE_COMPILERRT_PRIVATE_FUNCTION(call_apsr_d)
-    push {lr}
-    ldr ip, [sp, #4]
-    blx ip
+    push {r7, lr}
+    ldr r7, [sp, #8]
+    blx r7
     mrs r0, apsr
-    pop {pc}
+    pop {r7, pc}
 END_COMPILERRT_FUNCTION(call_apsr_d)
 
 // __attribute__((pcs("aapcs")))
diff --git a/test/builtins/Unit/ashldi3_test.c b/test/builtins/Unit/ashldi3_test.c
index 398fb69..f0984e0 100644
--- a/test/builtins/Unit/ashldi3_test.c
+++ b/test/builtins/Unit/ashldi3_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- ashldi3_test.c - Test __ashldi3 -----------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/ashlti3_test.c b/test/builtins/Unit/ashlti3_test.c
index 595e353..0618661 100644
--- a/test/builtins/Unit/ashlti3_test.c
+++ b/test/builtins/Unit/ashlti3_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- ashlti3_test.c - Test __ashlti3 -----------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/ashrdi3_test.c b/test/builtins/Unit/ashrdi3_test.c
index ee6409c..a987c95 100644
--- a/test/builtins/Unit/ashrdi3_test.c
+++ b/test/builtins/Unit/ashrdi3_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- ashrdi3_test.c - Test __ashrdi3 -----------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/ashrti3_test.c b/test/builtins/Unit/ashrti3_test.c
index 201582d..1f08653 100644
--- a/test/builtins/Unit/ashrti3_test.c
+++ b/test/builtins/Unit/ashrti3_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- ashrti3_test.c - Test __ashrti3 -----------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/bswapdi2_test.c b/test/builtins/Unit/bswapdi2_test.c
index 2d830cf..57ee38b 100644
--- a/test/builtins/Unit/bswapdi2_test.c
+++ b/test/builtins/Unit/bswapdi2_test.c
@@ -1,3 +1,5 @@
+// UNSUPPORTED: armv6m-target-arch
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- bswapdi2_test.c - Test __bswapdi2 ---------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
@@ -11,34 +13,25 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <stdlib.h>
+#include <math.h>
 #include <stdint.h>
 #include <stdio.h>
-#include <math.h>
-
+#include <stdlib.h>
 
 extern uint64_t __bswapdi2(uint64_t);
 
-#if __arm__
-int test__bswapdi2(uint64_t a, uint64_t expected)
-{
-    uint64_t actual = __bswapdi2(a);
-    if (actual != expected)
-        printf("error in test__bswapsi2(0x%0llX) = 0x%0llX, expected 0x%0llX\n",
-               a, actual, expected);
-    return actual != expected;
+int test__bswapdi2(uint64_t a, uint64_t expected) {
+  uint64_t actual = __bswapdi2(a);
+  if (actual != expected)
+    printf("error in test__bswapsi2(0x%0llX) = 0x%0llX, expected 0x%0llX\n", a,
+           actual, expected);
+  return actual != expected;
 }
-#endif
 
-int main()
-{
-#if __arm__
-    if (test__bswapdi2(0x123456789ABCDEF0LL, 0xF0DEBC9A78563412LL))
-        return 1;
-    if (test__bswapdi2(0x0000000100000002LL, 0x0200000001000000LL))
-        return 1;
-#else
-    printf("skipped\n");
-#endif
-    return 0;
+int main() {
+  if (test__bswapdi2(0x123456789ABCDEF0LL, 0xF0DEBC9A78563412LL))
+    return 1;
+  if (test__bswapdi2(0x0000000100000002LL, 0x0200000001000000LL))
+    return 1;
+  return 0;
 }
diff --git a/test/builtins/Unit/bswapsi2_test.c b/test/builtins/Unit/bswapsi2_test.c
index 4488a88..899c251 100644
--- a/test/builtins/Unit/bswapsi2_test.c
+++ b/test/builtins/Unit/bswapsi2_test.c
@@ -1,3 +1,5 @@
+// UNSUPPORTED: armv6m-target-arch
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- bswapsi2_test.c - Test __bswapsi2 ---------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
@@ -11,34 +13,25 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <stdlib.h>
+#include <math.h>
 #include <stdint.h>
 #include <stdio.h>
-#include <math.h>
-
+#include <stdlib.h>
 
 extern uint32_t __bswapsi2(uint32_t);
 
-#if __arm__
-int test__bswapsi2(uint32_t a, uint32_t expected)
-{
-    uint32_t actual = __bswapsi2(a);
-    if (actual != expected)
-        printf("error in test__bswapsi2(0x%0X) = 0x%0X, expected 0x%0X\n",
-               a, actual, expected);
-    return actual != expected;
+int test__bswapsi2(uint32_t a, uint32_t expected) {
+  uint32_t actual = __bswapsi2(a);
+  if (actual != expected)
+    printf("error in test__bswapsi2(0x%0X) = 0x%0X, expected 0x%0X\n", a,
+           actual, expected);
+  return actual != expected;
 }
-#endif
 
-int main()
-{
-#if __arm__
-    if (test__bswapsi2(0x12345678, 0x78563412))
-        return 1;
-    if (test__bswapsi2(0x00000001, 0x01000000))
-        return 1;
-#else
-    printf("skipped\n");
-#endif
-    return 0;
+int main() {
+  if (test__bswapsi2(0x12345678, 0x78563412))
+    return 1;
+  if (test__bswapsi2(0x00000001, 0x01000000))
+    return 1;
+  return 0;
 }
diff --git a/test/builtins/Unit/clear_cache_test.c b/test/builtins/Unit/clear_cache_test.c
index 0ef704f..e50e66f 100644
--- a/test/builtins/Unit/clear_cache_test.c
+++ b/test/builtins/Unit/clear_cache_test.c
@@ -1,3 +1,6 @@
+// REQUIRES: native-run
+// UNSUPPORTED: arm, aarch64
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- clear_cache_test.c - Test clear_cache -----------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
@@ -13,12 +16,6 @@
 #include <stdint.h>
 #if defined(_WIN32)
 #include <windows.h>
-void __clear_cache(void* start, void* end)
-{
-    if (!FlushInstructionCache(GetCurrentProcess(), start, end-start))
-        exit(1);
-}
-
 static uintptr_t get_page_size() {
     SYSTEM_INFO si;
     GetSystemInfo(&si);
@@ -27,27 +24,20 @@
 #else
 #include <unistd.h>
 #include <sys/mman.h>
-extern void __clear_cache(void* start, void* end);
 
 static uintptr_t get_page_size() {
     return sysconf(_SC_PAGE_SIZE);
 }
 #endif
 
-
+extern void __clear_cache(void* start, void* end);
 
 
 typedef int (*pfunc)(void);
 
-int func1() 
-{
-    return 1;
-}
-
-int func2() 
-{
-    return 2;
-}
+// Make these static to avoid ILT jumps for incremental linking on Windows.
+static int func1() { return 1; }
+static int func2() { return 2; }
 
 void *__attribute__((noinline))
 memcpy_f(void *dst, const void *src, size_t n) {
@@ -62,7 +52,7 @@
 #endif
 }
 
-unsigned char execution_buffer[128];
+unsigned char execution_buffer[128] __attribute__((aligned(8)));
 
 int main()
 {
diff --git a/test/builtins/Unit/clzdi2_test.c b/test/builtins/Unit/clzdi2_test.c
index 41e1209..a8c0e1b 100644
--- a/test/builtins/Unit/clzdi2_test.c
+++ b/test/builtins/Unit/clzdi2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- clzdi2_test.c - Test __clzdi2 -------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/clzsi2_test.c b/test/builtins/Unit/clzsi2_test.c
index 80b300f..f86e888 100644
--- a/test/builtins/Unit/clzsi2_test.c
+++ b/test/builtins/Unit/clzsi2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- clzsi2_test.c - Test __clzsi2 -------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/clzti2_test.c b/test/builtins/Unit/clzti2_test.c
index 3a2c6fa..157838b 100644
--- a/test/builtins/Unit/clzti2_test.c
+++ b/test/builtins/Unit/clzti2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- clzti2_test.c - Test __clzti2 -------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/cmpdi2_test.c b/test/builtins/Unit/cmpdi2_test.c
index 33a12a0..1420dc7 100644
--- a/test/builtins/Unit/cmpdi2_test.c
+++ b/test/builtins/Unit/cmpdi2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- cmpdi2_test.c - Test __cmpdi2 -------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/cmpti2_test.c b/test/builtins/Unit/cmpti2_test.c
index d951923..a2215f3 100644
--- a/test/builtins/Unit/cmpti2_test.c
+++ b/test/builtins/Unit/cmpti2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- cmpti2_test.c - Test __cmpti2 -------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/comparedf2_test.c b/test/builtins/Unit/comparedf2_test.c
index 6623722..8446901 100644
--- a/test/builtins/Unit/comparedf2_test.c
+++ b/test/builtins/Unit/comparedf2_test.c
@@ -1,3 +1,5 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+
 //===-- cmpdf2_test.c - Test __cmpdf2 -------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/comparesf2_test.c b/test/builtins/Unit/comparesf2_test.c
index 026e900..1b5902f 100644
--- a/test/builtins/Unit/comparesf2_test.c
+++ b/test/builtins/Unit/comparesf2_test.c
@@ -1,3 +1,5 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+
 //===-- cmpsf2_test.c - Test __cmpsf2 -------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/cpu_model_test.c b/test/builtins/Unit/cpu_model_test.c
index ed484cd..6b47d14 100644
--- a/test/builtins/Unit/cpu_model_test.c
+++ b/test/builtins/Unit/cpu_model_test.c
@@ -1,4 +1,8 @@
-//===-- cpu_model_test.c - Test __builtin_cpu_supports -------------------------------===//
+// FIXME: XFAIL the test because it is expected to return non-zero value.
+// XFAIL: *
+// REQUIRES: x86-target-arch
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+//===-- cpu_model_test.c - Test __builtin_cpu_supports --------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,8 +15,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-// REQUIRES: x86-target-arch
-
 #include <stdio.h>
 
 int main (void) {
diff --git a/test/builtins/Unit/ctzdi2_test.c b/test/builtins/Unit/ctzdi2_test.c
index bde66b1..0515e20 100644
--- a/test/builtins/Unit/ctzdi2_test.c
+++ b/test/builtins/Unit/ctzdi2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- ctzdi2_test.c - Test __ctzdi2 -------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/ctzsi2_test.c b/test/builtins/Unit/ctzsi2_test.c
index cbc101f..bf8982b 100644
--- a/test/builtins/Unit/ctzsi2_test.c
+++ b/test/builtins/Unit/ctzsi2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- ctzsi2_test.c - Test __ctzsi2 -------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/ctzti2_test.c b/test/builtins/Unit/ctzti2_test.c
index 0ca1920..bef79b6 100644
--- a/test/builtins/Unit/ctzti2_test.c
+++ b/test/builtins/Unit/ctzti2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- ctzti2_test.c - Test __ctzti2 -------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/divdc3_test.c b/test/builtins/Unit/divdc3_test.c
index 80b9e86..042fd23 100644
--- a/test/builtins/Unit/divdc3_test.c
+++ b/test/builtins/Unit/divdc3_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- divdc3_test.c - Test __divdc3 -------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
@@ -16,6 +17,8 @@
 #include <complex.h>
 #include <stdio.h>
 
+// REQUIRES: c99-complex
+
 // Returns: the quotient of (a + ib) / (c + id)
 
 COMPILER_RT_ABI double _Complex
diff --git a/test/builtins/Unit/divdf3vfp_test.c b/test/builtins/Unit/divdf3vfp_test.c
index 8735f63..4f18409 100644
--- a/test/builtins/Unit/divdf3vfp_test.c
+++ b/test/builtins/Unit/divdf3vfp_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- divdf3vfp_test.c - Test __divdf3vfp -------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/divdi3_test.c b/test/builtins/Unit/divdi3_test.c
index 1d45980..4c8c922 100644
--- a/test/builtins/Unit/divdi3_test.c
+++ b/test/builtins/Unit/divdi3_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- divdi3_test.c - Test __divdi3 -------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/divmodsi4_test.c b/test/builtins/Unit/divmodsi4_test.c
index 6fb1c98..e766aae 100644
--- a/test/builtins/Unit/divmodsi4_test.c
+++ b/test/builtins/Unit/divmodsi4_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- divmodsi4_test.c - Test __divmodsi4 -------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/divsc3_test.c b/test/builtins/Unit/divsc3_test.c
index 2d7c659..daa2218 100644
--- a/test/builtins/Unit/divsc3_test.c
+++ b/test/builtins/Unit/divsc3_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -lm -o %t && %run %t
 //===-- divsc3_test.c - Test __divsc3 -------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
@@ -16,6 +17,8 @@
 #include <complex.h>
 #include <stdio.h>
 
+// REQUIRES: c99-complex
+
 // Returns: the quotient of (a + ib) / (c + id)
 
 COMPILER_RT_ABI float _Complex
diff --git a/test/builtins/Unit/divsf3vfp_test.c b/test/builtins/Unit/divsf3vfp_test.c
index 039fa7f..75b7eba 100644
--- a/test/builtins/Unit/divsf3vfp_test.c
+++ b/test/builtins/Unit/divsf3vfp_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- divsf3vfp_test.c - Test __divsf3vfp -------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/divsi3_test.c b/test/builtins/Unit/divsi3_test.c
index c523367..4c5d0fb 100644
--- a/test/builtins/Unit/divsi3_test.c
+++ b/test/builtins/Unit/divsi3_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- divsi3_test.c - Test __divsi3 -------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/divtc3_test.c b/test/builtins/Unit/divtc3_test.c
index a1f0613..7474330 100644
--- a/test/builtins/Unit/divtc3_test.c
+++ b/test/builtins/Unit/divtc3_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -lm -o %t && %run %t
 //===-- divtc3_test.c - Test __divtc3 -------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
@@ -17,6 +18,8 @@
 #include <math.h>
 #include <complex.h>
 
+// REQUIRES: c99-complex
+
 // Returns: the quotient of (a + ib) / (c + id)
 
 COMPILER_RT_ABI long double _Complex
diff --git a/test/builtins/Unit/divtf3_test.c b/test/builtins/Unit/divtf3_test.c
index e0def45..12cb94a 100644
--- a/test/builtins/Unit/divtf3_test.c
+++ b/test/builtins/Unit/divtf3_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===--------------- divtf3_test.c - Test __divtf3 ------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/divti3_test.c b/test/builtins/Unit/divti3_test.c
index 3a94dab..9a6bf17 100644
--- a/test/builtins/Unit/divti3_test.c
+++ b/test/builtins/Unit/divti3_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- divti3_test.c - Test __divti3 -------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/divxc3_test.c b/test/builtins/Unit/divxc3_test.c
index 509b4b1..9974dfb 100644
--- a/test/builtins/Unit/divxc3_test.c
+++ b/test/builtins/Unit/divxc3_test.c
@@ -1,3 +1,5 @@
+// RUN: %clang_builtins %s %librt -lm -o %t && %run %t
+// UNSUPPORTED: powerpc64
 //===-- divxc3_test.c - Test __divxc3 -------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
@@ -18,6 +20,9 @@
 #include <complex.h>
 #include <stdio.h>
 
+// UNSUPPORTED: mips
+// REQUIRES: c99-complex
+
 // Returns: the quotient of (a + ib) / (c + id)
 
 COMPILER_RT_ABI long double _Complex
diff --git a/test/builtins/Unit/enable_execute_stack_test.c b/test/builtins/Unit/enable_execute_stack_test.c
index 38a142a..72fc042 100644
--- a/test/builtins/Unit/enable_execute_stack_test.c
+++ b/test/builtins/Unit/enable_execute_stack_test.c
@@ -1,3 +1,5 @@
+// REQUIRES: native-run
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- enable_execute_stack_test.c - Test __enable_execute_stack ----------===//
 //
 //                     The LLVM Compiler Infrastructure
@@ -11,39 +13,14 @@
 #include <stdio.h>
 #include <string.h>
 #include <stdint.h>
-#if defined(_WIN32)
-#include <windows.h>
-void __clear_cache(void* start, void* end)
-{
-    if (!FlushInstructionCache(GetCurrentProcess(), start, end-start))
-        exit(1);
-}
-void __enable_execute_stack(void *addr)
-{
-    MEMORY_BASIC_INFORMATION b;
-
-    if (!VirtualQuery(addr, &b, sizeof(b)))
-        exit(1);
-    if (!VirtualProtect(b.BaseAddress, b.RegionSize, PAGE_EXECUTE_READWRITE, &b.Protect))
-        exit(1);
-}
-#else
-#include <sys/mman.h>
 extern void __clear_cache(void* start, void* end);
 extern void __enable_execute_stack(void* addr);
-#endif
 
 typedef int (*pfunc)(void);
 
-int func1() 
-{
-    return 1;
-}
-
-int func2() 
-{
-    return 2;
-}
+// Make these static to avoid ILT jumps for incremental linking on Windows.
+static int func1() { return 1; }
+static int func2() { return 2; }
 
 void *__attribute__((noinline))
 memcpy_f(void *dst, const void *src, size_t n) {
@@ -67,6 +44,7 @@
     // verify you can copy and execute a function
     pfunc f1 = (pfunc)memcpy_f(execution_buffer, func1, 128);
     __clear_cache(execution_buffer, &execution_buffer[128]);
+    printf("f1: %p\n", f1);
     if ((*f1)() != 1)
         return 1;
 
diff --git a/test/builtins/Unit/endianness.h b/test/builtins/Unit/endianness.h
index 06c53de..9cecd21 100644
--- a/test/builtins/Unit/endianness.h
+++ b/test/builtins/Unit/endianness.h
@@ -51,7 +51,7 @@
 
 /* .. */
 
-#if defined(__OpenBSD__) || defined(__Bitrig__)
+#if defined(__OpenBSD__)
 #include <machine/endian.h>
 
 #if _BYTE_ORDER == _BIG_ENDIAN
@@ -62,7 +62,7 @@
 #define _YUGA_BIG_ENDIAN    0
 #endif /* _BYTE_ORDER */
 
-#endif /* OpenBSD and Bitrig. */
+#endif /* OpenBSD */
 
 /* .. */
 
diff --git a/test/builtins/Unit/eqdf2vfp_test.c b/test/builtins/Unit/eqdf2vfp_test.c
index 4780d87..69dd37b 100644
--- a/test/builtins/Unit/eqdf2vfp_test.c
+++ b/test/builtins/Unit/eqdf2vfp_test.c
@@ -1,3 +1,5 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+
 //===-- eqdf2vfp_test.c - Test __eqdf2vfp ---------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/eqsf2vfp_test.c b/test/builtins/Unit/eqsf2vfp_test.c
index 7d6f581..9c8dc16 100644
--- a/test/builtins/Unit/eqsf2vfp_test.c
+++ b/test/builtins/Unit/eqsf2vfp_test.c
@@ -1,3 +1,5 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+
 //===-- eqsf2vfp_test.c - Test __eqsf2vfp ---------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/eqtf2_test.c b/test/builtins/Unit/eqtf2_test.c
index 0385835..91b35cf 100644
--- a/test/builtins/Unit/eqtf2_test.c
+++ b/test/builtins/Unit/eqtf2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===------------ eqtf2_test.c - Test __eqtf2------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/extebdsfdf2vfp_test.c b/test/builtins/Unit/extebdsfdf2vfp_test.c
index ec27c4c..c350d93 100644
--- a/test/builtins/Unit/extebdsfdf2vfp_test.c
+++ b/test/builtins/Unit/extebdsfdf2vfp_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- extendsfdf2vfp_test.c - Test __extendsfdf2vfp ---------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/extenddftf2_test.c b/test/builtins/Unit/extenddftf2_test.c
index 2cfb32b..7254141 100644
--- a/test/builtins/Unit/extenddftf2_test.c
+++ b/test/builtins/Unit/extenddftf2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===--------------- extenddftf2_test.c - Test __extenddftf2 --------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/extendhfsf2_test.c b/test/builtins/Unit/extendhfsf2_test.c
index 5dd994c..d423e7b 100644
--- a/test/builtins/Unit/extendhfsf2_test.c
+++ b/test/builtins/Unit/extendhfsf2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===--------------- extendhfsf2_test.c - Test __extendhfsf2 --------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/extendsftf2_test.c b/test/builtins/Unit/extendsftf2_test.c
index 7dff5b6..4fad906 100644
--- a/test/builtins/Unit/extendsftf2_test.c
+++ b/test/builtins/Unit/extendsftf2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===--------------- extendsftf2_test.c - Test __extendsftf2 --------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/ffsdi2_test.c b/test/builtins/Unit/ffsdi2_test.c
index a27d154..80c0c08 100644
--- a/test/builtins/Unit/ffsdi2_test.c
+++ b/test/builtins/Unit/ffsdi2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- ffsdi2_test.c - Test __ffsdi2 -------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/ffssi2_test.c b/test/builtins/Unit/ffssi2_test.c
new file mode 100644
index 0000000..5d96b96
--- /dev/null
+++ b/test/builtins/Unit/ffssi2_test.c
@@ -0,0 +1,57 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+//===-- ffssi2_test.c - Test __ffssi2 -------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file tests __ffssi2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+#include <stdio.h>
+
+// Returns: the index of the least significant 1-bit in a, or
+// the value zero if a is zero. The least significant bit is index one.
+
+COMPILER_RT_ABI si_int __ffssi2(si_int a);
+
+int test__ffssi2(si_int a, si_int expected)
+{
+    si_int x = __ffssi2(a);
+    if (x != expected)
+        printf("error in __ffssi2(0x%X) = %d, expected %d\n", a, x, expected);
+    return x != expected;
+}
+
+int main()
+{
+    if (test__ffssi2(0x00000000, 0))
+        return 1;
+    if (test__ffssi2(0x00000001, 1))
+        return 1;
+    if (test__ffssi2(0x00000002, 2))
+        return 1;
+    if (test__ffssi2(0x00000003, 1))
+        return 1;
+    if (test__ffssi2(0x00000004, 3))
+        return 1;
+    if (test__ffssi2(0x00000005, 1))
+        return 1;
+    if (test__ffssi2(0x0000000A, 2))
+        return 1;
+    if (test__ffssi2(0x10000000, 29))
+        return 1;
+    if (test__ffssi2(0x20000000, 30))
+        return 1;
+    if (test__ffssi2(0x60000000, 30))
+        return 1;
+    if (test__ffssi2(0x80000000u, 32))
+        return 1;
+
+   return 0;
+}
diff --git a/test/builtins/Unit/ffsti2_test.c b/test/builtins/Unit/ffsti2_test.c
index 396269d..3b312c4 100644
--- a/test/builtins/Unit/ffsti2_test.c
+++ b/test/builtins/Unit/ffsti2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- ffsti2_test.c - Test __ffsti2 -------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/fixdfdi_test.c b/test/builtins/Unit/fixdfdi_test.c
index 4a7cfa3..d2ba7b1 100644
--- a/test/builtins/Unit/fixdfdi_test.c
+++ b/test/builtins/Unit/fixdfdi_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- fixdfdi_test.c - Test __fixdfdi -----------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/fixdfsivfp_test.c b/test/builtins/Unit/fixdfsivfp_test.c
index 73e4e58..33b4d24 100644
--- a/test/builtins/Unit/fixdfsivfp_test.c
+++ b/test/builtins/Unit/fixdfsivfp_test.c
@@ -1,3 +1,5 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+
 //===-- fixdfsivfp_test.c - Test __fixdfsivfp -----------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/fixdfti_test.c b/test/builtins/Unit/fixdfti_test.c
index b5da456..0abe187 100644
--- a/test/builtins/Unit/fixdfti_test.c
+++ b/test/builtins/Unit/fixdfti_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- fixdfti_test.c - Test __fixdfti -----------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/fixsfdi_test.c b/test/builtins/Unit/fixsfdi_test.c
index f37ecef..468299f 100644
--- a/test/builtins/Unit/fixsfdi_test.c
+++ b/test/builtins/Unit/fixsfdi_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- fixsfdi_test.c - Test __fixsfdi -----------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/fixsfsivfp_test.c b/test/builtins/Unit/fixsfsivfp_test.c
index 0ded952..ee33a1d 100644
--- a/test/builtins/Unit/fixsfsivfp_test.c
+++ b/test/builtins/Unit/fixsfsivfp_test.c
@@ -1,3 +1,5 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+
 //===-- fixsfsivfp_test.c - Test __fixsfsivfp -----------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/fixsfti_test.c b/test/builtins/Unit/fixsfti_test.c
index 38748aa..ec4e8dd 100644
--- a/test/builtins/Unit/fixsfti_test.c
+++ b/test/builtins/Unit/fixsfti_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- fixsfti_test.c - Test __fixsfti -----------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/fixtfdi_test.c b/test/builtins/Unit/fixtfdi_test.c
index cc25bec..3cd1ebf 100644
--- a/test/builtins/Unit/fixtfdi_test.c
+++ b/test/builtins/Unit/fixtfdi_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===--------------- fixtfdi_test.c - Test __fixtfdi ----------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/fixtfsi_test.c b/test/builtins/Unit/fixtfsi_test.c
index 1da516b..59e4352 100644
--- a/test/builtins/Unit/fixtfsi_test.c
+++ b/test/builtins/Unit/fixtfsi_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===--------------- fixtfsi_test.c - Test __fixtfsi ----------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/fixtfti_test.c b/test/builtins/Unit/fixtfti_test.c
index 52184ca..4d3b56c 100644
--- a/test/builtins/Unit/fixtfti_test.c
+++ b/test/builtins/Unit/fixtfti_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===--------------- fixtfti_test.c - Test __fixtfti ----------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/fixunsdfdi_test.c b/test/builtins/Unit/fixunsdfdi_test.c
index 3998482..ab09bd0 100644
--- a/test/builtins/Unit/fixunsdfdi_test.c
+++ b/test/builtins/Unit/fixunsdfdi_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- fixunsdfdi_test.c - Test __fixunsdfdi -----------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/fixunsdfsi_test.c b/test/builtins/Unit/fixunsdfsi_test.c
index 8d3d630..7fbd9ee 100644
--- a/test/builtins/Unit/fixunsdfsi_test.c
+++ b/test/builtins/Unit/fixunsdfsi_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- fixunsdfsi_test.c - Test __fixunsdfsi -----------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/fixunsdfsivfp_test.c b/test/builtins/Unit/fixunsdfsivfp_test.c
index 33cec81..b707992 100644
--- a/test/builtins/Unit/fixunsdfsivfp_test.c
+++ b/test/builtins/Unit/fixunsdfsivfp_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- fixunsdfsivfp_test.c - Test __fixunsdfsivfp -----------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/fixunsdfti_test.c b/test/builtins/Unit/fixunsdfti_test.c
index 0298fb9..249d8f8 100644
--- a/test/builtins/Unit/fixunsdfti_test.c
+++ b/test/builtins/Unit/fixunsdfti_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- fixunsdfti_test.c - Test __fixunsdfti -----------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/fixunssfdi_test.c b/test/builtins/Unit/fixunssfdi_test.c
index 812457a..2d693eb 100644
--- a/test/builtins/Unit/fixunssfdi_test.c
+++ b/test/builtins/Unit/fixunssfdi_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- fixunssfdi_test.c - Test __fixunssfdi -----------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/fixunssfsi_test.c b/test/builtins/Unit/fixunssfsi_test.c
index 17293e4..3974b53 100644
--- a/test/builtins/Unit/fixunssfsi_test.c
+++ b/test/builtins/Unit/fixunssfsi_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- fixunssfsi_test.c - Test __fixunssfsi -----------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/fixunssfsivfp_test.c b/test/builtins/Unit/fixunssfsivfp_test.c
index 9c8194f..c1d8ed7 100644
--- a/test/builtins/Unit/fixunssfsivfp_test.c
+++ b/test/builtins/Unit/fixunssfsivfp_test.c
@@ -1,3 +1,5 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+
 //===-- fixunssfsivfp_test.c - Test __fixunssfsivfp -----------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/fixunssfti_test.c b/test/builtins/Unit/fixunssfti_test.c
index 979d661..fd2d108 100644
--- a/test/builtins/Unit/fixunssfti_test.c
+++ b/test/builtins/Unit/fixunssfti_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- fixunssfti_test.c - Test __fixunssfti -----------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/fixunstfdi_test.c b/test/builtins/Unit/fixunstfdi_test.c
index 817c59b..67fcc2f 100644
--- a/test/builtins/Unit/fixunstfdi_test.c
+++ b/test/builtins/Unit/fixunstfdi_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- fixunstfdi_test.c - Test __fixunstfdi -----------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/fixunstfsi_test.c b/test/builtins/Unit/fixunstfsi_test.c
index 13ed779..edf5422 100644
--- a/test/builtins/Unit/fixunstfsi_test.c
+++ b/test/builtins/Unit/fixunstfsi_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===--------------- fixunstfsi_test.c - Test __fixunstfsi ----------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/fixunstfti_test.c b/test/builtins/Unit/fixunstfti_test.c
index 60a0982..eaf4b8f 100644
--- a/test/builtins/Unit/fixunstfti_test.c
+++ b/test/builtins/Unit/fixunstfti_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- fixunstfti_test.c - Test __fixunstfti -----------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
@@ -13,6 +14,8 @@
 
 #include <stdio.h>
 
+// UNSUPPORTED: mips
+
 #if __LDBL_MANT_DIG__ == 113
 
 #include "fp_test.h"
diff --git a/test/builtins/Unit/fixunsxfdi_test.c b/test/builtins/Unit/fixunsxfdi_test.c
index 6f42079..c5c27d3 100644
--- a/test/builtins/Unit/fixunsxfdi_test.c
+++ b/test/builtins/Unit/fixunsxfdi_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- fixunsxfdi_test.c - Test __fixunsxfdi -----------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/fixunsxfsi_test.c b/test/builtins/Unit/fixunsxfsi_test.c
index 0d78dcb..e0bed7e 100644
--- a/test/builtins/Unit/fixunsxfsi_test.c
+++ b/test/builtins/Unit/fixunsxfsi_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- fixunsxfsi_test.c - Test __fixunsxfsi -----------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/fixunsxfti_test.c b/test/builtins/Unit/fixunsxfti_test.c
index 94b5aeb..256ba0c 100644
--- a/test/builtins/Unit/fixunsxfti_test.c
+++ b/test/builtins/Unit/fixunsxfti_test.c
@@ -1,3 +1,6 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+// REQUIRES: x86-target-arch
+
 //===-- fixunsxfti_test.c - Test __fixunsxfti -----------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/fixxfdi_test.c b/test/builtins/Unit/fixxfdi_test.c
index 0a90a56..40ad2b0 100644
--- a/test/builtins/Unit/fixxfdi_test.c
+++ b/test/builtins/Unit/fixxfdi_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- fixxfdi_test.c - Test __fixxfdi -----------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/fixxfti_test.c b/test/builtins/Unit/fixxfti_test.c
index b8573cc..518ef44 100644
--- a/test/builtins/Unit/fixxfti_test.c
+++ b/test/builtins/Unit/fixxfti_test.c
@@ -1,3 +1,6 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+// REQUIRES: x86-target-arch
+
 //===-- fixxfti_test.c - Test __fixxfti -----------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/floatdidf_test.c b/test/builtins/Unit/floatdidf_test.c
index 9bf2be9..7742917 100644
--- a/test/builtins/Unit/floatdidf_test.c
+++ b/test/builtins/Unit/floatdidf_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- floatdidf.c - Test __floatdidf ------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/floatdisf_test.c b/test/builtins/Unit/floatdisf_test.c
index a55c6a9..8299bb1 100644
--- a/test/builtins/Unit/floatdisf_test.c
+++ b/test/builtins/Unit/floatdisf_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- floatdisf_test.c - Test __floatdisf -------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/floatditf_test.c b/test/builtins/Unit/floatditf_test.c
index 8cf8a08..bdfb7f9 100644
--- a/test/builtins/Unit/floatditf_test.c
+++ b/test/builtins/Unit/floatditf_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- floatditf_test.c - Test __floatditf -------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/floatdixf_test.c b/test/builtins/Unit/floatdixf_test.c
index f6ab5a4..0a4ca3a 100644
--- a/test/builtins/Unit/floatdixf_test.c
+++ b/test/builtins/Unit/floatdixf_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- floatdixf_test.c - Test __floatdixf -------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/floatsidfvfp_test.c b/test/builtins/Unit/floatsidfvfp_test.c
index 8404a7e..8a3c7fb 100644
--- a/test/builtins/Unit/floatsidfvfp_test.c
+++ b/test/builtins/Unit/floatsidfvfp_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- floatsidfvfp_test.c - Test __floatsidfvfp -------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/floatsisfvfp_test.c b/test/builtins/Unit/floatsisfvfp_test.c
index c41cf9d..7c044b2 100644
--- a/test/builtins/Unit/floatsisfvfp_test.c
+++ b/test/builtins/Unit/floatsisfvfp_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- floatsisfvfp_test.c - Test __floatsisfvfp -------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/floatsitf_test.c b/test/builtins/Unit/floatsitf_test.c
index 6f98721..f8d700b 100644
--- a/test/builtins/Unit/floatsitf_test.c
+++ b/test/builtins/Unit/floatsitf_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===--------------- floatsitf_test.c - Test __floatsitf ------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/floattidf_test.c b/test/builtins/Unit/floattidf_test.c
index 3af382a..9da8c5d 100644
--- a/test/builtins/Unit/floattidf_test.c
+++ b/test/builtins/Unit/floattidf_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- floattidf.c - Test __floattidf ------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/floattisf_test.c b/test/builtins/Unit/floattisf_test.c
index 0f5dc54..9d7282c 100644
--- a/test/builtins/Unit/floattisf_test.c
+++ b/test/builtins/Unit/floattisf_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- floattisf_test.c - Test __floattisf -------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/floattitf_test.c b/test/builtins/Unit/floattitf_test.c
index 928b2e8..5aeb760 100644
--- a/test/builtins/Unit/floattitf_test.c
+++ b/test/builtins/Unit/floattitf_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- floattitf.c - Test __floattitf ------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/floattixf_test.c b/test/builtins/Unit/floattixf_test.c
index d281deb..77a6f7d 100644
--- a/test/builtins/Unit/floattixf_test.c
+++ b/test/builtins/Unit/floattixf_test.c
@@ -1,3 +1,6 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+// REQUIRES: x86-target-arch
+
 //===-- floattixf.c - Test __floattixf ------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/floatundidf_test.c b/test/builtins/Unit/floatundidf_test.c
index 97fb1e5..d5d7419 100644
--- a/test/builtins/Unit/floatundidf_test.c
+++ b/test/builtins/Unit/floatundidf_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- floatundidf_test.c - Test __floatundidf ---------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/floatundisf_test.c b/test/builtins/Unit/floatundisf_test.c
index 40b6bcc..898d886 100644
--- a/test/builtins/Unit/floatundisf_test.c
+++ b/test/builtins/Unit/floatundisf_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- floatundisf_test.c - Test __floatundisf ---------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/floatunditf_test.c b/test/builtins/Unit/floatunditf_test.c
index b66a174..342dc27 100644
--- a/test/builtins/Unit/floatunditf_test.c
+++ b/test/builtins/Unit/floatunditf_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- floatunditf_test.c - Test __floatunditf ---------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/floatundixf_test.c b/test/builtins/Unit/floatundixf_test.c
index 690dce1..47b4316 100644
--- a/test/builtins/Unit/floatundixf_test.c
+++ b/test/builtins/Unit/floatundixf_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- floatundixf_test.c - Test __floatundixf ---------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/floatunsitf_test.c b/test/builtins/Unit/floatunsitf_test.c
index c3d3fe9..966a4fd 100644
--- a/test/builtins/Unit/floatunsitf_test.c
+++ b/test/builtins/Unit/floatunsitf_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===--------------- floatunsitf_test.c - Test __floatunsitf --------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/floatunssidfvfp_test.c b/test/builtins/Unit/floatunssidfvfp_test.c
index 1671c74..7337557 100644
--- a/test/builtins/Unit/floatunssidfvfp_test.c
+++ b/test/builtins/Unit/floatunssidfvfp_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- floatunssidfvfp_test.c - Test __floatunssidfvfp -------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/floatunssisfvfp_test.c b/test/builtins/Unit/floatunssisfvfp_test.c
index 506f3be..9d7fb65 100644
--- a/test/builtins/Unit/floatunssisfvfp_test.c
+++ b/test/builtins/Unit/floatunssisfvfp_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- floatunssisfvfp_test.c - Test __floatunssisfvfp -------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/floatuntidf_test.c b/test/builtins/Unit/floatuntidf_test.c
index 9855ff7..1bf19ba 100644
--- a/test/builtins/Unit/floatuntidf_test.c
+++ b/test/builtins/Unit/floatuntidf_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- floatuntidf.c - Test __floatuntidf --------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/floatuntisf_test.c b/test/builtins/Unit/floatuntisf_test.c
index 9b5ff79..c7c11ba 100644
--- a/test/builtins/Unit/floatuntisf_test.c
+++ b/test/builtins/Unit/floatuntisf_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- floatuntisf.c - Test __floatuntisf --------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/floatuntitf_test.c b/test/builtins/Unit/floatuntitf_test.c
index 495adcf..e81c30e 100644
--- a/test/builtins/Unit/floatuntitf_test.c
+++ b/test/builtins/Unit/floatuntitf_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- floatuntitf.c - Test __floatuntitf --------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/floatuntixf_test.c b/test/builtins/Unit/floatuntixf_test.c
index c58b55d..0f7ad46 100644
--- a/test/builtins/Unit/floatuntixf_test.c
+++ b/test/builtins/Unit/floatuntixf_test.c
@@ -1,3 +1,6 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+// REQUIRES: x86-target-arch
+
 //===-- floatuntixf.c - Test __floatuntixf --------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/fp_test.h b/test/builtins/Unit/fp_test.h
index 1f0e7be..781b7e2 100644
--- a/test/builtins/Unit/fp_test.h
+++ b/test/builtins/Unit/fp_test.h
@@ -85,7 +85,7 @@
     if (rep == expected){
         return 0;
     }
-    // test other posible NaN representation(signal NaN)
+    // test other possible NaN representation(signal NaN)
     else if (expected == 0x7e00U){
         if ((rep & 0x7c00U) == 0x7c00U &&
             (rep & 0x3ffU) > 0){
@@ -103,7 +103,7 @@
     if (rep == expected){
         return 0;
     }
-    // test other posible NaN representation(signal NaN)
+    // test other possible NaN representation(signal NaN)
     else if (expected == 0x7fc00000U){
         if ((rep & 0x7f800000U) == 0x7f800000U &&
             (rep & 0x7fffffU) > 0){
@@ -121,7 +121,7 @@
     if (rep == expected){
         return 0;
     }
-    // test other posible NaN representation(signal NaN)
+    // test other possible NaN representation(signal NaN)
     else if (expected == 0x7ff8000000000000UL){
         if ((rep & 0x7ff0000000000000UL) == 0x7ff0000000000000UL &&
             (rep & 0xfffffffffffffUL) > 0){
@@ -146,7 +146,7 @@
     if (hi == expectedHi && lo == expectedLo){
         return 0;
     }
-    // test other posible NaN representation(signal NaN)
+    // test other possible NaN representation(signal NaN)
     else if (expectedHi == 0x7fff800000000000UL && expectedLo == 0x0UL){
         if ((hi & 0x7fff000000000000UL) == 0x7fff000000000000UL &&
             ((hi & 0xffffffffffffUL) > 0 || lo > 0)){
diff --git a/test/builtins/Unit/gcc_personality_test.c b/test/builtins/Unit/gcc_personality_test.c
index f9598c6..b3345dd 100644
--- a/test/builtins/Unit/gcc_personality_test.c
+++ b/test/builtins/Unit/gcc_personality_test.c
@@ -1,3 +1,6 @@
+// FIXME: XFAIL as currently it cannot be built by lit properly.
+// XFAIL: *
+// RUN: %clangxx_builtins %s %librt -o %t && %run %t
 /* ===-- gcc_personality_test.c - Tests __gcc_personality_v0 -------------===
  *
  *                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/gedf2vfp_test.c b/test/builtins/Unit/gedf2vfp_test.c
index 341fd65..ad72083 100644
--- a/test/builtins/Unit/gedf2vfp_test.c
+++ b/test/builtins/Unit/gedf2vfp_test.c
@@ -1,3 +1,5 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+
 //===-- gedf2vfp_test.c - Test __gedf2vfp ---------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/gesf2vfp_test.c b/test/builtins/Unit/gesf2vfp_test.c
index 607d988..8a855e1 100644
--- a/test/builtins/Unit/gesf2vfp_test.c
+++ b/test/builtins/Unit/gesf2vfp_test.c
@@ -1,3 +1,5 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+
 //===-- gesf2vfp_test.c - Test __gesf2vfp ---------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/getf2_test.c b/test/builtins/Unit/getf2_test.c
index 9796b8a..115b630 100644
--- a/test/builtins/Unit/getf2_test.c
+++ b/test/builtins/Unit/getf2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===------------ getf2_test.c - Test __getf2------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/gtdf2vfp_test.c b/test/builtins/Unit/gtdf2vfp_test.c
index 1bf68bf..e6eb545 100644
--- a/test/builtins/Unit/gtdf2vfp_test.c
+++ b/test/builtins/Unit/gtdf2vfp_test.c
@@ -1,3 +1,5 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+
 //===-- gtdf2vfp_test.c - Test __gtdf2vfp ---------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/gtsf2vfp_test.c b/test/builtins/Unit/gtsf2vfp_test.c
index 8209647..e0442c6 100644
--- a/test/builtins/Unit/gtsf2vfp_test.c
+++ b/test/builtins/Unit/gtsf2vfp_test.c
@@ -1,3 +1,5 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+
 //===-- gtsf2vfp_test.c - Test __gtsf2vfp ---------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/gttf2_test.c b/test/builtins/Unit/gttf2_test.c
index 6508d4b..81d68cb 100644
--- a/test/builtins/Unit/gttf2_test.c
+++ b/test/builtins/Unit/gttf2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===------------ gttf2_test.c - Test __gttf2------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/ledf2vfp_test.c b/test/builtins/Unit/ledf2vfp_test.c
index 2e1daf0..f0cd56e 100644
--- a/test/builtins/Unit/ledf2vfp_test.c
+++ b/test/builtins/Unit/ledf2vfp_test.c
@@ -1,3 +1,5 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+
 //===-- ledf2vfp_test.c - Test __ledf2vfp ---------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/lesf2vfp_test.c b/test/builtins/Unit/lesf2vfp_test.c
index 0f89393..02ae182 100644
--- a/test/builtins/Unit/lesf2vfp_test.c
+++ b/test/builtins/Unit/lesf2vfp_test.c
@@ -1,3 +1,5 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+
 //===-- lesf2vfp_test.c - Test __lesf2vfp ---------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/letf2_test.c b/test/builtins/Unit/letf2_test.c
index 1842e3c..bb84525 100644
--- a/test/builtins/Unit/letf2_test.c
+++ b/test/builtins/Unit/letf2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===------------ letf2_test.c - Test __letf2------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/lit.cfg b/test/builtins/Unit/lit.cfg
new file mode 100644
index 0000000..0e17e47
--- /dev/null
+++ b/test/builtins/Unit/lit.cfg
@@ -0,0 +1,87 @@
+# -*- Python -*-
+
+import os
+import platform
+
+import lit.formats
+
+def get_required_attr(config, attr_name):
+  attr_value = getattr(config, attr_name, None)
+  if attr_value == None:
+    lit_config.fatal(
+      "No attribute %r in test configuration! You may need to run "
+      "tests from your build directory or add this attribute "
+      "to lit.site.cfg " % attr_name)
+  return attr_value
+
+# Setup config name.
+config.name = 'Builtins' + config.name_suffix
+
+# Platform-specific default Builtins_OPTIONS for lit tests.
+default_builtins_opts = ''
+
+# Setup source root.
+config.test_source_root = os.path.dirname(__file__)
+
+# Path to the static library
+is_msvc = get_required_attr(config, "builtins_is_msvc")
+if is_msvc:
+  base_lib = os.path.join(config.compiler_rt_libdir, "clang_rt.builtins-%s.lib "
+                          % config.target_arch)
+  config.substitutions.append( ("%librt ", base_lib) )
+else:
+  base_lib = os.path.join(config.compiler_rt_libdir, "libclang_rt.builtins-%s.a"
+                          % config.target_arch)
+  config.substitutions.append( ("%librt ", base_lib + ' -lc -lm ') )
+
+builtins_source_dir = os.path.join(
+  get_required_attr(config, "compiler_rt_src_root"), "lib", "builtins")
+builtins_lit_source_dir = get_required_attr(config, "builtins_lit_source_dir")
+
+extra_link_flags = ["-nodefaultlibs"]
+
+target_cflags = [get_required_attr(config, "target_cflags")]
+target_cflags += ['-fno-builtin', '-I', builtins_source_dir]
+target_cflags += extra_link_flags
+target_cxxflags = config.cxx_mode_flags + target_cflags
+clang_builtins_static_cflags = ([""] +
+                            config.debug_info_flags + target_cflags)
+clang_builtins_static_cxxflags = config.cxx_mode_flags + \
+                                 clang_builtins_static_cflags
+
+clang_builtins_cflags = clang_builtins_static_cflags
+clang_builtins_cxxflags = clang_builtins_static_cxxflags
+
+if not is_msvc:
+  config.available_features.add('c99-complex')
+
+clang_wrapper = ""
+
+def build_invocation(compile_flags):
+  return " " + " ".join([clang_wrapper, config.clang] + compile_flags) + " "
+
+
+target_arch = config.target_arch
+if (target_arch == "arm"):
+  target_arch = "armv7"
+
+config.substitutions.append( ("%clang ", build_invocation(target_cflags)) )
+config.substitutions.append( ("%clangxx ", build_invocation(target_cxxflags)) )
+config.substitutions.append( ("%clang_builtins ", \
+                              build_invocation(clang_builtins_cflags)))
+config.substitutions.append( ("%clangxx_builtins ", \
+                              build_invocation(clang_builtins_cxxflags)))
+
+# FIXME: move the call_apsr.s into call_apsr.h as inline-asm.
+# some ARM tests needs call_apsr.s
+call_apsr_source = os.path.join(builtins_lit_source_dir, 'arm', 'call_apsr.S')
+march_flag = '-march=' + target_arch
+call_apsr_flags = ['-c', march_flag, call_apsr_source]
+config.substitutions.append( ("%arm_call_apsr ", \
+                              build_invocation(call_apsr_flags)) )
+
+# Default test suffixes.
+config.suffixes = ['.c', '.cc', '.cpp']
+
+if not config.emulator:
+  config.available_features.add('native-run')
diff --git a/test/builtins/Unit/lit.site.cfg.in b/test/builtins/Unit/lit.site.cfg.in
new file mode 100644
index 0000000..4241bdc
--- /dev/null
+++ b/test/builtins/Unit/lit.site.cfg.in
@@ -0,0 +1,12 @@
+@LIT_SITE_CFG_IN_HEADER@
+
+config.name_suffix = "@BUILTINS_TEST_CONFIG_SUFFIX@"
+config.builtins_lit_source_dir = "@BUILTINS_LIT_SOURCE_DIR@/Unit"
+config.target_cflags = "@BUILTINS_TEST_TARGET_CFLAGS@"
+config.target_arch = "@BUILTINS_TEST_TARGET_ARCH@"
+config.builtins_is_msvc = @MSVC_PYBOOL@
+# Load common config for all compiler-rt lit tests.
+lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/test/lit.common.configured")
+
+# Load tool-specific config that would do the real work.
+lit_config.load_config(config, "@BUILTINS_LIT_SOURCE_DIR@/Unit/lit.cfg")
diff --git a/test/builtins/Unit/lshrdi3_test.c b/test/builtins/Unit/lshrdi3_test.c
index d48ae4d..c5faa98 100644
--- a/test/builtins/Unit/lshrdi3_test.c
+++ b/test/builtins/Unit/lshrdi3_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- lshrdi3_test.c - Test __lshrdi3 -----------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/lshrti3_test.c b/test/builtins/Unit/lshrti3_test.c
index f5a0dd6..91356c8 100644
--- a/test/builtins/Unit/lshrti3_test.c
+++ b/test/builtins/Unit/lshrti3_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- lshrti3_test.c - Test __lshrti3 -----------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/ltdf2vfp_test.c b/test/builtins/Unit/ltdf2vfp_test.c
index fdbe9a1..1edb319 100644
--- a/test/builtins/Unit/ltdf2vfp_test.c
+++ b/test/builtins/Unit/ltdf2vfp_test.c
@@ -1,3 +1,5 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+
 //===-- ltdf2vfp_test.c - Test __ltdf2vfp ---------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/ltsf2vfp_test.c b/test/builtins/Unit/ltsf2vfp_test.c
index d4d65ba..2fc0c11 100644
--- a/test/builtins/Unit/ltsf2vfp_test.c
+++ b/test/builtins/Unit/ltsf2vfp_test.c
@@ -1,3 +1,5 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+
 //===-- ltsf2vfp_test.c - Test __ltsf2vfp ---------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/lttf2_test.c b/test/builtins/Unit/lttf2_test.c
index e8f9dc1..cfe1906 100644
--- a/test/builtins/Unit/lttf2_test.c
+++ b/test/builtins/Unit/lttf2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===------------ lttf2_test.c - Test __lttf2------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/moddi3_test.c b/test/builtins/Unit/moddi3_test.c
index 62e8f22..8325ad7 100644
--- a/test/builtins/Unit/moddi3_test.c
+++ b/test/builtins/Unit/moddi3_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- moddi3_test.c - Test __moddi3 -------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/modsi3_test.c b/test/builtins/Unit/modsi3_test.c
index 8c9f588..8075f51 100644
--- a/test/builtins/Unit/modsi3_test.c
+++ b/test/builtins/Unit/modsi3_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 /* ===-- modsi3_test.c - Test __modsi3 -------------------------------------===
  *
  *                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/modti3_test.c b/test/builtins/Unit/modti3_test.c
index 99413aa..53fbf5b 100644
--- a/test/builtins/Unit/modti3_test.c
+++ b/test/builtins/Unit/modti3_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- modti3_test.c - Test __modti3 -------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/muldc3_test.c b/test/builtins/Unit/muldc3_test.c
index 6902ef3..5a856bc 100644
--- a/test/builtins/Unit/muldc3_test.c
+++ b/test/builtins/Unit/muldc3_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -lm -o %t && %run %t
 //===-- muldc3_test.c - Test __muldc3 -------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
@@ -16,6 +17,8 @@
 #include <complex.h>
 #include <stdio.h>
 
+// REQUIRES: c99-complex
+
 // Returns: the product of a + ib and c + id
 
 COMPILER_RT_ABI double _Complex
diff --git a/test/builtins/Unit/muldf3vfp_test.c b/test/builtins/Unit/muldf3vfp_test.c
index 024c9a8..36a2262 100644
--- a/test/builtins/Unit/muldf3vfp_test.c
+++ b/test/builtins/Unit/muldf3vfp_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- muldf3vfp_test.c - Test __muldf3vfp -------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/muldi3_test.c b/test/builtins/Unit/muldi3_test.c
index 651dd01..d09e74d 100644
--- a/test/builtins/Unit/muldi3_test.c
+++ b/test/builtins/Unit/muldi3_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- muldi3_test.c - Test __muldi3 -------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/mulodi4_test.c b/test/builtins/Unit/mulodi4_test.c
index 4546609..f03b8fe 100644
--- a/test/builtins/Unit/mulodi4_test.c
+++ b/test/builtins/Unit/mulodi4_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- mulodi4_test.c - Test __mulodi4 -----------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/mulosi4_test.c b/test/builtins/Unit/mulosi4_test.c
index 6a27d69..12913f1 100644
--- a/test/builtins/Unit/mulosi4_test.c
+++ b/test/builtins/Unit/mulosi4_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- mulosi4_test.c - Test __mulosi4 -----------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/muloti4_test.c b/test/builtins/Unit/muloti4_test.c
index d00e7bb..e7c78cf 100644
--- a/test/builtins/Unit/muloti4_test.c
+++ b/test/builtins/Unit/muloti4_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- muloti4_test.c - Test __muloti4 -----------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/mulsc3_test.c b/test/builtins/Unit/mulsc3_test.c
index eeb537a..46309c3 100644
--- a/test/builtins/Unit/mulsc3_test.c
+++ b/test/builtins/Unit/mulsc3_test.c
@@ -1,3 +1,6 @@
+// RUN: %clang_builtins %s %librt -lm -o %t && %run %t
+// UNSUPPORTED: armhf-target-arch
+// see pr 32475.
 //===-- mulsc3_test.c - Test __mulsc3 -------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
@@ -16,6 +19,8 @@
 #include <complex.h>
 #include <stdio.h>
 
+// REQUIRES: c99-complex
+
 // Returns: the product of a + ib and c + id
 
 COMPILER_RT_ABI float _Complex
diff --git a/test/builtins/Unit/mulsf3vfp_test.c b/test/builtins/Unit/mulsf3vfp_test.c
index 9fe88f2..8ee0551 100644
--- a/test/builtins/Unit/mulsf3vfp_test.c
+++ b/test/builtins/Unit/mulsf3vfp_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- mulsf3vfp_test.c - Test __mulsf3vfp -------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/multc3_test.c b/test/builtins/Unit/multc3_test.c
index f6cf4ca..5ef8467 100644
--- a/test/builtins/Unit/multc3_test.c
+++ b/test/builtins/Unit/multc3_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- multc3_test.c - Test __multc3 -------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/multf3_test.c b/test/builtins/Unit/multf3_test.c
index 9944865..2bce707 100644
--- a/test/builtins/Unit/multf3_test.c
+++ b/test/builtins/Unit/multf3_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===--------------- multf3_test.c - Test __multf3 ------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/multi3_test.c b/test/builtins/Unit/multi3_test.c
index 04b1b8a..8227f24 100644
--- a/test/builtins/Unit/multi3_test.c
+++ b/test/builtins/Unit/multi3_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- multi3_test.c - Test __multi3 -------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/mulvdi3_test.c b/test/builtins/Unit/mulvdi3_test.c
index 7f16c4c..0e10bbe 100644
--- a/test/builtins/Unit/mulvdi3_test.c
+++ b/test/builtins/Unit/mulvdi3_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- mulvdi3_test.c - Test __mulvdi3 -----------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/mulvsi3_test.c b/test/builtins/Unit/mulvsi3_test.c
index 64df4fe..f62a5aa 100644
--- a/test/builtins/Unit/mulvsi3_test.c
+++ b/test/builtins/Unit/mulvsi3_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- mulvsi3_test.c - Test __mulvsi3 -----------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/mulvti3_test.c b/test/builtins/Unit/mulvti3_test.c
index bf2f731..36e96ad 100644
--- a/test/builtins/Unit/mulvti3_test.c
+++ b/test/builtins/Unit/mulvti3_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- mulvti3_test.c - Test __mulvti3 -----------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/mulxc3_test.c b/test/builtins/Unit/mulxc3_test.c
index e77e94f..9b8e058 100644
--- a/test/builtins/Unit/mulxc3_test.c
+++ b/test/builtins/Unit/mulxc3_test.c
@@ -1,3 +1,5 @@
+// RUN: %clang_builtins %s %librt -lm -o %t && %run %t
+// UNSUPPORTED: powerpc64
 //===-- mulxc3_test.c - Test __mulxc3 -------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
@@ -18,6 +20,9 @@
 #include <complex.h>
 #include <stdio.h>
 
+// UNSUPPORTED: mips
+// REQUIRES: c99-complex
+
 // Returns: the product of a + ib and c + id
 
 COMPILER_RT_ABI long double _Complex
diff --git a/test/builtins/Unit/nedf2vfp_test.c b/test/builtins/Unit/nedf2vfp_test.c
index 69587d4..536917a 100644
--- a/test/builtins/Unit/nedf2vfp_test.c
+++ b/test/builtins/Unit/nedf2vfp_test.c
@@ -1,3 +1,5 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+
 //===-- nedf2vfp_test.c - Test __nedf2vfp ---------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/negdf2vfp_test.c b/test/builtins/Unit/negdf2vfp_test.c
index 72bde7c..776dca6 100644
--- a/test/builtins/Unit/negdf2vfp_test.c
+++ b/test/builtins/Unit/negdf2vfp_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- negdf2vfp_test.c - Test __negdf2vfp -------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/negdi2_test.c b/test/builtins/Unit/negdi2_test.c
index beccd71..c85e915 100644
--- a/test/builtins/Unit/negdi2_test.c
+++ b/test/builtins/Unit/negdi2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- negdi2_test.c - Test __negdi2 -------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/negsf2vfp_test.c b/test/builtins/Unit/negsf2vfp_test.c
index 1c9ba52..e15e43c 100644
--- a/test/builtins/Unit/negsf2vfp_test.c
+++ b/test/builtins/Unit/negsf2vfp_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- negsf2vfp_test.c - Test __negsf2vfp -------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/negti2_test.c b/test/builtins/Unit/negti2_test.c
index b075978..bb7379c 100644
--- a/test/builtins/Unit/negti2_test.c
+++ b/test/builtins/Unit/negti2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- negti2_test.c - Test __negti2 -------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/negvdi2_test.c b/test/builtins/Unit/negvdi2_test.c
index 5c202e5..4e17c30 100644
--- a/test/builtins/Unit/negvdi2_test.c
+++ b/test/builtins/Unit/negvdi2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- negvdi2_test.c - Test __negvdi2 -----------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/negvsi2_test.c b/test/builtins/Unit/negvsi2_test.c
index 6330803..3deb423 100644
--- a/test/builtins/Unit/negvsi2_test.c
+++ b/test/builtins/Unit/negvsi2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- negvsi2_test.c - Test __negvsi2 -----------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/negvti2_test.c b/test/builtins/Unit/negvti2_test.c
index 005f8a8..980f448 100644
--- a/test/builtins/Unit/negvti2_test.c
+++ b/test/builtins/Unit/negvti2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- negvti2_test.c - Test __negvti2 -----------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/nesf2vfp_test.c b/test/builtins/Unit/nesf2vfp_test.c
index 0ebcd67..bb01490 100644
--- a/test/builtins/Unit/nesf2vfp_test.c
+++ b/test/builtins/Unit/nesf2vfp_test.c
@@ -1,3 +1,5 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+
 //===-- nesf2vfp_test.c - Test __nesf2vfp ---------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/netf2_test.c b/test/builtins/Unit/netf2_test.c
index bbd953a..c0b839d 100644
--- a/test/builtins/Unit/netf2_test.c
+++ b/test/builtins/Unit/netf2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===------------ netf2_test.c - Test __netf2------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/paritydi2_test.c b/test/builtins/Unit/paritydi2_test.c
index 98220bd..cc56eda 100644
--- a/test/builtins/Unit/paritydi2_test.c
+++ b/test/builtins/Unit/paritydi2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- paritydi2_test.c - Test __paritydi2 -------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/paritysi2_test.c b/test/builtins/Unit/paritysi2_test.c
index 175aeb2..42d687f 100644
--- a/test/builtins/Unit/paritysi2_test.c
+++ b/test/builtins/Unit/paritysi2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- paritysi2_test.c - Test __paritysi2 -------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/parityti2_test.c b/test/builtins/Unit/parityti2_test.c
index cc1e999..bcd26d0 100644
--- a/test/builtins/Unit/parityti2_test.c
+++ b/test/builtins/Unit/parityti2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- parityti2_test.c - Test __parityti2 -------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/popcountdi2_test.c b/test/builtins/Unit/popcountdi2_test.c
index bfd4977..1d52fb8 100644
--- a/test/builtins/Unit/popcountdi2_test.c
+++ b/test/builtins/Unit/popcountdi2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- popcountdi2_test.c - Test __popcountdi2 ----------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/popcountsi2_test.c b/test/builtins/Unit/popcountsi2_test.c
index 10b757d..5eb88ac 100644
--- a/test/builtins/Unit/popcountsi2_test.c
+++ b/test/builtins/Unit/popcountsi2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- popcountsi2_test.c - Test __popcountsi2 ---------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/popcountti2_test.c b/test/builtins/Unit/popcountti2_test.c
index 3a3c3cb..ef8b2c3 100644
--- a/test/builtins/Unit/popcountti2_test.c
+++ b/test/builtins/Unit/popcountti2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- popcountti2_test.c - Test __popcountti2 ----------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/powidf2_test.c b/test/builtins/Unit/powidf2_test.c
index c499d9a..210e3c3 100644
--- a/test/builtins/Unit/powidf2_test.c
+++ b/test/builtins/Unit/powidf2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- powidf2_test.cpp - Test __powidf2 ---------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/powisf2_test.c b/test/builtins/Unit/powisf2_test.c
index 1186ef4..add4be4 100644
--- a/test/builtins/Unit/powisf2_test.c
+++ b/test/builtins/Unit/powisf2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- powisf2_test.cpp - Test __powisf2 ---------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/powitf2_test.c b/test/builtins/Unit/powitf2_test.c
index 13c890a..9d11b76 100644
--- a/test/builtins/Unit/powitf2_test.c
+++ b/test/builtins/Unit/powitf2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- powitf2_test.cpp - Test __powitf2 ---------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/powixf2_test.c b/test/builtins/Unit/powixf2_test.c
index a28f1f9..0d2734a 100644
--- a/test/builtins/Unit/powixf2_test.c
+++ b/test/builtins/Unit/powixf2_test.c
@@ -1,3 +1,5 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+// UNSUPPORTED: powerpc64
 //===-- powixf2_test.cpp - Test __powixf2 ---------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/ppc/fixtfdi_test.c b/test/builtins/Unit/ppc/fixtfdi_test.c
index b4865fb..ea6c405 100644
--- a/test/builtins/Unit/ppc/fixtfdi_test.c
+++ b/test/builtins/Unit/ppc/fixtfdi_test.c
@@ -1,3 +1,5 @@
+// REQUIRES: powerpc-registered-target
+// RUN: %clang_builtins %s -o %t && %run %t
 #include <stdio.h>
 #include <limits.h>
 #include <stdint.h>
diff --git a/test/builtins/Unit/ppc/floatditf_test.c b/test/builtins/Unit/ppc/floatditf_test.c
index 71ecf7c..5c08ade 100644
--- a/test/builtins/Unit/ppc/floatditf_test.c
+++ b/test/builtins/Unit/ppc/floatditf_test.c
@@ -1,3 +1,5 @@
+// REQUIRES: powerpc-registered-target
+// RUN: %clang_builtins %s -o %t && %run %t
 #include <stdint.h>
 #include <stdio.h>
 
diff --git a/test/builtins/Unit/ppc/floatunditf_test.c b/test/builtins/Unit/ppc/floatunditf_test.c
index 4d1ce08..3e50128 100644
--- a/test/builtins/Unit/ppc/floatunditf_test.c
+++ b/test/builtins/Unit/ppc/floatunditf_test.c
@@ -1,3 +1,5 @@
+// REQUIRES: powerpc-registered-target
+// RUN: %clang_builtins %s -o %t && %run %t
 #include <stdint.h>
 #include <stdio.h>
 
diff --git a/test/builtins/Unit/ppc/qadd_test.c b/test/builtins/Unit/ppc/qadd_test.c
index 6d4ca93..327fd21 100644
--- a/test/builtins/Unit/ppc/qadd_test.c
+++ b/test/builtins/Unit/ppc/qadd_test.c
@@ -1,3 +1,5 @@
+// REQUIRES: powerpc-registered-target
+// RUN: %clang_builtins %s -o %t && %run %t
 #include <stdio.h>
 #include "DD.h"
 
diff --git a/test/builtins/Unit/ppc/qdiv_test.c b/test/builtins/Unit/ppc/qdiv_test.c
index 8e4e75a..7530e42 100644
--- a/test/builtins/Unit/ppc/qdiv_test.c
+++ b/test/builtins/Unit/ppc/qdiv_test.c
@@ -1,3 +1,5 @@
+// REQUIRES: powerpc-registered-target
+// RUN: %clang_builtins %s -o %t && %run %t
 #include <stdio.h>
 #include "DD.h"
 
diff --git a/test/builtins/Unit/ppc/qmul_test.c b/test/builtins/Unit/ppc/qmul_test.c
index fc5b46d..dbe3536 100644
--- a/test/builtins/Unit/ppc/qmul_test.c
+++ b/test/builtins/Unit/ppc/qmul_test.c
@@ -1,3 +1,5 @@
+// REQUIRES: powerpc-registered-target
+// RUN: %clang_builtins %s -o %t && %run %t
 #include <stdio.h>
 #include "DD.h"
 
diff --git a/test/builtins/Unit/ppc/qsub_test.c b/test/builtins/Unit/ppc/qsub_test.c
index 43bc7f4..e212240 100644
--- a/test/builtins/Unit/ppc/qsub_test.c
+++ b/test/builtins/Unit/ppc/qsub_test.c
@@ -1,3 +1,5 @@
+// REQUIRES: powerpc-registered-target
+// RUN: %clang_builtins %s -o %t && %run %t
 #include <stdio.h>
 #include "DD.h"
 
diff --git a/test/builtins/Unit/subdf3vfp_test.c b/test/builtins/Unit/subdf3vfp_test.c
index 398494e..75bfe45 100644
--- a/test/builtins/Unit/subdf3vfp_test.c
+++ b/test/builtins/Unit/subdf3vfp_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- subdf3vfp_test.c - Test __subdf3vfp -------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/subsf3vfp_test.c b/test/builtins/Unit/subsf3vfp_test.c
index 8d529e5..fb3839f 100644
--- a/test/builtins/Unit/subsf3vfp_test.c
+++ b/test/builtins/Unit/subsf3vfp_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- subsf3vfp_test.c - Test __subsf3vfp -------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/subtf3_test.c b/test/builtins/Unit/subtf3_test.c
index 32c30bd..771242b 100644
--- a/test/builtins/Unit/subtf3_test.c
+++ b/test/builtins/Unit/subtf3_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===--------------- subtf3_test.c - Test __subtf3 ------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/subvdi3_test.c b/test/builtins/Unit/subvdi3_test.c
index 96e825c..7606e27 100644
--- a/test/builtins/Unit/subvdi3_test.c
+++ b/test/builtins/Unit/subvdi3_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- subvdi3_test.c - Test __subvdi3 -----------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/subvsi3_test.c b/test/builtins/Unit/subvsi3_test.c
index 03ef504..f52a12b 100644
--- a/test/builtins/Unit/subvsi3_test.c
+++ b/test/builtins/Unit/subvsi3_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- subvsi3_test.c - Test __subvsi3 -----------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/subvti3_test.c b/test/builtins/Unit/subvti3_test.c
index 40eb518..66806ba 100644
--- a/test/builtins/Unit/subvti3_test.c
+++ b/test/builtins/Unit/subvti3_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- subvti3_test.c - Test __subvti3 -----------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/trampoline_setup_test.c b/test/builtins/Unit/trampoline_setup_test.c
index dc30fb6..b8c3eae 100644
--- a/test/builtins/Unit/trampoline_setup_test.c
+++ b/test/builtins/Unit/trampoline_setup_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -fnested-functions -o %t && %run %t
 /* ===-- trampoline_setup_test.c - Test __trampoline_setup -----------------===
  *
  *                     The LLVM Compiler Infrastructure
@@ -12,7 +13,6 @@
 #include <stdio.h>
 #include <string.h>
 #include <stdint.h>
-#include <sys/mman.h>
 
 /*
  * Tests nested functions
diff --git a/test/builtins/Unit/truncdfhf2_test.c b/test/builtins/Unit/truncdfhf2_test.c
index 6627a00..b172811 100644
--- a/test/builtins/Unit/truncdfhf2_test.c
+++ b/test/builtins/Unit/truncdfhf2_test.c
@@ -1,3 +1,5 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+
 //===--------------- truncdfhf2_test.c - Test __truncdfhf2 ----------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/truncdfsf2_test.c b/test/builtins/Unit/truncdfsf2_test.c
index a208a3a..04bebf4 100644
--- a/test/builtins/Unit/truncdfsf2_test.c
+++ b/test/builtins/Unit/truncdfsf2_test.c
@@ -1,3 +1,5 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+
 //===--------------- truncdfsf2_test.c - Test __truncdfsf2 ----------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/truncdfsf2vfp_test.c b/test/builtins/Unit/truncdfsf2vfp_test.c
index efe6f97..f034dd2 100644
--- a/test/builtins/Unit/truncdfsf2vfp_test.c
+++ b/test/builtins/Unit/truncdfsf2vfp_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- truncdfsf2vfp_test.c - Test __truncdfsf2vfp -----------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/truncsfhf2_test.c b/test/builtins/Unit/truncsfhf2_test.c
index 5bc3c8e..2240c14 100644
--- a/test/builtins/Unit/truncsfhf2_test.c
+++ b/test/builtins/Unit/truncsfhf2_test.c
@@ -1,3 +1,5 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+
 //===--------------- truncsfhf2_test.c - Test __truncsfhf2 ----------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/trunctfdf2_test.c b/test/builtins/Unit/trunctfdf2_test.c
index 0366a8c..1d8c2bf 100644
--- a/test/builtins/Unit/trunctfdf2_test.c
+++ b/test/builtins/Unit/trunctfdf2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-------------- trunctfdf2_test.c - Test __trunctfdf2 -----------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/trunctfsf2_test.c b/test/builtins/Unit/trunctfsf2_test.c
index a6b922c..1775083 100644
--- a/test/builtins/Unit/trunctfsf2_test.c
+++ b/test/builtins/Unit/trunctfsf2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===--------------- trunctfsf2_test.c - Test __trunctfsf2 ----------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/ucmpdi2_test.c b/test/builtins/Unit/ucmpdi2_test.c
index 65ae4fc..ee81545 100644
--- a/test/builtins/Unit/ucmpdi2_test.c
+++ b/test/builtins/Unit/ucmpdi2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- ucmpdi2_test.c - Test __ucmpdi2 -----------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/ucmpti2_test.c b/test/builtins/Unit/ucmpti2_test.c
index 826cd64..17a857f 100644
--- a/test/builtins/Unit/ucmpti2_test.c
+++ b/test/builtins/Unit/ucmpti2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- ucmpti2_test.c - Test __ucmpti2 -----------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/udivdi3_test.c b/test/builtins/Unit/udivdi3_test.c
index 48c99e3..5858bc7 100644
--- a/test/builtins/Unit/udivdi3_test.c
+++ b/test/builtins/Unit/udivdi3_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- udivdi3_test.c - Test __udivdi3 -----------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/udivmoddi4_test.c b/test/builtins/Unit/udivmoddi4_test.c
index 79af1ee..a5024ba 100644
--- a/test/builtins/Unit/udivmoddi4_test.c
+++ b/test/builtins/Unit/udivmoddi4_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- udivmoddi4_test.c - Test __udivmoddi4 -----------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/udivmodsi4_test.c b/test/builtins/Unit/udivmodsi4_test.c
index 4c14e29..320018c 100644
--- a/test/builtins/Unit/udivmodsi4_test.c
+++ b/test/builtins/Unit/udivmodsi4_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- udivmodsi4_test.c - Test __udivmodsi4 -----------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/udivmodti4_test.c b/test/builtins/Unit/udivmodti4_test.c
index c424661..b07ce8c 100644
--- a/test/builtins/Unit/udivmodti4_test.c
+++ b/test/builtins/Unit/udivmodti4_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- udivmodti4_test.c - Test __udivmodti4 -----------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/udivsi3_test.c b/test/builtins/Unit/udivsi3_test.c
index 4905386..7adf30a 100644
--- a/test/builtins/Unit/udivsi3_test.c
+++ b/test/builtins/Unit/udivsi3_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- udivsi3_test.c - Test __udivsi3 -----------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/udivti3_test.c b/test/builtins/Unit/udivti3_test.c
index f24ff03..81cedad 100644
--- a/test/builtins/Unit/udivti3_test.c
+++ b/test/builtins/Unit/udivti3_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- udivti3_test.c - Test __udivti3 -----------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/umoddi3_test.c b/test/builtins/Unit/umoddi3_test.c
index b46fb40..b1382b6 100644
--- a/test/builtins/Unit/umoddi3_test.c
+++ b/test/builtins/Unit/umoddi3_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- umoddi3_test.c - Test __umoddi3 -----------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/umodsi3_test.c b/test/builtins/Unit/umodsi3_test.c
index 3655da1..c430538 100644
--- a/test/builtins/Unit/umodsi3_test.c
+++ b/test/builtins/Unit/umodsi3_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- umodsi3_test.c - Test __umodsi3 -----------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/umodti3_test.c b/test/builtins/Unit/umodti3_test.c
index 21e82a4..5bce025 100644
--- a/test/builtins/Unit/umodti3_test.c
+++ b/test/builtins/Unit/umodti3_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===-- umodti3_test.c - Test __umodti3 -----------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/unorddf2vfp_test.c b/test/builtins/Unit/unorddf2vfp_test.c
index be17e41..21938d0 100644
--- a/test/builtins/Unit/unorddf2vfp_test.c
+++ b/test/builtins/Unit/unorddf2vfp_test.c
@@ -1,3 +1,5 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+
 //===-- unorddf2vfp_test.c - Test __unorddf2vfp ---------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/unordsf2vfp_test.c b/test/builtins/Unit/unordsf2vfp_test.c
index e40d71f..7126652 100644
--- a/test/builtins/Unit/unordsf2vfp_test.c
+++ b/test/builtins/Unit/unordsf2vfp_test.c
@@ -1,3 +1,5 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+
 //===-- unordsf2vfp_test.c - Test __unordsf2vfp ---------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/builtins/Unit/unordtf2_test.c b/test/builtins/Unit/unordtf2_test.c
index 114cd8c..22907ce 100644
--- a/test/builtins/Unit/unordtf2_test.c
+++ b/test/builtins/Unit/unordtf2_test.c
@@ -1,3 +1,4 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
 //===------------ unordtf2_test.c - Test __unordtf2------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
diff --git a/test/cfi/CMakeLists.txt b/test/cfi/CMakeLists.txt
index bd51eac..c7fadde 100644
--- a/test/cfi/CMakeLists.txt
+++ b/test/cfi/CMakeLists.txt
@@ -1,14 +1,58 @@
-set(CFI_LIT_TEST_MODE Standalone)
-configure_lit_site_cfg(
-  ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
-  ${CMAKE_CURRENT_BINARY_DIR}/Standalone/lit.site.cfg
-  )
+set(CFI_TESTSUITES)
 
-set(CFI_LIT_TEST_MODE Devirt)
-configure_lit_site_cfg(
-  ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
-  ${CMAKE_CURRENT_BINARY_DIR}/Devirt/lit.site.cfg
-  )
+macro (add_cfi_test_suites lld thinlto)
+  set(suffix)
+  if (${lld})
+    set(suffix ${suffix}-lld)
+  endif()
+  if (${thinlto})
+    set(suffix ${suffix}-thinlto)
+  endif()
+  set(suffix ${suffix}-${CFI_TEST_TARGET_ARCH})
+
+  set(CFI_TEST_USE_LLD ${lld})
+  set(CFI_TEST_USE_THINLTO ${thinlto})
+
+  set(CFI_LIT_TEST_MODE Standalone)
+  set(CFI_TEST_CONFIG_SUFFIX -standalone${suffix})
+  configure_lit_site_cfg(
+    ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
+    ${CMAKE_CURRENT_BINARY_DIR}/Standalone${suffix}/lit.site.cfg
+    )
+  list(APPEND CFI_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/Standalone${suffix})
+
+  set(CFI_LIT_TEST_MODE Devirt)
+  set(CFI_TEST_CONFIG_SUFFIX -devirt${suffix})
+  configure_lit_site_cfg(
+    ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
+    ${CMAKE_CURRENT_BINARY_DIR}/Devirt${suffix}/lit.site.cfg
+    )
+  list(APPEND CFI_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/Devirt${suffix})
+endmacro()
+
+set(CFI_TEST_ARCH ${CFI_SUPPORTED_ARCH})
+if(APPLE)
+  darwin_filter_host_archs(CFI_SUPPORTED_ARCH CFI_TEST_ARCH)
+endif()
+
+foreach(arch ${CFI_TEST_ARCH})
+  set(CFI_TEST_TARGET_ARCH ${arch})
+  get_test_cc_for_arch(${arch} CFI_TEST_TARGET_CC CFI_TEST_TARGET_CFLAGS)
+  if (APPLE)
+    # FIXME: enable ThinLTO tests after fixing http://llvm.org/pr32741
+    add_cfi_test_suites(False False)
+  elseif(WIN32)
+    add_cfi_test_suites(True False)
+    add_cfi_test_suites(True True)
+  else()
+    add_cfi_test_suites(False False)
+    add_cfi_test_suites(False True)
+    if (COMPILER_RT_HAS_LLD AND NOT arch STREQUAL "i386")
+      add_cfi_test_suites(True False)
+      add_cfi_test_suites(True True)
+    endif()
+  endif()
+endforeach()
 
 set(CFI_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS})
 list(APPEND CFI_TEST_DEPS
@@ -34,7 +78,7 @@
       LTO
     )
   endif()
-  if(WIN32 AND COMPILER_RT_HAS_LLD_SOURCES)
+  if(NOT APPLE AND COMPILER_RT_HAS_LLD)
     list(APPEND CFI_TEST_DEPS
       lld
     )
@@ -42,13 +86,11 @@
 endif()
 
 add_lit_testsuite(check-cfi "Running the cfi regression tests"
-  ${CMAKE_CURRENT_BINARY_DIR}/Standalone
-  ${CMAKE_CURRENT_BINARY_DIR}/Devirt
+  ${CFI_TESTSUITES}
   DEPENDS ${CFI_TEST_DEPS})
 
 add_lit_target(check-cfi-and-supported "Running the cfi regression tests"
-  ${CMAKE_CURRENT_BINARY_DIR}/Standalone
-  ${CMAKE_CURRENT_BINARY_DIR}/Devirt
+  ${CFI_TESTSUITES}
   PARAMS check_supported=1
   DEPENDS ${CFI_TEST_DEPS})
 
diff --git a/test/cfi/anon-namespace.cpp b/test/cfi/anon-namespace.cpp
index 8e6c1c1..2a7ed9c 100644
--- a/test/cfi/anon-namespace.cpp
+++ b/test/cfi/anon-namespace.cpp
@@ -1,32 +1,32 @@
 // RUN: %clangxx_cfi -c -DTU1 -o %t1.o %s
 // RUN: %clangxx_cfi -c -DTU2 -o %t2.o %S/../cfi/anon-namespace.cpp
 // RUN: %clangxx_cfi -o %t1 %t1.o %t2.o
-// RUN: %expect_crash %t1 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t1 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -c -DTU1 -DB32 -o %t1.o %s
 // RUN: %clangxx_cfi -c -DTU2 -DB32 -o %t2.o %S/../cfi/anon-namespace.cpp
 // RUN: %clangxx_cfi -o %t2 %t1.o %t2.o
-// RUN: %expect_crash %t2 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t2 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -c -DTU1 -DB64 -o %t1.o %s
 // RUN: %clangxx_cfi -c -DTU2 -DB64 -o %t2.o %S/../cfi/anon-namespace.cpp
 // RUN: %clangxx_cfi -o %t3 %t1.o %t2.o
-// RUN: %expect_crash %t3 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t3 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -c -DTU1 -DBM -o %t1.o %s
 // RUN: %clangxx_cfi -c -DTU2 -DBM -o %t2.o %S/../cfi/anon-namespace.cpp
 // RUN: %clangxx_cfi -o %t4 %t1.o %t2.o
-// RUN: %expect_crash %t4 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t4 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx -c -DTU1 -o %t1.o %s
 // RUN: %clangxx -c -DTU2 -o %t2.o %S/../cfi/anon-namespace.cpp
 // RUN: %clangxx -o %t5 %t1.o %t2.o
-// RUN: %t5 2>&1 | FileCheck --check-prefix=NCFI %s
+// RUN: %run %t5 2>&1 | FileCheck --check-prefix=NCFI %s
 
 // RUN: %clangxx_cfi_diag -c -DTU1 -o %t1.o %s
 // RUN: %clangxx_cfi_diag -c -DTU2 -o %t2.o %S/../cfi/anon-namespace.cpp
 // RUN: %clangxx_cfi_diag -o %t6 %t1.o %t2.o
-// RUN: %t6 2>&1 | FileCheck --check-prefix=CFI-DIAG %s
+// RUN: %run %t6 2>&1 | FileCheck --check-prefix=CFI-DIAG %s
 
 // Tests that the CFI mechanism treats classes in the anonymous namespace in
 // different translation units as having distinct identities. This is done by
diff --git a/test/cfi/bad-cast.cpp b/test/cfi/bad-cast.cpp
index e2f4f25..1c4f19e 100644
--- a/test/cfi/bad-cast.cpp
+++ b/test/cfi/bad-cast.cpp
@@ -1,68 +1,68 @@
 // RUN: %clangxx_cfi -o %t1 %s
-// RUN: %expect_crash %t1 a 2>&1 | FileCheck --check-prefix=FAIL %s
-// RUN: %expect_crash %t1 b 2>&1 | FileCheck --check-prefix=FAIL %s
-// RUN: %expect_crash %t1 c 2>&1 | FileCheck --check-prefix=FAIL %s
-// RUN: %t1 d 2>&1 | FileCheck --check-prefix=PASS %s
-// RUN: %t1 e 2>&1 | FileCheck --check-prefix=PASS %s
-// RUN: %t1 f 2>&1 | FileCheck --check-prefix=PASS %s
-// RUN: %expect_crash %t1 g 2>&1 | FileCheck --check-prefix=FAIL %s
-// RUN: %t1 h 2>&1 | FileCheck --check-prefix=PASS %s
+// RUN: %expect_crash %run %t1 a 2>&1 | FileCheck --check-prefix=FAIL %s
+// RUN: %expect_crash %run %t1 b 2>&1 | FileCheck --check-prefix=FAIL %s
+// RUN: %expect_crash %run %t1 c 2>&1 | FileCheck --check-prefix=FAIL %s
+// RUN: %run %t1 d 2>&1 | FileCheck --check-prefix=PASS %s
+// RUN: %run %t1 e 2>&1 | FileCheck --check-prefix=PASS %s
+// RUN: %run %t1 f 2>&1 | FileCheck --check-prefix=PASS %s
+// RUN: %expect_crash %run %t1 g 2>&1 | FileCheck --check-prefix=FAIL %s
+// RUN: %run %t1 h 2>&1 | FileCheck --check-prefix=PASS %s
 
 // RUN: %clangxx_cfi -DB32 -o %t2 %s
-// RUN: %expect_crash %t2 a 2>&1 | FileCheck --check-prefix=FAIL %s
-// RUN: %expect_crash %t2 b 2>&1 | FileCheck --check-prefix=FAIL %s
-// RUN: %expect_crash %t2 c 2>&1 | FileCheck --check-prefix=FAIL %s
-// RUN: %t2 d 2>&1 | FileCheck --check-prefix=PASS %s
-// RUN: %t2 e 2>&1 | FileCheck --check-prefix=PASS %s
-// RUN: %t2 f 2>&1 | FileCheck --check-prefix=PASS %s
-// RUN: %expect_crash %t2 g 2>&1 | FileCheck --check-prefix=FAIL %s
-// RUN: %t2 h 2>&1 | FileCheck --check-prefix=PASS %s
+// RUN: %expect_crash %run %t2 a 2>&1 | FileCheck --check-prefix=FAIL %s
+// RUN: %expect_crash %run %t2 b 2>&1 | FileCheck --check-prefix=FAIL %s
+// RUN: %expect_crash %run %t2 c 2>&1 | FileCheck --check-prefix=FAIL %s
+// RUN: %run %t2 d 2>&1 | FileCheck --check-prefix=PASS %s
+// RUN: %run %t2 e 2>&1 | FileCheck --check-prefix=PASS %s
+// RUN: %run %t2 f 2>&1 | FileCheck --check-prefix=PASS %s
+// RUN: %expect_crash %run %t2 g 2>&1 | FileCheck --check-prefix=FAIL %s
+// RUN: %run %t2 h 2>&1 | FileCheck --check-prefix=PASS %s
 
 // RUN: %clangxx_cfi -DB64 -o %t3 %s
-// RUN: %expect_crash %t3 a 2>&1 | FileCheck --check-prefix=FAIL %s
-// RUN: %expect_crash %t3 b 2>&1 | FileCheck --check-prefix=FAIL %s
-// RUN: %expect_crash %t3 c 2>&1 | FileCheck --check-prefix=FAIL %s
-// RUN: %t3 d 2>&1 | FileCheck --check-prefix=PASS %s
-// RUN: %t3 e 2>&1 | FileCheck --check-prefix=PASS %s
-// RUN: %t3 f 2>&1 | FileCheck --check-prefix=PASS %s
-// RUN: %expect_crash %t3 g 2>&1 | FileCheck --check-prefix=FAIL %s
-// RUN: %t3 h 2>&1 | FileCheck --check-prefix=PASS %s
+// RUN: %expect_crash %run %t3 a 2>&1 | FileCheck --check-prefix=FAIL %s
+// RUN: %expect_crash %run %t3 b 2>&1 | FileCheck --check-prefix=FAIL %s
+// RUN: %expect_crash %run %t3 c 2>&1 | FileCheck --check-prefix=FAIL %s
+// RUN: %run %t3 d 2>&1 | FileCheck --check-prefix=PASS %s
+// RUN: %run %t3 e 2>&1 | FileCheck --check-prefix=PASS %s
+// RUN: %run %t3 f 2>&1 | FileCheck --check-prefix=PASS %s
+// RUN: %expect_crash %run %t3 g 2>&1 | FileCheck --check-prefix=FAIL %s
+// RUN: %run %t3 h 2>&1 | FileCheck --check-prefix=PASS %s
 
 // RUN: %clangxx_cfi -DBM -o %t4 %s
-// RUN: %expect_crash %t4 a 2>&1 | FileCheck --check-prefix=FAIL %s
-// RUN: %expect_crash %t4 b 2>&1 | FileCheck --check-prefix=FAIL %s
-// RUN: %expect_crash %t4 c 2>&1 | FileCheck --check-prefix=FAIL %s
-// RUN: %t4 d 2>&1 | FileCheck --check-prefix=PASS %s
-// RUN: %t4 e 2>&1 | FileCheck --check-prefix=PASS %s
-// RUN: %t4 f 2>&1 | FileCheck --check-prefix=PASS %s
-// RUN: %expect_crash %t4 g 2>&1 | FileCheck --check-prefix=FAIL %s
-// RUN: %t4 h 2>&1 | FileCheck --check-prefix=PASS %s
+// RUN: %expect_crash %run %t4 a 2>&1 | FileCheck --check-prefix=FAIL %s
+// RUN: %expect_crash %run %t4 b 2>&1 | FileCheck --check-prefix=FAIL %s
+// RUN: %expect_crash %run %t4 c 2>&1 | FileCheck --check-prefix=FAIL %s
+// RUN: %run %t4 d 2>&1 | FileCheck --check-prefix=PASS %s
+// RUN: %run %t4 e 2>&1 | FileCheck --check-prefix=PASS %s
+// RUN: %run %t4 f 2>&1 | FileCheck --check-prefix=PASS %s
+// RUN: %expect_crash %run %t4 g 2>&1 | FileCheck --check-prefix=FAIL %s
+// RUN: %run %t4 h 2>&1 | FileCheck --check-prefix=PASS %s
 
 // RUN: %clangxx_cfi -fsanitize=cfi-cast-strict -o %t5 %s
-// RUN: %expect_crash %t5 a 2>&1 | FileCheck --check-prefix=FAIL %s
-// RUN: %expect_crash %t5 b 2>&1 | FileCheck --check-prefix=FAIL %s
-// RUN: %expect_crash %t5 c 2>&1 | FileCheck --check-prefix=FAIL %s
-// RUN: %expect_crash %t5 d 2>&1 | FileCheck --check-prefix=FAIL %s
-// RUN: %expect_crash %t5 e 2>&1 | FileCheck --check-prefix=FAIL %s
-// RUN: %expect_crash %t5 f 2>&1 | FileCheck --check-prefix=FAIL %s
-// RUN: %expect_crash %t5 g 2>&1 | FileCheck --check-prefix=FAIL %s
-// RUN: %expect_crash %t5 h 2>&1 | FileCheck --check-prefix=FAIL %s
+// RUN: %expect_crash %run %t5 a 2>&1 | FileCheck --check-prefix=FAIL %s
+// RUN: %expect_crash %run %t5 b 2>&1 | FileCheck --check-prefix=FAIL %s
+// RUN: %expect_crash %run %t5 c 2>&1 | FileCheck --check-prefix=FAIL %s
+// RUN: %expect_crash %run %t5 d 2>&1 | FileCheck --check-prefix=FAIL %s
+// RUN: %expect_crash %run %t5 e 2>&1 | FileCheck --check-prefix=FAIL %s
+// RUN: %expect_crash %run %t5 f 2>&1 | FileCheck --check-prefix=FAIL %s
+// RUN: %expect_crash %run %t5 g 2>&1 | FileCheck --check-prefix=FAIL %s
+// RUN: %expect_crash %run %t5 h 2>&1 | FileCheck --check-prefix=FAIL %s
 
 // RUN: %clangxx -o %t6 %s
-// RUN: %t6 a 2>&1 | FileCheck --check-prefix=PASS %s
-// RUN: %t6 b 2>&1 | FileCheck --check-prefix=PASS %s
-// RUN: %t6 c 2>&1 | FileCheck --check-prefix=PASS %s
-// RUN: %t6 d 2>&1 | FileCheck --check-prefix=PASS %s
-// RUN: %t6 e 2>&1 | FileCheck --check-prefix=PASS %s
-// RUN: %t6 f 2>&1 | FileCheck --check-prefix=PASS %s
-// RUN: %t6 g 2>&1 | FileCheck --check-prefix=PASS %s
-// RUN: %t6 h 2>&1 | FileCheck --check-prefix=PASS %s
+// RUN: %run %t6 a 2>&1 | FileCheck --check-prefix=PASS %s
+// RUN: %run %t6 b 2>&1 | FileCheck --check-prefix=PASS %s
+// RUN: %run %t6 c 2>&1 | FileCheck --check-prefix=PASS %s
+// RUN: %run %t6 d 2>&1 | FileCheck --check-prefix=PASS %s
+// RUN: %run %t6 e 2>&1 | FileCheck --check-prefix=PASS %s
+// RUN: %run %t6 f 2>&1 | FileCheck --check-prefix=PASS %s
+// RUN: %run %t6 g 2>&1 | FileCheck --check-prefix=PASS %s
+// RUN: %run %t6 h 2>&1 | FileCheck --check-prefix=PASS %s
 
 // RUN: %clangxx_cfi_diag -o %t7 %s
-// RUN: %t7 a 2>&1 | FileCheck --check-prefix=CFI-DIAG-D %s
-// RUN: %t7 b 2>&1 | FileCheck --check-prefix=CFI-DIAG-D %s
-// RUN: %t7 c 2>&1 | FileCheck --check-prefix=CFI-DIAG-D %s
-// RUN: %t7 g 2>&1 | FileCheck --check-prefix=CFI-DIAG-U %s
+// RUN: %run %t7 a 2>&1 | FileCheck --check-prefix=CFI-DIAG-D %s
+// RUN: %run %t7 b 2>&1 | FileCheck --check-prefix=CFI-DIAG-D %s
+// RUN: %run %t7 c 2>&1 | FileCheck --check-prefix=CFI-DIAG-D %s
+// RUN: %run %t7 g 2>&1 | FileCheck --check-prefix=CFI-DIAG-U %s
 
 // Tests that the CFI enforcement detects bad casts.
 
diff --git a/test/cfi/bad-split.cpp b/test/cfi/bad-split.cpp
new file mode 100644
index 0000000..37e635a
--- /dev/null
+++ b/test/cfi/bad-split.cpp
@@ -0,0 +1,21 @@
+// GlobalSplit used to lose type metadata for classes with virtual bases but no virtual methods.
+// RUN: %clangxx_cfi -o %t1 %s && %run %t1
+
+// UNSUPPORTED: win32
+
+struct Z {
+};
+
+struct ZZ : public virtual Z {
+};
+
+struct A : public ZZ {
+};
+
+struct B : public A {
+};
+
+int main() {
+  A* a = new B();
+  B *b = (B*)a;
+}
diff --git a/test/cfi/base-derived-destructor.cpp b/test/cfi/base-derived-destructor.cpp
index c5e9db1..33c7445 100644
--- a/test/cfi/base-derived-destructor.cpp
+++ b/test/cfi/base-derived-destructor.cpp
@@ -1,56 +1,56 @@
 // RUN: %clangxx_cfi -o %t1 %s
-// RUN: %expect_crash %t1 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t1 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -DB32 -o %t2 %s
-// RUN: %expect_crash %t2 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t2 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -DB64 -o %t3 %s
-// RUN: %expect_crash %t3 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t3 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -DBM -o %t4 %s
-// RUN: %expect_crash %t4 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t4 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -O1 -o %t5 %s
-// RUN: %expect_crash %t5 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t5 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -O1 -DB32 -o %t6 %s
-// RUN: %expect_crash %t6 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t6 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -O1 -DB64 -o %t7 %s
-// RUN: %expect_crash %t7 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t7 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -O1 -DBM -o %t8 %s
-// RUN: %expect_crash %t8 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t8 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -O2 -o %t9 %s
-// RUN: %expect_crash %t9 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t9 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -O2 -DB32 -o %t10 %s
-// RUN: %expect_crash %t10 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t10 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -O2 -DB64 -o %t11 %s
-// RUN: %expect_crash %t11 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t11 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -O2 -DBM -o %t12 %s
-// RUN: %expect_crash %t12 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t12 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -O3 -o %t13 %s
-// RUN: %expect_crash %t13 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t13 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -O3 -DB32 -o %t14 %s
-// RUN: %expect_crash %t14 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t14 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -O3 -DB64 -o %t15 %s
-// RUN: %expect_crash %t15 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t15 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -O3 -DBM -o %t16 %s
-// RUN: %expect_crash %t16 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t16 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi_diag -o %t17 %s
-// RUN: %t17 2>&1 | FileCheck --check-prefix=CFI-DIAG %s
+// RUN: %run %t17 2>&1 | FileCheck --check-prefix=CFI-DIAG %s
 
 // RUN: %clangxx -o %t18 %s
-// RUN: %t18 2>&1 | FileCheck --check-prefix=NCFI %s
+// RUN: %run %t18 2>&1 | FileCheck --check-prefix=NCFI %s
 
 // Tests that the CFI mechanism crashes the program when making a
 // base-to-derived cast from a destructor of the base class,
diff --git a/test/cfi/create-derivers.test b/test/cfi/create-derivers.test
index a67562b..8b569d0 100644
--- a/test/cfi/create-derivers.test
+++ b/test/cfi/create-derivers.test
@@ -1,20 +1,21 @@
 REQUIRES: asserts
 
-RUN: %clangxx_cfi -c -o %t1.o %S/simple-fail.cpp
+%% Explicit -flto to override possible -flto=thin in %clangxx_cfi
+RUN: %clangxx_cfi -flto -c -o %t1.o %S/simple-fail.cpp
 RUN: opt -lowertypetests -debug-only=lowertypetests -o /dev/null %t1.o 2>&1 | FileCheck --check-prefix=B0 %s
 B0: {{1B|B@@}}: {{.*}} size 1
 
-RUN: %clangxx_cfi -DB32 -c -o %t2.o %S/simple-fail.cpp
+RUN: %clangxx_cfi -DB32 -flto -c -o %t2.o %S/simple-fail.cpp
 RUN: opt -lowertypetests -debug-only=lowertypetests -o /dev/null %t2.o 2>&1 | FileCheck --check-prefix=B32 %s
 B32: {{1B|B@@}}: {{.*}} size 24
 B32-NOT: all-ones
 
-RUN: %clangxx_cfi -DB64 -c -o %t3.o %S/simple-fail.cpp
+RUN: %clangxx_cfi -DB64 -flto -c -o %t3.o %S/simple-fail.cpp
 RUN: opt -lowertypetests -debug-only=lowertypetests -o /dev/null %t3.o 2>&1 | FileCheck --check-prefix=B64 %s
 B64: {{1B|B@@}}: {{.*}} size 54
 B64-NOT: all-ones
 
-RUN: %clangxx_cfi -DBM -c -o %t4.o %S/simple-fail.cpp
+RUN: %clangxx_cfi -DBM -flto -c -o %t4.o %S/simple-fail.cpp
 RUN: opt -lowertypetests -debug-only=lowertypetests -o /dev/null %t4.o 2>&1 | FileCheck --check-prefix=BM %s
 BM: {{1B|B@@}}: {{.*}} size 84
 BM-NOT: all-ones
diff --git a/test/cfi/cross-dso/icall/diag.cpp b/test/cfi/cross-dso/icall/diag.cpp
index c9ca28c..579ee83 100644
--- a/test/cfi/cross-dso/icall/diag.cpp
+++ b/test/cfi/cross-dso/icall/diag.cpp
@@ -6,8 +6,8 @@
 // * otherwise, the callee decides between trap/recover/norecover.
 
 // Full-recover.
-// RUN: %clangxx_cfi_dso_diag -g -DSHARED_LIB %s -fPIC -shared -o %t-so.so
-// RUN: %clangxx_cfi_dso_diag -g %s -o %t %t-so.so
+// RUN: %clangxx_cfi_dso_diag -g -DSHARED_LIB %s -fPIC -shared -o %dynamiclib %ld_flags_rpath_so
+// RUN: %clangxx_cfi_dso_diag -g %s -o %t %ld_flags_rpath_exe
 
 // RUN: %t icv 2>&1 | FileCheck %s --check-prefix=ICALL-DIAG --check-prefix=CAST-DIAG \
 // RUN:                            --check-prefix=VCALL-DIAG --check-prefix=ALL-RECOVER
@@ -23,9 +23,9 @@
 
 // Trap on icall, no-recover on cast.
 // RUN: %clangxx_cfi_dso_diag -fsanitize-trap=cfi-icall -fno-sanitize-recover=cfi-unrelated-cast \
-// RUN:     -g -DSHARED_LIB %s -fPIC -shared -o %t-so.so
+// RUN:     -g -DSHARED_LIB %s -fPIC -shared -o %dynamiclib %ld_flags_rpath_so
 // RUN: %clangxx_cfi_dso_diag -fsanitize-trap=cfi-icall -fno-sanitize-recover=cfi-unrelated-cast \
-// RUN:     -g %s -o %t %t-so.so
+// RUN:     -g %s -o %t %ld_flags_rpath_exe
 
 // RUN: %expect_crash %t icv 2>&1 | FileCheck %s --check-prefix=ICALL-NODIAG --check-prefix=CAST-NODIAG \
 // RUN:                                          --check-prefix=VCALL-NODIAG --check-prefix=ICALL-FATAL
@@ -40,9 +40,9 @@
 // Caller: recover on everything.
 // The same as in the previous case, behaviour is decided by the callee.
 // RUN: %clangxx_cfi_dso_diag -fsanitize-trap=cfi-icall -fno-sanitize-recover=cfi-unrelated-cast \
-// RUN:     -g -DSHARED_LIB %s -fPIC -shared -o %t-so.so
+// RUN:     -g -DSHARED_LIB %s -fPIC -shared -o %dynamiclib %ld_flags_rpath_so
 // RUN: %clangxx_cfi_dso_diag \
-// RUN:     -g %s -o %t %t-so.so
+// RUN:     -g %s -o %t %ld_flags_rpath_exe
 
 // RUN: %expect_crash %t icv 2>&1 | FileCheck %s --check-prefix=ICALL-NODIAG --check-prefix=CAST-NODIAG \
 // RUN:                                          --check-prefix=VCALL-NODIAG --check-prefix=ICALL-FATAL
@@ -57,9 +57,9 @@
 // Caller wins.
 // cfi-nvcall is non-trapping in the main executable to link the diagnostic runtime library.
 // RUN: %clangxx_cfi_dso_diag \
-// RUN:     -g -DSHARED_LIB %s -fPIC -shared -o %t-so.so
+// RUN:     -g -DSHARED_LIB %s -fPIC -shared -o %dynamiclib %ld_flags_rpath_so
 // RUN: %clangxx_cfi_dso -fno-sanitize-trap=cfi-nvcall \
-// RUN:     -g %s -o %t %t-so.so
+// RUN:     -g %s -o %t %ld_flags_rpath_exe
 
 // RUN: %expect_crash %t icv 2>&1 | FileCheck %s --check-prefix=ICALL-NODIAG --check-prefix=CAST-NODIAG \
 // RUN:                                          --check-prefix=VCALL-NODIAG --check-prefix=ICALL-FATAL
diff --git a/test/cfi/cross-dso/dlopen.cpp b/test/cfi/cross-dso/icall/dlopen.cpp
similarity index 99%
rename from test/cfi/cross-dso/dlopen.cpp
rename to test/cfi/cross-dso/icall/dlopen.cpp
index ee4dae2..d238a7a 100644
--- a/test/cfi/cross-dso/dlopen.cpp
+++ b/test/cfi/cross-dso/icall/dlopen.cpp
@@ -55,7 +55,7 @@
 
 #ifdef SHARED_LIB
 
-#include "../utils.h"
+#include "../../utils.h"
 struct B {
   virtual void f();
 };
diff --git a/test/cfi/cross-dso/icall/icall-from-dso.cpp b/test/cfi/cross-dso/icall/icall-from-dso.cpp
index 93cf4f6..125e030 100644
--- a/test/cfi/cross-dso/icall/icall-from-dso.cpp
+++ b/test/cfi/cross-dso/icall/icall-from-dso.cpp
@@ -1,8 +1,8 @@
-// RUN: %clangxx_cfi_dso -DSHARED_LIB %s -fPIC -shared -o %t-so.so
-// RUN: %clangxx_cfi_dso %s -o %t %t-so.so && %expect_crash %t 2>&1 | FileCheck %s
+// RUN: %clangxx_cfi_dso -DSHARED_LIB %s -fPIC -shared -o %dynamiclib %ld_flags_rpath_so
+// RUN: %clangxx_cfi_dso %s -o %t %ld_flags_rpath_exe && %expect_crash %t 2>&1 | FileCheck %s
 
-// RUN: %clangxx_cfi_dso_diag -g -DSHARED_LIB %s -fPIC -shared -o %t2-so.so
-// RUN: %clangxx_cfi_dso_diag -g %s -o %t2 %t2-so.so && %t2 2>&1 | FileCheck %s --check-prefix=CFI-DIAG
+// RUN: %clangxx_cfi_dso_diag -g -DSHARED_LIB %s -fPIC -shared -o %dynamiclib %ld_flags_rpath_so
+// RUN: %clangxx_cfi_dso_diag -g %s -o %t %ld_flags_rpath_exe && %t 2>&1 | FileCheck %s --check-prefix=CFI-DIAG
 
 #include <stdio.h>
 
diff --git a/test/cfi/cross-dso/icall/icall.cpp b/test/cfi/cross-dso/icall/icall.cpp
index 6017b80..9e9bfd0 100644
--- a/test/cfi/cross-dso/icall/icall.cpp
+++ b/test/cfi/cross-dso/icall/icall.cpp
@@ -1,8 +1,8 @@
-// RUN: %clangxx_cfi_dso -DSHARED_LIB %s -fPIC -shared -o %t-so.so
-// RUN: %clangxx_cfi_dso %s -o %t %t-so.so && %expect_crash %t 2>&1 | FileCheck %s
+// RUN: %clangxx_cfi_dso -DSHARED_LIB %s -fPIC -shared -o %dynamiclib %ld_flags_rpath_so
+// RUN: %clangxx_cfi_dso %s -o %t %ld_flags_rpath_exe && %expect_crash %t 2>&1 | FileCheck %s
 
-// RUN: %clangxx_cfi_dso_diag -g -DSHARED_LIB %s -fPIC -shared -o %t2-so.so
-// RUN: %clangxx_cfi_dso_diag -g %s -o %t2 %t2-so.so && %t2 2>&1 | FileCheck %s --check-prefix=CFI-DIAG
+// RUN: %clangxx_cfi_dso_diag -g -DSHARED_LIB %s -fPIC -shared -o %dynamiclib %ld_flags_rpath_so
+// RUN: %clangxx_cfi_dso_diag -g %s -o %t %ld_flags_rpath_exe && %t 2>&1 | FileCheck %s --check-prefix=CFI-DIAG
 
 #include <stdio.h>
 
diff --git a/test/cfi/cross-dso/lit.local.cfg b/test/cfi/cross-dso/lit.local.cfg
index 57271b8..afdac42 100644
--- a/test/cfi/cross-dso/lit.local.cfg
+++ b/test/cfi/cross-dso/lit.local.cfg
@@ -7,3 +7,7 @@
 
 if root.host_os not in ['Linux']:
   config.unsupported = True
+
+# Android O (API level 26) has support for cross-dso cfi in libdl.so.
+if config.android and 'android-26' not in config.available_features:
+  config.unsupported = True
diff --git a/test/cfi/cross-dso/shadow_is_read_only.cpp b/test/cfi/cross-dso/shadow_is_read_only.cpp
index 65aec82..8811506 100644
--- a/test/cfi/cross-dso/shadow_is_read_only.cpp
+++ b/test/cfi/cross-dso/shadow_is_read_only.cpp
@@ -12,6 +12,9 @@
 // Tests that shadow is read-only most of the time.
 // REQUIRES: cxxabi
 
+// Uses private API that is not available on Android.
+// UNSUPPORTED: android
+
 #include <assert.h>
 #include <dlfcn.h>
 #include <stdio.h>
diff --git a/test/cfi/cross-dso/simple-fail.cpp b/test/cfi/cross-dso/simple-fail.cpp
index 276b67d..93503eb 100644
--- a/test/cfi/cross-dso/simple-fail.cpp
+++ b/test/cfi/cross-dso/simple-fail.cpp
@@ -1,37 +1,37 @@
-// RUN: %clangxx_cfi_dso -DSHARED_LIB %s -fPIC -shared -o %t1-so.so
-// RUN: %clangxx_cfi_dso %s -o %t1 %t1-so.so
-// RUN: %expect_crash %t1 2>&1 | FileCheck --check-prefix=CFI %s
-// RUN: %expect_crash %t1 x 2>&1 | FileCheck --check-prefix=CFI-CAST %s
+// RUN: %clangxx_cfi_dso -DSHARED_LIB %s -fPIC -shared -o %dynamiclib %ld_flags_rpath_so
+// RUN: %clangxx_cfi_dso %s -o %t %ld_flags_rpath_exe
+// RUN: %expect_crash %t 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %t x 2>&1 | FileCheck --check-prefix=CFI-CAST %s
 
-// RUN: %clangxx_cfi_dso -DB32 -DSHARED_LIB %s -fPIC -shared -o %t2-so.so
-// RUN: %clangxx_cfi_dso -DB32 %s -o %t2 %t2-so.so
-// RUN: %expect_crash %t2 2>&1 | FileCheck --check-prefix=CFI %s
-// RUN: %expect_crash %t2 x 2>&1 | FileCheck --check-prefix=CFI-CAST %s
+// RUN: %clangxx_cfi_dso -DB32 -DSHARED_LIB %s -fPIC -shared -o %dynamiclib %ld_flags_rpath_so
+// RUN: %clangxx_cfi_dso -DB32 %s -o %t %ld_flags_rpath_exe
+// RUN: %expect_crash %t 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %t x 2>&1 | FileCheck --check-prefix=CFI-CAST %s
 
-// RUN: %clangxx_cfi_dso -DB64 -DSHARED_LIB %s -fPIC -shared -o %t3-so.so
-// RUN: %clangxx_cfi_dso -DB64 %s -o %t3 %t3-so.so
-// RUN: %expect_crash %t3 2>&1 | FileCheck --check-prefix=CFI %s
-// RUN: %expect_crash %t3 x 2>&1 | FileCheck --check-prefix=CFI-CAST %s
+// RUN: %clangxx_cfi_dso -DB64 -DSHARED_LIB %s -fPIC -shared -o %dynamiclib %ld_flags_rpath_so
+// RUN: %clangxx_cfi_dso -DB64 %s -o %t %ld_flags_rpath_exe
+// RUN: %expect_crash %t 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %t x 2>&1 | FileCheck --check-prefix=CFI-CAST %s
 
-// RUN: %clangxx_cfi_dso -DBM -DSHARED_LIB %s -fPIC -shared -o %t4-so.so
-// RUN: %clangxx_cfi_dso -DBM %s -o %t4 %t4-so.so
-// RUN: %expect_crash %t4 2>&1 | FileCheck --check-prefix=CFI %s
-// RUN: %expect_crash %t4 x 2>&1 | FileCheck --check-prefix=CFI-CAST %s
+// RUN: %clangxx_cfi_dso -DBM -DSHARED_LIB %s -fPIC -shared -o %dynamiclib %ld_flags_rpath_so
+// RUN: %clangxx_cfi_dso -DBM %s -o %t %ld_flags_rpath_exe
+// RUN: %expect_crash %t 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %t x 2>&1 | FileCheck --check-prefix=CFI-CAST %s
 
-// RUN: %clangxx -DBM -DSHARED_LIB %s -fPIC -shared -o %t5-so.so
-// RUN: %clangxx -DBM %s -o %t5 %t5-so.so
-// RUN: %t5 2>&1 | FileCheck --check-prefix=NCFI %s
-// RUN: %t5 x 2>&1 | FileCheck --check-prefix=NCFI %s
+// RUN: %clangxx -DBM -DSHARED_LIB %s -fPIC -shared -o %dynamiclib %ld_flags_rpath_so
+// RUN: %clangxx -DBM %s -o %t %ld_flags_rpath_exe
+// RUN: %t 2>&1 | FileCheck --check-prefix=NCFI %s
+// RUN: %t x 2>&1 | FileCheck --check-prefix=NCFI %s
 
-// RUN: %clangxx -DBM -DSHARED_LIB %s -fPIC -shared -o %t6-so.so
-// RUN: %clangxx_cfi_dso -DBM %s -o %t6 %t6-so.so
-// RUN: %t6 2>&1 | FileCheck --check-prefix=NCFI %s
-// RUN: %t6 x 2>&1 | FileCheck --check-prefix=NCFI %s
+// RUN: %clangxx -DBM -DSHARED_LIB %s -fPIC -shared -o %dynamiclib %ld_flags_rpath_so
+// RUN: %clangxx_cfi_dso -DBM %s -o %t %ld_flags_rpath_exe
+// RUN: %t 2>&1 | FileCheck --check-prefix=NCFI %s
+// RUN: %t x 2>&1 | FileCheck --check-prefix=NCFI %s
 
-// RUN: %clangxx_cfi_dso_diag -DSHARED_LIB %s -fPIC -shared -o %t7-so.so
-// RUN: %clangxx_cfi_dso_diag %s -o %t7 %t7-so.so
-// RUN: %t7 2>&1 | FileCheck --check-prefix=CFI-DIAG-CALL %s
-// RUN: %t7 x 2>&1 | FileCheck --check-prefix=CFI-DIAG-CALL --check-prefix=CFI-DIAG-CAST %s
+// RUN: %clangxx_cfi_dso_diag -DSHARED_LIB %s -fPIC -shared -o %dynamiclib %ld_flags_rpath_so
+// RUN: %clangxx_cfi_dso_diag %s -o %t %ld_flags_rpath_exe
+// RUN: %t 2>&1 | FileCheck --check-prefix=CFI-DIAG-CALL %s
+// RUN: %t x 2>&1 | FileCheck --check-prefix=CFI-DIAG-CALL --check-prefix=CFI-DIAG-CAST %s
 
 // Tests that the CFI mechanism crashes the program when making a virtual call
 // to an object of the wrong class but with a compatible vtable, by casting a
diff --git a/test/cfi/cross-dso/simple-pass.cpp b/test/cfi/cross-dso/simple-pass.cpp
index 42f7a27..6ce6471 100644
--- a/test/cfi/cross-dso/simple-pass.cpp
+++ b/test/cfi/cross-dso/simple-pass.cpp
@@ -1,23 +1,23 @@
-// RUN: %clangxx_cfi_dso -DSHARED_LIB %s -fPIC -shared -o %t1-so.so
-// RUN: %clangxx_cfi_dso %s -o %t1 %t1-so.so
-// RUN: %t1 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %clangxx_cfi_dso -DSHARED_LIB %s -fPIC -shared -o %dynamiclib %ld_flags_rpath_so
+// RUN: %clangxx_cfi_dso -g %s -o %t %ld_flags_rpath_exe
+// RUN: %t 2>&1 | FileCheck --check-prefix=CFI %s
 
-// RUN: %clangxx_cfi_dso -DB32 -DSHARED_LIB %s -fPIC -shared -o %t2-so.so
-// RUN: %clangxx_cfi_dso -DB32 %s -o %t2 %t2-so.so
-// RUN: %t2 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %clangxx_cfi_dso -DB32 -DSHARED_LIB %s -fPIC -shared -o %dynamiclib %ld_flags_rpath_so
+// RUN: %clangxx_cfi_dso -DB32 %s -o %t %ld_flags_rpath_exe
+// RUN: %t 2>&1 | FileCheck --check-prefix=CFI %s
 
-// RUN: %clangxx_cfi_dso -DB64 -DSHARED_LIB %s -fPIC -shared -o %t3-so.so
-// RUN: %clangxx_cfi_dso -DB64 %s -o %t3 %t3-so.so
-// RUN: %t3 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %clangxx_cfi_dso -DB64 -DSHARED_LIB %s -fPIC -shared -o %dynamiclib %ld_flags_rpath_so
+// RUN: %clangxx_cfi_dso -DB64 %s -o %t %ld_flags_rpath_exe
+// RUN: %t 2>&1 | FileCheck --check-prefix=CFI %s
 
-// RUN: %clangxx_cfi_dso -DBM -DSHARED_LIB %s -fPIC -shared -o %t4-so.so
-// RUN: %clangxx_cfi_dso -DBM %s -o %t4 %t4-so.so
-// RUN: %t4 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %clangxx_cfi_dso -DBM -DSHARED_LIB %s -fPIC -shared -o %dynamiclib %ld_flags_rpath_so
+// RUN: %clangxx_cfi_dso -DBM %s -o %t %ld_flags_rpath_exe
+// RUN: %t 2>&1 | FileCheck --check-prefix=CFI %s
 
-// RUN: %clangxx -DBM -DSHARED_LIB %s -fPIC -shared -o %t5-so.so
-// RUN: %clangxx -DBM %s -o %t5 %t5-so.so
-// RUN: %t5 2>&1 | FileCheck --check-prefix=NCFI %s
-// RUN: %t5 x 2>&1 | FileCheck --check-prefix=NCFI %s
+// RUN: %clangxx -DBM -DSHARED_LIB %s -fPIC -shared -o %dynamiclib %ld_flags_rpath_so
+// RUN: %clangxx -DBM %s -o %t %ld_flags_rpath_exe
+// RUN: %t 2>&1 | FileCheck --check-prefix=NCFI %s
+// RUN: %t x 2>&1 | FileCheck --check-prefix=NCFI %s
 
 // Tests that the CFI mechanism crashes the program when making a virtual call
 // to an object of the wrong class but with a compatible vtable, by casting a
diff --git a/test/cfi/cross-dso/stats.cpp b/test/cfi/cross-dso/stats.cpp
index 6566ea2..09a7217 100644
--- a/test/cfi/cross-dso/stats.cpp
+++ b/test/cfi/cross-dso/stats.cpp
@@ -3,6 +3,12 @@
 // RUN: env SANITIZER_STATS_PATH=%t.stats %t
 // RUN: sanstats %t.stats | FileCheck %s
 
+// CFI-icall is not implemented in thinlto mode => ".cfi" suffixes are missing
+// in sanstats output.
+
+// FIXME: %t.stats must be transferred from device to host for this to work on Android.
+// XFAIL: android
+
 struct ABase {};
 
 struct A : ABase {
diff --git a/test/cfi/cross-dso/util/cfi_stubs.h b/test/cfi/cross-dso/util/cfi_stubs.h
new file mode 100644
index 0000000..b742074
--- /dev/null
+++ b/test/cfi/cross-dso/util/cfi_stubs.h
@@ -0,0 +1,30 @@
+// This is a hack to access CFI interface that Android has in libdl.so on
+// device, but not in the NDK.
+#include <dlfcn.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+typedef void (*cfi_slowpath_ty)(uint64_t, void *);
+typedef void (*cfi_slowpath_diag_ty)(uint64_t, void *, void *);
+
+static cfi_slowpath_ty cfi_slowpath;
+static cfi_slowpath_diag_ty cfi_slowpath_diag;
+
+__attribute__((constructor(0), no_sanitize("cfi"))) static void init() {
+  cfi_slowpath = (cfi_slowpath_ty)dlsym(RTLD_NEXT, "__cfi_slowpath");
+  cfi_slowpath_diag =
+      (cfi_slowpath_diag_ty)dlsym(RTLD_NEXT, "__cfi_slowpath_diag");
+  if (!cfi_slowpath || !cfi_slowpath_diag) abort();
+}
+
+extern "C" {
+__attribute__((visibility("hidden"), no_sanitize("cfi"))) void __cfi_slowpath(
+    uint64_t Type, void *Addr) {
+  cfi_slowpath(Type, Addr);
+}
+
+__attribute__((visibility("hidden"), no_sanitize("cfi"))) void
+__cfi_slowpath_diag(uint64_t Type, void *Addr, void *Diag) {
+  cfi_slowpath_diag(Type, Addr, Diag);
+}
+}
diff --git a/test/cfi/icall/external-call.c b/test/cfi/icall/external-call.c
index e90c7e0..27c4478 100644
--- a/test/cfi/icall/external-call.c
+++ b/test/cfi/icall/external-call.c
@@ -4,7 +4,8 @@
 
 // This test uses jump tables containing PC-relative references to external
 // symbols, which the Mach-O object writer does not currently support.
-// XFAIL: darwin
+// The test passes on i386 Darwin and fails on x86_64, hence unsupported instead of xfail.
+// UNSUPPORTED: darwin
 
 #include <stdlib.h>
 #include <stdio.h>
diff --git a/test/cfi/icall/wrong-signature-mixed-lto.c b/test/cfi/icall/wrong-signature-mixed-lto.c
new file mode 100644
index 0000000..0e5fb85
--- /dev/null
+++ b/test/cfi/icall/wrong-signature-mixed-lto.c
@@ -0,0 +1,41 @@
+// Test that the checking is done with the actual type of f() even when the
+// calling module has an incorrect declaration. Test a mix of lto types.
+//
+// -flto below overrides -flto=thin in %clang_cfi
+// RUN: %clang_cfi %s -DMODULE_A -c -o %t1_a.o
+// RUN: %clang_cfi %s -DMODULE_B -c -o %t1_b.o -flto
+// RUN: %clang_cfi %t1_a.o %t1_b.o -o %t1
+// RUN: %expect_crash %t1 2>&1 | FileCheck --check-prefix=CFI %s
+//
+// RUN: %clang_cfi %s -DMODULE_A -c -o %t2_a.o -flto
+// RUN: %clang_cfi %s -DMODULE_B -c -o %t2_b.o
+// RUN: %clang_cfi %t2_a.o %t2_b.o -o %t2
+// RUN: %expect_crash %t2 2>&1 | FileCheck --check-prefix=CFI %s
+//
+// RUN: %clang_cfi %s -DMODULE_A -c -o %t3_a.o
+// RUN: %clang_cfi %s -DMODULE_B -c -o %t3_b.o
+// RUN: %clang_cfi %t3_a.o %t3_b.o -o %t3
+// RUN: %expect_crash %t3 2>&1 | FileCheck --check-prefix=CFI %s
+//
+// REQUIRES: thinlto
+
+#include <stdio.h>
+
+#if defined(MODULE_B)
+int f() {
+  return 42;
+}
+#elif defined(MODULE_A)
+void f();
+
+int main() {
+  // CFI: 1
+  fprintf(stderr, "1\n");
+
+  void (*volatile p)() = &f;
+  p();
+
+  // CFI-NOT: 2
+  fprintf(stderr, "2\n");
+}
+#endif
diff --git a/test/cfi/lit.cfg b/test/cfi/lit.cfg
index 3c02506..301d932 100644
--- a/test/cfi/lit.cfg
+++ b/test/cfi/lit.cfg
@@ -1,16 +1,17 @@
 import lit.formats
 import os
 
-config.name = 'cfi'
+config.name = 'cfi' + config.name_suffix
 config.suffixes = ['.c', '.cpp', '.test']
 config.test_source_root = os.path.dirname(__file__)
 
-clangxx = ' '.join([config.clang] + config.cxx_mode_flags)
+clang = ' '.join([config.compile_wrapper, config.clang, config.target_cflags])
+clangxx = ' '.join([config.compile_wrapper, config.clang, config.target_cflags] + config.cxx_mode_flags)
 
-config.substitutions.append((r"%clang ", ' '.join([config.clang]) + ' '))
+config.substitutions.append((r"%clang ", clang + ' '))
 config.substitutions.append((r"%clangxx ", clangxx + ' '))
 if config.lto_supported:
-  clang_cfi = ' '.join(config.lto_launch + [config.clang] + config.lto_flags + ['-flto -fsanitize=cfi '])
+  clang_cfi = ' '.join(config.lto_launch + [clang] + config.lto_flags + ['-fsanitize=cfi '])
 
   if config.cfi_lit_test_mode == "Devirt":
     config.available_features.add('devirt')
@@ -23,6 +24,8 @@
   diag = '-fno-sanitize-trap=cfi -fsanitize-recover=cfi '
   non_dso = '-fvisibility=hidden '
   dso = '-fsanitize-cfi-cross-dso -fvisibility=default '
+  if config.android:
+    dso += '-include ' + config.test_source_root + '/cross-dso/util/cfi_stubs.h '
   config.substitutions.append((r"%clang_cfi ", clang_cfi + non_dso))
   config.substitutions.append((r"%clangxx_cfi ", clang_cfi + cxx + non_dso))
   config.substitutions.append((r"%clang_cfi_diag ", clang_cfi + non_dso + diag))
@@ -32,5 +35,8 @@
 else:
   config.unsupported = True
 
+if config.default_sanitizer_opts:
+  config.environment['UBSAN_OPTIONS'] = ':'.join(config.default_sanitizer_opts)
+
 if lit_config.params.get('check_supported', None) and config.unsupported:
   raise BaseException("Tests unsupported")
diff --git a/test/cfi/lit.site.cfg.in b/test/cfi/lit.site.cfg.in
index 87e5b51..066c915 100644
--- a/test/cfi/lit.site.cfg.in
+++ b/test/cfi/lit.site.cfg.in
@@ -1,6 +1,11 @@
 @LIT_SITE_CFG_IN_HEADER@
 
+config.name_suffix = "@CFI_TEST_CONFIG_SUFFIX@"
 config.cfi_lit_test_mode = "@CFI_LIT_TEST_MODE@"
+config.target_arch = "@CFI_TEST_TARGET_ARCH@"
+config.target_cflags = "@CFI_TEST_TARGET_CFLAGS@"
+config.use_lld = @CFI_TEST_USE_LLD@
+config.use_thinlto = @CFI_TEST_USE_THINLTO@
 
 lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/test/lit.common.configured")
 lit_config.load_config(config, "@CMAKE_CURRENT_SOURCE_DIR@/lit.cfg")
diff --git a/test/cfi/multiple-inheritance.cpp b/test/cfi/multiple-inheritance.cpp
index a3b2ac5..b8520d8 100644
--- a/test/cfi/multiple-inheritance.cpp
+++ b/test/cfi/multiple-inheritance.cpp
@@ -1,26 +1,26 @@
 // RUN: %clangxx_cfi -o %t1 %s
-// RUN: %expect_crash %t1 2>&1 | FileCheck --check-prefix=CFI %s
-// RUN: %expect_crash %t1 x 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t1 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t1 x 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -DB32 -o %t2 %s
-// RUN: %expect_crash %t2 2>&1 | FileCheck --check-prefix=CFI %s
-// RUN: %expect_crash %t2 x 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t2 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t2 x 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -DB64 -o %t3 %s
-// RUN: %expect_crash %t3 2>&1 | FileCheck --check-prefix=CFI %s
-// RUN: %expect_crash %t3 x 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t3 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t3 x 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -DBM -o %t4 %s
-// RUN: %expect_crash %t4 2>&1 | FileCheck --check-prefix=CFI %s
-// RUN: %expect_crash %t4 x 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t4 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t4 x 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx -o %t5 %s
-// RUN: %t5 2>&1 | FileCheck --check-prefix=NCFI %s
-// RUN: %t5 x 2>&1 | FileCheck --check-prefix=NCFI %s
+// RUN: %run %t5 2>&1 | FileCheck --check-prefix=NCFI %s
+// RUN: %run %t5 x 2>&1 | FileCheck --check-prefix=NCFI %s
 
 // RUN: %clangxx_cfi_diag -o %t6 %s
-// RUN: %t6 2>&1 | FileCheck --check-prefix=CFI-DIAG2 %s
-// RUN: %t6 x 2>&1 | FileCheck --check-prefix=CFI-DIAG1 %s
+// RUN: %run %t6 2>&1 | FileCheck --check-prefix=CFI-DIAG2 %s
+// RUN: %run %t6 x 2>&1 | FileCheck --check-prefix=CFI-DIAG1 %s
 
 // Tests that the CFI mechanism is sensitive to multiple inheritance and only
 // permits calls via virtual tables for the correct base class.
diff --git a/test/cfi/nvcall.cpp b/test/cfi/nvcall.cpp
index 9d8f5f4..b61adb1 100644
--- a/test/cfi/nvcall.cpp
+++ b/test/cfi/nvcall.cpp
@@ -1,20 +1,20 @@
 // RUN: %clangxx_cfi -o %t1 %s
-// RUN: %expect_crash %t1 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t1 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -DB32 -o %t2 %s
-// RUN: %expect_crash %t2 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t2 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -DB64 -o %t3 %s
-// RUN: %expect_crash %t3 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t3 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -DBM -o %t4 %s
-// RUN: %expect_crash %t4 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t4 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx -o %t5 %s
-// RUN: %t5 2>&1 | FileCheck --check-prefix=NCFI %s
+// RUN: %run %t5 2>&1 | FileCheck --check-prefix=NCFI %s
 
 // RUN: %clangxx_cfi_diag -o %t6 %s
-// RUN: %t6 2>&1 | FileCheck --check-prefix=CFI-DIAG %s
+// RUN: %run %t6 2>&1 | FileCheck --check-prefix=CFI-DIAG %s
 
 // Tests that the CFI mechanism crashes the program when making a non-virtual
 // call to an object of the wrong class, by casting a pointer to such an object
diff --git a/test/cfi/overwrite.cpp b/test/cfi/overwrite.cpp
index 48c0a89..7d7ad1c 100644
--- a/test/cfi/overwrite.cpp
+++ b/test/cfi/overwrite.cpp
@@ -1,20 +1,20 @@
 // RUN: %clangxx_cfi -o %t1 %s
-// RUN: %expect_crash_unless_devirt %t1 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash_unless_devirt %run %t1 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -DB32 -o %t2 %s
-// RUN: %expect_crash %t2 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t2 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -DB64 -o %t3 %s
-// RUN: %expect_crash %t3 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t3 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -DBM -o %t4 %s
-// RUN: %expect_crash %t4 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t4 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx -o %t5 %s
-// RUN: %t5 2>&1 | FileCheck --check-prefix=NCFI %s
+// RUN: %run %t5 2>&1 | FileCheck --check-prefix=NCFI %s
 
 // RUN: %clangxx_cfi_diag -o %t6 %s
-// RUN: %t6 2>&1 | FileCheck --check-prefix=CFI-DIAG %s
+// RUN: %run %t6 2>&1 | FileCheck --check-prefix=CFI-DIAG %s
 
 // Tests that the CFI mechanism crashes the program when a virtual table is
 // replaced with a compatible table of function pointers that does not belong to
diff --git a/test/cfi/sibling.cpp b/test/cfi/sibling.cpp
index 9f32302..fb6e2f2 100644
--- a/test/cfi/sibling.cpp
+++ b/test/cfi/sibling.cpp
@@ -1,21 +1,21 @@
 // XFAIL: *
 
 // RUN: %clangxx_cfi -o %t1 %s
-// RUN: %expect_crash %t1 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t1 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -DB32 -o %t2 %s
-// RUN: %expect_crash %t2 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t2 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -DB64 -o %t3 %s
-// RUN: %expect_crash %t3 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t3 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -DBM -o %t4 %s
-// RUN: %expect_crash %t4 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t4 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx -o %t5 %s
-// RUN: %t5 2>&1 | FileCheck --check-prefix=NCFI %s
+// RUN: %run %t5 2>&1 | FileCheck --check-prefix=NCFI %s
 
-// Tests that the CFI enforcement distinguishes betwen non-overriding siblings.
+// Tests that the CFI enforcement distinguishes between non-overriding siblings.
 // XFAILed as not implemented yet.
 
 #include <stdio.h>
diff --git a/test/cfi/simple-fail.cpp b/test/cfi/simple-fail.cpp
index 595ca16..ef36fb0 100644
--- a/test/cfi/simple-fail.cpp
+++ b/test/cfi/simple-fail.cpp
@@ -1,59 +1,59 @@
 // RUN: %clangxx_cfi -o %t1 %s
-// RUN: %expect_crash %t1 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t1 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -DB32 -o %t2 %s
-// RUN: %expect_crash %t2 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t2 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -DB64 -o %t3 %s
-// RUN: %expect_crash %t3 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t3 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -DBM -o %t4 %s
-// RUN: %expect_crash %t4 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t4 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -O1 -o %t5 %s
-// RUN: %expect_crash %t5 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t5 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -O1 -DB32 -o %t6 %s
-// RUN: %expect_crash %t6 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t6 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -O1 -DB64 -o %t7 %s
-// RUN: %expect_crash %t7 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t7 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -O1 -DBM -o %t8 %s
-// RUN: %expect_crash %t8 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t8 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -O2 -o %t9 %s
-// RUN: %expect_crash %t9 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t9 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -O2 -DB32 -o %t10 %s
-// RUN: %expect_crash %t10 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t10 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -O2 -DB64 -o %t11 %s
-// RUN: %expect_crash %t11 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t11 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -O2 -DBM -o %t12 %s
-// RUN: %expect_crash %t12 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t12 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -O3 -o %t13 %s
-// RUN: %expect_crash %t13 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t13 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -O3 -DB32 -o %t14 %s
-// RUN: %expect_crash %t14 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t14 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -O3 -DB64 -o %t15 %s
-// RUN: %expect_crash %t15 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t15 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -O3 -DBM -o %t16 %s
-// RUN: %expect_crash %t16 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t16 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi_diag -o %t17 %s
-// RUN: %t17 2>&1 | FileCheck --check-prefix=CFI-DIAG %s
+// RUN: %run %t17 2>&1 | FileCheck --check-prefix=CFI-DIAG %s
 
 // RUN: %clangxx -o %t18 %s
-// RUN: %t18 2>&1 | FileCheck --check-prefix=NCFI %s
+// RUN: %run %t18 2>&1 | FileCheck --check-prefix=NCFI %s
 
 // RUN: %clangxx_cfi -DCHECK_NO_SANITIZE_CFI -o %t19 %s
-// RUN: %t19 2>&1 | FileCheck --check-prefix=NCFI %s
+// RUN: %run %t19 2>&1 | FileCheck --check-prefix=NCFI %s
 
 // Tests that the CFI mechanism crashes the program when making a virtual call
 // to an object of the wrong class but with a compatible vtable, by casting a
diff --git a/test/cfi/simple-pass.cpp b/test/cfi/simple-pass.cpp
index 4d856eb..aba09be 100644
--- a/test/cfi/simple-pass.cpp
+++ b/test/cfi/simple-pass.cpp
@@ -1,5 +1,5 @@
 // RUN: %clangxx_cfi -o %t %s
-// RUN: %t
+// RUN: %run %t
 
 // Tests that the CFI mechanism does not crash the program when making various
 // kinds of valid calls involving classes with various different linkages and
diff --git a/test/cfi/stats.cpp b/test/cfi/stats.cpp
index 566fcfb..56cc2dd 100644
--- a/test/cfi/stats.cpp
+++ b/test/cfi/stats.cpp
@@ -1,10 +1,13 @@
 // RUN: %clangxx_cfi -g -fsanitize-stats -o %t %s
-// RUN: env SANITIZER_STATS_PATH=%t.stats %t
+// RUN: env SANITIZER_STATS_PATH=%t.stats %run %t
 // RUN: sanstats %t.stats | FileCheck %s
 
 // FIXME: We currently emit the wrong debug info under devirtualization.
 // UNSUPPORTED: devirt
 
+// FIXME: %t.stats must be transferred from device to host for this to work on Android.
+// XFAIL: android
+
 struct ABase {};
 
 struct A : ABase {
diff --git a/test/cfi/target_uninstrumented.cpp b/test/cfi/target_uninstrumented.cpp
index 2ec2b5b..5df0738 100644
--- a/test/cfi/target_uninstrumented.cpp
+++ b/test/cfi/target_uninstrumented.cpp
@@ -1,8 +1,9 @@
-// RUN: %clangxx -g -DSHARED_LIB %s -fPIC -shared -o %T/target_uninstrumented-so.so
-// RUN: %clangxx_cfi_diag -g %s -o %t %T/target_uninstrumented-so.so
-// RUN: %t 2>&1 | FileCheck %s
+// RUN: %clangxx -g -DSHARED_LIB %s -fPIC -shared -o %dynamiclib %ld_flags_rpath_so
+// RUN: %clangxx_cfi_diag -g %s -o %t %ld_flags_rpath_exe
+// RUN: %run %t 2>&1 | FileCheck %s
 
 // REQUIRES: cxxabi
+// UNSUPPORTED: win32
 
 #include <stdio.h>
 #include <string.h>
@@ -31,7 +32,7 @@
 int main(int argc, char *argv[]) {
   void *p = create_B();
   // CHECK: runtime error: control flow integrity check for type 'A' failed during cast to unrelated type
-  // CHECK: invalid vtable in module {{.*}}target_uninstrumented-so.so
+  // CHECK: invalid vtable in module {{.*}}libtarget_uninstrumented.cpp.dynamic.so
   A *a = (A *)p;
   memset(p, 0, sizeof(A));
   // CHECK: runtime error: control flow integrity check for type 'A' failed during cast to unrelated type
diff --git a/test/cfi/two-vcalls.cpp b/test/cfi/two-vcalls.cpp
index 854b3e0..fbe4d97 100644
--- a/test/cfi/two-vcalls.cpp
+++ b/test/cfi/two-vcalls.cpp
@@ -1,5 +1,5 @@
 // RUN: %clangxx_cfi_diag -o %t %s
-// RUN: %t 2>&1 | FileCheck %s
+// RUN: %run %t 2>&1 | FileCheck %s
 
 // This test checks that we don't generate two type checks,
 // if two virtual calls are in the same function.
diff --git a/test/cfi/vdtor.cpp b/test/cfi/vdtor.cpp
index 522d24c..defa4ce 100644
--- a/test/cfi/vdtor.cpp
+++ b/test/cfi/vdtor.cpp
@@ -1,20 +1,20 @@
 // RUN: %clangxx_cfi -o %t1 %s
-// RUN: %expect_crash %t1 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t1 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -DB32 -o %t2 %s
-// RUN: %expect_crash %t2 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t2 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -DB64 -o %t3 %s
-// RUN: %expect_crash %t3 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t3 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx_cfi -DBM -o %t4 %s
-// RUN: %expect_crash %t4 2>&1 | FileCheck --check-prefix=CFI %s
+// RUN: %expect_crash %run %t4 2>&1 | FileCheck --check-prefix=CFI %s
 
 // RUN: %clangxx -o %t5 %s
-// RUN: %t5 2>&1 | FileCheck --check-prefix=NCFI %s
+// RUN: %run %t5 2>&1 | FileCheck --check-prefix=NCFI %s
 
 // RUN: %clangxx_cfi_diag -o %t6 %s
-// RUN: %t6 2>&1 | FileCheck --check-prefix=CFI-DIAG %s
+// RUN: %run %t6 2>&1 | FileCheck --check-prefix=CFI-DIAG %s
 
 // Tests that the CFI enforcement also applies to virtual destructor calls made
 // via 'delete'.
diff --git a/test/cfi/vtable-may-alias.cpp b/test/cfi/vtable-may-alias.cpp
new file mode 100644
index 0000000..f63b53b
--- /dev/null
+++ b/test/cfi/vtable-may-alias.cpp
@@ -0,0 +1,25 @@
+// RUN: %clangxx_cfi -o %t %s
+// RUN: %run %t
+
+// In this example, both __typeid_A_global_addr and __typeid_B_global_addr will
+// refer to the same address. Make sure that the compiler does not assume that
+// they do not alias.
+
+struct A {
+  virtual void f() = 0;
+};
+
+struct B : A {
+  virtual void f() {}
+};
+
+__attribute__((weak)) void foo(void *p) {
+  B *b = (B *)p;
+  A *a = (A *)b;
+  a->f();
+}
+
+int main() {
+  B b;
+  foo(&b);
+}
diff --git a/test/dfsan/custom.cc b/test/dfsan/custom.cc
index c96d940..b36db01 100644
--- a/test/dfsan/custom.cc
+++ b/test/dfsan/custom.cc
@@ -3,7 +3,7 @@
 // RUN: %clang_dfsan -DSTRICT_DATA_DEPENDENCIES %s -o %t && %run %t
 // RUN: %clang_dfsan -DSTRICT_DATA_DEPENDENCIES -mllvm -dfsan-args-abi %s -o %t && %run %t
 
-// XFAIL: target-is-mips64el
+// XFAIL: target-is-mips64,target-is-mips64el
 
 // Tests custom implementations of various glibc functions.
 
diff --git a/test/esan/TestCases/large-stack-linux.c b/test/esan/TestCases/large-stack-linux.c
index 3e024fc..1af32f8 100644
--- a/test/esan/TestCases/large-stack-linux.c
+++ b/test/esan/TestCases/large-stack-linux.c
@@ -56,14 +56,14 @@
   // CHECK:      in esan::initializeLibrary
   // CHECK:      Testing child with infinite stack
   // CHECK-NEXT: in esan::initializeLibrary
-  // CHECK-NEXT: =={{[0-9]+}}==The stack size limit is beyond the maximum supported.
+  // CHECK-NEXT: =={{[0-9:]+}}==The stack size limit is beyond the maximum supported.
   // CHECK-NEXT: Re-execing with a stack size below 1TB.
   // CHECK-NEXT: in esan::initializeLibrary
   // CHECK:      done
   // CHECK:      in esan::finalizeLibrary
   // CHECK:      Testing child with 1TB stack
   // CHECK-NEXT: in esan::initializeLibrary
-  // CHECK-NEXT: =={{[0-9]+}}==The stack size limit is beyond the maximum supported.
+  // CHECK-NEXT: =={{[0-9:]+}}==The stack size limit is beyond the maximum supported.
   // CHECK-NEXT: Re-execing with a stack size below 1TB.
   // CHECK-NEXT: in esan::initializeLibrary
   // CHECK:      done
diff --git a/test/esan/TestCases/workingset-midreport.cpp b/test/esan/TestCases/workingset-midreport.cpp
index 2c29cf4..38c3765 100644
--- a/test/esan/TestCases/workingset-midreport.cpp
+++ b/test/esan/TestCases/workingset-midreport.cpp
@@ -4,6 +4,9 @@
 // RUN: %clang -O0 %s -o %t 2>&1
 // RUN: %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ESAN
 
+// FIXME: Re-enable once PR33590 is fixed.
+// UNSUPPORTED: x86_64
+
 #include <sanitizer/esan_interface.h>
 #include <sched.h>
 #include <stdio.h>
diff --git a/test/esan/TestCases/workingset-samples.cpp b/test/esan/TestCases/workingset-samples.cpp
index cf198d2..d97b62b 100644
--- a/test/esan/TestCases/workingset-samples.cpp
+++ b/test/esan/TestCases/workingset-samples.cpp
@@ -1,6 +1,9 @@
 // RUN: %clang_esan_wset -O0 %s -o %t 2>&1
 // RUN: %run %t 2>&1 | FileCheck %s
 
+// FIXME: Re-enable once PR33590 is fixed.
+// UNSUPPORTED: x86_64
+
 #include <sanitizer/esan_interface.h>
 #include <sched.h>
 #include <stdlib.h>
@@ -8,7 +11,6 @@
 #include <sys/mman.h>
 
 const int size = 0x1 << 25; // 523288 cache lines
-const int iters = 6;
 
 int main(int argc, char **argv) {
   char *buf = (char *)mmap(0, size, PROT_READ | PROT_WRITE,
diff --git a/test/esan/TestCases/workingset-simple.cpp b/test/esan/TestCases/workingset-simple.cpp
index c8a2d52..f1ac2ec 100644
--- a/test/esan/TestCases/workingset-simple.cpp
+++ b/test/esan/TestCases/workingset-simple.cpp
@@ -1,6 +1,9 @@
 // RUN: %clang_esan_wset -O0 %s -o %t 2>&1
 // RUN: %run %t 2>&1 | FileCheck %s
 
+// FIXME: Re-enable once PR33590 is fixed.
+// UNSUPPORTED: x86_64
+
 #include <stdlib.h>
 #include <string.h>
 #include <sys/mman.h>
diff --git a/test/fuzzer/AFLDriverTest.cpp b/test/fuzzer/AFLDriverTest.cpp
new file mode 100644
index 0000000..b949adc
--- /dev/null
+++ b/test/fuzzer/AFLDriverTest.cpp
@@ -0,0 +1,28 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Contains dummy functions used to avoid dependency on AFL.
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+extern "C" void __afl_manual_init() {}
+
+extern "C" int __afl_persistent_loop(unsigned int N) {
+  static int Count = N;
+  fprintf(stderr, "__afl_persistent_loop calle, Count = %d\n", Count);
+  if (Count--) return 1;
+  return 0;
+}
+
+// This declaration exists to prevent the Darwin linker
+// from complaining about this being a missing weak symbol.
+extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) {
+  fprintf(stderr, "LLVMFuzzerInitialize called\n");
+  return 0;
+}
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  fprintf(stderr, "LLVMFuzzerTestOneInput called; Size = %zd\n", Size);
+  return 0;
+}
diff --git a/test/fuzzer/AbsNegAndConstant64Test.cpp b/test/fuzzer/AbsNegAndConstant64Test.cpp
new file mode 100644
index 0000000..abeb784
--- /dev/null
+++ b/test/fuzzer/AbsNegAndConstant64Test.cpp
@@ -0,0 +1,24 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// abs(x) < 0 and y == Const puzzle, 64-bit variant.
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (Size < 16) return 0;
+  int64_t x;
+  uint64_t y;
+  memcpy(&x, Data, sizeof(x));
+  memcpy(&y, Data + sizeof(x), sizeof(y));
+  if (llabs(x) < 0 && y == 0xbaddcafedeadbeefULL) {
+    printf("BINGO; Found the target, exiting; x = 0x%lx y 0x%lx\n", x, y);
+    fflush(stdout);
+    exit(1);
+  }
+  return 0;
+}
+
diff --git a/test/fuzzer/AbsNegAndConstantTest.cpp b/test/fuzzer/AbsNegAndConstantTest.cpp
new file mode 100644
index 0000000..049db0a
--- /dev/null
+++ b/test/fuzzer/AbsNegAndConstantTest.cpp
@@ -0,0 +1,24 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// abs(x) < 0 and y == Const puzzle.
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (Size < 8) return 0;
+  int x;
+  unsigned y;
+  memcpy(&x, Data, sizeof(x));
+  memcpy(&y, Data + sizeof(x), sizeof(y));
+  if (abs(x) < 0 && y == 0xbaddcafe) {
+    printf("BINGO; Found the target, exiting; x = 0x%x y 0x%x\n", x, y);
+    fflush(stdout);
+    exit(1);
+  }
+  return 0;
+}
+
diff --git a/test/fuzzer/AccumulateAllocationsTest.cpp b/test/fuzzer/AccumulateAllocationsTest.cpp
new file mode 100644
index 0000000..e9acd7c
--- /dev/null
+++ b/test/fuzzer/AccumulateAllocationsTest.cpp
@@ -0,0 +1,17 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Test with a more mallocs than frees, but no leak.
+#include <cstddef>
+#include <cstdint>
+
+const int kAllocatedPointersSize = 10000;
+int NumAllocatedPointers = 0;
+int *AllocatedPointers[kAllocatedPointersSize];
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (NumAllocatedPointers < kAllocatedPointersSize)
+    AllocatedPointers[NumAllocatedPointers++] = new int;
+  return 0;
+}
+
diff --git a/test/fuzzer/BadStrcmpTest.cpp b/test/fuzzer/BadStrcmpTest.cpp
new file mode 100644
index 0000000..ba2b068
--- /dev/null
+++ b/test/fuzzer/BadStrcmpTest.cpp
@@ -0,0 +1,19 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Test that we don't creash in case of bad strcmp params.
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+
+static volatile int Sink;
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (Size != 10) return 0;
+  // Data is not zero-terminated, so this call is bad.
+  // Still, there are cases when such calles appear, see e.g.
+  // https://bugs.llvm.org/show_bug.cgi?id=32357
+  Sink = strcmp(reinterpret_cast<const char*>(Data), "123456789");
+  return 0;
+}
+
diff --git a/test/fuzzer/BogusInitializeTest.cpp b/test/fuzzer/BogusInitializeTest.cpp
new file mode 100644
index 0000000..c7e81a5
--- /dev/null
+++ b/test/fuzzer/BogusInitializeTest.cpp
@@ -0,0 +1,15 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Make sure LLVMFuzzerInitialize does not change argv[0].
+#include <stddef.h>
+#include <stdint.h>
+
+extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) {
+  ***argv = 'X';
+  return 0;
+}
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  return 0;
+}
diff --git a/test/fuzzer/BufferOverflowOnInput.cpp b/test/fuzzer/BufferOverflowOnInput.cpp
new file mode 100644
index 0000000..159da92
--- /dev/null
+++ b/test/fuzzer/BufferOverflowOnInput.cpp
@@ -0,0 +1,24 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Simple test for a fuzzer. The fuzzer must find the string "Hi!".
+#include <assert.h>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <iostream>
+#include <ostream>
+
+static volatile bool SeedLargeBuffer;
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  assert(Data);
+  if (Size >= 4)
+    SeedLargeBuffer = true;
+  if (Size == 3 && SeedLargeBuffer && Data[3]) {
+    std::cout << "Woops, reading Data[3] w/o crashing\n" << std::flush;
+    exit(1);
+  }
+  return 0;
+}
+
diff --git a/test/fuzzer/CMakeLists.txt b/test/fuzzer/CMakeLists.txt
new file mode 100644
index 0000000..cf83c00
--- /dev/null
+++ b/test/fuzzer/CMakeLists.txt
@@ -0,0 +1,43 @@
+set(LIBFUZZER_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS})
+list(REMOVE_ITEM LIBFUZZER_TEST_DEPS SanitizerLintCheck)
+if (NOT COMPILER_RT_STANDALONE_BUILD)
+  list(APPEND LIBFUZZER_TEST_DEPS fuzzer asan)
+endif()
+
+if(COMPILER_RT_INCLUDE_TESTS)
+  list(APPEND LIBFUZZER_TEST_DEPS FuzzerUnitTests)
+endif()
+
+set(LIBFUZZER_TESTSUITES)
+
+
+if(COMPILER_RT_INCLUDE_TESTS)
+  # libFuzzer unit tests.
+  configure_lit_site_cfg(
+    ${CMAKE_CURRENT_SOURCE_DIR}/unit/lit.site.cfg.in
+    ${CMAKE_CURRENT_BINARY_DIR}/unit/lit.site.cfg)
+  list(APPEND LIBFUZZER_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/unit)
+endif()
+
+foreach(arch ${FUZZER_SUPPORTED_ARCH})
+  set(LIBFUZZER_TEST_COMPILER ${COMPILER_RT_TEST_COMPILER})
+  get_test_cc_for_arch(${arch} LIBFUZZER_TEST_COMPILER LIBFUZZER_TEST_FLAGS)
+
+  string(TOUPPER ${arch} ARCH_UPPER_CASE)
+  set(CONFIG_NAME ${ARCH_UPPER_CASE}${OS_NAME}Config)
+
+  # LIT-based libFuzzer tests.
+  configure_lit_site_cfg(
+    ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
+    ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg
+    )
+  list(APPEND LIBFUZZER_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME})
+
+endforeach()
+
+set(EXCLUDE_FROM_ALL ON)
+
+add_lit_testsuite(check-fuzzer "Running Fuzzer tests"
+  ${LIBFUZZER_TESTSUITES}
+  DEPENDS ${LIBFUZZER_TEST_DEPS})
+set_target_properties(check-fuzzer PROPERTIES FOLDER "Compiler-RT Tests")
diff --git a/test/fuzzer/CallerCalleeTest.cpp b/test/fuzzer/CallerCalleeTest.cpp
new file mode 100644
index 0000000..ed9f37c
--- /dev/null
+++ b/test/fuzzer/CallerCalleeTest.cpp
@@ -0,0 +1,59 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Simple test for a fuzzer.
+// Try to find the target using the indirect caller-callee pairs.
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+
+typedef void (*F)();
+static F t[256];
+
+void f34() {
+  std::cerr << "BINGO\n";
+  exit(1);
+}
+void f23() { t[(unsigned)'d'] = f34;}
+void f12() { t[(unsigned)'c'] = f23;}
+void f01() { t[(unsigned)'b'] = f12;}
+void f00() {}
+
+static F t0[256] = {
+  f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00,
+  f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00,
+  f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00,
+  f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00,
+  f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00,
+  f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00,
+  f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00,
+  f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00,
+  f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00,
+  f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00,
+  f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00,
+  f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00,
+  f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00,
+  f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00,
+  f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00,
+  f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00,
+};
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (Size < 4) return 0;
+  // Spoof the counters.
+  for (int i = 0; i < 200; i++) {
+    f23();
+    f12();
+    f01();
+  }
+  memcpy(t, t0, sizeof(t));
+  t[(unsigned)'a'] = f01;
+  t[Data[0]]();
+  t[Data[1]]();
+  t[Data[2]]();
+  t[Data[3]]();
+  return 0;
+}
+
diff --git a/test/fuzzer/CleanseTest.cpp b/test/fuzzer/CleanseTest.cpp
new file mode 100644
index 0000000..ee18457
--- /dev/null
+++ b/test/fuzzer/CleanseTest.cpp
@@ -0,0 +1,16 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Test the the fuzzer is able to 'cleanse' the reproducer
+// by replacing all irrelevant bytes with garbage.
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (Size >= 20 && Data[1] == '1' && Data[5] == '5' && Data[10] == 'A' &&
+      Data[19] == 'Z')
+    abort();
+  return 0;
+}
+
diff --git a/test/fuzzer/CounterTest.cpp b/test/fuzzer/CounterTest.cpp
new file mode 100644
index 0000000..4917934
--- /dev/null
+++ b/test/fuzzer/CounterTest.cpp
@@ -0,0 +1,18 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Test for a fuzzer: must find the case where a particular basic block is
+// executed many times.
+#include <iostream>
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  int Num = 0;
+  for (size_t i = 0; i < Size; i++)
+    if (Data[i] == 'A' + i)
+      Num++;
+  if (Num >= 4) {
+    std::cerr <<  "BINGO!\n";
+    exit(1);
+  }
+  return 0;
+}
diff --git a/test/fuzzer/CustomCrossOverAndMutateTest.cpp b/test/fuzzer/CustomCrossOverAndMutateTest.cpp
new file mode 100644
index 0000000..74fc939
--- /dev/null
+++ b/test/fuzzer/CustomCrossOverAndMutateTest.cpp
@@ -0,0 +1,34 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Test that libFuzzer does not crash when LLVMFuzzerMutate called from
+// LLVMFuzzerCustomCrossOver.
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <string.h>
+#include <string>
+#include <vector>
+
+#include "FuzzerInterface.h"
+
+static volatile int sink;
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  std::string Str(reinterpret_cast<const char *>(Data), Size);
+  if (Size && Data[0] == '0')
+    sink++;
+  return 0;
+}
+
+extern "C" size_t LLVMFuzzerCustomCrossOver(const uint8_t *Data1, size_t Size1,
+                                            const uint8_t *Data2, size_t Size2,
+                                            uint8_t *Out, size_t MaxOutSize,
+                                            unsigned int Seed) {
+  std::vector<uint8_t> Buffer(MaxOutSize * 10);
+  LLVMFuzzerMutate(Buffer.data(), Buffer.size(), Buffer.size());
+  size_t Size = std::min(Size1, MaxOutSize);
+  memcpy(Out, Data1, Size);
+  return Size;
+}
diff --git a/test/fuzzer/CustomCrossOverTest.cpp b/test/fuzzer/CustomCrossOverTest.cpp
new file mode 100644
index 0000000..bd9afe7
--- /dev/null
+++ b/test/fuzzer/CustomCrossOverTest.cpp
@@ -0,0 +1,59 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Simple test for a cutom crossover.
+#include <assert.h>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <iostream>
+#include <ostream>
+#include <random>
+#include <string.h>
+#include <functional>
+
+static const char *Separator = "-########-";
+static const char *Target = "A-########-B";
+
+static volatile int sink;
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  assert(Data);
+  std::string Str(reinterpret_cast<const char *>(Data), Size);
+  static const size_t TargetHash = std::hash<std::string>{}(std::string(Target));
+  size_t StrHash = std::hash<std::string>{}(Str);
+
+  // Ensure we have 'A' and 'B' in the corpus.
+  if (Size == 1 && *Data == 'A')
+    sink++;
+  if (Size == 1 && *Data == 'B')
+    sink--;
+
+  if (TargetHash == StrHash) {
+    std::cout << "BINGO; Found the target, exiting\n" << std::flush;
+    exit(1);
+  }
+  return 0;
+}
+
+extern "C" size_t LLVMFuzzerCustomCrossOver(const uint8_t *Data1, size_t Size1,
+                                            const uint8_t *Data2, size_t Size2,
+                                            uint8_t *Out, size_t MaxOutSize,
+                                            unsigned int Seed) {
+  static size_t Printed;
+  static size_t SeparatorLen = strlen(Separator);
+
+  if (Printed++ < 32)
+    std::cerr << "In LLVMFuzzerCustomCrossover " << Size1 << " " << Size2 << "\n";
+
+  size_t Size = Size1 + Size2 + SeparatorLen;
+
+  if (Size > MaxOutSize)
+    return 0;
+
+  memcpy(Out, Data1, Size1);
+  memcpy(Out + Size1, Separator, SeparatorLen);
+  memcpy(Out + Size1 + SeparatorLen, Data2, Size2);
+
+  return Size;
+}
diff --git a/test/fuzzer/CustomMutatorTest.cpp b/test/fuzzer/CustomMutatorTest.cpp
new file mode 100644
index 0000000..b2adb94
--- /dev/null
+++ b/test/fuzzer/CustomMutatorTest.cpp
@@ -0,0 +1,39 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Simple test for a cutom mutator.
+#include <assert.h>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <iostream>
+#include <ostream>
+
+#include "FuzzerInterface.h"
+
+static volatile int Sink;
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  assert(Data);
+  if (Size > 0 && Data[0] == 'H') {
+    Sink = 1;
+    if (Size > 1 && Data[1] == 'i') {
+      Sink = 2;
+      if (Size > 2 && Data[2] == '!') {
+        std::cout << "BINGO; Found the target, exiting\n" << std::flush;
+        exit(1);
+      }
+    }
+  }
+  return 0;
+}
+
+extern "C" size_t LLVMFuzzerCustomMutator(uint8_t *Data, size_t Size,
+                                          size_t MaxSize, unsigned int Seed) {
+  static bool Printed;
+  if (!Printed) {
+    std::cerr << "In LLVMFuzzerCustomMutator\n";
+    Printed = true;
+  }
+  return LLVMFuzzerMutate(Data, Size, MaxSize);
+}
diff --git a/test/fuzzer/CxxStringEqTest.cpp b/test/fuzzer/CxxStringEqTest.cpp
new file mode 100644
index 0000000..924851c
--- /dev/null
+++ b/test/fuzzer/CxxStringEqTest.cpp
@@ -0,0 +1,25 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Simple test for a fuzzer. Must find a specific string
+// used in std::string operator ==.
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <iostream>
+#include <string>
+
+static volatile int Sink;
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  std::string Str((const char*)Data, Size);
+  bool Eq = Str == "FooBar";
+  Sink = Str == "123456";   // Try to confuse the fuzzer
+  if (Eq) {
+    std::cout << "BINGO; Found the target, exiting\n";
+    std::cout.flush();
+    abort();
+  }
+  return 0;
+}
+
diff --git a/test/fuzzer/DSO1.cpp b/test/fuzzer/DSO1.cpp
new file mode 100644
index 0000000..72a5ec4
--- /dev/null
+++ b/test/fuzzer/DSO1.cpp
@@ -0,0 +1,14 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Source code for a simple DSO.
+#ifdef _WIN32
+__declspec( dllexport )
+#endif
+int DSO1(int a) {
+  if (a < 123456)
+    return 0;
+  return 1;
+}
+
+void Uncovered1() { }
diff --git a/test/fuzzer/DSO2.cpp b/test/fuzzer/DSO2.cpp
new file mode 100644
index 0000000..2967055
--- /dev/null
+++ b/test/fuzzer/DSO2.cpp
@@ -0,0 +1,14 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Source code for a simple DSO.
+#ifdef _WIN32
+__declspec( dllexport )
+#endif
+int DSO2(int a) {
+  if (a < 3598235)
+    return 0;
+  return 1;
+}
+
+void Uncovered2() {}
diff --git a/test/fuzzer/DSOTestExtra.cpp b/test/fuzzer/DSOTestExtra.cpp
new file mode 100644
index 0000000..a2274d0
--- /dev/null
+++ b/test/fuzzer/DSOTestExtra.cpp
@@ -0,0 +1,11 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Source code for a simple DSO.
+
+int DSOTestExtra(int a) {
+  if (a < 452345)
+    return 0;
+  return 1;
+}
+
diff --git a/test/fuzzer/DSOTestMain.cpp b/test/fuzzer/DSOTestMain.cpp
new file mode 100644
index 0000000..e0c857d
--- /dev/null
+++ b/test/fuzzer/DSOTestMain.cpp
@@ -0,0 +1,31 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Source code for a simple DSO.
+
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+extern int DSO1(int a);
+extern int DSO2(int a);
+extern int DSOTestExtra(int a);
+
+static volatile int *nil = 0;
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  int x, y, z;
+  if (Size < sizeof(int) * 3) {
+    x = y = z = 0;
+  } else {
+    memcpy(&x, Data + 0 * sizeof(int), sizeof(int));
+    memcpy(&y, Data + 1 * sizeof(int), sizeof(int));
+    memcpy(&z, Data + 2 * sizeof(int), sizeof(int));
+  }
+  int sum = DSO1(x) + DSO2(y) + (z ? DSOTestExtra(z) : 0);
+  if (sum == 3) {
+    fprintf(stderr, "BINGO %d %d %d\n", x, y, z);
+    *nil = 0;
+  }
+  return 0;
+}
diff --git a/test/fuzzer/DeepRecursionTest.cpp b/test/fuzzer/DeepRecursionTest.cpp
new file mode 100644
index 0000000..bf4621d
--- /dev/null
+++ b/test/fuzzer/DeepRecursionTest.cpp
@@ -0,0 +1,25 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Simple test for a fuzzer. The fuzzer must find the deep recursion.
+// To generate a crashy input:
+// for((i=0;i<110;i++)); do echo -n ABCDEFGHIJ  >> INPUT; done
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+
+static volatile int Sink;
+
+void Recursive(const uint8_t *Data, size_t Size, int Depth) {
+  if (Depth > 1000) abort();
+  if (!Size) return;
+  if (*Data == ('A' + Depth % 10))
+    Recursive(Data + 1, Size - 1, Depth + 1);
+  Sink++;
+}
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  Recursive(Data, Size, 0);
+  return 0;
+}
+
diff --git a/test/fuzzer/DivTest.cpp b/test/fuzzer/DivTest.cpp
new file mode 100644
index 0000000..bce13fe
--- /dev/null
+++ b/test/fuzzer/DivTest.cpp
@@ -0,0 +1,20 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Simple test for a fuzzer: find the interesting argument for div.
+#include <assert.h>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <iostream>
+
+static volatile int Sink;
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (Size < 4) return 0;
+  int a;
+  memcpy(&a, Data, 4);
+  Sink = 12345678 / (987654 - a);
+  return 0;
+}
+
diff --git a/test/fuzzer/EmptyTest.cpp b/test/fuzzer/EmptyTest.cpp
new file mode 100644
index 0000000..5e84330
--- /dev/null
+++ b/test/fuzzer/EmptyTest.cpp
@@ -0,0 +1,11 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// A fuzzer with empty target function.
+
+#include <cstdint>
+#include <cstdlib>
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  return 0;
+}
diff --git a/test/fuzzer/EquivalenceATest.cpp b/test/fuzzer/EquivalenceATest.cpp
new file mode 100644
index 0000000..7d1ebb0
--- /dev/null
+++ b/test/fuzzer/EquivalenceATest.cpp
@@ -0,0 +1,17 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+
+// Test for libFuzzer's "equivalence" fuzzing, part A.
+extern "C" void LLVMFuzzerAnnounceOutput(const uint8_t *Data, size_t Size);
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  // fprintf(stderr, "A %zd\n", Size);
+  uint8_t Result[50];
+  if (Size > 50) Size = 50;
+  for (size_t i = 0; i < Size; i++)
+    Result[Size - i - 1] = Data[i];
+  LLVMFuzzerAnnounceOutput(Result, Size);
+  return 0;
+}
diff --git a/test/fuzzer/EquivalenceBTest.cpp b/test/fuzzer/EquivalenceBTest.cpp
new file mode 100644
index 0000000..b1de208
--- /dev/null
+++ b/test/fuzzer/EquivalenceBTest.cpp
@@ -0,0 +1,27 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+
+// Test for libFuzzer's "equivalence" fuzzing, part B.
+extern "C" void LLVMFuzzerAnnounceOutput(const uint8_t *Data, size_t Size);
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  // fprintf(stderr, "B %zd\n", Size);
+  uint8_t Result[50];
+  if (Size > 50) Size = 50;
+  for (size_t i = 0; i < Size; i++)
+    Result[Size - i - 1] = Data[i];
+
+  // Be a bit different from EquivalenceATest
+  if (Size > 10 && Data[5] == 'B' && Data[6] == 'C' && Data[7] == 'D') {
+    static int c;
+    if (!c)
+      fprintf(stderr, "ZZZZZZZ\n");
+    c = 1;
+    Result[2]++;
+  }
+
+  LLVMFuzzerAnnounceOutput(Result, Size);
+  return 0;
+}
diff --git a/test/fuzzer/FlagsTest.cpp b/test/fuzzer/FlagsTest.cpp
new file mode 100644
index 0000000..6eeac17
--- /dev/null
+++ b/test/fuzzer/FlagsTest.cpp
@@ -0,0 +1,32 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Parse some flags
+#include <string>
+#include <vector>
+
+static std::vector<std::string> Flags;
+
+extern "C" int LLVMFuzzerInitialize(int *Argc, char ***Argv) {
+  // Parse --flags and anything after -ignore_remaining_args=1 is passed.
+  int I = 1;
+  while (I < *Argc) {
+    std::string S((*Argv)[I++]);
+    if (S == "-ignore_remaining_args=1")
+      break;
+    if (S.substr(0, 2) == "--")
+      Flags.push_back(S);
+  }
+  while (I < *Argc)
+    Flags.push_back(std::string((*Argv)[I++]));
+
+  return 0;
+}
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  fprintf(stderr, "BINGO ");
+  for (auto Flag : Flags)
+    fprintf(stderr, "%s ", Flag.c_str());
+  fprintf(stderr, "\n");
+  return 0;
+}
diff --git a/test/fuzzer/FourIndependentBranchesTest.cpp b/test/fuzzer/FourIndependentBranchesTest.cpp
new file mode 100644
index 0000000..bbf5ea2
--- /dev/null
+++ b/test/fuzzer/FourIndependentBranchesTest.cpp
@@ -0,0 +1,22 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Simple test for a fuzzer. The fuzzer must find the string "FUZZ".
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <iostream>
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  int bits = 0;
+  if (Size > 0 && Data[0] == 'F') bits |= 1;
+  if (Size > 1 && Data[1] == 'U') bits |= 2;
+  if (Size > 2 && Data[2] == 'Z') bits |= 4;
+  if (Size > 3 && Data[3] == 'Z') bits |= 8;
+  if (bits == 15) {
+    std::cerr <<  "BINGO!\n";
+    exit(1);
+  }
+  return 0;
+}
+
diff --git a/test/fuzzer/FullCoverageSetTest.cpp b/test/fuzzer/FullCoverageSetTest.cpp
new file mode 100644
index 0000000..6d7e48f
--- /dev/null
+++ b/test/fuzzer/FullCoverageSetTest.cpp
@@ -0,0 +1,24 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Simple test for a fuzzer. The fuzzer must find the string "FUZZER".
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <iostream>
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  int bits = 0;
+  if (Size > 0 && Data[0] == 'F') bits |= 1;
+  if (Size > 1 && Data[1] == 'U') bits |= 2;
+  if (Size > 2 && Data[2] == 'Z') bits |= 4;
+  if (Size > 3 && Data[3] == 'Z') bits |= 8;
+  if (Size > 4 && Data[4] == 'E') bits |= 16;
+  if (Size > 5 && Data[5] == 'R') bits |= 32;
+  if (bits == 63) {
+    std::cerr <<  "BINGO!\n";
+    exit(1);
+  }
+  return 0;
+}
+
diff --git a/test/fuzzer/GcSectionsTest.cpp b/test/fuzzer/GcSectionsTest.cpp
new file mode 100644
index 0000000..fd9da77
--- /dev/null
+++ b/test/fuzzer/GcSectionsTest.cpp
@@ -0,0 +1,14 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Simple test for a fuzzer.
+// The unused function should not be present in the binary.
+#include <cstddef>
+#include <cstdint>
+
+extern "C" void UnusedFunctionShouldBeRemovedByLinker() { }
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  return 0;
+}
+
diff --git a/test/fuzzer/InitializeTest.cpp b/test/fuzzer/InitializeTest.cpp
new file mode 100644
index 0000000..a93c2a5
--- /dev/null
+++ b/test/fuzzer/InitializeTest.cpp
@@ -0,0 +1,29 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Make sure LLVMFuzzerInitialize is called.
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+static char *argv0;
+
+extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) {
+  assert(*argc > 0);
+  argv0 = **argv;
+  fprintf(stderr, "LLVMFuzzerInitialize: %s\n", argv0);
+  return 0;
+}
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  assert(argv0);
+  if (Size == strlen(argv0) &&
+      !memmem(Data, Size, argv0, Size)) {
+    fprintf(stderr, "BINGO %s\n", argv0);
+    exit(1);
+  }
+  return 0;
+}
diff --git a/test/fuzzer/LargeTest.cpp b/test/fuzzer/LargeTest.cpp
new file mode 100644
index 0000000..83ed619
--- /dev/null
+++ b/test/fuzzer/LargeTest.cpp
@@ -0,0 +1,37 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// A fuzz target with lots of edges.
+#include <cstdint>
+#include <cstdlib>
+
+static inline void break_optimization(const void *arg) {
+    __asm__ __volatile__("" : : "r" (arg) : "memory");
+}
+
+#define A                                         \
+  do {                                            \
+    i++;                                          \
+    c++;                                          \
+    if (Data[(i + __LINE__) % Size] == (c % 256)) \
+      break_optimization(Data);                   \
+    else                                          \
+      break_optimization(0);                      \
+  } while (0)
+
+// for (int i = 0, n = Data[(__LINE__ - 1) % Size] % 16; i < n; i++)
+
+#define B do{A; A; A; A; A; A; A; A; A; A; A; A; A; A; A; A; A; A; }while(0)
+#define C do{B; B; B; B; B; B; B; B; B; B; B; B; B; B; B; B; B; B; }while(0)
+#define D do{C; C; C; C; C; C; C; C; C; C; C; C; C; C; C; C; C; C; }while(0)
+#define E do{D; D; D; D; D; D; D; D; D; D; D; D; D; D; D; D; D; D; }while(0)
+
+volatile int sink;
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (!Size) return 0;
+  int c = 0;
+  int i = 0;
+  D;
+  return 0;
+}
+
diff --git a/test/fuzzer/LeakTest.cpp b/test/fuzzer/LeakTest.cpp
new file mode 100644
index 0000000..ea89e39
--- /dev/null
+++ b/test/fuzzer/LeakTest.cpp
@@ -0,0 +1,17 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Test with a leak.
+#include <cstddef>
+#include <cstdint>
+
+static volatile void *Sink;
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (Size > 0 && *Data == 'H') {
+    Sink = new int;
+    Sink = nullptr;
+  }
+  return 0;
+}
+
diff --git a/test/fuzzer/LeakTimeoutTest.cpp b/test/fuzzer/LeakTimeoutTest.cpp
new file mode 100644
index 0000000..9252619
--- /dev/null
+++ b/test/fuzzer/LeakTimeoutTest.cpp
@@ -0,0 +1,17 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Test with a leak.
+#include <cstddef>
+#include <cstdint>
+
+static volatile int *Sink;
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (!Size) return 0;
+  Sink = new int;
+  Sink = new int;
+  while (Sink) *Sink = 0;  // Infinite loop.
+  return 0;
+}
+
diff --git a/test/fuzzer/LoadTest.cpp b/test/fuzzer/LoadTest.cpp
new file mode 100644
index 0000000..67a28c7
--- /dev/null
+++ b/test/fuzzer/LoadTest.cpp
@@ -0,0 +1,22 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Simple test for a fuzzer: find interesting value of array index.
+#include <assert.h>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <iostream>
+
+static volatile int Sink;
+const int kArraySize = 1234567;
+int array[kArraySize];
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (Size < 8) return 0;
+  uint64_t a = 0;
+  memcpy(&a, Data, 8);
+  Sink = array[a % (kArraySize + 1)];
+  return 0;
+}
+
diff --git a/test/fuzzer/Memcmp64BytesTest.cpp b/test/fuzzer/Memcmp64BytesTest.cpp
new file mode 100644
index 0000000..5b6cb70
--- /dev/null
+++ b/test/fuzzer/Memcmp64BytesTest.cpp
@@ -0,0 +1,20 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Simple test for a fuzzer. The fuzzer must find a particular string.
+#include <cassert>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  const char kString64Bytes[] =
+      "123456789 123456789 123456789 123456789 123456789 123456789 1234";
+  assert(sizeof(kString64Bytes) == 65);
+  if (Size >= 64 && memcmp(Data, kString64Bytes, 64) == 0) {
+    fprintf(stderr, "BINGO\n");
+    exit(1);
+  }
+  return 0;
+}
diff --git a/test/fuzzer/MemcmpTest.cpp b/test/fuzzer/MemcmpTest.cpp
new file mode 100644
index 0000000..8dbb7d8
--- /dev/null
+++ b/test/fuzzer/MemcmpTest.cpp
@@ -0,0 +1,31 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Simple test for a fuzzer. The fuzzer must find a particular string.
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  // TODO: check other sizes.
+  if (Size >= 8 && memcmp(Data, "01234567", 8) == 0) {
+    if (Size >= 12 && memcmp(Data + 8, "ABCD", 4) == 0) {
+      if (Size >= 14 && memcmp(Data + 12, "XY", 2) == 0) {
+        if (Size >= 17 && memcmp(Data + 14, "KLM", 3) == 0) {
+          if (Size >= 27 && memcmp(Data + 17, "ABCDE-GHIJ", 10) == 0){
+            fprintf(stderr, "BINGO %zd\n", Size);
+            for (size_t i = 0; i < Size; i++) {
+              uint8_t C = Data[i];
+              if (C >= 32 && C < 127)
+                fprintf(stderr, "%c", C);
+            }
+            fprintf(stderr, "\n");
+            exit(1);
+          }
+        }
+      }
+    }
+  }
+  return 0;
+}
diff --git a/test/fuzzer/NotinstrumentedTest.cpp b/test/fuzzer/NotinstrumentedTest.cpp
new file mode 100644
index 0000000..9141899
--- /dev/null
+++ b/test/fuzzer/NotinstrumentedTest.cpp
@@ -0,0 +1,11 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// This test should not be instrumented.
+#include <cstddef>
+#include <cstdint>
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  return 0;
+}
+
diff --git a/test/fuzzer/NthRunCrashTest.cpp b/test/fuzzer/NthRunCrashTest.cpp
new file mode 100644
index 0000000..26cdc8f
--- /dev/null
+++ b/test/fuzzer/NthRunCrashTest.cpp
@@ -0,0 +1,19 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Crash on the N-th execution.
+#include <cstddef>
+#include <cstdint>
+#include <iostream>
+#include <ostream>
+
+static int Counter;
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (Counter++ == 1000) {
+    std::cout << "BINGO; Found the target, exiting\n" << std::flush;
+    exit(1);
+  }
+  return 0;
+}
+
diff --git a/test/fuzzer/NullDerefOnEmptyTest.cpp b/test/fuzzer/NullDerefOnEmptyTest.cpp
new file mode 100644
index 0000000..459db51
--- /dev/null
+++ b/test/fuzzer/NullDerefOnEmptyTest.cpp
@@ -0,0 +1,19 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Simple test for a fuzzer. The fuzzer must find the empty string.
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <iostream>
+
+static volatile int *Null = 0;
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (Size == 0) {
+    std::cout << "Found the target, dereferencing NULL\n";
+    *Null = 1;
+  }
+  return 0;
+}
+
diff --git a/test/fuzzer/NullDerefTest.cpp b/test/fuzzer/NullDerefTest.cpp
new file mode 100644
index 0000000..1b44b68
--- /dev/null
+++ b/test/fuzzer/NullDerefTest.cpp
@@ -0,0 +1,26 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Simple test for a fuzzer. The fuzzer must find the string "Hi!".
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <iostream>
+
+static volatile int Sink;
+static volatile int *Null = 0;
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (Size > 0 && Data[0] == 'H') {
+    Sink = 1;
+    if (Size > 1 && Data[1] == 'i') {
+      Sink = 2;
+      if (Size > 2 && Data[2] == '!') {
+        std::cout << "Found the target, dereferencing NULL\n";
+        *Null = 1;
+      }
+    }
+  }
+  return 0;
+}
+
diff --git a/test/fuzzer/OneHugeAllocTest.cpp b/test/fuzzer/OneHugeAllocTest.cpp
new file mode 100644
index 0000000..32a5578
--- /dev/null
+++ b/test/fuzzer/OneHugeAllocTest.cpp
@@ -0,0 +1,28 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Tests OOM handling when there is a single large allocation.
+#include <assert.h>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+
+static volatile char *SinkPtr;
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (Size > 0 && Data[0] == 'H') {
+    if (Size > 1 && Data[1] == 'i') {
+      if (Size > 2 && Data[2] == '!') {
+        size_t kSize = (size_t)1 << 31;
+        char *p = new char[kSize];
+        memset(p, 0, kSize);
+        SinkPtr = p;
+        delete [] p;
+      }
+    }
+  }
+  return 0;
+}
+
diff --git a/test/fuzzer/OutOfMemorySingleLargeMallocTest.cpp b/test/fuzzer/OutOfMemorySingleLargeMallocTest.cpp
new file mode 100644
index 0000000..a07795a
--- /dev/null
+++ b/test/fuzzer/OutOfMemorySingleLargeMallocTest.cpp
@@ -0,0 +1,27 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Tests OOM handling.
+#include <assert.h>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+
+static volatile char *SinkPtr;
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (Size > 0 && Data[0] == 'H') {
+    if (Size > 1 && Data[1] == 'i') {
+      if (Size > 2 && Data[2] == '!') {
+          size_t kSize = 0x20000000U;
+          char *p = new char[kSize];
+          SinkPtr = p;
+          delete [] p;
+      }
+    }
+  }
+  return 0;
+}
+
diff --git a/test/fuzzer/OutOfMemoryTest.cpp b/test/fuzzer/OutOfMemoryTest.cpp
new file mode 100644
index 0000000..5e59bde
--- /dev/null
+++ b/test/fuzzer/OutOfMemoryTest.cpp
@@ -0,0 +1,31 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Tests OOM handling.
+#include <assert.h>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <thread>
+
+static volatile char *SinkPtr;
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (Size > 0 && Data[0] == 'H') {
+    if (Size > 1 && Data[1] == 'i') {
+      if (Size > 2 && Data[2] == '!') {
+        while (true) {
+          size_t kSize = 1 << 28;
+          char *p = new char[kSize];
+          memset(p, 0, kSize);
+          SinkPtr = p;
+          std::this_thread::sleep_for(std::chrono::seconds(1));
+        }
+      }
+    }
+  }
+  return 0;
+}
+
diff --git a/test/fuzzer/OverwriteInputTest.cpp b/test/fuzzer/OverwriteInputTest.cpp
new file mode 100644
index 0000000..e688682
--- /dev/null
+++ b/test/fuzzer/OverwriteInputTest.cpp
@@ -0,0 +1,13 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Simple test for a fuzzer. Make sure we abort if Data is overwritten.
+#include <cstdint>
+#include <iostream>
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (Size)
+    *const_cast<uint8_t*>(Data) = 1;
+  return 0;
+}
+
diff --git a/test/fuzzer/PrintFuncTest.cpp b/test/fuzzer/PrintFuncTest.cpp
new file mode 100644
index 0000000..d41b462
--- /dev/null
+++ b/test/fuzzer/PrintFuncTest.cpp
@@ -0,0 +1,39 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Simple test for a fuzzer. The fuzzer must find the string "Hi!".
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <iostream>
+
+extern "C" {
+__attribute__((noinline))
+void FunctionC(const uint8_t *Data, size_t Size) {
+  if (Size > 3 && Data[3] == 'Z') {
+    static bool PrintedOnce = false;
+    if (!PrintedOnce) {
+      std::cout << "BINGO\n";
+      PrintedOnce = true;
+    }
+  }
+}
+
+__attribute__((noinline))
+void FunctionB(const uint8_t *Data, size_t Size) {
+  if (Size > 2 && Data[2] == 'Z')
+    FunctionC(Data, Size);
+}
+__attribute__((noinline))
+void FunctionA(const uint8_t *Data, size_t Size) {
+  if (Size > 1 && Data[1] == 'U')
+    FunctionB(Data, Size);
+}
+}
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (Size > 0 && Data[0] == 'F')
+    FunctionA(Data, Size);
+  return 0;
+}
+
diff --git a/test/fuzzer/RepeatedBytesTest.cpp b/test/fuzzer/RepeatedBytesTest.cpp
new file mode 100644
index 0000000..31868cf
--- /dev/null
+++ b/test/fuzzer/RepeatedBytesTest.cpp
@@ -0,0 +1,31 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Simple test for a fuzzer. The fuzzer must find repeated bytes.
+#include <assert.h>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <iostream>
+#include <ostream>
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  assert(Data);
+  // Looking for AAAAAAAAAAAAAAAAAAAAAA or some such.
+  size_t CurA = 0, MaxA = 0;
+  for (size_t i = 0; i < Size; i++) {
+    // Make sure there are no conditionals in the loop so that
+    // coverage can't help the fuzzer.
+    int EQ = Data[i] == 'A';
+    CurA = EQ * (CurA + 1);
+    int GT = CurA > MaxA;
+    MaxA = GT * CurA + (!GT) * MaxA;
+  }
+  if (MaxA >= 20) {
+    std::cout << "BINGO; Found the target (Max: " << MaxA << "), exiting\n"
+              << std::flush;
+    exit(0);
+  }
+  return 0;
+}
+
diff --git a/test/fuzzer/RepeatedMemcmp.cpp b/test/fuzzer/RepeatedMemcmp.cpp
new file mode 100644
index 0000000..18369de
--- /dev/null
+++ b/test/fuzzer/RepeatedMemcmp.cpp
@@ -0,0 +1,24 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  int Matches1 = 0;
+  for (size_t i = 0; i + 2 < Size; i += 3)
+    if (!memcmp(Data + i, "foo", 3))
+      Matches1++;
+  int Matches2 = 0;
+  for (size_t i = 0; i + 2 < Size; i += 3)
+    if (!memcmp(Data + i, "bar", 3))
+      Matches2++;
+
+  if (Matches1 > 10 && Matches2 > 10) {
+    fprintf(stderr, "BINGO!\n");
+    exit(1);
+  }
+  return 0;
+}
diff --git a/test/fuzzer/ShrinkControlFlowSimpleTest.cpp b/test/fuzzer/ShrinkControlFlowSimpleTest.cpp
new file mode 100644
index 0000000..0afd26d
--- /dev/null
+++ b/test/fuzzer/ShrinkControlFlowSimpleTest.cpp
@@ -0,0 +1,19 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Test that we can find the minimal item in the corpus (3 bytes: "FUZ").
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+static volatile int Sink;
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (Size < 2) return 0;
+  if (Data[0] == 'F' && Data[Size / 2] == 'U' && Data[Size - 1] == 'Z')
+    Sink++;
+  return 0;
+}
+
diff --git a/test/fuzzer/ShrinkControlFlowTest.cpp b/test/fuzzer/ShrinkControlFlowTest.cpp
new file mode 100644
index 0000000..1957c1f
--- /dev/null
+++ b/test/fuzzer/ShrinkControlFlowTest.cpp
@@ -0,0 +1,31 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Test that we can find the minimal item in the corpus (3 bytes: "FUZ").
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+static volatile int Sink;
+
+void Foo() {
+  Sink++;
+}
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  int8_t Ids[256];
+  memset(Ids, -1, sizeof(Ids));
+  for (size_t i = 0; i < Size; i++)
+    if (Ids[Data[i]] == -1)
+      Ids[Data[i]] = i;
+  int F = Ids[(unsigned char)'F'];
+  int U = Ids[(unsigned char)'U'];
+  int Z = Ids[(unsigned char)'Z'];
+  if (F >= 0 && U > F && Z > U) {
+    Foo();
+  }
+  return 0;
+}
+
diff --git a/test/fuzzer/ShrinkValueProfileTest.cpp b/test/fuzzer/ShrinkValueProfileTest.cpp
new file mode 100644
index 0000000..86e4e3c
--- /dev/null
+++ b/test/fuzzer/ShrinkValueProfileTest.cpp
@@ -0,0 +1,22 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Test that we can find the minimal item in the corpus (3 bytes: "FUZ").
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+static volatile uint32_t Sink;
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (Size < sizeof(uint32_t)) return 0;
+  uint32_t X, Y;
+  size_t Offset = Size < 8 ? 0 : Size / 2;
+  memcpy(&X, Data + Offset, sizeof(uint32_t));
+  memcpy(&Y, "FUZZ", sizeof(uint32_t));
+  Sink = X == Y;
+  return 0;
+}
+
diff --git a/test/fuzzer/SignedIntOverflowTest.cpp b/test/fuzzer/SignedIntOverflowTest.cpp
new file mode 100644
index 0000000..d800602
--- /dev/null
+++ b/test/fuzzer/SignedIntOverflowTest.cpp
@@ -0,0 +1,28 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Test for signed-integer-overflow.
+#include <assert.h>
+#include <climits>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <iostream>
+
+static volatile int Sink;
+static int Large = INT_MAX;
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  assert(Data);
+  if (Size > 0 && Data[0] == 'H') {
+    Sink = 1;
+    if (Size > 1 && Data[1] == 'i') {
+      Sink = 2;
+      if (Size > 2 && Data[2] == '!') {
+        Large++;  // int overflow.
+      }
+    }
+  }
+  return 0;
+}
+
diff --git a/test/fuzzer/SimpleCmpTest.cpp b/test/fuzzer/SimpleCmpTest.cpp
new file mode 100644
index 0000000..8acad4a
--- /dev/null
+++ b/test/fuzzer/SimpleCmpTest.cpp
@@ -0,0 +1,47 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Simple test for a fuzzer. The fuzzer must find several narrow ranges.
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+extern int AllLines[];
+
+bool PrintOnce(int Line) {
+  if (!AllLines[Line])
+    fprintf(stderr, "Seen line %d\n", Line);
+  AllLines[Line] = 1;
+  return true;
+}
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (Size != 22) return 0;
+  uint64_t x = 0;
+  int64_t  y = 0;
+  int32_t z = 0;
+  uint16_t a = 0;
+  memcpy(&x, Data, 8);  // 8
+  memcpy(&y, Data + 8, 8);  // 16
+  memcpy(&z, Data + 16, sizeof(z));  // 20
+  memcpy(&a, Data + 20, sizeof(a));  // 22
+  const bool k32bit = sizeof(void*) == 4;
+
+  if ((k32bit || x > 1234567890) && PrintOnce(__LINE__) &&
+      (k32bit || x < 1234567895) && PrintOnce(__LINE__) &&
+      a == 0x4242 && PrintOnce(__LINE__) &&
+      (k32bit || y >= 987654321) && PrintOnce(__LINE__) &&
+      (k32bit || y <= 987654325) && PrintOnce(__LINE__) &&
+      z < -10000 && PrintOnce(__LINE__) &&
+      z >= -10005 && PrintOnce(__LINE__) &&
+      z != -10003 && PrintOnce(__LINE__) &&
+      true) {
+    fprintf(stderr, "BINGO; Found the target: size %zd (%zd, %zd, %d, %d), exiting.\n",
+            Size, x, y, z, a);
+    exit(1);
+  }
+  return 0;
+}
+
+int AllLines[__LINE__ + 1];  // Must be the last line.
diff --git a/test/fuzzer/SimpleDictionaryTest.cpp b/test/fuzzer/SimpleDictionaryTest.cpp
new file mode 100644
index 0000000..ffa2e41
--- /dev/null
+++ b/test/fuzzer/SimpleDictionaryTest.cpp
@@ -0,0 +1,30 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Simple test for a fuzzer.
+// The fuzzer must find a string based on dictionary words:
+//   "Elvis"
+//   "Presley"
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <ostream>
+
+static volatile int Zero = 0;
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  const char *Expected = "ElvisPresley";
+  if (Size < strlen(Expected)) return 0;
+  size_t Match = 0;
+  for (size_t i = 0; Expected[i]; i++)
+    if (Expected[i] + Zero == Data[i])
+      Match++;
+  if (Match == strlen(Expected)) {
+    std::cout << "BINGO; Found the target, exiting\n" << std::flush;
+    exit(1);
+  }
+  return 0;
+}
+
diff --git a/test/fuzzer/SimpleHashTest.cpp b/test/fuzzer/SimpleHashTest.cpp
new file mode 100644
index 0000000..99e96cb
--- /dev/null
+++ b/test/fuzzer/SimpleHashTest.cpp
@@ -0,0 +1,40 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// This test computes a checksum of the data (all but the last 4 bytes),
+// and then compares the last 4 bytes with the computed value.
+// A fuzzer with cmp traces is expected to defeat this check.
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+// A modified jenkins_one_at_a_time_hash initialized by non-zero,
+// so that simple_hash(0) != 0. See also
+// https://en.wikipedia.org/wiki/Jenkins_hash_function
+static uint32_t simple_hash(const uint8_t *Data, size_t Size) {
+  uint32_t Hash = 0x12039854;
+  for (uint32_t i = 0; i < Size; i++) {
+    Hash += Data[i];
+    Hash += (Hash << 10);
+    Hash ^= (Hash >> 6);
+  }
+  Hash += (Hash << 3);
+  Hash ^= (Hash >> 11);
+  Hash += (Hash << 15);
+  return Hash;
+}
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (Size < 14)
+    return 0;
+
+  uint32_t Hash = simple_hash(&Data[0], Size - 4);
+  uint32_t Want = reinterpret_cast<const uint32_t *>(&Data[Size - 4])[0];
+  if (Hash != Want)
+    return 0;
+  fprintf(stderr, "BINGO; simple_hash defeated: %x == %x\n", (unsigned int)Hash,
+          (unsigned int)Want);
+  exit(1);
+  return 0;
+}
diff --git a/test/fuzzer/SimpleTest.cpp b/test/fuzzer/SimpleTest.cpp
new file mode 100644
index 0000000..3882a84
--- /dev/null
+++ b/test/fuzzer/SimpleTest.cpp
@@ -0,0 +1,28 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Simple test for a fuzzer. The fuzzer must find the string "Hi!".
+#include <assert.h>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <iostream>
+#include <ostream>
+
+static volatile int Sink;
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  assert(Data);
+  if (Size > 0 && Data[0] == 'H') {
+    Sink = 1;
+    if (Size > 1 && Data[1] == 'i') {
+      Sink = 2;
+      if (Size > 2 && Data[2] == '!') {
+        std::cout << "BINGO; Found the target, exiting\n" << std::flush;
+        exit(0);
+      }
+    }
+  }
+  return 0;
+}
+
diff --git a/test/fuzzer/SimpleThreadedTest.cpp b/test/fuzzer/SimpleThreadedTest.cpp
new file mode 100644
index 0000000..deeae75
--- /dev/null
+++ b/test/fuzzer/SimpleThreadedTest.cpp
@@ -0,0 +1,26 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Threaded test for a fuzzer. The fuzzer should find "H"
+#include <assert.h>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <iostream>
+#include <ostream>
+#include <thread>
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  auto C = [&] {
+    if (Size >= 2 && Data[0] == 'H') {
+        std::cout << "BINGO; Found the target, exiting\n" << std::flush;
+        abort();
+    }
+  };
+  std::thread T[] = {std::thread(C), std::thread(C), std::thread(C),
+                     std::thread(C), std::thread(C), std::thread(C)};
+  for (auto &X : T)
+    X.join();
+  return 0;
+}
+
diff --git a/test/fuzzer/SingleByteInputTest.cpp b/test/fuzzer/SingleByteInputTest.cpp
new file mode 100644
index 0000000..72b58ba
--- /dev/null
+++ b/test/fuzzer/SingleByteInputTest.cpp
@@ -0,0 +1,17 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Simple test for a fuzzer, need just one byte to crash.
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (Size > 0 && Data[Size/2] == 42) {
+    fprintf(stderr, "BINGO\n");
+    abort();
+  }
+  return 0;
+}
+
diff --git a/test/fuzzer/SingleMemcmpTest.cpp b/test/fuzzer/SingleMemcmpTest.cpp
new file mode 100644
index 0000000..19781ba
--- /dev/null
+++ b/test/fuzzer/SingleMemcmpTest.cpp
@@ -0,0 +1,17 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Simple test for a fuzzer. The fuzzer must find a particular string.
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  const char *S = (const char*)Data;
+  if (Size >= 6 && !memcmp(S, "qwerty", 6)) {
+    fprintf(stderr, "BINGO\n");
+    exit(1);
+  }
+  return 0;
+}
diff --git a/test/fuzzer/SingleStrcmpTest.cpp b/test/fuzzer/SingleStrcmpTest.cpp
new file mode 100644
index 0000000..1490734
--- /dev/null
+++ b/test/fuzzer/SingleStrcmpTest.cpp
@@ -0,0 +1,21 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Simple test for a fuzzer. The fuzzer must find a particular string.
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (Size >= 7) {
+    char Copy[7];
+    memcpy(Copy, Data, 6);
+    Copy[6] = 0;
+    if (!strcmp(Copy, "qwerty")) {
+      fprintf(stderr, "BINGO\n");
+      exit(1);
+    }
+  }
+  return 0;
+}
diff --git a/test/fuzzer/SingleStrncmpTest.cpp b/test/fuzzer/SingleStrncmpTest.cpp
new file mode 100644
index 0000000..4729876
--- /dev/null
+++ b/test/fuzzer/SingleStrncmpTest.cpp
@@ -0,0 +1,18 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Simple test for a fuzzer. The fuzzer must find a particular string.
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  const char *S = (const char*)Data;
+  volatile auto Strncmp = &(strncmp);   // Make sure strncmp is not inlined.
+  if (Size >= 6 && !Strncmp(S, "qwerty", 6)) {
+    fprintf(stderr, "BINGO\n");
+    exit(1);
+  }
+  return 0;
+}
diff --git a/test/fuzzer/SpamyTest.cpp b/test/fuzzer/SpamyTest.cpp
new file mode 100644
index 0000000..721134e
--- /dev/null
+++ b/test/fuzzer/SpamyTest.cpp
@@ -0,0 +1,21 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// The test spams to stderr and stdout.
+#include <assert.h>
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <iostream>
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  assert(Data);
+  printf("PRINTF_STDOUT\n");
+  fflush(stdout);
+  fprintf(stderr, "PRINTF_STDERR\n");
+  std::cout << "STREAM_COUT\n";
+  std::cout.flush();
+  std::cerr << "STREAM_CERR\n";
+  return 0;
+}
+
diff --git a/test/fuzzer/StrcmpTest.cpp b/test/fuzzer/StrcmpTest.cpp
new file mode 100644
index 0000000..81f041d
--- /dev/null
+++ b/test/fuzzer/StrcmpTest.cpp
@@ -0,0 +1,32 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Break through a series of strcmp.
+#include <cassert>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+bool Eq(const uint8_t *Data, size_t Size, const char *Str) {
+  char Buff[1024];
+  size_t Len = strlen(Str);
+  if (Size < Len) return false;
+  if (Len >= sizeof(Buff)) return false;
+  memcpy(Buff, (const char*)Data, Len);
+  Buff[Len] = 0;
+  int res = strcmp(Buff, Str);
+  return res == 0;
+}
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (Eq(Data, Size, "ABC") &&
+      Size >= 3 && Eq(Data + 3, Size - 3, "QWER") &&
+      Size >= 7 && Eq(Data + 7, Size - 7, "ZXCVN") &&
+      Size >= 14 && Data[13] == 42
+    ) {
+    fprintf(stderr, "BINGO\n");
+    exit(1);
+  }
+  return 0;
+}
diff --git a/test/fuzzer/StrncmpOOBTest.cpp b/test/fuzzer/StrncmpOOBTest.cpp
new file mode 100644
index 0000000..4ed71d9
--- /dev/null
+++ b/test/fuzzer/StrncmpOOBTest.cpp
@@ -0,0 +1,21 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Test that libFuzzer itself does not read out of bounds.
+#include <assert.h>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+
+static volatile int Sink;
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (Size < 5) return 0;
+  const char *Ch = reinterpret_cast<const char *>(Data);
+  if (Ch[Size - 3] == 'a')
+    Sink = strncmp(Ch + Size - 3, "abcdefg", 6);
+  return 0;
+}
+
diff --git a/test/fuzzer/StrncmpTest.cpp b/test/fuzzer/StrncmpTest.cpp
new file mode 100644
index 0000000..a40e056
--- /dev/null
+++ b/test/fuzzer/StrncmpTest.cpp
@@ -0,0 +1,28 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Simple test for a fuzzer. The fuzzer must find a particular string.
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+static volatile int sink;
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  // TODO: check other sizes.
+  const char *S = (const char*)Data;
+  if (Size >= 8 && strncmp(S, "123", 8))
+    sink = 1;
+  if (Size >= 8 && strncmp(S, "01234567", 8) == 0) {
+    if (Size >= 12 && strncmp(S + 8, "ABCD", 4) == 0) {
+      if (Size >= 14 && strncmp(S + 12, "XY", 2) == 0) {
+        if (Size >= 17 && strncmp(S + 14, "KLM", 3) == 0) {
+          fprintf(stderr, "BINGO\n");
+          exit(1);
+        }
+      }
+    }
+  }
+  return 0;
+}
diff --git a/test/fuzzer/StrstrTest.cpp b/test/fuzzer/StrstrTest.cpp
new file mode 100644
index 0000000..a3ea4e0
--- /dev/null
+++ b/test/fuzzer/StrstrTest.cpp
@@ -0,0 +1,28 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Test strstr and strcasestr hooks.
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <string.h>
+#include <string>
+
+// Windows does not have strcasestr and memmem, so we are not testing them.
+#ifdef _WIN32
+#define strcasestr strstr
+#define memmem(a, b, c, d) true
+#endif
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (Size < 4) return 0;
+  std::string s(reinterpret_cast<const char*>(Data), Size);
+  if (strstr(s.c_str(), "FUZZ") &&
+      strcasestr(s.c_str(), "aBcD") &&
+      memmem(s.data(), s.size(), "kuku", 4)
+      ) {
+    fprintf(stderr, "BINGO\n");
+    exit(1);
+  }
+  return 0;
+}
diff --git a/test/fuzzer/SwapCmpTest.cpp b/test/fuzzer/SwapCmpTest.cpp
new file mode 100644
index 0000000..bbfbefe
--- /dev/null
+++ b/test/fuzzer/SwapCmpTest.cpp
@@ -0,0 +1,35 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// The fuzzer must find several constants with swapped bytes.
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (Size < 14) return 0;
+  uint64_t x = 0;
+  uint32_t y = 0;
+  uint16_t z = 0;
+  memcpy(&x, Data, sizeof(x));
+  memcpy(&y, Data + Size / 2, sizeof(y));
+  memcpy(&z, Data + Size - sizeof(z), sizeof(z));
+
+  x = __builtin_bswap64(x);
+  y = __builtin_bswap32(y);
+  z = __builtin_bswap16(z);
+  const bool k32bit = sizeof(void*) == 4;
+
+  if ((k32bit || x == 0x46555A5A5A5A5546ULL) &&
+      z == 0x4F4B &&
+      y == 0x66757A7A &&
+      true
+      ) {
+    if (Data[Size - 3] == 'z') {
+      fprintf(stderr, "BINGO; Found the target\n");
+      exit(1);
+    }
+  }
+  return 0;
+}
diff --git a/test/fuzzer/Switch2Test.cpp b/test/fuzzer/Switch2Test.cpp
new file mode 100644
index 0000000..5f66ac8
--- /dev/null
+++ b/test/fuzzer/Switch2Test.cpp
@@ -0,0 +1,35 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Simple test for a fuzzer. The fuzzer must find the interesting switch value.
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+int Switch(int a) {
+  switch(a) {
+    case 100001: return 1;
+    case 100002: return 2;
+    case 100003: return 4;
+  }
+  return 0;
+}
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  const int N = 3;
+  if (Size < N * sizeof(int)) return 0;
+  int Res = 0;
+  for (int i = 0; i < N; i++) {
+    int X;
+    memcpy(&X, Data + i * sizeof(int), sizeof(int));
+    Res += Switch(X);
+  }
+  if (Res == 5 || Res == 3 || Res == 6 || Res == 7) {
+    fprintf(stderr, "BINGO; Found the target, exiting; Res=%d\n", Res);
+    exit(1);
+  }
+  return 0;
+}
+
diff --git a/test/fuzzer/SwitchTest.cpp b/test/fuzzer/SwitchTest.cpp
new file mode 100644
index 0000000..86944ca
--- /dev/null
+++ b/test/fuzzer/SwitchTest.cpp
@@ -0,0 +1,58 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Simple test for a fuzzer. The fuzzer must find the interesting switch value.
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+static volatile int Sink;
+
+template<class T>
+bool Switch(const uint8_t *Data, size_t Size) {
+  T X;
+  if (Size < sizeof(X)) return false;
+  memcpy(&X, Data, sizeof(X));
+  switch (X) {
+    case 1: Sink = __LINE__; break;
+    case 101: Sink = __LINE__; break;
+    case 1001: Sink = __LINE__; break;
+    case 10001: Sink = __LINE__; break;
+    case 100001: Sink = __LINE__; break;
+    case 1000001: Sink = __LINE__; break;
+    case 10000001: Sink = __LINE__; break;
+    case 100000001: return true;
+  }
+  return false;
+}
+
+bool ShortSwitch(const uint8_t *Data, size_t Size) {
+  short X;
+  if (Size < sizeof(short)) return false;
+  memcpy(&X, Data, sizeof(short));
+  switch(X) {
+    case 42: Sink = __LINE__; break;
+    case 402: Sink = __LINE__; break;
+    case 4002: Sink = __LINE__; break;
+    case 5002: Sink = __LINE__; break;
+    case 7002: Sink = __LINE__; break;
+    case 9002: Sink = __LINE__; break;
+    case 14002: Sink = __LINE__; break;
+    case 21402: return true;
+  }
+  return false;
+}
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (Size >= 4  && Switch<int>(Data, Size) &&
+      Size >= 12 && Switch<uint64_t>(Data + 4, Size - 4) &&
+      Size >= 14 && ShortSwitch(Data + 12, 2)
+    ) {
+    fprintf(stderr, "BINGO; Found the target, exiting\n");
+    exit(1);
+  }
+  return 0;
+}
+
diff --git a/test/fuzzer/TableLookupTest.cpp b/test/fuzzer/TableLookupTest.cpp
new file mode 100644
index 0000000..4d8ab06
--- /dev/null
+++ b/test/fuzzer/TableLookupTest.cpp
@@ -0,0 +1,44 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Make sure the fuzzer eventually finds all possible values of a variable
+// within a range.
+#include <cassert>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <set>
+
+const size_t N = 1 << 12;
+
+// Define an array of counters that will be understood by libFuzzer
+// as extra coverage signal. The array must be:
+//  * uint8_t
+//  * in the section named __libfuzzer_extra_counters.
+// The target code may declare more than one such array.
+//
+// Use either `Counters[Idx] = 1` or `Counters[Idx]++;`
+// depending on whether multiple occurrences of the event 'Idx'
+// is important to distinguish from one occurrence.
+#ifdef __linux__
+__attribute__((section("__libfuzzer_extra_counters")))
+#endif
+static uint8_t Counters[N];
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  static std::set<uint16_t> SeenIdx;
+  if (Size != 4) return 0;
+  uint32_t Idx;
+  memcpy(&Idx, Data, 4);
+  Idx %= N;
+  assert(Counters[Idx] == 0);  // libFuzzer should reset these between the runs.
+  // Or Counters[Idx]=1 if we don't care how many times this happened.
+  Counters[Idx]++;
+  SeenIdx.insert(Idx);
+  if (SeenIdx.size() == N) {
+    fprintf(stderr, "BINGO: found all values\n");
+    abort();
+  }
+  return 0;
+}
diff --git a/test/fuzzer/ThreadedLeakTest.cpp b/test/fuzzer/ThreadedLeakTest.cpp
new file mode 100644
index 0000000..538d3b4
--- /dev/null
+++ b/test/fuzzer/ThreadedLeakTest.cpp
@@ -0,0 +1,18 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// The fuzzer should find a leak in a non-main thread.
+#include <cstddef>
+#include <cstdint>
+#include <thread>
+
+static volatile int *Sink;
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (Size == 0) return 0;
+  if (Data[0] != 'F') return 0;
+  std::thread T([&] { Sink = new int; });
+  T.join();
+  return 0;
+}
+
diff --git a/test/fuzzer/ThreadedTest.cpp b/test/fuzzer/ThreadedTest.cpp
new file mode 100644
index 0000000..bb51ba7
--- /dev/null
+++ b/test/fuzzer/ThreadedTest.cpp
@@ -0,0 +1,26 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Threaded test for a fuzzer. The fuzzer should not crash.
+#include <assert.h>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <thread>
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (Size < 8) return 0;
+  assert(Data);
+  auto C = [&] {
+    size_t Res = 0;
+    for (size_t i = 0; i < Size / 2; i++)
+      Res += memcmp(Data, Data + Size / 2, 4);
+    return Res;
+  };
+  std::thread T[] = {std::thread(C), std::thread(C), std::thread(C),
+                     std::thread(C), std::thread(C), std::thread(C)};
+  for (auto &X : T)
+    X.join();
+  return 0;
+}
+
diff --git a/test/fuzzer/TimeoutEmptyTest.cpp b/test/fuzzer/TimeoutEmptyTest.cpp
new file mode 100644
index 0000000..1ddf1fa
--- /dev/null
+++ b/test/fuzzer/TimeoutEmptyTest.cpp
@@ -0,0 +1,14 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Simple test for a fuzzer. The fuzzer must find the empty string.
+#include <cstddef>
+#include <cstdint>
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  static volatile int Zero = 0;
+  if (!Size)
+    while(!Zero)
+      ;
+  return 0;
+}
diff --git a/test/fuzzer/TimeoutTest.cpp b/test/fuzzer/TimeoutTest.cpp
new file mode 100644
index 0000000..e3cdba3
--- /dev/null
+++ b/test/fuzzer/TimeoutTest.cpp
@@ -0,0 +1,26 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Simple test for a fuzzer. The fuzzer must find the string "Hi!".
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <iostream>
+
+static volatile int Sink;
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (Size > 0 && Data[0] == 'H') {
+    Sink = 1;
+    if (Size > 1 && Data[1] == 'i') {
+      Sink = 2;
+      if (Size > 2 && Data[2] == '!') {
+        Sink = 2;
+        while (Sink)
+          ;
+      }
+    }
+  }
+  return 0;
+}
+
diff --git a/test/fuzzer/TraceMallocTest.cpp b/test/fuzzer/TraceMallocTest.cpp
new file mode 100644
index 0000000..af99756
--- /dev/null
+++ b/test/fuzzer/TraceMallocTest.cpp
@@ -0,0 +1,27 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Tests -trace_malloc
+#include <assert.h>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <iostream>
+
+int *Ptr;
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (!Size) return 0;
+  if (*Data == 1) {
+    delete Ptr;
+    Ptr = nullptr;
+  } else if (*Data == 2) {
+    delete Ptr;
+    Ptr = new int;
+  } else if (*Data == 3) {
+    if (!Ptr)
+      Ptr = new int;
+  }
+  return 0;
+}
+
diff --git a/test/fuzzer/TraceMallocThreadedTest.cpp b/test/fuzzer/TraceMallocThreadedTest.cpp
new file mode 100644
index 0000000..5603af3
--- /dev/null
+++ b/test/fuzzer/TraceMallocThreadedTest.cpp
@@ -0,0 +1,22 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Check that allocation tracing from different threads does not cause
+// interleaving of stack traces.
+#include <assert.h>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <thread>
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  auto C = [&] {
+    volatile void *a = malloc(5639);
+    free((void *)a);
+  };
+  std::thread T[] = {std::thread(C), std::thread(C), std::thread(C),
+                     std::thread(C), std::thread(C), std::thread(C)};
+  for (auto &X : T)
+    X.join();
+  return 0;
+}
diff --git a/test/fuzzer/TwoDifferentBugsTest.cpp b/test/fuzzer/TwoDifferentBugsTest.cpp
new file mode 100644
index 0000000..77d2cb1
--- /dev/null
+++ b/test/fuzzer/TwoDifferentBugsTest.cpp
@@ -0,0 +1,22 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Simple test for a fuzzer. This test may trigger two different bugs.
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <iostream>
+
+static volatile int *Null = 0;
+
+void Foo() { Null[1] = 0; }
+void Bar() { Null[2] = 0; }
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (Size < 10 && Data[0] == 'H')
+    Foo();
+  if (Size >= 10 && Data[0] == 'H')
+    Bar();
+  return 0;
+}
+
diff --git a/test/fuzzer/afl-driver-extra-stats.test b/test/fuzzer/afl-driver-extra-stats.test
new file mode 100644
index 0000000..a6de533
--- /dev/null
+++ b/test/fuzzer/afl-driver-extra-stats.test
@@ -0,0 +1,30 @@
+RUN: %no_fuzzer_cpp_compiler -fno-sanitize-coverage=edge,trace-cmp,indirect-calls,8bit-counters,trace-pc-guard %S/AFLDriverTest.cpp %libfuzzer_src/afl/afl_driver.cpp -o %t-AFLDriverTest
+
+; Test that not specifying an extra stats file isn't broken.
+RUN: unset AFL_DRIVER_EXTRA_STATS_FILENAME
+RUN: %t-AFLDriverTest
+
+; Test that specifying an invalid extra stats file causes a crash.
+RUN: ASAN_OPTIONS= AFL_DRIVER_EXTRA_STATS_FILENAME=%T not --crash %t-AFLDriverTest
+
+; Test that specifying a corrupted stats file causes a crash.
+echo "peak_rss_mb :0" > %t
+ASAN_OPTIONS= AFL_DRIVER_EXTRA_STATS_FILENAME=%t not --crash %t-AFLDriverTest
+
+; Test that specifying a valid nonexistent stats file works.
+RUN: rm -f %t
+RUN: AFL_DRIVER_EXTRA_STATS_FILENAME=%t %t-AFLDriverTest
+RUN: [[ $(grep "peak_rss_mb\|slowest_unit_time_sec" %t | wc -l) -eq 2 ]]
+
+; Test that specifying a valid preexisting stats file works.
+RUN: printf "peak_rss_mb : 0\nslowest_unit_time_sec: 0\n" > %t
+RUN: AFL_DRIVER_EXTRA_STATS_FILENAME=%t %t-AFLDriverTest
+; Check that both lines were printed.
+RUN: [[ $(grep "peak_rss_mb\|slowest_unit_time_sec" %t | wc -l) -eq 2 ]]
+
+; Test that peak_rss_mb and slowest_unit_time_in_secs are only updated when necessary.
+; Check that both lines have 9999 since there's no way we have exceeded that
+; amount of time or virtual memory.
+RUN: printf "peak_rss_mb : 9999\nslowest_unit_time_sec: 9999\n" > %t
+RUN: AFL_DRIVER_EXTRA_STATS_FILENAME=%t %t-AFLDriverTest
+RUN: [[ $(grep "9999" %t | wc -l) -eq 2 ]]
diff --git a/test/fuzzer/afl-driver-stderr.test b/test/fuzzer/afl-driver-stderr.test
new file mode 100644
index 0000000..be0efaa
--- /dev/null
+++ b/test/fuzzer/afl-driver-stderr.test
@@ -0,0 +1,12 @@
+RUN: %no_fuzzer_cpp_compiler -fno-sanitize-coverage=edge,trace-cmp,indirect-calls,8bit-counters,trace-pc-guard %S/AFLDriverTest.cpp %libfuzzer_src/afl/afl_driver.cpp -o %t-AFLDriverTest
+
+; Test that not specifying a stderr file isn't broken.
+RUN: unset AFL_DRIVER_STDERR_DUPLICATE_FILENAME
+RUN: %t-AFLDriverTest
+
+; Test that specifying an invalid file causes a crash.
+RUN: ASAN_OPTIONS= AFL_DRIVER_STDERR_DUPLICATE_FILENAME="%T" not --crash %t-AFLDriverTest
+
+; Test that a file is created when specified as the duplicate stderr.
+RUN: AFL_DRIVER_STDERR_DUPLICATE_FILENAME=%t %t-AFLDriverTest
+RUN: stat %t
diff --git a/test/fuzzer/afl-driver.test b/test/fuzzer/afl-driver.test
new file mode 100644
index 0000000..32e7d03
--- /dev/null
+++ b/test/fuzzer/afl-driver.test
@@ -0,0 +1,29 @@
+REQUIRES: linux
+
+RUN: %no_fuzzer_cpp_compiler -fno-sanitize-coverage=edge,trace-cmp,indirect-calls,8bit-counters,trace-pc-guard %S/AFLDriverTest.cpp %libfuzzer_src/afl/afl_driver.cpp -o %t-AFLDriverTest
+
+RUN: echo -n "abc" > %t.file3
+RUN: echo -n "abcd" > %t.file4
+
+RUN: %t-AFLDriverTest < %t.file3 2>&1 | FileCheck %s --check-prefix=CHECK1
+CHECK1: __afl_persistent_loop calle, Count = 1000
+CHECK1: LLVMFuzzerTestOneInput called; Size = 3
+
+
+RUN: %t-AFLDriverTest < %t.file3 -42 2>&1 | FileCheck %s --check-prefix=CHECK2
+CHECK2: __afl_persistent_loop calle, Count = 42
+CHECK2: LLVMFuzzerTestOneInput called; Size = 3
+
+
+RUN: %t-AFLDriverTest < %t.file3 666 2>&1 | FileCheck %s --check-prefix=CHECK3
+CHECK3: WARNING: using the deprecated call style
+CHECK3: __afl_persistent_loop calle, Count = 666
+CHECK3: LLVMFuzzerTestOneInput called; Size = 3
+
+
+RUN: %t-AFLDriverTest %t.file3 2>&1 | FileCheck %s --check-prefix=CHECK4
+CHECK4: LLVMFuzzerTestOneInput called; Size = 3
+
+RUN: %t-AFLDriverTest %t.file3 %t.file4  2>&1 | FileCheck %s --check-prefix=CHECK5
+CHECK5: LLVMFuzzerTestOneInput called; Size = 3
+CHECK5: LLVMFuzzerTestOneInput called; Size = 4
diff --git a/test/fuzzer/bad-strcmp.test b/test/fuzzer/bad-strcmp.test
new file mode 100644
index 0000000..fd1621a
--- /dev/null
+++ b/test/fuzzer/bad-strcmp.test
@@ -0,0 +1,2 @@
+RUN: %cpp_compiler %S/BadStrcmpTest.cpp -o %t-BadStrcmpTest
+RUN: %t-BadStrcmpTest -runs=100000
diff --git a/test/fuzzer/caller-callee.test b/test/fuzzer/caller-callee.test
new file mode 100644
index 0000000..e4eccdc
--- /dev/null
+++ b/test/fuzzer/caller-callee.test
@@ -0,0 +1,3 @@
+RUN: %cpp_compiler %S/CallerCalleeTest.cpp -o %t-CallerCalleeTest
+CHECK: BINGO
+RUN: not %t-CallerCalleeTest          -use_value_profile=1 -cross_over=0 -seed=1 -runs=10000000 2>&1 | FileCheck %s
diff --git a/test/fuzzer/cleanse.test b/test/fuzzer/cleanse.test
new file mode 100644
index 0000000..8e45dc7
--- /dev/null
+++ b/test/fuzzer/cleanse.test
@@ -0,0 +1,4 @@
+RUN: %cpp_compiler %S/CleanseTest.cpp -o %t-CleanseTest
+RUN: echo -n 0123456789ABCDEFGHIZ > %t-in
+RUN: %t-CleanseTest -cleanse_crash=1 %t-in -exact_artifact_path=%t-out
+RUN: echo -n ' 1   5    A        Z' | diff - %t-out
diff --git a/test/fuzzer/coverage.test b/test/fuzzer/coverage.test
new file mode 100644
index 0000000..9a2179d
--- /dev/null
+++ b/test/fuzzer/coverage.test
@@ -0,0 +1,21 @@
+RUN: %cpp_compiler -mllvm -use-unknown-locations=Disable  %S/NullDerefTest.cpp -o %t-NullDerefTest
+RUN: %cpp_compiler -mllvm -use-unknown-locations=Disable %S/DSO1.cpp -fPIC -shared -o %t-DSO1.so
+RUN: %cpp_compiler -mllvm -use-unknown-locations=Disable %S/DSO2.cpp -fPIC -shared -o %t-DSO2.so
+RUN: %cpp_compiler -mllvm -use-unknown-locations=Disable %S/DSOTestMain.cpp %S/DSOTestExtra.cpp -L. %t-DSO1.so %t-DSO2.so -o %t-DSOTest
+
+CHECK: COVERAGE:
+CHECK-DAG: COVERED: {{.*}}in LLVMFuzzerTestOneInput {{.*}}NullDerefTest.cpp:13
+CHECK-DAG: COVERED: {{.*}}in LLVMFuzzerTestOneInput {{.*}}NullDerefTest.cpp:14
+CHECK-DAG: COVERED: {{.*}}in LLVMFuzzerTestOneInput {{.*}}NullDerefTest.cpp:16
+RUN: not %t-NullDerefTest -print_coverage=1 2>&1 | FileCheck %s
+
+RUN: %t-DSOTest -print_coverage=1 -runs=0 2>&1 | FileCheck %s --check-prefix=DSO
+DSO: COVERAGE:
+DSO-DAG: COVERED:{{.*}}DSO1{{.*}}DSO1.cpp
+DSO-DAG: COVERED:{{.*}}DSO2{{.*}}DSO2.cpp
+DSO-DAG: COVERED:{{.*}}LLVMFuzzerTestOneInput{{.*}}DSOTestMain
+DSO-DAG: UNCOVERED_LINE:{{.*}}DSO1{{.*}}DSO1.cpp
+DSO-DAG: UNCOVERED_LINE:{{.*}}DSO2{{.*}}DSO2.cpp
+DSO-DAG: UNCOVERED_FUNC: in Uncovered1
+DSO-DAG: UNCOVERED_FUNC: in Uncovered2
+DSO-DAG: UNCOVERED_LINE: in LLVMFuzzerTestOneInput
diff --git a/test/fuzzer/cxxstring.test b/test/fuzzer/cxxstring.test
new file mode 100644
index 0000000..7bb341b
--- /dev/null
+++ b/test/fuzzer/cxxstring.test
@@ -0,0 +1,6 @@
+UNSUPPORTED: windows
+
+RUN: %cpp_compiler %S/CxxStringEqTest.cpp -o %t-CxxStringEqTest
+
+RUN: not %t-CxxStringEqTest -seed=1 -runs=1000000 2>&1 | FileCheck %s
+CHECK: BINGO
diff --git a/test/fuzzer/deep-recursion.test b/test/fuzzer/deep-recursion.test
new file mode 100644
index 0000000..22475f9
--- /dev/null
+++ b/test/fuzzer/deep-recursion.test
@@ -0,0 +1,5 @@
+# Test that we can find a stack overflow
+REQUIRES: linux
+RUN: %cpp_compiler %S/DeepRecursionTest.cpp -o %t
+RUN: not %t -seed=1 -runs=100000000 2>&1 | FileCheck %s
+CHECK: ERROR: libFuzzer: deadly signal
diff --git a/test/fuzzer/dict1.txt b/test/fuzzer/dict1.txt
new file mode 100644
index 0000000..520d0cc
--- /dev/null
+++ b/test/fuzzer/dict1.txt
@@ -0,0 +1,4 @@
+# Dictionary for SimpleDictionaryTest
+
+a="Elvis"
+b="Presley"
diff --git a/test/fuzzer/disable-leaks.test b/test/fuzzer/disable-leaks.test
new file mode 100644
index 0000000..bc120d9
--- /dev/null
+++ b/test/fuzzer/disable-leaks.test
@@ -0,0 +1,5 @@
+REQUIRES: lsan
+RUN: %cpp_compiler %S/AccumulateAllocationsTest.cpp -o %t-AccumulateAllocationsTest
+RUN: %t-AccumulateAllocationsTest -detect_leaks=1 -runs=100000 2>&1 | FileCheck %s --check-prefix=ACCUMULATE_ALLOCS
+ACCUMULATE_ALLOCS: INFO: libFuzzer disabled leak detection after every mutation
+
diff --git a/test/fuzzer/dump_coverage.test b/test/fuzzer/dump_coverage.test
new file mode 100644
index 0000000..b240089
--- /dev/null
+++ b/test/fuzzer/dump_coverage.test
@@ -0,0 +1,20 @@
+RUN: %cpp_compiler -fsanitize-coverage=0 -fsanitize-coverage=trace-pc-guard %S/DSO1.cpp -fPIC -shared -o %t-DSO1.so
+RUN: %cpp_compiler -fsanitize-coverage=0 -fsanitize-coverage=trace-pc-guard %S/DSO2.cpp -fPIC -shared -o %t-DSO2.so
+RUN: %cpp_compiler -fsanitize-coverage=0 -fsanitize-coverage=trace-pc-guard %S/DSOTestMain.cpp %S/DSOTestExtra.cpp -L. %t-DSO1.so %t-DSO2.so -o %t-DSOTest
+
+RUN: %cpp_compiler -fsanitize-coverage=0 -fsanitize-coverage=trace-pc-guard %S/NullDerefTest.cpp -o %t-NullDerefTest
+
+RUN: rm -rf %t_workdir && mkdir -p %t_workdir
+RUN: env ASAN_OPTIONS=coverage_dir='"%t_workdir"' not %t-NullDerefTest -dump_coverage=1 2>&1 | FileCheck %s
+RUN: sancov -covered-functions %t-NullDerefTest* %t_workdir/*.sancov | FileCheck %s --check-prefix=SANCOV
+RUN: env ASAN_OPTIONS=coverage_dir='"%t_workdir"' %t-DSOTest -dump_coverage=1 -runs=0 2>&1 | FileCheck %s --check-prefix=DSO
+RUN: env ASAN_OPTIONS=coverage_dir='"%t_workdir"' not %t-NullDerefTest -dump_coverage=0 2>&1 | FileCheck %s --check-prefix=NOCOV
+
+CHECK: SanitizerCoverage: {{.*}}NullDerefTest.{{.*}}.sancov: {{.*}} PCs written
+SANCOV: LLVMFuzzerTestOneInput
+
+DSO: SanitizerCoverage: {{.*}}DSOTest.{{.*}}.sancov: {{.*}} PCs written
+DSO-DAG: SanitizerCoverage: {{.*}}DSO1.{{.*}}.sancov: {{.*}} PCs written
+DSO-DAG: SanitizerCoverage: {{.*}}DSO2.{{.*}}.sancov: {{.*}} PCs written
+
+NOCOV-NOT: SanitizerCoverage: {{.*}} PCs written
diff --git a/test/fuzzer/equivalence-signals.test b/test/fuzzer/equivalence-signals.test
new file mode 100644
index 0000000..7951636
--- /dev/null
+++ b/test/fuzzer/equivalence-signals.test
@@ -0,0 +1,9 @@
+# Run EquivalenceATest against itself with a small timeout
+# to stress the signal handling and ensure that shmem doesn't mind
+# the signals.
+
+RUN: %cpp_compiler %S/EquivalenceATest.cpp -o %t-EquivalenceATest
+RUN: %t-EquivalenceATest -timeout=1 -run_equivalence_server=EQUIV_SIG_TEST & export APID=$!
+RUN: sleep 3
+RUN: %t-EquivalenceATest -timeout=1 -use_equivalence_server=EQUIV_SIG_TEST -runs=500000 2>&1
+RUN: kill -9 $APID
diff --git a/test/fuzzer/equivalence.test b/test/fuzzer/equivalence.test
new file mode 100644
index 0000000..12964f4
--- /dev/null
+++ b/test/fuzzer/equivalence.test
@@ -0,0 +1,9 @@
+RUN: %cpp_compiler %S/EquivalenceATest.cpp -o %t-EquivalenceATest
+RUN: %cpp_compiler %S/EquivalenceBTest.cpp -o %t-EquivalenceBTest
+
+RUN: %t-EquivalenceATest -run_equivalence_server=EQUIV_TEST & export APID=$!
+RUN: sleep 3
+RUN: not %t-EquivalenceBTest -use_equivalence_server=EQUIV_TEST -max_len=4096 2>&1 | FileCheck %s
+CHECK: ERROR: libFuzzer: equivalence-mismatch. Sizes: {{.*}}; offset 2
+CHECK: SUMMARY: libFuzzer: equivalence-mismatch
+RUN: kill -9 $APID
diff --git a/test/fuzzer/exit-report.test b/test/fuzzer/exit-report.test
new file mode 100644
index 0000000..f754c13
--- /dev/null
+++ b/test/fuzzer/exit-report.test
@@ -0,0 +1,6 @@
+RUN: %cpp_compiler %S/SimpleTest.cpp -o %t-SimpleTest
+RUN: not %t-SimpleTest 2>&1 | FileCheck %s
+
+CHECK: ERROR: libFuzzer: fuzz target exited
+CHECK: SUMMARY: libFuzzer: fuzz target exited
+CHECK: Test unit written to
diff --git a/test/fuzzer/exit_on_src_pos.test b/test/fuzzer/exit_on_src_pos.test
new file mode 100644
index 0000000..6a42c7a
--- /dev/null
+++ b/test/fuzzer/exit_on_src_pos.test
@@ -0,0 +1,8 @@
+# Temporary use -mllvm -use-unknown-locations=Disable so that
+# all instructions have debug info (file line numbers) attached.
+RUN: %cpp_compiler %S/SimpleTest.cpp -o %t-SimpleTest -mllvm -use-unknown-locations=Disable
+RUN: %cpp_compiler %S/ShrinkControlFlowTest.cpp -o %t-ShrinkControlFlowTest
+
+RUN: %t-SimpleTest  -exit_on_src_pos=SimpleTest.cpp:18                 2>&1 | FileCheck %s --check-prefix=EXIT_ON_SRC_POS
+RUN: %t-ShrinkControlFlowTest  -exit_on_src_pos=Foo 2>&1 | FileCheck %s --check-prefix=EXIT_ON_SRC_POS
+EXIT_ON_SRC_POS: INFO: found line matching '{{.*}}', exiting.
diff --git a/test/fuzzer/extra-counters.test b/test/fuzzer/extra-counters.test
new file mode 100644
index 0000000..230f74a
--- /dev/null
+++ b/test/fuzzer/extra-counters.test
@@ -0,0 +1,7 @@
+REQUIRES: linux
+
+RUN: %cpp_compiler %S/TableLookupTest.cpp -o %t-TableLookupTest
+RUN: not %t-TableLookupTest -print_final_stats=1 2>&1 | FileCheck %s
+CHECK: BINGO
+// Expecting >= 4096 new_units_added
+CHECK: stat::new_units_added:{{.*[4][0-9][0-9][0-9]}}
diff --git a/test/fuzzer/fprofile-instr-generate.test b/test/fuzzer/fprofile-instr-generate.test
new file mode 100644
index 0000000..2a3ec96
--- /dev/null
+++ b/test/fuzzer/fprofile-instr-generate.test
@@ -0,0 +1,7 @@
+# Test libFuzzer + -fprofile-instr-generate
+REQUIRES: linux
+RUN: %cpp_compiler %S/SimpleTest.cpp -fsanitize-coverage=0 -fprofile-instr-generate -o %t-SimpleTest-fprofile-instr-generate
+CHECK-NOT: INFO: Loaded 1 modules
+CHECK: INFO: {{.*}} Clang Coverage Counters
+CHECK: BINGO
+RUN: not %t-SimpleTest-fprofile-instr-generate -runs=1000000 -seed=1 -use_clang_coverage=1 2>&1 | FileCheck %s
diff --git a/test/fuzzer/fuzzer-customcrossover.test b/test/fuzzer/fuzzer-customcrossover.test
new file mode 100644
index 0000000..5a78307
--- /dev/null
+++ b/test/fuzzer/fuzzer-customcrossover.test
@@ -0,0 +1,12 @@
+RUN: %cpp_compiler %S/CustomCrossOverTest.cpp -o %t-CustomCrossOverTest
+
+RUN: not %t-CustomCrossOverTest -seed=1 -runs=1000000                2>&1 | FileCheck %s --check-prefix=CHECK_CO
+Disable cross_over, verify that we can't find the target w/o it.
+RUN:     %t-CustomCrossOverTest -seed=1 -runs=1000000 -cross_over=0 2>&1 | FileCheck %s --check-prefix=CHECK_NO_CO
+
+CHECK_CO: In LLVMFuzzerCustomCrossover
+CHECK_CO: BINGO
+
+CHECK_NO_CO-NO: LLVMFuzzerCustomCrossover
+CHECK_NO_CO: DONE
+
diff --git a/test/fuzzer/fuzzer-customcrossoverandmutate.test b/test/fuzzer/fuzzer-customcrossoverandmutate.test
new file mode 100644
index 0000000..4a7dfba
--- /dev/null
+++ b/test/fuzzer/fuzzer-customcrossoverandmutate.test
@@ -0,0 +1,2 @@
+RUN: %cpp_compiler %S/CustomCrossOverAndMutateTest.cpp -o %t-CustomCrossOverAndMutateTest
+RUN: %t-CustomCrossOverAndMutateTest -seed=1 -runs=100000
diff --git a/test/fuzzer/fuzzer-custommutator.test b/test/fuzzer/fuzzer-custommutator.test
new file mode 100644
index 0000000..7a693cd
--- /dev/null
+++ b/test/fuzzer/fuzzer-custommutator.test
@@ -0,0 +1,5 @@
+RUN: %cpp_compiler %S/CustomMutatorTest.cpp -o %t-CustomMutatorTest
+RUN: not %t-CustomMutatorTest 2>&1 | FileCheck %s --check-prefix=LLVMFuzzerCustomMutator
+LLVMFuzzerCustomMutator: In LLVMFuzzerCustomMutator
+LLVMFuzzerCustomMutator: BINGO
+
diff --git a/test/fuzzer/fuzzer-dict.test b/test/fuzzer/fuzzer-dict.test
new file mode 100644
index 0000000..48c91dc
--- /dev/null
+++ b/test/fuzzer/fuzzer-dict.test
@@ -0,0 +1,8 @@
+RUN: %cpp_compiler %S/SimpleDictionaryTest.cpp -o %t-SimpleDictionaryTest
+
+CHECK: BINGO
+Done1000000: Done 1000000 runs in
+
+RUN: not %t-SimpleDictionaryTest -dict=%S/dict1.txt -seed=1 -runs=1000003  2>&1 | FileCheck %s
+RUN:     %t-SimpleDictionaryTest                    -seed=1 -runs=1000000  2>&1 | FileCheck %s --check-prefix=Done1000000
+
diff --git a/test/fuzzer/fuzzer-dirs.test b/test/fuzzer/fuzzer-dirs.test
new file mode 100644
index 0000000..9b6e4d1
--- /dev/null
+++ b/test/fuzzer/fuzzer-dirs.test
@@ -0,0 +1,21 @@
+RUN: %cpp_compiler %S/SimpleTest.cpp -o %t-SimpleTest
+
+RUN: rm -rf %t/SUB1
+RUN: mkdir -p %t/SUB1/SUB2/SUB3
+RUN: echo a > %t/SUB1/a
+RUN: echo b > %t/SUB1/SUB2/b
+RUN: echo c > %t/SUB1/SUB2/SUB3/c
+RUN: %t-SimpleTest %t/SUB1 -runs=0 2>&1 | FileCheck %s --check-prefix=SUBDIRS
+SUBDIRS: INFO: seed corpus: files: 3 min: 2b max: 2b total: 6b
+RUN: echo -n zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz > %t/SUB1/f64
+RUN: cat %t/SUB1/f64 %t/SUB1/f64 %t/SUB1/f64 %t/SUB1/f64 > %t/SUB1/f256
+RUN: cat %t/SUB1/f256 %t/SUB1/f256 %t/SUB1/f256 %t/SUB1/f256 > %t/SUB1/f1024
+RUN: cat %t/SUB1/f1024 %t/SUB1/f1024 %t/SUB1/f1024 %t/SUB1/f1024 > %t/SUB1/f4096
+RUN: cat %t/SUB1/f4096 %t/SUB1/f4096 > %t/SUB1/f8192
+RUN: %t-SimpleTest %t/SUB1 -runs=0 2>&1 | FileCheck %s --check-prefix=LONG
+LONG: INFO: -max_len is not provided; libFuzzer will not generate inputs larger than 8192 bytes
+RUN: rm -rf %t/SUB1
+
+RUN: not %t-SimpleTest NONEXISTENT_DIR 2>&1 | FileCheck %s --check-prefix=NONEXISTENT_DIR
+NONEXISTENT_DIR: No such directory: NONEXISTENT_DIR; exiting
+
diff --git a/test/fuzzer/fuzzer-fdmask.test b/test/fuzzer/fuzzer-fdmask.test
new file mode 100644
index 0000000..3f04993
--- /dev/null
+++ b/test/fuzzer/fuzzer-fdmask.test
@@ -0,0 +1,32 @@
+RUN: %cpp_compiler %S/SpamyTest.cpp -o %t-SpamyTest
+
+RUN: %t-SpamyTest -runs=1                  2>&1 | FileCheck %s --check-prefix=FD_MASK_0
+RUN: %t-SpamyTest -runs=1 -close_fd_mask=0 2>&1 | FileCheck %s --check-prefix=FD_MASK_0
+RUN: %t-SpamyTest -runs=1 -close_fd_mask=1 2>&1 | FileCheck %s --check-prefix=FD_MASK_1
+RUN: %t-SpamyTest -runs=1 -close_fd_mask=2 2>&1 | FileCheck %s --check-prefix=FD_MASK_2
+RUN: %t-SpamyTest -runs=1 -close_fd_mask=3 2>&1 | FileCheck %s --check-prefix=FD_MASK_3
+
+FD_MASK_0: PRINTF_STDOUT
+FD_MASK_0: PRINTF_STDERR
+FD_MASK_0: STREAM_COUT
+FD_MASK_0: STREAM_CERR
+FD_MASK_0: INITED
+
+FD_MASK_1-NOT: PRINTF_STDOUT
+FD_MASK_1: PRINTF_STDERR
+FD_MASK_1-NOT: STREAM_COUT
+FD_MASK_1: STREAM_CERR
+FD_MASK_1: INITED
+
+FD_MASK_2: PRINTF_STDOUT
+FD_MASK_2-NOT: PRINTF_STDERR
+FD_MASK_2: STREAM_COUT
+FD_MASK_2-NOTE: STREAM_CERR
+FD_MASK_2: INITED
+
+FD_MASK_3-NOT: PRINTF_STDOUT
+FD_MASK_3-NOT: PRINTF_STDERR
+FD_MASK_3-NOT: STREAM_COUT
+FD_MASK_3-NOT: STREAM_CERR
+FD_MASK_3: INITED
+
diff --git a/test/fuzzer/fuzzer-finalstats.test b/test/fuzzer/fuzzer-finalstats.test
new file mode 100644
index 0000000..4f983be
--- /dev/null
+++ b/test/fuzzer/fuzzer-finalstats.test
@@ -0,0 +1,12 @@
+RUN: %cpp_compiler %S/SimpleTest.cpp -o %t-SimpleTest
+RUN: %t-SimpleTest -seed=1 -runs=77 -print_final_stats=1 2>&1 | FileCheck %s --check-prefix=FINAL_STATS
+FINAL_STATS: stat::number_of_executed_units: 77
+FINAL_STATS: stat::average_exec_per_sec:     0
+FINAL_STATS: stat::new_units_added:
+FINAL_STATS: stat::slowest_unit_time_sec:    0
+FINAL_STATS: stat::peak_rss_mb:
+
+RUN: %t-SimpleTest %S/dict1.txt -runs=33 -print_final_stats=1 2>&1 | FileCheck %s --check-prefix=FINAL_STATS1
+FINAL_STATS1: stat::number_of_executed_units: 33
+FINAL_STATS1: stat::peak_rss_mb:
+
diff --git a/test/fuzzer/fuzzer-flags.test b/test/fuzzer/fuzzer-flags.test
new file mode 100644
index 0000000..b812b01
--- /dev/null
+++ b/test/fuzzer/fuzzer-flags.test
@@ -0,0 +1,19 @@
+RUN: %cpp_compiler %S/FlagsTest.cpp -o %t-FlagsTest
+RUN: %t-FlagsTest -runs=10 -foo_bar=1 2>&1 | FileCheck %s --check-prefix=FOO_BAR
+FOO_BAR: WARNING: unrecognized flag '-foo_bar=1'; use -help=1 to list all flags
+FOO_BAR: BINGO
+
+RUN: %t-FlagsTest -runs=10 --max_len=100 2>&1 | FileCheck %s --check-prefix=DASH_DASH
+DASH_DASH: WARNING: did you mean '-max_len=100' (single dash)?
+DASH_DASH: INFO: A corpus is not provided, starting from an empty corpus
+
+RUN: %t-FlagsTest -help=1 2>&1 | FileCheck %s --check-prefix=NO_INTERNAL
+NO_INTERNAL-NOT: internal flag
+
+RUN: %t-FlagsTest --foo-bar -runs=10 -ignore_remaining_args=1 --baz -help=1 test 2>&1 | FileCheck %s --check-prefix=PASSTHRU
+PASSTHRU: BINGO --foo-bar --baz -help=1 test
+
+RUN: mkdir -p %t/T0 %t/T1
+RUN: echo z > %t/T1/z
+RUN: %t-FlagsTest -runs=10 --foo-bar -merge=1 %t/T0 %t/T1 -ignore_remaining_args=1 --baz -help=1 test 2>&1 | FileCheck %s --check-prefix=PASSTHRU-MERGE
+PASSTHRU-MERGE: BINGO --foo-bar --baz -help=1 test
diff --git a/test/fuzzer/fuzzer-leak.test b/test/fuzzer/fuzzer-leak.test
new file mode 100644
index 0000000..f8e99ce
--- /dev/null
+++ b/test/fuzzer/fuzzer-leak.test
@@ -0,0 +1,37 @@
+REQUIRES: lsan
+RUN: %cpp_compiler %S/LeakTest.cpp -o %t-LeakTest
+RUN: %cpp_compiler %S/ThreadedLeakTest.cpp -o %t-ThreadedLeakTest
+RUN: %cpp_compiler %S/LeakTimeoutTest.cpp -o %t-LeakTimeoutTest
+
+RUN: not %t-LeakTest -runs=100000 -detect_leaks=1 2>&1 | FileCheck %s --check-prefix=LEAK_DURING
+LEAK_DURING: ERROR: LeakSanitizer: detected memory leaks
+LEAK_DURING: Direct leak of 4 byte(s) in 1 object(s) allocated from:
+LEAK_DURING: INFO: to ignore leaks on libFuzzer side use -detect_leaks=0
+LEAK_DURING: Test unit written to ./leak-
+LEAK_DURING-NOT: DONE
+LEAK_DURING-NOT: Done
+
+RUN: not %t-LeakTest -runs=0 -detect_leaks=1 %S 2>&1 | FileCheck %s --check-prefix=LEAK_IN_CORPUS
+LEAK_IN_CORPUS: ERROR: LeakSanitizer: detected memory leaks
+LEAK_IN_CORPUS: INFO: a leak has been found in the initial corpus.
+
+RUN: not %t-LeakTest -runs=100000000 %S/hi.txt 2>&1 | FileCheck %s --check-prefix=MULTI_RUN_LEAK
+MULTI_RUN_LEAK-NOT: pulse
+MULTI_RUN_LEAK: LeakSanitizer: detected memory leaks
+
+RUN: not %t-LeakTest -runs=100000 -detect_leaks=0 2>&1 | FileCheck %s --check-prefix=LEAK_AFTER
+RUN: not %t-LeakTest -runs=100000                 2>&1 | FileCheck %s --check-prefix=LEAK_DURING
+RUN: not %t-ThreadedLeakTest -runs=100000 -detect_leaks=0 2>&1 | FileCheck %s --check-prefix=LEAK_AFTER
+RUN: not %t-ThreadedLeakTest -runs=100000                 2>&1 | FileCheck %s --check-prefix=LEAK_DURING
+LEAK_AFTER: Done 100000 runs in
+LEAK_AFTER: ERROR: LeakSanitizer: detected memory leaks
+
+RUN: not %t-LeakTest -runs=100000 -max_len=1 2>&1 | FileCheck %s --check-prefix=MAX_LEN_1
+MAX_LEN_1: Test unit written to ./leak-7cf184f4c67ad58283ecb19349720b0cae756829
+
+RUN: not %t-LeakTimeoutTest -timeout=1 2>&1 | FileCheck %s --check-prefix=LEAK_TIMEOUT
+LEAK_TIMEOUT: ERROR: libFuzzer: timeout after
+LEAK_TIMEOUT-NOT: LeakSanitizer
+
+
+RUN: %t-LeakTest -error_exitcode=0
diff --git a/test/fuzzer/fuzzer-oom-with-profile.test b/test/fuzzer/fuzzer-oom-with-profile.test
new file mode 100644
index 0000000..75cf484
--- /dev/null
+++ b/test/fuzzer/fuzzer-oom-with-profile.test
@@ -0,0 +1,7 @@
+REQUIRES: linux
+RUN: %cpp_compiler %S/OutOfMemoryTest.cpp -o %t-OutOfMemoryTest
+RUN: not %t-OutOfMemoryTest -rss_limit_mb=300 2>&1 | FileCheck %s
+CHECK: ERROR: libFuzzer: out-of-memory (used: {{.*}}; limit: 300Mb)
+CHECK: Live Heap Allocations
+CHECK: Test unit written to ./oom-
+SUMMARY: libFuzzer: out-of-memory
diff --git a/test/fuzzer/fuzzer-oom.test b/test/fuzzer/fuzzer-oom.test
new file mode 100644
index 0000000..9ef7c48
--- /dev/null
+++ b/test/fuzzer/fuzzer-oom.test
@@ -0,0 +1,20 @@
+RUN: %cpp_compiler %S/OutOfMemoryTest.cpp -o %t-OutOfMemoryTest
+RUN: %cpp_compiler %S/OutOfMemorySingleLargeMallocTest.cpp -o %t-OutOfMemorySingleLargeMallocTest
+RUN: %cpp_compiler %S/AccumulateAllocationsTest.cpp -o %t-AccumulateAllocationsTest
+
+RUN: not %t-OutOfMemoryTest -rss_limit_mb=300 2>&1 | FileCheck %s
+
+CHECK: ERROR: libFuzzer: out-of-memory (used: {{.*}}; limit: 300Mb)
+CHECK: Test unit written to ./oom-
+SUMMARY: libFuzzer: out-of-memory
+
+RUN: not %t-OutOfMemorySingleLargeMallocTest -rss_limit_mb=300 2>&1 | FileCheck %s --check-prefix=SINGLE_LARGE_MALLOC
+
+We used to check for "out-of-memory (malloc(53{{.*}}))", but that would fail
+sometimes, so now we accept any OOM message.
+
+SINGLE_LARGE_MALLOC: libFuzzer: out-of-memory
+SINGLE_LARGE_MALLOC: in LLVMFuzzerTestOneInput
+
+# Check that -rss_limit_mb=0 means no limit.
+RUN: %t-AccumulateAllocationsTest -runs=1000 -rss_limit_mb=0
diff --git a/test/fuzzer/fuzzer-printcovpcs.test b/test/fuzzer/fuzzer-printcovpcs.test
new file mode 100644
index 0000000..e55ce14
--- /dev/null
+++ b/test/fuzzer/fuzzer-printcovpcs.test
@@ -0,0 +1,9 @@
+RUN: %cpp_compiler %S/SimpleTest.cpp -o %t-SimpleTest
+RUN: not %t-SimpleTest -print_pcs=1 -seed=1 2>&1 | FileCheck %s --check-prefix=PCS
+PCS-NOT: NEW_PC
+PCS:INITED
+PCS:NEW_PC: {{0x[a-f0-9]+}}
+PCS:NEW_PC: {{0x[a-f0-9]+}}
+PCS:NEW
+PCS:BINGO
+
diff --git a/test/fuzzer/fuzzer-runs.test b/test/fuzzer/fuzzer-runs.test
new file mode 100644
index 0000000..04987ee
--- /dev/null
+++ b/test/fuzzer/fuzzer-runs.test
@@ -0,0 +1,9 @@
+RUN: mkdir -p %t
+RUN: %cpp_compiler %S/NthRunCrashTest.cpp -o %t-NthRunCrashTest
+RUN: echo abcd > %t/NthRunCrashTest.in
+RUN: %t-NthRunCrashTest %t/NthRunCrashTest.in
+RUN: %t-NthRunCrashTest %t/NthRunCrashTest.in -runs=10
+RUN: not %t-NthRunCrashTest %t/NthRunCrashTest.in -runs=10000 2>&1 | FileCheck %s
+RUN: rm %t/NthRunCrashTest.in
+CHECK: BINGO
+
diff --git a/test/fuzzer/fuzzer-seed.test b/test/fuzzer/fuzzer-seed.test
new file mode 100644
index 0000000..a69ea54
--- /dev/null
+++ b/test/fuzzer/fuzzer-seed.test
@@ -0,0 +1,4 @@
+RUN: %cpp_compiler %S/NullDerefTest.cpp -o %t-SimpleCmpTest
+RUN: %t-SimpleCmpTest -seed=-1 -runs=0 2>&1 | FileCheck %s --check-prefix=CHECK_SEED_MINUS_ONE
+CHECK_SEED_MINUS_ONE: Seed: 4294967295
+
diff --git a/test/fuzzer/fuzzer-segv.test b/test/fuzzer/fuzzer-segv.test
new file mode 100644
index 0000000..4d3c757
--- /dev/null
+++ b/test/fuzzer/fuzzer-segv.test
@@ -0,0 +1,8 @@
+RUN: %cpp_compiler %S/NullDerefTest.cpp -o %t-NullDerefTest
+RUN: env ASAN_OPTIONS=handle_segv=0 not %t-NullDerefTest 2>&1 | FileCheck %s --check-prefix=LIBFUZZER_OWN_SEGV_HANDLER
+LIBFUZZER_OWN_SEGV_HANDLER: == ERROR: libFuzzer: deadly signal
+LIBFUZZER_OWN_SEGV_HANDLER: SUMMARY: libFuzzer: deadly signal
+LIBFUZZER_OWN_SEGV_HANDLER: Test unit written to ./crash-
+
+RUN: env ASAN_OPTIONS=handle_segv=1 not %t-NullDerefTest 2>&1 | FileCheck %s --check-prefix=LIBFUZZER_ASAN_SEGV_HANDLER
+LIBFUZZER_ASAN_SEGV_HANDLER: ERROR: AddressSanitizer: {{SEGV|access-violation}} on unknown address
diff --git a/test/fuzzer/fuzzer-singleinputs.test b/test/fuzzer/fuzzer-singleinputs.test
new file mode 100644
index 0000000..468da56
--- /dev/null
+++ b/test/fuzzer/fuzzer-singleinputs.test
@@ -0,0 +1,19 @@
+RUN: %cpp_compiler %S/NullDerefTest.cpp -o %t-NullDerefTest
+RUN: %cpp_compiler %S/SimpleTest.cpp -o %t-SimpleTest
+
+RUN: not %t-NullDerefTest %S/hi.txt 2>&1 | FileCheck %s --check-prefix=SingleInput
+SingleInput-NOT: Test unit written to ./crash-
+
+RUN: rm -rf  %tmp/SINGLE_INPUTS
+RUN: mkdir -p  %tmp/SINGLE_INPUTS
+RUN: echo aaa > %tmp/SINGLE_INPUTS/aaa
+RUN: echo bbb > %tmp/SINGLE_INPUTS/bbb
+RUN: %t-SimpleTest            %tmp/SINGLE_INPUTS/aaa %tmp/SINGLE_INPUTS/bbb 2>&1 | FileCheck %s --check-prefix=SINGLE_INPUTS
+RUN: %t-SimpleTest -max_len=2 %tmp/SINGLE_INPUTS/aaa %tmp/SINGLE_INPUTS/bbb 2>&1 | FileCheck %s --check-prefix=SINGLE_INPUTS
+RUN: rm -rf  %tmp/SINGLE_INPUTS
+SINGLE_INPUTS: SimpleTest{{.*}}: Running 2 inputs 1 time(s) each.
+SINGLE_INPUTS: aaa in
+SINGLE_INPUTS: bbb in
+SINGLE_INPUTS: NOTE: fuzzing was not performed, you have only
+SINGLE_INPUTS: executed the target code on a fixed set of inputs.
+
diff --git a/test/fuzzer/fuzzer-threaded.test b/test/fuzzer/fuzzer-threaded.test
new file mode 100644
index 0000000..572ed5a
--- /dev/null
+++ b/test/fuzzer/fuzzer-threaded.test
@@ -0,0 +1,8 @@
+CHECK: Done 1000 runs in
+RUN: %cpp_compiler %S/ThreadedTest.cpp -o %t-ThreadedTest
+
+RUN: %t-ThreadedTest -use_traces=1 -runs=1000  2>&1 | FileCheck %s
+RUN: %t-ThreadedTest -use_traces=1 -runs=1000  2>&1 | FileCheck %s
+RUN: %t-ThreadedTest -use_traces=1 -runs=1000  2>&1 | FileCheck %s
+RUN: %t-ThreadedTest -use_traces=1 -runs=1000  2>&1 | FileCheck %s
+
diff --git a/test/fuzzer/fuzzer-timeout.test b/test/fuzzer/fuzzer-timeout.test
new file mode 100644
index 0000000..41f4ba3
--- /dev/null
+++ b/test/fuzzer/fuzzer-timeout.test
@@ -0,0 +1,21 @@
+RUN: %cpp_compiler %S/TimeoutTest.cpp -o %t-TimeoutTest
+RUN: %cpp_compiler %S/TimeoutEmptyTest.cpp -o %t-TimeoutEmptyTest
+RUN: not %t-TimeoutTest -timeout=1 2>&1 | FileCheck %s --check-prefix=TimeoutTest
+TimeoutTest: ALARM: working on the last Unit for
+TimeoutTest: Test unit written to ./timeout-
+TimeoutTest: == ERROR: libFuzzer: timeout after
+TimeoutTest: #0
+TimeoutTest: #1
+TimeoutTest: #2
+TimeoutTest: SUMMARY: libFuzzer: timeout
+
+RUN: not %t-TimeoutTest -timeout=1 %S/hi.txt 2>&1 | FileCheck %s --check-prefix=SingleInputTimeoutTest
+SingleInputTimeoutTest: ALARM: working on the last Unit for {{[1-3]}} seconds
+SingleInputTimeoutTest-NOT: Test unit written to ./timeout-
+
+RUN: %t-TimeoutTest -timeout=1 -timeout_exitcode=0
+
+RUN: not %t-TimeoutEmptyTest -timeout=1 2>&1 | FileCheck %s --check-prefix=TimeoutEmptyTest
+TimeoutEmptyTest: ALARM: working on the last Unit for
+TimeoutEmptyTest: == ERROR: libFuzzer: timeout after
+TimeoutEmptyTest: SUMMARY: libFuzzer: timeout
diff --git a/test/fuzzer/fuzzer-ubsan.test b/test/fuzzer/fuzzer-ubsan.test
new file mode 100644
index 0000000..49c190c
--- /dev/null
+++ b/test/fuzzer/fuzzer-ubsan.test
@@ -0,0 +1,5 @@
+RUN: %cpp_compiler -fsanitize=undefined -fno-sanitize-recover=all %S/SignedIntOverflowTest.cpp -o %t-SignedIntOverflowTest-Ubsan
+RUN: not %t-SignedIntOverflowTest-Ubsan 2>&1 | FileCheck %s
+CHECK: runtime error: signed integer overflow: 2147483647 + 1 cannot be represented in type 'int'
+CHECK: Test unit written to ./crash-
+
diff --git a/test/fuzzer/fuzzer.test b/test/fuzzer/fuzzer.test
new file mode 100644
index 0000000..29bc8f0
--- /dev/null
+++ b/test/fuzzer/fuzzer.test
@@ -0,0 +1,70 @@
+CHECK: BINGO
+Done1000000: Done 1000000 runs in
+RUN: %cpp_compiler %S/BogusInitializeTest.cpp -o %t-BogusInitializeTest
+RUN: %cpp_compiler %S/BufferOverflowOnInput.cpp -o %t-BufferOverflowOnInput
+RUN: %cpp_compiler %S/CounterTest.cpp -o %t-CounterTest
+RUN: %cpp_compiler %S/DSO1.cpp -fPIC -shared -o %t-DSO1.so
+RUN: %cpp_compiler %S/DSO2.cpp -fPIC -shared -o %t-DSO2.so
+RUN: %cpp_compiler %S/DSOTestMain.cpp %S/DSOTestExtra.cpp -L. %t-DSO1.so %t-DSO2.so -o %t-DSOTest
+RUN: %cpp_compiler %S/FullCoverageSetTest.cpp -o %t-FullCoverageSetTest
+RUN: %cpp_compiler %S/InitializeTest.cpp -o %t-InitializeTest
+RUN: %cpp_compiler %S/NotinstrumentedTest.cpp -fsanitize-coverage=0 -o %t-NotinstrumentedTest-NoCoverage
+RUN: %cpp_compiler %S/NullDerefOnEmptyTest.cpp -o %t-NullDerefOnEmptyTest
+RUN: %cpp_compiler %S/NullDerefTest.cpp -o %t-NullDerefTest
+RUN: %cpp_compiler %S/SimpleCmpTest.cpp -o %t-SimpleCmpTest
+RUN: %cpp_compiler %S/SimpleTest.cpp -o %t-SimpleTest
+RUN: %cpp_compiler %S/StrncmpOOBTest.cpp -o %t-StrncmpOOBTest
+
+RUN: not %t-SimpleTest 2>&1 | FileCheck %s
+
+# only_ascii mode. Will perform some minimal self-validation.
+RUN: not %t-SimpleTest -only_ascii=1 2>&1
+
+RUN: %t-SimpleCmpTest -max_total_time=1 -use_cmp=0 2>&1 | FileCheck %s --check-prefix=MaxTotalTime
+MaxTotalTime: Done {{.*}} runs in {{.}} second(s)
+
+RUN: not %t-NullDerefTest                  2>&1 | FileCheck %s --check-prefix=NullDerefTest
+RUN: not %t-NullDerefTest -close_fd_mask=3 2>&1 | FileCheck %s --check-prefix=NullDerefTest
+NullDerefTest: ERROR: AddressSanitizer: {{SEGV|access-violation}} on unknown address
+NullDerefTest: Test unit written to ./crash-
+RUN: not %t-NullDerefTest  -artifact_prefix=ZZZ 2>&1 | FileCheck %s --check-prefix=NullDerefTestPrefix
+NullDerefTestPrefix: Test unit written to ZZZcrash-
+RUN: not %t-NullDerefTest  -artifact_prefix=ZZZ -exact_artifact_path=FOOBAR 2>&1 | FileCheck %s --check-prefix=NullDerefTestExactPath
+NullDerefTestExactPath: Test unit written to FOOBAR
+
+RUN: not %t-NullDerefOnEmptyTest -print_final_stats=1 2>&1 | FileCheck %s --check-prefix=NULL_DEREF_ON_EMPTY
+NULL_DEREF_ON_EMPTY: stat::number_of_executed_units:
+
+#not %t-FullCoverageSetTest -timeout=15 -seed=1 -mutate_depth=2 -use_full_coverage_set=1 2>&1 | FileCheck %s
+
+RUN: not %t-CounterTest  -max_len=6 -seed=1 -timeout=15 2>&1 | FileCheck %s --check-prefix=COUNTERS
+
+COUNTERS: INITED {{.*}} {{bits:|ft:}}
+COUNTERS: NEW {{.*}} {{bits:|ft:}} {{[1-9]*}}
+COUNTERS: NEW {{.*}} {{bits:|ft:}} {{[1-9]*}}
+COUNTERS: BINGO
+
+# Don't run UninstrumentedTest for now since we build libFuzzer itself with asan.
+DISABLED: not %t-UninstrumentedTest-Uninstrumented 2>&1 | FileCheck %s --check-prefix=UNINSTRUMENTED
+UNINSTRUMENTED: ERROR: __sanitizer_set_death_callback is not defined. Exiting.
+
+RUN: not %t-NotinstrumentedTest-NoCoverage 2>&1 | FileCheck %s --check-prefix=NO_COVERAGE
+NO_COVERAGE: ERROR: no interesting inputs were found. Is the code instrumented for coverage? Exiting
+
+RUN: not %t-BufferOverflowOnInput 2>&1 | FileCheck %s --check-prefix=OOB
+OOB: AddressSanitizer: heap-buffer-overflow
+OOB: is located 0 bytes to the right of 3-byte region
+
+RUN: not %t-InitializeTest -use_value_profile=1 2>&1 | FileCheck %s
+
+RUN: not %t-DSOTest 2>&1 | FileCheck %s --check-prefix=DSO
+DSO: INFO: Loaded 3 modules
+DSO: BINGO
+
+RUN: env ASAN_OPTIONS=strict_string_checks=1 not %t-StrncmpOOBTest -seed=1 -runs=1000000 2>&1 | FileCheck %s --check-prefix=STRNCMP
+STRNCMP: AddressSanitizer: heap-buffer-overflow
+STRNCMP-NOT: __sanitizer_weak_hook_strncmp
+STRNCMP: in LLVMFuzzerTestOneInput
+
+RUN: not %t-BogusInitializeTest 2>&1 | FileCheck %s --check-prefix=BOGUS_INITIALIZE
+BOGUS_INITIALIZE: argv[0] has been modified in LLVMFuzzerInitialize
diff --git a/test/fuzzer/gc-sections.test b/test/fuzzer/gc-sections.test
new file mode 100644
index 0000000..8785bb0
--- /dev/null
+++ b/test/fuzzer/gc-sections.test
@@ -0,0 +1,13 @@
+REQUIRES: linux
+
+No gc-sections:
+RUN: %cpp_compiler %S/GcSectionsTest.cpp -o %t
+RUN: nm %t | grep UnusedFunctionShouldBeRemovedByLinker | count 1
+
+With gc-sections. Currently, we can't remove unused code.
+DISABLED: %cpp_compiler %S/GcSectionsTest.cpp -o %t -ffunction-sections -Wl,-gc-sections
+DISABLED: nm %t | grep UnusedFunctionShouldBeRemovedByLinker | count 1
+
+With gc sections, with trace-pc. Unused code is removed.
+RUN: %cpp_compiler %S/GcSectionsTest.cpp -o %t -fsanitize-coverage=0 -fsanitize-coverage=trace-pc -ffunction-sections -Wl,-gc-sections
+RUN: nm %t | not grep UnusedFunctionShouldBeRemovedByLinker
diff --git a/test/fuzzer/hi.txt b/test/fuzzer/hi.txt
new file mode 100644
index 0000000..2f9031f
--- /dev/null
+++ b/test/fuzzer/hi.txt
@@ -0,0 +1 @@
+Hi!
\ No newline at end of file
diff --git a/test/fuzzer/inline-8bit-counters.test b/test/fuzzer/inline-8bit-counters.test
new file mode 100644
index 0000000..76ae1f5
--- /dev/null
+++ b/test/fuzzer/inline-8bit-counters.test
@@ -0,0 +1,4 @@
+RUN: %cpp_compiler %S/SimpleTest.cpp -fno-sanitize-coverage=trace-pc-guard -fsanitize-coverage=inline-8bit-counters -o %t-SimpleTest-Inline8bitCounters
+CHECK: INFO: Loaded 1 modules ({{.*}} inline 8-bit counters)
+CHECK: BINGO
+RUN: not %t-SimpleTest-Inline8bitCounters -runs=1000000 -seed=1 2>&1 | FileCheck %s
diff --git a/test/fuzzer/lit.cfg b/test/fuzzer/lit.cfg
new file mode 100644
index 0000000..0350a1a
--- /dev/null
+++ b/test/fuzzer/lit.cfg
@@ -0,0 +1,79 @@
+import lit.formats
+import sys
+import os
+
+config.name = "LLVMFuzzer"
+config.test_format = lit.formats.ShTest(True)
+config.suffixes = ['.test']
+config.test_source_root = os.path.dirname(__file__)
+
+# Choose between lit's internal shell pipeline runner and a real shell.  If
+# LIT_USE_INTERNAL_SHELL is in the environment, we use that as an override.
+use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
+if use_lit_shell:
+    # 0 is external, "" is default, and everything else is internal.
+    execute_external = (use_lit_shell == "0")
+else:
+    # Otherwise we default to internal on Windows and external elsewhere, as
+    # bash on Windows is usually very slow.
+    execute_external = (not sys.platform in ['win32'])
+
+# testFormat: The test format to use to interpret tests.
+#
+# For now we require '&&' between commands, until they get globally killed and
+# the test runner updated.
+config.test_format = lit.formats.ShTest(execute_external)
+
+# LeakSanitizer is not supported on OSX right now.
+if sys.platform.startswith('darwin'):
+  lit_config.note('lsan feature unavailable')
+else:
+  lit_config.note('lsan feature available')
+  config.available_features.add('lsan')
+
+if sys.platform.startswith('win') or sys.platform.startswith('cygwin'):
+  config.available_features.add('windows')
+
+if sys.platform.startswith('darwin'):
+  config.available_features.add('darwin')
+
+if sys.platform.startswith('linux'):
+  # Note the value of ``sys.platform`` is not consistent
+  # between python 2 and 3, hence the use of ``.startswith()``.
+  lit_config.note('linux feature available')
+  config.available_features.add('linux')
+else:
+  lit_config.note('linux feature unavailable')
+
+config.substitutions.append(('%build_dir', config.cmake_binary_dir))
+libfuzzer_src_root = os.path.join(config.compiler_rt_src_root, "lib", "fuzzer")
+config.substitutions.append(('%libfuzzer_src', libfuzzer_src_root))
+
+def generate_compiler_cmd(is_cpp=True, fuzzer_enabled=True):
+  compiler_cmd = config.c_compiler
+  link_cmd = '-lc++' if 'darwin' in config.target_triple else '-lstdc++'
+  std_cmd = '-std=c++11' if is_cpp else ''
+  sanitizers = ['address']
+  if fuzzer_enabled:
+    sanitizers.append('fuzzer')
+  sanitizers_cmd = ('-fsanitize=%s' % ','.join(sanitizers))
+  isysroot_cmd = config.osx_sysroot_flag if config.osx_sysroot_flag else ''
+  include_cmd = '-I%s' % libfuzzer_src_root
+  return '%s %s %s -gline-tables-only %s %s %s' % (
+      compiler_cmd, std_cmd, link_cmd, isysroot_cmd, sanitizers_cmd, include_cmd)
+
+config.substitutions.append(('%cpp_compiler',
+      generate_compiler_cmd(is_cpp=True, fuzzer_enabled=True)
+      ))
+
+config.substitutions.append(('%c_compiler',
+      generate_compiler_cmd(is_cpp=False, fuzzer_enabled=True)
+      ))
+
+config.substitutions.append(('%no_fuzzer_cpp_compiler',
+      generate_compiler_cmd(is_cpp=True, fuzzer_enabled=False)
+      ))
+
+config.substitutions.append(('%no_fuzzer_c_compiler',
+      generate_compiler_cmd(is_cpp=False, fuzzer_enabled=False)
+      ))
diff --git a/test/fuzzer/lit.site.cfg.in b/test/fuzzer/lit.site.cfg.in
new file mode 100644
index 0000000..7f70c8f
--- /dev/null
+++ b/test/fuzzer/lit.site.cfg.in
@@ -0,0 +1,17 @@
+@LIT_SITE_CFG_IN_HEADER@
+
+config.test_exec_root = "@CMAKE_CURRENT_BINARY_DIR@"
+
+config.cpp_compiler = "@LIBFUZZER_TEST_COMPILER@"
+config.target_flags = "@LIBFUZZER_TEST_FLAGS@"
+config.c_compiler = "@LIBFUZZER_TEST_COMPILER@"
+
+config.osx_sysroot_flag = "@OSX_SYSROOT_FLAG@"
+config.cmake_binary_dir = "@CMAKE_BINARY_DIR@"
+config.target_triple = "@TARGET_TRIPLE@"
+
+# Load common config for all compiler-rt lit tests.
+lit_config.load_config(config,
+    "@COMPILER_RT_BINARY_DIR@/test/lit.common.configured")
+
+lit_config.load_config(config, "@CMAKE_CURRENT_SOURCE_DIR@/lit.cfg")
diff --git a/test/fuzzer/max-number-of-runs.test b/test/fuzzer/max-number-of-runs.test
new file mode 100644
index 0000000..efe7a9c
--- /dev/null
+++ b/test/fuzzer/max-number-of-runs.test
@@ -0,0 +1,10 @@
+RUN: %cpp_compiler %S/AccumulateAllocationsTest.cpp -o %t-AccumulateAllocationsTest
+
+RUN: %t-AccumulateAllocationsTest -seed=1 -runs=2 2>&1 | FileCheck %s --check-prefix=CHECK1
+CHECK1: Done 2 runs
+
+RUN: %t-AccumulateAllocationsTest -seed=1 -runs=3 2>&1 | FileCheck %s --check-prefix=CHECK2
+CHECK2: Done 3 runs
+
+RUN: %t-AccumulateAllocationsTest -seed=1 -runs=4 2>&1 | FileCheck %s --check-prefix=CHECK3
+CHECK3: Done 4 runs
diff --git a/test/fuzzer/memcmp.test b/test/fuzzer/memcmp.test
new file mode 100644
index 0000000..3431a52
--- /dev/null
+++ b/test/fuzzer/memcmp.test
@@ -0,0 +1,3 @@
+RUN: %cpp_compiler %S/MemcmpTest.cpp -o %t-MemcmpTest
+RUN: not %t-MemcmpTest               -seed=1 -runs=10000000   2>&1 | FileCheck %s
+CHECK: BINGO
diff --git a/test/fuzzer/memcmp64.test b/test/fuzzer/memcmp64.test
new file mode 100644
index 0000000..223c3bd
--- /dev/null
+++ b/test/fuzzer/memcmp64.test
@@ -0,0 +1,3 @@
+RUN: %cpp_compiler %S/Memcmp64BytesTest.cpp -o %t-Memcmp64BytesTest
+RUN: not %t-Memcmp64BytesTest        -seed=1 -runs=1000000   2>&1 | FileCheck %s
+CHECK: BINGO
diff --git a/test/fuzzer/merge-posix.test b/test/fuzzer/merge-posix.test
new file mode 100644
index 0000000..e34e3a3
--- /dev/null
+++ b/test/fuzzer/merge-posix.test
@@ -0,0 +1,23 @@
+RUN: %cpp_compiler %S/FullCoverageSetTest.cpp -o %t-FullCoverageSetTest
+
+RUN: rm -rf  %tmp/T1 %tmp/T2
+RUN: mkdir -p %tmp/T1 %tmp/T2
+
+RUN: echo F..... > %tmp/T1/1
+RUN: echo .U.... > %tmp/T1/2
+RUN: echo ..Z... > %tmp/T1/3
+
+RUN: echo .....F > %tmp/T2/1
+RUN: echo ....U. > %tmp/T2/2
+RUN: echo ...Z.. > %tmp/T2/3
+RUN: echo ...Z.. > %tmp/T2/4
+RUN: echo ....E. > %tmp/T2/5
+RUN: echo .....R > %tmp/T2/6
+
+# Check that we can report an error if file size exceeded
+RUN: (ulimit -f 1; not %t-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2 2>&1 | FileCheck %s --check-prefix=SIGXFSZ)
+SIGXFSZ: ERROR: libFuzzer: file size exceeded
+
+# Check that we honor TMPDIR
+RUN: TMPDIR=DIR_DOES_NOT_EXIST not %t-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2 2>&1 | FileCheck %s --check-prefix=TMPDIR
+TMPDIR: MERGE-OUTER: failed to write to the control file: DIR_DOES_NOT_EXIST/libFuzzerTemp
diff --git a/test/fuzzer/merge-summary.test b/test/fuzzer/merge-summary.test
new file mode 100644
index 0000000..3e21c23
--- /dev/null
+++ b/test/fuzzer/merge-summary.test
@@ -0,0 +1,17 @@
+RUN: %cpp_compiler %S/FullCoverageSetTest.cpp -o %t-FullCoverageSetTest
+
+RUN: rm -rf %t/T1 %t/T2
+RUN: mkdir -p %t/T0 %t/T1 %t/T2
+RUN: echo ...Z.. > %t/T2/1
+RUN: echo ....E. > %t/T2/2
+RUN: echo .....R > %t/T2/3
+RUN: echo F..... > %t/T2/a
+RUN: echo .U.... > %t/T2/b
+RUN: echo ..Z... > %t/T2/c
+
+RUN: %t-FullCoverageSetTest -merge=1 %t/T1 %t/T2 -save_coverage_summary=%t/SUMMARY 2>&1 | FileCheck %s --check-prefix=SAVE_SUMMARY
+SAVE_SUMMARY: MERGE-OUTER: writing coverage summary for 6 files to {{.*}}SUMMARY
+RUN: rm %t/T1/*
+RUN: %t-FullCoverageSetTest -merge=1 %t/T1 %t/T2 -load_coverage_summary=%t/SUMMARY 2>&1 | FileCheck %s --check-prefix=LOAD_SUMMARY
+LOAD_SUMMARY: MERGE-OUTER: coverage summary loaded from {{.*}}SUMMAR
+LOAD_SUMMARY: MERGE-OUTER: 0 new files with 0 new features added
diff --git a/test/fuzzer/merge.test b/test/fuzzer/merge.test
new file mode 100644
index 0000000..30e27b8
--- /dev/null
+++ b/test/fuzzer/merge.test
@@ -0,0 +1,55 @@
+CHECK: BINGO
+
+RUN: %cpp_compiler %S/FullCoverageSetTest.cpp -o %t-FullCoverageSetTest
+
+RUN: rm -rf %tmp/T0 %tmp/T1 %tmp/T2
+RUN: mkdir -p %tmp/T0 %tmp/T1 %tmp/T2
+RUN: echo F..... > %tmp/T0/1
+RUN: echo .U.... > %tmp/T0/2
+RUN: echo ..Z... > %tmp/T0/3
+
+# T1 has 3 elements, T2 is empty.
+RUN: cp %tmp/T0/* %tmp/T1/
+RUN: %t-FullCoverageSetTest         -merge=1 %tmp/T1 %tmp/T2 2>&1 | FileCheck %s --check-prefix=CHECK1
+CHECK1: MERGE-OUTER: 3 files, 3 in the initial corpus
+CHECK1: MERGE-OUTER: 0 new files with 0 new features added
+
+RUN: echo ...Z.. > %tmp/T2/1
+RUN: echo ....E. > %tmp/T2/2
+RUN: echo .....R > %tmp/T2/3
+RUN: echo F..... > %tmp/T2/a
+RUN: echo .U.... > %tmp/T2/b
+RUN: echo ..Z... > %tmp/T2/c
+
+# T1 has 3 elements, T2 has 6 elements, only 3 are new.
+RUN: %t-FullCoverageSetTest         -merge=1 %tmp/T1 %tmp/T2 2>&1 | FileCheck %s --check-prefix=CHECK2
+CHECK2: MERGE-OUTER: 9 files, 3 in the initial corpus
+CHECK2: MERGE-OUTER: 3 new files with 3 new features added
+
+# Now, T1 has 6 units and T2 has no new interesting units.
+RUN: %t-FullCoverageSetTest         -merge=1 %tmp/T1 %tmp/T2 2>&1 | FileCheck %s --check-prefix=CHECK3
+CHECK3: MERGE-OUTER: 12 files, 6 in the initial corpus
+CHECK3: MERGE-OUTER: 0 new files with 0 new features added
+
+# Check that we respect max_len during the merge and don't crash.
+RUN: rm %tmp/T1/*
+RUN: cp %tmp/T0/* %tmp/T1/
+RUN: echo looooooooong > %tmp/T2/looooooooong
+RUN: %t-FullCoverageSetTest         -merge=1 %tmp/T1 %tmp/T2 -max_len=6 2>&1 | FileCheck %s --check-prefix=MAX_LEN
+MAX_LEN: MERGE-OUTER: 3 new files
+
+# Check that merge tolerates failures.
+RUN: rm %tmp/T1/*
+RUN: cp %tmp/T0/* %tmp/T1/
+RUN: echo 'FUZZER' > %tmp/T2/FUZZER
+RUN: %t-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2 2>&1 | FileCheck %s --check-prefix=MERGE_WITH_CRASH
+MERGE_WITH_CRASH: MERGE-OUTER: succesfull in 2 attempt(s)
+MERGE_WITH_CRASH: MERGE-OUTER: 3 new files
+
+# Check that we actually limit the size with max_len
+RUN: %t-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2  -max_len=5 2>&1 | FileCheck %s --check-prefix=MERGE_LEN5
+MERGE_LEN5: MERGE-OUTER: succesfull in 1 attempt(s)
+
+RUN: rm -rf  %tmp/T1/* %tmp/T2/*
+RUN: not %t-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2 2>&1 | FileCheck %s --check-prefix=EMPTY
+EMPTY: MERGE-OUTER: zero succesfull attempts, exiting
diff --git a/test/fuzzer/minimize_crash.test b/test/fuzzer/minimize_crash.test
new file mode 100644
index 0000000..77ab370
--- /dev/null
+++ b/test/fuzzer/minimize_crash.test
@@ -0,0 +1,16 @@
+RUN: %cpp_compiler %S/NullDerefTest.cpp -o %t-NullDerefTest
+RUN: %cpp_compiler %S/SingleByteInputTest.cpp -o %t-SingleByteInputTest
+
+RUN: echo 'Hi!rv349f34t3gg' > not_minimal_crash
+RUN: %t-NullDerefTest -minimize_crash=1 not_minimal_crash -max_total_time=2 2>&1 | FileCheck %s
+CHECK: CRASH_MIN: failed to minimize beyond ./minimized-from-{{.*}} (3 bytes), exiting
+RUN: %t-NullDerefTest -minimize_crash=1 not_minimal_crash -max_total_time=2 -exact_artifact_path=exact_minimized_path 2>&1 | FileCheck %s --check-prefix=CHECK_EXACT
+CHECK_EXACT: CRASH_MIN: failed to minimize beyond exact_minimized_path (3 bytes), exiting
+RUN: rm not_minimal_crash minimized-from-* exact_minimized_path
+
+RUN: echo -n 'abcd*xyz' > not_minimal_crash
+RUN: %t-SingleByteInputTest -minimize_crash=1 not_minimal_crash -exact_artifact_path=exact_minimized_path 2>&1 | FileCheck %s --check-prefix=MIN1
+MIN1: Test unit written to exact_minimized_path
+MIN1: Test unit written to exact_minimized_path
+MIN1: INFO: The input is small enough, exiting
+MIN1: CRASH_MIN: failed to minimize beyond exact_minimized_path (1 bytes), exiting
diff --git a/test/fuzzer/minimize_two_crashes.test b/test/fuzzer/minimize_two_crashes.test
new file mode 100644
index 0000000..e6ff999
--- /dev/null
+++ b/test/fuzzer/minimize_two_crashes.test
@@ -0,0 +1,18 @@
+# Test that the minimizer stops when it sees a differe bug.
+
+RUN: %cpp_compiler %S/TwoDifferentBugsTest.cpp -o %t-TwoDifferentBugsTest
+
+RUN: rm -rf %t && mkdir %t
+RUN: echo H12345678901234667888090 > %t/long_crash
+RUN: env ASAN_OPTIONS=dedup_token_length=3 %t-TwoDifferentBugsTest -seed=1 -minimize_crash=1 %t/long_crash -exact_artifact_path=%t/result 2>&1 | FileCheck %s
+
+CHECK: DedupToken1: DEDUP_TOKEN: Bar
+CHECK: DedupToken2: DEDUP_TOKEN: Bar
+CHECK: DedupToken1: DEDUP_TOKEN: Bar
+CHECK: DedupToken2: DEDUP_TOKEN: Foo
+CHECK: CRASH_MIN: mismatch in dedup tokens
+
+RUN: not  %t-TwoDifferentBugsTest %t/result 2>&1 | FileCheck %s --check-prefix=VERIFY
+
+VERIFY: ERROR: AddressSanitizer:
+VERIFY: in Bar
diff --git a/test/fuzzer/overwrite-input.test b/test/fuzzer/overwrite-input.test
new file mode 100644
index 0000000..3695622
--- /dev/null
+++ b/test/fuzzer/overwrite-input.test
@@ -0,0 +1,3 @@
+RUN: %cpp_compiler %S/OverwriteInputTest.cpp -o %t-OverwriteInputTest
+RUN: not %t-OverwriteInputTest 2>&1 | FileCheck %s
+CHECK: ERROR: libFuzzer: fuzz target overwrites it's const input
diff --git a/test/fuzzer/print-func.test b/test/fuzzer/print-func.test
new file mode 100644
index 0000000..930e999
--- /dev/null
+++ b/test/fuzzer/print-func.test
@@ -0,0 +1,10 @@
+RUN: %cpp_compiler %S/PrintFuncTest.cpp -o %t
+RUN: %t -seed=1 -runs=100000 2>&1 | FileCheck %s
+RUN: %t -seed=1 -runs=100000 -print_funcs=0 2>&1 | FileCheck %s --check-prefix=NO
+CHECK: NEW_FUNC{{.*}} FunctionA
+CHECK: NEW_FUNC{{.*}} FunctionB
+CHECK: NEW_FUNC{{.*}} FunctionC
+CHECK: BINGO
+
+NO-NOT: NEW_FUNC
+NO: BINGO
diff --git a/test/fuzzer/recommended-dictionary.test b/test/fuzzer/recommended-dictionary.test
new file mode 100644
index 0000000..41b62c9
--- /dev/null
+++ b/test/fuzzer/recommended-dictionary.test
@@ -0,0 +1,6 @@
+RUN: %cpp_compiler %S/RepeatedMemcmp.cpp -o %t-RepeatedMemcmp
+RUN: %t-RepeatedMemcmp -seed=11 -runs=100000 -max_len=20 2>&1 | FileCheck %s --check-prefix=RECOMMENDED_DICT
+RECOMMENDED_DICT:###### Recommended dictionary. ######
+RECOMMENDED_DICT-DAG: "foo"
+RECOMMENDED_DICT-DAG: "bar"
+RECOMMENDED_DICT:###### End of recommended dictionary. ######
diff --git a/test/fuzzer/reduce_inputs.test b/test/fuzzer/reduce_inputs.test
new file mode 100644
index 0000000..94f8cc4
--- /dev/null
+++ b/test/fuzzer/reduce_inputs.test
@@ -0,0 +1,16 @@
+# Test -reduce_inputs=1
+
+RUN: rm -rf %t/C
+RUN: mkdir -p %t/C
+RUN: %cpp_compiler %S/ShrinkControlFlowSimpleTest.cpp -o %t-ShrinkControlFlowSimpleTest
+RUN: %cpp_compiler %S/ShrinkControlFlowTest.cpp -o %t-ShrinkControlFlowTest
+RUN: %t-ShrinkControlFlowSimpleTest  -exit_on_item=0eb8e4ed029b774d80f2b66408203801cb982a60   -runs=1000000 %t/C 2>&1 | FileCheck %s
+CHECK: INFO: found item with checksum '0eb8e4ed029b774d80f2b66408203801cb982a60'
+
+# Test that reduce_inputs deletes redundant files in the corpus.
+RUN: %t-ShrinkControlFlowSimpleTest -runs=0 %t/C 2>&1 | FileCheck %s --check-prefix=COUNT
+COUNT: seed corpus: files: 4
+
+# a bit longer test
+RUN: %t-ShrinkControlFlowTest  -exit_on_item=0eb8e4ed029b774d80f2b66408203801cb982a60  -seed=1 -runs=1000000  2>&1 | FileCheck %s
+
diff --git a/test/fuzzer/repeated-bytes.test b/test/fuzzer/repeated-bytes.test
new file mode 100644
index 0000000..0bba2a9
--- /dev/null
+++ b/test/fuzzer/repeated-bytes.test
@@ -0,0 +1,3 @@
+RUN: %cpp_compiler %S/RepeatedBytesTest.cpp -o %t-RepeatedBytesTest
+CHECK: BINGO
+RUN: not %t-RepeatedBytesTest -seed=1 -runs=1000000 2>&1 | FileCheck %s
diff --git a/test/fuzzer/shrink.test b/test/fuzzer/shrink.test
new file mode 100644
index 0000000..2988d4b
--- /dev/null
+++ b/test/fuzzer/shrink.test
@@ -0,0 +1,10 @@
+RUN: %cpp_compiler %S/ShrinkControlFlowTest.cpp -o %t-ShrinkControlFlowTest
+RUN: %cpp_compiler %S/ShrinkValueProfileTest.cpp -o %t-ShrinkValueProfileTest
+RUN: %t-ShrinkControlFlowTest -seed=1 -exit_on_item=0eb8e4ed029b774d80f2b66408203801cb982a60 -runs=1000000  -shrink=1 -reduce_inputs=0 2>&1 | FileCheck %s --check-prefix=SHRINK1
+# Limit max_len to run this negative test faster.
+RUN: %t-ShrinkControlFlowTest -seed=1 -exit_on_item=0eb8e4ed029b774d80f2b66408203801cb982a60 -runs=1000000 -shrink=0 -reduce_inputs=0 -max_len=64 2>&1 | FileCheck %s --check-prefix=SHRINK0
+RUN: %t-ShrinkValueProfileTest -seed=1 -exit_on_item=aea2e3923af219a8956f626558ef32f30a914ebc -runs=100000 -shrink=1 -reduce_inputs=0 -use_value_profile=1 2>&1 | FileCheck %s --check-prefix=SHRINK1_VP
+
+SHRINK0: Done 1000000 runs in
+SHRINK1: INFO: found item with checksum '0eb8e4ed029b774d80f2b66408203801cb982a60', exiting.
+SHRINK1_VP: INFO: found item with checksum 'aea2e3923af219a8956f626558ef32f30a914ebc', exiting
diff --git a/test/fuzzer/simple-cmp.test b/test/fuzzer/simple-cmp.test
new file mode 100644
index 0000000..08123ed
--- /dev/null
+++ b/test/fuzzer/simple-cmp.test
@@ -0,0 +1,3 @@
+RUN: %cpp_compiler %S/SimpleCmpTest.cpp -o %t-SimpleCmpTest
+CHECK: BINGO
+RUN: not %t-SimpleCmpTest -seed=1 -runs=100000000 2>&1 | FileCheck %s
diff --git a/test/fuzzer/standalone.test b/test/fuzzer/standalone.test
new file mode 100644
index 0000000..e648370
--- /dev/null
+++ b/test/fuzzer/standalone.test
@@ -0,0 +1,8 @@
+RUN: %no_fuzzer_c_compiler %libfuzzer_src/standalone/StandaloneFuzzTargetMain.c -c -o %t_1.o
+RUN: %no_fuzzer_cpp_compiler %S/InitializeTest.cpp -c -o %t_2.o
+
+RUN: %no_fuzzer_cpp_compiler %t_1.o %t_2.o -o %t-StandaloneInitializeTest
+RUN: %t-StandaloneInitializeTest %S/hi.txt %S/dict1.txt 2>&1 | FileCheck %s
+CHECK: StandaloneFuzzTargetMain: running 2 inputs
+CHECK: Done:    {{.*}}hi.txt: (3 bytes)
+CHECK: Done:    {{.*}}dict1.txt: (61 bytes)
diff --git a/test/fuzzer/strcmp.test b/test/fuzzer/strcmp.test
new file mode 100644
index 0000000..47ad8f9
--- /dev/null
+++ b/test/fuzzer/strcmp.test
@@ -0,0 +1,4 @@
+RUN: %cpp_compiler %S/StrcmpTest.cpp -o %t-StrcmpTest
+RUN: not %t-StrcmpTest               -seed=1 -runs=2000000   2>&1 | FileCheck %s
+CHECK: BINGO
+
diff --git a/test/fuzzer/strncmp.test b/test/fuzzer/strncmp.test
new file mode 100644
index 0000000..49693c8
--- /dev/null
+++ b/test/fuzzer/strncmp.test
@@ -0,0 +1,4 @@
+RUN: %cpp_compiler %S/StrncmpTest.cpp -o %t-StrncmpTest
+RUN: not %t-StrncmpTest              -seed=2 -runs=10000000   2>&1 | FileCheck %s
+CHECK: BINGO
+
diff --git a/test/fuzzer/strstr.test b/test/fuzzer/strstr.test
new file mode 100644
index 0000000..c39d580
--- /dev/null
+++ b/test/fuzzer/strstr.test
@@ -0,0 +1,4 @@
+RUN: %cpp_compiler %S/StrstrTest.cpp -o %t-StrstrTest
+RUN: not %t-StrstrTest               -seed=1 -runs=2000000   2>&1 | FileCheck %s
+CHECK: BINGO
+
diff --git a/test/fuzzer/swap-cmp.test b/test/fuzzer/swap-cmp.test
new file mode 100644
index 0000000..5c2379c
--- /dev/null
+++ b/test/fuzzer/swap-cmp.test
@@ -0,0 +1,3 @@
+RUN: %cpp_compiler %S/SwapCmpTest.cpp -o %t-SwapCmpTest
+CHECK: BINGO
+RUN: not %t-SwapCmpTest -seed=1 -runs=10000000 2>&1 | FileCheck %s
diff --git a/test/fuzzer/trace-malloc-2.test b/test/fuzzer/trace-malloc-2.test
new file mode 100644
index 0000000..56f16d7
--- /dev/null
+++ b/test/fuzzer/trace-malloc-2.test
@@ -0,0 +1,10 @@
+// FIXME: This test infinite loops on darwin because it crashes
+// printing a stack trace repeatedly
+UNSUPPORTED: darwin
+
+RUN: %cpp_compiler %S/TraceMallocTest.cpp -o %t-TraceMallocTest
+
+RUN: %t-TraceMallocTest -seed=1 -trace_malloc=2 -runs=1000 2>&1 | FileCheck %s --check-prefix=TRACE2
+TRACE2-DAG: FREE[0]
+TRACE2-DAG: MALLOC[0]
+TRACE2-DAG: in LLVMFuzzerTestOneInput
diff --git a/test/fuzzer/trace-malloc-threaded.test b/test/fuzzer/trace-malloc-threaded.test
new file mode 100644
index 0000000..11f3f04
--- /dev/null
+++ b/test/fuzzer/trace-malloc-threaded.test
@@ -0,0 +1,36 @@
+// FIXME: This test infinite loops on darwin because it crashes
+// printing a stack trace repeatedly
+UNSUPPORTED: darwin
+
+RUN: %cpp_compiler %S/TraceMallocThreadedTest.cpp -o %t-TraceMallocThreadedTest
+
+RUN: %t-TraceMallocThreadedTest -trace_malloc=2 -runs=1 2>&1 | FileCheck %s
+CHECK: {{MALLOC\[[0-9]+] +0x[0-9]+ 5639}}
+CHECK-NEXT: {{ +\#0 +}}
+CHECK-NEXT: {{ +\#1 +}}
+CHECK-NEXT: {{ +\#2 +}}
+
+CHECK: {{MALLOC\[[0-9]+] +0x[0-9]+ 5639}}
+CHECK-NEXT: {{ +\#0 +}}
+CHECK-NEXT: {{ +\#1 +}}
+CHECK-NEXT: {{ +\#2 +}}
+
+CHECK: {{MALLOC\[[0-9]+] +0x[0-9]+ 5639}}
+CHECK-NEXT: {{ +\#0 +}}
+CHECK-NEXT: {{ +\#1 +}}
+CHECK-NEXT: {{ +\#2 +}}
+
+CHECK: {{MALLOC\[[0-9]+] +0x[0-9]+ 5639}}
+CHECK-NEXT: {{ +\#0 +}}
+CHECK-NEXT: {{ +\#1 +}}
+CHECK-NEXT: {{ +\#2 +}}
+
+CHECK: {{MALLOC\[[0-9]+] +0x[0-9]+ 5639}}
+CHECK-NEXT: {{ +\#0 +}}
+CHECK-NEXT: {{ +\#1 +}}
+CHECK-NEXT: {{ +\#2 +}}
+
+CHECK: {{MALLOC\[[0-9]+] +0x[0-9]+ 5639}}
+CHECK-NEXT: {{ +\#0 +}}
+CHECK-NEXT: {{ +\#1 +}}
+CHECK-NEXT: {{ +\#2 +}}
diff --git a/test/fuzzer/trace-malloc-unbalanced.test b/test/fuzzer/trace-malloc-unbalanced.test
new file mode 100644
index 0000000..53b83fb
--- /dev/null
+++ b/test/fuzzer/trace-malloc-unbalanced.test
@@ -0,0 +1,27 @@
+// FIXME: This test infinite loops on darwin because it crashes
+// printing a stack trace repeatedly
+UNSUPPORTED: darwin
+
+// Verifies lib/fuzzer/scripts/unbalanced_allocs.py script
+
+RUN: %cpp_compiler %S/TraceMallocTest.cpp -o %t-TraceMallocTest
+
+RUN: %t-TraceMallocTest -seed=1 -trace_malloc=1 -runs=100 2>&1 | \
+RUN:    %libfuzzer_src/scripts/unbalanced_allocs.py --skip=5 | FileCheck %s
+
+RUN: %t-TraceMallocTest -seed=1 -trace_malloc=2 -runs=100 2>&1 | \
+RUN:    %libfuzzer_src/scripts/unbalanced_allocs.py --skip=5 | FileCheck %s --check-prefixes=CHECK,CHECK2
+
+CHECK: MallocFreeTracer: START
+CHECK: Unbalanced MALLOC[{{[0-9]+}}] [[PTR:0x[0-9a-f]+]] 4
+CHECK2-NEXT: {{ #0 0x[0-9a-f]+ in }}
+CHECK2-NEXT: {{ #1 0x[0-9a-f]+ in }}
+CHECK2-NEXT: {{ #2 0x[0-9a-f]+ in }}
+CHECK: MallocFreeTracer: STOP
+
+CHECK: MallocFreeTracer: START
+CHECK: Unbalanced FREE[{{[0-9]+}}] [[PTR]]
+CHECK2-NEXT: {{ #0 0x[0-9a-f]+ in }}
+CHECK2-NEXT: {{ #1 0x[0-9a-f]+ in }}
+CHECK2-NEXT: {{ #2 0x[0-9a-f]+ in }}
+CHECK: MallocFreeTracer: STOP
diff --git a/test/fuzzer/trace-malloc.test b/test/fuzzer/trace-malloc.test
new file mode 100644
index 0000000..979be99
--- /dev/null
+++ b/test/fuzzer/trace-malloc.test
@@ -0,0 +1,7 @@
+RUN: %cpp_compiler %S/TraceMallocTest.cpp -o %t-TraceMallocTest
+
+RUN: %t-TraceMallocTest -seed=1 -trace_malloc=1 -runs=10000 2>&1 | FileCheck %s
+CHECK-DAG: MallocFreeTracer: STOP 0 0 (same)
+CHECK-DAG: MallocFreeTracer: STOP 0 1 (DIFFERENT)
+CHECK-DAG: MallocFreeTracer: STOP 1 0 (DIFFERENT)
+CHECK-DAG: MallocFreeTracer: STOP 1 1 (same)
diff --git a/test/fuzzer/trace-pc.test b/test/fuzzer/trace-pc.test
new file mode 100644
index 0000000..eaa0cb0
--- /dev/null
+++ b/test/fuzzer/trace-pc.test
@@ -0,0 +1,3 @@
+RUN: %cpp_compiler %S/SimpleTest.cpp -fno-sanitize-coverage=edge,trace-cmp,indirect-calls,8bit-counters,trace-pc-guard -fsanitize-coverage=trace-pc -o %t-SimpleTest-TracePC
+CHECK: BINGO
+RUN: not %t-SimpleTest-TracePC -runs=1000000 -seed=1 2>&1 | FileCheck %s
diff --git a/test/fuzzer/ulimit.test b/test/fuzzer/ulimit.test
new file mode 100644
index 0000000..8772caa
--- /dev/null
+++ b/test/fuzzer/ulimit.test
@@ -0,0 +1,3 @@
+RUN: %cpp_compiler %S/SimpleTest.cpp -o %t-SimpleTest
+RUN: ulimit -s 1000
+RUN: not %t-SimpleTest
diff --git a/test/fuzzer/unit/lit.site.cfg.in b/test/fuzzer/unit/lit.site.cfg.in
new file mode 100644
index 0000000..bab2824
--- /dev/null
+++ b/test/fuzzer/unit/lit.site.cfg.in
@@ -0,0 +1,9 @@
+@LIT_SITE_CFG_IN_HEADER@
+
+config.name = "LLVMFuzzer-Unittest"
+# Load common config for all compiler-rt unit tests.
+lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/unittests/lit.common.unit.configured")
+
+config.test_exec_root = os.path.join("@COMPILER_RT_BINARY_DIR@",
+                                     "lib", "fuzzer", "tests")
+config.test_source_root = config.test_exec_root
diff --git a/test/fuzzer/value-profile-cmp.test b/test/fuzzer/value-profile-cmp.test
new file mode 100644
index 0000000..6424429
--- /dev/null
+++ b/test/fuzzer/value-profile-cmp.test
@@ -0,0 +1,3 @@
+CHECK: BINGO
+RUN: %cpp_compiler %S/SimpleCmpTest.cpp -o %t-SimpleCmpTest
+RUN: not %t-SimpleCmpTest -seed=1 -use_cmp=0 -use_value_profile=1 -runs=100000000 2>&1 | FileCheck %s
diff --git a/test/fuzzer/value-profile-cmp2.test b/test/fuzzer/value-profile-cmp2.test
new file mode 100644
index 0000000..43dd8f9
--- /dev/null
+++ b/test/fuzzer/value-profile-cmp2.test
@@ -0,0 +1,3 @@
+CHECK: BINGO
+RUN: %cpp_compiler %S/SimpleHashTest.cpp -o %t-SimpleHashTest
+RUN: not %t-SimpleHashTest -seed=1 -use_cmp=0 -use_value_profile=1 -runs=100000000 -max_len=64 2>&1 | FileCheck %s
diff --git a/test/fuzzer/value-profile-cmp3.test b/test/fuzzer/value-profile-cmp3.test
new file mode 100644
index 0000000..d228475
--- /dev/null
+++ b/test/fuzzer/value-profile-cmp3.test
@@ -0,0 +1,3 @@
+CHECK: BINGO
+RUN: %cpp_compiler %S/AbsNegAndConstantTest.cpp -o %t-AbsNegAndConstantTest
+RUN: not %t-AbsNegAndConstantTest -seed=1 -use_cmp=0 -use_value_profile=1 -runs=100000000 2>&1 | FileCheck %s
diff --git a/test/fuzzer/value-profile-cmp4.test b/test/fuzzer/value-profile-cmp4.test
new file mode 100644
index 0000000..bcbc67b
--- /dev/null
+++ b/test/fuzzer/value-profile-cmp4.test
@@ -0,0 +1,3 @@
+CHECK: BINGO
+RUN: %cpp_compiler %S/AbsNegAndConstant64Test.cpp -o %t-AbsNegAndConstant64Test
+RUN: not %t-AbsNegAndConstant64Test -seed=1 -use_cmp=0 -use_value_profile=1 -runs=100000000 2>&1 | FileCheck %s
diff --git a/test/fuzzer/value-profile-div.test b/test/fuzzer/value-profile-div.test
new file mode 100644
index 0000000..8711a25
--- /dev/null
+++ b/test/fuzzer/value-profile-div.test
@@ -0,0 +1,4 @@
+CHECK: AddressSanitizer: {{FPE|int-divide-by-zero}}
+RUN: %cpp_compiler %S/DivTest.cpp -fsanitize-coverage=trace-div -o %t-DivTest
+RUN: not %t-DivTest -seed=1 -use_value_profile=1 -runs=10000000 2>&1 | FileCheck %s
+
diff --git a/test/fuzzer/value-profile-load.test b/test/fuzzer/value-profile-load.test
new file mode 100644
index 0000000..3bf2a65
--- /dev/null
+++ b/test/fuzzer/value-profile-load.test
@@ -0,0 +1,3 @@
+CHECK: AddressSanitizer: global-buffer-overflow
+RUN: %cpp_compiler %S/LoadTest.cpp -fsanitize-coverage=trace-pc-guard,indirect-calls,trace-gep,trace-div,trace-cmp -o %t-LoadTest
+RUN: not %t-LoadTest -seed=2 -use_cmp=0 -use_value_profile=1 -runs=20000000 2>&1 | FileCheck %s
diff --git a/test/fuzzer/value-profile-mem.test b/test/fuzzer/value-profile-mem.test
new file mode 100644
index 0000000..0b0c21d
--- /dev/null
+++ b/test/fuzzer/value-profile-mem.test
@@ -0,0 +1,3 @@
+CHECK: BINGO
+RUN: %cpp_compiler %S/SingleMemcmpTest.cpp -o %t-SingleMemcmpTest
+RUN: not %t-SingleMemcmpTest -seed=1  -use_cmp=0 -use_value_profile=1 -runs=10000000 2>&1 | FileCheck %s
diff --git a/test/fuzzer/value-profile-set.test b/test/fuzzer/value-profile-set.test
new file mode 100644
index 0000000..e2e3fb4
--- /dev/null
+++ b/test/fuzzer/value-profile-set.test
@@ -0,0 +1,4 @@
+CHECK: BINGO
+RUN: %cpp_compiler %S/FourIndependentBranchesTest.cpp -o %t-FourIndependentBranchesTest
+RUN: not %t-FourIndependentBranchesTest -seed=1 -use_cmp=0 -use_value_profile=1 -runs=100000000 2>&1 | FileCheck %s
+
diff --git a/test/fuzzer/value-profile-strcmp.test b/test/fuzzer/value-profile-strcmp.test
new file mode 100644
index 0000000..f5c766a
--- /dev/null
+++ b/test/fuzzer/value-profile-strcmp.test
@@ -0,0 +1,3 @@
+CHECK: BINGO
+RUN: %cpp_compiler %S/SingleStrcmpTest.cpp -o %t-SingleStrcmpTest
+RUN: not %t-SingleStrcmpTest -seed=1  -use_cmp=0 -use_value_profile=1 -runs=10000000 2>&1 | FileCheck %s
diff --git a/test/fuzzer/value-profile-strncmp.test b/test/fuzzer/value-profile-strncmp.test
new file mode 100644
index 0000000..2dfe43c
--- /dev/null
+++ b/test/fuzzer/value-profile-strncmp.test
@@ -0,0 +1,3 @@
+CHECK: BINGO
+RUN: %cpp_compiler %S/SingleStrncmpTest.cpp -o %t-SingleStrncmpTest
+RUN: not %t-SingleStrncmpTest -seed=1 -use_cmp=0 -use_value_profile=1 -runs=100000000 2>&1 | FileCheck %s
diff --git a/test/fuzzer/value-profile-switch.test b/test/fuzzer/value-profile-switch.test
new file mode 100644
index 0000000..7edb312
--- /dev/null
+++ b/test/fuzzer/value-profile-switch.test
@@ -0,0 +1,5 @@
+CHECK: BINGO
+RUN: %cpp_compiler %S/SwitchTest.cpp -o %t-SwitchTest
+RUN: %cpp_compiler %S/Switch2Test.cpp -o %t-Switch2Test
+RUN: not %t-SwitchTest  -use_cmp=0 -use_value_profile=1 -runs=100000000 -seed=1 2>&1 | FileCheck %s
+RUN: not %t-Switch2Test -use_cmp=0 -use_value_profile=1 -runs=100000000 -seed=1 2>&1 | FileCheck %s
diff --git a/test/lit.common.cfg b/test/lit.common.cfg
index bffdc43..26fbea0 100644
--- a/test/lit.common.cfg
+++ b/test/lit.common.cfg
@@ -11,8 +11,18 @@
 import lit.formats
 import lit.util
 
-# Setup test format. Use bash on Unix and the lit shell on Windows.
-execute_external = (not sys.platform in ['win32'])
+# Choose between lit's internal shell pipeline runner and a real shell.  If
+# LIT_USE_INTERNAL_SHELL is in the environment, we use that as an override.
+use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
+if use_lit_shell:
+    # 0 is external, "" is default, and everything else is internal.
+    execute_external = (use_lit_shell == "0")
+else:
+    # Otherwise we default to internal on Windows and external elsewhere, as
+    # bash on Windows is usually very slow.
+    execute_external = (not sys.platform in ['win32'])
+
+# Setup test format.
 config.test_format = lit.formats.ShTest(execute_external)
 if execute_external:
   config.available_features.add('shell')
@@ -41,6 +51,11 @@
 # Add compiler ID to the list of available features.
 config.available_features.add(compiler_id)
 
+# BFD linker in 64-bit android toolchains fails to find libm.so, which is a
+# transitive shared library dependency (via asan runtime).
+if config.android:
+  config.target_cflags += " -pie -fuse-ld=gold -Wl,--enable-new-dtags"
+
 # Clear some environment variables that might affect Clang.
 possibly_dangerous_env_vars = ['ASAN_OPTIONS', 'DFSAN_OPTIONS', 'LSAN_OPTIONS',
                                'MSAN_OPTIONS', 'UBSAN_OPTIONS',
@@ -62,10 +77,9 @@
     del config.environment[name]
 
 # Tweak PATH to include llvm tools dir.
-llvm_tools_dir = getattr(config, 'llvm_tools_dir', None)
-if (not llvm_tools_dir) or (not os.path.exists(llvm_tools_dir)):
-  lit_config.fatal("Invalid llvm_tools_dir config attribute: %r" % llvm_tools_dir)
-path = os.path.pathsep.join((llvm_tools_dir, config.environment['PATH']))
+if (not config.llvm_tools_dir) or (not os.path.exists(config.llvm_tools_dir)):
+  lit_config.fatal("Invalid llvm_tools_dir config attribute: %r" % config.llvm_tools_dir)
+path = os.path.pathsep.join((config.llvm_tools_dir, config.environment['PATH']))
 config.environment['PATH'] = path
 
 # Help MSVS link.exe find the standard libraries.
@@ -83,7 +97,31 @@
      instead define '%clangXXX' substitution in lit config. ***\n\n""") )
 
 # Allow tests to be executed on a simulator or remotely.
-config.substitutions.append( ('%run', config.emulator) )
+if config.emulator:
+  config.substitutions.append( ('%run', config.emulator) )
+  config.substitutions.append( ('%env ', "env ") )
+  config.compile_wrapper = ""
+elif config.ios:
+  config.available_features.add('ios')
+  device_id_env = "SANITIZER_IOSSIM_TEST_DEVICE_IDENTIFIER" if config.iossim else "SANITIZER_IOS_TEST_DEVICE_IDENTIFIER"
+  if device_id_env in os.environ: config.environment[device_id_env] = os.environ[device_id_env]
+  ios_commands_dir = os.path.join(config.compiler_rt_src_root, "test", "sanitizer_common", "ios_commands")
+  run_wrapper = os.path.join(ios_commands_dir, "iossim_run.py" if config.iossim else "ios_run.py")
+  config.substitutions.append(('%run', run_wrapper))
+  env_wrapper = os.path.join(ios_commands_dir, "iossim_env.py" if config.iossim else "ios_env.py")
+  config.substitutions.append(('%env ', env_wrapper + " "))
+  compile_wrapper = os.path.join(ios_commands_dir, "iossim_compile.py" if config.iossim else "ios_compile.py")
+  config.compile_wrapper = compile_wrapper
+elif config.android:
+  config.available_features.add('android')
+  compile_wrapper = os.path.join(config.compiler_rt_src_root, "test", "sanitizer_common", "android_commands", "android_compile.py") + " "
+  config.compile_wrapper = compile_wrapper
+  config.substitutions.append( ('%run', "") )
+  config.substitutions.append( ('%env ', "env ") )
+else:
+  config.substitutions.append( ('%run', "") )
+  config.substitutions.append( ('%env ', "env ") )
+  config.compile_wrapper = ""
 
 # Define CHECK-%os to check for OS-dependent output.
 config.substitutions.append( ('CHECK-%os', ("CHECK-" + config.host_os)))
@@ -105,7 +143,7 @@
 target_arch = getattr(config, 'target_arch', None)
 if target_arch:
   config.available_features.add(target_arch + '-target-arch')
-  if target_arch in ['x86_64', 'i386', 'i686']:
+  if target_arch in ['x86_64', 'i386']:
     config.available_features.add('x86-target-arch')
   config.available_features.add(target_arch + '-' + config.host_os.lower())
 
@@ -118,6 +156,9 @@
   config.available_features.add('cxxabi')
 
 if config.has_lld:
+  config.available_features.add('lld-available')
+
+if config.use_lld:
   config.available_features.add('lld')
 
 if config.can_symbolize:
@@ -126,10 +167,11 @@
 lit.util.usePlatformSdkOnDarwin(config, lit_config)
 
 if config.host_os == 'Darwin':
+  osx_version = (10, 0, 0)
   try:
     osx_version = subprocess.check_output(["sw_vers", "-productVersion"])
     osx_version = tuple(int(x) for x in osx_version.split('.'))
-    config.darwin_osx_version = osx_version
+    if len(osx_version) == 2: osx_version = (osx_version[0], osx_version[1], 0)
     if osx_version >= (10, 11):
       config.available_features.add('osx-autointerception')
       config.available_features.add('osx-ld64-live_support')
@@ -142,6 +184,8 @@
   except:
     pass
 
+  config.darwin_osx_version = osx_version
+
   # Detect x86_64h
   try:
     output = subprocess.check_output(["sysctl", "hw.cpusubtype"])
@@ -157,7 +201,20 @@
 else:
   config.substitutions.append( ("%macos_min_target_10_11", "") )
 
-sancovcc_path = os.path.join(llvm_tools_dir, "sancov") 
+if config.android:
+  adb = os.environ.get('ADB', 'adb')
+  try:
+    android_api_level_str = subprocess.check_output([adb, "shell", "getprop", "ro.build.version.sdk"]).rstrip()
+  except (subprocess.CalledProcessError, OSError):
+    lit_config.fatal("Failed to read ro.build.version.sdk (using '%s' as adb)" % adb)
+  try:
+    android_api_level = int(android_api_level_str)
+  except ValueError:
+    lit_config.fatal("Failed to read ro.build.version.sdk (using '%s' as adb): got '%s'" % (adb, android_api_level_str))
+  if android_api_level >= 26:
+    config.available_features.add('android-26')
+
+sancovcc_path = os.path.join(config.llvm_tools_dir, "sancov")
 if os.path.exists(sancovcc_path):
   config.available_features.add("has_sancovcc")
   config.substitutions.append( ("%sancovcc ", sancovcc_path) )
@@ -166,6 +223,9 @@
   return os.path.exists(os.path.join(config.llvm_shlib_dir, 'libLTO.dylib'))
 
 def is_linux_lto_supported():
+  if config.use_lld:
+    return True
+
   if not os.path.exists(os.path.join(config.llvm_shlib_dir, 'LLVMgold.so')):
     return False
 
@@ -188,16 +248,25 @@
 elif config.host_os == 'Linux' and is_linux_lto_supported():
   config.lto_supported = True
   config.lto_launch = []
-  config.lto_flags = ["-fuse-ld=gold"]
+  if config.use_lld:
+    config.lto_flags = ["-fuse-ld=lld"]
+  else:
+    config.lto_flags = ["-fuse-ld=gold"]
 elif config.host_os == 'Windows' and is_windows_lto_supported():
   config.lto_supported = True
   config.lto_launch = []
-  config.lto_flags = ["-fuse-ld=lld"]
+  # FIXME: Remove -nopdb when PDB writing is ready.
+  config.lto_flags = ["-fuse-ld=lld -Wl,-nopdb"]
 else:
   config.lto_supported = False
 
 if config.lto_supported:
   config.available_features.add('lto')
+  if config.use_thinlto:
+    config.available_features.add('thinlto')
+    config.lto_flags += ["-flto=thin"]
+  else:
+    config.lto_flags += ["-flto"]
 
 # Ask llvm-config about assertion mode.
 try:
@@ -206,7 +275,7 @@
       stdout = subprocess.PIPE,
       env=config.environment)
 except OSError:
-  print("Could not find llvm-config in " + llvm_tools_dir)
+  print("Could not find llvm-config in " + config.llvm_tools_dir)
   exit(42)
 
 if re.search(r'ON', llvm_config_cmd.stdout.read().decode('ascii')):
@@ -223,3 +292,32 @@
 # of large mmap'd regions (terabytes) by the kernel.
 if platform.system() == 'Darwin':
   lit_config.parallelism_groups["darwin-64bit-sanitizer"] = 3
+
+if config.host_os == 'Darwin':
+  config.substitutions.append( ("%ld_flags_rpath_exe", '-Wl,-rpath,@executable_path/ %dynamiclib') )
+  config.substitutions.append( ("%ld_flags_rpath_so", '-install_name @rpath/`basename %dynamiclib`') )
+elif config.host_os == 'FreeBSD':
+  config.substitutions.append( ("%ld_flags_rpath_exe", "-Wl,-z,origin -Wl,-rpath,\$ORIGIN -L%T -l%xdynamiclib_namespec") )
+  config.substitutions.append( ("%ld_flags_rpath_so", '') )
+elif config.host_os == 'Linux':
+  config.substitutions.append( ("%ld_flags_rpath_exe", "-Wl,-rpath,\$ORIGIN -L%T -l%xdynamiclib_namespec") )
+  config.substitutions.append( ("%ld_flags_rpath_so", '') )
+
+# Must be defined after the substitutions that use %dynamiclib.
+config.substitutions.append( ("%dynamiclib", '%T/%xdynamiclib_filename') )
+config.substitutions.append( ("%xdynamiclib_filename", 'lib%xdynamiclib_namespec.so') )
+config.substitutions.append( ("%xdynamiclib_namespec", '%basename_t.dynamic') )
+
+config.default_sanitizer_opts = []
+if config.host_os == 'Darwin':
+  # On Darwin, we default to `abort_on_error=1`, which would make tests run
+  # much slower. Let's override this and run lit tests with 'abort_on_error=0'.
+  config.default_sanitizer_opts += ['abort_on_error=0']
+  config.default_sanitizer_opts += ['log_to_syslog=0']
+elif config.android:
+  config.default_sanitizer_opts += ['abort_on_error=0']
+
+# Allow tests to use REQUIRES=stable-runtime.  For use when you cannot use XFAIL
+# because the test hangs or fails on one configuration and not the other.
+if config.android or (config.target_arch not in ['arm', 'armhf', 'aarch64']):
+  config.available_features.add('stable-runtime')
diff --git a/test/lit.common.configured.in b/test/lit.common.configured.in
index 862d06b..b49e8eb 100644
--- a/test/lit.common.configured.in
+++ b/test/lit.common.configured.in
@@ -25,9 +25,14 @@
 set_default("compiler_rt_debug", @COMPILER_RT_DEBUG_PYBOOL@)
 set_default("compiler_rt_libdir", "@COMPILER_RT_LIBRARY_OUTPUT_DIR@")
 set_default("emulator", "@COMPILER_RT_EMULATOR@")
+set_default("ios", False)
+set_default("iossim", False)
 set_default("sanitizer_can_use_cxxabi", @SANITIZER_CAN_USE_CXXABI_PYBOOL@)
-set_default("has_lld", @COMPILER_RT_HAS_LLD_SOURCES_PYBOOL@)
+set_default("has_lld", @COMPILER_RT_HAS_LLD_PYBOOL@)
 set_default("can_symbolize", @CAN_SYMBOLIZE@)
+set_default("use_lld", False)
+set_default("use_thinlto", False)
+set_default("android", @ANDROID_PYBOOL@)
 config.available_features.add('target-is-%s' % config.target_arch)
 
 # LLVM tools dir can be passed in lit parameters, so try to
diff --git a/test/lsan/TestCases/Darwin/dispatch.mm b/test/lsan/TestCases/Darwin/dispatch.mm
new file mode 100644
index 0000000..606cc9e
--- /dev/null
+++ b/test/lsan/TestCases/Darwin/dispatch.mm
@@ -0,0 +1,59 @@
+// Test for threads spawned with wqthread_start
+// RUN: LSAN_BASE="report_objects=1:use_stacks=0:use_registers=0"
+// RUN: %clangxx_lsan %s -DDISPATCH_ASYNC -o %t-async -framework Foundation
+// RUN: %clangxx_lsan %s -DDISPATCH_SYNC -o %t-sync -framework Foundation
+// RUN: %env_lsan_opts=$LSAN_BASE not %run %t-async 2>&1 | FileCheck %s
+// RUN: %env_lsan_opts=$LSAN_BASE not %run %t-sync 2>&1 | FileCheck %s
+
+#include <dispatch/dispatch.h>
+#include <pthread.h>
+#include <stdlib.h>
+
+#include "sanitizer_common/print_address.h"
+
+bool done = false;
+
+void worker_do_leak(int size) {
+  void *p = malloc(size);
+  print_address("Test alloc: ", 1, p);
+  done = true;
+}
+
+#if DISPATCH_ASYNC
+// Tests for the Grand Central Dispatch. See
+// http://developer.apple.com/library/mac/#documentation/Performance/Reference/GCD_libdispatch_Ref/Reference/reference.html
+// for the reference.
+void TestGCDDispatch() {
+  dispatch_queue_t queue = dispatch_get_global_queue(0, 0);
+  dispatch_block_t block = ^{
+    worker_do_leak(1337);
+  };
+  // dispatch_async() runs the task on a worker thread that does not go through
+  // pthread_create(). We need to verify that LeakSanitizer notices that the
+  // thread has started.
+  dispatch_async(queue, block);
+  while (!done)
+    pthread_yield_np();
+}
+#elif DISPATCH_SYNC
+void TestGCDDispatch() {
+  dispatch_queue_t queue = dispatch_get_global_queue(2, 0);
+  dispatch_block_t block = ^{
+    worker_do_leak(1337);
+  };
+  // dispatch_sync() runs the task on a worker thread that does not go through
+  // pthread_create(). We need to verify that LeakSanitizer notices that the
+  // thread has started.
+  dispatch_sync(queue, block);
+}
+#endif
+
+int main() {
+  TestGCDDispatch();
+  return 0;
+}
+
+// CHECK: Test alloc: [[addr:0x[0-9,a-f]+]]
+// CHECK: LeakSanitizer: detected memory leaks
+// CHECK: [[addr]] (1337 bytes)
+// CHECK: SUMMARY: {{(Leak|Address)}}Sanitizer:
diff --git a/test/lsan/TestCases/Darwin/lit.local.cfg b/test/lsan/TestCases/Darwin/lit.local.cfg
new file mode 100644
index 0000000..a85dfcd
--- /dev/null
+++ b/test/lsan/TestCases/Darwin/lit.local.cfg
@@ -0,0 +1,9 @@
+def getRoot(config):
+  if not config.parent:
+    return config
+  return getRoot(config.parent)
+
+root = getRoot(config)
+
+if root.host_os not in ['Darwin']:
+  config.unsupported = True
diff --git a/test/lsan/TestCases/cleanup_in_tsd_destructor.c b/test/lsan/TestCases/Linux/cleanup_in_tsd_destructor.c
similarity index 90%
rename from test/lsan/TestCases/cleanup_in_tsd_destructor.c
rename to test/lsan/TestCases/Linux/cleanup_in_tsd_destructor.c
index 6da7595..cf080e4 100644
--- a/test/lsan/TestCases/cleanup_in_tsd_destructor.c
+++ b/test/lsan/TestCases/Linux/cleanup_in_tsd_destructor.c
@@ -5,8 +5,8 @@
 // makes its best effort.
 // RUN: LSAN_BASE="report_objects=1:use_registers=0:use_stacks=0:use_globals=0"
 // RUN: %clang_lsan %s -o %t
-// RUN: LSAN_OPTIONS=$LSAN_BASE:use_tls=1 %run %t
-// RUN: LSAN_OPTIONS=$LSAN_BASE:use_tls=0 not %run %t 2>&1 | FileCheck %s
+// RUN: %env_lsan_opts=$LSAN_BASE:use_tls=1 %run %t
+// RUN: %env_lsan_opts=$LSAN_BASE:use_tls=0 not %run %t 2>&1 | FileCheck %s
 
 #include <assert.h>
 #include <pthread.h>
diff --git a/test/lsan/TestCases/disabler_in_tsd_destructor.c b/test/lsan/TestCases/Linux/disabler_in_tsd_destructor.c
similarity index 95%
rename from test/lsan/TestCases/disabler_in_tsd_destructor.c
rename to test/lsan/TestCases/Linux/disabler_in_tsd_destructor.c
index 4a3a7ac..52819bb 100644
--- a/test/lsan/TestCases/disabler_in_tsd_destructor.c
+++ b/test/lsan/TestCases/Linux/disabler_in_tsd_destructor.c
@@ -1,7 +1,7 @@
 // Regression test. Disabler should not depend on TSD validity.
 // RUN: LSAN_BASE="report_objects=1:use_registers=0:use_stacks=0:use_globals=0:use_tls=1:use_ld_allocations=0"
 // RUN: %clang_lsan %s -o %t
-// RUN: LSAN_OPTIONS=$LSAN_BASE %run %t
+// RUN: %env_lsan_opts=$LSAN_BASE %run %t
 
 #include <assert.h>
 #include <pthread.h>
diff --git a/test/lsan/TestCases/fork.cc b/test/lsan/TestCases/Linux/fork.cc
similarity index 100%
rename from test/lsan/TestCases/fork.cc
rename to test/lsan/TestCases/Linux/fork.cc
diff --git a/test/lsan/TestCases/fork_threaded.cc b/test/lsan/TestCases/Linux/fork_threaded.cc
similarity index 100%
rename from test/lsan/TestCases/fork_threaded.cc
rename to test/lsan/TestCases/Linux/fork_threaded.cc
diff --git a/test/lsan/TestCases/guard-page.c b/test/lsan/TestCases/Linux/guard-page.c
similarity index 100%
rename from test/lsan/TestCases/guard-page.c
rename to test/lsan/TestCases/Linux/guard-page.c
diff --git a/test/lsan/TestCases/Linux/lit.local.cfg b/test/lsan/TestCases/Linux/lit.local.cfg
new file mode 100644
index 0000000..57271b8
--- /dev/null
+++ b/test/lsan/TestCases/Linux/lit.local.cfg
@@ -0,0 +1,9 @@
+def getRoot(config):
+  if not config.parent:
+    return config
+  return getRoot(config.parent)
+
+root = getRoot(config)
+
+if root.host_os not in ['Linux']:
+  config.unsupported = True
diff --git a/test/lsan/TestCases/use_tls_dynamic.cc b/test/lsan/TestCases/Linux/use_tls_dynamic.cc
similarity index 84%
rename from test/lsan/TestCases/use_tls_dynamic.cc
rename to test/lsan/TestCases/Linux/use_tls_dynamic.cc
index 770ffaf..f5df231 100644
--- a/test/lsan/TestCases/use_tls_dynamic.cc
+++ b/test/lsan/TestCases/Linux/use_tls_dynamic.cc
@@ -1,10 +1,11 @@
 // Test that dynamically allocated TLS space is included in the root set.
-// RUN: LSAN_BASE="detect_leaks=1:report_objects=1:use_stacks=0:use_registers=0:use_ld_allocations=0"
+// RUN: LSAN_BASE="report_objects=1:use_stacks=0:use_registers=0:use_ld_allocations=0"
 // RUN: %clangxx %s -DBUILD_DSO -fPIC -shared -o %t-so.so
 // RUN: %clangxx_lsan %s -o %t
-// RUN: LSAN_OPTIONS=$LSAN_BASE:"use_tls=0" not %run %t 2>&1 | FileCheck %s
-// RUN: LSAN_OPTIONS=$LSAN_BASE:"use_tls=1" %run %t 2>&1
-// RUN: LSAN_OPTIONS="" %run %t 2>&1
+// RUN: %env_lsan_opts=$LSAN_BASE:"use_tls=0" not %run %t 2>&1 | FileCheck %s
+// RUN: %env_lsan_opts=$LSAN_BASE:"use_tls=1" %run %t 2>&1
+// RUN: %env_lsan_opts="" %run %t 2>&1
+// UNSUPPORTED: i386-linux,arm
 
 #ifndef BUILD_DSO
 #include <assert.h>
diff --git a/test/lsan/TestCases/use_tls_pthread_specific_dynamic.cc b/test/lsan/TestCases/Linux/use_tls_pthread_specific_dynamic.cc
similarity index 80%
rename from test/lsan/TestCases/use_tls_pthread_specific_dynamic.cc
rename to test/lsan/TestCases/Linux/use_tls_pthread_specific_dynamic.cc
index f075d03..650e6ad 100644
--- a/test/lsan/TestCases/use_tls_pthread_specific_dynamic.cc
+++ b/test/lsan/TestCases/Linux/use_tls_pthread_specific_dynamic.cc
@@ -1,9 +1,9 @@
 // Test that dynamically allocated thread-specific storage is included in the root set.
-// RUN: LSAN_BASE="detect_leaks=1:report_objects=1:use_stacks=0:use_registers=0"
+// RUN: LSAN_BASE="report_objects=1:use_stacks=0:use_registers=0"
 // RUN: %clangxx_lsan %s -o %t
-// RUN: LSAN_OPTIONS=$LSAN_BASE:"use_tls=0" not %run %t 2>&1 | FileCheck %s
-// RUN: LSAN_OPTIONS=$LSAN_BASE:"use_tls=1" %run %t 2>&1
-// RUN: LSAN_OPTIONS="" %run %t 2>&1
+// RUN: %env_lsan_opts=$LSAN_BASE:"use_tls=0" not %run %t 2>&1 | FileCheck %s
+// RUN: %env_lsan_opts=$LSAN_BASE:"use_tls=1" %run %t 2>&1
+// RUN: %env_lsan_opts="" %run %t 2>&1
 
 #include <assert.h>
 #include <pthread.h>
diff --git a/test/lsan/TestCases/use_tls_pthread_specific_static.cc b/test/lsan/TestCases/Linux/use_tls_pthread_specific_static.cc
similarity index 76%
rename from test/lsan/TestCases/use_tls_pthread_specific_static.cc
rename to test/lsan/TestCases/Linux/use_tls_pthread_specific_static.cc
index d97abab..cafe40f 100644
--- a/test/lsan/TestCases/use_tls_pthread_specific_static.cc
+++ b/test/lsan/TestCases/Linux/use_tls_pthread_specific_static.cc
@@ -1,9 +1,9 @@
 // Test that statically allocated thread-specific storage is included in the root set.
-// RUN: LSAN_BASE="detect_leaks=1:report_objects=1:use_stacks=0:use_registers=0"
+// RUN: LSAN_BASE="report_objects=1:use_stacks=0:use_registers=0"
 // RUN: %clangxx_lsan %s -o %t
-// RUN: LSAN_OPTIONS=$LSAN_BASE:"use_tls=0" not %run %t 2>&1 | FileCheck %s
-// RUN: LSAN_OPTIONS=$LSAN_BASE:"use_tls=1" %run %t 2>&1
-// RUN: LSAN_OPTIONS="" %run %t 2>&1
+// RUN: %env_lsan_opts=$LSAN_BASE:"use_tls=0" not %run %t 2>&1 | FileCheck %s
+// RUN: %env_lsan_opts=$LSAN_BASE:"use_tls=1" %run %t 2>&1
+// RUN: %env_lsan_opts="" %run %t 2>&1
 
 #include <assert.h>
 #include <pthread.h>
diff --git a/test/lsan/TestCases/use_tls_static.cc b/test/lsan/TestCases/Linux/use_tls_static.cc
similarity index 65%
rename from test/lsan/TestCases/use_tls_static.cc
rename to test/lsan/TestCases/Linux/use_tls_static.cc
index be34a3a..84cc6c9 100644
--- a/test/lsan/TestCases/use_tls_static.cc
+++ b/test/lsan/TestCases/Linux/use_tls_static.cc
@@ -1,9 +1,9 @@
 // Test that statically allocated TLS space is included in the root set.
-// RUN: LSAN_BASE="detect_leaks=1:report_objects=1:use_stacks=0:use_registers=0"
+// RUN: LSAN_BASE="report_objects=1:use_stacks=0:use_registers=0"
 // RUN: %clangxx_lsan %s -o %t
-// RUN: LSAN_OPTIONS=$LSAN_BASE:"use_tls=0" not %run %t 2>&1 | FileCheck %s
-// RUN: LSAN_OPTIONS=$LSAN_BASE:"use_tls=1" %run %t 2>&1
-// RUN: LSAN_OPTIONS="" %run %t 2>&1
+// RUN: %env_lsan_opts=$LSAN_BASE:"use_tls=0" not %run %t 2>&1 | FileCheck %s
+// RUN: %env_lsan_opts=$LSAN_BASE:"use_tls=1" %run %t 2>&1
+// RUN: %env_lsan_opts="" %run %t 2>&1
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/test/lsan/TestCases/allocator_returns_null.cc b/test/lsan/TestCases/allocator_returns_null.cc
new file mode 100644
index 0000000..28dd696
--- /dev/null
+++ b/test/lsan/TestCases/allocator_returns_null.cc
@@ -0,0 +1,131 @@
+// Test the behavior of malloc/calloc/realloc/new when the allocation size is
+// more than LSan allocator's max allowed one.
+// By default (allocator_may_return_null=0) the process should crash.
+// With allocator_may_return_null=1 the allocator should return 0, except the
+// operator new(), which should crash anyway (operator new(std::nothrow) should
+// return nullptr, indeed).
+//
+// RUN: %clangxx_lsan -O0 %s -o %t
+// RUN: not %run %t malloc 2>&1 | FileCheck %s --check-prefix=CHECK-mCRASH
+// RUN: %env_lsan_opts=allocator_may_return_null=0 not %run %t malloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-mCRASH
+// RUN: %env_lsan_opts=allocator_may_return_null=1     %run %t malloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-mNULL
+// RUN: %env_lsan_opts=allocator_may_return_null=0 not %run %t calloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-cCRASH
+// RUN: %env_lsan_opts=allocator_may_return_null=1     %run %t calloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-cNULL
+// RUN: %env_lsan_opts=allocator_may_return_null=0 not %run %t calloc-overflow 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-coCRASH
+// RUN: %env_lsan_opts=allocator_may_return_null=1     %run %t calloc-overflow 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-coNULL
+// RUN: %env_lsan_opts=allocator_may_return_null=0 not %run %t realloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-rCRASH
+// RUN: %env_lsan_opts=allocator_may_return_null=1     %run %t realloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-rNULL
+// RUN: %env_lsan_opts=allocator_may_return_null=0 not %run %t realloc-after-malloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-mrCRASH
+// RUN: %env_lsan_opts=allocator_may_return_null=1     %run %t realloc-after-malloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-mrNULL
+// RUN: %env_lsan_opts=allocator_may_return_null=0 not %run %t new 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-nCRASH
+// RUN: %env_lsan_opts=allocator_may_return_null=1 not %run %t new 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-nCRASH
+// RUN: %env_lsan_opts=allocator_may_return_null=0 not %run %t new-nothrow 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-nnCRASH
+// RUN: %env_lsan_opts=allocator_may_return_null=1     %run %t new-nothrow 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-nnNULL
+
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits>
+#include <new>
+
+int main(int argc, char **argv) {
+  // Disable stderr buffering. Needed on Windows.
+  setvbuf(stderr, NULL, _IONBF, 0);
+
+  assert(argc == 2);
+  const char *action = argv[1];
+  fprintf(stderr, "%s:\n", action);
+
+  // Use max of ASan and LSan allocator limits to cover both "lsan" and
+  // "lsan + asan" configs.
+  static const size_t kMaxAllowedMallocSizePlusOne =
+#if __LP64__ || defined(_WIN64)
+      (1ULL << 40) + 1;
+#else
+      (3UL << 30) + 1;
+#endif
+
+  void *x = 0;
+  if (!strcmp(action, "malloc")) {
+    x = malloc(kMaxAllowedMallocSizePlusOne);
+  } else if (!strcmp(action, "calloc")) {
+    x = calloc((kMaxAllowedMallocSizePlusOne / 4) + 1, 4);
+  } else if (!strcmp(action, "calloc-overflow")) {
+    volatile size_t kMaxSizeT = std::numeric_limits<size_t>::max();
+    size_t kArraySize = 4096;
+    volatile size_t kArraySize2 = kMaxSizeT / kArraySize + 10;
+    x = calloc(kArraySize, kArraySize2);
+  } else if (!strcmp(action, "realloc")) {
+    x = realloc(0, kMaxAllowedMallocSizePlusOne);
+  } else if (!strcmp(action, "realloc-after-malloc")) {
+    char *t = (char*)malloc(100);
+    *t = 42;
+    x = realloc(t, kMaxAllowedMallocSizePlusOne);
+    assert(*t == 42);
+    free(t);
+  } else if (!strcmp(action, "new")) {
+    x = operator new(kMaxAllowedMallocSizePlusOne);
+  } else if (!strcmp(action, "new-nothrow")) {
+    x = operator new(kMaxAllowedMallocSizePlusOne, std::nothrow);
+  } else {
+    assert(0);
+  }
+
+  fprintf(stderr, "errno: %d\n", errno);
+
+  // The NULL pointer is printed differently on different systems, while (long)0
+  // is always the same.
+  fprintf(stderr, "x: %zu\n", (size_t)x);
+  free(x);
+
+  return x != 0;
+}
+
+// CHECK-mCRASH: malloc:
+// CHECK-mCRASH: Sanitizer's allocator is terminating the process
+// CHECK-cCRASH: calloc:
+// CHECK-cCRASH: Sanitizer's allocator is terminating the process
+// CHECK-coCRASH: calloc-overflow:
+// CHECK-coCRASH: Sanitizer's allocator is terminating the process
+// CHECK-rCRASH: realloc:
+// CHECK-rCRASH: Sanitizer's allocator is terminating the process
+// CHECK-mrCRASH: realloc-after-malloc:
+// CHECK-mrCRASH: Sanitizer's allocator is terminating the process
+// CHECK-nCRASH: new:
+// CHECK-nCRASH: Sanitizer's allocator is terminating the process
+// CHECK-nnCRASH: new-nothrow:
+// CHECK-nnCRASH: Sanitizer's allocator is terminating the process
+
+// CHECK-mNULL: malloc:
+// CHECK-mNULL: errno: 12
+// CHECK-mNULL: x: 0
+// CHECK-cNULL: calloc:
+// CHECK-cNULL: errno: 12
+// CHECK-cNULL: x: 0
+// CHECK-coNULL: calloc-overflow:
+// CHECK-coNULL: errno: 12
+// CHECK-coNULL: x: 0
+// CHECK-rNULL: realloc:
+// CHECK-rNULL: errno: 12
+// CHECK-rNULL: x: 0
+// CHECK-mrNULL: realloc-after-malloc:
+// CHECK-mrNULL: errno: 12
+// CHECK-mrNULL: x: 0
+// CHECK-nnNULL: new-nothrow:
+// CHECK-nnNULL: x: 0
diff --git a/test/lsan/TestCases/default_options.cc b/test/lsan/TestCases/default_options.cc
new file mode 100644
index 0000000..b5361c0
--- /dev/null
+++ b/test/lsan/TestCases/default_options.cc
@@ -0,0 +1,11 @@
+// RUN: %clangxx_lsan -O2 %s -o %t && %run %t 2>&1 | FileCheck %s
+
+extern "C"
+const char *__lsan_default_options() {
+  // CHECK: Available flags for {{Leak|Address}}Sanitizer:
+  return "verbosity=1 help=1";
+}
+
+int main() {
+  return 0;
+}
diff --git a/test/lsan/TestCases/disabler.c b/test/lsan/TestCases/disabler.c
index 1c4529d..f8b7f0d 100644
--- a/test/lsan/TestCases/disabler.c
+++ b/test/lsan/TestCases/disabler.c
@@ -1,7 +1,7 @@
 // Test for __lsan_disable() / __lsan_enable().
 // RUN: LSAN_BASE="report_objects=1:use_registers=0:use_stacks=0:use_globals=0:use_tls=0"
 // RUN: %clang_lsan %s -o %t
-// RUN: LSAN_OPTIONS=$LSAN_BASE not %run %t 2>&1 | FileCheck %s
+// RUN: %env_lsan_opts=$LSAN_BASE not %run %t 2>&1 | FileCheck %s
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/test/lsan/TestCases/disabler.cc b/test/lsan/TestCases/disabler.cc
index 95e58f4..c5ffdb0 100644
--- a/test/lsan/TestCases/disabler.cc
+++ b/test/lsan/TestCases/disabler.cc
@@ -1,7 +1,7 @@
 // Test for ScopedDisabler.
-// RUN: LSAN_BASE="detect_leaks=1:report_objects=1:use_registers=0:use_stacks=0:use_globals=0:use_tls=0"
+// RUN: LSAN_BASE="report_objects=1:use_registers=0:use_stacks=0:use_globals=0:use_tls=0"
 // RUN: %clangxx_lsan %s -o %t
-// RUN: LSAN_OPTIONS=$LSAN_BASE not %run %t 2>&1 | FileCheck %s
+// RUN: %env_lsan_opts=$LSAN_BASE not %run %t 2>&1 | FileCheck %s
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/test/lsan/TestCases/do_leak_check_override.cc b/test/lsan/TestCases/do_leak_check_override.cc
index 3d191f8..40a9763 100644
--- a/test/lsan/TestCases/do_leak_check_override.cc
+++ b/test/lsan/TestCases/do_leak_check_override.cc
@@ -1,10 +1,10 @@
 // Test for __lsan_do_leak_check(). We test it by making the leak check run
 // before global destructors, which also tests compatibility with HeapChecker's
 // "normal" mode (LSan runs in "strict" mode by default).
-// RUN: LSAN_BASE="detect_leaks=1:use_stacks=0:use_registers=0"
+// RUN: LSAN_BASE="use_stacks=0:use_registers=0"
 // RUN: %clangxx_lsan %s -o %t
-// RUN: LSAN_OPTIONS=$LSAN_BASE not %run %t 2>&1 | FileCheck --check-prefix=CHECK-strict %s
-// RUN: LSAN_OPTIONS=$LSAN_BASE not %run %t foo 2>&1 | FileCheck --check-prefix=CHECK-normal %s
+// RUN: %env_lsan_opts=$LSAN_BASE not %run %t 2>&1 | FileCheck --check-prefix=CHECK-strict %s
+// RUN: %env_lsan_opts=$LSAN_BASE not %run %t foo 2>&1 | FileCheck --check-prefix=CHECK-normal %s
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/test/lsan/TestCases/high_allocator_contention.cc b/test/lsan/TestCases/high_allocator_contention.cc
index 322e61b..cbe592c 100644
--- a/test/lsan/TestCases/high_allocator_contention.cc
+++ b/test/lsan/TestCases/high_allocator_contention.cc
@@ -1,8 +1,8 @@
 // A benchmark that executes malloc/free pairs in parallel.
 // Usage: ./a.out number_of_threads total_number_of_allocations
-// RUN: LSAN_BASE="detect_leaks=1:use_ld_allocations=0"
+// RUN: LSAN_BASE="use_ld_allocations=0"
 // RUN: %clangxx_lsan %s -o %t
-// RUN: LSAN_OPTIONS=$LSAN_BASE %run %t 5 1000000 2>&1
+// RUN: %env_lsan_opts=$LSAN_BASE %run %t 5 1000000 2>&1
 #include <assert.h>
 #include <pthread.h>
 #include <stdlib.h>
diff --git a/test/lsan/TestCases/ignore_object.c b/test/lsan/TestCases/ignore_object.c
index 2aa4f14..53dea75 100644
--- a/test/lsan/TestCases/ignore_object.c
+++ b/test/lsan/TestCases/ignore_object.c
@@ -1,7 +1,7 @@
 // Test for __lsan_ignore_object().
 // RUN: LSAN_BASE="report_objects=1:use_registers=0:use_stacks=0:use_globals=0:use_tls=0"
 // RUN: %clang_lsan %s -o %t
-// RUN: LSAN_OPTIONS=$LSAN_BASE not %run %t 2>&1 | FileCheck %s
+// RUN: %env_lsan_opts=$LSAN_BASE not %run %t 2>&1 | FileCheck %s
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/test/lsan/TestCases/ignore_object_errors.cc b/test/lsan/TestCases/ignore_object_errors.cc
index 4160327..76cd3bb 100644
--- a/test/lsan/TestCases/ignore_object_errors.cc
+++ b/test/lsan/TestCases/ignore_object_errors.cc
@@ -1,6 +1,6 @@
 // Test for incorrect use of __lsan_ignore_object().
 // RUN: %clangxx_lsan %s -o %t
-// RUN: LSAN_OPTIONS=$LSAN_BASE %run %t 2>&1 | FileCheck %s
+// RUN: %env_lsan_opts=$LSAN_BASE %run %t 2>&1 | FileCheck %s
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/test/lsan/TestCases/large_allocation_leak.cc b/test/lsan/TestCases/large_allocation_leak.cc
index 7254f9c..66f364f 100644
--- a/test/lsan/TestCases/large_allocation_leak.cc
+++ b/test/lsan/TestCases/large_allocation_leak.cc
@@ -1,11 +1,11 @@
 // Test that LargeMmapAllocator's chunks aren't reachable via some internal data structure.
-// RUN: LSAN_BASE="detect_leaks=1:report_objects=1:use_stacks=0:use_registers=0"
+// RUN: LSAN_BASE="report_objects=1:use_stacks=0:use_registers=0"
 // RUN: %clangxx_lsan %s -o %t
-// RUN: LSAN_OPTIONS=$LSAN_BASE not %run %t 2>&1 | FileCheck %s
+// RUN: %env_lsan_opts=$LSAN_BASE not %run %t 2>&1 | FileCheck %s
 
 // For 32 bit LSan it's pretty likely that large chunks are "reachable" from some
 // internal data structures (e.g. Glibc global data).
-// UNSUPPORTED: x86
+// UNSUPPORTED: x86, arm
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/test/lsan/TestCases/leak_check_at_exit.cc b/test/lsan/TestCases/leak_check_at_exit.cc
index 5659b39..8a8ff82 100644
--- a/test/lsan/TestCases/leak_check_at_exit.cc
+++ b/test/lsan/TestCases/leak_check_at_exit.cc
@@ -1,10 +1,10 @@
 // Test for the leak_check_at_exit flag.
-// RUN: LSAN_BASE="detect_leaks=1:use_stacks=0:use_registers=0"
+// RUN: LSAN_BASE="use_stacks=0:use_registers=0"
 // RUN: %clangxx_lsan %s -o %t
-// RUN: LSAN_OPTIONS=$LSAN_BASE not %run %t foo 2>&1 | FileCheck %s --check-prefix=CHECK-do
-// RUN: LSAN_OPTIONS=$LSAN_BASE not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-do
-// RUN: LSAN_OPTIONS=$LSAN_BASE:"leak_check_at_exit=0" not %run %t foo 2>&1 | FileCheck %s --check-prefix=CHECK-do
-// RUN: LSAN_OPTIONS=$LSAN_BASE:"leak_check_at_exit=0" %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-dont
+// RUN: %env_lsan_opts=$LSAN_BASE not %run %t foo 2>&1 | FileCheck %s --check-prefix=CHECK-do
+// RUN: %env_lsan_opts=$LSAN_BASE not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-do
+// RUN: %env_lsan_opts=$LSAN_BASE:"leak_check_at_exit=0" not %run %t foo 2>&1 | FileCheck %s --check-prefix=CHECK-do
+// RUN: %env_lsan_opts=$LSAN_BASE:"leak_check_at_exit=0" %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-dont
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/test/lsan/TestCases/leak_check_before_thread_started.cc b/test/lsan/TestCases/leak_check_before_thread_started.cc
index ca818e1..94084dc 100644
--- a/test/lsan/TestCases/leak_check_before_thread_started.cc
+++ b/test/lsan/TestCases/leak_check_before_thread_started.cc
@@ -1,7 +1,7 @@
 // Regression test for http://llvm.org/bugs/show_bug.cgi?id=21621
 // This test relies on timing between threads, so any failures will be flaky.
 // RUN: %clangxx_lsan %s -o %t
-// RUN: LSAN_OPTIONS="log_pointers=1:log_threads=1" %run %t
+// RUN: %env_lsan_opts="log_pointers=1:log_threads=1" %run %t
 #include <assert.h>
 #include <pthread.h>
 #include <stdio.h>
diff --git a/test/lsan/TestCases/link_turned_off.cc b/test/lsan/TestCases/link_turned_off.cc
index a425a6c..7e1b33e 100644
--- a/test/lsan/TestCases/link_turned_off.cc
+++ b/test/lsan/TestCases/link_turned_off.cc
@@ -1,15 +1,17 @@
 // Test for disabling LSan at link-time.
-// RUN: LSAN_BASE="detect_leaks=1:use_stacks=0:use_registers=0"
+// RUN: LSAN_BASE="use_stacks=0:use_registers=0"
 // RUN: %clangxx_lsan %s -o %t
-// RUN: LSAN_OPTIONS=$LSAN_BASE %run %t
-// RUN: LSAN_OPTIONS=$LSAN_BASE not %run %t foo 2>&1 | FileCheck %s
+// RUN: %env_lsan_opts=$LSAN_BASE %run %t
+// RUN: %env_lsan_opts=$LSAN_BASE not %run %t foo 2>&1 | FileCheck %s
+//
+// UNSUPPORTED: darwin
 
 #include <sanitizer/lsan_interface.h>
 
 int argc_copy;
 
 extern "C" {
-int __lsan_is_turned_off() {
+int __attribute__((used)) __lsan_is_turned_off() {
   return (argc_copy == 1);
 }
 }
diff --git a/test/lsan/TestCases/many_tls_keys.cc b/test/lsan/TestCases/many_tls_keys.cc
new file mode 100644
index 0000000..5b5d692
--- /dev/null
+++ b/test/lsan/TestCases/many_tls_keys.cc
@@ -0,0 +1,97 @@
+// Test that lsan handles tls correctly for many threads
+// RUN: LSAN_BASE="report_objects=1:use_stacks=0:use_registers=0"
+// RUN: %clangxx_lsan %s -DUSE_THREAD -o %t-thread
+// RUN: %clangxx_lsan %s -DUSE_PTHREAD -o %t-pthread
+// RUN: %env_lsan_opts=$LSAN_BASE:"use_tls=0" not %run %t-thread 2>&1 | FileCheck %s
+// RUN: %env_lsan_opts=$LSAN_BASE:"use_tls=1" %run %t-thread 2>&1
+// RUN: %env_lsan_opts="" %run %t-thread 2>&1
+// RUN: %env_lsan_opts=$LSAN_BASE:"use_tls=0" not %run %t-pthread 2>&1 | FileCheck %s
+// RUN: %env_lsan_opts=$LSAN_BASE:"use_tls=1" %run %t-pthread 2>&1
+// RUN: %env_lsan_opts="" %run %t-pthread 2>&1
+
+// Patch r303906 did not fix all the problems.
+// UNSUPPORTED: arm-linux,armhf-linux
+
+#include <assert.h>
+#include <limits.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+static const int NUM_THREADS = 10;
+
+pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+int finished = 0;
+
+#if USE_THREAD
+__thread void *ptr1;
+__thread void *ptr2;
+__thread void *ptr3;
+__thread void *ptr4;
+__thread void *ptr5;
+
+void alloc() {
+  ptr1 = malloc(1111);
+  ptr2 = malloc(2222);
+  ptr3 = malloc(3333);
+  ptr4 = malloc(4444);
+  ptr5 = malloc(5555);
+}
+
+#elif USE_PTHREAD
+// We won't be able to create the maximum number of keys, due to other users
+// of the tls, but we'll use as many keys as we can before failing to create
+// a new key.
+pthread_key_t keys[PTHREAD_KEYS_MAX];
+static const int PTHREAD_KEY_INVALID = 0xffffffff;
+
+void alloc() {
+  for (int i = 0; i < PTHREAD_KEYS_MAX; ++i) {
+    void *ptr = malloc(123);
+    if ((keys[i] == PTHREAD_KEY_INVALID) || pthread_setspecific(keys[i], ptr)) {
+      free(ptr);
+      break;
+    }
+  }
+}
+
+void pthread_destructor(void *arg) {
+  assert(0 && "pthread destructors shouldn't be called");
+}
+#endif
+
+void *thread_start(void *arg) {
+  alloc();
+
+  pthread_mutex_lock(&mutex);
+  finished++;
+  pthread_mutex_unlock(&mutex);
+
+  // don't exit, to intentionally leak tls data
+  while (1)
+    sleep(100);
+}
+
+int main() {
+#if USE_PTHREAD
+  for (int i = 0; i < PTHREAD_KEYS_MAX; ++i) {
+    if (pthread_key_create(&keys[i], pthread_destructor)) {
+      keys[i] = PTHREAD_KEY_INVALID;
+      break;
+    }
+  }
+#endif
+
+  pthread_t thread[NUM_THREADS];
+  for (int i = 0; i < NUM_THREADS; ++i) {
+    assert(0 == pthread_create(&thread[i], 0, thread_start, 0));
+  }
+  // spin until all threads have finished
+  while (finished < NUM_THREADS)
+    sleep(1);
+  exit(0);
+}
+
+// CHECK: LeakSanitizer: detected memory leaks
+// CHECK: SUMMARY: {{(Leak|Address)}}Sanitizer:
diff --git a/test/lsan/TestCases/pointer_to_self.cc b/test/lsan/TestCases/pointer_to_self.cc
index ea1208d..62683a2 100644
--- a/test/lsan/TestCases/pointer_to_self.cc
+++ b/test/lsan/TestCases/pointer_to_self.cc
@@ -1,8 +1,8 @@
 // Regression test: pointers to self should not confuse LSan into thinking the
 // object is indirectly leaked. Only external pointers count.
-// RUN: LSAN_BASE="detect_leaks=1:report_objects=1:use_registers=0"
+// RUN: LSAN_BASE="report_objects=1:use_registers=0"
 // RUN: %clangxx_lsan %s -o %t
-// RUN: LSAN_OPTIONS=$LSAN_BASE:"use_stacks=0" not %run %t 2>&1 | FileCheck %s
+// RUN: %env_lsan_opts=$LSAN_BASE:"use_stacks=0" not %run %t 2>&1 | FileCheck %s
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/test/lsan/TestCases/print_suppressions.cc b/test/lsan/TestCases/print_suppressions.cc
index 1a252e4..2fa199d 100644
--- a/test/lsan/TestCases/print_suppressions.cc
+++ b/test/lsan/TestCases/print_suppressions.cc
@@ -1,11 +1,11 @@
 // Print matched suppressions only if print_suppressions=1 AND at least one is
 // matched. Default is print_suppressions=true.
-// RUN: LSAN_BASE="detect_leaks=1:use_registers=0:use_stacks=0"
+// RUN: LSAN_BASE="use_registers=0:use_stacks=0"
 // RUN: %clangxx_lsan %s -o %t
-// RUN: LSAN_OPTIONS=$LSAN_BASE:print_suppressions=0 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-dont-print
-// RUN: LSAN_OPTIONS=$LSAN_BASE %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-dont-print
-// RUN: LSAN_OPTIONS=$LSAN_BASE:print_suppressions=0 %run %t foo 2>&1 | FileCheck %s --check-prefix=CHECK-dont-print
-// RUN: LSAN_OPTIONS=$LSAN_BASE %run %t foo 2>&1 | FileCheck %s --check-prefix=CHECK-print
+// RUN: %env_lsan_opts=$LSAN_BASE:print_suppressions=0 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-dont-print
+// RUN: %env_lsan_opts=$LSAN_BASE %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-dont-print
+// RUN: %env_lsan_opts=$LSAN_BASE:print_suppressions=0 %run %t foo 2>&1 | FileCheck %s --check-prefix=CHECK-dont-print
+// RUN: %env_lsan_opts=$LSAN_BASE %run %t foo 2>&1 | FileCheck %s --check-prefix=CHECK-print
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/test/lsan/TestCases/recoverable_leak_check.cc b/test/lsan/TestCases/recoverable_leak_check.cc
index 04686a5..85988e2 100644
--- a/test/lsan/TestCases/recoverable_leak_check.cc
+++ b/test/lsan/TestCases/recoverable_leak_check.cc
@@ -1,8 +1,10 @@
 // Test for on-demand leak checking.
-// RUN: LSAN_BASE="detect_leaks=1:use_stacks=0:use_registers=0"
+// RUN: LSAN_BASE="use_stacks=0:use_registers=0"
 // RUN: %clangxx_lsan %s -o %t
-// RUN: LSAN_OPTIONS=$LSAN_BASE %run %t foo 2>&1 | FileCheck %s
-// RUN: LSAN_OPTIONS=$LSAN_BASE %run %t 2>&1 | FileCheck %s
+// RUN: %env_lsan_opts=$LSAN_BASE %run %t foo 2>&1 | FileCheck %s
+// RUN: %env_lsan_opts=$LSAN_BASE %run %t 2>&1 | FileCheck %s
+//
+// UNSUPPORTED: darwin
 
 #include <assert.h>
 #include <stdio.h>
diff --git a/test/lsan/TestCases/register_root_region.cc b/test/lsan/TestCases/register_root_region.cc
index a63b0cc..b73b56b 100644
--- a/test/lsan/TestCases/register_root_region.cc
+++ b/test/lsan/TestCases/register_root_region.cc
@@ -1,9 +1,9 @@
 // Test for __lsan_(un)register_root_region().
-// RUN: LSAN_BASE="detect_leaks=1:use_stacks=0:use_registers=0"
+// RUN: LSAN_BASE="use_stacks=0:use_registers=0"
 // RUN: %clangxx_lsan %s -o %t
-// RUN: LSAN_OPTIONS=$LSAN_BASE %run %t
-// RUN: LSAN_OPTIONS=$LSAN_BASE not %run %t foo 2>&1 | FileCheck %s
-// RUN: LSAN_OPTIONS=$LSAN_BASE:use_root_regions=0 not %run %t 2>&1 | FileCheck %s
+// RUN: %env_lsan_opts=$LSAN_BASE %run %t
+// RUN: %env_lsan_opts=$LSAN_BASE not %run %t foo 2>&1 | FileCheck %s
+// RUN: %env_lsan_opts=$LSAN_BASE:use_root_regions=0 not %run %t 2>&1 | FileCheck %s
 
 #include <assert.h>
 #include <stdio.h>
diff --git a/test/lsan/TestCases/stale_stack_leak.cc b/test/lsan/TestCases/stale_stack_leak.cc
index 8f7ab9c..8c34958 100644
--- a/test/lsan/TestCases/stale_stack_leak.cc
+++ b/test/lsan/TestCases/stale_stack_leak.cc
@@ -1,11 +1,11 @@
 // Test that out-of-scope local variables are ignored by LSan.
-// RUN: LSAN_BASE="detect_leaks=1:report_objects=1:use_registers=0:use_stacks=1"
+// RUN: LSAN_BASE="report_objects=1:use_registers=0:use_stacks=1"
 // RUN: %clangxx_lsan %s -o %t
-// RUN: LSAN_OPTIONS=$LSAN_BASE not %run %t 2>&1 | FileCheck %s
-// RUN: LSAN_OPTIONS=$LSAN_BASE":exitcode=0" %run %t 2>&1 | FileCheck --check-prefix=CHECK-sanity %s
+// RUN: %env_lsan_opts=$LSAN_BASE not %run %t 2>&1 | FileCheck %s
+// RUN: %env_lsan_opts=$LSAN_BASE":exitcode=0" %run %t 2>&1 | FileCheck --check-prefix=CHECK-sanity %s
 //
 // x86 passes parameters through stack that may lead to false negatives
-// UNSUPPORTED: x86
+// UNSUPPORTED: x86,powerpc64
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/test/lsan/TestCases/suppressions_default.cc b/test/lsan/TestCases/suppressions_default.cc
index 6c0364e..9a660e6 100644
--- a/test/lsan/TestCases/suppressions_default.cc
+++ b/test/lsan/TestCases/suppressions_default.cc
@@ -1,6 +1,6 @@
-// RUN: LSAN_BASE="detect_leaks=1:use_registers=0:use_stacks=0"
+// RUN: LSAN_BASE="use_registers=0:use_stacks=0"
 // RUN: %clangxx_lsan %s -o %t
-// RUN: LSAN_OPTIONS=$LSAN_BASE not %run %t 2>&1 | FileCheck %s
+// RUN: %env_lsan_opts=$LSAN_BASE not %run %t 2>&1 | FileCheck %s
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/test/lsan/TestCases/suppressions_file.cc b/test/lsan/TestCases/suppressions_file.cc
index 1d8a064..33cf020 100644
--- a/test/lsan/TestCases/suppressions_file.cc
+++ b/test/lsan/TestCases/suppressions_file.cc
@@ -1,15 +1,15 @@
-// RUN: LSAN_BASE="detect_leaks=1:use_registers=0:use_stacks=0"
+// RUN: LSAN_BASE="use_registers=0:use_stacks=0"
 // RUN: %clangxx_lsan %s -o %t
 
 // RUN: rm -f %t.supp
 // RUN: touch %t.supp
-// RUN: LSAN_OPTIONS="$LSAN_BASE:suppressions='%t.supp'" not %run %t 2>&1 | FileCheck %s --check-prefix=NOSUPP
+// RUN: %env_lsan_opts="$LSAN_BASE:suppressions='%t.supp'" not %run %t 2>&1 | FileCheck %s --check-prefix=NOSUPP
 
 // RUN: echo "leak:*LSanTestLeakingFunc*" > %t.supp
-// RUN: LSAN_OPTIONS="$LSAN_BASE:suppressions='%t.supp'" not %run %t 2>&1 | FileCheck %s
+// RUN: %env_lsan_opts="$LSAN_BASE:suppressions='%t.supp'" not %run %t 2>&1 | FileCheck %s
 
 // RUN: echo "leak:%t" > %t.supp
-// RUN: LSAN_OPTIONS="$LSAN_BASE:suppressions='%t.supp':symbolize=false" %run %t
+// RUN: %env_lsan_opts="$LSAN_BASE:suppressions='%t.supp':symbolize=false" %run %t
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/test/lsan/TestCases/swapcontext.cc b/test/lsan/TestCases/swapcontext.cc
index f7e95ed..9774f6c 100644
--- a/test/lsan/TestCases/swapcontext.cc
+++ b/test/lsan/TestCases/swapcontext.cc
@@ -2,8 +2,9 @@
 // memory. Make sure we don't report these leaks.
 
 // RUN: %clangxx_lsan %s -o %t
-// RUN: %run %t 2>&1
-// RUN: not %run %t foo 2>&1 | FileCheck %s
+// RUN: %env_lsan_opts= %run %t 2>&1
+// RUN: %env_lsan_opts= not %run %t foo 2>&1 | FileCheck %s
+// UNSUPPORTED: arm,powerpc64
 
 #include <stdio.h>
 #if defined(__APPLE__)
@@ -23,7 +24,7 @@
 }
 
 int main(int argc, char *argv[]) {
-  char stack_memory[kStackSize + 1];
+  char stack_memory[kStackSize + 1] __attribute__((aligned(16)));
   char *heap_memory = new char[kStackSize + 1];
   char *child_stack = (argc > 1) ? stack_memory : heap_memory;
 
diff --git a/test/lsan/TestCases/use_after_return.cc b/test/lsan/TestCases/use_after_return.cc
index 4137752..5c60ec6 100644
--- a/test/lsan/TestCases/use_after_return.cc
+++ b/test/lsan/TestCases/use_after_return.cc
@@ -1,10 +1,10 @@
 // Test that fake stack (introduced by ASan's use-after-return mode) is included
 // in the root set.
-// RUN: LSAN_BASE="detect_leaks=1:report_objects=1:use_registers=0"
+// RUN: LSAN_BASE="report_objects=1:use_registers=0"
 // RUN: %clangxx_lsan %s -O2 -o %t
-// RUN: ASAN_OPTIONS=$ASAN_OPTIONS:detect_stack_use_after_return=1 LSAN_OPTIONS=$LSAN_BASE:"use_stacks=0" not %run %t 2>&1 | FileCheck %s
-// RUN: ASAN_OPTIONS=$ASAN_OPTIONS:detect_stack_use_after_return=1 LSAN_OPTIONS=$LSAN_BASE:"use_stacks=1" %run %t 2>&1
-// RUN: ASAN_OPTIONS=$ASAN_OPTIONS:detect_stack_use_after_return=1 LSAN_OPTIONS="" %run %t 2>&1
+// RUN: ASAN_OPTIONS=$ASAN_OPTIONS:detect_stack_use_after_return=1 %env_lsan_opts=$LSAN_BASE:"use_stacks=0" not %run %t 2>&1 | FileCheck %s
+// RUN: ASAN_OPTIONS=$ASAN_OPTIONS:detect_stack_use_after_return=1 %env_lsan_opts=$LSAN_BASE:"use_stacks=1" %run %t 2>&1
+// RUN: ASAN_OPTIONS=$ASAN_OPTIONS:detect_stack_use_after_return=1 %env_lsan_opts="" %run %t 2>&1
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/test/lsan/TestCases/use_globals_initialized.cc b/test/lsan/TestCases/use_globals_initialized.cc
index 996052d..8664618 100644
--- a/test/lsan/TestCases/use_globals_initialized.cc
+++ b/test/lsan/TestCases/use_globals_initialized.cc
@@ -1,9 +1,9 @@
 // Test that initialized globals are included in the root set.
-// RUN: LSAN_BASE="detect_leaks=1:report_objects=1:use_stacks=0:use_registers=0"
+// RUN: LSAN_BASE="report_objects=1:use_stacks=0:use_registers=0"
 // RUN: %clangxx_lsan %s -o %t
-// RUN: LSAN_OPTIONS=$LSAN_BASE:"use_globals=0" not %run %t 2>&1 | FileCheck %s
-// RUN: LSAN_OPTIONS=$LSAN_BASE:"use_globals=1" %run %t 2>&1
-// RUN: LSAN_OPTIONS="" %run %t 2>&1
+// RUN: %env_lsan_opts=$LSAN_BASE:"use_globals=0" not %run %t 2>&1 | FileCheck %s
+// RUN: %env_lsan_opts=$LSAN_BASE:"use_globals=1" %run %t 2>&1
+// RUN: %env_lsan_opts="" %run %t 2>&1
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/test/lsan/TestCases/use_globals_uninitialized.cc b/test/lsan/TestCases/use_globals_uninitialized.cc
index 25ccacc..ef8f8e1 100644
--- a/test/lsan/TestCases/use_globals_uninitialized.cc
+++ b/test/lsan/TestCases/use_globals_uninitialized.cc
@@ -1,9 +1,9 @@
 // Test that uninitialized globals are included in the root set.
-// RUN: LSAN_BASE="detect_leaks=1:report_objects=1:use_stacks=0:use_registers=0"
+// RUN: LSAN_BASE="report_objects=1:use_stacks=0:use_registers=0"
 // RUN: %clangxx_lsan %s -o %t
-// RUN: LSAN_OPTIONS=$LSAN_BASE:"use_globals=0" not %run %t 2>&1 | FileCheck %s
-// RUN: LSAN_OPTIONS=$LSAN_BASE:"use_globals=1" %run %t 2>&1
-// RUN: LSAN_OPTIONS="" %run %t 2>&1
+// RUN: %env_lsan_opts=$LSAN_BASE:"use_globals=0" not %run %t 2>&1 | FileCheck %s
+// RUN: %env_lsan_opts=$LSAN_BASE:"use_globals=1" %run %t 2>&1
+// RUN: %env_lsan_opts="" %run %t 2>&1
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/test/lsan/TestCases/use_poisoned_asan.cc b/test/lsan/TestCases/use_poisoned_asan.cc
index d9ef16a..780792e 100644
--- a/test/lsan/TestCases/use_poisoned_asan.cc
+++ b/test/lsan/TestCases/use_poisoned_asan.cc
@@ -1,9 +1,9 @@
 // ASan-poisoned memory should be ignored if use_poisoned is false.
 // REQUIRES: asan
-// RUN: LSAN_BASE="detect_leaks=1:report_objects=1:use_stacks=0:use_registers=0"
+// RUN: LSAN_BASE="report_objects=1:use_stacks=0:use_registers=0"
 // RUN: %clangxx_lsan %s -o %t
-// RUN: LSAN_OPTIONS=$LSAN_BASE:"use_poisoned=0" not %run %t 2>&1 | FileCheck %s
-// RUN: LSAN_OPTIONS=$LSAN_BASE:"use_poisoned=1" %run %t 2>&1
+// RUN: %env_lsan_opts=$LSAN_BASE:"use_poisoned=0" not %run %t 2>&1 | FileCheck %s
+// RUN: %env_lsan_opts=$LSAN_BASE:"use_poisoned=1" %run %t 2>&1
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/test/lsan/TestCases/use_registers.cc b/test/lsan/TestCases/use_registers.cc
index 7896874..edcf1ae 100644
--- a/test/lsan/TestCases/use_registers.cc
+++ b/test/lsan/TestCases/use_registers.cc
@@ -1,9 +1,9 @@
 // Test that registers of running threads are included in the root set.
-// RUN: LSAN_BASE="detect_leaks=1:report_objects=1:use_stacks=0"
+// RUN: LSAN_BASE="report_objects=1:use_stacks=0"
 // RUN: %clangxx_lsan -pthread %s -o %t
-// RUN: LSAN_OPTIONS=$LSAN_BASE:"use_registers=0" not %run %t 2>&1 | FileCheck %s
-// RUN: LSAN_OPTIONS=$LSAN_BASE:"use_registers=1" %run %t 2>&1
-// RUN: LSAN_OPTIONS="" %run %t 2>&1
+// RUN: %env_lsan_opts=$LSAN_BASE:"use_registers=0" not %run %t 2>&1 | FileCheck %s
+// RUN: %env_lsan_opts=$LSAN_BASE:"use_registers=1" %run %t 2>&1
+// RUN: %env_lsan_opts="" %run %t 2>&1
 
 #include <assert.h>
 #include <pthread.h>
@@ -33,6 +33,16 @@
       :
       : "r" (p)
       );
+#elif defined(__arm__)
+  asm ( "mov r5, %0"
+      :
+      : "r" (p)
+      );
+#elif defined(__powerpc__)
+  asm ( "mr 30, %0"
+      :
+      : "r" (p)
+      );
 #else
 #error "Test is not supported on this architecture."
 #endif
diff --git a/test/lsan/TestCases/use_stacks.cc b/test/lsan/TestCases/use_stacks.cc
index 95a0100..855a8e4 100644
--- a/test/lsan/TestCases/use_stacks.cc
+++ b/test/lsan/TestCases/use_stacks.cc
@@ -1,9 +1,9 @@
 // Test that stack of main thread is included in the root set.
-// RUN: LSAN_BASE="detect_leaks=1:report_objects=1:use_registers=0"
+// RUN: LSAN_BASE="report_objects=1:use_registers=0"
 // RUN: %clangxx_lsan %s -o %t
-// RUN: LSAN_OPTIONS=$LSAN_BASE:"use_stacks=0" not %run %t 2>&1 | FileCheck %s
-// RUN: LSAN_OPTIONS=$LSAN_BASE:"use_stacks=1" %run %t 2>&1
-// RUN: LSAN_OPTIONS="" %run %t 2>&1
+// RUN: %env_lsan_opts=$LSAN_BASE:"use_stacks=0" not %run %t 2>&1 | FileCheck %s
+// RUN: %env_lsan_opts=$LSAN_BASE:"use_stacks=1" %run %t 2>&1
+// RUN: %env_lsan_opts="" %run %t 2>&1
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/test/lsan/TestCases/use_stacks_threaded.cc b/test/lsan/TestCases/use_stacks_threaded.cc
index 310171f..579dcff 100644
--- a/test/lsan/TestCases/use_stacks_threaded.cc
+++ b/test/lsan/TestCases/use_stacks_threaded.cc
@@ -1,9 +1,9 @@
 // Test that stacks of non-main threads are included in the root set.
-// RUN: LSAN_BASE="detect_leaks=1:report_objects=1:use_registers=0"
+// RUN: LSAN_BASE="report_objects=1:use_registers=0"
 // RUN: %clangxx_lsan -pthread %s -o %t
-// RUN: LSAN_OPTIONS=$LSAN_BASE:"use_stacks=0" not %run %t 2>&1 | FileCheck %s
-// RUN: LSAN_OPTIONS=$LSAN_BASE:"use_stacks=1" %run %t 2>&1
-// RUN: LSAN_OPTIONS="" %run %t 2>&1
+// RUN: %env_lsan_opts=$LSAN_BASE:"use_stacks=0" not %run %t 2>&1 | FileCheck %s
+// RUN: %env_lsan_opts=$LSAN_BASE:"use_stacks=1" %run %t 2>&1
+// RUN: %env_lsan_opts="" %run %t 2>&1
 
 #include <assert.h>
 #include <pthread.h>
diff --git a/test/lsan/TestCases/use_unaligned.cc b/test/lsan/TestCases/use_unaligned.cc
index 1179c15..26afc2d 100644
--- a/test/lsan/TestCases/use_unaligned.cc
+++ b/test/lsan/TestCases/use_unaligned.cc
@@ -1,8 +1,8 @@
 // Test that unaligned pointers are detected correctly.
-// RUN: LSAN_BASE="detect_leaks=1:report_objects=1:use_stacks=0:use_registers=0"
+// RUN: LSAN_BASE="report_objects=1:use_stacks=0:use_registers=0"
 // RUN: %clangxx_lsan %s -o %t
-// RUN: LSAN_OPTIONS=$LSAN_BASE:"use_unaligned=0" not %run %t 2>&1 | FileCheck %s
-// RUN: LSAN_OPTIONS=$LSAN_BASE:"use_unaligned=1" %run %t 2>&1
+// RUN: %env_lsan_opts=$LSAN_BASE:"use_unaligned=0" not %run %t 2>&1 | FileCheck %s
+// RUN: %env_lsan_opts=$LSAN_BASE:"use_unaligned=1" %run %t 2>&1
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/test/lsan/lit.common.cfg b/test/lsan/lit.common.cfg
index 8580eec..a5df951 100644
--- a/test/lsan/lit.common.cfg
+++ b/test/lsan/lit.common.cfg
@@ -3,6 +3,7 @@
 # Common configuration for running leak detection tests under LSan/ASan.
 
 import os
+import re
 
 import lit.util
 
@@ -31,6 +32,21 @@
   lit_config.fatal("Unknown LSan test mode: %r" % lsan_lit_test_mode)
 config.name += config.name_suffix
 
+# Platform-specific default LSAN_OPTIONS for lit tests.
+default_lsan_opts = 'detect_leaks=1'
+if config.host_os == 'Darwin':
+  # On Darwin, we default to `abort_on_error=1`, which would make tests run
+  # much slower. Let's override this and run lit tests with 'abort_on_error=0'.
+  # Also, make sure we do not overwhelm the syslog while testing.
+  default_lsan_opts += ':abort_on_error=0'
+  default_lsan_opts += ':log_to_syslog=0'
+
+if default_lsan_opts:
+  config.environment['LSAN_OPTIONS'] = default_lsan_opts
+  default_lsan_opts += ':'
+config.substitutions.append(('%env_lsan_opts=',
+                             'env LSAN_OPTIONS=' + default_lsan_opts))
+
 if lit.util.which('strace'):
   config.available_features.add('strace')
 
@@ -51,8 +67,14 @@
 config.substitutions.append( ("%clang_lsan ", build_invocation(clang_lsan_cflags)) )
 config.substitutions.append( ("%clangxx_lsan ", build_invocation(clang_lsan_cxxflags)) )
 
-# LeakSanitizer tests are currently supported on x86-64 Linux and mips64 Linux only.
-if config.host_os not in ['Linux'] or config.host_arch not in ['x86_64', 'mips64']:
+# LeakSanitizer tests are currently supported on x86-64 Linux, PowerPC64 Linux, arm Linux, mips64 Linux, and x86_64 Darwin.
+supported_linux = config.host_os is 'Linux' and config.host_arch in ['x86_64', 'ppc64', 'ppc64le', 'mips64', 'arm', 'armhf', 'armv7l']
+supported_darwin = config.host_os is 'Darwin' and config.target_arch is 'x86_64'
+if not (supported_linux or supported_darwin):
   config.unsupported = True
 
-config.suffixes = ['.c', '.cc', '.cpp']
+# Don't support Thumb due to broken fast unwinder
+if re.search('mthumb', config.target_cflags) is not None:
+  config.unsupported = True
+
+config.suffixes = ['.c', '.cc', '.cpp', '.mm']
diff --git a/test/msan/Linux/mallinfo.cc b/test/msan/Linux/mallinfo.cc
index 545ae93..b2021c5 100644
--- a/test/msan/Linux/mallinfo.cc
+++ b/test/msan/Linux/mallinfo.cc
@@ -1,5 +1,5 @@
 // RUN: %clangxx_msan -O0 -g %s -o %t && %run %t
-// REQUIRES: stable-runtime
+// UNSUPPORTED: aarch64-target-arch
 
 #include <assert.h>
 #include <malloc.h>
diff --git a/test/msan/Linux/poll.cc b/test/msan/Linux/poll.cc
new file mode 100644
index 0000000..7870d18
--- /dev/null
+++ b/test/msan/Linux/poll.cc
@@ -0,0 +1,42 @@
+// RUN: %clangxx_msan -O0 -std=c++11 -g %s -o %t
+// RUN: %run %t _ 2>&1 | FileCheck %s --check-prefix=CLEAN
+// RUN: not %run %t A 2>&1 | FileCheck %s --check-prefix=A
+// RUN: not %run %t B 2>&1 | FileCheck %s --check-prefix=B
+
+#include <assert.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdio.h>
+
+#include <sanitizer/msan_interface.h>
+
+int main(int argc, char **argv) {
+  char T = argv[1][0];
+
+  struct timespec ts;
+  ts.tv_sec = 0;
+  ts.tv_nsec = 1000;
+  int res = ppoll(nullptr, 0, &ts, nullptr);
+  assert(res == 0);
+
+  if (T == 'A') {
+    __msan_poison(&ts.tv_sec, sizeof(ts.tv_sec));
+    ppoll(nullptr, 0, &ts, nullptr);
+    // A: use-of-uninitialized-value
+  }
+
+  // A-NOT: ==1
+  // B: ==1
+  fprintf(stderr, "==1\n");
+
+  sigset_t sig;
+  if (T != 'B')
+    sigemptyset(&sig);
+  ppoll(nullptr, 0, &ts, &sig);
+  // B: use-of-uninitialized-value
+
+  // B-NOT: ==2
+  // CLEAN: ==2
+  fprintf(stderr, "==2\n");
+  return 0;
+}
diff --git a/test/msan/Linux/sendmsg.cc b/test/msan/Linux/sendmsg.cc
index 6a8ef83..4fc6c88 100644
--- a/test/msan/Linux/sendmsg.cc
+++ b/test/msan/Linux/sendmsg.cc
@@ -33,17 +33,31 @@
   char buf[kBufSize] = {0};
   pthread_t client_thread;
   struct sockaddr_in serveraddr;
-
-  sockfd = socket(AF_INET, SOCK_DGRAM, 0);
+  struct sockaddr_in6 serveraddr6;
 
   memset(&serveraddr, 0, sizeof(serveraddr));
   serveraddr.sin_family = AF_INET;
   serveraddr.sin_addr.s_addr = htonl(INADDR_ANY);
   serveraddr.sin_port = 0;
-
-  bind(sockfd, (struct sockaddr *)&serveraddr, sizeof(serveraddr));
+  struct sockaddr *addr = (struct sockaddr *)&serveraddr;
   socklen_t addrlen = sizeof(serveraddr);
-  getsockname(sockfd, (struct sockaddr *)&serveraddr, &addrlen);
+
+  sockfd = socket(addr->sa_family, SOCK_DGRAM, 0);
+  if (sockfd <= 0) {
+    // Try to fall-back to IPv6
+    memset(&serveraddr6, 0, sizeof(serveraddr6));
+    serveraddr6.sin6_family = AF_INET6;
+    serveraddr6.sin6_addr = in6addr_any;
+    serveraddr6.sin6_port = 0;
+    addr = (struct sockaddr *)&serveraddr6;
+    addrlen = sizeof(serveraddr6);
+
+    sockfd = socket(addr->sa_family, SOCK_DGRAM, 0);
+  }
+  assert(sockfd > 0);
+
+  bind(sockfd, addr, addrlen);
+  getsockname(sockfd, addr, &addrlen);
 
 #if defined(POISON)
   __msan_poison(buf + 7, 1);
@@ -52,7 +66,7 @@
 #if defined(SENDMSG)
   struct iovec iov[2] = {{buf, 5}, {buf + 5, 5}};
   struct msghdr msg;
-  msg.msg_name = &serveraddr;
+  msg.msg_name = addr;
   msg.msg_namelen = addrlen;
   msg.msg_iov = iov;
   msg.msg_iovlen = 2;
@@ -62,14 +76,13 @@
 #endif
 
 #if defined(SEND)
-  ret = connect(sockfd, (struct sockaddr *)&serveraddr, addrlen);
+  ret = connect(sockfd, addr, addrlen);
   assert(ret == 0);
   ret = send(sockfd, buf, kBufSize, 0);
   // SEND: Uninitialized bytes in __interceptor_send at offset 7 inside [{{.*}}, 10)
   assert(ret > 0);
 #elif defined(SENDTO)
-  ret =
-      sendto(sockfd, buf, kBufSize, 0, (struct sockaddr *)&serveraddr, addrlen);
+  ret = sendto(sockfd, buf, kBufSize, 0, addr, addrlen);
   // SENDTO: Uninitialized bytes in __interceptor_sendto at offset 7 inside [{{.*}}, 10)
   assert(ret > 0);
 #elif defined(SENDMSG)
diff --git a/test/msan/Linux/strerror_r.cc b/test/msan/Linux/strerror_r.cc
new file mode 100644
index 0000000..aec653f
--- /dev/null
+++ b/test/msan/Linux/strerror_r.cc
@@ -0,0 +1,18 @@
+// RUN: %clang_msan -O0 -g %s -o %t && %run %t
+
+#include <assert.h>
+#include <errno.h>
+#include <string.h>
+
+int main() {
+  char buf[1000];
+  char *res = strerror_r(EINVAL, buf, sizeof(buf));
+  assert(res);
+  volatile int z = strlen(res);
+
+  res = strerror_r(-1, buf, sizeof(buf));
+  assert(res);
+  z = strlen(res);
+
+  return 0;
+}
diff --git a/test/msan/__strxfrm_l.cc b/test/msan/__strxfrm_l.cc
new file mode 100644
index 0000000..c4eb10e
--- /dev/null
+++ b/test/msan/__strxfrm_l.cc
@@ -0,0 +1,19 @@
+// RUN: %clangxx_msan -std=c++11 -O0 -g %s -o %t && %run %t
+// REQUIRES: x86_64-linux
+
+#include <assert.h>
+#include <locale.h>
+#include <sanitizer/msan_interface.h>
+#include <stdlib.h>
+#include <string.h>
+
+extern "C" decltype(strxfrm_l) __strxfrm_l;
+
+int main(void) {
+  char q[10];
+  locale_t loc = newlocale(LC_ALL_MASK, "", (locale_t)0);
+  size_t n = __strxfrm_l(q, "qwerty", sizeof(q), loc);
+  assert(n < sizeof(q));
+  __msan_check_mem_is_initialized(q, n + 1);
+  return 0;
+}
diff --git a/test/msan/allocator_returns_null.cc b/test/msan/allocator_returns_null.cc
index f4ea51d..583b5b4 100644
--- a/test/msan/allocator_returns_null.cc
+++ b/test/msan/allocator_returns_null.cc
@@ -1,63 +1,102 @@
-// Test the behavior of malloc/calloc/realloc when the allocation size is huge.
+// Test the behavior of malloc/calloc/realloc/new when the allocation size is
+// more than MSan allocator's max allowed one.
 // By default (allocator_may_return_null=0) the process should crash.
-// With allocator_may_return_null=1 the allocator should return 0.
+// With allocator_may_return_null=1 the allocator should return 0, except the
+// operator new(), which should crash anyway (operator new(std::nothrow) should
+// return nullptr, indeed).
 //
 // RUN: %clangxx_msan -O0 %s -o %t
 // RUN: not %run %t malloc 2>&1 | FileCheck %s --check-prefix=CHECK-mCRASH
-// RUN: MSAN_OPTIONS=allocator_may_return_null=0 not %run %t malloc 2>&1 | FileCheck %s --check-prefix=CHECK-mCRASH
-// RUN: MSAN_OPTIONS=allocator_may_return_null=1     %run %t malloc 2>&1 | FileCheck %s --check-prefix=CHECK-mNULL
-// RUN: MSAN_OPTIONS=allocator_may_return_null=0 not %run %t calloc 2>&1 | FileCheck %s --check-prefix=CHECK-cCRASH
-// RUN: MSAN_OPTIONS=allocator_may_return_null=1     %run %t calloc 2>&1 | FileCheck %s --check-prefix=CHECK-cNULL
-// RUN: MSAN_OPTIONS=allocator_may_return_null=0 not %run %t calloc-overflow 2>&1 | FileCheck %s --check-prefix=CHECK-coCRASH
-// RUN: MSAN_OPTIONS=allocator_may_return_null=1     %run %t calloc-overflow 2>&1 | FileCheck %s --check-prefix=CHECK-coNULL
-// RUN: MSAN_OPTIONS=allocator_may_return_null=0 not %run %t realloc 2>&1 | FileCheck %s --check-prefix=CHECK-rCRASH
-// RUN: MSAN_OPTIONS=allocator_may_return_null=1     %run %t realloc 2>&1 | FileCheck %s --check-prefix=CHECK-rNULL
-// RUN: MSAN_OPTIONS=allocator_may_return_null=0 not %run %t realloc-after-malloc 2>&1 | FileCheck %s --check-prefix=CHECK-mrCRASH
-// RUN: MSAN_OPTIONS=allocator_may_return_null=1     %run %t realloc-after-malloc 2>&1 | FileCheck %s --check-prefix=CHECK-mrNULL
+// RUN: MSAN_OPTIONS=allocator_may_return_null=0 not %run %t malloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-mCRASH
+// RUN: MSAN_OPTIONS=allocator_may_return_null=1     %run %t malloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-mNULL
+// RUN: MSAN_OPTIONS=allocator_may_return_null=0 not %run %t calloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-cCRASH
+// RUN: MSAN_OPTIONS=allocator_may_return_null=1     %run %t calloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-cNULL
+// RUN: MSAN_OPTIONS=allocator_may_return_null=0 not %run %t calloc-overflow 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-coCRASH
+// RUN: MSAN_OPTIONS=allocator_may_return_null=1     %run %t calloc-overflow 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-coNULL
+// RUN: MSAN_OPTIONS=allocator_may_return_null=0 not %run %t realloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-rCRASH
+// RUN: MSAN_OPTIONS=allocator_may_return_null=1     %run %t realloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-rNULL
+// RUN: MSAN_OPTIONS=allocator_may_return_null=0 not %run %t realloc-after-malloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-mrCRASH
+// RUN: MSAN_OPTIONS=allocator_may_return_null=1     %run %t realloc-after-malloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-mrNULL
+// RUN: MSAN_OPTIONS=allocator_may_return_null=0 not %run %t new 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-nCRASH
+// RUN: MSAN_OPTIONS=allocator_may_return_null=1 not %run %t new 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-nCRASH
+// RUN: MSAN_OPTIONS=allocator_may_return_null=0 not %run %t new-nothrow 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-nnCRASH
+// RUN: MSAN_OPTIONS=allocator_may_return_null=1     %run %t new-nothrow 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-nnNULL
 
-#include <limits.h>
+// UNSUPPORTED: win32
+
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <stdio.h>
-#include <assert.h>
 #include <limits>
-int main(int argc, char **argv) {
-  volatile size_t size = std::numeric_limits<size_t>::max() - 10000;
-  assert(argc == 2);
-  char *x = 0;
-  if (!strcmp(argv[1], "malloc")) {
-    fprintf(stderr, "malloc:\n");
-    x = (char*)malloc(size);
-  }
-  if (!strcmp(argv[1], "calloc")) {
-    fprintf(stderr, "calloc:\n");
-    x = (char*)calloc(size / 4, 4);
-  }
+#include <new>
 
-  if (!strcmp(argv[1], "calloc-overflow")) {
-    fprintf(stderr, "calloc-overflow:\n");
+int main(int argc, char **argv) {
+  // Disable stderr buffering. Needed on Windows.
+  setvbuf(stderr, NULL, _IONBF, 0);
+
+  assert(argc == 2);
+  const char *action = argv[1];
+  fprintf(stderr, "%s:\n", action);
+
+  static const size_t kMaxAllowedMallocSizePlusOne =
+#if __LP64__ || defined(_WIN64)
+      (8UL << 30) + 1;
+#else
+      (2UL << 30) + 1;
+#endif
+
+  void *x = 0;
+  if (!strcmp(action, "malloc")) {
+    x = malloc(kMaxAllowedMallocSizePlusOne);
+  } else if (!strcmp(action, "calloc")) {
+    x = calloc((kMaxAllowedMallocSizePlusOne / 4) + 1, 4);
+  } else if (!strcmp(action, "calloc-overflow")) {
     volatile size_t kMaxSizeT = std::numeric_limits<size_t>::max();
     size_t kArraySize = 4096;
     volatile size_t kArraySize2 = kMaxSizeT / kArraySize + 10;
-    x = (char*)calloc(kArraySize, kArraySize2);
-  }
-
-  if (!strcmp(argv[1], "realloc")) {
-    fprintf(stderr, "realloc:\n");
-    x = (char*)realloc(0, size);
-  }
-  if (!strcmp(argv[1], "realloc-after-malloc")) {
-    fprintf(stderr, "realloc-after-malloc:\n");
+    x = calloc(kArraySize, kArraySize2);
+  } else if (!strcmp(action, "realloc")) {
+    x = realloc(0, kMaxAllowedMallocSizePlusOne);
+  } else if (!strcmp(action, "realloc-after-malloc")) {
     char *t = (char*)malloc(100);
     *t = 42;
-    x = (char*)realloc(t, size);
+    x = realloc(t, kMaxAllowedMallocSizePlusOne);
     assert(*t == 42);
+    free(t);
+  } else if (!strcmp(action, "new")) {
+    x = operator new(kMaxAllowedMallocSizePlusOne);
+  } else if (!strcmp(action, "new-nothrow")) {
+    x = operator new(kMaxAllowedMallocSizePlusOne, std::nothrow);
+  } else {
+    assert(0);
   }
+
+  fprintf(stderr, "errno: %d\n", errno);
+
   // The NULL pointer is printed differently on different systems, while (long)0
   // is always the same.
   fprintf(stderr, "x: %lx\n", (long)x);
+  free(x);
+
   return x != 0;
 }
+
 // CHECK-mCRASH: malloc:
 // CHECK-mCRASH: MemorySanitizer's allocator is terminating the process
 // CHECK-cCRASH: calloc:
@@ -68,14 +107,25 @@
 // CHECK-rCRASH: MemorySanitizer's allocator is terminating the process
 // CHECK-mrCRASH: realloc-after-malloc:
 // CHECK-mrCRASH: MemorySanitizer's allocator is terminating the process
+// CHECK-nCRASH: new:
+// CHECK-nCRASH: MemorySanitizer's allocator is terminating the process
+// CHECK-nnCRASH: new-nothrow:
+// CHECK-nnCRASH: MemorySanitizer's allocator is terminating the process
 
 // CHECK-mNULL: malloc:
+// CHECK-mNULL: errno: 12
 // CHECK-mNULL: x: 0
 // CHECK-cNULL: calloc:
+// CHECK-cNULL: errno: 12
 // CHECK-cNULL: x: 0
 // CHECK-coNULL: calloc-overflow:
+// CHECK-coNULL: errno: 12
 // CHECK-coNULL: x: 0
 // CHECK-rNULL: realloc:
+// CHECK-rNULL: errno: 12
 // CHECK-rNULL: x: 0
 // CHECK-mrNULL: realloc-after-malloc:
+// CHECK-mrNULL: errno: 12
 // CHECK-mrNULL: x: 0
+// CHECK-nnNULL: new-nothrow:
+// CHECK-nnNULL: x: 0
diff --git a/test/msan/chained_origin_memcpy.cc b/test/msan/chained_origin_memcpy.cc
index bfe50df..0c94f2b 100644
--- a/test/msan/chained_origin_memcpy.cc
+++ b/test/msan/chained_origin_memcpy.cc
@@ -50,7 +50,7 @@
 
 // CHECK: Uninitialized value was stored to memory at
 // CHECK-FULL-STACK: {{#1 .* in fn_h.*chained_origin_memcpy.cc:}}[[@LINE-15]]
-// CHECK-SHORT-STACK: {{#0 .* in __msan_memcpy .*msan_interceptors.cc:}}
+// CHECK-SHORT-STACK: {{#0 .* in __msan_memcpy.*msan_interceptors.cc:}}
 
 // CHECK: Uninitialized value was stored to memory at
 // CHECK-FULL-STACK: {{#0 .* in fn_g.*chained_origin_memcpy.cc:}}[[@LINE-29]]
diff --git a/test/msan/fread_fwrite.cc b/test/msan/fread_fwrite.cc
new file mode 100644
index 0000000..3d50034
--- /dev/null
+++ b/test/msan/fread_fwrite.cc
@@ -0,0 +1,34 @@
+// RUN: %clangxx_msan -g %s -o %t
+// RUN: not %t 2>&1 | FileCheck %s
+// RUN: %t 1
+
+#include <stdio.h>
+#include <stdlib.h>
+
+int test_fread() {
+  FILE *f = fopen("/dev/zero", "r");
+  char c;
+  unsigned read = fread(&c, sizeof(c), 1, f);
+  fclose(f);
+  if (c == '1') // No error
+    return 1;
+  return 0;
+}
+
+int test_fwrite() {
+  FILE *f = fopen("/dev/null", "w");
+  char c;
+  if (fwrite(&c, sizeof(c), 1, f) != sizeof(c)) // BOOM
+    return 1;
+  return fclose(f);
+}
+
+int main(int argc, char *argv[]) {
+  if (argc > 1)
+    test_fread();
+  else
+    test_fwrite();
+  return 0;
+}
+
+// CHECK: Uninitialized bytes in __interceptor_fwrite at offset 0 inside
diff --git a/test/msan/getloadavg.cc b/test/msan/getloadavg.cc
new file mode 100644
index 0000000..7facd58
--- /dev/null
+++ b/test/msan/getloadavg.cc
@@ -0,0 +1,16 @@
+// RUN: %clangxx_msan -O0 -g %s -o %t && %run %t
+
+#define _BSD_SOURCE
+#include <assert.h>
+#include <stdlib.h>
+
+#include <sanitizer/msan_interface.h>
+
+int main(void) {
+  double x[4];
+  int ret = getloadavg(x, 3);
+  assert(ret > 0);
+  assert(ret <= 3);
+  assert(__msan_test_shadow(x, sizeof(double) * ret) == -1);
+  assert(__msan_test_shadow(&x[ret], sizeof(double)) == 0);
+}
diff --git a/test/msan/ioctl.cc b/test/msan/ioctl.cc
index e21ef63..66ac6e9 100644
--- a/test/msan/ioctl.cc
+++ b/test/msan/ioctl.cc
@@ -8,7 +8,7 @@
 #include <unistd.h>
 
 int main(int argc, char **argv) {
-  int fd = socket(AF_INET, SOCK_DGRAM, 0);
+  int fd = socket(AF_UNIX, SOCK_DGRAM, 0);
 
   unsigned int z;
   int res = ioctl(fd, FIOGETOWN, &z);
diff --git a/test/msan/ioctl_custom.cc b/test/msan/ioctl_custom.cc
index 6df22d7..eaab633 100644
--- a/test/msan/ioctl_custom.cc
+++ b/test/msan/ioctl_custom.cc
@@ -14,7 +14,7 @@
 #include <unistd.h>
 
 int main(int argc, char **argv) {
-  int fd = socket(AF_INET, SOCK_STREAM, 0);
+  int fd = socket(AF_UNIX, SOCK_STREAM, 0);
 
   struct ifreq ifreqs[20];
   struct ifconf ifc;
diff --git a/test/msan/lit.cfg b/test/msan/lit.cfg
index eb0ed43..cac2609 100644
--- a/test/msan/lit.cfg
+++ b/test/msan/lit.cfg
@@ -29,13 +29,9 @@
 # Default test suffixes.
 config.suffixes = ['.c', '.cc', '.cpp']
 
-# MemorySanitizer tests are currently supported on Linux only.
-if config.host_os not in ['Linux']:
+if config.host_os not in ['Linux', 'NetBSD']:
   config.unsupported = True
 
-if config.target_arch != 'aarch64':
-  config.available_features.add('stable-runtime')
-
 # For mips64, mips64el we have forced store_context_size to 1 because these
 # archs use slow unwinder which is not async signal safe. Therefore we only
 # check the first frame since store_context size is 1.
diff --git a/test/msan/pr32842.c b/test/msan/pr32842.c
new file mode 100644
index 0000000..b0a05f7
--- /dev/null
+++ b/test/msan/pr32842.c
@@ -0,0 +1,22 @@
+// Regression test for https://bugs.llvm.org/show_bug.cgi?id=32842
+//
+// RUN: %clang_msan -g %s -o %t
+// RUN: not %run %t 2>&1 | FileCheck  %s
+
+struct iphdr {
+  unsigned char pad1: 2, ihl:4, pad2: 2;
+};
+
+int raw_send_hdrinc(unsigned long int length) {
+  struct iphdr iph;
+  if (iph.ihl * 4 > length) {
+    return 1;
+  }
+  return 0;
+}
+
+int main(int argc, char *argv[]) {
+  return raw_send_hdrinc(12);
+}
+
+// CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value
diff --git a/test/msan/pvalloc.cc b/test/msan/pvalloc.cc
new file mode 100644
index 0000000..21b2300
--- /dev/null
+++ b/test/msan/pvalloc.cc
@@ -0,0 +1,43 @@
+// RUN: %clangxx_msan -O0 %s -o %t
+// RUN: MSAN_OPTIONS=allocator_may_return_null=0 not %run %t m1 2>&1 | FileCheck %s
+// RUN: MSAN_OPTIONS=allocator_may_return_null=1     %run %t m1 2>&1
+// RUN: MSAN_OPTIONS=allocator_may_return_null=0 not %run %t psm1 2>&1 | FileCheck %s
+// RUN: MSAN_OPTIONS=allocator_may_return_null=1     %run %t psm1 2>&1
+
+// UNSUPPORTED: win32, freebsd
+
+// Checks that pvalloc overflows are caught. If the allocator is allowed to
+// return null, the errno should be set to ENOMEM.
+
+#include <assert.h>
+#include <errno.h>
+#include <malloc.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+
+int main(int argc, char *argv[]) {
+  void *p;
+  size_t page_size;
+
+  assert(argc == 2);
+
+  page_size = sysconf(_SC_PAGESIZE);
+  // Check that the page size is a power of two.
+  assert((page_size & (page_size - 1)) == 0);
+
+  if (!strcmp(argv[1], "m1")) {
+    p = pvalloc((uintptr_t)-1);
+    assert(!p);
+    assert(errno == ENOMEM);
+  }
+  if (!strcmp(argv[1], "psm1")) {
+    p = pvalloc((uintptr_t)-(page_size - 1));
+    assert(!p);
+    assert(errno == ENOMEM);
+  }
+
+  return 0;
+}
+
+// CHECK: MemorySanitizer's allocator is terminating the process
diff --git a/test/msan/sigaction.cc b/test/msan/sigaction.cc
new file mode 100644
index 0000000..0c69f11
--- /dev/null
+++ b/test/msan/sigaction.cc
@@ -0,0 +1,47 @@
+// RUN: %clangxx_msan -std=c++11 -O0 -g %s -o %t
+// RUN: %run %t __
+// RUN: not %run %t A_ 2>&1 | FileCheck %s
+// RUN: not %run %t AH 2>&1 | FileCheck %s
+// RUN: not %run %t B_ 2>&1 | FileCheck %s
+// RUN: not %run %t BH 2>&1 | FileCheck %s
+// RUN: not %run %t C_ 2>&1 | FileCheck %s
+// RUN: not %run %t CH 2>&1 | FileCheck %s
+
+#include <assert.h>
+#include <signal.h>
+#include <string.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#include <sanitizer/msan_interface.h>
+
+void handler(int) {}
+void action(int, siginfo_t *, void *) {}
+
+int main(int argc, char **argv) {
+  char T = argv[1][0];
+  char H = argv[1][1];
+  struct sigaction sa;
+  memset(&sa, 0, sizeof(sa));
+  if (H == 'H') {
+    sa.sa_handler = handler;
+  } else {
+    sa.sa_sigaction = action;
+    sa.sa_flags = SA_SIGINFO;
+  }
+
+  if (T == 'A') {
+    if (H == 'H')
+      __msan_poison(&sa.sa_handler, sizeof(sa.sa_handler));
+    else
+      __msan_poison(&sa.sa_sigaction, sizeof(sa.sa_sigaction));
+  }
+  if (T == 'B')
+    __msan_poison(&sa.sa_flags, sizeof(sa.sa_flags));
+  if (T == 'C')
+    __msan_poison(&sa.sa_mask, sizeof(sa.sa_mask));
+  // CHECK: use-of-uninitialized-value
+  int res = sigaction(SIGUSR1, &sa, nullptr);
+  assert(res == 0);
+  return 0;
+}
diff --git a/test/msan/sigwait.cc b/test/msan/sigwait.cc
index f2e77cf..222fc34 100644
--- a/test/msan/sigwait.cc
+++ b/test/msan/sigwait.cc
@@ -1,16 +1,21 @@
 // RUN: %clangxx_msan -std=c++11 -O0 -g %s -o %t && %run %t
+// RUN: %clangxx_msan -DPOSITIVE -std=c++11 -O0 -g %s -o %t && not %run %t 2>&1 | FileCheck %s
 
 #include <assert.h>
-#include <sanitizer/msan_interface.h>
 #include <signal.h>
 #include <sys/time.h>
 #include <unistd.h>
 
+#include <sanitizer/msan_interface.h>
+
 void test_sigwait() {
   sigset_t s;
+#ifndef POSITIVE
   sigemptyset(&s);
   sigaddset(&s, SIGUSR1);
+#endif
   sigprocmask(SIG_BLOCK, &s, 0);
+  // CHECK:  MemorySanitizer: use-of-uninitialized-value
 
   if (pid_t pid = fork()) {
     kill(pid, SIGUSR1);
diff --git a/test/msan/strndup.cc b/test/msan/strndup.cc
new file mode 100644
index 0000000..d4b9af1
--- /dev/null
+++ b/test/msan/strndup.cc
@@ -0,0 +1,28 @@
+// RUN: %clangxx_msan %s -o %t && not %run %t 2>&1 | FileCheck --check-prefix=ON %s
+// RUN: %clangxx_msan %s -o %t && MSAN_OPTIONS=intercept_strndup=0 %run %t 2>&1 | FileCheck --check-prefix=OFF --allow-empty %s
+
+// When built as C on Linux, strndup is transformed to __strndup.
+// RUN: %clangxx_msan -O3 -xc %s -o %t && not %run %t 2>&1 | FileCheck --check-prefix=ON %s
+
+// UNSUPPORTED: win32
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sanitizer/msan_interface.h>
+
+int main(int argc, char **argv) {
+  char kString[4] = "abc";
+  __msan_poison(kString + 2, 1);
+  char *copy = strndup(kString, 4); // BOOM
+  assert(__msan_test_shadow(copy, 4) == 2); // Poisoning is preserved.
+  free(copy);
+  return 0;
+  // ON: Uninitialized bytes in __interceptor_{{(__)?}}strndup at offset 2 inside [{{.*}}, 4)
+  // ON: MemorySanitizer: use-of-uninitialized-value
+  // ON: #0 {{.*}}main {{.*}}strndup.cc:[[@LINE-6]]
+  // ON-LABEL: SUMMARY
+  // ON: {{.*}}strndup.cc:[[@LINE-8]]
+  // OFF-NOT: MemorySanitizer
+}
+
diff --git a/test/msan/strxfrm.cc b/test/msan/strxfrm.cc
index 9a30d03..94b8c70 100644
--- a/test/msan/strxfrm.cc
+++ b/test/msan/strxfrm.cc
@@ -1,14 +1,21 @@
 // RUN: %clangxx_msan -O0 -g %s -o %t && %run %t
 
 #include <assert.h>
+#include <locale.h>
 #include <sanitizer/msan_interface.h>
 #include <stdlib.h>
 #include <string.h>
 
 int main(void) {
-  const char *p = "abcdef";
   char q[10];
-  size_t n = strxfrm(q, p, sizeof(q));
+  size_t n = strxfrm(q, "abcdef", sizeof(q));
+  assert(n < sizeof(q));
+  __msan_check_mem_is_initialized(q, n + 1);
+
+  locale_t loc = newlocale(LC_ALL_MASK, "", (locale_t)0);
+
+  __msan_poison(&q, sizeof(q));
+  n = strxfrm_l(q, "qwerty", sizeof(q), loc);
   assert(n < sizeof(q));
   __msan_check_mem_is_initialized(q, n + 1);
   return 0;
diff --git a/test/msan/wcsncpy.cc b/test/msan/wcsncpy.cc
new file mode 100644
index 0000000..6471371
--- /dev/null
+++ b/test/msan/wcsncpy.cc
@@ -0,0 +1,40 @@
+// RUN: %clangxx_msan -fsanitize-memory-track-origins -O0 %s -o %t && not %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out && FileCheck %s < %t.out
+
+// XFAIL: mips
+
+#include <assert.h>
+#include <wchar.h>
+
+#include <sanitizer/msan_interface.h>
+
+int main() {
+  const wchar_t *s = L"abc";
+  assert(wcslen(s) == 3);
+
+  wchar_t s2[5];
+  assert(wcsncpy(s2, s, 3) == s2);
+  assert(__msan_test_shadow(&s2, 5 * sizeof(wchar_t)) == 3 * sizeof(wchar_t));
+  assert(wcsncpy(s2, s, 5) == s2);
+  assert(__msan_test_shadow(&s2, 5 * sizeof(wchar_t)) == -1);
+
+  wchar_t s3[5];
+  assert(wcsncpy(s3, s, 2) == s3);
+  assert(__msan_test_shadow(&s3, 5 * sizeof(wchar_t)) == 2 * sizeof(wchar_t));
+
+  __msan_allocated_memory(&s2[1], sizeof(wchar_t));
+  wchar_t s4[5];
+  assert(wcsncpy(s4, s2, 3) == s4);
+  __msan_check_mem_is_initialized(&s4, sizeof(s4));
+}
+// CHECK:  Uninitialized bytes in __msan_check_mem_is_initialized
+// CHECK:  WARNING: MemorySanitizer: use-of-uninitialized-value
+// CHECK:    in main {{.*}}wcsncpy.cc:28
+
+// CHECK:  Uninitialized value was stored to memory at
+// CHECK:    in {{[^\s]*}}wcsncpy
+// CHECK:    in main {{.*}}wcsncpy.cc:27
+
+// CHECK:  Memory was marked as uninitialized
+// CHECK:    in __msan_allocated_memory
+// CHECK:    in main {{.*}}wcsncpy.cc:25
diff --git a/test/profile/Linux/counter_promo_for.c b/test/profile/Linux/counter_promo_for.c
new file mode 100644
index 0000000..3139646
--- /dev/null
+++ b/test/profile/Linux/counter_promo_for.c
@@ -0,0 +1,59 @@
+// RUN: rm -fr %t.promo.prof
+// RUN: rm -fr %t.nopromo.prof
+// RUN: %clang_pgogen=%t.promo.prof/ -o %t.promo.gen -O2 %s
+// RUN: %clang_pgogen=%t.promo.prof/ -o %t.promo.gen.ll -emit-llvm -S -O2 %s
+// RUN: cat %t.promo.gen.ll | FileCheck --check-prefix=PROMO %s
+// RUN: %run %t.promo.gen
+// RUN: llvm-profdata merge -o %t.promo.profdata %t.promo.prof/
+// RUN: llvm-profdata show --counts --all-functions %t.promo.profdata  > %t.promo.dump
+// RUN: %clang_pgogen=%t.nopromo.prof/ -mllvm -do-counter-promotion=false -o %t.nopromo.gen -O2 %s
+// RUN: %clang_pgogen=%t.nopromo.prof/ -mllvm -do-counter-promotion=false -o %t.nopromo.gen.ll -emit-llvm -S -O2 %s
+// RUN: cat %t.nopromo.gen.ll | FileCheck --check-prefix=NOPROMO %s
+// RUN: %run %t.nopromo.gen
+// RUN: llvm-profdata merge -o %t.nopromo.profdata %t.nopromo.prof/
+// RUN: llvm-profdata show --counts --all-functions %t.nopromo.profdata  > %t.nopromo.dump
+// RUN: diff %t.promo.profdata %t.nopromo.profdata
+
+int g;
+__attribute__((noinline)) void bar(int i) { g += i; }
+
+__attribute__((noinline)) void foo(int n, int N) {
+// PROMO-LABEL: @foo
+// PROMO: load{{.*}}@__profc_foo{{.*}} 0){{.*}}
+// PROMO-NEXT: add
+// PROMO-NEXT: store{{.*}}@__profc_foo{{.*}} 0){{.*}}
+// PROMO-NEXT: load{{.*}}@__profc_foo{{.*}} 1){{.*}}
+// PROMO-NEXT: add
+// PROMO-NEXT: store{{.*}}@__profc_foo{{.*}} 1){{.*}}
+// PROMO-NEXT: load{{.*}}@__profc_foo{{.*}} 2){{.*}}
+// PROMO-NEXT: add
+// PROMO-NEXT: store{{.*}}@__profc_foo{{.*}} 2){{.*}}
+// PROMO: load{{.*}}@__profc_foo{{.*}} 3){{.*}}
+// PROMO-NEXT: add
+// PROMO-NEXT: store{{.*}}@__profc_foo{{.*}} 3){{.*}}
+//
+// NOPROMO-LABEL: @foo
+// NOPROMO: load{{.*}}@__profc_foo{{.*}} 0){{.*}}
+// NOPROMO-NEXT: add
+// NOPROMO-NEXT: store{{.*}}@__profc_foo{{.*}} 0){{.*}}
+// NOPROMO: load{{.*}}@__profc_foo{{.*}} 1){{.*}}
+// NOPROMO-NEXT: add
+// NOPROMO-NEXT: store{{.*}}@__profc_foo{{.*}} 1){{.*}}
+// NOPROMO: load{{.*}}@__profc_foo{{.*}} 2){{.*}}
+// NOPROMO-NEXT: add
+// NOPROMO-NEXT: store{{.*}}@__profc_foo{{.*}} 2){{.*}}
+  int i;
+  for (i = 0; i < N; i++) {
+    if (i < n + 1)
+      bar(1);
+    else if (i < n - 1)
+      bar(2);
+    else
+      bar(3);
+  }
+}
+
+int main() {
+  foo(10, 20); 
+  return 0;
+}
diff --git a/test/profile/Linux/counter_promo_nest.c b/test/profile/Linux/counter_promo_nest.c
new file mode 100644
index 0000000..0792f0c
--- /dev/null
+++ b/test/profile/Linux/counter_promo_nest.c
@@ -0,0 +1,48 @@
+// RUN: rm -fr %t.promo.prof
+// RUN: rm -fr %t.nopromo.prof
+// RUN: %clang_pgogen=%t.promo.prof/ -o %t.promo.gen -O2 %s
+// RUN: %clang_pgogen=%t.promo.prof/ -o %t.promo.gen.ll -emit-llvm -S -O2 %s
+// RUN: cat %t.promo.gen.ll | FileCheck --check-prefix=PROMO %s
+// RUN: %run %t.promo.gen
+// RUN: llvm-profdata merge -o %t.promo.profdata %t.promo.prof/
+// RUN: llvm-profdata show --counts --all-functions %t.promo.profdata  > %t.promo.dump
+// RUN: %clang_pgogen=%t.nopromo.prof/ -mllvm -do-counter-promotion=false -o %t.nopromo.gen -O2 %s
+// RUN: %run %t.nopromo.gen
+// RUN: llvm-profdata merge -o %t.nopromo.profdata %t.nopromo.prof/
+// RUN: llvm-profdata show --counts --all-functions %t.nopromo.profdata  > %t.nopromo.dump
+// RUN: diff %t.promo.profdata %t.nopromo.profdata
+int g;
+__attribute__((noinline)) void bar() {
+ g++;
+}
+
+extern int printf(const char*,...);
+
+int c = 10;
+
+int main()
+// PROMO-LABEL: @main
+// PROMO: load{{.*}}@__profc_main{{.*}}
+// PROMO-NEXT: add
+// PROMO-NEXT: store{{.*}}@__profc_main{{.*}}
+// PROMO-NEXT: load{{.*}}@__profc_main{{.*}}
+// PROMO-NEXT: add
+// PROMO-NEXT: store{{.*}}@__profc_main{{.*}}
+{
+  int i, j, k;
+
+  g = 0;
+  for (i = 0; i < c; i++)
+    for (j = 0; j < c; j++)
+       for (k = 0; k < c; k++)
+           bar();
+
+  for (i = 0; i < c; i++)
+    for (j = 0; j < 10*c;j++)
+        bar();
+
+  for (i = 0; i < 100*c; i++)
+    bar();
+
+  return 0;
+}
diff --git a/test/profile/Linux/counter_promo_while.c b/test/profile/Linux/counter_promo_while.c
new file mode 100644
index 0000000..b4d4e7a
--- /dev/null
+++ b/test/profile/Linux/counter_promo_while.c
@@ -0,0 +1,55 @@
+// RUN: rm -fr %t.promo.prof
+// RUN: rm -fr %t.nopromo.prof
+// RUN: %clang_pgogen=%t.promo.prof/ -o %t.promo.gen -O2 %s
+// RUN: %clang_pgogen=%t.promo.prof/ -o %t.promo.gen.ll -emit-llvm -S -O2 %s
+// RUN: cat %t.promo.gen.ll | FileCheck --check-prefix=PROMO %s
+// RUN: %run %t.promo.gen
+// RUN: llvm-profdata merge -o %t.promo.profdata %t.promo.prof/
+// RUN: llvm-profdata show --counts --all-functions %t.promo.profdata  > %t.promo.dump
+// RUN: %clang_pgogen=%t.nopromo.prof/ -mllvm -do-counter-promotion=false -o %t.nopromo.gen -O2 %s
+// RUN: %clang_pgogen=%t.nopromo.prof/ -mllvm -do-counter-promotion=false -o %t.nopromo.gen.ll -emit-llvm -S -O2 %s
+// RUN: cat %t.nopromo.gen.ll | FileCheck --check-prefix=NOPROMO %s
+// RUN: %run %t.nopromo.gen
+// RUN: llvm-profdata merge -o %t.nopromo.profdata %t.nopromo.prof/
+// RUN: llvm-profdata show --counts --all-functions %t.nopromo.profdata  > %t.nopromo.dump
+// RUN: diff %t.promo.profdata %t.nopromo.profdata
+int g;
+__attribute__((noinline)) void bar(int i) { g += i; }
+__attribute__((noinline)) void foo(int n, int N) {
+// PROMO-LABEL: @foo
+// PROMO: load{{.*}}@__profc_foo{{.*}} 0){{.*}}
+// PROMO-NEXT: add
+// PROMO-NEXT: store{{.*}}@__profc_foo{{.*}} 0){{.*}}
+// PROMO-NEXT: load{{.*}}@__profc_foo{{.*}} 1){{.*}}
+// PROMO-NEXT: add
+// PROMO-NEXT: store{{.*}}@__profc_foo{{.*}} 1){{.*}}
+// PROMO-NEXT: load{{.*}}@__profc_foo{{.*}} 2){{.*}}
+// PROMO-NEXT: add
+// PROMO-NEXT: store{{.*}}@__profc_foo{{.*}} 2){{.*}}
+//
+// NOPROMO-LABEL: @foo
+// NOPROMO: load{{.*}}@__profc_foo{{.*}} 0){{.*}}
+// NOPROMO-NEXT: add
+// NOPROMO-NEXT: store{{.*}}@__profc_foo{{.*}} 0){{.*}}
+// NOPROMO: load{{.*}}@__profc_foo{{.*}} 1){{.*}}
+// NOPROMO-NEXT: add
+// NOPROMO-NEXT: store{{.*}}@__profc_foo{{.*}} 1){{.*}}
+// NOPROMO: load{{.*}}@__profc_foo{{.*}} 2){{.*}}
+// NOPROMO-NEXT: add
+// NOPROMO-NEXT: store{{.*}}@__profc_foo{{.*}} 2){{.*}}
+  int i = 0;
+  while (i < N) {
+    if (i < n + 1)
+      bar(1);
+    else if (i < n - 1)
+      bar(2);
+    else
+      bar(3);
+    i++;
+  }
+}
+
+int main() {
+  foo(10, 20);
+  return 0;
+}
diff --git a/test/profile/Linux/coverage_ctors.cpp b/test/profile/Linux/coverage_ctors.cpp
index 021d9df..adf078e 100644
--- a/test/profile/Linux/coverage_ctors.cpp
+++ b/test/profile/Linux/coverage_ctors.cpp
@@ -1,7 +1,7 @@
 // RUN: %clangxx_profgen -std=c++11 -fuse-ld=gold -fcoverage-mapping -o %t %s
 // RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t
 // RUN: llvm-profdata merge -o %t.profdata %t.profraw
-// RUN: llvm-cov show %t -instr-profile %t.profdata -filename-equivalence 2>&1 | FileCheck %s
+// RUN: llvm-cov show %t -instr-profile %t.profdata -path-equivalence=/tmp,%S 2>&1 | FileCheck %s
 
 struct Base {
   int B;
diff --git a/test/profile/Linux/coverage_dtor.cpp b/test/profile/Linux/coverage_dtor.cpp
index 1641512..c91dd42 100644
--- a/test/profile/Linux/coverage_dtor.cpp
+++ b/test/profile/Linux/coverage_dtor.cpp
@@ -1,7 +1,7 @@
 // RUN: %clang_profgen -x c++ -fno-exceptions  -std=c++11 -fuse-ld=gold -fcoverage-mapping -o %t %s
 // RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t
 // RUN: llvm-profdata merge -o %t.profdata %t.profraw
-// RUN: llvm-cov show %t -instr-profile %t.profdata -filename-equivalence 2>&1 | FileCheck %s
+// RUN: llvm-cov show %t -instr-profile %t.profdata -path-equivalence=/tmp,%S 2>&1 | FileCheck %s
 
 int g = 100;
 struct Base {
diff --git a/test/profile/Linux/coverage_test.cpp b/test/profile/Linux/coverage_test.cpp
index db9a14e..67adeb7 100644
--- a/test/profile/Linux/coverage_test.cpp
+++ b/test/profile/Linux/coverage_test.cpp
@@ -1,12 +1,12 @@
 // RUN: %clang_profgen -fuse-ld=gold -O2 -fdata-sections -ffunction-sections -fcoverage-mapping -Wl,--gc-sections -o %t %s
 // RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t
 // RUN: llvm-profdata merge -o %t.profdata %t.profraw
-// RUN: llvm-cov show %t -instr-profile %t.profdata -filename-equivalence 2>&1 | FileCheck %s
+// RUN: llvm-cov show %t -instr-profile %t.profdata -path-equivalence=/tmp,%S 2>&1 | FileCheck %s
 // BFD linker older than 2.26 has a bug that per-func profile data will be wrongly garbage collected when GC is turned on. We only do end-to-end test here without GC:
 // RUN: %clang_profgen -O2  -fcoverage-mapping  -o %t.2 %s
 // RUN: env LLVM_PROFILE_FILE=%t.2.profraw %run %t.2
 // RUN: llvm-profdata merge -o %t.2.profdata %t.2.profraw
-// RUN: llvm-cov show %t.2 -instr-profile %t.2.profdata -filename-equivalence 2>&1 | FileCheck %s
+// RUN: llvm-cov show %t.2 -instr-profile %t.2.profdata -path-equivalence=/tmp,%S 2>&1 | FileCheck %s
 // Check covmap is not garbage collected when GC is turned on with BFD linker. Due to the bug mentioned above, we can only
 // do the check with objdump:
 // RUN: %clang_profgen -O2  -fcoverage-mapping -Wl,--gc-sections -o %t.3 %s
@@ -15,7 +15,7 @@
 // RUN: %clang_profgen -fuse-ld=gold -O2 -fdata-sections -ffunction-sections -fPIE -pie -fcoverage-mapping -Wl,--gc-sections -o %t.pie %s
 // RUN: env LLVM_PROFILE_FILE=%t.pie.profraw %run %t.pie
 // RUN: llvm-profdata merge -o %t.pie.profdata %t.pie.profraw
-// RUN: llvm-cov show %t.pie -instr-profile %t.pie.profdata -filename-equivalence 2>&1 | FileCheck %s
+// RUN: llvm-cov show %t.pie -instr-profile %t.pie.profdata -path-equivalence=/tmp,%S 2>&1 | FileCheck %s
 
 void foo(bool cond) { // CHECK:  [[@LINE]]| 1|void foo(
   if (cond) {         // CHECK:  [[@LINE]]| 1| if (cond) {
diff --git a/test/profile/Linux/instrprof-alloc.test b/test/profile/Linux/instrprof-alloc.test
index 752b108..4db7647 100644
--- a/test/profile/Linux/instrprof-alloc.test
+++ b/test/profile/Linux/instrprof-alloc.test
@@ -1,6 +1,6 @@
-// RUN: %clang_profgen -Xclang -fprofile-instrument=llvm  -fuse-ld=gold -Wl,-wrap,malloc -Wl,-wrap,calloc -o %t -O3 %S/../Inputs/instrprof-alloc.c
+// RUN: %clang_pgogen  -fuse-ld=gold -Wl,-wrap,malloc -Wl,-wrap,calloc -o %t -O3 %S/../Inputs/instrprof-alloc.c
 // RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t
 
-// RUN: %clang_profgen  -Xclang -fprofile-instrument=llvm -mllvm -vp-static-alloc=false -fuse-ld=gold -Wl,-wrap,malloc -Wl,-wrap,calloc -o %t.dyn -O3 %S/../Inputs/instrprof-alloc.c
+// RUN: %clang_pgogen  -mllvm -vp-static-alloc=false -fuse-ld=gold -Wl,-wrap,malloc -Wl,-wrap,calloc -o %t.dyn -O3 %S/../Inputs/instrprof-alloc.c
 // RUN: env LLVM_PROFILE_FILE=%t.profraw not %run %t.dyn
 
diff --git a/test/profile/Linux/instrprof-comdat.test b/test/profile/Linux/instrprof-comdat.test
index 5a11a24..1490ea7 100644
--- a/test/profile/Linux/instrprof-comdat.test
+++ b/test/profile/Linux/instrprof-comdat.test
@@ -2,5 +2,5 @@
 RUN: %clangxx_profgen -o %t.d/comdat -fcoverage-mapping -fuse-ld=gold %S/../Inputs/instrprof-comdat-1.cpp %S/../Inputs/instrprof-comdat-2.cpp
 RUN: LLVM_PROFILE_FILE=%t-comdat.profraw %run %t.d/comdat
 RUN: llvm-profdata merge -o %t.d/comdat.prof %t-comdat.profraw 
-RUN: llvm-cov show --filename-equivalence --instr-profile=%t.d/comdat.prof %t.d/comdat | FileCheck --check-prefix=HEADER %S/../Inputs/instrprof-comdat.h
+RUN: llvm-cov show --path-equivalence=/tmp,%S --instr-profile=%t.d/comdat.prof %t.d/comdat | FileCheck --check-prefix=HEADER %S/../Inputs/instrprof-comdat.h
 
diff --git a/test/profile/Linux/instrprof-value-prof-warn.test b/test/profile/Linux/instrprof-value-prof-warn.test
index 26502cc..6ca1603 100644
--- a/test/profile/Linux/instrprof-value-prof-warn.test
+++ b/test/profile/Linux/instrprof-value-prof-warn.test
@@ -1,4 +1,4 @@
-RUN: %clang_profgen -O2 -mllvm -disable-vp=false -Xclang -fprofile-instrument=llvm -mllvm -vp-static-alloc=true -DSTRESS=1 -o %t.ir.warn  %S/../Inputs/instrprof-value-prof-real.c
+RUN: %clang_pgogen -O2 -mllvm -disable-vp=false -mllvm -vp-static-alloc=true -DSTRESS=1 -o %t.ir.warn  %S/../Inputs/instrprof-value-prof-real.c
 RUN: env LLVM_PROFILE_FILE=%t.ir.profraw LLVM_VP_MAX_NUM_VALS_PER_SITE=255  %run %t.ir.warn 2>&1 |FileCheck --check-prefix=WARNING %s
 #  Test that enough static counters have been allocated
 RUN: env LLVM_PROFILE_FILE=%t.ir.profraw LLVM_VP_MAX_NUM_VALS_PER_SITE=150  %run %t.ir.warn 2>&1 |FileCheck --check-prefix=NOWARNING --allow-empty %s
diff --git a/test/profile/instrprof-override-filename.c b/test/profile/instrprof-override-filename.c
index a67c707..d2b6b69 100644
--- a/test/profile/instrprof-override-filename.c
+++ b/test/profile/instrprof-override-filename.c
@@ -1,14 +1,24 @@
-// RUN: %clang_profgen=%t.profraw -o %t -O3 %s
-// RUN: %run %t %t.profraw
-// RUN: llvm-profdata merge -o %t.profdata %t.profraw
-// RUN: %clang_profuse=%t.profdata -o - -S -emit-llvm %s | FileCheck %s
+// RUN: rm -rf %t.dir && mkdir -p %t.dir
+// RUN: cd %t.dir
+//
+// RUN: %clang_profgen=P_RAW -o %t -O3 %s
+// RUN: %run %t P_RAW
+// RUN: llvm-profdata merge -o %t.profdata P_RAW
+// RUN: %clang_profuse=%t.profdata -o - -S -emit-llvm %s | FileCheck %s --check-prefix=FE
+//
+// RUN: %clang_pgogen=I_RAW -o %t.2 %s
+// RUN: %run %t.2 I_RAW
+// RUN: llvm-profdata merge -o %t2.profdata I_RAW
+// RUN: %clang_profuse=%t2.profdata -o - -S -emit-llvm %s | FileCheck %s --check-prefix=IR
 
 void bar() {}
 int main(int argc, const char *argv[]) {
-  // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !prof ![[PD1:[0-9]+]]
+  // FE: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !prof ![[PD1:[0-9]+]]
+  // IR: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !prof ![[PD1:[0-9]+]]
   if (argc < 2)
     return 1;
   bar();
   return 0;
 }
-// CHECK: ![[PD1]] = !{!"branch_weights", i32 1, i32 2}
+// FE: ![[PD1]] = !{!"branch_weights", i32 1, i32 2}
+// IR: ![[PD1]] = !{!"branch_weights", i32 0, i32 1}
diff --git a/test/profile/lit.cfg b/test/profile/lit.cfg
index 9ca3942..2686207 100644
--- a/test/profile/lit.cfg
+++ b/test/profile/lit.cfg
@@ -22,17 +22,6 @@
         config.profile_lit_binary_dir is not None:
     config.test_exec_root = os.path.join(config.profile_lit_binary_dir, config.name)
 
-# If the above check didn't work, we're probably in the source tree.  Use some
-# magic to re-execute from the build tree.
-if config.test_exec_root is None:
-    # The magic relies on knowing compilerrt_site_basedir.
-    compilerrt_basedir = lit_config.params.get('compilerrt_site_basedir', None)
-    if compilerrt_basedir:
-        site_cfg = os.path.join(compilerrt_basedir, 'profile', 'lit.site.cfg')
-        if os.path.exists(site_cfg):
-            lit_config.load_config(config, site_cfg)
-            raise SystemExit
-
 if config.host_os in ['Linux']:
   extra_link_flags = ["-ldl"]
 else:
@@ -83,3 +72,6 @@
 
 if config.target_arch in ['armv7l']:
   config.unsupported = True
+
+if config.android:
+  config.unsupported = True
diff --git a/test/safestack/canary.c b/test/safestack/canary.c
index c6b81f2..1ceaa50 100644
--- a/test/safestack/canary.c
+++ b/test/safestack/canary.c
@@ -2,7 +2,8 @@
 // RUN: %run %t.nossp 2>&1 | FileCheck --check-prefix=NOSSP %s
 
 // RUN: %clang_safestack -fstack-protector-all -D_FORTIFY_SOURCE=0 -g %s -o %t.ssp
-// RUN: not --crash %run %t.ssp 2>&1 | FileCheck -check-prefix=SSP %s
+// RUN: env LIBC_FATAL_STDERR_=1 not --crash %run %t.ssp 2>&1 | \
+// RUN:     FileCheck -check-prefix=SSP %s
 
 // Test stack canaries on the unsafe stack.
 
diff --git a/test/safestack/lit.cfg b/test/safestack/lit.cfg
index d4ec73c..10cd8a5 100644
--- a/test/safestack/lit.cfg
+++ b/test/safestack/lit.cfg
@@ -16,13 +16,7 @@
 config.substitutions.append( ("%clang_safestack ", config.clang + " -O0 -fsanitize=safe-stack ") )
 
 if config.lto_supported:
-  config.substitutions.append((r"%clang_lto_safestack ", ' '.join(config.lto_launch + [config.clang] + config.lto_flags + ['-flto -fsanitize=safe-stack '])))
+  config.substitutions.append((r"%clang_lto_safestack ", ' '.join(config.lto_launch + [config.clang] + config.lto_flags + ['-fsanitize=safe-stack '])))
 
-# SafeStack tests are currently supported on Linux, FreeBSD and Darwin only.
-if config.host_os not in ['Linux', 'FreeBSD', 'Darwin']:
+if config.host_os not in ['Linux', 'FreeBSD', 'Darwin', 'NetBSD']:
    config.unsupported = True
-
-# Allow tests to use REQUIRES=stable-runtime.  For use when you cannot use XFAIL
-# because the test fail due some runtime issue.
-if config.target_arch != 'aarch64':
-  config.available_features.add('stable-runtime')
diff --git a/test/sanitizer_common/CMakeLists.txt b/test/sanitizer_common/CMakeLists.txt
index 9b4070b..8b210a0 100644
--- a/test/sanitizer_common/CMakeLists.txt
+++ b/test/sanitizer_common/CMakeLists.txt
@@ -4,13 +4,14 @@
 set(SANITIZER_COMMON_TESTSUITES)
 
 set(SUPPORTED_TOOLS)
-if(CMAKE_SYSTEM_NAME MATCHES "Darwin|Linux|FreeBSD" AND NOT ANDROID)
+if(CMAKE_SYSTEM_NAME MATCHES "Darwin|Linux|FreeBSD|NetBSD")
   list(APPEND SUPPORTED_TOOLS asan)
 endif()
 if(CMAKE_SYSTEM_NAME MATCHES "Linux" AND NOT ANDROID)
   list(APPEND SUPPORTED_TOOLS tsan)
   list(APPEND SUPPORTED_TOOLS msan)
   list(APPEND SUPPORTED_TOOLS lsan)
+  list(APPEND SUPPORTED_TOOLS ubsan)
 endif()
 
 # Create a separate config for each tool we support.
@@ -42,8 +43,11 @@
   configure_lit_site_cfg(
     ${CMAKE_CURRENT_SOURCE_DIR}/Unit/lit.site.cfg.in
     ${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg)
-  list(APPEND SANITIZER_COMMON_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/Unit)
-  list(APPEND SANITIZER_COMMON_TEST_DEPS SanitizerUnitTests)
+  # FIXME: support unit test in the android test runner
+  if (NOT ANDROID)
+    list(APPEND SANITIZER_COMMON_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/Unit)
+    list(APPEND SANITIZER_COMMON_TEST_DEPS SanitizerUnitTests)
+  endif()
 endif()
 
 if(SANITIZER_COMMON_TESTSUITES)
diff --git a/test/sanitizer_common/TestCases/Darwin/print-stack-trace.cc b/test/sanitizer_common/TestCases/Darwin/print-stack-trace.cc
new file mode 100644
index 0000000..715282f
--- /dev/null
+++ b/test/sanitizer_common/TestCases/Darwin/print-stack-trace.cc
@@ -0,0 +1,19 @@
+// RUN: %clangxx -O0 %s -o %t && %env_tool_opts=stack_trace_format=DEFAULT %run %t 2>&1 | FileCheck %s
+// RUN: %env_tool_opts=stack_trace_format='"frame:%n lineno:%l"' %run %t 2>&1 | FileCheck %s --check-prefix=CUSTOM
+
+#include <sanitizer/common_interface_defs.h>
+
+static inline void FooBarBaz() {
+  __sanitizer_print_stack_trace();
+}
+
+int main() {
+  FooBarBaz();
+  return 0;
+}
+// CHECK: {{    #0 0x.* in __sanitizer_print_stack_trace}}
+// CHECK: {{    #1 0x.* in FooBarBaz(\(\))? .*}}print-stack-trace.cc:[[@LINE-8]]
+// CHECK: {{    #2 0x.* in main.*}}print-stack-trace.cc:[[@LINE-5]]
+
+// CUSTOM: frame:1 lineno:[[@LINE-11]]
+// CUSTOM: frame:2 lineno:[[@LINE-8]]
diff --git a/test/sanitizer_common/TestCases/Linux/abort_on_error.cc b/test/sanitizer_common/TestCases/Linux/abort_on_error.cc
index a5ef665..e4b246e 100644
--- a/test/sanitizer_common/TestCases/Linux/abort_on_error.cc
+++ b/test/sanitizer_common/TestCases/Linux/abort_on_error.cc
@@ -10,6 +10,9 @@
 // lit doesn't set options anyway.
 // RUN: not %run %t 2>&1
 
+// Android needs abort_on_error=0
+// UNSUPPORTED: android
+
 namespace __sanitizer {
 void Die();
 }
diff --git a/test/sanitizer_common/TestCases/Linux/allow_user_segv.cc b/test/sanitizer_common/TestCases/Linux/allow_user_segv.cc
new file mode 100644
index 0000000..ab6b7d7
--- /dev/null
+++ b/test/sanitizer_common/TestCases/Linux/allow_user_segv.cc
@@ -0,0 +1,94 @@
+// Regression test for
+// https://code.google.com/p/address-sanitizer/issues/detail?id=180
+
+// clang-format off
+// RUN: %clangxx -O0 %s -o %t
+
+// RUN: %env_tool_opts=handle_segv=0 not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK0
+// RUN: %env_tool_opts=handle_segv=1 not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK1
+// RUN: %env_tool_opts=handle_segv=2 not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK2
+
+// RUN: %env_tool_opts=handle_segv=0:allow_user_segv_handler=0 not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK0
+// RUN: %env_tool_opts=handle_segv=1:allow_user_segv_handler=0 not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK2
+// RUN: %env_tool_opts=handle_segv=2:allow_user_segv_handler=0 not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK2
+
+// RUN: %env_tool_opts=handle_segv=0:allow_user_segv_handler=1 not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK0
+// RUN: %env_tool_opts=handle_segv=1:allow_user_segv_handler=1 not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK1
+// RUN: %env_tool_opts=handle_segv=2:allow_user_segv_handler=1 not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK2
+// clang-format on
+
+// Remove when fixed: https://github.com/google/sanitizers/issues/637
+// XFAIL: msan
+// XFAIL: tsan
+
+// Flaky errors in debuggerd with "waitpid returned unexpected pid (0)" in logcat.
+// UNSUPPORTED: android && i386-target-arch
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+struct sigaction original_sigaction_sigbus;
+struct sigaction original_sigaction_sigsegv;
+
+void User_OnSIGSEGV(int signum, siginfo_t *siginfo, void *context) {
+  fprintf(stderr, "User sigaction called\n");
+  struct sigaction original_sigaction;
+  if (signum == SIGBUS)
+    original_sigaction = original_sigaction_sigbus;
+  else if (signum == SIGSEGV)
+    original_sigaction = original_sigaction_sigsegv;
+  else {
+    printf("Invalid signum");
+    exit(1);
+  }
+  if (original_sigaction.sa_flags | SA_SIGINFO) {
+    if (original_sigaction.sa_sigaction)
+      original_sigaction.sa_sigaction(signum, siginfo, context);
+  } else {
+    if (original_sigaction.sa_handler)
+      original_sigaction.sa_handler(signum);
+  }
+  exit(1);
+}
+
+int DoSEGV() {
+  volatile int *x = 0;
+  return *x;
+}
+
+bool InstallHandler(int signum, struct sigaction *original_sigaction) {
+  struct sigaction user_sigaction;
+  user_sigaction.sa_sigaction = User_OnSIGSEGV;
+  user_sigaction.sa_flags = SA_SIGINFO;
+  if (sigaction(signum, &user_sigaction, original_sigaction)) {
+    perror("sigaction");
+    return false;
+  }
+  return true;
+}
+
+int main() {
+  // Let's install handlers for both SIGSEGV and SIGBUS, since pre-Yosemite
+  // 32-bit Darwin triggers SIGBUS instead.
+  if (InstallHandler(SIGSEGV, &original_sigaction_sigsegv) &&
+      InstallHandler(SIGBUS, &original_sigaction_sigbus)) {
+    fprintf(stderr, "User sigaction installed\n");
+  }
+  return DoSEGV();
+}
+
+// CHECK0-NOT: Sanitizer:DEADLYSIGNAL
+// CHECK0-NOT: Sanitizer: SEGV on unknown address
+// CHECK0: User sigaction installed
+// CHECK0-NEXT: User sigaction called
+
+// CHECK1: User sigaction installed
+// CHECK1-NEXT: User sigaction called
+// CHECK1-NEXT: Sanitizer:DEADLYSIGNAL
+// CHECK1: Sanitizer: SEGV on unknown address
+
+// CHECK2-NOT: User sigaction called
+// CHECK2: User sigaction installed
+// CHECK2-NEXT: Sanitizer:DEADLYSIGNAL
+// CHECK2: Sanitizer: SEGV on unknown address
diff --git a/test/sanitizer_common/TestCases/Linux/assert.cc b/test/sanitizer_common/TestCases/Linux/assert.cc
index 5d58ea4..f10ddf3 100644
--- a/test/sanitizer_common/TestCases/Linux/assert.cc
+++ b/test/sanitizer_common/TestCases/Linux/assert.cc
@@ -1,12 +1,16 @@
 // Test the handle_abort option.
-// RUN: %clang %s -o %t
+
+// clang-format off
+// RUN: %clangxx %s -o %t
 // RUN:                              not --crash %run %t 2>&1 | FileCheck --check-prefix=CHECK0 %s
 // RUN: %env_tool_opts=handle_abort=0 not --crash %run %t 2>&1 | FileCheck --check-prefix=CHECK0 %s
 // RUN: %env_tool_opts=handle_abort=1 not         %run %t 2>&1 | FileCheck --check-prefix=CHECK1 %s
-// FIXME: implement in other sanitizers, not just asan.
+// clang-format on
+
+// FIXME: implement in other sanitizers.
 // XFAIL: msan
-// XFAIL: lsan
 // XFAIL: tsan
+
 #include <assert.h>
 #include <stdio.h>
 #include <sanitizer/asan_interface.h>
diff --git a/test/sanitizer_common/TestCases/Linux/decorate_proc_maps.cc b/test/sanitizer_common/TestCases/Linux/decorate_proc_maps.cc
index 36d4df5..8a05f4b 100644
--- a/test/sanitizer_common/TestCases/Linux/decorate_proc_maps.cc
+++ b/test/sanitizer_common/TestCases/Linux/decorate_proc_maps.cc
@@ -1,6 +1,9 @@
 // RUN: %clangxx -g %s -o %t
 // RUN: %env_tool_opts=decorate_proc_maps=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-%tool_name
+
 // REQUIRES: stable-runtime
+// XFAIL: android && asan
+
 #include <errno.h>
 #include <fcntl.h>
 #include <pthread.h>
@@ -57,5 +60,6 @@
 // CHECK-tsan: rw-p {{.*}} [trace 1]
 // CHECK-tsan: rw-p {{.*}} [trace header 1]
 
-// Nothing interesting with standalone LSan.
+// Nothing interesting with standalone LSan and UBSan.
 // CHECK-lsan: decorate_proc_maps
+// CHECK-ubsan: decorate_proc_maps
diff --git a/test/sanitizer_common/TestCases/Linux/deepbind.cc b/test/sanitizer_common/TestCases/Linux/deepbind.cc
index fc810ad..81150fa 100644
--- a/test/sanitizer_common/TestCases/Linux/deepbind.cc
+++ b/test/sanitizer_common/TestCases/Linux/deepbind.cc
@@ -1,5 +1,5 @@
 // RUN: %clangxx %s -o %t && %run not %t 1 2>&1 | FileCheck %s
-// UNSUPPORTED: lsan, android
+// UNSUPPORTED: lsan,ubsan,android
 
 #include <dlfcn.h>
 #include <stdio.h>
diff --git a/test/sanitizer_common/TestCases/Linux/hard_rss_limit_mb_test.cc b/test/sanitizer_common/TestCases/Linux/hard_rss_limit_mb_test.cc
index d4a60a0..3c875c1 100644
--- a/test/sanitizer_common/TestCases/Linux/hard_rss_limit_mb_test.cc
+++ b/test/sanitizer_common/TestCases/Linux/hard_rss_limit_mb_test.cc
@@ -14,6 +14,7 @@
 // XFAIL: lsan
 // XFAIL: tsan
 // XFAIL: msan
+// XFAIL: ubsan
 
 #include <string.h>
 #include <stdio.h>
@@ -26,7 +27,10 @@
 int main(int argc, char **argv) {
   for (int i = 0; i < kNumAllocs; i++) {
     if ((i % 1000) == 0) {
-      fprintf(stderr, "[%d]\n", i);
+      // Don't write to stderr! Doing that triggers a kernel race condition
+      // between this thread and the rss-limit thread, and may lose part of the
+      // output. See https://lkml.org/lkml/2014/2/17/324.
+      printf("[%d]\n", i);
     }
     char *x = new char[kAllocSize];
     memset(x, 0, kAllocSize);
diff --git a/test/sanitizer_common/TestCases/Linux/iconv_test.c b/test/sanitizer_common/TestCases/Linux/iconv_test.c
new file mode 100644
index 0000000..eb995d2
--- /dev/null
+++ b/test/sanitizer_common/TestCases/Linux/iconv_test.c
@@ -0,0 +1,31 @@
+// RUN: %clang %s -o %t && %run %t
+// Verify that even if iconv returned -1
+// we still treat the initialized part of outbuf as properly initialized.
+
+// UNSUPPORTED: android
+
+#include <iconv.h>
+#include <assert.h>
+#include <stdio.h>
+
+int main() {
+  iconv_t cd = iconv_open("UTF-8", "no");
+  assert(cd != (iconv_t)-1);
+  char in[11] = {0x7e, 0x7e, 0x5f, 0x53, 0x55, 0x3e,
+                 0x99, 0x3c, 0x7e, 0x7e, 0x7e};
+  fprintf(stderr, "cd: %p\n", (void*)cd);
+  char out[100];
+  char *inbuf = &in[0];
+  size_t inbytesleft = 11;
+  char *outbuf = &out[0];
+  size_t outbytesleft = 100;
+  int ret = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
+  assert(ret == -1);
+  assert(outbuf - &out[0] == 10);
+  for (int i = 0; i < 10; i++) {
+    if (out[i] == 0x77) return 1;
+    fprintf(stderr, "OUT%d 0x%x -- OK\n", i, (unsigned char)out[i]);
+  }
+  iconv_close(cd);
+}
+
diff --git a/test/sanitizer_common/TestCases/Linux/ill.cc b/test/sanitizer_common/TestCases/Linux/ill.cc
index 2c69618..7d39abe 100644
--- a/test/sanitizer_common/TestCases/Linux/ill.cc
+++ b/test/sanitizer_common/TestCases/Linux/ill.cc
@@ -1,12 +1,16 @@
 // Test the handle_sigill option.
+
+// clang-format off
 // RUN: %clang %s -o %t -O1
 // RUN:                                not --crash %run %t 2>&1 | FileCheck --check-prefix=CHECK0 %s
 // RUN: %env_tool_opts=handle_sigill=0 not --crash %run %t 2>&1 | FileCheck --check-prefix=CHECK0 %s
 // RUN: %env_tool_opts=handle_sigill=1 not         %run %t 2>&1 | FileCheck --check-prefix=CHECK1 %s
-// FIXME: implement in other sanitizers, not just asan.
+// clang-format on
+
+// FIXME: implement in other sanitizers.
 // XFAIL: msan
-// XFAIL: lsan
 // XFAIL: tsan
+// XFAIL: ubsan
 //
 // FIXME: seems to fail on ARM
 // REQUIRES: x86_64-target-arch
diff --git a/test/sanitizer_common/TestCases/Linux/mlock_test.cc b/test/sanitizer_common/TestCases/Linux/mlock_test.cc
index 69ea7cb..a952922 100644
--- a/test/sanitizer_common/TestCases/Linux/mlock_test.cc
+++ b/test/sanitizer_common/TestCases/Linux/mlock_test.cc
@@ -1,5 +1,5 @@
 // RUN: %clang  %s -o %t && %run %t
-// XFAIL: lsan
+// XFAIL: ubsan,lsan
 
 #include <assert.h>
 #include <sys/mman.h>
diff --git a/test/sanitizer_common/TestCases/Linux/mprobe.cc b/test/sanitizer_common/TestCases/Linux/mprobe.cc
new file mode 100644
index 0000000..82c0faf
--- /dev/null
+++ b/test/sanitizer_common/TestCases/Linux/mprobe.cc
@@ -0,0 +1,42 @@
+// RUN: %clangxx %s -o %t && %run %t 2>&1 | FileCheck %s
+// UNSUPPORTED: android, ubsan
+
+#include <stdio.h>
+#include <stdlib.h>
+#if defined(__GLIBC_PREREQ) && __GLIBC_PREREQ(2, 2)
+#include <mcheck.h>
+#else
+#define MCHECK_OK 0
+extern "C" int mcheck(void (*abortfunc)(int mstatus));
+extern "C" int mcheck_pedantic(void (*abortfunc)(int mstatus));
+extern "C" int mprobe(void *ptr);
+#endif
+
+void check_heap() {
+  void *p = malloc(1000);
+  int res = mprobe(p);
+  if (res == MCHECK_OK)
+    printf("Success!\n");
+  free(p);
+}
+
+int main(int argc, char *argv[]) {
+  void *p;
+  if (mcheck(NULL) != 0) {
+    fprintf(stderr, "mcheck() failed\n");
+    exit(EXIT_FAILURE);
+  }
+
+  check_heap();
+  // CHECK: Success!
+
+  if (mcheck_pedantic(NULL) != 0) {
+    fprintf(stderr, "mcheck_pedantic() failed\n");
+    exit(EXIT_FAILURE);
+  }
+
+  check_heap();
+  // CHECK: Success!
+
+  return 0;
+}
diff --git a/test/sanitizer_common/TestCases/Linux/ptrace.cc b/test/sanitizer_common/TestCases/Linux/ptrace.cc
index b10aecd..82532c3 100644
--- a/test/sanitizer_common/TestCases/Linux/ptrace.cc
+++ b/test/sanitizer_common/TestCases/Linux/ptrace.cc
@@ -1,5 +1,7 @@
 // RUN: %clangxx -O0 %s -o %t && %run %t
 
+// UNSUPPORTED: android
+
 #include <assert.h>
 #include <signal.h>
 #include <stdio.h>
diff --git a/test/sanitizer_common/TestCases/Linux/sem_init_glibc.cc b/test/sanitizer_common/TestCases/Linux/sem_init_glibc.cc
index 92557b7..d623cca 100644
--- a/test/sanitizer_common/TestCases/Linux/sem_init_glibc.cc
+++ b/test/sanitizer_common/TestCases/Linux/sem_init_glibc.cc
@@ -1,15 +1,19 @@
 // RUN: %clangxx -O0 -g %s -lutil -o %t && %run %t
 // This test depends on the glibc layout of struct sem_t and checks that we
 // don't leave sem_t::private uninitialized.
-// UNSUPPORTED: android, lsan-x86
+// UNSUPPORTED: android, lsan-x86, ubsan, target-is-mips64, target-is-mips64el
 #include <features.h>
 #include <assert.h>
 #include <semaphore.h>
 #include <string.h>
 #include <stdint.h>
 
+// On powerpc64be semval_t must be 64 bits even with "old" versions of glibc.
+#if __PPC64__ && __BIG_ENDIAN__
+typedef uint64_t semval_t;
+
 // This condition needs to correspond to __HAVE_64B_ATOMICS macro in glibc.
-#if (defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__) || \
+#elif (defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__) || \
      defined(__s390x__) || defined(__sparc64__) || defined(__alpha__) || \
      defined(__ia64__) || defined(__m68k__)) && __GLIBC_PREREQ(2, 21)
 typedef uint64_t semval_t;
diff --git a/test/sanitizer_common/TestCases/Linux/signal_segv_handler.cc b/test/sanitizer_common/TestCases/Linux/signal_segv_handler.cc
index 643fb48..51e8bdb 100644
--- a/test/sanitizer_common/TestCases/Linux/signal_segv_handler.cc
+++ b/test/sanitizer_common/TestCases/Linux/signal_segv_handler.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx -O1 %s -o %t && TSAN_OPTIONS="flush_memory_ms=1 memory_limit_mb=1" ASAN_OPTIONS="handle_segv=0 allow_user_segv_handler=1" %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx -O1 %s -o %t && TSAN_OPTIONS="flush_memory_ms=1 memory_limit_mb=1" ASAN_OPTIONS="handle_segv=0" %run %t 2>&1 | FileCheck %s
 
 // JVM uses SEGV to preempt threads. All threads do a load from a known address
 // periodically. When runtime needs to preempt threads, it unmaps the page.
diff --git a/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cc b/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cc
index 83570a9..2ee8095 100644
--- a/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cc
+++ b/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cc
@@ -14,6 +14,7 @@
 // XFAIL: lsan
 // XFAIL: tsan
 // XFAIL: msan
+// XFAIL: ubsan
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
diff --git a/test/sanitizer_common/TestCases/Linux/sysconf_interceptor_bypass_test.cc b/test/sanitizer_common/TestCases/Linux/sysconf_interceptor_bypass_test.cc
new file mode 100644
index 0000000..c3a6560
--- /dev/null
+++ b/test/sanitizer_common/TestCases/Linux/sysconf_interceptor_bypass_test.cc
@@ -0,0 +1,27 @@
+// RUN: %clangxx -O2 %s -o %t && %run %t 2>&1 | FileCheck %s
+
+// XFAIL: android
+
+#include <stdio.h>
+
+// getauxval() used instead of sysconf() in GetPageSize() is defined starting
+// glbc version 2.16.
+#if defined(__GLIBC_PREREQ) && __GLIBC_PREREQ(2, 16)
+extern "C" long sysconf(int name) {
+  fprintf(stderr, "sysconf wrapper called\n");
+  return 0;
+}
+#endif  // defined(__GLIBC_PREREQ) && __GLIBC_PREREQ(2, 16)
+
+int main() {
+  // All we need to check is that the sysconf() interceptor defined above was
+  // not called. Should it get called, it will crash right there, any
+  // instrumented code executed before sanitizer init is finished will crash
+  // accessing non-initialized sanitizer internals. Even if it will not crash
+  // in some configuration, it should never be called anyway.
+  fprintf(stderr, "Passed\n");
+  // CHECK-NOT: sysconf wrapper called
+  // CHECK: Passed
+  // CHECK-NOT: sysconf wrapper called
+  return 0;
+}
diff --git a/test/sanitizer_common/TestCases/Linux/unexpected_format_specifier_test.cc b/test/sanitizer_common/TestCases/Linux/unexpected_format_specifier_test.cc
new file mode 100644
index 0000000..6414955
--- /dev/null
+++ b/test/sanitizer_common/TestCases/Linux/unexpected_format_specifier_test.cc
@@ -0,0 +1,13 @@
+// RUN: %clang -w -O0 %s -o %t && %run %t 2>&1 | FileCheck %s
+// UNSUPPORTED: lsan
+// UNSUPPORTED: msan
+// UNSUPPORTED: ubsan
+#include <stdio.h>
+int main() {
+  int a;
+  printf("%Q\n", 1);
+  printf("%Q\n", 1);
+  printf("%Q\n", 1);
+}
+// CHECK: unexpected format specifier in printf interceptor: %Q (reported once per process)
+// CHECK-NOT: unexpected format specifier in printf interceptor
diff --git a/test/sanitizer_common/TestCases/Posix/dedup_token_length_test.cc b/test/sanitizer_common/TestCases/Posix/dedup_token_length_test.cc
index 2612957..7e93af4 100644
--- a/test/sanitizer_common/TestCases/Posix/dedup_token_length_test.cc
+++ b/test/sanitizer_common/TestCases/Posix/dedup_token_length_test.cc
@@ -7,9 +7,8 @@
 // RUN: env %tool_options='abort_on_error=0, dedup_token_length=3' not %run %t 2>&1   | FileCheck %s --check-prefix=CHECK3 --match-full-lines
 
 // REQUIRES: stable-runtime
-// FIXME: implement SEGV handler in other sanitizers, not just asan.
+// FIXME: implement SEGV handler in other sanitizers.
 // XFAIL: msan
-// XFAIL: lsan
 // XFAIL: tsan
 
 volatile int *null = 0;
diff --git a/test/asan/TestCases/Posix/dump_instruction_bytes.cc b/test/sanitizer_common/TestCases/Posix/dump_instruction_bytes.cc
similarity index 64%
rename from test/asan/TestCases/Posix/dump_instruction_bytes.cc
rename to test/sanitizer_common/TestCases/Posix/dump_instruction_bytes.cc
index b5b38ff..25e801a 100644
--- a/test/asan/TestCases/Posix/dump_instruction_bytes.cc
+++ b/test/sanitizer_common/TestCases/Posix/dump_instruction_bytes.cc
@@ -1,10 +1,16 @@
-// Check that ASan prints the faulting instruction bytes on
+// Check that sanitizer prints the faulting instruction bytes on
 // dump_instruction_bytes=1
-// RUN: %clangxx_asan  %s -o %t
-// RUN: %env_asan_opts=dump_instruction_bytes=1 not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-DUMP
+
+// clang-format off
+// RUN: %clangxx  %s -o %t
+// RUN: %env_tool_opts=dump_instruction_bytes=1 not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-DUMP
 // RUN: not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-NODUMP
-//
+// clang-format on
+
 // REQUIRES: x86-target-arch
+// FIXME: implement in other sanitizers.
+// XFAIL: msan
+// XFAIL: tsan
 
 int main() {
 #if defined(__x86_64__)
diff --git a/test/sanitizer_common/TestCases/Posix/dump_registers.cc b/test/sanitizer_common/TestCases/Posix/dump_registers.cc
new file mode 100644
index 0000000..07e87be
--- /dev/null
+++ b/test/sanitizer_common/TestCases/Posix/dump_registers.cc
@@ -0,0 +1,20 @@
+// Check that sanitizer prints registers dump_registers on dump_registers=1
+// RUN: %clangxx  %s -o %t
+// RUN: %env_tool_opts=dump_registers=0 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-NODUMP
+// RUN: %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-DUMP
+//
+// FIXME: Implement.
+// UNSUPPORTED: asan
+// UNSUPPORTED: lsan
+// UNSUPPORTED: msan
+// UNSUPPORTED: tsan
+// UNSUPPORTED: ubsan
+
+#include <signal.h>
+
+int main() {
+  raise(SIGSEGV);
+  // CHECK-DUMP: Register values
+  // CHECK-NODUMP-NOT: Register values
+  return 0;
+}
diff --git a/test/sanitizer_common/TestCases/Posix/fpe.cc b/test/sanitizer_common/TestCases/Posix/fpe.cc
index 9a6f808..46fe4f4 100644
--- a/test/sanitizer_common/TestCases/Posix/fpe.cc
+++ b/test/sanitizer_common/TestCases/Posix/fpe.cc
@@ -5,8 +5,8 @@
 // RUN: %env_tool_opts=handle_sigfpe=1 not         %run %t 2>&1 | FileCheck --check-prefix=CHECK1 %s
 // FIXME: implement in other sanitizers, not just asan.
 // XFAIL: msan
-// XFAIL: lsan
 // XFAIL: tsan
+// XFAIL: ubsan
 //
 // FIXME: seems to fail on ARM
 // REQUIRES: x86_64-target-arch
diff --git a/test/sanitizer_common/TestCases/Posix/getpass.cc b/test/sanitizer_common/TestCases/Posix/getpass.cc
index 251f911..b91a3d7 100644
--- a/test/sanitizer_common/TestCases/Posix/getpass.cc
+++ b/test/sanitizer_common/TestCases/Posix/getpass.cc
@@ -1,5 +1,8 @@
 // RUN: %clangxx -O0 -g %s -lutil -o %t && %run %t | FileCheck %s
+
 // REQUIRES: stable-runtime
+// XFAIL: android && asan
+
 #include <assert.h>
 #include <stdio.h>
 #include <unistd.h>
diff --git a/test/sanitizer_common/TestCases/Posix/sanitizer_set_death_callback_test.cc b/test/sanitizer_common/TestCases/Posix/sanitizer_set_death_callback_test.cc
index 12a56c7..8d2db36 100644
--- a/test/sanitizer_common/TestCases/Posix/sanitizer_set_death_callback_test.cc
+++ b/test/sanitizer_common/TestCases/Posix/sanitizer_set_death_callback_test.cc
@@ -8,6 +8,7 @@
 // the last line of main function. The problem doesn't reproduce with ASan because
 // quarantine prohibits memory block reuse for different allocations.
 // XFAIL: lsan-x86
+// XFAIL: ubsan
 
 #include <sanitizer/common_interface_defs.h>
 #include <stdio.h>
diff --git a/test/sanitizer_common/TestCases/Posix/sanitizer_set_report_fd_test.cc b/test/sanitizer_common/TestCases/Posix/sanitizer_set_report_fd_test.cc
index af7eea1..baa1d9a 100644
--- a/test/sanitizer_common/TestCases/Posix/sanitizer_set_report_fd_test.cc
+++ b/test/sanitizer_common/TestCases/Posix/sanitizer_set_report_fd_test.cc
@@ -5,9 +5,9 @@
 // RUN: not %run %t %t-out && FileCheck < %t-out %s
 
 // REQUIRES: stable-runtime
+// XFAIL: android && asan
 // FIXME: implement SEGV handler in other sanitizers, not just asan.
 // XFAIL: msan
-// XFAIL: lsan
 // XFAIL: tsan
 
 #include <sanitizer/common_interface_defs.h>
diff --git a/test/sanitizer_common/TestCases/Posix/weak_hook_test.cc b/test/sanitizer_common/TestCases/Posix/weak_hook_test.cc
index d566764..9176a52 100644
--- a/test/sanitizer_common/TestCases/Posix/weak_hook_test.cc
+++ b/test/sanitizer_common/TestCases/Posix/weak_hook_test.cc
@@ -4,6 +4,7 @@
 
 // Hooks are not implemented for lsan.
 // XFAIL: lsan
+// XFAIL: ubsan
 
 #include <string.h>
 #include <assert.h>
diff --git a/test/sanitizer_common/TestCases/corelimit.cc b/test/sanitizer_common/TestCases/corelimit.cc
index 0a86e5b..eb02afc 100644
--- a/test/sanitizer_common/TestCases/corelimit.cc
+++ b/test/sanitizer_common/TestCases/corelimit.cc
@@ -1,5 +1,5 @@
 // RUN: %clangxx -O0 %s -o %t && %run %t
-// UNSUPPORTED: lsan
+// UNSUPPORTED: lsan,ubsan
 
 #include <assert.h>
 #include <sys/time.h>
diff --git a/test/sanitizer_common/TestCases/get_module_and_offset_for_pc.cc b/test/sanitizer_common/TestCases/get_module_and_offset_for_pc.cc
index f5a18e6..69ccb72 100644
--- a/test/sanitizer_common/TestCases/get_module_and_offset_for_pc.cc
+++ b/test/sanitizer_common/TestCases/get_module_and_offset_for_pc.cc
@@ -1,8 +1,10 @@
 // RUN: %clangxx -DSHARED %s -shared -o %T/get_module_and_offset_for_pc.so -fPIC
 // RUN: %clangxx -DSO_DIR=\"%T\" -O0 %s -ldl -o %t
 // RUN: %run %t 2>&1 | FileCheck %s
+
 // UNSUPPORTED: i386-darwin
-//
+// XFAIL: android
+
 // Tests __sanitizer_get_module_and_offset_for_pc.
 
 #include <assert.h>
diff --git a/test/sanitizer_common/TestCases/malloc_hook.cc b/test/sanitizer_common/TestCases/malloc_hook.cc
index 59cd620..853bb66 100644
--- a/test/sanitizer_common/TestCases/malloc_hook.cc
+++ b/test/sanitizer_common/TestCases/malloc_hook.cc
@@ -2,6 +2,7 @@
 
 // Malloc/free hooks are not supported on Windows.
 // XFAIL: win32
+// XFAIL: ubsan
 
 #include <stdlib.h>
 #include <unistd.h>
diff --git a/test/sanitizer_common/TestCases/options-include.cc b/test/sanitizer_common/TestCases/options-include.cc
index 5b0b6d5..3d9127b 100644
--- a/test/sanitizer_common/TestCases/options-include.cc
+++ b/test/sanitizer_common/TestCases/options-include.cc
@@ -34,6 +34,8 @@
 // RUN: %env_tool_opts=include_if_exists='"%t.options-not-found.%b"' %run %t 2>&1 | tee %t.out
 // RUN: FileCheck %s --check-prefix=CHECK-WITHOUT-HELP --check-prefix=CHECK-FOUND < %t.out
 
+// Android tests run on remote device so includes will not work.
+// UNSUPPORTED: android
 
 #include <stdio.h>
 
diff --git a/test/sanitizer_common/TestCases/print-stack-trace.cc b/test/sanitizer_common/TestCases/print-stack-trace.cc
index 0055b27..fce8503 100644
--- a/test/sanitizer_common/TestCases/print-stack-trace.cc
+++ b/test/sanitizer_common/TestCases/print-stack-trace.cc
@@ -1,8 +1,10 @@
 // RUN: %clangxx -O0 %s -o %t && %env_tool_opts=stack_trace_format=DEFAULT %run %t 2>&1 | FileCheck %s
 // RUN: %clangxx -O3 %s -o %t && %env_tool_opts=stack_trace_format=DEFAULT %run %t 2>&1 | FileCheck %s
-// RUN: %env_tool_opts=stack_trace_format='"frame:%n lineno:%l"' %run %t 2>&1 | FileCheck %s --check-prefix=CUSTOM
+// RUN: %env_tool_opts=stack_trace_format=frame%n_lineno%l %run %t 2>&1 | FileCheck %s --check-prefix=CUSTOM
 // RUN: %env_tool_opts=symbolize_inline_frames=false:stack_trace_format=DEFAULT %run %t 2>&1 | FileCheck %s --check-prefix=NOINLINE
 
+// UNSUPPORTED: darwin
+
 #include <sanitizer/common_interface_defs.h>
 
 static inline void FooBarBaz() {
@@ -17,8 +19,8 @@
 // CHECK: {{    #1 0x.* in FooBarBaz(\(\))? .*}}print-stack-trace.cc:[[@LINE-8]]
 // CHECK: {{    #2 0x.* in main.*}}print-stack-trace.cc:[[@LINE-5]]
 
-// CUSTOM: frame:1 lineno:[[@LINE-11]]
-// CUSTOM: frame:2 lineno:[[@LINE-8]]
+// CUSTOM: frame1_lineno[[@LINE-11]]
+// CUSTOM: frame2_lineno[[@LINE-8]]
 
 // NOINLINE: #0 0x{{.*}} in __sanitizer_print_stack_trace
 // NOINLINE: #1 0x{{.*}} in main{{.*}}print-stack-trace.cc:[[@LINE-15]]
diff --git a/test/sanitizer_common/TestCases/sanitizer_coverage_inline8bit_counter.cc b/test/sanitizer_common/TestCases/sanitizer_coverage_inline8bit_counter.cc
new file mode 100644
index 0000000..58a64d1
--- /dev/null
+++ b/test/sanitizer_common/TestCases/sanitizer_coverage_inline8bit_counter.cc
@@ -0,0 +1,43 @@
+// Tests -fsanitize-coverage=inline-8bit-counters,pc-table
+//
+// REQUIRES: has_sancovcc,stable-runtime
+// UNSUPPORTED: i386-darwin
+//
+// RUN: %clangxx -O0 %s -fsanitize-coverage=inline-8bit-counters,pc-table -o %t
+// RUN: %run %t 2>&1 | FileCheck %s
+// XFAIL: tsan
+
+#include <stdio.h>
+#include <stdint.h>
+#include <assert.h>
+
+const char *first_counter;
+
+extern "C"
+void __sanitizer_cov_8bit_counters_init(const char *start, const char *end) {
+  printf("INIT: %p %p\n", start, end);
+  assert(end - start > 1);
+  first_counter = start;
+}
+
+uintptr_t FirstPC;
+uintptr_t FirstPCFlag;
+
+extern "C" void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg,
+                                         const uintptr_t *pcs_end) {
+  const uintptr_t *B = (const uintptr_t *)pcs_beg;
+  const uintptr_t *E = (const uintptr_t *)pcs_end;
+  assert(B + 1 < E);
+  FirstPC = B[0];
+  FirstPCFlag = B[1];
+}
+
+
+int main() {
+  assert(first_counter);
+  assert(*first_counter == 1);
+  assert(FirstPC == (uintptr_t)&main);
+  assert(FirstPCFlag == 1);
+  fprintf(stderr, "PASS\n");
+  // CHECK: PASS
+}
diff --git a/test/sanitizer_common/TestCases/sanitizer_coverage_no_prune.cc b/test/sanitizer_common/TestCases/sanitizer_coverage_no_prune.cc
new file mode 100644
index 0000000..9604da2
--- /dev/null
+++ b/test/sanitizer_common/TestCases/sanitizer_coverage_no_prune.cc
@@ -0,0 +1,16 @@
+// Tests -fsanitize-coverage=no-prune
+
+// REQUIRES: has_sancovcc,stable-runtime
+// UNSUPPORTED: i386-darwin
+// XFAIL: ubsan,tsan
+// XFAIL: android && asan
+
+// RUN: %clangxx -O0 %s -S -o - -emit-llvm -fsanitize-coverage=trace-pc,bb,no-prune 2>&1 | grep "call void @__sanitizer_cov_trace_pc" | count 3
+// RUN: %clangxx -O0 %s -S -o - -emit-llvm -fsanitize-coverage=trace-pc,bb          2>&1 | grep "call void @__sanitizer_cov_trace_pc" | count 2
+// RUN: %clangxx -O0 %s -S -o - -emit-llvm -fsanitize-coverage=trace-pc,no-prune    2>&1 | grep "call void @__sanitizer_cov_trace_pc" | count 4
+// RUN: %clangxx -O0 %s -S -o - -emit-llvm -fsanitize-coverage=trace-pc             2>&1 | grep "call void @__sanitizer_cov_trace_pc" | count 3
+
+void foo(int *a) {
+  if (a)
+    *a = 1;
+}
diff --git a/test/sanitizer_common/TestCases/sanitizer_coverage_stack_depth.cc b/test/sanitizer_common/TestCases/sanitizer_coverage_stack_depth.cc
new file mode 100644
index 0000000..90959ef
--- /dev/null
+++ b/test/sanitizer_common/TestCases/sanitizer_coverage_stack_depth.cc
@@ -0,0 +1,32 @@
+// Tests -fsanitize-coverage=stack-depth
+//
+// XFAIL: tsan
+//
+// RUN: %clangxx -O0 -std=c++11 -fsanitize-coverage=stack-depth %s -o %t
+// RUN: %run %t 2>&1 | FileCheck %s --implicit-check-not Assertion{{.*}}failed
+// RUN: %clangxx -O0 -std=c++11 -fsanitize-coverage=trace-pc-guard,stack-depth \
+// RUN:     %s -o %t
+// RUN: %run %t 2>&1 | FileCheck %s --implicit-check-not Assertion{{.*}}failed
+
+#include <cstdint>
+#include <cstdio>
+#include <cassert>
+
+thread_local uintptr_t __sancov_lowest_stack;
+uintptr_t last_stack;
+
+void foo(int recurse) {
+  assert(__sancov_lowest_stack < last_stack);
+  last_stack = __sancov_lowest_stack;
+  if (recurse <= 0) return;
+  foo(recurse - 1);
+}
+
+int main() {
+  last_stack = __sancov_lowest_stack;
+  foo(100);
+  printf("Success!\n");
+  return 0;
+}
+
+// CHECK: Success!
diff --git a/test/sanitizer_common/TestCases/sanitizer_coverage_symbolize.cc b/test/sanitizer_common/TestCases/sanitizer_coverage_symbolize.cc
index 48f32a7..28e2378 100644
--- a/test/sanitizer_common/TestCases/sanitizer_coverage_symbolize.cc
+++ b/test/sanitizer_common/TestCases/sanitizer_coverage_symbolize.cc
@@ -9,7 +9,6 @@
 // RUN: cd $DIR
 // RUN: %clangxx -O0 -fsanitize-coverage=trace-pc-guard %s -ldl -o %t
 // RUN: %env_tool_opts=coverage=1 %t 2>&1 | FileCheck %s
-// RUN: %env_tool_opts=coverage=1 SANCOV_OPTIONS=symbolize=0 %t 2>&1 | FileCheck %s --check-prefix=CHECK-NOSYM
 // RUN: rm -rf $DIR
 
 #include <stdio.h>
@@ -26,9 +25,4 @@
 }
 
 // CHECK: main
-// CHECK: SanitizerCoverage: ./sanitizer_coverage_symbolize.{{.*}}.sancov 2 PCs written
-// CHECK: call sancov
-
-// CHECK-NOSYM: main
-// CHECK-NOSYM: SanitizerCoverage: ./sanitizer_coverage_symbolize.{{.*}}.sancov 2 PCs written
-// CHECK-NOSYM-NOT: call sancov
+// CHECK: SanitizerCoverage: ./sanitizer_coverage_symbolize.{{.*}}.sancov: 2 PCs written
diff --git a/test/sanitizer_common/TestCases/sanitizer_coverage_trace_pc_guard-dso.cc b/test/sanitizer_common/TestCases/sanitizer_coverage_trace_pc_guard-dso.cc
index 68459b1..7a2eca8 100644
--- a/test/sanitizer_common/TestCases/sanitizer_coverage_trace_pc_guard-dso.cc
+++ b/test/sanitizer_common/TestCases/sanitizer_coverage_trace_pc_guard-dso.cc
@@ -1,8 +1,10 @@
 // Tests trace pc guard coverage collection.
-//
+
 // REQUIRES: has_sancovcc,stable-runtime
+// UNSUPPORTED: ubsan
 // XFAIL: tsan,darwin,powerpc64,s390x,mips
-//
+// XFAIL: android && asan
+
 // RUN: DIR=%t_workdir
 // RUN: CLANG_ARGS="-O0 -fsanitize-coverage=trace-pc-guard"
 // RUN: rm -rf $DIR
@@ -62,11 +64,11 @@
 // CHECK-NEXT: foo
 // CHECK-NEXT: bar
 // CHECK-NEXT: baz
-// CHECK-DAG: SanitizerCoverage: ./sanitizer_coverage_trace_pc_guard-dso.{{.*}}.sancov 2 PCs written
-// CHECK-DAG: SanitizerCoverage: ./sanitizer_coverage_trace_pc_guard-dso.{{.*}}_2.so.{{.*}}.sancov 1 PCs written
-// CHECK-DAG: SanitizerCoverage: ./sanitizer_coverage_trace_pc_guard-dso.{{.*}}_1.so.{{.*}}.sancov 1 PCs written
+// CHECK-DAG: SanitizerCoverage: ./sanitizer_coverage_trace_pc_guard-dso.{{.*}}.sancov: 2 PCs written
+// CHECK-DAG: SanitizerCoverage: ./sanitizer_coverage_trace_pc_guard-dso.{{.*}}_2.so.{{.*}}.sancov: 1 PCs written
+// CHECK-DAG: SanitizerCoverage: ./sanitizer_coverage_trace_pc_guard-dso.{{.*}}_1.so.{{.*}}.sancov: 1 PCs written
 //
 // CHECK-SANCOV: Ignoring {{.*}}_1.so and its coverage because __sanitizer_cov* functions were not found.
 // CHECK-SANCOV: Ignoring {{.*}}_2.so and its coverage because __sanitizer_cov* functions were not found.
-// CHECK-SANCOV-NEXT: sanitizer_coverage_trace_pc_guard-dso.cc:29 foo
-// CHECK-SANCOV-NEXT: sanitizer_coverage_trace_pc_guard-dso.cc:34 main
+// CHECK-SANCOV-NEXT: sanitizer_coverage_trace_pc_guard-dso.cc:[[@LINE-42]] foo
+// CHECK-SANCOV-NEXT: sanitizer_coverage_trace_pc_guard-dso.cc:[[@LINE-38]] main
diff --git a/test/sanitizer_common/TestCases/sanitizer_coverage_trace_pc_guard.cc b/test/sanitizer_common/TestCases/sanitizer_coverage_trace_pc_guard.cc
index 9dcbe6f..1adbf65 100644
--- a/test/sanitizer_common/TestCases/sanitizer_coverage_trace_pc_guard.cc
+++ b/test/sanitizer_common/TestCases/sanitizer_coverage_trace_pc_guard.cc
@@ -1,9 +1,10 @@
 // Tests trace pc guard coverage collection.
-//
+
 // REQUIRES: has_sancovcc,stable-runtime
-// UNSUPPORTED: i386-darwin
+// UNSUPPORTED: ubsan,i386-darwin
 // XFAIL: tsan,powerpc64,s390x,mips
-//
+// XFAIL: android && asan
+
 // RUN: DIR=%t_workdir
 // RUN: rm -rf $DIR
 // RUN: mkdir -p $DIR
@@ -15,9 +16,7 @@
 // RUN: %env_tool_opts=coverage=0 %t 2>&1 | FileCheck --check-prefix=CHECK-NOCOV %s
 // RUN: rm -rf $DIR
 // Make some room to stabilize line numbers
-//
-//
-//
+
 #include <stdio.h>
 
 int foo() {
@@ -34,9 +33,9 @@
 // CHECK: main
 // CHECK-NEXT: foo
 // CHECK-NEXT: foo
-// CHECK-NEXT: SanitizerCoverage: ./sanitizer_coverage_trace_pc_guard.{{.*}}.sancov 2 PCs written
+// CHECK-NEXT: SanitizerCoverage: ./sanitizer_coverage_trace_pc_guard.{{.*}}.sancov: 2 PCs written
 //
-// CHECK-SANCOV: sanitizer_coverage_trace_pc_guard.cc:23 foo
-// CHECK-SANCOV-NEXT: sanitizer_coverage_trace_pc_guard.cc:28 main
+// CHECK-SANCOV: sanitizer_coverage_trace_pc_guard.cc:[[@LINE-16]] foo
+// CHECK-SANCOV-NEXT: sanitizer_coverage_trace_pc_guard.cc:[[@LINE-12]] main
 //
 // CHECK-NOCOV-NOT: SanitizerCoverage
diff --git a/test/asan/android_commands/android_common.py b/test/sanitizer_common/android_commands/android_common.py
similarity index 73%
rename from test/asan/android_commands/android_common.py
rename to test/sanitizer_common/android_commands/android_common.py
index 1a295b7..fc26ee2 100644
--- a/test/asan/android_commands/android_common.py
+++ b/test/sanitizer_common/android_commands/android_common.py
@@ -1,4 +1,4 @@
-import os, subprocess, tempfile
+import os, sys, subprocess, tempfile
 import time
 
 ANDROID_TMPDIR = '/data/local/tmp/Output'
@@ -8,6 +8,11 @@
 if os.environ.get('ANDROID_RUN_VERBOSE') == '1':
     verbose = True
 
+def host_to_device_path(path):
+    rel = os.path.relpath(path, "/")
+    dev = os.path.join(ANDROID_TMPDIR, rel)
+    return dev
+
 def adb(args, attempts = 1):
     if verbose:
         print args
@@ -37,8 +42,5 @@
     return text
 
 def push_to_device(path):
-    # Workaround for https://code.google.com/p/android/issues/detail?id=65857
-    dst_path = os.path.join(ANDROID_TMPDIR, os.path.basename(path))
-    tmp_path = dst_path + '.push'
-    adb(['push', path, tmp_path], 5)
-    adb(['shell', 'cp "%s" "%s" 2>&1' % (tmp_path, dst_path)], 5)
+    dst_path = host_to_device_path(path)
+    adb(['push', path, dst_path], 5)
diff --git a/test/asan/android_commands/android_compile.py b/test/sanitizer_common/android_commands/android_compile.py
similarity index 100%
rename from test/asan/android_commands/android_compile.py
rename to test/sanitizer_common/android_commands/android_compile.py
diff --git a/test/asan/android_commands/android_run.py b/test/sanitizer_common/android_commands/android_run.py
similarity index 74%
rename from test/asan/android_commands/android_run.py
rename to test/sanitizer_common/android_commands/android_run.py
index f4ea52b..1c4f405 100755
--- a/test/asan/android_commands/android_run.py
+++ b/test/sanitizer_common/android_commands/android_run.py
@@ -5,28 +5,26 @@
 
 ANDROID_TMPDIR = '/data/local/tmp/Output'
 
-here = os.path.abspath(os.path.dirname(sys.argv[0]))
-device_binary = os.path.join(ANDROID_TMPDIR, os.path.basename(sys.argv[0]))
+device_binary = host_to_device_path(sys.argv[0])
 
 def build_env():
     args = []
     # Android linker ignores RPATH. Set LD_LIBRARY_PATH to Output dir.
     args.append('LD_LIBRARY_PATH=%s' % (ANDROID_TMPDIR,))
     for (key, value) in os.environ.items():
-        if key in ['ASAN_OPTIONS', 'ASAN_ACTIVATION_OPTIONS']:
+        if key in ['ASAN_OPTIONS', 'ASAN_ACTIVATION_OPTIONS', 'SCUDO_OPTIONS', 'UBSAN_OPTIONS']:
             args.append('%s="%s"' % (key, value))
     return ' '.join(args)
 
 is_64bit = (subprocess.check_output(['file', sys.argv[0] + '.real']).find('64-bit') != -1)
-asanwrapper = "" if is_64bit else "asanwrapper "
 
 device_env = build_env()
 device_args = ' '.join(sys.argv[1:]) # FIXME: escape?
 device_stdout = device_binary + '.stdout'
 device_stderr = device_binary + '.stderr'
 device_exitcode = device_binary + '.exitcode'
-ret = adb(['shell', 'cd %s && %s %s%s %s >%s 2>%s ; echo $? >%s' %
-           (ANDROID_TMPDIR, device_env, asanwrapper, device_binary, device_args,
+ret = adb(['shell', 'cd %s && %s %s %s >%s 2>%s ; echo $? >%s' %
+           (ANDROID_TMPDIR, device_env, device_binary, device_args,
             device_stdout, device_stderr, device_exitcode)])
 if ret != 0:
     sys.exit(ret)
diff --git a/test/sanitizer_common/ios_commands/iossim_compile.py b/test/sanitizer_common/ios_commands/iossim_compile.py
new file mode 100755
index 0000000..8fa480e
--- /dev/null
+++ b/test/sanitizer_common/ios_commands/iossim_compile.py
@@ -0,0 +1,32 @@
+#!/usr/bin/python
+
+import os, sys, subprocess
+
+output = None
+output_type = 'executable'
+
+args = sys.argv[1:]
+while args:
+    arg = args.pop(0)
+    if arg == '-shared':
+        output_type = 'shared'
+    elif arg == '-dynamiclib':
+        output_type = 'dylib'
+    elif arg == '-c':
+        output_type = 'object'
+    elif arg == '-S':
+        output_type = 'assembly'
+    elif arg == '-o':
+        output = args.pop(0)
+
+if output == None:
+    print "No output file name!"
+    sys.exit(1)
+
+ret = subprocess.call(sys.argv[1:])
+if ret != 0:
+    sys.exit(ret)
+
+# If we produce a dylib, ad-hoc sign it.
+if output_type in ['shared', 'dylib']:
+    ret = subprocess.call(["codesign", "-s", "-", output])
diff --git a/test/sanitizer_common/ios_commands/iossim_env.py b/test/sanitizer_common/ios_commands/iossim_env.py
new file mode 100755
index 0000000..28f6269
--- /dev/null
+++ b/test/sanitizer_common/ios_commands/iossim_env.py
@@ -0,0 +1,17 @@
+#!/usr/bin/python
+
+import os, sys, subprocess
+
+
+idx = 1
+for arg in sys.argv[1:]:
+  if not "=" in arg:
+    break
+  idx += 1
+  (argname, argval) = arg.split("=")
+  os.environ["SIMCTL_CHILD_" + argname] = argval
+
+exitcode = subprocess.call(sys.argv[idx:])
+if exitcode > 125:
+  exitcode = 126
+sys.exit(exitcode)
diff --git a/test/sanitizer_common/ios_commands/iossim_run.py b/test/sanitizer_common/ios_commands/iossim_run.py
new file mode 100755
index 0000000..47b847f
--- /dev/null
+++ b/test/sanitizer_common/ios_commands/iossim_run.py
@@ -0,0 +1,18 @@
+#!/usr/bin/python
+
+import os, sys, subprocess
+
+
+if not "SANITIZER_IOSSIM_TEST_DEVICE_IDENTIFIER" in os.environ:
+  raise EnvironmentError("Specify SANITIZER_IOSSIM_TEST_DEVICE_IDENTIFIER to select which simulator to use.")
+
+device_id = os.environ["SANITIZER_IOSSIM_TEST_DEVICE_IDENTIFIER"]
+
+for e in ["ASAN_OPTIONS", "TSAN_OPTIONS"]:
+  if e in os.environ:
+    os.environ["SIMCTL_CHILD_" + e] = os.environ[e]
+
+exitcode = subprocess.call(["xcrun", "simctl", "spawn", device_id] + sys.argv[1:])
+if exitcode > 125:
+  exitcode = 126
+sys.exit(exitcode)
diff --git a/test/sanitizer_common/lit.common.cfg b/test/sanitizer_common/lit.common.cfg
index 2926edb..a4a5d4e 100644
--- a/test/sanitizer_common/lit.common.cfg
+++ b/test/sanitizer_common/lit.common.cfg
@@ -18,21 +18,27 @@
 elif config.tool_name == "lsan":
   tool_cflags = ["-fsanitize=leak"]
   tool_options = "LSAN_OPTIONS"
+elif config.tool_name == "ubsan":
+  tool_cflags = ["-fsanitize=undefined"]
+  tool_options = "UBSAN_OPTIONS"
 else:
   lit_config.fatal("Unknown tool for sanitizer_common tests: %r" % config.tool_name)
 
 config.available_features.add(config.tool_name)
 
-if config.target_arch not in ['arm', 'armhf', 'aarch64']:
-  config.available_features.add('stable-runtime')
-
-if config.host_os == 'Linux' and config.target_arch == 'i386' and config.tool_name == "lsan":
+if config.host_os == 'Linux' and config.tool_name == "lsan" and config.target_arch == 'i386':
   config.available_features.add("lsan-x86")
 
 if config.host_os == 'Darwin':
   # On Darwin, we default to `abort_on_error=1`, which would make tests run
   # much slower. Let's override this and run lit tests with 'abort_on_error=0'.
   default_tool_options += ['abort_on_error=0']
+elif config.android:
+  # The same as on Darwin, we default to "abort_on_error=1" which slows down
+  # testing. Also, all existing tests are using "not" instead of "not --crash"
+  # which does not work for abort()-terminated programs.
+  default_tool_options += ['abort_on_error=0']
+
 default_tool_options_str = ':'.join(default_tool_options)
 if default_tool_options_str:
   config.environment[tool_options] = default_tool_options_str
@@ -42,7 +48,7 @@
 clang_cxxflags = config.cxx_mode_flags + clang_cflags
 
 def build_invocation(compile_flags):
-  return " " + " ".join([config.clang] + compile_flags) + " "
+  return " " + " ".join([config.compile_wrapper, config.clang] + compile_flags) + " "
 
 config.substitutions.append( ("%clang ", build_invocation(clang_cflags)) )
 config.substitutions.append( ("%clangxx ", build_invocation(clang_cxxflags)) )
diff --git a/test/sanitizer_common/print_address.h b/test/sanitizer_common/print_address.h
index 99261b3..db2e834 100644
--- a/test/sanitizer_common/print_address.h
+++ b/test/sanitizer_common/print_address.h
@@ -11,8 +11,8 @@
     // On FreeBSD, the %p conversion specifier works as 0x%x and thus does not
     // match to the format used in the diagnotic message.
     fprintf(stderr, "0x%012lx ", (unsigned long) p);
-#elif defined(__i386__)
-    fprintf(stderr, "0x%8lx ", (unsigned long) p);
+#elif defined(__i386__) || defined(__arm__)
+    fprintf(stderr, "0x%08lx ", (unsigned long) p);
 #elif defined(__mips64)
     fprintf(stderr, "0x%010lx ", (unsigned long) p);
 #endif
diff --git a/test/scudo/CMakeLists.txt b/test/scudo/CMakeLists.txt
index a899099..513168b 100644
--- a/test/scudo/CMakeLists.txt
+++ b/test/scudo/CMakeLists.txt
@@ -15,7 +15,15 @@
 
 set(SCUDO_TEST_ARCH ${SCUDO_SUPPORTED_ARCH})
 foreach(arch ${SCUDO_TEST_ARCH})
-  set(SCUDO_TEST_TARGET_ARCH ${arch})
+  if(ANDROID)
+    if (${arch} STREQUAL "i386")
+      set(SCUDO_TEST_TARGET_ARCH i686-android)
+    else()
+      set(SCUDO_TEST_TARGET_ARCH ${arch}-android)
+    endif()
+  else()
+    set(SCUDO_TEST_TARGET_ARCH ${arch})
+  endif()
   string(TOLOWER "-${arch}" SCUDO_TEST_CONFIG_SUFFIX)
   get_test_cc_for_arch(${arch} SCUDO_TEST_TARGET_CC SCUDO_TEST_TARGET_CFLAGS)
   string(TOUPPER ${arch} ARCH_UPPER_CASE)
diff --git a/test/scudo/alignment.cpp b/test/scudo/alignment.c
similarity index 86%
rename from test/scudo/alignment.cpp
rename to test/scudo/alignment.c
index 125ad8c..6235d50 100644
--- a/test/scudo/alignment.cpp
+++ b/test/scudo/alignment.c
@@ -15,7 +15,7 @@
   if (!strcmp(argv[1], "pointers")) {
     void *p = malloc(1U << 16);
     assert(p);
-    free(reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(p) | 1));
+    free((void *)((uintptr_t)p | 1));
   }
   return 0;
 }
diff --git a/test/scudo/double-free.cpp b/test/scudo/double-free.cpp
index ddc5205..5611803 100644
--- a/test/scudo/double-free.cpp
+++ b/test/scudo/double-free.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_scudo %s -o %t
+// RUN: %clangxx_scudo %s -o %t
 // RUN: not %run %t malloc   2>&1 | FileCheck %s
 // RUN: not %run %t new      2>&1 | FileCheck %s
 // RUN: not %run %t newarray 2>&1 | FileCheck %s
diff --git a/test/scudo/interface.cpp b/test/scudo/interface.cpp
index e9575ad..16523bf 100644
--- a/test/scudo/interface.cpp
+++ b/test/scudo/interface.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_scudo %s -lstdc++ -o %t
+// RUN: %clangxx_scudo %s -lstdc++ -o %t
 // RUN: %run %t ownership          2>&1
 // RUN: %run %t ownership-and-size 2>&1
 // RUN: %run %t heap-size          2>&1
diff --git a/test/scudo/lit.cfg b/test/scudo/lit.cfg
index adf16f5..2f6469d 100644
--- a/test/scudo/lit.cfg
+++ b/test/scudo/lit.cfg
@@ -8,32 +8,53 @@
 # Setup source root.
 config.test_source_root = os.path.dirname(__file__)
 
-# Path to the static library
-base_lib = os.path.join(config.compiler_rt_libdir,
-                        "libclang_rt.scudo-%s.a" % config.target_arch)
-whole_archive = "-Wl,-whole-archive %s -Wl,-no-whole-archive " % base_lib
+# Path to the shared & static libraries
+shared_libscudo = os.path.join(config.compiler_rt_libdir, "libclang_rt.scudo-%s.so" % config.target_arch)
+static_libscudo = os.path.join(config.compiler_rt_libdir, "libclang_rt.scudo-%s.a" % config.target_arch)
+static_libscudo_cxx = os.path.join(config.compiler_rt_libdir, "libclang_rt.scudo_cxx-%s.a" % config.target_arch)
+
+whole_archive = "-Wl,-whole-archive %s -Wl,-no-whole-archive " % static_libscudo
+whole_archive_cxx = "-Wl,-whole-archive %s -Wl,-no-whole-archive " % static_libscudo_cxx
 
 # Test suffixes.
 config.suffixes = ['.c', '.cc', '.cpp']
 
-# C flags.
+# C & CXX flags.
 c_flags = ([config.target_cflags] +
-           ["-std=c++11",
-           "-pthread",
+           ["-pthread",
            "-fPIE",
            "-pie",
            "-O0",
            "-UNDEBUG",
            "-ldl",
-           "-lrt",
            "-Wl,--gc-sections"])
 
+# Android doesn't want -lrt.
+if not config.android:
+  c_flags += ["-lrt"]
+
+cxx_flags = (c_flags + config.cxx_mode_flags + ["-std=c++11"])
+
 def build_invocation(compile_flags):                                            
-  return " " + " ".join([config.clang] + compile_flags) + " "                   
+  return " " + " ".join([config.compile_wrapper, config.clang] + compile_flags) + " "                   
 
 # Add clang substitutions.
-config.substitutions.append( ("%clang_scudo ",
-                              build_invocation(c_flags) + whole_archive) )
+config.substitutions.append(("%clang ", build_invocation(c_flags)))
+config.substitutions.append(("%clang_scudo ", build_invocation(c_flags) + whole_archive))
+config.substitutions.append(("%clangxx_scudo ", build_invocation(cxx_flags) + whole_archive + whole_archive_cxx))
+config.substitutions.append(("%shared_libscudo", shared_libscudo))
+
+# Platform-specific default SCUDO_OPTIONS for lit tests.
+default_scudo_opts = ''
+if config.android:
+  # Android defaults to abort_on_error=1, which doesn't work for us.
+  default_scudo_opts = 'abort_on_error=0'
+
+if default_scudo_opts:
+  config.environment['SCUDO_OPTIONS'] = default_scudo_opts
+  default_scudo_opts += ':'
+config.substitutions.append(('%env_scudo_opts=',
+                             'env SCUDO_OPTIONS=' + default_scudo_opts))
 
 # Hardened Allocator tests are currently supported on Linux only.
 if config.host_os not in ['Linux']:
diff --git a/test/scudo/malloc.cpp b/test/scudo/malloc.cpp
index 50e5259..6c6a6c4 100644
--- a/test/scudo/malloc.cpp
+++ b/test/scudo/malloc.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_scudo %s -lstdc++ -o %t
+// RUN: %clangxx_scudo %s -lstdc++ -o %t
 // RUN: %run %t 2>&1
 
 // Tests that a regular workflow of allocation, memory fill and free works as
diff --git a/test/scudo/memalign.c b/test/scudo/memalign.c
new file mode 100644
index 0000000..1fe6e3e
--- /dev/null
+++ b/test/scudo/memalign.c
@@ -0,0 +1,81 @@
+// RUN: %clang_scudo %s -o %t
+// RUN:                                                 %run %t valid   2>&1
+// RUN:                                             not %run %t invalid 2>&1
+// RUN: %env_scudo_opts=allocator_may_return_null=1     %run %t invalid 2>&1
+
+// Tests that the various aligned allocation functions work as intended. Also
+// tests for the condition where the alignment is not a power of 2.
+
+#include <assert.h>
+#include <errno.h>
+#include <malloc.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+// Sometimes the headers may not have this...
+void *aligned_alloc(size_t alignment, size_t size);
+
+int main(int argc, char **argv)
+{
+  void *p = NULL;
+  size_t alignment = 1U << 12;
+  size_t size = 1U << 12;
+  int err;
+
+  assert(argc == 2);
+
+  if (!strcmp(argv[1], "valid")) {
+    posix_memalign(&p, alignment, size);
+    assert(p);
+    assert(((uintptr_t)p & (alignment - 1)) == 0);
+    free(p);
+    p = aligned_alloc(alignment, size);
+    assert(p);
+    assert(((uintptr_t)p & (alignment - 1)) == 0);
+    free(p);
+    // Tests various combinations of alignment and sizes
+    for (int i = (sizeof(void *) == 4) ? 3 : 4; i < 19; i++) {
+      alignment = 1U << i;
+      for (int j = 1; j < 33; j++) {
+        size = 0x800 * j;
+        for (int k = 0; k < 3; k++) {
+          p = memalign(alignment, size - (2 * sizeof(void *) * k));
+          assert(p);
+          assert(((uintptr_t)p & (alignment - 1)) == 0);
+          free(p);
+        }
+      }
+    }
+    // For larger alignment, reduce the number of allocations to avoid running
+    // out of potential addresses (on 32-bit).
+    for (int i = 19; i <= 24; i++) {
+      for (int k = 0; k < 3; k++) {
+        p = memalign(alignment, 0x1000 - (2 * sizeof(void *) * k));
+        assert(p);
+        assert(((uintptr_t)p & (alignment - 1)) == 0);
+        free(p);
+      }
+    }
+  }
+  if (!strcmp(argv[1], "invalid")) {
+    // Alignment is not a power of 2.
+    p = memalign(alignment - 1, size);
+    assert(!p);
+    // Size is not a multiple of alignment.
+    p = aligned_alloc(alignment, size >> 1);
+    assert(!p);
+    void *p_unchanged = (void *)0x42UL;
+    p = p_unchanged;
+    // Alignment is not a power of 2.
+    err = posix_memalign(&p, 3, size);
+    assert(p == p_unchanged);
+    assert(err == EINVAL);
+    // Alignment is a power of 2, but not a multiple of size(void *).
+    err = posix_memalign(&p, 2, size);
+    assert(p == p_unchanged);
+    assert(err == EINVAL);
+  }
+  return 0;
+}
diff --git a/test/scudo/memalign.cpp b/test/scudo/memalign.cpp
deleted file mode 100644
index c263da7..0000000
--- a/test/scudo/memalign.cpp
+++ /dev/null
@@ -1,66 +0,0 @@
-// RUN: %clang_scudo %s -o %t
-// RUN:     %run %t valid   2>&1
-// RUN: not %run %t invalid 2>&1 | FileCheck %s
-
-// Tests that the various aligned allocation functions work as intended. Also
-// tests for the condition where the alignment is not a power of 2.
-
-#include <assert.h>
-#include <malloc.h>
-#include <stdlib.h>
-#include <string.h>
-
-// Reduce the size of the quarantine, or the test can run out of aligned memory
-// on 32-bit for the larger alignments.
-extern "C" const char *__scudo_default_options() {
-  return "QuarantineSizeMb=1";
-}
-
-// Sometimes the headers may not have this...
-extern "C" void *aligned_alloc (size_t alignment, size_t size);
-
-int main(int argc, char **argv)
-{
-  void *p = nullptr;
-  size_t alignment = 1U << 12;
-  size_t size = 1U << 12;
-
-  assert(argc == 2);
-
-  if (!strcmp(argv[1], "valid")) {
-    posix_memalign(&p, alignment, size);
-    assert(p);
-    free(p);
-    p = aligned_alloc(alignment, size);
-    assert(p);
-    free(p);
-    // Tests various combinations of alignment and sizes
-    for (int i = (sizeof(void *) == 4) ? 3 : 4; i < 19; i++) {
-      alignment = 1U << i;
-      for (int j = 1; j < 33; j++) {
-        size = 0x800 * j;
-        for (int k = 0; k < 3; k++) {
-          p = memalign(alignment, size - (2 * sizeof(void *) * k));
-          assert(p);
-          free(p);
-        }
-      }
-    }
-    // For larger alignment, reduce the number of allocations to avoid running
-    // out of potential addresses (on 32-bit).
-    for (int i = 19; i <= 24; i++) {
-      for (int k = 0; k < 3; k++) {
-        p = memalign(alignment, 0x1000 - (2 * sizeof(void *) * k));
-        assert(p);
-        free(p);
-      }
-    }
-  }
-  if (!strcmp(argv[1], "invalid")) {
-    p = memalign(alignment - 1, size);
-    free(p);
-  }
-  return 0;
-}
-
-// CHECK: ERROR: alignment is not a power of 2
diff --git a/test/scudo/mismatch.cpp b/test/scudo/mismatch.cpp
index 15dce83..b49e0ea 100644
--- a/test/scudo/mismatch.cpp
+++ b/test/scudo/mismatch.cpp
@@ -1,10 +1,12 @@
-// RUN: %clang_scudo %s -o %t
-// RUN: SCUDO_OPTIONS=DeallocationTypeMismatch=1 not %run %t mallocdel   2>&1 | FileCheck %s
-// RUN: SCUDO_OPTIONS=DeallocationTypeMismatch=0     %run %t mallocdel   2>&1
-// RUN: SCUDO_OPTIONS=DeallocationTypeMismatch=1 not %run %t newfree     2>&1 | FileCheck %s
-// RUN: SCUDO_OPTIONS=DeallocationTypeMismatch=0     %run %t newfree     2>&1
-// RUN: SCUDO_OPTIONS=DeallocationTypeMismatch=1 not %run %t memaligndel 2>&1 | FileCheck %s
-// RUN: SCUDO_OPTIONS=DeallocationTypeMismatch=0     %run %t memaligndel 2>&1
+// RUN: %clangxx_scudo %s -o %t
+// RUN: %env_scudo_opts=DeallocationTypeMismatch=1 not %run %t mallocdel       2>&1 | FileCheck --check-prefix=CHECK-dealloc %s
+// RUN: %env_scudo_opts=DeallocationTypeMismatch=0     %run %t mallocdel       2>&1
+// RUN: %env_scudo_opts=DeallocationTypeMismatch=1 not %run %t newfree         2>&1 | FileCheck --check-prefix=CHECK-dealloc %s
+// RUN: %env_scudo_opts=DeallocationTypeMismatch=0     %run %t newfree         2>&1
+// RUN: %env_scudo_opts=DeallocationTypeMismatch=1 not %run %t memaligndel     2>&1 | FileCheck --check-prefix=CHECK-dealloc %s
+// RUN: %env_scudo_opts=DeallocationTypeMismatch=0     %run %t memaligndel     2>&1
+// RUN: %env_scudo_opts=DeallocationTypeMismatch=1 not %run %t memalignrealloc 2>&1 | FileCheck --check-prefix=CHECK-realloc %s
+// RUN: %env_scudo_opts=DeallocationTypeMismatch=0     %run %t memalignrealloc 2>&1
 
 // Tests that type mismatches between allocation and deallocation functions are
 // caught when the related option is set.
@@ -32,7 +34,14 @@
     assert(p);
     delete p;
   }
+  if (!strcmp(argv[1], "memalignrealloc")) {
+    void *p = memalign(16, 16);
+    assert(p);
+    p = realloc(p, 32);
+    free(p);
+  }
   return 0;
 }
 
-// CHECK: ERROR: allocation type mismatch on address
+// CHECK-dealloc: ERROR: allocation type mismatch when deallocating address
+// CHECK-realloc: ERROR: allocation type mismatch when reallocating address
diff --git a/test/scudo/options.cpp b/test/scudo/options.cpp
index f4afe7d..605b632 100644
--- a/test/scudo/options.cpp
+++ b/test/scudo/options.cpp
@@ -1,7 +1,7 @@
-// RUN: %clang_scudo %s -o %t
-// RUN:                                              %run %t 2>&1
-// RUN: SCUDO_OPTIONS=DeallocationTypeMismatch=0     %run %t 2>&1
-// RUN: SCUDO_OPTIONS=DeallocationTypeMismatch=1 not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_scudo %s -o %t
+// RUN:                                                %run %t 2>&1
+// RUN: %env_scudo_opts=DeallocationTypeMismatch=0     %run %t 2>&1
+// RUN: %env_scudo_opts=DeallocationTypeMismatch=1 not %run %t 2>&1 | FileCheck %s
 
 // Tests that the options can be passed using getScudoDefaultOptions, and that
 // the environment ones take precedence over them.
@@ -22,4 +22,4 @@
   return 0;
 }
 
-// CHECK: ERROR: allocation type mismatch on address
+// CHECK: ERROR: allocation type mismatch when deallocating address
diff --git a/test/scudo/overflow.cpp b/test/scudo/overflow.c
similarity index 80%
rename from test/scudo/overflow.cpp
rename to test/scudo/overflow.c
index d128245..c5a58f8 100644
--- a/test/scudo/overflow.cpp
+++ b/test/scudo/overflow.c
@@ -1,6 +1,6 @@
 // RUN: %clang_scudo %s -o %t
-// RUN:                                  not %run %t malloc     2>&1 | FileCheck %s
-// RUN: SCUDO_OPTIONS=QuarantineSizeMb=1 not %run %t quarantine 2>&1 | FileCheck %s
+// RUN:                                     not %run %t malloc     2>&1 | FileCheck %s
+// RUN: %env_scudo_opts=QuarantineSizeKb=64 not %run %t quarantine 2>&1 | FileCheck %s
 
 // Tests that header corruption of an allocated or quarantined chunk is caught.
 
@@ -29,7 +29,7 @@
     ((char *)p)[-(offset + 2)] ^= 1;
     // Trigger the quarantine recycle
     for (int i = 0; i < 0x100; i++) {
-      p = malloc(1U << 16);
+      p = malloc(1U << 8);
       free(p);
     }
   }
diff --git a/test/scudo/preinit.cpp b/test/scudo/preinit.c
similarity index 76%
rename from test/scudo/preinit.cpp
rename to test/scudo/preinit.c
index b8c01a4..792b236 100644
--- a/test/scudo/preinit.cpp
+++ b/test/scudo/preinit.c
@@ -4,11 +4,15 @@
 // Verifies that calling malloc in a preinit_array function succeeds, and that
 // the resulting pointer can be freed at program termination.
 
+// On some Android versions, calling mmap() from a preinit function segfaults.
+// It looks like __mmap2.S ends up calling a NULL function pointer.
+// UNSUPPORTED: android
+
 #include <assert.h>
 #include <stdlib.h>
 #include <string.h>
 
-static void *global_p = nullptr;
+static void *global_p = NULL;
 
 void __init(void) {
   global_p = malloc(1);
diff --git a/test/scudo/preload.cpp b/test/scudo/preload.cpp
new file mode 100644
index 0000000..b41a70e
--- /dev/null
+++ b/test/scudo/preload.cpp
@@ -0,0 +1,20 @@
+// Test that the preloaded runtime works without linking the static library.
+
+// RUN: %clang %s -lstdc++ -o %t
+// RUN: env LD_PRELOAD=%shared_libscudo not %run %t 2>&1 | FileCheck %s
+
+// This way of setting LD_PRELOAD does not work with Android test runner.
+// REQUIRES: !android
+
+#include <assert.h>
+
+int main(int argc, char *argv[]) {
+  int *p = new int;
+  assert(p);
+  *p = 0;
+  delete p;
+  delete p;
+  return 0;
+}
+
+// CHECK: ERROR: invalid chunk state
diff --git a/test/scudo/quarantine.c b/test/scudo/quarantine.c
new file mode 100644
index 0000000..ddbb920
--- /dev/null
+++ b/test/scudo/quarantine.c
@@ -0,0 +1,124 @@
+// RUN: %clang_scudo %s -o %t
+// RUN: %env_scudo_opts="QuarantineSizeMb=1:QuarantineSizeKb=64"           not %run %t unused 2>&1
+// RUN: %env_scudo_opts="QuarantineSizeMb=1:QuarantineChunksUpToSize=256"  not %run %t unused 2>&1
+// RUN: %env_scudo_opts="QuarantineSizeKb=0:ThreadLocalQuarantineSizeKb=0"     %run %t zeroquarantine 2>&1
+// RUN: %env_scudo_opts=QuarantineSizeKb=64                                    %run %t smallquarantine 2>&1
+// RUN: %env_scudo_opts=QuarantineChunksUpToSize=256                           %run %t threshold 2>&1
+// RUN: %env_scudo_opts="QuarantineSizeMb=1"                                   %run %t oldquarantine 2>&1
+
+// Tests that the quarantine prevents a chunk from being reused right away.
+// Also tests that a chunk will eventually become available again for
+// allocation when the recycling criteria has been met. Finally, tests the
+// threshold up to which a chunk is quarantine, and the old quarantine behavior.
+
+#include <assert.h>
+#include <malloc.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sanitizer/allocator_interface.h>
+
+int main(int argc, char **argv)
+{
+  void *p, *old_p;
+  size_t allocated_bytes, size = 1U << 8, alignment = 1U << 8;
+
+  assert(argc == 2);
+  // First, warm up the allocator for the classes used.
+  p = malloc(size);
+  assert(p);
+  free(p);
+  p = malloc(size + 1);
+  assert(p);
+  free(p);
+  assert(posix_memalign(&p, alignment, size) == 0);
+  assert(p);
+  free(p);
+  assert(posix_memalign(&p, alignment, size + 1) == 0);
+  assert(p);
+  free(p);
+
+  if (!strcmp(argv[1], "zeroquarantine")) {
+    // Verifies that a chunk is deallocated right away when the local and
+    // global quarantine sizes are 0.
+    allocated_bytes = __sanitizer_get_current_allocated_bytes();
+    p = malloc(size);
+    assert(p);
+    assert(__sanitizer_get_current_allocated_bytes() > allocated_bytes);
+    free(p);
+    assert(__sanitizer_get_current_allocated_bytes() == allocated_bytes);
+  }
+  if (!strcmp(argv[1], "smallquarantine")) {
+    // The delayed freelist will prevent a chunk from being available right
+    // away.
+    p = malloc(size);
+    assert(p);
+    old_p = p;
+    free(p);
+    p = malloc(size);
+    assert(p);
+    assert(old_p != p);
+    free(p);
+
+    // Eventually the chunk should become available again.
+    char found = 0;
+    for (int i = 0; i < 0x200 && !found; i++) {
+      p = malloc(size);
+      assert(p);
+      found = (p == old_p);
+      free(p);
+    }
+    assert(found);
+  }
+  if (!strcmp(argv[1], "threshold")) {
+    // Verifies that a chunk of size greater than the threshold will be freed
+    // right away. Alignment has no impact on the threshold.
+    allocated_bytes = __sanitizer_get_current_allocated_bytes();
+    p = malloc(size + 1);
+    assert(p);
+    assert(__sanitizer_get_current_allocated_bytes() > allocated_bytes);
+    free(p);
+    assert(__sanitizer_get_current_allocated_bytes() == allocated_bytes);
+    assert(posix_memalign(&p, alignment, size + 1) == 0);
+    assert(__sanitizer_get_current_allocated_bytes() > allocated_bytes);
+    free(p);
+    assert(__sanitizer_get_current_allocated_bytes() == allocated_bytes);
+    // Verifies that a chunk of size lower or equal to the threshold will be
+    // quarantined.
+    p = malloc(size);
+    assert(p);
+    assert(__sanitizer_get_current_allocated_bytes() > allocated_bytes);
+    free(p);
+    assert(__sanitizer_get_current_allocated_bytes() > allocated_bytes);
+    allocated_bytes = __sanitizer_get_current_allocated_bytes();
+    assert(posix_memalign(&p, alignment, size) == 0);
+    assert(__sanitizer_get_current_allocated_bytes() > allocated_bytes);
+    free(p);
+    assert(__sanitizer_get_current_allocated_bytes() > allocated_bytes);
+  }
+  if (!strcmp(argv[1], "oldquarantine")) {
+    // Verifies that we quarantine everything if the deprecated quarantine
+    // option is specified. Alignment has no impact on the threshold.
+    allocated_bytes = __sanitizer_get_current_allocated_bytes();
+    p = malloc(size);
+    assert(p);
+    assert(__sanitizer_get_current_allocated_bytes() > allocated_bytes);
+    free(p);
+    assert(__sanitizer_get_current_allocated_bytes() > allocated_bytes);
+    allocated_bytes = __sanitizer_get_current_allocated_bytes();
+    assert(posix_memalign(&p, alignment, size) == 0);
+    assert(p);
+    assert(__sanitizer_get_current_allocated_bytes() > allocated_bytes);
+    free(p);
+    assert(__sanitizer_get_current_allocated_bytes() > allocated_bytes);
+    // Secondary backed allocation.
+    allocated_bytes = __sanitizer_get_current_allocated_bytes();
+    p = malloc(1U << 19);
+    assert(p);
+    assert(__sanitizer_get_current_allocated_bytes() > allocated_bytes);
+    free(p);
+    assert(__sanitizer_get_current_allocated_bytes() > allocated_bytes);
+  }
+
+  return 0;
+}
diff --git a/test/scudo/quarantine.cpp b/test/scudo/quarantine.cpp
deleted file mode 100644
index 39ce1bd..0000000
--- a/test/scudo/quarantine.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
-// RUN: %clang_scudo %s -o %t
-// RUN: SCUDO_OPTIONS="QuarantineSizeMb=0:ThreadLocalQuarantineSizeKb=0" %run %t zeroquarantine 2>&1
-// RUN: SCUDO_OPTIONS=QuarantineSizeMb=1                                 %run %t smallquarantine 2>&1
-
-// Tests that the quarantine prevents a chunk from being reused right away.
-// Also tests that a chunk will eventually become available again for
-// allocation when the recycling criteria has been met.
-
-#include <assert.h>
-#include <malloc.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <sanitizer/allocator_interface.h>
-
-int main(int argc, char **argv)
-{
-  void *p, *old_p;
-  size_t allocated_bytes, size = 1U << 16;
-
-  assert(argc == 2);
-
-  if (!strcmp(argv[1], "zeroquarantine")) {
-    // Verifies that a chunk is deallocated right away when the local and
-    // global quarantine sizes are 0.
-    allocated_bytes = __sanitizer_get_current_allocated_bytes();
-    p = malloc(size);
-    assert(p);
-    assert(__sanitizer_get_current_allocated_bytes() > allocated_bytes);
-    free(p);
-    assert(__sanitizer_get_current_allocated_bytes() == allocated_bytes);
-  }
-  if (!strcmp(argv[1], "smallquarantine")) {
-    // The delayed freelist will prevent a chunk from being available right
-    // away.
-    p = malloc(size);
-    assert(p);
-    old_p = p;
-    free(p);
-    p = malloc(size);
-    assert(p);
-    assert(old_p != p);
-    free(p);
-
-    // Eventually the chunk should become available again.
-    bool found = false;
-    for (int i = 0; i < 0x100 && found == false; i++) {
-      p = malloc(size);
-      assert(p);
-      found = (p == old_p);
-      free(p);
-    }
-    assert(found == true);
-  }
-
-  return 0;
-}
diff --git a/test/scudo/random_shuffle.cpp b/test/scudo/random_shuffle.cpp
index 41e67de..f886cb1 100644
--- a/test/scudo/random_shuffle.cpp
+++ b/test/scudo/random_shuffle.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_scudo %s -o %t
+// RUN: %clangxx_scudo %s -o %t
 // RUN: rm -rf %T/random_shuffle_tmp_dir
 // RUN: mkdir %T/random_shuffle_tmp_dir
 // RUN: %run %t 100 > %T/random_shuffle_tmp_dir/out1
@@ -7,7 +7,6 @@
 // RUN: %run %t 10000 > %T/random_shuffle_tmp_dir/out2
 // RUN: not diff %T/random_shuffle_tmp_dir/out?
 // RUN: rm -rf %T/random_shuffle_tmp_dir
-// UNSUPPORTED: i386-linux,i686-linux,arm-linux,armhf-linux,aarch64-linux
 
 // Tests that the allocator shuffles the chunks before returning to the user.
 
diff --git a/test/scudo/realloc.cpp b/test/scudo/realloc.cpp
index da37720..254c67a 100644
--- a/test/scudo/realloc.cpp
+++ b/test/scudo/realloc.cpp
@@ -1,14 +1,13 @@
-// RUN: %clang_scudo %s -lstdc++ -o %t
-// RUN:     %run %t pointers 2>&1
-// RUN:     %run %t contents 2>&1
-// RUN: not %run %t memalign 2>&1 | FileCheck %s
+// RUN: %clangxx_scudo %s -lstdc++ -o %t
+// RUN: %run %t pointers 2>&1
+// RUN: %run %t contents 2>&1
+// RUN: %run %t usablesize 2>&1
 
 // Tests that our reallocation function returns the same pointer when the
 // requested size can fit into the previously allocated chunk. Also tests that
 // a new chunk is returned if the size is greater, and that the contents of the
-// chunk are left unchanged.
-// As a final test, make sure that a chunk allocated by memalign cannot be
-// reallocated.
+// chunk are left unchanged. Finally, checks that realloc copies the usable
+// size of the old chunk to the new one (as opposed to the requested size).
 
 #include <assert.h>
 #include <malloc.h>
@@ -24,42 +23,65 @@
 
   assert(argc == 2);
 
-  for (size_t size : sizes) {
-    if (!strcmp(argv[1], "pointers")) {
-      old_p = p = realloc(nullptr, size);
-      assert(p);
-      size = malloc_usable_size(p);
-      // Our realloc implementation will return the same pointer if the size
-      // requested is lower than or equal to the usable size of the associated
-      // chunk.
-      p = realloc(p, size - 1);
-      assert(p == old_p);
+  if (!strcmp(argv[1], "usablesize")) {
+    // This tests a sketchy behavior inherited from poorly written libraries
+    // that have become somewhat standard. When realloc'ing a chunk, the
+    // copied contents should span the usable size of the chunk, not the
+    // requested size.
+    size_t size = 496, usable_size;
+    p = nullptr;
+    // Make sure we get a chunk with a usable size actually larger than size.
+    do {
+      if (p) free(p);
+      size += 16;
+      p = malloc(size);
+      usable_size = malloc_usable_size(p);
+      assert(usable_size >= size);
+    } while (usable_size == size);
+    for (int i = 0; i < usable_size; i++)
+      reinterpret_cast<char *>(p)[i] = 'A';
+    old_p = p;
+    // Make sure we get a different chunk so that the data is actually copied.
+    do {
+      size *= 2;
       p = realloc(p, size);
-      assert(p == old_p);
-      // And a new one if the size is greater.
-      p = realloc(p, size + 1);
-      assert(p != old_p);
-      // A size of 0 will free the chunk and return nullptr.
-      p = realloc(p, 0);
-      assert(!p);
-      old_p = nullptr;
-    }
-    if (!strcmp(argv[1], "contents")) {
-      p = realloc(nullptr, size);
       assert(p);
-      for (int i = 0; i < size; i++)
-        reinterpret_cast<char *>(p)[i] = 'A';
-      p = realloc(p, size + 1);
-      // The contents of the reallocated chunk must match the original one.
-      for (int i = 0; i < size; i++)
-        assert(reinterpret_cast<char *>(p)[i] == 'A');
-    }
-    if (!strcmp(argv[1], "memalign")) {
-      // A chunk coming from memalign cannot be reallocated.
-      p = memalign(16, size);
-      assert(p);
-      p = realloc(p, size);
-      free(p);
+    } while (p == old_p);
+    // The contents of the new chunk must match the old one up to usable_size.
+    for (int i = 0; i < usable_size; i++)
+      assert(reinterpret_cast<char *>(p)[i] == 'A');
+    free(p);
+  } else {
+    for (size_t size : sizes) {
+      if (!strcmp(argv[1], "pointers")) {
+        old_p = p = realloc(nullptr, size);
+        assert(p);
+        size = malloc_usable_size(p);
+        // Our realloc implementation will return the same pointer if the size
+        // requested is lower than or equal to the usable size of the associated
+        // chunk.
+        p = realloc(p, size - 1);
+        assert(p == old_p);
+        p = realloc(p, size);
+        assert(p == old_p);
+        // And a new one if the size is greater.
+        p = realloc(p, size + 1);
+        assert(p != old_p);
+        // A size of 0 will free the chunk and return nullptr.
+        p = realloc(p, 0);
+        assert(!p);
+        old_p = nullptr;
+      }
+      if (!strcmp(argv[1], "contents")) {
+        p = realloc(nullptr, size);
+        assert(p);
+        for (int i = 0; i < size; i++)
+          reinterpret_cast<char *>(p)[i] = 'A';
+        p = realloc(p, size + 1);
+        // The contents of the reallocated chunk must match the original one.
+        for (int i = 0; i < size; i++)
+          assert(reinterpret_cast<char *>(p)[i] == 'A');
+      }
     }
   }
   return 0;
diff --git a/test/scudo/secondary.cpp b/test/scudo/secondary.c
similarity index 97%
rename from test/scudo/secondary.cpp
rename to test/scudo/secondary.c
index dc14f8c..b770ca0 100644
--- a/test/scudo/secondary.cpp
+++ b/test/scudo/secondary.c
@@ -36,7 +36,7 @@
   assert(p);
   memset(p, 'A', size); // This should not trigger anything.
   // Set up the SIGSEGV handler now, as the rest should trigger an AV.
-  sigaction(SIGSEGV, &a, nullptr);
+  sigaction(SIGSEGV, &a, NULL);
   if (!strcmp(argv[1], "after")) {
     for (int i = 0; i < page_size; i++)
       p[size + i] = 'A';
diff --git a/test/scudo/sized-delete.cpp b/test/scudo/sized-delete.cpp
index e467f55..85df05e 100644
--- a/test/scudo/sized-delete.cpp
+++ b/test/scudo/sized-delete.cpp
@@ -1,10 +1,10 @@
-// RUN: %clang_scudo -fsized-deallocation %s -o %t
-// RUN: SCUDO_OPTIONS=DeleteSizeMismatch=1     %run %t gooddel    2>&1
-// RUN: SCUDO_OPTIONS=DeleteSizeMismatch=1 not %run %t baddel     2>&1 | FileCheck %s
-// RUN: SCUDO_OPTIONS=DeleteSizeMismatch=0     %run %t baddel     2>&1
-// RUN: SCUDO_OPTIONS=DeleteSizeMismatch=1     %run %t gooddelarr 2>&1
-// RUN: SCUDO_OPTIONS=DeleteSizeMismatch=1 not %run %t baddelarr  2>&1 | FileCheck %s
-// RUN: SCUDO_OPTIONS=DeleteSizeMismatch=0     %run %t baddelarr  2>&1
+// RUN: %clangxx_scudo -fsized-deallocation %s -o %t
+// RUN: %env_scudo_opts=DeleteSizeMismatch=1     %run %t gooddel    2>&1
+// RUN: %env_scudo_opts=DeleteSizeMismatch=1 not %run %t baddel     2>&1 | FileCheck %s
+// RUN: %env_scudo_opts=DeleteSizeMismatch=0     %run %t baddel     2>&1
+// RUN: %env_scudo_opts=DeleteSizeMismatch=1     %run %t gooddelarr 2>&1
+// RUN: %env_scudo_opts=DeleteSizeMismatch=1 not %run %t baddelarr  2>&1 | FileCheck %s
+// RUN: %env_scudo_opts=DeleteSizeMismatch=0     %run %t baddelarr  2>&1
 
 // Ensures that the sized delete operator errors out when the appropriate
 // option is passed and the sizes do not match between allocation and
diff --git a/test/scudo/sizes.cpp b/test/scudo/sizes.cpp
index 981b859..73fc71f 100644
--- a/test/scudo/sizes.cpp
+++ b/test/scudo/sizes.cpp
@@ -1,9 +1,13 @@
-// RUN: %clang_scudo %s -o %t
-// RUN: SCUDO_OPTIONS=allocator_may_return_null=0 not %run %t malloc 2>&1 | FileCheck %s
-// RUN: SCUDO_OPTIONS=allocator_may_return_null=1     %run %t malloc 2>&1
-// RUN: SCUDO_OPTIONS=allocator_may_return_null=0 not %run %t calloc 2>&1 | FileCheck %s
-// RUN: SCUDO_OPTIONS=allocator_may_return_null=1     %run %t calloc 2>&1
-// RUN:                                               %run %t usable 2>&1
+// RUN: %clangxx_scudo %s -lstdc++ -o %t
+// RUN: %env_scudo_opts=allocator_may_return_null=0 not %run %t malloc 2>&1 | FileCheck %s
+// RUN: %env_scudo_opts=allocator_may_return_null=1     %run %t malloc 2>&1
+// RUN: %env_scudo_opts=allocator_may_return_null=0 not %run %t calloc 2>&1 | FileCheck %s
+// RUN: %env_scudo_opts=allocator_may_return_null=1     %run %t calloc 2>&1
+// RUN: %env_scudo_opts=allocator_may_return_null=0 not %run %t new 2>&1 | FileCheck %s
+// RUN: %env_scudo_opts=allocator_may_return_null=1 not %run %t new 2>&1 | FileCheck %s
+// RUN: %env_scudo_opts=allocator_may_return_null=0 not %run %t new-nothrow 2>&1 | FileCheck %s
+// RUN: %env_scudo_opts=allocator_may_return_null=1     %run %t new-nothrow 2>&1
+// RUN:                                                 %run %t usable 2>&1
 
 // Tests for various edge cases related to sizes, notably the maximum size the
 // allocator can allocate. Tests that an integer overflow in the parameters of
@@ -15,26 +19,38 @@
 #include <string.h>
 
 #include <limits>
+#include <new>
 
-int main(int argc, char **argv)
-{
+int main(int argc, char **argv) {
   assert(argc == 2);
-  if (!strcmp(argv[1], "malloc")) {
-    // Currently the maximum size the allocator can allocate is 1ULL<<40 bytes.
-    size_t size = std::numeric_limits<size_t>::max();
-    void *p = malloc(size);
+  const char *action = argv[1];
+  fprintf(stderr, "%s:\n", action);
+
+#if __LP64__ || defined(_WIN64)
+  static const size_t kMaxAllowedMallocSize = 1ULL << 40;
+  static const size_t kChunkHeaderSize = 16;
+#else
+  static const size_t kMaxAllowedMallocSize = 2UL << 30;
+  static const size_t kChunkHeaderSize = 8;
+#endif
+
+  if (!strcmp(action, "malloc")) {
+    void *p = malloc(kMaxAllowedMallocSize);
     assert(!p);
-    size = (1ULL << 40) - 16;
-    p = malloc(size);
+    p = malloc(kMaxAllowedMallocSize - kChunkHeaderSize);
     assert(!p);
-  }
-  if (!strcmp(argv[1], "calloc")) {
+  } else if (!strcmp(action, "calloc")) {
     // Trigger an overflow in calloc.
     size_t size = std::numeric_limits<size_t>::max();
     void *p = calloc((size / 0x1000) + 1, 0x1000);
     assert(!p);
-  }
-  if (!strcmp(argv[1], "usable")) {
+  } else if (!strcmp(action, "new")) {
+    void *p = operator new(kMaxAllowedMallocSize);
+    assert(!p);
+  } else if (!strcmp(action, "new-nothrow")) {
+    void *p = operator new(kMaxAllowedMallocSize, std::nothrow);
+    assert(!p);
+  } else if (!strcmp(action, "usable")) {
     // Playing with the actual usable size of a chunk.
     void *p = malloc(1007);
     assert(p);
@@ -47,7 +63,10 @@
     assert(size >= 2014);
     memset(p, 'B', size);
     free(p);
+  } else {
+    assert(0);
   }
+
   return 0;
 }
 
diff --git a/test/scudo/threads.c b/test/scudo/threads.c
new file mode 100644
index 0000000..b34e6f0
--- /dev/null
+++ b/test/scudo/threads.c
@@ -0,0 +1,65 @@
+// RUN: %clang_scudo %s -o %t
+// RUN: %env_scudo_opts="QuarantineSizeKb=0:ThreadLocalQuarantineSizeKb=0"     %run %t 5 1000000 2>&1
+// RUN: %env_scudo_opts="QuarantineSizeKb=1024:ThreadLocalQuarantineSizeKb=64" %run %t 5 1000000 2>&1
+
+// Tests parallel allocations and deallocations of memory chunks from a number
+// of concurrent threads, with and without quarantine.
+// This test passes if everything executes properly without crashing.
+
+#include <assert.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <sanitizer/allocator_interface.h>
+
+int num_threads;
+int total_num_alloc;
+const int kMaxNumThreads = 500;
+pthread_t tid[kMaxNumThreads];
+
+pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+char go = 0;
+
+void *thread_fun(void *arg) {
+  pthread_mutex_lock(&mutex);
+  while (!go) pthread_cond_wait(&cond, &mutex);
+  pthread_mutex_unlock(&mutex);
+  for (int i = 0; i < total_num_alloc / num_threads; i++) {
+    void *p = malloc(10);
+    __asm__ __volatile__("" : : "r"(p) : "memory");
+    free(p);
+  }
+  return 0;
+}
+
+int main(int argc, char** argv) {
+  assert(argc == 3);
+  num_threads = atoi(argv[1]);
+  assert(num_threads > 0);
+  assert(num_threads <= kMaxNumThreads);
+  total_num_alloc = atoi(argv[2]);
+  assert(total_num_alloc > 0);
+
+  printf("%d threads, %d allocations in each\n", num_threads,
+         total_num_alloc / num_threads);
+  fprintf(stderr, "Heap size before: %zd\n", __sanitizer_get_heap_size());
+  fprintf(stderr, "Allocated bytes before: %zd\n",
+          __sanitizer_get_current_allocated_bytes());
+
+  for (int i = 0; i < num_threads; i++)
+    pthread_create(&tid[i], 0, thread_fun, 0);
+  pthread_mutex_lock(&mutex);
+  go = 1;
+  pthread_cond_broadcast(&cond);
+  pthread_mutex_unlock(&mutex);
+  for (int i = 0; i < num_threads; i++)
+    pthread_join(tid[i], 0);
+
+  fprintf(stderr, "Heap size after: %zd\n", __sanitizer_get_heap_size());
+  fprintf(stderr, "Allocated bytes after: %zd\n",
+          __sanitizer_get_current_allocated_bytes());
+
+  return 0;
+}
diff --git a/test/scudo/tsd_destruction.c b/test/scudo/tsd_destruction.c
new file mode 100644
index 0000000..1b0d0ef
--- /dev/null
+++ b/test/scudo/tsd_destruction.c
@@ -0,0 +1,42 @@
+// RUN: %clang_scudo %s -o %t
+// RUN: %run %t 2>&1
+
+#include <locale.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Some of glibc's own thread local data is destroyed after a user's thread
+// local destructors are called, via __libc_thread_freeres. This might involve
+// calling free, as is the case for strerror_thread_freeres.
+// If there is no prior heap operation in the thread, this free would end up 
+// initializing some thread specific data that would never be destroyed
+// properly, while still being deallocated when the TLS goes away. As a result,
+// a program could SEGV, usually in
+// __sanitizer::AllocatorGlobalStats::Unregister, where one of the doubly
+// linked list links would refer to a now unmapped memory area.
+
+// This test reproduces those circumstances. Success means executing without
+// a segmentation fault.
+
+const int kNumThreads = 16;
+pthread_t tid[kNumThreads];
+
+void *thread_func(void *arg) {
+  uintptr_t i = (uintptr_t)arg;
+  if ((i & 1) == 0) free(malloc(16));
+  // Calling strerror_l allows for strerror_thread_freeres to be called.
+  strerror_l(0, LC_GLOBAL_LOCALE);
+  return 0;
+}
+
+int main(int argc, char** argv) {
+  for (uintptr_t j = 0; j < 8; j++) {
+    for (uintptr_t i = 0; i < kNumThreads; i++)
+      pthread_create(&tid[i], 0, thread_func, (void *)i);
+    for (uintptr_t i = 0; i < kNumThreads; i++)
+      pthread_join(tid[i], 0);
+  }
+  return 0;
+}
diff --git a/test/scudo/valloc.c b/test/scudo/valloc.c
new file mode 100644
index 0000000..132c4f2
--- /dev/null
+++ b/test/scudo/valloc.c
@@ -0,0 +1,65 @@
+// RUN: %clang_scudo %s -o %t
+// RUN:                                                 %run %t valid   2>&1
+// RUN:                                             not %run %t invalid 2>&1
+// RUN: %env_scudo_opts=allocator_may_return_null=1     %run %t invalid 2>&1
+// UNSUPPORTED: android, armhf-linux
+
+// Tests that valloc and pvalloc work as intended.
+
+#include <assert.h>
+#include <errno.h>
+#include <malloc.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+
+size_t round_up_to(size_t size, size_t alignment) {
+  return (size + alignment - 1) & ~(alignment - 1);
+}
+
+int main(int argc, char **argv)
+{
+  void *p = NULL;
+  size_t size, page_size;
+
+  assert(argc == 2);
+
+  page_size = sysconf(_SC_PAGESIZE);
+  // Check that the page size is a power of two.
+  assert((page_size & (page_size - 1)) == 0);
+
+  if (!strcmp(argv[1], "valid")) {
+    for (int i = (sizeof(void *) == 4) ? 3 : 4; i < 21; i++) {
+      size = 1U << i;
+      p = valloc(size - (2 * sizeof(void *)));
+      assert(p);
+      assert(((uintptr_t)p & (page_size - 1)) == 0);
+      free(p);
+      p = pvalloc(size - (2 * sizeof(void *)));
+      assert(p);
+      assert(((uintptr_t)p & (page_size - 1)) == 0);
+      assert(malloc_usable_size(p) >= round_up_to(size, page_size));
+      free(p);
+      p = valloc(size);
+      assert(p);
+      assert(((uintptr_t)p & (page_size - 1)) == 0);
+      free(p);
+      p = pvalloc(size);
+      assert(p);
+      assert(((uintptr_t)p & (page_size - 1)) == 0);
+      assert(malloc_usable_size(p) >= round_up_to(size, page_size));
+      free(p);
+    }
+  }
+  if (!strcmp(argv[1], "invalid")) {
+    // Size passed to pvalloc overflows when rounded up.
+    p = pvalloc((size_t)-1);
+    assert(!p);
+    assert(errno == ENOMEM);
+    errno = 0;
+    p = pvalloc((size_t)-page_size);
+    assert(!p);
+    assert(errno == ENOMEM);
+  }
+  return 0;
+}
diff --git a/test/tsan/CMakeLists.txt b/test/tsan/CMakeLists.txt
index 2db6ce0..af32956 100644
--- a/test/tsan/CMakeLists.txt
+++ b/test/tsan/CMakeLists.txt
@@ -1,3 +1,5 @@
+set(TSAN_LIT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
+
 set(TSAN_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS})
 if(${COMPILER_RT_DEFAULT_TARGET_ARCH} MATCHES "x86_64")
   list(APPEND TSAN_TEST_DEPS GotsanRuntimeCheck)
@@ -7,7 +9,7 @@
 endif()
 if(COMPILER_RT_HAS_LIBCXX_SOURCES AND
    COMPILER_RT_TEST_COMPILER_ID STREQUAL "Clang"
-   AND NOT APPLE)
+   AND NOT APPLE AND NOT ANDROID)
   list(APPEND TSAN_TEST_DEPS libcxx_tsan)
   set(TSAN_HAS_LIBCXX True)
 else()
@@ -22,6 +24,11 @@
 endif()
 
 foreach(arch ${TSAN_TEST_ARCH})
+  set(TSAN_TEST_IOS "0")
+  pythonize_bool(TSAN_TEST_IOS)
+  set(TSAN_TEST_IOSSIM "0")
+  pythonize_bool(TSAN_TEST_IOSSIM)
+
   set(TSAN_TEST_TARGET_ARCH ${arch})
   string(TOLOWER "-${arch}" TSAN_TEST_CONFIG_SUFFIX)
   get_test_cc_for_arch(${arch} TSAN_TEST_TARGET_CC TSAN_TEST_TARGET_CFLAGS)
@@ -35,6 +42,53 @@
   list(APPEND TSAN_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME})
 endforeach()
 
+# iOS and iOS simulator test suites
+# These are not added into "check-all", in order to run these tests, use
+# "check-tsan-iossim-x86_64" and similar. They also require an extra environment
+# variable to select which iOS device or simulator to use, e.g.:
+# SANITIZER_IOSSIM_TEST_DEVICE_IDENTIFIER="iPhone 6"
+if(APPLE)
+  set(EXCLUDE_FROM_ALL ON)
+
+  set(TSAN_TEST_TARGET_CC ${COMPILER_RT_TEST_COMPILER})
+  set(TSAN_TEST_IOS "1")
+  pythonize_bool(TSAN_TEST_IOS)
+
+  set(arch "x86_64")
+  set(TSAN_TEST_IOSSIM "1")
+  pythonize_bool(TSAN_TEST_IOSSIM)
+  set(TSAN_TEST_TARGET_ARCH ${arch})
+  set(TSAN_TEST_TARGET_CFLAGS "-arch ${arch} -isysroot ${DARWIN_iossim_SYSROOT} ${COMPILER_RT_TEST_COMPILER_CFLAGS}")
+  set(TSAN_TEST_CONFIG_SUFFIX "-${arch}-iossim")
+  string(TOUPPER ${arch} ARCH_UPPER_CASE)
+  set(CONFIG_NAME "IOSSim${ARCH_UPPER_CASE}Config")
+  configure_lit_site_cfg(
+    ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
+    ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg
+    )
+  add_lit_testsuite(check-tsan-iossim-${arch} "ThreadSanitizer iOS Simulator ${arch} tests"
+    ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/
+    DEPENDS ${TSAN_TEST_DEPS})
+
+  set(arch "arm64")
+  set(TSAN_TEST_IOSSIM "0")
+  pythonize_bool(TSAN_TEST_IOSSIM)
+  set(TSAN_TEST_TARGET_ARCH ${arch})
+  set(TSAN_TEST_TARGET_CFLAGS "-arch ${arch} -isysroot ${DARWIN_ios_SYSROOT} ${COMPILER_RT_TEST_COMPILER_CFLAGS}")
+  set(TSAN_TEST_CONFIG_SUFFIX "-${arch}-ios")
+  string(TOUPPER ${arch} ARCH_UPPER_CASE)
+  set(CONFIG_NAME "IOS${ARCH_UPPER_CASE}Config")
+  configure_lit_site_cfg(
+    ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
+    ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg
+    )
+  add_lit_testsuite(check-tsan-ios-${arch} "ThreadSanitizer iOS Simulator ${arch} tests"
+    ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/
+    DEPENDS ${TSAN_TEST_DEPS})
+
+  set(EXCLUDE_FROM_ALL OFF)
+endif()
+
 if(COMPILER_RT_INCLUDE_TESTS)
   configure_lit_site_cfg(
     ${CMAKE_CURRENT_SOURCE_DIR}/Unit/lit.site.cfg.in
diff --git a/test/tsan/Darwin/deadlock.mm b/test/tsan/Darwin/deadlock.mm
new file mode 100644
index 0000000..36ddfad
--- /dev/null
+++ b/test/tsan/Darwin/deadlock.mm
@@ -0,0 +1,47 @@
+// RUN: %clang_tsan %s -o %t -framework Foundation
+// RUN: %deflake %run %t 2>&1 | FileCheck %s
+
+#import <Foundation/Foundation.h>
+
+#import "../test.h"
+
+pthread_mutex_t m1;
+pthread_mutex_t m2;
+
+int main(int argc, const char *argv[]) {
+  barrier_init(&barrier, 2);
+  fprintf(stderr, "Hello world.\n");
+
+  pthread_mutex_init(&m1, NULL);
+  pthread_mutex_init(&m2, NULL);
+  
+  dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
+    pthread_mutex_lock(&m1);
+    pthread_mutex_lock(&m2);
+    pthread_mutex_unlock(&m2);
+    pthread_mutex_unlock(&m1);
+
+    barrier_wait(&barrier);
+  });
+  dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
+    barrier_wait(&barrier);
+
+    pthread_mutex_lock(&m2);
+    pthread_mutex_lock(&m1);
+    pthread_mutex_unlock(&m1);
+    pthread_mutex_unlock(&m2);
+
+    dispatch_sync(dispatch_get_main_queue(), ^{
+      CFRunLoopStop(CFRunLoopGetCurrent());
+    });
+  });
+
+  CFRunLoopRun();
+  
+  fprintf(stderr, "Done.\n");
+  return 0;
+}
+
+// CHECK: Hello world.
+// CHECK: WARNING: ThreadSanitizer: lock-order-inversion (potential deadlock)
+// CHECK: Done.
diff --git a/test/tsan/Darwin/debug_external.cc b/test/tsan/Darwin/debug_external.cc
index ac05a01..2418a27 100644
--- a/test/tsan/Darwin/debug_external.cc
+++ b/test/tsan/Darwin/debug_external.cc
@@ -6,6 +6,8 @@
 #include <stdlib.h>
 #include <string.h>
 
+#include "../test.h"
+
 extern "C" {
 void __tsan_on_report(void *report);
 int __tsan_get_report_loc(void *report, unsigned long idx, const char **type,
@@ -15,16 +17,16 @@
                           unsigned long trace_size);
 int __tsan_get_report_loc_object_type(void *report, unsigned long idx,
                                       const char **object_type);
-void *__tsan_external_register_tag(const char *object_type);
-void __tsan_external_assign_tag(void *addr, void *tag);
 }
 
 void *Thread(void *arg) {
+  barrier_wait(&barrier);
   *((long *)arg) = 42;
   return NULL;
 }
 
 int main() {
+  barrier_init(&barrier, 2);
   void *tag = __tsan_external_register_tag("MyObject");
   long *obj = (long *)malloc(sizeof(long));
   fprintf(stderr, "obj = %p\n", obj);
@@ -34,6 +36,7 @@
   pthread_t t;
   pthread_create(&t, 0, Thread, obj);
   *obj = 41;
+  barrier_wait(&barrier);
   pthread_join(t, 0);
   fprintf(stderr, "Done.\n");
   return 0;
diff --git a/test/tsan/Darwin/dlopen.cc b/test/tsan/Darwin/dlopen.cc
index 7382a6d..3d12b81 100644
--- a/test/tsan/Darwin/dlopen.cc
+++ b/test/tsan/Darwin/dlopen.cc
@@ -4,6 +4,8 @@
 
 // REQUIRES: osx-autointerception
 
+// XFAIL: ios
+
 // RUN: %clangxx_tsan %s -o %t.so -shared -DSHARED_LIB
 // RUN: %clangxx_tsan -fno-sanitize=thread %s -o %t
 
diff --git a/test/tsan/Darwin/external-dups.cc b/test/tsan/Darwin/external-dups.cc
new file mode 100644
index 0000000..ca1eb3e
--- /dev/null
+++ b/test/tsan/Darwin/external-dups.cc
@@ -0,0 +1,58 @@
+// RUN: %clangxx_tsan %s -o %t
+// RUN: %deflake %run %t 2>&1 | FileCheck %s
+
+#include <thread>
+
+#import "../test.h"
+
+void *tag;
+
+__attribute__((no_sanitize("thread")))
+void ExternalWrite(void *addr) {
+  __tsan_external_write(addr, __builtin_return_address(0), tag);
+}
+
+int main(int argc, char *argv[]) {
+  barrier_init(&barrier, 2);
+  tag = __tsan_external_register_tag("HelloWorld");
+  fprintf(stderr, "Start.\n");
+  // CHECK: Start.
+   
+  for (int i = 0; i < 4; i++) {
+    void *opaque_object = malloc(16);
+    std::thread t1([opaque_object] {
+      ExternalWrite(opaque_object);
+      barrier_wait(&barrier);
+    });
+    std::thread t2([opaque_object] {
+      barrier_wait(&barrier);
+      ExternalWrite(opaque_object);
+    });
+    // CHECK: WARNING: ThreadSanitizer: race on HelloWorld
+    t1.join();
+    t2.join();
+  }
+  
+  fprintf(stderr, "First phase done.\n");
+  // CHECK: First phase done.
+
+  for (int i = 0; i < 4; i++) {
+    void *opaque_object = malloc(16);
+    std::thread t1([opaque_object] {
+      ExternalWrite(opaque_object);
+      barrier_wait(&barrier);
+    });
+    std::thread t2([opaque_object] {
+      barrier_wait(&barrier);
+      ExternalWrite(opaque_object);
+    });
+    // CHECK: WARNING: ThreadSanitizer: race on HelloWorld
+    t1.join();
+    t2.join();
+  }
+
+  fprintf(stderr, "Second phase done.\n");
+  // CHECK: Second phase done.
+}
+
+// CHECK: ThreadSanitizer: reported 2 warnings
diff --git a/test/tsan/Darwin/external-ignore-noninstrumented.cc b/test/tsan/Darwin/external-ignore-noninstrumented.cc
new file mode 100644
index 0000000..d2acaf5
--- /dev/null
+++ b/test/tsan/Darwin/external-ignore-noninstrumented.cc
@@ -0,0 +1,19 @@
+// RUN: %clangxx_tsan -shared %p/external-lib.cc -fno-sanitize=thread -DUSE_TSAN_CALLBACKS \
+// RUN:   -o %t-lib.dylib -install_name @rpath/`basename %t-lib.dylib`
+
+// RUN: %clangxx_tsan -shared %p/external-noninstrumented-module.cc %t-lib.dylib -fno-sanitize=thread \
+// RUN:   -o %t-module.dylib -install_name @rpath/`basename %t-module.dylib`
+
+// RUN: %clangxx_tsan %s %t-module.dylib -o %t
+// RUN: %run %t 2>&1 | FileCheck %s
+
+#include <stdio.h>
+
+extern "C" void NonInstrumentedModule();
+int main(int argc, char *argv[]) {
+  NonInstrumentedModule();
+  fprintf(stderr, "Done.\n");
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer
+// CHECK: Done.
diff --git a/test/tsan/Darwin/external-lib.cc b/test/tsan/Darwin/external-lib.cc
new file mode 100644
index 0000000..f0afdf1
--- /dev/null
+++ b/test/tsan/Darwin/external-lib.cc
@@ -0,0 +1,68 @@
+// This file is used from other tests.
+// RUN: true
+
+#include <dlfcn.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+struct MyObject;
+typedef MyObject *MyObjectRef;
+extern "C" {
+  void InitializeLibrary();
+  MyObject *ObjectCreate();
+  long ObjectRead(MyObject *);
+  void ObjectWrite(MyObject *, long);
+  void ObjectWriteAnother(MyObject *, long);
+}
+
+struct MyObject {
+  long _val;
+  long _another;
+};
+
+#if defined(USE_TSAN_CALLBACKS)
+static void *tag;
+void *(*callback_register_tag)(const char *object_type);
+void *(*callback_assign_tag)(void *addr, void *tag);
+void (*callback_read)(void *addr, void *caller_pc, void *tag);
+void (*callback_write)(void *addr, void *caller_pc, void *tag);
+#endif
+
+void InitializeLibrary() {
+#if defined(USE_TSAN_CALLBACKS)
+  callback_register_tag = (decltype(callback_register_tag))dlsym(RTLD_DEFAULT, "__tsan_external_register_tag");
+  callback_assign_tag = (decltype(callback_assign_tag))dlsym(RTLD_DEFAULT, "__tsan_external_assign_tag");
+  callback_read = (decltype(callback_read))dlsym(RTLD_DEFAULT, "__tsan_external_read");
+  callback_write = (decltype(callback_write))dlsym(RTLD_DEFAULT, "__tsan_external_write");
+  tag = callback_register_tag("MyLibrary::MyObject");
+#endif
+}
+
+MyObject *ObjectCreate() {
+  MyObject *ref = (MyObject *)malloc(sizeof(MyObject));
+#if defined(USE_TSAN_CALLBACKS)
+  callback_assign_tag(ref, tag);
+#endif
+  return ref;
+}
+
+long ObjectRead(MyObject *ref) {
+#if defined(USE_TSAN_CALLBACKS)
+  callback_read(ref, __builtin_return_address(0), tag);
+#endif
+  return ref->_val;
+}
+
+void ObjectWrite(MyObject *ref, long val) {
+#if defined(USE_TSAN_CALLBACKS)
+  callback_write(ref, __builtin_return_address(0), tag);
+#endif
+  ref->_val = val;
+}
+
+void ObjectWriteAnother(MyObject *ref, long val) {
+#if defined(USE_TSAN_CALLBACKS)
+  callback_write(ref, __builtin_return_address(0), tag);
+#endif
+  ref->_another = val;
+}
diff --git a/test/tsan/Darwin/external-noninstrumented-module.cc b/test/tsan/Darwin/external-noninstrumented-module.cc
new file mode 100644
index 0000000..ce65970
--- /dev/null
+++ b/test/tsan/Darwin/external-noninstrumented-module.cc
@@ -0,0 +1,27 @@
+// This file is used from other tests.
+// RUN: true
+
+#include <thread>
+
+#include <stdio.h>
+#include <stdlib.h>
+
+struct MyObject;
+typedef MyObject *MyObjectRef;
+extern "C" {
+  void InitializeLibrary();
+  MyObject *ObjectCreate();
+  long ObjectRead(MyObject *);
+  void ObjectWrite(MyObject *, long);
+  void ObjectWriteAnother(MyObject *, long);
+}
+
+extern "C" void NonInstrumentedModule() {
+  InitializeLibrary();
+  
+  MyObjectRef ref = ObjectCreate();
+  std::thread t1([ref]{ ObjectWrite(ref, 42); });
+  std::thread t2([ref]{ ObjectWrite(ref, 43); });
+  t1.join();
+  t2.join();
+}
diff --git a/test/tsan/Darwin/external-swift.cc b/test/tsan/Darwin/external-swift.cc
new file mode 100644
index 0000000..f6f9e7f
--- /dev/null
+++ b/test/tsan/Darwin/external-swift.cc
@@ -0,0 +1,92 @@
+// RUN: %clangxx_tsan %s -o %t
+// RUN: %deflake %run %t 2>&1 | FileCheck %s
+
+#include <thread>
+
+#import "../test.h"
+
+extern "C" {
+void __tsan_write8(void *addr);
+}
+
+static void *tag = (void *)0x1;
+
+__attribute__((no_sanitize("thread")))
+void ExternalWrite(void *addr) {
+  __tsan_external_write(addr, nullptr, tag);
+}
+
+__attribute__((no_sanitize("thread")))
+void RegularWrite(void *addr) {
+  __tsan_write8(addr);
+}
+
+int main(int argc, char *argv[]) {
+  barrier_init(&barrier, 2);
+  fprintf(stderr, "Start.\n");
+  // CHECK: Start.
+  
+  {
+    void *opaque_object = malloc(16);
+    std::thread t1([opaque_object] {
+      ExternalWrite(opaque_object);
+      barrier_wait(&barrier);
+    });
+    std::thread t2([opaque_object] {
+      barrier_wait(&barrier);
+      ExternalWrite(opaque_object);
+    });
+    // CHECK: WARNING: ThreadSanitizer: Swift access race
+    // CHECK: Modifying access of Swift variable at {{.*}} by thread {{.*}}
+    // CHECK: Previous modifying access of Swift variable at {{.*}} by thread {{.*}}
+    // CHECK: SUMMARY: ThreadSanitizer: Swift access race
+    t1.join();
+    t2.join();
+  }
+
+  fprintf(stderr, "external+external test done.\n");
+  // CHECK: external+external test done.
+
+  {
+    void *opaque_object = malloc(16);
+    std::thread t1([opaque_object] {
+      ExternalWrite(opaque_object);
+      barrier_wait(&barrier);
+    });
+    std::thread t2([opaque_object] {
+      barrier_wait(&barrier);
+      RegularWrite(opaque_object);
+    });
+    // CHECK: WARNING: ThreadSanitizer: Swift access race
+    // CHECK: Write of size 8 at {{.*}} by thread {{.*}}
+    // CHECK: Previous modifying access of Swift variable at {{.*}} by thread {{.*}}
+    // CHECK: SUMMARY: ThreadSanitizer: Swift access race
+    t1.join();
+    t2.join();
+  }
+  
+  fprintf(stderr, "external+regular test done.\n");
+  // CHECK: external+regular test done.
+  
+  {
+    void *opaque_object = malloc(16);
+    std::thread t1([opaque_object] {
+      RegularWrite(opaque_object);
+      barrier_wait(&barrier);
+    });
+    std::thread t2([opaque_object] {
+      barrier_wait(&barrier);
+      ExternalWrite(opaque_object);
+    });
+    // CHECK: WARNING: ThreadSanitizer: Swift access race
+    // CHECK: Modifying access of Swift variable at {{.*}} by thread {{.*}}
+    // CHECK: Previous write of size 8 at {{.*}} by thread {{.*}}
+    // CHECK: SUMMARY: ThreadSanitizer: Swift access race
+    t1.join();
+    t2.join();
+  }
+  
+  fprintf(stderr, "regular+external test done.\n");
+  // CHECK: regular+external test done.
+}
+
diff --git a/test/tsan/Darwin/external.cc b/test/tsan/Darwin/external.cc
index 3c5e71a..e72281a 100644
--- a/test/tsan/Darwin/external.cc
+++ b/test/tsan/Darwin/external.cc
@@ -1,6 +1,15 @@
-// RUN: %clangxx_tsan %s -o %t-lib-instrumented.dylib              -shared -DSHARED_LIB
-// RUN: %clangxx_tsan %s -o %t-lib-noninstrumented.dylib           -shared -DSHARED_LIB -fno-sanitize=thread
-// RUN: %clangxx_tsan %s -o %t-lib-noninstrumented-callbacks.dylib -shared -DSHARED_LIB -fno-sanitize=thread -DUSE_TSAN_CALLBACKS
+// RUN: %clangxx_tsan %p/external-lib.cc -shared \
+// RUN:                               -o %t-lib-instrumented.dylib \
+// RUN:   -install_name @rpath/`basename %t-lib-instrumented.dylib`
+
+// RUN: %clangxx_tsan %p/external-lib.cc -shared -fno-sanitize=thread \
+// RUN:                               -o %t-lib-noninstrumented.dylib \
+// RUN:   -install_name @rpath/`basename %t-lib-noninstrumented.dylib`
+
+// RUN: %clangxx_tsan %p/external-lib.cc -shared -fno-sanitize=thread -DUSE_TSAN_CALLBACKS \
+// RUN:                               -o %t-lib-noninstrumented-callbacks.dylib \
+// RUN:   -install_name @rpath/`basename %t-lib-noninstrumented-callbacks.dylib`
+
 // RUN: %clangxx_tsan %s %t-lib-instrumented.dylib -o %t-lib-instrumented
 // RUN: %clangxx_tsan %s %t-lib-noninstrumented.dylib -o %t-lib-noninstrumented
 // RUN: %clangxx_tsan %s %t-lib-noninstrumented-callbacks.dylib -o %t-lib-noninstrumented-callbacks
@@ -14,8 +23,6 @@
 
 #include <thread>
 
-#include <dlfcn.h>
-#include <pthread.h>
 #include <stdio.h>
 #include <stdlib.h>
 
@@ -29,62 +36,6 @@
   void ObjectWriteAnother(MyObject *, long);
 }
 
-#if defined(SHARED_LIB)
-
-struct MyObject {
-  long _val;
-  long _another;
-};
-
-#if defined(USE_TSAN_CALLBACKS)
-static void *tag;
-void *(*callback_register_tag)(const char *object_type);
-void *(*callback_assign_tag)(void *addr, void *tag);
-void (*callback_read)(void *addr, void *caller_pc, void *tag);
-void (*callback_write)(void *addr, void *caller_pc, void *tag);
-#endif
-
-void InitializeLibrary() {
-#if defined(USE_TSAN_CALLBACKS)
-  callback_register_tag = (decltype(callback_register_tag))dlsym(RTLD_DEFAULT, "__tsan_external_register_tag");
-  callback_assign_tag = (decltype(callback_assign_tag))dlsym(RTLD_DEFAULT, "__tsan_external_assign_tag");
-  callback_read = (decltype(callback_read))dlsym(RTLD_DEFAULT, "__tsan_external_read");
-  callback_write = (decltype(callback_write))dlsym(RTLD_DEFAULT, "__tsan_external_write");
-  tag = callback_register_tag("MyLibrary::MyObject");
-#endif
-}
-
-MyObject *ObjectCreate() {
-  MyObject *ref = (MyObject *)malloc(sizeof(MyObject));
-#if defined(USE_TSAN_CALLBACKS)
-  callback_assign_tag(ref, tag);
-#endif
-  return ref;
-}
-
-long ObjectRead(MyObject *ref) {
-#if defined(USE_TSAN_CALLBACKS)
-  callback_read(ref, __builtin_return_address(0), tag);
-#endif
-  return ref->_val;
-}
-
-void ObjectWrite(MyObject *ref, long val) {
-#if defined(USE_TSAN_CALLBACKS)
-  callback_write(ref, __builtin_return_address(0), tag);
-#endif
-  ref->_val = val;
-}
-
-void ObjectWriteAnother(MyObject *ref, long val) {
-#if defined(USE_TSAN_CALLBACKS)
-  callback_write(ref, __builtin_return_address(0), tag);
-#endif
-  ref->_another = val;
-}
-
-#else  // defined(SHARED_LIB)
-
 int main(int argc, char *argv[]) {
   InitializeLibrary();
   
@@ -116,13 +67,14 @@
   
   // TEST2-NOT: WARNING: ThreadSanitizer
   
-  // TEST3: WARNING: ThreadSanitizer: race on a library object
-  // TEST3: {{Mutating|read-only}} access of object MyLibrary::MyObject at
+  // TEST3: WARNING: ThreadSanitizer: race on MyLibrary::MyObject
+  // TEST3: {{Modifying|read-only}} access of MyLibrary::MyObject at
   // TEST3: {{ObjectWrite|ObjectRead}}
-  // TEST3: Previous {{mutating|read-only}} access of object MyLibrary::MyObject at
+  // TEST3: Previous {{modifying|read-only}} access of MyLibrary::MyObject at
   // TEST3: {{ObjectWrite|ObjectRead}}
-  // TEST3: Location is MyLibrary::MyObject object of size 16 at
+  // TEST3: Location is MyLibrary::MyObject of size 16 at
   // TEST3: {{ObjectCreate}}
+  // TEST3: SUMMARY: ThreadSanitizer: race on MyLibrary::MyObject {{.*}} in {{ObjectWrite|ObjectRead}}
 
   fprintf(stderr, "RW test done\n");
   // CHECK: RW test done
@@ -139,16 +91,15 @@
   
   // TEST2-NOT: WARNING: ThreadSanitizer
   
-  // TEST3: WARNING: ThreadSanitizer: race on a library object
-  // TEST3: Mutating access of object MyLibrary::MyObject at
+  // TEST3: WARNING: ThreadSanitizer: race on MyLibrary::MyObject
+  // TEST3: Modifying access of MyLibrary::MyObject at
   // TEST3: {{ObjectWrite|ObjectWriteAnother}}
-  // TEST3: Previous mutating access of object MyLibrary::MyObject at
+  // TEST3: Previous modifying access of MyLibrary::MyObject at
   // TEST3: {{ObjectWrite|ObjectWriteAnother}}
-  // TEST3: Location is MyLibrary::MyObject object of size 16 at
+  // TEST3: Location is MyLibrary::MyObject of size 16 at
   // TEST3: {{ObjectCreate}}
+  // TEST3: SUMMARY: ThreadSanitizer: race on MyLibrary::MyObject {{.*}} in {{ObjectWrite|ObjectWriteAnother}}
 
   fprintf(stderr, "WW test done\n");
   // CHECK: WW test done
 }
-
-#endif  // defined(SHARED_LIB)
diff --git a/test/tsan/Darwin/gcd-after-null.mm b/test/tsan/Darwin/gcd-after-null.mm
new file mode 100644
index 0000000..7c9913c
--- /dev/null
+++ b/test/tsan/Darwin/gcd-after-null.mm
@@ -0,0 +1,23 @@
+// Regression test to make sure we don't crash when dispatch_after is called with a NULL queue.
+
+// RUN: %clang_tsan %s -o %t -framework Foundation
+// RUN: %run %t 2>&1 | FileCheck %s
+
+#import <Foundation/Foundation.h>
+
+int main(int argc, const char *argv[]) {
+  fprintf(stderr, "start\n");
+
+  dispatch_after(dispatch_time(DISPATCH_TIME_NOW, (int64_t)(10 * NSEC_PER_MSEC)), NULL, ^{
+    dispatch_async(dispatch_get_main_queue(), ^{
+      CFRunLoopStop(CFRunLoopGetMain());
+    });
+  });
+  CFRunLoopRun();
+
+  fprintf(stderr, "done\n");
+  return 0;
+}
+
+// CHECK: start
+// CHECK: done
diff --git a/test/tsan/Darwin/ignore-noninstrumented.mm b/test/tsan/Darwin/ignore-noninstrumented.mm
index 528e07b..668a76a 100644
--- a/test/tsan/Darwin/ignore-noninstrumented.mm
+++ b/test/tsan/Darwin/ignore-noninstrumented.mm
@@ -1,4 +1,4 @@
-// Check that ignore_noninstrumented_modules=1 supresses races from system libraries on OS X.
+// Check that ignore_noninstrumented_modules=1 suppresses races from system libraries on OS X.
 
 // RUN: %clang_tsan %s -o %t -framework Foundation
 
diff --git a/test/tsan/Darwin/ignored-interceptors.mm b/test/tsan/Darwin/ignored-interceptors.mm
index 1105132..b2e40f0 100644
--- a/test/tsan/Darwin/ignored-interceptors.mm
+++ b/test/tsan/Darwin/ignored-interceptors.mm
@@ -1,4 +1,4 @@
-// Check that ignore_interceptors_accesses=1 supresses reporting races from
+// Check that ignore_interceptors_accesses=1 suppresses reporting races from
 // system libraries on OS X. There are currently false positives coming from
 // libxpc, libdispatch, CoreFoundation and others, because these libraries use
 // TSan-invisible atomics as synchronization.
diff --git a/test/tsan/Darwin/main_tid.mm b/test/tsan/Darwin/main_tid.mm
index af658e4..6dea58e 100644
--- a/test/tsan/Darwin/main_tid.mm
+++ b/test/tsan/Darwin/main_tid.mm
@@ -8,7 +8,7 @@
 extern "C" {
 void __tsan_on_report(void *report);
 int __tsan_get_report_thread(void *report, unsigned long idx, int *tid,
-                             unsigned long *os_id, int *running,
+                             uint64_t *os_id, int *running,
                              const char **name, int *parent_tid, void **trace,
                              unsigned long trace_size);
 }
@@ -17,7 +17,7 @@
   fprintf(stderr, "__tsan_on_report(%p)\n", report);
 
   int tid;
-  unsigned long os_id;
+  uint64_t os_id;
   int running;
   const char *name;
   int parent_tid;
diff --git a/test/tsan/Darwin/osspinlock-norace.cc b/test/tsan/Darwin/osspinlock-norace.cc
index 2ac3989..5de02c2 100644
--- a/test/tsan/Darwin/osspinlock-norace.cc
+++ b/test/tsan/Darwin/osspinlock-norace.cc
@@ -1,8 +1,12 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
-#include <libkern/OSAtomic.h>
 #include <pthread.h>
+#include <stdint.h>
 #include <stdio.h>
 
+typedef int32_t OSSpinLock;
+extern "C" void OSSpinLockLock(OSSpinLock *);
+extern "C" void OSSpinLockUnlock(OSSpinLock *);
+
 int Global;
 OSSpinLock lock;
 
diff --git a/test/tsan/Darwin/signals-blocked.cc b/test/tsan/Darwin/signals-blocked.cc
new file mode 100644
index 0000000..209dc22
--- /dev/null
+++ b/test/tsan/Darwin/signals-blocked.cc
@@ -0,0 +1,75 @@
+// RUN: %clangxx_tsan %s -o %t && %run %t 2>&1 | FileCheck %s
+
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <errno.h>
+
+volatile bool signal_delivered;
+
+static void handler(int sig) {
+  if (sig == SIGALRM)
+    signal_delivered = true;
+}
+
+static void* thr(void *p) {
+  sigset_t sigset;
+  sigemptyset(&sigset);
+  sigaddset(&sigset, SIGALRM);
+  int ret = pthread_sigmask(SIG_UNBLOCK, &sigset, NULL);
+  if (ret) abort();
+
+  struct sigaction act = {};
+  act.sa_handler = &handler;
+  if (sigaction(SIGALRM, &act, 0)) {
+    perror("sigaction");
+    exit(1);
+  }
+
+  itimerval t;
+  t.it_value.tv_sec = 0;
+  t.it_value.tv_usec = 10000;
+  t.it_interval = t.it_value;
+  if (setitimer(ITIMER_REAL, &t, 0)) {
+    perror("setitimer");
+    exit(1);
+  }
+
+  while (!signal_delivered) {
+    usleep(1000);
+  }
+
+  t.it_value.tv_usec = 0;
+  if (setitimer(ITIMER_REAL, &t, 0)) {
+    perror("setitimer");
+    exit(1);
+  }
+
+  fprintf(stderr, "SIGNAL DELIVERED\n");
+
+  return 0;
+}
+
+int main() {
+  sigset_t sigset;
+  sigemptyset(&sigset);
+  sigaddset(&sigset, SIGALRM);
+  int ret = pthread_sigmask(SIG_BLOCK, &sigset, NULL);
+  if (ret) abort();
+
+  pthread_t th;
+  pthread_create(&th, 0, thr, 0);
+  pthread_join(th, 0);
+
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer:
+// CHECK: SIGNAL DELIVERED
+// CHECK: DONE
+// CHECK-NOT: WARNING: ThreadSanitizer:
diff --git a/test/tsan/Darwin/xpc-cancel.mm b/test/tsan/Darwin/xpc-cancel.mm
new file mode 100644
index 0000000..ac7aed0
--- /dev/null
+++ b/test/tsan/Darwin/xpc-cancel.mm
@@ -0,0 +1,39 @@
+// RUN: %clang_tsan %s -o %t -framework Foundation
+// RUN: %run %t 2>&1 | FileCheck %s
+
+// UNSUPPORTED: ios
+
+#import <Foundation/Foundation.h>
+#import <xpc/xpc.h>
+
+long global;
+
+int main(int argc, const char *argv[]) {
+  fprintf(stderr, "Hello world.\n");
+
+  dispatch_queue_t server_q = dispatch_queue_create("server.queue", DISPATCH_QUEUE_CONCURRENT);
+  xpc_connection_t server_conn = xpc_connection_create(NULL, server_q);
+
+  xpc_connection_set_event_handler(server_conn, ^(xpc_object_t client) {
+    if (client == XPC_ERROR_CONNECTION_INTERRUPTED || client == XPC_ERROR_CONNECTION_INVALID) {
+      global = 43;
+      
+      dispatch_async(dispatch_get_main_queue(), ^{
+        CFRunLoopStop(CFRunLoopGetCurrent());
+      });
+    }
+  });
+  xpc_connection_resume(server_conn);
+  
+  global = 42;
+  
+  xpc_connection_cancel(server_conn);
+  
+  CFRunLoopRun();
+
+  fprintf(stderr, "Done.\n");
+}
+
+// CHECK: Hello world.
+// CHECK-NOT: WARNING: ThreadSanitizer
+// CHECK: Done.
diff --git a/test/tsan/Darwin/xpc-race.mm b/test/tsan/Darwin/xpc-race.mm
index eaef4e0..872c9ee 100644
--- a/test/tsan/Darwin/xpc-race.mm
+++ b/test/tsan/Darwin/xpc-race.mm
@@ -1,20 +1,26 @@
-// RUN: %clang_tsan %s -o %t -framework Foundation
+// RUN: %clangxx_tsan %s -o %t -framework Foundation
 // RUN: %deflake %run %t 2>&1 | FileCheck %s
 
+// UNSUPPORTED: ios
+
 #import <Foundation/Foundation.h>
 #import <xpc/xpc.h>
+#import <stdatomic.h>
 
 #import "../test.h"
 
 long global;
 
-long received_msgs;
+_Atomic(long) msg_counter;
+_Atomic(long) processed_msgs;
 xpc_connection_t server_conn;
 xpc_connection_t client_conns[2];
 
 int main(int argc, const char *argv[]) {
   @autoreleasepool {
-    NSLog(@"Hello world.");
+    fprintf(stderr, "Hello world.\n");
+    // CHECK: Hello world.
+
     barrier_init(&barrier, 2);
 
     dispatch_queue_t server_q = dispatch_queue_create("server.queue", DISPATCH_QUEUE_CONCURRENT);
@@ -22,21 +28,34 @@
     server_conn = xpc_connection_create(NULL, server_q);
 
     xpc_connection_set_event_handler(server_conn, ^(xpc_object_t client) {
-      NSLog(@"server event handler, client = %@", client);
+      fprintf(stderr, "server event handler, client = %p\n", client);
 
       if (client == XPC_ERROR_CONNECTION_INTERRUPTED || client == XPC_ERROR_CONNECTION_INVALID) {
         return;
       }
       xpc_connection_set_event_handler(client, ^(xpc_object_t object) {
-        NSLog(@"received message: %@", object);
+        fprintf(stderr, "received message: %p\n", object);
 
-        barrier_wait(&barrier);
-        global = 42;
+        long msg_number = atomic_fetch_add_explicit(&msg_counter, 1, memory_order_relaxed);
+
+        if (msg_number == 0)
+          barrier_wait(&barrier);
+
+        global++;
+        // CHECK: WARNING: ThreadSanitizer: data race
+        // CHECK:   Write of size 8
+        // CHECK:     #0 {{.*}}xpc-race.mm:[[@LINE-3]]
+        // CHECK:   Previous write of size 8
+        // CHECK:     #0 {{.*}}xpc-race.mm:[[@LINE-5]]
+        // CHECK: Location is global 'global'
+
+        if (msg_number == 1)
+          barrier_wait(&barrier);
+
+        atomic_fetch_add(&processed_msgs, 1);
 
         dispatch_sync(dispatch_get_main_queue(), ^{
-          received_msgs++;
-
-          if (received_msgs >= 2) {
+          if (processed_msgs >= 2) {
             xpc_connection_cancel(client_conns[0]);
             xpc_connection_cancel(client_conns[1]);
             xpc_connection_cancel(server_conn);
@@ -53,12 +72,12 @@
     for (int i = 0; i < 2; i++) {
       client_conns[i] = xpc_connection_create_from_endpoint(endpoint);
       xpc_connection_set_event_handler(client_conns[i], ^(xpc_object_t event) {
-        NSLog(@"client event handler, event = %@", event);
+        fprintf(stderr, "client event handler, event = %p\n", event);
       });
 
       xpc_object_t msg = xpc_dictionary_create(NULL, NULL, 0);
       xpc_dictionary_set_string(msg, "hello", "world");
-      NSLog(@"sending message: %@", msg);
+      fprintf(stderr, "sending message: %p\n", msg);
 
       xpc_connection_send_message(client_conns[i], msg);
       xpc_connection_resume(client_conns[i]);
@@ -66,16 +85,8 @@
 
     CFRunLoopRun();
 
-    NSLog(@"Done.");
+    fprintf(stderr, "Done.\n");
+    // CHECK: Done.
   }
   return 0;
 }
-
-// CHECK: Hello world.
-// CHECK: WARNING: ThreadSanitizer: data race
-// CHECK:   Write of size 8
-// CHECK:     #0 {{.*}}xpc-race.mm:34
-// CHECK:   Previous write of size 8
-// CHECK:     #0 {{.*}}xpc-race.mm:34
-// CHECK: Location is global 'global'
-// CHECK: Done.
diff --git a/test/tsan/Darwin/xpc.mm b/test/tsan/Darwin/xpc.mm
index 2d6de26..036841e 100644
--- a/test/tsan/Darwin/xpc.mm
+++ b/test/tsan/Darwin/xpc.mm
@@ -1,6 +1,8 @@
 // RUN: %clang_tsan %s -o %t -framework Foundation
 // RUN: %run %t 2>&1 | FileCheck %s
 
+// UNSUPPORTED: ios
+
 #import <Foundation/Foundation.h>
 #import <xpc/xpc.h>
 
diff --git a/test/tsan/Linux/check_memcpy.cc b/test/tsan/Linux/check_memcpy.cc
index 8ad04c0..b81efa4 100644
--- a/test/tsan/Linux/check_memcpy.cc
+++ b/test/tsan/Linux/check_memcpy.cc
@@ -5,6 +5,8 @@
 // RUN: %clangxx_tsan -O1 %s -o %t
 // RUN: llvm-objdump -d %t | FileCheck %s
 
+// REQUIRES: compiler-rt-optimized
+
 int main() {
   return 0;
 }
diff --git a/test/tsan/Linux/double_race.cc b/test/tsan/Linux/double_race.cc
new file mode 100644
index 0000000..2b4af35
--- /dev/null
+++ b/test/tsan/Linux/double_race.cc
@@ -0,0 +1,52 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+#include "../test.h"
+#include <memory.h>
+
+// A reproducer for a known issue.
+// See reference to double_race.cc in tsan_rtl_report.cc for an explanation.
+
+char buf[16];
+volatile int nreport;
+
+void __sanitizer_report_error_summary(const char *summary) {
+  nreport++;
+}
+
+const int kEventPCBits = 61;
+
+extern "C" bool __tsan_symbolize_external(unsigned long pc, char *func_buf,
+                                          unsigned long func_siz,
+                                          char *file_buf,
+                                          unsigned long file_siz, int *line,
+                                          int *col) {
+  if (pc >> kEventPCBits) {
+    printf("bad PC passed to __tsan_symbolize_external: %lx\n", pc);
+    _exit(1);
+  }
+  return true;
+}
+
+void *Thread(void *arg) {
+  barrier_wait(&barrier);
+  memset(buf, 2, sizeof(buf));
+  return 0;
+}
+
+int main() {
+  barrier_init(&barrier, 2);
+  pthread_t t;
+  pthread_create(&t, 0, Thread, 0);
+  memset(buf, 1, sizeof(buf));
+  barrier_wait(&barrier);
+  pthread_join(t, 0);
+  return 0;
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK:   Write of size 8 at {{.*}} by thread T1:
+// CHECK:     #0 memset
+// CHECK:     #1 Thread
+// CHECK-NOT: bad PC passed to __tsan_symbolize_external
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK:   Write of size 8 at {{.*}} by thread T1:
+// CHECK:     #0 Thread
diff --git a/test/tsan/Linux/pie_no_aslr.cc b/test/tsan/Linux/pie_no_aslr.cc
new file mode 100644
index 0000000..b99342d
--- /dev/null
+++ b/test/tsan/Linux/pie_no_aslr.cc
@@ -0,0 +1,6 @@
+// RUN: %clang_tsan %s -pie -fPIE -o %t && %run setarch x86_64 -R %t
+// REQUIRES: x86_64-target-arch
+
+int main() {
+  return 0;
+}
diff --git a/test/tsan/Linux/user_malloc.cc b/test/tsan/Linux/user_malloc.cc
index 6d51a9d..b470e6c 100644
--- a/test/tsan/Linux/user_malloc.cc
+++ b/test/tsan/Linux/user_malloc.cc
@@ -1,5 +1,12 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
+
 // UNSUPPORTED: powerpc64le
+
+// FIXME: Remove the test or find how to fix this.
+// On some distributions, probably with newer glibc, tsan initialization calls
+// dlsym which then calls malloc and crashes because of tsan is not initialized.
+// UNSUPPORTED: linux
+
 #include <stdio.h>
 
 // Defined by tsan.
diff --git a/test/tsan/allocator_returns_null.cc b/test/tsan/allocator_returns_null.cc
index 6693007..5e2e2e9 100644
--- a/test/tsan/allocator_returns_null.cc
+++ b/test/tsan/allocator_returns_null.cc
@@ -1,56 +1,95 @@
-// Test the behavior of malloc/calloc/realloc when the allocation size is huge.
+// Test the behavior of malloc/calloc/realloc/new when the allocation size is
+// more than TSan allocator's max allowed one.
 // By default (allocator_may_return_null=0) the process should crash.
-// With allocator_may_return_null=1 the allocator should return 0.
+// With allocator_may_return_null=1 the allocator should return 0, except the
+// operator new(), which should crash anyway (operator new(std::nothrow) should
+// return nullptr, indeed).
 //
 // RUN: %clangxx_tsan -O0 %s -o %t
 // RUN: not %run %t malloc 2>&1 | FileCheck %s --check-prefix=CHECK-mCRASH
-// RUN: %env_tsan_opts=allocator_may_return_null=0 not %run %t malloc 2>&1 | FileCheck %s --check-prefix=CHECK-mCRASH
-// RUN: %env_tsan_opts=allocator_may_return_null=0 not %run %t calloc 2>&1 | FileCheck %s --check-prefix=CHECK-cCRASH
-// RUN: %env_tsan_opts=allocator_may_return_null=0 not %run %t calloc-overflow 2>&1 | FileCheck %s --check-prefix=CHECK-coCRASH
-// RUN: %env_tsan_opts=allocator_may_return_null=0 not %run %t realloc 2>&1 | FileCheck %s --check-prefix=CHECK-rCRASH
-// RUN: %env_tsan_opts=allocator_may_return_null=0 not %run %t realloc-after-malloc 2>&1 | FileCheck %s --check-prefix=CHECK-mrCRASH
+// RUN: %env_tsan_opts=allocator_may_return_null=0 not %run %t malloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-mCRASH
+// RUN: %env_tsan_opts=allocator_may_return_null=1     %run %t malloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-mNULL
+// RUN: %env_tsan_opts=allocator_may_return_null=0 not %run %t calloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-cCRASH
+// RUN: %env_tsan_opts=allocator_may_return_null=1     %run %t calloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-cNULL
+// RUN: %env_tsan_opts=allocator_may_return_null=0 not %run %t calloc-overflow 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-coCRASH
+// RUN: %env_tsan_opts=allocator_may_return_null=1     %run %t calloc-overflow 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-coNULL
+// RUN: %env_tsan_opts=allocator_may_return_null=0 not %run %t realloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-rCRASH
+// RUN: %env_tsan_opts=allocator_may_return_null=1     %run %t realloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-rNULL
+// RUN: %env_tsan_opts=allocator_may_return_null=0 not %run %t realloc-after-malloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-mrCRASH
+// RUN: %env_tsan_opts=allocator_may_return_null=1     %run %t realloc-after-malloc 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-mrNULL
+// RUN: %env_tsan_opts=allocator_may_return_null=0 not %run %t new 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-nCRASH
+// RUN: %env_tsan_opts=allocator_may_return_null=1 not %run %t new 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-nCRASH
+// RUN: %env_tsan_opts=allocator_may_return_null=0 not %run %t new-nothrow 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-nnCRASH
+// RUN: %env_tsan_opts=allocator_may_return_null=1     %run %t new-nothrow 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-nnNULL
 
-#include <limits.h>
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <stdio.h>
-#include <assert.h>
 #include <limits>
-int main(int argc, char **argv) {
-  volatile size_t size = std::numeric_limits<size_t>::max() - 10000;
-  assert(argc == 2);
-  char *x = 0;
-  if (!strcmp(argv[1], "malloc")) {
-    fprintf(stderr, "malloc:\n");
-    x = (char*)malloc(size);
-  }
-  if (!strcmp(argv[1], "calloc")) {
-    fprintf(stderr, "calloc:\n");
-    x = (char*)calloc(size / 4, 4);
-  }
+#include <new>
 
-  if (!strcmp(argv[1], "calloc-overflow")) {
-    fprintf(stderr, "calloc-overflow:\n");
+int main(int argc, char **argv) {
+  // Disable stderr buffering. Needed on Windows.
+  setvbuf(stderr, NULL, _IONBF, 0);
+
+  assert(argc == 2);
+  const char *action = argv[1];
+  fprintf(stderr, "%s:\n", action);
+
+  // The limit enforced in tsan_mman.cc, user_alloc_internal function.
+  static const size_t kMaxAllowedMallocSizePlusOne = (1ULL << 40) + 1;
+
+  void *x = 0;
+  if (!strcmp(action, "malloc")) {
+    x = malloc(kMaxAllowedMallocSizePlusOne);
+  } else if (!strcmp(action, "calloc")) {
+    x = calloc((kMaxAllowedMallocSizePlusOne / 4) + 1, 4);
+  } else if (!strcmp(action, "calloc-overflow")) {
     volatile size_t kMaxSizeT = std::numeric_limits<size_t>::max();
     size_t kArraySize = 4096;
     volatile size_t kArraySize2 = kMaxSizeT / kArraySize + 10;
-    x = (char*)calloc(kArraySize, kArraySize2);
-  }
-
-  if (!strcmp(argv[1], "realloc")) {
-    fprintf(stderr, "realloc:\n");
-    x = (char*)realloc(0, size);
-  }
-  if (!strcmp(argv[1], "realloc-after-malloc")) {
-    fprintf(stderr, "realloc-after-malloc:\n");
+    x = calloc(kArraySize, kArraySize2);
+  } else if (!strcmp(action, "realloc")) {
+    x = realloc(0, kMaxAllowedMallocSizePlusOne);
+  } else if (!strcmp(action, "realloc-after-malloc")) {
     char *t = (char*)malloc(100);
     *t = 42;
-    x = (char*)realloc(t, size);
+    x = realloc(t, kMaxAllowedMallocSizePlusOne);
     assert(*t == 42);
+  } else if (!strcmp(action, "new")) {
+    x = operator new(kMaxAllowedMallocSizePlusOne);
+  } else if (!strcmp(action, "new-nothrow")) {
+    x = operator new(kMaxAllowedMallocSizePlusOne, std::nothrow);
+  } else {
+    assert(0);
   }
-  fprintf(stderr, "x: %p\n", x);
+
+  fprintf(stderr, "errno: %d\n", errno);
+
+  // The NULL pointer is printed differently on different systems, while (long)0
+  // is always the same.
+  fprintf(stderr, "x: %lx\n", (long)x);
+  free(x);
+
   return x != 0;
 }
+
 // CHECK-mCRASH: malloc:
 // CHECK-mCRASH: ThreadSanitizer's allocator is terminating the process
 // CHECK-cCRASH: calloc:
@@ -61,4 +100,25 @@
 // CHECK-rCRASH: ThreadSanitizer's allocator is terminating the process
 // CHECK-mrCRASH: realloc-after-malloc:
 // CHECK-mrCRASH: ThreadSanitizer's allocator is terminating the process
+// CHECK-nCRASH: new:
+// CHECK-nCRASH: ThreadSanitizer's allocator is terminating the process
+// CHECK-nnCRASH: new-nothrow:
+// CHECK-nnCRASH: ThreadSanitizer's allocator is terminating the process
 
+// CHECK-mNULL: malloc:
+// CHECK-mNULL: errno: 12
+// CHECK-mNULL: x: 0
+// CHECK-cNULL: calloc:
+// CHECK-cNULL: errno: 12
+// CHECK-cNULL: x: 0
+// CHECK-coNULL: calloc-overflow:
+// CHECK-coNULL: errno: 12
+// CHECK-coNULL: x: 0
+// CHECK-rNULL: realloc:
+// CHECK-rNULL: errno: 12
+// CHECK-rNULL: x: 0
+// CHECK-mrNULL: realloc-after-malloc:
+// CHECK-mrNULL: errno: 12
+// CHECK-mrNULL: x: 0
+// CHECK-nnNULL: new-nothrow:
+// CHECK-nnNULL: x: 0
diff --git a/test/tsan/atomic_hle.cc b/test/tsan/atomic_hle.cc
new file mode 100644
index 0000000..345e363
--- /dev/null
+++ b/test/tsan/atomic_hle.cc
@@ -0,0 +1,25 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
+#include "test.h"
+#include <sanitizer/tsan_interface_atomic.h>
+
+#ifndef __ATOMIC_HLE_ACQUIRE
+#define __ATOMIC_HLE_ACQUIRE (1 << 16)
+#endif
+#ifndef __ATOMIC_HLE_RELEASE
+#define __ATOMIC_HLE_RELEASE (1 << 17)
+#endif
+
+int main() {
+  volatile int x = 0;
+  //__atomic_fetch_add(&x, 1, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE);
+  //__atomic_store_n(&x, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);
+  __tsan_atomic32_fetch_add(&x, 1,
+      (__tsan_memory_order)(__ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE));
+  __tsan_atomic32_store(&x, 0,
+      (__tsan_memory_order)(__ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE));
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+// CHECK: DONE
+
diff --git a/test/tsan/custom_mutex.h b/test/tsan/custom_mutex.h
new file mode 100644
index 0000000..8e226a1
--- /dev/null
+++ b/test/tsan/custom_mutex.h
@@ -0,0 +1,93 @@
+#include "test.h"
+#include <atomic>
+#include <vector>
+#include <sanitizer/tsan_interface.h>
+
+// A very primitive mutex annotated with tsan annotations.
+class Mutex {
+ public:
+  Mutex(bool prof, unsigned create_flags, unsigned destroy_flags=0)
+      : prof_(prof)
+      , locked_(false)
+      , seq_(0)
+      , destroy_flags_(destroy_flags) {
+    __tsan_mutex_create(this, create_flags);
+  }
+
+  ~Mutex() {
+    __tsan_mutex_destroy(this, destroy_flags_);
+  }
+
+  void Lock() {
+    __tsan_mutex_pre_lock(this, 0);
+    LockImpl();
+    __tsan_mutex_post_lock(this, 0, 0);
+  }
+
+  bool TryLock() {
+    __tsan_mutex_pre_lock(this, __tsan_mutex_try_lock);
+    bool ok = TryLockImpl();
+    __tsan_mutex_post_lock(this, __tsan_mutex_try_lock |
+        (ok ? 0 : __tsan_mutex_try_lock_failed), 0);
+    return ok;
+  }
+
+  void Unlock() {
+    __tsan_mutex_pre_unlock(this, 0);
+    UnlockImpl();
+    __tsan_mutex_post_unlock(this, 0);
+  }
+
+  void Wait() {
+    for (int seq = seq_; seq == seq_;) {
+      Unlock();
+      usleep(100);
+      Lock();
+    }
+  }
+
+  void Broadcast() {
+    __tsan_mutex_pre_signal(this, 0);
+    LockImpl(false);
+    seq_++;
+    UnlockImpl();
+    __tsan_mutex_post_signal(this, 0);
+  }
+
+ private:
+  const bool prof_;
+  std::atomic<bool> locked_;
+  int seq_;
+  unsigned destroy_flags_;
+
+  // This models mutex profiling subsystem.
+  static Mutex prof_mu_;
+  static int prof_data_;
+
+  void LockImpl(bool prof = true) {
+    while (!TryLockImpl())
+      usleep(100);
+    if (prof && prof_)
+      Prof();
+  }
+
+  bool TryLockImpl() {
+    return !locked_.exchange(true);
+  }
+
+  void UnlockImpl() {
+    locked_.store(false);
+  }
+
+  void Prof() {
+      // This happens inside of mutex lock annotations.
+      __tsan_mutex_pre_divert(this, 0);
+      prof_mu_.Lock();
+      prof_data_++;
+      prof_mu_.Unlock();
+      __tsan_mutex_post_divert(this, 0);
+  }
+};
+
+Mutex Mutex::prof_mu_(false, __tsan_mutex_linker_init);
+int Mutex::prof_data_;
diff --git a/test/tsan/custom_mutex0.cc b/test/tsan/custom_mutex0.cc
new file mode 100644
index 0000000..8302fd8
--- /dev/null
+++ b/test/tsan/custom_mutex0.cc
@@ -0,0 +1,31 @@
+// RUN: %clangxx_tsan -O1 --std=c++11 %s -o %t && %run %t 2>&1 | FileCheck %s
+#include "custom_mutex.h"
+
+// Test that custom annoations provide normal mutex synchronization
+// (no race reports for properly protected critical sections).
+
+Mutex mu(true, 0);
+long data;
+
+void *thr(void *arg) {
+  barrier_wait(&barrier);
+  mu.Lock();
+  data++;
+  mu.Unlock();
+  return 0;
+}
+
+int main() {
+  barrier_init(&barrier, 2);
+  pthread_t th;
+  pthread_create(&th, 0, thr, 0);
+  barrier_wait(&barrier);
+  mu.Lock();
+  data++;
+  mu.Unlock();
+  pthread_join(th, 0);
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+// CHECK: DONE
diff --git a/test/tsan/custom_mutex1.cc b/test/tsan/custom_mutex1.cc
new file mode 100644
index 0000000..1c879f5
--- /dev/null
+++ b/test/tsan/custom_mutex1.cc
@@ -0,0 +1,39 @@
+// RUN: %clangxx_tsan -O1 --std=c++11 %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+#include "custom_mutex.h"
+
+// Test that failed TryLock does not induce parasitic synchronization.
+
+Mutex mu(true, 0);
+long data;
+
+void *thr(void *arg) {
+  mu.Lock();
+  data++;
+  mu.Unlock();
+  mu.Lock();
+  barrier_wait(&barrier);
+  barrier_wait(&barrier);
+  mu.Unlock();
+  return 0;
+}
+
+int main() {
+  barrier_init(&barrier, 2);
+  pthread_t th;
+  pthread_create(&th, 0, thr, 0);
+  barrier_wait(&barrier);
+  if (mu.TryLock()) {
+    fprintf(stderr, "TryLock succeeded, should not\n");
+    exit(0);
+  }
+  data++;
+  barrier_wait(&barrier);
+  pthread_join(th, 0);
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+// CHECK: ThreadSanitizer: data race
+// CHECK-NEXT:   Write of size 8 at {{.*}} by main thread:
+// CHECK-NEXT:     #0 main {{.*}}custom_mutex1.cc:29
+// CHECK: DONE
diff --git a/test/tsan/custom_mutex2.cc b/test/tsan/custom_mutex2.cc
new file mode 100644
index 0000000..d4aca7e
--- /dev/null
+++ b/test/tsan/custom_mutex2.cc
@@ -0,0 +1,34 @@
+// RUN: %clangxx_tsan -O1 --std=c++11 %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+#include "custom_mutex.h"
+
+// Test that Broadcast does not induce parasitic synchronization.
+
+Mutex mu(true, 0);
+long data;
+
+void *thr(void *arg) {
+  barrier_wait(&barrier);
+  mu.Lock();
+  data++;
+  mu.Unlock();
+  data++;
+  mu.Broadcast();
+  return 0;
+}
+
+int main() {
+  barrier_init(&barrier, 2);
+  pthread_t th;
+  pthread_create(&th, 0, thr, 0);
+  mu.Lock();
+  barrier_wait(&barrier);
+  while (data == 0)
+    mu.Wait();
+  mu.Unlock();
+  pthread_join(th, 0);
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+// CHECK: ThreadSanitizer: data race
+// CHECK: DONE
diff --git a/test/tsan/custom_mutex3.cc b/test/tsan/custom_mutex3.cc
new file mode 100644
index 0000000..6e99926
--- /dev/null
+++ b/test/tsan/custom_mutex3.cc
@@ -0,0 +1,46 @@
+// RUN: %clangxx_tsan -O1 --std=c++11 %s -o %t
+// RUN: %env_tsan_opts=report_destroy_locked=0 %run %t 2>&1 | FileCheck %s
+#include "custom_mutex.h"
+
+// Regression test for a bug.
+// Thr1 destroys a locked mutex, previously such mutex was not removed from
+// sync map and as the result subsequent uses of a mutex located at the same
+// address caused false race reports.
+
+Mutex mu(false, __tsan_mutex_write_reentrant);
+long data;
+
+void *thr1(void *arg) {
+  mu.Lock();
+  mu.~Mutex();
+  new(&mu) Mutex(true, __tsan_mutex_write_reentrant);
+  return 0;
+}
+
+void *thr2(void *arg) {
+  barrier_wait(&barrier);
+  mu.Lock();
+  data++;
+  mu.Unlock();
+  return 0;
+}
+
+int main() {
+  barrier_init(&barrier, 2);
+  pthread_t th;
+  pthread_create(&th, 0, thr1, 0);
+  pthread_join(th, 0);
+
+  barrier_init(&barrier, 2);
+  pthread_create(&th, 0, thr2, 0);
+  mu.Lock();
+  data++;
+  mu.Unlock();
+  barrier_wait(&barrier);
+  pthread_join(th, 0);
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer: data race
+// CHECK: DONE
diff --git a/test/tsan/custom_mutex4.cc b/test/tsan/custom_mutex4.cc
new file mode 100644
index 0000000..539a8be
--- /dev/null
+++ b/test/tsan/custom_mutex4.cc
@@ -0,0 +1,33 @@
+// RUN: %clangxx_tsan -O1 --std=c++11 %s -o %t && %run %t 2>&1 | FileCheck %s
+#include "custom_mutex.h"
+
+#include <type_traits>
+
+// Test that the destruction events of a mutex are ignored when the
+// annotations request this.
+//
+// Use after destruction is UB, but __tsan_mutex_linker_init and
+// __tsan_mutex_not_static exist to support global variables of mutex type,
+// which might be accessed during program shutdown after the class's destructor
+// has run.
+
+int main() {
+  std::aligned_storage<sizeof(Mutex), alignof(Mutex)>::type mu1_store;
+  Mutex* mu1 = reinterpret_cast<Mutex*>(&mu1_store);
+  new(&mu1_store) Mutex(false, __tsan_mutex_linker_init);
+  mu1->Lock();
+  mu1->~Mutex();
+  mu1->Unlock();
+
+  std::aligned_storage<sizeof(Mutex), alignof(Mutex)>::type mu2_store;
+  Mutex* mu2 = reinterpret_cast<Mutex*>(&mu2_store);
+  new(&mu2_store) Mutex(false, 0, __tsan_mutex_not_static);
+  mu2->Lock();
+  mu2->~Mutex();
+  mu2->Unlock();
+
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+// CHECK: DONE
diff --git a/test/tsan/custom_mutex5.cc b/test/tsan/custom_mutex5.cc
new file mode 100644
index 0000000..ad906e3
--- /dev/null
+++ b/test/tsan/custom_mutex5.cc
@@ -0,0 +1,33 @@
+// RUN: %clangxx_tsan -O1 --std=c++11 %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+#include "custom_mutex.h"
+
+#include <type_traits>
+
+// Test that we detect the destruction of an in-use mutex when the
+// thread annotations don't otherwise disable the check.
+
+int main() {
+  std::aligned_storage<sizeof(Mutex), alignof(Mutex)>::type mu1_store;
+  Mutex* mu1 = reinterpret_cast<Mutex*>(&mu1_store);
+  new(&mu1_store) Mutex(false, 0);
+  mu1->Lock();
+  mu1->~Mutex();
+  mu1->Unlock();
+
+  std::aligned_storage<sizeof(Mutex), alignof(Mutex)>::type mu2_store;
+  Mutex* mu2 = reinterpret_cast<Mutex*>(&mu2_store);
+  new(&mu2_store)
+      Mutex(false, __tsan_mutex_not_static, __tsan_mutex_not_static);
+  mu2->Lock();
+  mu2->~Mutex();
+  mu2->Unlock();
+
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+// CHECK: WARNING: ThreadSanitizer: destroy of a locked mutex
+// CHECK:   main {{.*}}custom_mutex5.cc:14
+// CHECK: WARNING: ThreadSanitizer: destroy of a locked mutex
+// CHECK:   main {{.*}}custom_mutex5.cc:22
+// CHECK: DONE
diff --git a/test/tsan/debug_alloc_stack.cc b/test/tsan/debug_alloc_stack.cc
index 303c103..ffe99e7 100644
--- a/test/tsan/debug_alloc_stack.cc
+++ b/test/tsan/debug_alloc_stack.cc
@@ -15,7 +15,7 @@
 #endif
 
 extern "C" int __tsan_get_alloc_stack(void *addr, void **trace, size_t size,
-                                      int *thread_id, void *os_id);
+                                      int *thread_id, uint64_t *os_id);
 
 char *mem;
 void alloc_func() { mem = (char *)malloc(10); }
@@ -49,7 +49,7 @@
   void *trace[100];
   size_t num_frames = 100;
   int thread_id;
-  void *thread_os_id;
+  uint64_t *thread_os_id;
   num_frames =
       __tsan_get_alloc_stack(mem, trace, num_frames, &thread_id, &thread_os_id);
 
@@ -58,7 +58,7 @@
   // CHECK: alloc stack retval ok
   fprintf(stderr, "thread id = %d\n", thread_id);
   // CHECK: thread id = 1
-  fprintf(stderr, "thread os id = 0x%llx\n", (uint64_t)thread_os_id);
+  fprintf(stderr, "thread os id = 0x%llx\n", thread_os_id);
   // CHECK: thread os id = [[THREAD_OS_ID]]
   fprintf(stderr, "%p\n", trace[0]);
   // CHECK: [[ALLOC_FRAME_0:0x[0-9a-f]+]]
diff --git a/test/tsan/debugging.cc b/test/tsan/debugging.cc
index 6533644..d9c7c65 100644
--- a/test/tsan/debugging.cc
+++ b/test/tsan/debugging.cc
@@ -2,6 +2,7 @@
 // RUN: %deflake %run %t 2>&1 | FileCheck %s
 
 #include <pthread.h>
+#include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -20,7 +21,7 @@
                           void **addr, int *size, int *write, int *atomic,
                           void **trace, unsigned long trace_size);
 int __tsan_get_report_thread(void *report, unsigned long idx, int *tid,
-                             unsigned long *os_id, int *running,
+                             uint64_t *os_id, int *running,
                              const char **name, int *parent_tid, void **trace,
                              unsigned long trace_size);
 }
@@ -90,7 +91,7 @@
   fprintf(stderr, "thread_count = %d\n", thread_count);
   // CHECK: thread_count = 2
 
-  unsigned long os_id;
+  uint64_t os_id;
   int running;
   const char *name;
   int parent_tid;
diff --git a/test/tsan/deep_stack1.cc b/test/tsan/deep_stack1.cc
index 39185ef..44dd0c4 100644
--- a/test/tsan/deep_stack1.cc
+++ b/test/tsan/deep_stack1.cc
@@ -24,6 +24,10 @@
   return 0;
 }
 
+static size_t RoundUp(size_t n, size_t to) {
+  return ((n + to - 1) / to) * to;
+}
+
 int main() {
   barrier_init(&barrier, 2);
   N = 50000;
@@ -31,7 +35,10 @@
   pthread_t t;
   pthread_attr_t a;
   pthread_attr_init(&a);
-  pthread_attr_setstacksize(&a, N * 256 + (1 << 20));
+  size_t stack_size = N * 256 + (1 << 20);
+  stack_size = RoundUp(stack_size, 0x10000);  // round the stack size to 64k
+  int ret = pthread_attr_setstacksize(&a, stack_size);
+  if (ret) abort();
   pthread_create(&t, &a, Thread, 0);
 #ifdef ORDER2
   barrier_wait(&barrier);
diff --git a/test/tsan/fd_socket_connect_norace.cc b/test/tsan/fd_socket_connect_norace.cc
index b9fb434..1237518 100644
--- a/test/tsan/fd_socket_connect_norace.cc
+++ b/test/tsan/fd_socket_connect_norace.cc
@@ -1,20 +1,24 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
+#include <arpa/inet.h>
+#include <assert.h>
+#include <netinet/in.h>
 #include <pthread.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <unistd.h>
-#include <sys/types.h>
 #include <sys/socket.h>
-#include <netinet/in.h>
-#include <arpa/inet.h>
+#include <sys/types.h>
+#include <unistd.h>
 
-struct sockaddr_in addr;
+struct sockaddr_in addr4;
+struct sockaddr_in6 addr6;
+struct sockaddr *addr;
+socklen_t addrlen;
 int X;
 
 void *ClientThread(void *x) {
-  int c = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+  int c = socket(addr->sa_family, SOCK_STREAM, IPPROTO_TCP);
   X = 42;
-  if (connect(c, (struct sockaddr*)&addr, sizeof(addr))) {
+  if (connect(c, addr, addrlen)) {
     perror("connect");
     exit(1);
   }
@@ -23,13 +27,26 @@
 }
 
 int main() {
-  int s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
-  addr.sin_family = AF_INET;
-  inet_pton(AF_INET, "127.0.0.1", &addr.sin_addr);
-  addr.sin_port = INADDR_ANY;
-  socklen_t len = sizeof(addr);
-  bind(s, (sockaddr*)&addr, len);
-  getsockname(s, (sockaddr*)&addr, &len);
+  addr4.sin_family = AF_INET;
+  addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+  addr4.sin_port = INADDR_ANY;
+  addr = (struct sockaddr *)&addr4;
+  addrlen = sizeof(addr4);
+
+  int s = socket(addr->sa_family, SOCK_STREAM, IPPROTO_TCP);
+  if (s <= 0) {
+    // Try to fall-back to IPv6
+    addr6.sin6_family = AF_INET6;
+    addr6.sin6_addr = in6addr_loopback;
+    addr6.sin6_port = INADDR_ANY;
+    addr = (struct sockaddr *)&addr6;
+    addrlen = sizeof(addr6);
+    s = socket(addr->sa_family, SOCK_STREAM, IPPROTO_TCP);
+  }
+  assert(s > 0);
+
+  bind(s, addr, addrlen);
+  getsockname(s, addr, &addrlen);
   listen(s, 10);
   pthread_t t;
   pthread_create(&t, 0, ClientThread, 0);
diff --git a/test/tsan/fd_socket_norace.cc b/test/tsan/fd_socket_norace.cc
index 07b0cb3..a1761cb 100644
--- a/test/tsan/fd_socket_norace.cc
+++ b/test/tsan/fd_socket_norace.cc
@@ -1,20 +1,24 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
+#include <arpa/inet.h>
+#include <assert.h>
+#include <netinet/in.h>
 #include <pthread.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <unistd.h>
-#include <sys/types.h>
 #include <sys/socket.h>
-#include <netinet/in.h>
-#include <arpa/inet.h>
+#include <sys/types.h>
+#include <unistd.h>
 
-struct sockaddr_in addr;
+struct sockaddr_in addr4;
+struct sockaddr_in6 addr6;
+struct sockaddr *addr;
+socklen_t addrlen;
 int X;
 
 void *ClientThread(void *x) {
   X = 42;
-  int c = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
-  if (connect(c, (struct sockaddr*)&addr, sizeof(addr))) {
+  int c = socket(addr->sa_family, SOCK_STREAM, IPPROTO_TCP);
+  if (connect(c, addr, addrlen)) {
     perror("connect");
     exit(1);
   }
@@ -27,13 +31,26 @@
 }
 
 int main() {
-  int s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
-  addr.sin_family = AF_INET;
-  inet_pton(AF_INET, "127.0.0.1", &addr.sin_addr);
-  addr.sin_port = INADDR_ANY;
-  socklen_t len = sizeof(addr);
-  bind(s, (sockaddr*)&addr, len);
-  getsockname(s, (sockaddr*)&addr, &len);
+  addr4.sin_family = AF_INET;
+  addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+  addr4.sin_port = INADDR_ANY;
+  addr = (struct sockaddr *)&addr4;
+  addrlen = sizeof(addr4);
+
+  int s = socket(addr->sa_family, SOCK_STREAM, IPPROTO_TCP);
+  if (s <= 0) {
+    // Try to fall-back to IPv6
+    addr6.sin6_family = AF_INET6;
+    addr6.sin6_addr = in6addr_loopback;
+    addr6.sin6_port = INADDR_ANY;
+    addr = (struct sockaddr *)&addr6;
+    addrlen = sizeof(addr6);
+    s = socket(addr->sa_family, SOCK_STREAM, IPPROTO_TCP);
+  }
+  assert(s > 0);
+
+  bind(s, addr, addrlen);
+  getsockname(s, addr, &addrlen);
   listen(s, 10);
   pthread_t t;
   pthread_create(&t, 0, ClientThread, 0);
diff --git a/test/tsan/ignore_lib0.cc b/test/tsan/ignore_lib0.cc
index d6ae72f..8463201 100644
--- a/test/tsan/ignore_lib0.cc
+++ b/test/tsan/ignore_lib0.cc
@@ -11,6 +11,8 @@
 // Some aarch64 kernels do not support non executable write pages
 // REQUIRES: stable-runtime
 
+// UNSUPPORTED: ios
+
 #ifndef LIB
 
 extern "C" void libfunc();
diff --git a/test/tsan/ignore_lib1.cc b/test/tsan/ignore_lib1.cc
index e6a13a3..5949d81 100644
--- a/test/tsan/ignore_lib1.cc
+++ b/test/tsan/ignore_lib1.cc
@@ -9,6 +9,9 @@
 // in called_from_lib suppression are ignored.
 
 // REQUIRES: stable-runtime
+// UNSUPPORTED: powerpc64le
+// FIXME: This test regularly fails on powerpc64 LE possibly starting with
+// r279664.  Re-enable the test once the problem(s) have been fixed.
 
 #ifndef LIB
 
diff --git a/test/tsan/ignore_lib5.cc b/test/tsan/ignore_lib5.cc
index d7cd285..54630d5 100644
--- a/test/tsan/ignore_lib5.cc
+++ b/test/tsan/ignore_lib5.cc
@@ -6,6 +6,9 @@
 // RUN: %env_tsan_opts=suppressions='%s.supp' %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-WITHSUPP
 
 // REQUIRES: stable-runtime
+// UNSUPPORTED: powerpc64le
+// FIXME: This test occasionally fails on powerpc64 LE possibly starting with
+// r279664.  Re-enable the test once the problem(s) have been fixed.
 
 // Previously the test episodically failed with:
 //   ThreadSanitizer: called_from_lib suppression '/libignore_lib1.so$' is
diff --git a/test/tsan/java_find.cc b/test/tsan/java_find.cc
new file mode 100644
index 0000000..078aac5
--- /dev/null
+++ b/test/tsan/java_find.cc
@@ -0,0 +1,69 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
+#include "java.h"
+
+int const kHeapSize = 1024 * 1024;
+
+static void verify_find(jptr from, jptr to, jptr expected_addr,
+                        jptr expected_size) {
+  jptr addr = from;
+  jptr size = __tsan_java_find(&addr, to);
+  if (expected_size) {
+    if (!size) {
+      fprintf(stderr, "FAILED: range: [%p..%p): found nothing\n", (void *)from,
+              (void *)to);
+      return;
+    } else if (expected_size != size) {
+      fprintf(stderr, "FAILED: range: [%p..%p): wrong size, %lu instead of %lu\n",
+              (void *)from, (void *)to, size, expected_size);
+      return;
+    }
+  } else if (size) {
+    fprintf(stderr,
+            "FAILED: range [%p..%p): did not expect to find anything here\n",
+            (void *)from, (void *)to);
+    return;
+  } else {
+    return;
+  }
+  if (expected_addr != addr) {
+    fprintf(
+        stderr,
+        "FAILED: range [%p..%p): expected to find object at %p, found at %p\n",
+        (void *)from, (void *)to, (void *)expected_addr, (void *)addr);
+  }
+}
+
+int main() {
+  const jptr jheap = (jptr)malloc(kHeapSize + 8) + 8;
+  const jptr jheap_end = jheap + kHeapSize;
+  __tsan_java_init(jheap, kHeapSize);
+  const jptr addr1 = jheap;
+  const int size1 = 16;
+  __tsan_java_alloc(jheap, size1);
+
+  const jptr addr2 = addr1 + size1;
+  const int size2 = 32;
+  __tsan_java_alloc(jheap + size1, size2);
+
+  const jptr addr3 = addr2 + size2;
+  const int size3 = 1024;
+  __tsan_java_alloc(jheap + size1 + size2, size3);
+
+  const jptr addr4 = addr3 + size3;
+
+  verify_find(jheap, jheap_end, addr1, size1);
+  verify_find(jheap + 8, jheap_end, addr2, size2);
+  verify_find(addr2 + 8, jheap_end, addr3, size3);
+  verify_find(addr3 + 8, jheap_end, 0, 0);
+
+  __tsan_java_move(addr2, addr4, size2);
+  verify_find(jheap + 8, jheap_end, addr3, size3);
+  verify_find(addr3 + 8, jheap_end, addr4, size2);
+  verify_find(addr4 + 8, jheap_end, 0, 0);
+
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+// CHECK-NOT: FAILED
+// CHECK: DONE
diff --git a/test/tsan/lit.cfg b/test/tsan/lit.cfg
index 3c98d1f..f0dc7b6 100644
--- a/test/tsan/lit.cfg
+++ b/test/tsan/lit.cfg
@@ -66,7 +66,7 @@
                           "-Wl,-rpath=%s" % libcxx_libdir]
 
 def build_invocation(compile_flags):
-  return " " + " ".join([config.clang] + compile_flags) + " "
+  return " " + " ".join([config.compile_wrapper, config.clang] + compile_flags) + " "
 
 config.substitutions.append( ("%clang_tsan ", build_invocation(clang_tsan_cflags)) )
 config.substitutions.append( ("%clangxx_tsan ", build_invocation(clang_tsan_cxxflags)) )
@@ -79,14 +79,11 @@
 # Default test suffixes.
 config.suffixes = ['.c', '.cc', '.cpp', '.m', '.mm']
 
-# ThreadSanitizer tests are currently supported on FreeBSD, Linux and Darwin.
-if config.host_os not in ['FreeBSD', 'Linux', 'Darwin']:
+if config.host_os not in ['FreeBSD', 'Linux', 'Darwin', 'NetBSD']:
   config.unsupported = True
 
-# Allow tests to use REQUIRES=stable-runtime.  For use when you cannot use XFAIL
-# because the test hangs.
-if config.target_arch != 'aarch64':
-  config.available_features.add('stable-runtime')
+if config.android:
+  config.unsupported = True
 
 if config.host_os == 'Darwin' and config.target_arch in ["x86_64", "x86_64h"]:
   config.parallelism_group = "darwin-64bit-sanitizer"
diff --git a/test/tsan/lit.site.cfg.in b/test/tsan/lit.site.cfg.in
index a87e8d2..a215e66 100644
--- a/test/tsan/lit.site.cfg.in
+++ b/test/tsan/lit.site.cfg.in
@@ -1,7 +1,10 @@
 @LIT_SITE_CFG_IN_HEADER@
 
 config.name_suffix = "@TSAN_TEST_CONFIG_SUFFIX@"
+config.tsan_lit_source_dir = "@TSAN_LIT_SOURCE_DIR@"
 config.has_libcxx = @TSAN_HAS_LIBCXX@
+config.ios = @TSAN_TEST_IOS_PYBOOL@
+config.iossim = @TSAN_TEST_IOSSIM_PYBOOL@
 config.target_cflags = "@TSAN_TEST_TARGET_CFLAGS@"
 config.target_arch = "@TSAN_TEST_TARGET_ARCH@"
 
diff --git a/test/tsan/map32bit.cc b/test/tsan/map32bit.cc
index 3b4f899..8aef27b 100644
--- a/test/tsan/map32bit.cc
+++ b/test/tsan/map32bit.cc
@@ -12,8 +12,8 @@
 // XFAIL: aarch64
 // XFAIL: powerpc64
 
-// MAP_32BIT doesn't exist on OS X.
-// UNSUPPORTED: darwin
+// MAP_32BIT doesn't exist on OS X and NetBSD.
+// UNSUPPORTED: darwin,netbsd
 
 void *Thread(void *ptr) {
   *(int*)ptr = 42;
@@ -45,4 +45,3 @@
 
 // CHECK: WARNING: ThreadSanitizer: data race
 // CHECK: DONE
-
diff --git a/test/tsan/signal_pause.cc b/test/tsan/signal_pause.cc
new file mode 100644
index 0000000..cbcef94
--- /dev/null
+++ b/test/tsan/signal_pause.cc
@@ -0,0 +1,35 @@
+// RUN: %clang_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
+
+// Test that pause loop handles signals.
+
+#include "test.h"
+#include <signal.h>
+#include <errno.h>
+
+void handler(int signum) {
+  write(2, "DONE\n", 5);
+  _exit(0);
+}
+
+void *thread(void *arg) {
+  for (;;)
+    pause();
+  return 0;
+}
+
+int main(int argc, char** argv) {
+  struct sigaction act = {};
+  act.sa_handler = &handler;
+  if (sigaction(SIGUSR1, &act, 0)) {
+    fprintf(stderr, "sigaction failed %d\n", errno);
+    return 1;
+  }
+  pthread_t th;
+  pthread_create(&th, 0, thread, 0);
+  sleep(1);  // give it time to block in pause
+  pthread_kill(th, SIGUSR1);
+  sleep(10);  // signal handler must exit the process while we are here
+  return 0;
+}
+
+// CHECK: DONE
diff --git a/test/tsan/strerror_r.cc b/test/tsan/strerror_r.cc
new file mode 100644
index 0000000..ad48201
--- /dev/null
+++ b/test/tsan/strerror_r.cc
@@ -0,0 +1,30 @@
+// RUN: %clangxx_tsan -O1 -DTEST_ERROR=ERANGE %s -o %t && %run %t 2>&1 | FileCheck --check-prefixes=CHECK,CHECK-SYS %s
+// RUN: %clangxx_tsan -O1 -DTEST_ERROR=-1 %s -o %t && not %run %t 2>&1 | FileCheck --check-prefixes=CHECK,CHECK-USER %s
+// UNSUPPORTED: darwin
+
+#include <assert.h>
+#include <errno.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <string.h>
+
+char buffer[1000];
+
+void *Thread(void *p) {
+  strerror_r(TEST_ERROR, buffer, sizeof(buffer));
+  return buffer;
+}
+
+int main() {
+  pthread_t th[2];
+  pthread_create(&th[0], 0, Thread, 0);
+  pthread_create(&th[1], 0, Thread, 0);
+  pthread_join(th[0], 0);
+  pthread_join(th[1], 0);
+  fprintf(stderr, "DONE\n");
+}
+
+// CHECK-USER: WARNING: ThreadSanitizer: data race
+// CHECK-SYS-NOT: WARNING: ThreadSanitizer: data race
+
+// CHECK: DONE
diff --git a/test/tsan/test.h b/test/tsan/test.h
index 6b981c0..bc4f7aa 100644
--- a/test/tsan/test.h
+++ b/test/tsan/test.h
@@ -8,6 +8,8 @@
 #include <stdarg.h>
 #include "sanitizer_common/print_address.h"
 
+#include <sanitizer/tsan_interface.h>
+
 #ifdef __APPLE__
 #include <mach/mach_time.h>
 #endif
diff --git a/test/tsan/thread_name.cc b/test/tsan/thread_name.cc
index 80d30b8..17caa62 100644
--- a/test/tsan/thread_name.cc
+++ b/test/tsan/thread_name.cc
@@ -3,10 +3,14 @@
 
 #if defined(__linux__)
 #define USE_PTHREAD_SETNAME_NP __GLIBC_PREREQ(2, 12)
+#define tsan_pthread_setname_np pthread_setname_np
 #elif defined(__FreeBSD__)
 #include <pthread_np.h>
 #define USE_PTHREAD_SETNAME_NP 1
-#define pthread_setname_np pthread_set_name_np
+#define tasn_pthread_setname_np pthread_set_name_np
+#elif defined(__NetBSD__)
+#define USE_PTHREAD_SETNAME_NP 1
+#define tsan_pthread_setname_np(a, b) pthread_setname_np((a), "%s", (void *)(b))
 #else
 #define USE_PTHREAD_SETNAME_NP 0
 #endif
@@ -24,7 +28,7 @@
 
 void *Thread2(void *x) {
 #if USE_PTHREAD_SETNAME_NP
-  pthread_setname_np(pthread_self(), "Thread2");
+  tsan_pthread_setname_np(pthread_self(), "Thread2");
 #else
   AnnotateThreadName(__FILE__, __LINE__, "Thread2");
 #endif
diff --git a/test/tsan/thread_name2.cc b/test/tsan/thread_name2.cc
index d7ed0f0..9ebac29 100644
--- a/test/tsan/thread_name2.cc
+++ b/test/tsan/thread_name2.cc
@@ -6,7 +6,11 @@
 
 #if defined(__FreeBSD__)
 #include <pthread_np.h>
-#define pthread_setname_np pthread_set_name_np
+#define tsan_pthread_setname_np pthread_set_name_np
+#elif defined(__NetBSD__)
+#define tsan_pthread_setname_np(a, b) pthread_setname_np((a), "%s", (void *)(b))
+#else
+#define tsan_pthread_setname_np pthread_setname_np
 #endif
 
 long long Global;
@@ -18,7 +22,7 @@
 }
 
 void *Thread2(void *x) {
-  pthread_setname_np(pthread_self(), "foobar2");
+  tsan_pthread_setname_np(pthread_self(), "foobar2");
   Global--;
   barrier_wait(&barrier);
   return 0;
@@ -29,7 +33,7 @@
   pthread_t t[2];
   pthread_create(&t[0], 0, Thread1, 0);
   pthread_create(&t[1], 0, Thread2, 0);
-  pthread_setname_np(t[0], "foobar1");
+  tsan_pthread_setname_np(t[0], "foobar1");
   barrier_wait(&barrier);
   pthread_join(t[0], NULL);
   pthread_join(t[1], NULL);
diff --git a/test/tsan/unaligned_race.cc b/test/tsan/unaligned_race.cc
index 030642a..5850b21 100644
--- a/test/tsan/unaligned_race.cc
+++ b/test/tsan/unaligned_race.cc
@@ -6,31 +6,22 @@
 
 volatile uint64_t objs[8*2*(2 + 4 + 8)][2];
 
-extern "C" {
-uint16_t __sanitizer_unaligned_load16(volatile void *addr);
-uint32_t __sanitizer_unaligned_load32(volatile void *addr);
-uint64_t __sanitizer_unaligned_load64(volatile void *addr);
-void __sanitizer_unaligned_store16(volatile void *addr, uint16_t v);
-void __sanitizer_unaligned_store32(volatile void *addr, uint32_t v);
-void __sanitizer_unaligned_store64(volatile void *addr, uint64_t v);
-}
-
 // All this mess is to generate unique stack for each race,
 // otherwise tsan will suppress similar stacks.
 
-static NOINLINE void access(volatile char *p, int sz, int rw) {
+static NOINLINE void access(volatile void *p, int sz, int rw) {
   if (rw) {
     switch (sz) {
-    case 0: __sanitizer_unaligned_store16(p, 0); break;
-    case 1: __sanitizer_unaligned_store32(p, 0); break;
-    case 2: __sanitizer_unaligned_store64(p, 0); break;
+    case 0: __sanitizer_unaligned_store16((void *)p, 0); break;
+    case 1: __sanitizer_unaligned_store32((void *)p, 0); break;
+    case 2: __sanitizer_unaligned_store64((void *)p, 0); break;
     default: exit(1);
     }
   } else {
     switch (sz) {
-    case 0: __sanitizer_unaligned_load16(p); break;
-    case 1: __sanitizer_unaligned_load32(p); break;
-    case 2: __sanitizer_unaligned_load64(p); break;
+    case 0: __sanitizer_unaligned_load16((void *)p); break;
+    case 1: __sanitizer_unaligned_load32((void *)p); break;
+    case 2: __sanitizer_unaligned_load64((void *)p); break;
     default: exit(1);
     }
   }
diff --git a/test/ubsan/CMakeLists.txt b/test/ubsan/CMakeLists.txt
index f4b73e8..5843e0c 100644
--- a/test/ubsan/CMakeLists.txt
+++ b/test/ubsan/CMakeLists.txt
@@ -35,11 +35,20 @@
   if(COMPILER_RT_HAS_MSAN AND ";${MSAN_SUPPORTED_ARCH};" MATCHES ";${arch};")
     add_ubsan_testsuite("MemorySanitizer" msan ${arch})
   endif()
-  if(COMPILER_RT_HAS_TSAN AND ";${TSAN_SUPPORTED_ARCH};" MATCHES ";${arch};")
+  if(COMPILER_RT_HAS_TSAN AND ";${TSAN_SUPPORTED_ARCH};" MATCHES ";${arch};" AND NOT ANDROID)
     add_ubsan_testsuite("ThreadSanitizer" tsan ${arch})
   endif()
 endforeach()
 
+if(APPLE)
+  foreach(arch ${UBSAN_TEST_ARCH})
+    set(UBSAN_TEST_TARGET_ARCH ${arch})
+    get_test_cc_for_arch(${arch} UBSAN_TEST_TARGET_CC UBSAN_TEST_TARGET_CFLAGS)
+    set(UBSAN_TEST_TARGET_CFLAGS "${UBSAN_TEST_TARGET_CFLAGS} -lc++abi")
+    add_ubsan_testsuite("StandaloneStatic" ubsan ${arch})
+  endforeach()
+endif()
+
 add_lit_testsuite(check-ubsan "Running UndefinedBehaviorSanitizer tests"
   ${UBSAN_TESTSUITES}
   DEPENDS ${UBSAN_TEST_DEPS})
diff --git a/test/ubsan/TestCases/Float/cast-overflow.cpp b/test/ubsan/TestCases/Float/cast-overflow.cpp
index 5f51553..a53c663 100644
--- a/test/ubsan/TestCases/Float/cast-overflow.cpp
+++ b/test/ubsan/TestCases/Float/cast-overflow.cpp
@@ -18,11 +18,17 @@
 # define BYTE_ORDER __DARWIN_BYTE_ORDER
 # define BIG_ENDIAN __DARWIN_BIG_ENDIAN
 # define LITTLE_ENDIAN __DARWIN_LITTLE_ENDIAN
-#elif defined(__FreeBSD__)
+#elif defined(__FreeBSD__) || defined(__NetBSD__)
 # include <sys/endian.h>
-# define BYTE_ORDER _BYTE_ORDER
-# define BIG_ENDIAN _BIG_ENDIAN
-# define LITTLE_ENDIAN _LITTLE_ENDIAN
+# ifndef BYTE_ORDER
+#  define BYTE_ORDER _BYTE_ORDER
+# endif
+# ifndef BIG_ENDIAN
+#  define BIG_ENDIAN _BIG_ENDIAN
+# endif
+# ifndef LITTLE_ENDIAN
+#  define LITTLE_ENDIAN _LITTLE_ENDIAN
+# endif
 #elif defined(_WIN32)
 # define BYTE_ORDER 0
 # define BIG_ENDIAN 1
@@ -86,42 +92,42 @@
   case '0': {
     // Note that values between 0x7ffffe00 and 0x80000000 may or may not
     // successfully round-trip, depending on the rounding mode.
-    // CHECK-0: {{.*}}cast-overflow.cpp:[[@LINE+1]]:27: runtime error: value 2.14748{{.*}} is outside the range of representable values of type 'int'
+    // CHECK-0: {{.*}}cast-overflow.cpp:[[@LINE+1]]:27: runtime error: 2.14748{{.*}} is outside the range of representable values of type 'int'
     static int test_int = MaxFloatRepresentableAsInt + 0x80;
     // CHECK-0: SUMMARY: {{.*}}Sanitizer: float-cast-overflow {{.*}}cast-overflow.cpp:[[@LINE-1]]
     return 0;
     }
   case '1': {
-    // CHECK-1: {{.*}}cast-overflow.cpp:[[@LINE+1]]:27: runtime error: value -2.14748{{.*}} is outside the range of representable values of type 'int'
+    // CHECK-1: {{.*}}cast-overflow.cpp:[[@LINE+1]]:27: runtime error: -2.14748{{.*}} is outside the range of representable values of type 'int'
     static int test_int = MinFloatRepresentableAsInt - 0x100;
     return 0;
   }
   case '2': {
-    // CHECK-2: {{.*}}cast-overflow.cpp:[[@LINE+2]]:37: runtime error: value -1 is outside the range of representable values of type 'unsigned int'
+    // CHECK-2: {{.*}}cast-overflow.cpp:[[@LINE+2]]:37: runtime error: -1 is outside the range of representable values of type 'unsigned int'
     volatile float f = -1.0;
     volatile unsigned u = (unsigned)f;
     return 0;
   }
   case '3': {
-    // CHECK-3: {{.*}}cast-overflow.cpp:[[@LINE+1]]:37: runtime error: value 4.2949{{.*}} is outside the range of representable values of type 'unsigned int'
+    // CHECK-3: {{.*}}cast-overflow.cpp:[[@LINE+1]]:37: runtime error: 4.2949{{.*}} is outside the range of representable values of type 'unsigned int'
     static int test_int = (unsigned)(MaxFloatRepresentableAsUInt + 0x100);
     return 0;
   }
 
   case '4': {
-    // CHECK-4: {{.*}}cast-overflow.cpp:[[@LINE+1]]:27: runtime error: value {{.*}} is outside the range of representable values of type 'int'
+    // CHECK-4: {{.*}}cast-overflow.cpp:[[@LINE+1]]:27: runtime error: {{.*}} is outside the range of representable values of type 'int'
     static int test_int = Inf;
     return 0;
   }
   case '5': {
-    // CHECK-5: {{.*}}cast-overflow.cpp:[[@LINE+1]]:27: runtime error: value {{.*}} is outside the range of representable values of type 'int'
+    // CHECK-5: {{.*}}cast-overflow.cpp:[[@LINE+1]]:27: runtime error: {{.*}} is outside the range of representable values of type 'int'
     static int test_int = NaN;
     return 0;
   }
 
     // Integer -> floating point overflow.
   case '6': {
-    // CHECK-6: cast-overflow.cpp:[[@LINE+2]]:{{34: runtime error: value 0xffffff00000000000000000000000001 is outside the range of representable values of type 'float'| __int128 not supported}}
+    // CHECK-6: cast-overflow.cpp:[[@LINE+2]]:{{34: runtime error: 0xffffff00000000000000000000000001 is outside the range of representable values of type 'float'| __int128 not supported}}
 #if defined(__SIZEOF_INT128__) && !defined(_WIN32)
     static int test_int = (float)(FloatMaxAsUInt128 + 1);
     return 0;
@@ -135,16 +141,16 @@
   // FIXME: The backend cannot lower __fp16 operations on x86 yet.
   //case '7':
   //  (__fp16)65504; // ok
-  //  // CHECK-7: runtime error: value 65505 is outside the range of representable values of type '__fp16'
+  //  // CHECK-7: runtime error: 65505 is outside the range of representable values of type '__fp16'
   //  return (__fp16)65505;
 
     // Floating point -> floating point overflow.
   case '8':
-    // CHECK-8: {{.*}}cast-overflow.cpp:[[@LINE+1]]:19: runtime error: value 1e+39 is outside the range of representable values of type 'float'
+    // CHECK-8: {{.*}}cast-overflow.cpp:[[@LINE+1]]:19: runtime error: 1e+39 is outside the range of representable values of type 'float'
     return (float)1e39;
   case '9':
     volatile long double ld = 300.0;
-    // CHECK-9: {{.*}}cast-overflow.cpp:[[@LINE+1]]:14: runtime error: value 300 is outside the range of representable values of type 'char'
+    // CHECK-9: {{.*}}cast-overflow.cpp:[[@LINE+1]]:14: runtime error: 300 is outside the range of representable values of type 'char'
     char c = ld;
     return c;
   }
diff --git a/test/ubsan/TestCases/Integer/negate-overflow.cpp b/test/ubsan/TestCases/Integer/negate-overflow.cpp
index 628291e..72438d3 100644
--- a/test/ubsan/TestCases/Integer/negate-overflow.cpp
+++ b/test/ubsan/TestCases/Integer/negate-overflow.cpp
@@ -6,7 +6,9 @@
   // CHECKU: negate-overflow.cpp:[[@LINE+2]]:3: runtime error: negation of 2147483648 cannot be represented in type 'unsigned int'
   // CHECKU-NOT: cast to an unsigned
   -unsigned(-0x7fffffff - 1); // ok
-  // CHECKS: negate-overflow.cpp:[[@LINE+2]]:10: runtime error: negation of -2147483648 cannot be represented in type 'int'; cast to an unsigned type to negate this value to itself
+  // CHECKS: negate-overflow.cpp:[[@LINE+2]]:3: runtime error: negation of -2147483648 cannot be represented in type 'int'; cast to an unsigned type to negate this value to itself
   // CHECKU-NOT: runtime error
-  return -(-0x7fffffff - 1);
+  -(-0x7fffffff - 1);
+
+  return 0;
 }
diff --git a/test/ubsan/TestCases/Integer/summary.cpp b/test/ubsan/TestCases/Integer/summary.cpp
index e687afa..8726c14 100644
--- a/test/ubsan/TestCases/Integer/summary.cpp
+++ b/test/ubsan/TestCases/Integer/summary.cpp
@@ -7,7 +7,7 @@
 
 int main() {
   (void)(uint64_t(10000000000000000000ull) + uint64_t(9000000000000000000ull));
-  // CHECK-NOTYPE: SUMMARY: AddressSanitizer: undefined-behavior {{.*}}summary.cpp:[[@LINE-1]]:44
-  // CHECK-TYPE: SUMMARY: AddressSanitizer: unsigned-integer-overflow {{.*}}summary.cpp:[[@LINE-2]]:44
+  // CHECK-NOTYPE: SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior {{.*}}summary.cpp:[[@LINE-1]]:44
+  // CHECK-TYPE: SUMMARY: UndefinedBehaviorSanitizer: unsigned-integer-overflow {{.*}}summary.cpp:[[@LINE-2]]:44
   return 0;
 }
diff --git a/test/ubsan/TestCases/Integer/suppressions.cpp b/test/ubsan/TestCases/Integer/suppressions.cpp
index a9e6601..f72d82e 100644
--- a/test/ubsan/TestCases/Integer/suppressions.cpp
+++ b/test/ubsan/TestCases/Integer/suppressions.cpp
@@ -3,6 +3,7 @@
 // Suppression by symbol name (unsigned-integer-overflow:do_overflow below)
 // requires the compiler-rt runtime to be able to symbolize stack addresses.
 // REQUIRES: can-symbolize
+// UNSUPPORTED: android
 
 // Fails without any suppression.
 // RUN: %env_ubsan_opts=halt_on_error=1 not %run %t 2>&1 | FileCheck %s
diff --git a/test/ubsan/TestCases/Misc/Linux/print_stack_trace.cc b/test/ubsan/TestCases/Misc/Linux/print_stack_trace.cc
new file mode 100644
index 0000000..341fd7a
--- /dev/null
+++ b/test/ubsan/TestCases/Misc/Linux/print_stack_trace.cc
@@ -0,0 +1,23 @@
+// RUN: %clangxx -fsanitize=undefined -O0 %s -o %t && UBSAN_OPTIONS=stack_trace_format=DEFAULT:fast_unwind_on_fatal=1 %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx -fsanitize=undefined -O0 %s -o %t && UBSAN_OPTIONS=stack_trace_format=DEFAULT:fast_unwind_on_fatal=0 %run %t 2>&1 | FileCheck %s
+
+// This test is temporarily disabled due to broken unwinding on ARM.
+// UNSUPPORTED: -linux-
+
+// The test doesn't pass on Darwin in UBSan-TSan configuration, because TSan is
+// using the slow unwinder which is not supported on Darwin. The test should
+// be universal after landing of https://reviews.llvm.org/D32806.
+
+#include <sanitizer/common_interface_defs.h>
+
+static inline void FooBarBaz() {
+  __sanitizer_print_stack_trace();
+}
+
+int main() {
+  FooBarBaz();
+  return 0;
+}
+
+// CHECK: {{.*}} in FooBarBaz{{.*}}print_stack_trace.cc{{.*}}
+// CHECK: {{.*}} in main{{.*}}print_stack_trace.cc{{.*}}
diff --git a/test/ubsan/TestCases/Misc/bool.m b/test/ubsan/TestCases/Misc/bool.m
new file mode 100644
index 0000000..0430b45
--- /dev/null
+++ b/test/ubsan/TestCases/Misc/bool.m
@@ -0,0 +1,14 @@
+// RUN: %clang -fsanitize=bool %s -O3 -o %t
+// RUN: not %run %t 2>&1 | FileCheck %s
+// RUN: %env_ubsan_opts=print_summary=1:report_error_type=1 not %run %t 2>&1 | FileCheck %s --check-prefix=SUMMARY
+
+typedef char BOOL;
+unsigned char NotABool = 123;
+
+int main(int argc, char **argv) {
+  BOOL *p = (BOOL*)&NotABool;
+
+  // CHECK: bool.m:[[@LINE+1]]:10: runtime error: load of value 123, which is not a valid value for type 'BOOL'
+  return *p;
+  // SUMMARY: SUMMARY: {{.*}}Sanitizer: invalid-bool-load {{.*}}bool.m:[[@LINE-1]]
+}
diff --git a/test/ubsan/TestCases/Misc/builtins.cpp b/test/ubsan/TestCases/Misc/builtins.cpp
new file mode 100644
index 0000000..18c68b5
--- /dev/null
+++ b/test/ubsan/TestCases/Misc/builtins.cpp
@@ -0,0 +1,35 @@
+// REQUIRES: arch=x86_64
+//
+// RUN: %clangxx -fsanitize=builtin -w %s -O3 -o %t
+// RUN: %run %t 2>&1 | FileCheck %s --check-prefix=RECOVER
+// RUN: %clangxx -fsanitize=builtin -fno-sanitize-recover=builtin -w %s -O3 -o %t.abort
+// RUN: not %run %t.abort 2>&1 | FileCheck %s --check-prefix=ABORT
+
+void check_ctz(int n) {
+  // ABORT: builtins.cpp:[[@LINE+2]]:17: runtime error: passing zero to ctz(), which is not a valid argument
+  // RECOVER: builtins.cpp:[[@LINE+1]]:17: runtime error: passing zero to ctz(), which is not a valid argument
+  __builtin_ctz(n);
+
+  // RECOVER: builtins.cpp:[[@LINE+1]]:18: runtime error: passing zero to ctz(), which is not a valid argument
+  __builtin_ctzl(n);
+
+  // RECOVER: builtins.cpp:[[@LINE+1]]:19: runtime error: passing zero to ctz(), which is not a valid argument
+  __builtin_ctzll(n);
+}
+
+void check_clz(int n) {
+  // RECOVER: builtins.cpp:[[@LINE+1]]:17: runtime error: passing zero to clz(), which is not a valid argument
+  __builtin_clz(n);
+
+  // RECOVER: builtins.cpp:[[@LINE+1]]:18: runtime error: passing zero to clz(), which is not a valid argument
+  __builtin_clzl(n);
+
+  // RECOVER: builtins.cpp:[[@LINE+1]]:19: runtime error: passing zero to clz(), which is not a valid argument
+  __builtin_clzll(n);
+}
+
+int main() {
+  check_ctz(0);
+  check_clz(0);
+  return 0;
+}
diff --git a/test/ubsan/TestCases/Misc/coverage-levels.cc b/test/ubsan/TestCases/Misc/coverage-levels.cc
index f96b487..05c1993 100644
--- a/test/ubsan/TestCases/Misc/coverage-levels.cc
+++ b/test/ubsan/TestCases/Misc/coverage-levels.cc
@@ -22,6 +22,7 @@
 
 // Coverage is not yet implemented in TSan.
 // XFAIL: ubsan-tsan
+// UNSUPPORTED: ubsan-standalone-static
 
 volatile int sink;
 int main(int argc, char **argv) {
diff --git a/test/ubsan/TestCases/Misc/log-path_test.cc b/test/ubsan/TestCases/Misc/log-path_test.cc
index 5b45f0b..40bb35a 100644
--- a/test/ubsan/TestCases/Misc/log-path_test.cc
+++ b/test/ubsan/TestCases/Misc/log-path_test.cc
@@ -32,5 +32,5 @@
   return 0;
 }
 
-// CHECK-ERROR: runtime error: value -4 is outside the range of representable values of type 'unsigned int'
+// CHECK-ERROR: runtime error: -4 is outside the range of representable values of type 'unsigned int'
 
diff --git a/test/ubsan/TestCases/Misc/missing_return.cpp b/test/ubsan/TestCases/Misc/missing_return.cpp
index 6808227..5c5b286 100644
--- a/test/ubsan/TestCases/Misc/missing_return.cpp
+++ b/test/ubsan/TestCases/Misc/missing_return.cpp
@@ -1,13 +1,10 @@
-// RUN: %clangxx -fsanitize=return -g %s -O3 -o %t
+// RUN: %clangxx -fsanitize=return %gmlt %s -O3 -o %t
 // RUN: not %run %t 2>&1 | FileCheck %s
-// RUN: %env_ubsan_opts=print_stacktrace=1 not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-%os-STACKTRACE
+// RUN: %env_ubsan_opts=print_stacktrace=1 not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-STACKTRACE
 
 // CHECK: missing_return.cpp:[[@LINE+1]]:5: runtime error: execution reached the end of a value-returning function without returning a value
 int f() {
-// Slow stack unwinding is not available on Darwin for now, see
-// https://code.google.com/p/address-sanitizer/issues/detail?id=137
-// CHECK-Linux-STACKTRACE: #0 {{.*}}f(){{.*}}missing_return.cpp:[[@LINE-3]]
-// CHECK-FreeBSD-STACKTRACE: #0 {{.*}}f(void){{.*}}missing_return.cpp:[[@LINE-4]]
+// CHECK-STACKTRACE: #0 {{.*}}f{{.*}}missing_return.cpp:[[@LINE-1]]
 }
 
 int main(int, char **argv) {
diff --git a/test/ubsan/TestCases/Misc/nonnull.cpp b/test/ubsan/TestCases/Misc/nonnull.cpp
index c3ab49c..d5cd2bf 100644
--- a/test/ubsan/TestCases/Misc/nonnull.cpp
+++ b/test/ubsan/TestCases/Misc/nonnull.cpp
@@ -1,15 +1,42 @@
-// RUN: %clangxx -fsanitize=returns-nonnull-attribute %s -O3 -o %t
-// RUN: %run %t foo
+// RUN: %clangxx -fsanitize=returns-nonnull-attribute -w %s -O3 -o %t
+// RUN: %run %t foo 2>&1 | count 0
 // RUN: %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx -fsanitize=returns-nonnull-attribute -fno-sanitize-recover=returns-nonnull-attribute -w %s -O3 -o %t.abort
+// RUN: not %run %t.abort &> /dev/null
 
 __attribute__((returns_nonnull)) char *foo(char *a);
 
 char *foo(char *a) {
+  // CHECK: nonnull.cpp:[[@LINE+2]]:3: runtime error: null pointer returned from function declared to never return null
+  // CHECK-NEXT: nonnull.cpp:[[@LINE-4]]:16: note: returns_nonnull attribute specified here
   return a;
-  // CHECK: nonnull.cpp:[[@LINE+2]]:1: runtime error: null pointer returned from function declared to never return null
-  // CHECK-NEXT: nonnull.cpp:[[@LINE-5]]:16: note: returns_nonnull attribute specified here
+}
+
+__attribute__((returns_nonnull)) char *bar(int x, char *a) {
+  if (x > 10) {
+    // CHECK: nonnull.cpp:[[@LINE+2]]:5: runtime error: null pointer returned from function declared to never return null
+    // CHECK-NEXT: nonnull.cpp:[[@LINE-3]]:16: note: returns_nonnull attribute specified here
+    return a;
+  } else {
+    // CHECK: nonnull.cpp:[[@LINE+2]]:5: runtime error: null pointer returned from function declared to never return null
+    // CHECK-NEXT: nonnull.cpp:[[@LINE-7]]:16: note: returns_nonnull attribute specified here
+    return a;
+  }
 }
 
 int main(int argc, char **argv) {
-  return foo(argv[1]) == 0;
+  char *a = argv[1];
+
+  foo(a);
+
+  bar(20, a);
+
+  // We expect to see a runtime error the first time we cover the "else"...
+  bar(5, a);
+
+  // ... but not a second time.
+  // CHECK-NOT: runtime error
+  bar(5, a);
+
+  return 0;
 }
diff --git a/test/ubsan/TestCases/Misc/nullability.c b/test/ubsan/TestCases/Misc/nullability.c
new file mode 100644
index 0000000..a6ddf0e
--- /dev/null
+++ b/test/ubsan/TestCases/Misc/nullability.c
@@ -0,0 +1,62 @@
+// RUN: %clang -w -fsanitize=nullability-arg,nullability-assign,nullability-return %s -O3 -o %t
+// RUN: %run %t foo 2>&1 | count 0
+// RUN: %run %t 2>&1 | FileCheck %s
+
+// CHECK: nullability.c:[[@LINE+2]]:41: runtime error: null pointer returned from function declared to never return null
+// CHECK-NEXT: nullability.c:[[@LINE+1]]:6: note: _Nonnull return type annotation specified here
+int *_Nonnull nonnull_retval1(int *p) { return p; }
+
+// CHECK: nullability.c:1001:22: runtime error: null pointer passed as argument 2, which is declared to never be null
+// CHECK-NEXT: nullability.c:[[@LINE+1]]:56: note: _Nonnull type annotation specified here
+int *_Nonnull nonnull_retval2(int *_Nonnull arg1, int *_Nonnull arg2,
+                              int *_Nullable arg3, int *arg4, int arg5, ...) {
+  return arg1;
+}
+
+// CHECK: nullability.c:1002:15: runtime error: null pointer passed as argument 1, which is declared to never be null
+// CHECK-NEXT: nullability.c:[[@LINE+1]]:23: note: _Nonnull type annotation specified here
+void nonnull_arg(int *_Nonnull p) {}
+
+void nonnull_assign1(int *p) {
+  int *_Nonnull local;
+// CHECK: nullability.c:[[@LINE+1]]:9: runtime error: _Nonnull binding to null pointer of type 'int * _Nonnull'
+  local = p;
+}
+
+void nonnull_assign2(int *p) {
+  int *_Nonnull arr[1];
+  // CHECK: nullability.c:[[@LINE+1]]:10: runtime error: _Nonnull binding to null pointer of type 'int * _Nonnull'
+  arr[0] = p;
+}
+
+struct S1 {
+  int *_Nonnull mptr;
+};
+
+void nonnull_assign3(int *p) {
+  struct S1 s;
+  // CHECK: nullability.c:[[@LINE+1]]:10: runtime error: _Nonnull binding to null pointer of type 'int * _Nonnull'
+  s.mptr = p;
+}
+
+// CHECK: nullability.c:[[@LINE+1]]:52: runtime error: _Nonnull binding to null pointer of type 'int * _Nonnull'
+void nonnull_init1(int *p) { int *_Nonnull local = p; }
+
+// CHECK: nullability.c:[[@LINE+2]]:53: runtime error: _Nonnull binding to null pointer of type 'int * _Nonnull'
+// CHECK: nullability.c:[[@LINE+1]]:56: runtime error: _Nonnull binding to null pointer of type 'int * _Nonnull'
+void nonnull_init2(int *p) { int *_Nonnull arr[] = {p, p}; }
+
+int main(int argc, char **argv) {
+  int *p = (argc > 1) ? &argc : ((int *)0);
+
+#line 1000
+  nonnull_retval1(p);
+  nonnull_retval2(p, p, p, p, 0, 0, 0, 0);
+  nonnull_arg(p);
+  nonnull_assign1(p);
+  nonnull_assign2(p);
+  nonnull_assign3(p);
+  nonnull_init1(p);
+  nonnull_init2(p);
+  return 0;
+}
diff --git a/test/ubsan/TestCases/Pointer/index-overflow.cpp b/test/ubsan/TestCases/Pointer/index-overflow.cpp
new file mode 100644
index 0000000..eb7f95e
--- /dev/null
+++ b/test/ubsan/TestCases/Pointer/index-overflow.cpp
@@ -0,0 +1,19 @@
+// RUN: %clangxx -fsanitize=pointer-overflow %s -o %t
+// RUN: %t 1 2>&1 | FileCheck %s --check-prefix=ERR
+// RUN: %t 0 2>&1 | FileCheck %s --check-prefix=SAFE
+// RUN: %t -1 2>&1 | FileCheck %s --check-prefix=SAFE
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+int main(int argc, char *argv[]) {
+  // SAFE-NOT: runtime error
+  // ERR: runtime error: pointer index expression with base {{.*}} overflowed to
+
+  char *p = (char *)(UINTPTR_MAX);
+
+  printf("%p\n", p + atoi(argv[1]));
+
+  return 0;
+}
diff --git a/test/ubsan/TestCases/Pointer/unsigned-index-expression.cpp b/test/ubsan/TestCases/Pointer/unsigned-index-expression.cpp
new file mode 100644
index 0000000..0002c71
--- /dev/null
+++ b/test/ubsan/TestCases/Pointer/unsigned-index-expression.cpp
@@ -0,0 +1,20 @@
+// RUN: %clangxx -std=c++11 -fsanitize=pointer-overflow %s -o %t
+// RUN: %t 2>&1 | FileCheck %s
+
+int main(int argc, char *argv[]) {
+  char c;
+  char *p = &c;
+  unsigned long long neg_1 = -1;
+
+  // CHECK: unsigned-index-expression.cpp:[[@LINE+1]]:15: runtime error: addition of unsigned offset to 0x{{.*}} overflowed to 0x{{.*}}
+  char *q = p + neg_1;
+
+  // CHECK: unsigned-index-expression.cpp:[[@LINE+1]]:16: runtime error: subtraction of unsigned offset from 0x{{.*}} overflowed to 0x{{.*}}
+  char *q1 = p - neg_1;
+
+  // CHECK: unsigned-index-expression.cpp:[[@LINE+2]]:16: runtime error: pointer index expression with base 0x{{0*}} overflowed to 0x{{.*}}
+  char *n = nullptr;
+  char *q2 = n - 1ULL;
+
+  return 0;
+}
diff --git a/test/ubsan/TestCases/TypeCheck/Function/function.cpp b/test/ubsan/TestCases/TypeCheck/Function/function.cpp
index 6e7e314..25b2bdc 100644
--- a/test/ubsan/TestCases/TypeCheck/Function/function.cpp
+++ b/test/ubsan/TestCases/TypeCheck/Function/function.cpp
@@ -2,9 +2,7 @@
 // RUN: %run %t 2>&1 | FileCheck %s
 // Verify that we can disable symbolization if needed:
 // RUN: %env_ubsan_opts=symbolize=0 %run %t 2>&1 | FileCheck %s --check-prefix=NOSYM
-
-// -fsanitize=function is unsupported on Darwin yet.
-// XFAIL: darwin
+// XFAIL: win32,win64
 
 #include <stdint.h>
 
@@ -18,9 +16,9 @@
 }
 
 void make_invalid_call() {
-  // CHECK: function.cpp:25:3: runtime error: call to function f() through pointer to incorrect function type 'void (*)(int)'
-  // CHECK-NEXT: function.cpp:11: note: f() defined here
-  // NOSYM: function.cpp:25:3: runtime error: call to function (unknown) through pointer to incorrect function type 'void (*)(int)'
+  // CHECK: function.cpp:[[@LINE+4]]:3: runtime error: call to function f() through pointer to incorrect function type 'void (*)(int)'
+  // CHECK-NEXT: function.cpp:[[@LINE-11]]: note: f() defined here
+  // NOSYM: function.cpp:[[@LINE+2]]:3: runtime error: call to function (unknown) through pointer to incorrect function type 'void (*)(int)'
   // NOSYM-NEXT: ({{.*}}+0x{{.*}}): note: (unknown) defined here
   reinterpret_cast<void (*)(int)>(reinterpret_cast<uintptr_t>(f))(42);
 }
diff --git a/test/ubsan/TestCases/TypeCheck/Function/lit.local.cfg b/test/ubsan/TestCases/TypeCheck/Function/lit.local.cfg
index 27c61a3..a101599 100644
--- a/test/ubsan/TestCases/TypeCheck/Function/lit.local.cfg
+++ b/test/ubsan/TestCases/TypeCheck/Function/lit.local.cfg
@@ -1,3 +1,3 @@
 # The function type checker is only supported on x86 and x86_64 for now.
-if config.root.host_arch not in ['x86', 'x86_64']:
+if config.target_arch not in ['x86', 'x86_64']:
   config.unsupported = True
diff --git a/test/ubsan/TestCases/TypeCheck/Linux/PR33221.cpp b/test/ubsan/TestCases/TypeCheck/Linux/PR33221.cpp
new file mode 100644
index 0000000..a5e61c2
--- /dev/null
+++ b/test/ubsan/TestCases/TypeCheck/Linux/PR33221.cpp
@@ -0,0 +1,50 @@
+// RUN: %clangxx -std=c++11 -frtti -fsanitize=vptr -g %s -O3 -o %t
+// RUN: %run %t &> %t.log
+// RUN: cat %t.log | not count 0 && FileCheck --input-file %t.log %s || cat %t.log | count 0
+
+// REQUIRES: cxxabi
+
+#include <sys/mman.h>
+#include <unistd.h>
+
+class Base {
+public:
+  int i;
+  virtual void print() {}
+};
+
+class Derived : public Base {
+public:
+  void print() {}
+};
+
+
+int main() {
+  int page_size = getpagesize();
+
+  void *non_accessible = mmap(nullptr, page_size * 2, PROT_NONE,
+                              MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  
+  if (non_accessible == MAP_FAILED)
+    return 0;
+
+  void *accessible = mmap((char*)non_accessible + page_size, page_size,
+                          PROT_READ | PROT_WRITE,
+                          MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  if (accessible == MAP_FAILED)
+    return 0;
+
+  char *c = new char[sizeof(Derived)];
+
+  // The goal is to trigger a condition when Vptr points to accessible memory,
+  // but VptrPrefix does not. That has been triggering SIGSEGV in UBSan code.
+  void **vtable_ptr = reinterpret_cast<void **>(c);
+  *vtable_ptr = (void*)accessible;
+
+  Derived *list = (Derived *)c;
+
+// CHECK: PR33221.cpp:[[@LINE+2]]:19: runtime error: member access within address {{.*}} which does not point to an object of type 'Base'
+// CHECK-NEXT: invalid vptr
+  int foo = list->i;
+  return 0;
+}
diff --git a/test/ubsan/TestCases/TypeCheck/Linux/lit.local.cfg b/test/ubsan/TestCases/TypeCheck/Linux/lit.local.cfg
new file mode 100644
index 0000000..57271b8
--- /dev/null
+++ b/test/ubsan/TestCases/TypeCheck/Linux/lit.local.cfg
@@ -0,0 +1,9 @@
+def getRoot(config):
+  if not config.parent:
+    return config
+  return getRoot(config.parent)
+
+root = getRoot(config)
+
+if root.host_os not in ['Linux']:
+  config.unsupported = True
diff --git a/test/ubsan/TestCases/TypeCheck/PR33221.cpp b/test/ubsan/TestCases/TypeCheck/PR33221.cpp
new file mode 100644
index 0000000..65cbf5d
--- /dev/null
+++ b/test/ubsan/TestCases/TypeCheck/PR33221.cpp
@@ -0,0 +1,29 @@
+// RUN: %clangxx -frtti -fsanitize=null,vptr -g %s -O3 -o %t
+// RUN: %run %t 2>&1 | FileCheck %s
+
+// REQUIRES: cxxabi
+// UNSUPPORTED: win32
+
+#include <string.h>
+
+class Base {
+public:
+  int i;
+  virtual void print() {}
+};
+
+class Derived : public Base {
+public:
+  void print() {}
+};
+
+int main() {
+  char *c = new char[sizeof(Derived)];
+  memset((void *)c, 0xFF, sizeof(Derived));
+  Derived *list = (Derived *)c;
+
+// CHECK: PR33221.cpp:[[@LINE+2]]:19: runtime error: member access within address {{.*}} which does not point to an object of type 'Base'
+// CHECK-NEXT: invalid vptr
+  int foo = list->i;
+  return 0;
+}
diff --git a/test/ubsan/TestCases/TypeCheck/misaligned.cpp b/test/ubsan/TestCases/TypeCheck/misaligned.cpp
index 35b1ec3..4eaedf3 100644
--- a/test/ubsan/TestCases/TypeCheck/misaligned.cpp
+++ b/test/ubsan/TestCases/TypeCheck/misaligned.cpp
@@ -1,8 +1,4 @@
-// FIXME: This test currently fails on Windows because we use the MSVC linker,
-// which throws away DWARF debug info.
-// XFAIL: win32
-//
-// RUN: %clangxx -fsanitize=alignment -g %s -O3 -o %t
+// RUN: %clangxx %gmlt -fsanitize=alignment %s -O3 -o %t
 // RUN: %run %t l0 && %run %t s0 && %run %t r0 && %run %t m0 && %run %t f0 && %run %t n0 && %run %t u0
 // RUN: %run %t l1 2>&1 | FileCheck %s --check-prefix=CHECK-LOAD --strict-whitespace
 // RUN: %run %t s1 2>&1 | FileCheck %s --check-prefix=CHECK-STORE
@@ -11,7 +7,7 @@
 // RUN: %run %t f1 2>&1 | FileCheck %s --check-prefix=CHECK-MEMFUN
 // RUN: %run %t n1 2>&1 | FileCheck %s --check-prefix=CHECK-NEW
 // RUN: %run %t u1 2>&1 | FileCheck %s --check-prefix=CHECK-UPCAST
-// RUN: %env_ubsan_opts=print_stacktrace=1 %run %t l1 2>&1 | FileCheck %s --check-prefix=CHECK-LOAD --check-prefix=CHECK-%os-STACK-LOAD
+// RUN: %env_ubsan_opts=print_stacktrace=1 %run %t l1 2>&1 | FileCheck %s --check-prefix=CHECK-LOAD --check-prefix=CHECK-STACK-LOAD
 
 // RUN: %clangxx -fsanitize=alignment -fno-sanitize-recover=alignment %s -O3 -o %t
 // RUN: not %run %t w1 2>&1 | FileCheck %s --check-prefix=CHECK-WILD
@@ -47,11 +43,7 @@
     // CHECK-LOAD-NEXT: {{^ 00 00 00 01 02 03 04  05}}
     // CHECK-LOAD-NEXT: {{^             \^}}
     return *p && 0;
-    // Slow stack unwinding is disabled on Darwin for now, see
-    // https://code.google.com/p/address-sanitizer/issues/detail?id=137
-    // CHECK-Linux-STACK-LOAD: #0 {{.*}}main{{.*}}misaligned.cpp
-    // Check for the already checked line to avoid lit error reports.
-    // CHECK-Darwin-STACK-LOAD: {{ }}
+    // CHECK-STACK-LOAD: #0 {{.*}}main{{.*}}misaligned.cpp
 
   case 's':
     // CHECK-STORE: misaligned.cpp:[[@LINE+4]]{{(:5)?}}: runtime error: store to misaligned address [[PTR:0x[0-9a-f]*]] for type 'int', which requires 4 byte alignment
diff --git a/test/ubsan/TestCases/TypeCheck/vptr-corrupted-vtable-itanium.cpp b/test/ubsan/TestCases/TypeCheck/vptr-corrupted-vtable-itanium.cpp
index 37ffe5b..f0659f4 100644
--- a/test/ubsan/TestCases/TypeCheck/vptr-corrupted-vtable-itanium.cpp
+++ b/test/ubsan/TestCases/TypeCheck/vptr-corrupted-vtable-itanium.cpp
@@ -1,4 +1,4 @@
-// RUN: %clangxx -frtti -fsanitize=vptr -fno-sanitize-recover=vptr -g %s -O3 -o %t
+// RUN: %clangxx -frtti -fsanitize=vptr -fno-sanitize-recover=vptr,null -g %s -O3 -o %t
 // RUN: not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-CORRUPTED-VTABLE --strict-whitespace
 
 // UNSUPPORTED: win32
diff --git a/test/ubsan/TestCases/TypeCheck/vptr-non-unique-typeinfo.cpp b/test/ubsan/TestCases/TypeCheck/vptr-non-unique-typeinfo.cpp
index 8ab7bfc..7bc19bd 100644
--- a/test/ubsan/TestCases/TypeCheck/vptr-non-unique-typeinfo.cpp
+++ b/test/ubsan/TestCases/TypeCheck/vptr-non-unique-typeinfo.cpp
@@ -1,8 +1,9 @@
-// RUN: %clangxx -frtti -fsanitize=vptr -fno-sanitize-recover=vptr -I%p/Helpers -g %s -fPIC -shared -o %t-lib.so -DBUILD_SO
-// RUN: %clangxx -frtti -fsanitize=vptr -fno-sanitize-recover=vptr -I%p/Helpers -g %s -O3 -o %t %t-lib.so
+// RUN: %clangxx -frtti -fsanitize=vptr -fno-sanitize-recover=vptr -I%p/Helpers -g %s -fPIC -shared -o %dynamiclib -DBUILD_SO %ld_flags_rpath_so
+// RUN: %clangxx -frtti -fsanitize=vptr -fno-sanitize-recover=vptr -I%p/Helpers -g %s -O3 -o %t %ld_flags_rpath_exe
 // RUN: %run %t
 //
 // REQUIRES: cxxabi
+// UNSUPPORTED: win32
 
 struct X {
   virtual ~X() {}
diff --git a/test/ubsan/TestCases/TypeCheck/vptr-virtual-base-construction.cpp b/test/ubsan/TestCases/TypeCheck/vptr-virtual-base-construction.cpp
index dc27d9f..a86960d 100644
--- a/test/ubsan/TestCases/TypeCheck/vptr-virtual-base-construction.cpp
+++ b/test/ubsan/TestCases/TypeCheck/vptr-virtual-base-construction.cpp
@@ -2,6 +2,7 @@
 // RUN: %run %t
 
 // REQUIRES: cxxabi
+// UNSUPPORTED: win32
 
 int volatile n;
 
diff --git a/test/ubsan/TestCases/TypeCheck/vptr-virtual-base.cpp b/test/ubsan/TestCases/TypeCheck/vptr-virtual-base.cpp
index 09deac1..aa0123c 100644
--- a/test/ubsan/TestCases/TypeCheck/vptr-virtual-base.cpp
+++ b/test/ubsan/TestCases/TypeCheck/vptr-virtual-base.cpp
@@ -1,7 +1,8 @@
-// RUN: %clangxx -frtti -fsanitize=vptr -fno-sanitize-recover=vptr -g %s -O3 -o %t
+// RUN: %clangxx -frtti -fsanitize=null,vptr -fno-sanitize-recover=vptr -g %s -O3 -o %t
 // RUN: not %run %t 2>&1 | FileCheck %s
 
 // REQUIRES: cxxabi
+// UNSUPPORTED: win32
 
 struct S { virtual int f() { return 0; } };
 struct T : virtual S {};
diff --git a/test/ubsan/TestCases/TypeCheck/vptr.cpp b/test/ubsan/TestCases/TypeCheck/vptr.cpp
index 53a79c9..1db41dd 100644
--- a/test/ubsan/TestCases/TypeCheck/vptr.cpp
+++ b/test/ubsan/TestCases/TypeCheck/vptr.cpp
@@ -1,4 +1,4 @@
-// RUN: %clangxx -frtti -fsanitize=vptr -fno-sanitize-recover=vptr -g %s -O3 -o %t -mllvm -enable-tail-merge=false
+// RUN: %clangxx -frtti -fsanitize=null,vptr -fno-sanitize-recover=null,vptr -g %s -O3 -o %t -mllvm -enable-tail-merge=false
 // RUN: %run %t rT && %run %t mT && %run %t fT && %run %t cT
 // RUN: %run %t rU && %run %t mU && %run %t fU && %run %t cU
 // RUN: %run %t rS && %run %t rV && %run %t oV
@@ -9,7 +9,9 @@
 // RUN: %env_ubsan_opts=print_stacktrace=1 not %run %t fV 2>&1 | FileCheck %s --check-prefix=CHECK-MEMFUN --strict-whitespace
 // RUN: %env_ubsan_opts=print_stacktrace=1 not %run %t cV 2>&1 | FileCheck %s --check-prefix=CHECK-DOWNCAST --check-prefix=CHECK-%os-DOWNCAST --strict-whitespace
 // RUN: %env_ubsan_opts=print_stacktrace=1 not %run %t oU 2>&1 | FileCheck %s --check-prefix=CHECK-OFFSET --check-prefix=CHECK-%os-OFFSET --strict-whitespace
-// RUN: %env_ubsan_opts=print_stacktrace=1 not %run %t m0 2>&1 | FileCheck %s --check-prefix=CHECK-NULL-MEMBER --check-prefix=CHECK-%os-NULL-MEMBER --strict-whitespace
+// RUN: %env_ubsan_opts=print_stacktrace=1 not %run %t m0 2>&1 | FileCheck %s --check-prefix=CHECK-INVALID-MEMBER --check-prefix=CHECK-%os-NULL-MEMBER --strict-whitespace
+// RUN: %env_ubsan_opts=print_stacktrace=1 not %run %t m0 2>&1 | FileCheck %s --check-prefix=CHECK-INVALID-MEMBER --check-prefix=CHECK-%os-NULL-MEMBER --strict-whitespace
+// RUN: not %run %t nN 2>&1 | FileCheck %s --check-prefix=CHECK-NULL-MEMFUN --strict-whitespace
 
 // RUN: (echo "vptr_check:S"; echo "vptr_check:T"; echo "vptr_check:U") > %t.supp
 // RUN: %env_ubsan_opts=suppressions='"%t.supp"' %run %t mS
@@ -24,6 +26,9 @@
 // RUN: %env_ubsan_opts=suppressions='"%t.loc-supp"' not %run %t x- 2>&1 | FileCheck %s --check-prefix=CHECK-LOC-SUPPRESS
 
 // REQUIRES: stable-runtime, cxxabi
+// UNSUPPORTED: win32
+// Suppressions file not pushed to the device.
+// UNSUPPORTED: android
 #include <new>
 #include <assert.h>
 #include <stdio.h>
@@ -99,6 +104,9 @@
   case 'V':
     p = reinterpret_cast<T*>(new U);
     break;
+  case 'N':
+    p = 0;
+    break;
   }
 
   access_p(p, argv[1][0]);
@@ -134,11 +142,11 @@
     // CHECK-Linux-MEMBER: #0 {{.*}}access_p{{.*}}vptr.cpp:[[@LINE+1]]
     return p->b;
 
-    // CHECK-NULL-MEMBER: vptr.cpp:[[@LINE-2]]:15: runtime error: member access within address [[PTR:0x[0-9a-f]*]] which does not point to an object of type 'T'
-    // CHECK-NULL-MEMBER-NEXT: [[PTR]]: note: object has invalid vptr
-    // CHECK-NULL-MEMBER-NEXT: {{^  ?.. .. .. ..  ?00 00 00 00  ?00 00 00 00  ?}}
-    // CHECK-NULL-MEMBER-NEXT: {{^              \^~~~~~~~~~~(~~~~~~~~~~~~)? *$}}
-    // CHECK-NULL-MEMBER-NEXT: {{^              invalid vptr}}
+    // CHECK-INVALID-MEMBER: vptr.cpp:[[@LINE-2]]:15: runtime error: member access within address [[PTR:0x[0-9a-f]*]] which does not point to an object of type 'T'
+    // CHECK-INVALID-MEMBER-NEXT: [[PTR]]: note: object has invalid vptr
+    // CHECK-INVALID-MEMBER-NEXT: {{^  ?.. .. .. ..  ?00 00 00 00  ?00 00 00 00  ?}}
+    // CHECK-INVALID-MEMBER-NEXT: {{^              \^~~~~~~~~~~(~~~~~~~~~~~~)? *$}}
+    // CHECK-INVALID-MEMBER-NEXT: {{^              invalid vptr}}
     // CHECK-Linux-NULL-MEMBER: #0 {{.*}}access_p{{.*}}vptr.cpp:[[@LINE-7]]
 
   case 'f':
@@ -168,6 +176,10 @@
     // CHECK-Linux-DOWNCAST: #0 {{.*}}access_p{{.*}}vptr.cpp:[[@LINE+1]]
     (void)static_cast<T*>(reinterpret_cast<S*>(p));
     return 0;
+
+  case 'n':
+    // CHECK-NULL-MEMFUN: vptr.cpp:[[@LINE+1]]:15: runtime error: member call on null pointer of type 'T'
+    return p->g();
   }
   return 0;
 }
diff --git a/test/ubsan/lit.common.cfg b/test/ubsan/lit.common.cfg
index cd6d209..f34d586 100644
--- a/test/ubsan/lit.common.cfg
+++ b/test/ubsan/lit.common.cfg
@@ -14,13 +14,17 @@
 # Setup source root.
 config.test_source_root = os.path.dirname(__file__)
 
-default_ubsan_opts = []
+default_ubsan_opts = list(config.default_sanitizer_opts)
 # Choose between standalone and UBSan+ASan modes.
 ubsan_lit_test_mode = get_required_attr(config, 'ubsan_lit_test_mode')
 if ubsan_lit_test_mode == "Standalone":
   config.name = 'UBSan-Standalone-' + config.target_arch
   config.available_features.add("ubsan-standalone")
   clang_ubsan_cflags = []
+elif ubsan_lit_test_mode == "StandaloneStatic":
+  config.name = 'UBSan-StandaloneStatic-' + config.target_arch
+  config.available_features.add("ubsan-standalone-static")
+  clang_ubsan_cflags = ['-static-libsan']
 elif ubsan_lit_test_mode == "AddressSanitizer":
   config.name = 'UBSan-ASan-' + config.target_arch
   config.available_features.add("ubsan-asan")
@@ -38,11 +42,10 @@
   lit_config.fatal("Unknown UBSan test mode: %r" % ubsan_lit_test_mode)
 
 # Platform-specific default for lit tests.
-if config.host_os == 'Darwin':
-  # On Darwin, we default to `abort_on_error=1`, which would make tests run
-  # much slower. Let's override this and run lit tests with 'abort_on_error=0'.
-  default_ubsan_opts += ['abort_on_error=0']
-  default_ubsan_opts += ['log_to_syslog=0']
+if config.target_arch == 's390x':
+  # On SystemZ we need -mbackchain to make the fast unwinder work.
+  clang_ubsan_cflags.append("-mbackchain")
+
 default_ubsan_opts_str = ':'.join(default_ubsan_opts)
 if default_ubsan_opts_str:
   config.environment['UBSAN_OPTIONS'] = default_ubsan_opts_str
@@ -52,7 +55,7 @@
                              'env UBSAN_OPTIONS=' + default_ubsan_opts_str))
 
 def build_invocation(compile_flags):
-  return " " + " ".join([config.clang] + compile_flags) + " "
+  return " " + " ".join([config.compile_wrapper, config.clang] + compile_flags) + " "
 
 target_cflags = [get_required_attr(config, "target_cflags")]
 clang_ubsan_cflags += target_cflags
@@ -61,15 +64,13 @@
 # Define %clang and %clangxx substitutions to use in test RUN lines.
 config.substitutions.append( ("%clang ", build_invocation(clang_ubsan_cflags)) )
 config.substitutions.append( ("%clangxx ", build_invocation(clang_ubsan_cxxflags)) )
+config.substitutions.append( ("%gmlt ", " ".join(config.debug_info_flags) + " ") )
 
 # Default test suffixes.
 config.suffixes = ['.c', '.cc', '.cpp']
 
 # Check that the host supports UndefinedBehaviorSanitizer tests
-if config.host_os not in ['Linux', 'Darwin', 'FreeBSD', 'Windows']:
+if config.host_os not in ['Linux', 'Darwin', 'FreeBSD', 'Windows', 'NetBSD']:
   config.unsupported = True
 
-# Allow tests to use REQUIRES=stable-runtime.  For use when you cannot use XFAIL
-# because the test hangs or fails on one configuration and not the other.
-if config.target_arch.startswith('arm') == False and config.target_arch != 'aarch64':
-  config.available_features.add('stable-runtime')
+config.available_features.add('arch=' + config.target_arch)
diff --git a/test/ubsan_minimal/CMakeLists.txt b/test/ubsan_minimal/CMakeLists.txt
new file mode 100644
index 0000000..712654e
--- /dev/null
+++ b/test/ubsan_minimal/CMakeLists.txt
@@ -0,0 +1,26 @@
+set(UBSAN_LIT_TESTS_DIR ${CMAKE_CURRENT_SOURCE_DIR})
+
+set(UBSAN_TEST_ARCH ${UBSAN_SUPPORTED_ARCH})
+if(APPLE)
+  darwin_filter_host_archs(UBSAN_SUPPORTED_ARCH UBSAN_TEST_ARCH)
+endif()
+
+set(UBSAN_TESTSUITES)
+set(UBSAN_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS})
+if(NOT COMPILER_RT_STANDALONE_BUILD)
+  list(APPEND UBSAN_TEST_DEPS ubsan-minimal)
+endif()
+
+foreach(arch ${UBSAN_TEST_ARCH})
+  get_test_cc_for_arch(${arch} UBSAN_TEST_TARGET_CC UBSAN_TEST_TARGET_CFLAGS)
+  set(CONFIG_NAME ${arch})
+  configure_lit_site_cfg(
+    ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
+    ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg)
+  list(APPEND UBSAN_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME})
+endforeach()
+
+add_lit_testsuite(check-ubsan-minimal "Running UndefinedBehaviorSanitizerMinimal tests"
+  ${UBSAN_TESTSUITES}
+  DEPENDS ${UBSAN_TEST_DEPS})
+set_target_properties(check-ubsan-minimal PROPERTIES FOLDER "Compiler-RT Misc")
diff --git a/test/ubsan_minimal/TestCases/recover-dedup-limit.cpp b/test/ubsan_minimal/TestCases/recover-dedup-limit.cpp
new file mode 100644
index 0000000..faa2b66
--- /dev/null
+++ b/test/ubsan_minimal/TestCases/recover-dedup-limit.cpp
@@ -0,0 +1,41 @@
+// RUN: %clangxx -fsanitize=signed-integer-overflow -fsanitize-recover=all %s -o %t && %run %t 2>&1 | FileCheck %s
+
+#include <stdint.h>
+
+#define OVERFLOW  \
+  x = 0x7FFFFFFE; \
+  x += __LINE__
+
+int main() {
+  int32_t x;
+  OVERFLOW;  // CHECK: add-overflow
+  OVERFLOW;  // CHECK: add-overflow
+  OVERFLOW;  // CHECK: add-overflow
+  OVERFLOW;  // CHECK: add-overflow
+  OVERFLOW;  // CHECK: add-overflow
+
+  OVERFLOW;  // CHECK: add-overflow
+  OVERFLOW;  // CHECK: add-overflow
+  OVERFLOW;  // CHECK: add-overflow
+  OVERFLOW;  // CHECK: add-overflow
+  OVERFLOW;  // CHECK: add-overflow
+
+  OVERFLOW;  // CHECK: add-overflow
+  OVERFLOW;  // CHECK: add-overflow
+  OVERFLOW;  // CHECK: add-overflow
+  OVERFLOW;  // CHECK: add-overflow
+  OVERFLOW;  // CHECK: add-overflow
+
+  OVERFLOW;  // CHECK: add-overflow
+  OVERFLOW;  // CHECK: add-overflow
+  OVERFLOW;  // CHECK: add-overflow
+  OVERFLOW;  // CHECK: add-overflow
+  OVERFLOW;  // CHECK: add-overflow
+
+  // CHECK-NOT: add-overflow
+  OVERFLOW;  // CHECK: too many errors
+  // CHECK-NOT: add-overflow
+  OVERFLOW;
+  OVERFLOW;
+  OVERFLOW;
+}
diff --git a/test/ubsan_minimal/TestCases/recover-dedup.cpp b/test/ubsan_minimal/TestCases/recover-dedup.cpp
new file mode 100644
index 0000000..4dfd699
--- /dev/null
+++ b/test/ubsan_minimal/TestCases/recover-dedup.cpp
@@ -0,0 +1,39 @@
+// RUN: %clangxx -w -fsanitize=signed-integer-overflow,nullability-return,returns-nonnull-attribute -fsanitize-recover=all %s -o %t && %run %t 2>&1 | FileCheck %s
+
+#include <stdint.h>
+#include <stdio.h>
+
+int *_Nonnull h() {
+  // CHECK: nullability-return
+  return NULL;
+}
+
+__attribute__((returns_nonnull))
+int *i() {
+  // CHECK: nonnull-return
+  return NULL;
+}
+
+__attribute__((noinline))
+int f(int x, int y) {
+  // CHECK: mul-overflow
+  return x * y;
+}
+
+__attribute__((noinline))
+int g(int x, int y) {
+  // CHECK: mul-overflow
+  return x * (y + 1);
+}
+
+int main() {
+  h();
+  i();
+  int x = 2;
+  for (int i = 0; i < 10; ++i)
+    x = f(x, x);
+  x = 2;
+  for (int i = 0; i < 10; ++i)
+    x = g(x, x);
+  // CHECK-NOT: mul-overflow
+}
diff --git a/test/ubsan_minimal/TestCases/test-darwin-interface.c b/test/ubsan_minimal/TestCases/test-darwin-interface.c
new file mode 100644
index 0000000..1da049f
--- /dev/null
+++ b/test/ubsan_minimal/TestCases/test-darwin-interface.c
@@ -0,0 +1,16 @@
+// Check that the ubsan and ubsan-minimal runtimes have the same symbols,
+// making exceptions as necessary.
+//
+// REQUIRES: x86_64-darwin
+
+// RUN: nm -jgU `%clangxx -fsanitize-minimal-runtime -fsanitize=undefined %s -o %t '-###' 2>&1 | grep "libclang_rt.ubsan_minimal_osx_dynamic.dylib" | sed -e 's/.*"\(.*libclang_rt.ubsan_minimal_osx_dynamic.dylib\)".*/\1/'` | grep "^___ubsan_handle" \
+// RUN:  | sed 's/_minimal//g' \
+// RUN:  > %t.minimal.symlist
+//
+// RUN: nm -jgU `%clangxx -fno-sanitize-minimal-runtime -fsanitize=undefined %s -o %t '-###' 2>&1 | grep "libclang_rt.ubsan_osx_dynamic.dylib" | sed -e 's/.*"\(.*libclang_rt.ubsan_osx_dynamic.dylib\)".*/\1/'` | grep "^___ubsan_handle" \
+// RUN:  | grep -vE "^___ubsan_handle_dynamic_type_cache_miss" \
+// RUN:  | grep -vE "^___ubsan_handle_cfi_bad_type" \
+// RUN:  | sed 's/_v1//g' \
+// RUN:  > %t.full.symlist
+//
+// RUN: diff %t.minimal.symlist %t.full.symlist
diff --git a/test/ubsan_minimal/TestCases/uadd-overflow.cpp b/test/ubsan_minimal/TestCases/uadd-overflow.cpp
new file mode 100644
index 0000000..0345750
--- /dev/null
+++ b/test/ubsan_minimal/TestCases/uadd-overflow.cpp
@@ -0,0 +1,10 @@
+// RUN: %clangxx -fsanitize=unsigned-integer-overflow %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx -fsanitize=unsigned-integer-overflow -fno-sanitize-recover=all %s -o %t && not --crash %run %t 2>&1 | FileCheck %s
+
+#include <stdint.h>
+
+int main() {
+  uint32_t k = 0x87654321;
+  k += 0xedcba987;
+  // CHECK: add-overflow
+}
diff --git a/test/ubsan_minimal/lit.common.cfg b/test/ubsan_minimal/lit.common.cfg
new file mode 100644
index 0000000..32a1d0b
--- /dev/null
+++ b/test/ubsan_minimal/lit.common.cfg
@@ -0,0 +1,40 @@
+# -*- Python -*-
+
+import os
+
+def get_required_attr(config, attr_name):
+  attr_value = getattr(config, attr_name, None)
+  if attr_value == None:
+    lit_config.fatal(
+      "No attribute %r in test configuration! You may need to run "
+      "tests from your build directory or add this attribute "
+      "to lit.site.cfg " % attr_name)
+  return attr_value
+
+# Setup source root.
+config.test_source_root = os.path.dirname(__file__)
+config.name = 'UBSan-Minimal-' + config.target_arch
+
+def build_invocation(compile_flags):
+  return " " + " ".join([config.compile_wrapper, config.clang] + compile_flags) + " "
+
+target_cflags = [get_required_attr(config, "target_cflags")]
+clang_ubsan_cflags = ["-fsanitize-minimal-runtime"] + target_cflags
+clang_ubsan_cxxflags = config.cxx_mode_flags + clang_ubsan_cflags
+
+# Define %clang and %clangxx substitutions to use in test RUN lines.
+config.substitutions.append( ("%clang ", build_invocation(clang_ubsan_cflags)) )
+config.substitutions.append( ("%clangxx ", build_invocation(clang_ubsan_cxxflags)) )
+
+# Default test suffixes.
+config.suffixes = ['.c', '.cc', '.cpp']
+
+# Check that the host supports UndefinedBehaviorSanitizerMinimal tests
+if config.host_os not in ['Linux', 'FreeBSD', 'NetBSD', 'Darwin']: # TODO: Windows
+  config.unsupported = True
+
+# Don't target x86_64h if the test machine can't execute x86_64h binaries.
+if '-arch x86_64h' in target_cflags and 'x86_64h' not in config.available_features:
+  config.unsupported = True
+
+config.available_features.add('arch=' + config.target_arch)
diff --git a/test/ubsan_minimal/lit.site.cfg.in b/test/ubsan_minimal/lit.site.cfg.in
new file mode 100644
index 0000000..d4dd68e
--- /dev/null
+++ b/test/ubsan_minimal/lit.site.cfg.in
@@ -0,0 +1,11 @@
+@LIT_SITE_CFG_IN_HEADER@
+
+# Tool-specific config options.
+config.target_cflags = "@UBSAN_TEST_TARGET_CFLAGS@"
+config.target_arch = "@UBSAN_TEST_TARGET_ARCH@"
+
+# Load common config for all compiler-rt lit tests.
+lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/test/lit.common.configured")
+
+# Load tool-specific config that would do the real work.
+lit_config.load_config(config, "@UBSAN_LIT_TESTS_DIR@/lit.common.cfg")
diff --git a/test/xray/TestCases/Linux/always-never-instrument.cc b/test/xray/TestCases/Linux/always-never-instrument.cc
new file mode 100644
index 0000000..4e19685
--- /dev/null
+++ b/test/xray/TestCases/Linux/always-never-instrument.cc
@@ -0,0 +1,23 @@
+// Test that the always/never instrument lists apply.
+// RUN: echo "fun:main" > %tmp-always.txt
+// RUN: echo "fun:__xray*" > %tmp-never.txt
+// RUN: %clangxx_xray \
+// RUN:     -fxray-never-instrument=%tmp-never.txt \
+// RUN:     -fxray-always-instrument=%tmp-always.txt \
+// RUN:     %s -o %t
+// RUN: %llvm_xray extract -symbolize %t | \
+// RUN:    FileCheck %s --check-prefix NOINSTR
+// RUN: %llvm_xray extract -symbolize %t | \
+// RUN:    FileCheck %s --check-prefix ALWAYSINSTR
+// REQUIRES: x86_64-linux
+// REQUIRES: built-in-llvm-tree
+
+// NOINSTR-NOT: {{.*__xray_NeverInstrumented.*}}
+int __xray_NeverInstrumented() {
+  return 0;
+}
+
+// ALWAYSINSTR: {{.*function-name:.*main.*}}
+int main(int argc, char *argv[]) {
+  return __xray_NeverInstrumented();
+}
diff --git a/test/xray/TestCases/Linux/arg1-arg0-logging.cc b/test/xray/TestCases/Linux/arg1-arg0-logging.cc
new file mode 100644
index 0000000..e7730bf
--- /dev/null
+++ b/test/xray/TestCases/Linux/arg1-arg0-logging.cc
@@ -0,0 +1,39 @@
+// Allow having both the no-arg and arg1 logging implementation live together,
+// and be called in the correct cases.
+//
+// RUN: rm arg0-arg1-logging-* || true
+// RUN: %clangxx_xray -std=c++11 %s -o %t
+// RUN: XRAY_OPTIONS="patch_premain=true verbosity=1 xray_logfile_base=arg0-arg1-logging-" %run %t
+//
+// TODO: Support these in ARM and PPC
+// XFAIL: arm || aarch64 || mips
+// UNSUPPORTED: powerpc64le
+
+#include "xray/xray_interface.h"
+#include <cassert>
+#include <cstdio>
+
+using namespace std;
+
+bool arg0loggercalled = false;
+void arg0logger(int32_t, XRayEntryType) { arg0loggercalled = true; }
+
+[[clang::xray_always_instrument]] void arg0fn() { printf("hello, arg0!\n"); }
+
+bool arg1loggercalled = false;
+void arg1logger(int32_t, XRayEntryType, uint64_t) { arg1loggercalled = true; }
+
+[[ clang::xray_always_instrument, clang::xray_log_args(1) ]] void
+arg1fn(uint64_t arg1) {
+  printf("hello, arg1!\n");
+}
+
+int main(int argc, char *argv[]) {
+  __xray_set_handler(arg0logger);
+  __xray_set_handler_arg1(arg1logger);
+  arg0fn();
+  arg1fn(0xcafef00d);
+  __xray_remove_handler_arg1();
+  __xray_remove_handler();
+  assert(arg0loggercalled && arg1loggercalled);
+}
diff --git a/test/xray/TestCases/Linux/arg1-logger.cc b/test/xray/TestCases/Linux/arg1-logger.cc
index 955347b..bf5c8db 100644
--- a/test/xray/TestCases/Linux/arg1-logger.cc
+++ b/test/xray/TestCases/Linux/arg1-logger.cc
@@ -29,7 +29,7 @@
 
   __xray_set_handler_arg1(arg1logger);
   foo(nullptr);
-  // CHECK: Arg1: 0, XRayEntryType 0
+  // CHECK: Arg1: 0, XRayEntryType 3
 
   __xray_remove_handler_arg1();
   foo((void *) 0xBADC0DE);
@@ -37,7 +37,7 @@
 
   __xray_set_handler_arg1(arg1logger);
   foo((void *) 0xDEADBEEFCAFE);
-  // CHECK-NEXT: Arg1: deadbeefcafe, XRayEntryType 0
+  // CHECK-NEXT: Arg1: deadbeefcafe, XRayEntryType 3
   foo((void *) -1);
-  // CHECK-NEXT: Arg1: ffffffffffffffff, XRayEntryType 0
+  // CHECK-NEXT: Arg1: ffffffffffffffff, XRayEntryType 3
 }
diff --git a/test/xray/TestCases/Linux/arg1-logging-implicit-this.cc b/test/xray/TestCases/Linux/arg1-logging-implicit-this.cc
new file mode 100644
index 0000000..66dfce9
--- /dev/null
+++ b/test/xray/TestCases/Linux/arg1-logging-implicit-this.cc
@@ -0,0 +1,31 @@
+// Intercept the implicit 'this' argument of class member functions.
+//
+// RUN: %clangxx_xray -g -std=c++11 %s -o %t
+// RUN: rm log-args-this-* || true
+// RUN: XRAY_OPTIONS="patch_premain=true verbosity=1 xray_logfile_base=log-args-this-" %run %t
+//
+// XFAIL: arm || aarch64 || mips
+// UNSUPPORTED: powerpc64le
+#include "xray/xray_interface.h"
+#include <cassert>
+
+class A {
+ public:
+  [[clang::xray_always_instrument, clang::xray_log_args(1)]] void f() {
+    // does nothing.
+  }
+};
+
+volatile uint64_t captured = 0;
+
+void handler(int32_t, XRayEntryType, uint64_t arg1) {
+  captured = arg1;
+}
+
+int main() {
+  __xray_set_handler_arg1(handler);
+  A instance;
+  instance.f();
+  __xray_remove_handler_arg1();
+  assert(captured == (uint64_t)&instance);
+}
diff --git a/test/xray/TestCases/Linux/argv0-log-file-name.cc b/test/xray/TestCases/Linux/argv0-log-file-name.cc
index 2960c57..2f9a234 100644
--- a/test/xray/TestCases/Linux/argv0-log-file-name.cc
+++ b/test/xray/TestCases/Linux/argv0-log-file-name.cc
@@ -6,6 +6,8 @@
 // RUN: ls | FileCheck xray.log.file.name
 // RUN: rm xray-log.* xray.log.file.name
 
+// UNSUPPORTED: target-is-mips64,target-is-mips64el
+
 #include <cstdio>
 #include <libgen.h>
 
diff --git a/test/xray/TestCases/Linux/coverage-sample.cc b/test/xray/TestCases/Linux/coverage-sample.cc
new file mode 100644
index 0000000..9ec22dc
--- /dev/null
+++ b/test/xray/TestCases/Linux/coverage-sample.cc
@@ -0,0 +1,90 @@
+// Check that we can patch and unpatch specific function ids.
+//
+// RUN: %clangxx_xray -std=c++11 %s -o %t
+// RUN: XRAY_OPTIONS="patch_premain=false xray_naive_log=false" %run %t | FileCheck %s
+
+// UNSUPPORTED: target-is-mips64,target-is-mips64el
+
+#include "xray/xray_interface.h"
+
+#include <set>
+#include <cstdio>
+
+std::set<int32_t> function_ids;
+
+[[clang::xray_never_instrument]] void coverage_handler(int32_t fid,
+                                                       XRayEntryType) {
+  thread_local bool patching = false;
+  if (patching) return;
+  patching = true;
+  function_ids.insert(fid);
+  __xray_unpatch_function(fid);
+  patching = false;
+}
+
+[[clang::xray_always_instrument]] void baz() {
+  // do nothing!
+}
+
+[[clang::xray_always_instrument]] void bar() {
+  baz();
+}
+
+[[clang::xray_always_instrument]] void foo() {
+  bar();
+}
+
+[[clang::xray_always_instrument]] int main(int argc, char *argv[]) {
+  __xray_set_handler(coverage_handler);
+  __xray_patch();
+  foo();
+  __xray_unpatch();
+
+  // print out the function_ids.
+  printf("first pass.\n");
+  for (const auto id : function_ids)
+    printf("patched: %d\n", id);
+
+  // CHECK-LABEL: first pass.
+  // CHECK-DAG: patched: [[F1:.*]]
+  // CHECK-DAG: patched: [[F2:.*]]
+  // CHECK-DAG: patched: [[F3:.*]]
+
+  // make a copy of the function_ids, then patch them later.
+  auto called_fns = function_ids;
+
+  // clear the function_ids.
+  function_ids.clear();
+
+  // patch the functions we've called before.
+  for (const auto id : called_fns)
+    __xray_patch_function(id);
+
+  // then call them again.
+  foo();
+  __xray_unpatch();
+
+  // confirm that we've seen the same functions again.
+  printf("second pass.\n");
+  for (const auto id : function_ids)
+    printf("patched: %d\n", id);
+  // CHECK-LABEL: second pass.
+  // CHECK-DAG: patched: [[F1]]
+  // CHECK-DAG: patched: [[F2]]
+  // CHECK-DAG: patched: [[F3]]
+
+  // Now we want to make sure that if we unpatch one, that we're only going to
+  // see two calls of the coverage_handler.
+  function_ids.clear();
+  __xray_patch();
+  __xray_unpatch_function(1);
+  foo();
+  __xray_unpatch();
+
+  // confirm that we don't see function id one called anymore.
+  printf("missing 1.\n");
+  for (const auto id : function_ids)
+    printf("patched: %d\n", id);
+  // CHECK-LABEL: missing 1.
+  // CHECK-NOT: patched: 1
+}
diff --git a/test/xray/TestCases/Linux/custom-event-handler-alignment.cc b/test/xray/TestCases/Linux/custom-event-handler-alignment.cc
new file mode 100644
index 0000000..447f6e4
--- /dev/null
+++ b/test/xray/TestCases/Linux/custom-event-handler-alignment.cc
@@ -0,0 +1,42 @@
+// Make sure we're aligning the stack properly when lowering the custom event
+// calls.
+//
+// RUN: %clangxx_xray -std=c++11 %s -o %t
+// RUN: XRAY_OPTIONS="patch_premain=false verbosity=1 xray_naive_log=false" \
+// RUN:     %run %t 2>&1
+// REQUIRES: x86_64-linux
+// REQUIRES: built-in-llvm-tree
+#include <xmmintrin.h>
+#include <stdio.h>
+#include "xray/xray_interface.h"
+
+[[clang::xray_never_instrument]] __attribute__((weak)) __m128 f(__m128 *i) {
+  return *i;
+}
+
+[[clang::xray_always_instrument]] void foo() {
+  __xray_customevent(0, 0);
+  __m128 v = {};
+  f(&v);
+}
+
+[[clang::xray_always_instrument]] void bar() {
+  __xray_customevent(0, 0);
+}
+
+void printer(void* ptr, size_t size) {
+  printf("handler called\n");
+  __m128 v = {};
+  f(&v);
+}
+
+int main(int argc, char* argv[]) {
+  __xray_set_customevent_handler(printer);
+  __xray_patch();
+  foo();  // CHECK: handler called
+  bar();  // CHECK: handler called
+  __xray_unpatch();
+  __xray_remove_customevent_handler();
+  foo();
+  bar();
+}
diff --git a/test/xray/TestCases/Linux/custom-event-logging.cc b/test/xray/TestCases/Linux/custom-event-logging.cc
new file mode 100644
index 0000000..48fd620
--- /dev/null
+++ b/test/xray/TestCases/Linux/custom-event-logging.cc
@@ -0,0 +1,42 @@
+// Use the clang feature for custom xray event logging.
+//
+// RUN: %clangxx_xray -std=c++11 %s -o %t
+// RUN: XRAY_OPTIONS="patch_premain=false verbosity=1 xray_naive_log=false xray_logfile_base=custom-event-logging.xray-" %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_xray -std=c++11 -fpic -fpie %s -o %t
+// RUN: XRAY_OPTIONS="patch_premain=false verbosity=1 xray_naive_log=false xray_logfile_base=custom-event-logging.xray-" %run %t 2>&1 | FileCheck %s
+// FIXME: Support this in non-x86_64 as well
+// REQUIRES: x86_64-linux
+// REQUIRES: built-in-llvm-tree
+#include <cstdio>
+#include "xray/xray_interface.h"
+
+[[clang::xray_always_instrument]] void foo() {
+  static constexpr char CustomLogged[] = "hello custom logging!";
+  printf("before calling the custom logging...\n");
+  __xray_customevent(CustomLogged, sizeof(CustomLogged));
+  printf("after calling the custom logging...\n");
+}
+
+void myprinter(void* ptr, size_t size) {
+  printf("%.*s\n", static_cast<int>(size), static_cast<const char*>(ptr));
+}
+
+int main() {
+  foo();
+  // CHECK: before calling the custom logging...
+  // CHECK-NEXT: after calling the custom logging...
+  printf("setting up custom event handler...\n");
+  // CHECK-NEXT: setting up custom event handler...
+  __xray_set_customevent_handler(myprinter);
+  __xray_patch();
+  // CHECK-NEXT: before calling the custom logging...
+  foo();
+  // CHECK-NEXT: hello custom logging!
+  // CHECK-NEXT: after calling the custom logging...
+  printf("removing custom event handler...\n");
+  // CHECK-NEXT: removing custom event handler...
+  __xray_remove_customevent_handler();
+  foo();
+  // CHECK-NEXT: before calling the custom logging...
+  // CHECK-NEXT: after calling the custom logging...
+}
diff --git a/test/xray/TestCases/Linux/fdr-mode.cc b/test/xray/TestCases/Linux/fdr-mode.cc
new file mode 100644
index 0000000..744c051
--- /dev/null
+++ b/test/xray/TestCases/Linux/fdr-mode.cc
@@ -0,0 +1,103 @@
+// RUN: %clangxx_xray -g -std=c++11 %s -o %t
+// RUN: rm fdr-logging-test-* || true
+// RUN: rm fdr-unwrite-test-* || true
+// RUN: XRAY_OPTIONS="patch_premain=false xray_naive_log=false xray_logfile_base=fdr-logging-test- xray_fdr_log=true verbosity=1 xray_fdr_log_func_duration_threshold_us=0" %run %t 2>&1 | FileCheck %s
+// RUN: XRAY_OPTIONS="patch_premain=false xray_naive_log=false xray_logfile_base=fdr-unwrite-test- xray_fdr_log=true verbosity=1 xray_fdr_log_func_duration_threshold_us=5000" %run %t 2>&1 | FileCheck %s
+// RUN: %llvm_xray convert --symbolize --output-format=yaml -instr_map=%t "`ls fdr-logging-test-* | head -1`" | FileCheck %s --check-prefix=TRACE
+// RUN: %llvm_xray convert --symbolize --output-format=yaml -instr_map=%t "`ls fdr-unwrite-test-* | head -1`" | FileCheck %s --check-prefix=UNWRITE
+// RUN: rm fdr-logging-test-*
+// RUN: rm fdr-unwrite-test-*
+// FIXME: Make llvm-xray work on non-x86_64 as well.
+// REQUIRES: x86_64-linux
+// REQUIRES: built-in-llvm-tree
+
+#include "xray/xray_log_interface.h"
+#include <cassert>
+#include <chrono>
+#include <cstdio>
+#include <iostream>
+#include <stdlib.h>
+#include <thread>
+#include <time.h>
+
+constexpr auto kBufferSize = 16384;
+constexpr auto kBufferMax = 10;
+
+thread_local uint64_t var = 0;
+[[clang::xray_always_instrument]] void __attribute__((noinline)) fC() { ++var; }
+
+[[clang::xray_always_instrument]] void __attribute__((noinline)) fB() { fC(); }
+
+[[clang::xray_always_instrument]] void __attribute__((noinline)) fA() { fB(); }
+
+[[clang::xray_always_instrument, clang::xray_log_args(1)]]
+void __attribute__((noinline)) fArg(int) { }
+
+int main(int argc, char *argv[]) {
+  using namespace __xray;
+  FDRLoggingOptions Options;
+  std::cout << "Logging before init." << std::endl;
+  // CHECK: Logging before init.
+  auto status = __xray_log_init(kBufferSize, kBufferMax, &Options,
+                                sizeof(FDRLoggingOptions));
+  assert(status == XRayLogInitStatus::XRAY_LOG_INITIALIZED);
+  std::cout << "Init status " << status << std::endl;
+  // CHECK: Init status {{.*}}
+  std::cout << "Patching..." << std::endl;
+  // CHECK: Patching...
+  __xray_patch();
+  fA();
+  fC();
+  fB();
+  fA();
+  fC();
+  std::thread other_thread([]() {
+    fC();
+    fB();
+    fA();
+    fArg(1);
+  });
+  other_thread.join();
+  std::cout << "Joined" << std::endl;
+  // CHECK: Joined
+  std::cout << "Finalize status " << __xray_log_finalize() << std::endl;
+  // CHECK: Finalize status {{.*}}
+  fC();
+  std::cout << "Main execution var = " << var << std::endl;
+  // CHECK: Main execution var = 6
+  std::cout << "Flush status " << __xray_log_flushLog() << std::endl;
+  // CHECK: Flush status {{.*}}
+  __xray_unpatch();
+  return 0;
+}
+
+// Check that we're able to see two threads, each entering and exiting fA().
+// TRACE-DAG: - { type: 0, func-id: [[FIDA:[0-9]+]], function: {{.*fA.*}}, cpu: {{.*}}, thread: [[THREAD1:[0-9]+]], kind: function-enter, tsc: {{[0-9]+}} }
+// TRACE:     - { type: 0, func-id: [[FIDA]], function: {{.*fA.*}}, cpu: {{.*}}, thread: [[THREAD1]], kind: function-{{exit|tail-exit}}, tsc: {{[0-9]+}} }
+// TRACE-DAG: - { type: 0, func-id: [[FIDA]], function: {{.*fA.*}}, cpu: {{.*}}, thread: [[THREAD2:[0-9]+]], kind: function-enter, tsc: {{[0-9]+}} }
+// TRACE:     - { type: 0, func-id: [[FIDA]], function: {{.*fA.*}}, cpu: {{.*}}, thread: [[THREAD2]], kind: function-{{exit|tail-exit}}, tsc: {{[0-9]+}} }
+//
+// Do the same as above for fC()
+// TRACE-DAG: - { type: 0, func-id: [[FIDC:[0-9]+]], function: {{.*fC.*}}, cpu: {{.*}}, thread: [[THREAD1:[0-9]+]], kind: function-enter, tsc: {{[0-9]+}} }
+// TRACE:     - { type: 0, func-id: [[FIDC]], function: {{.*fC.*}}, cpu: {{.*}}, thread: [[THREAD1]], kind: function-{{exit|tail-exit}}, tsc: {{[0-9]+}} }
+// TRACE-DAG: - { type: 0, func-id: [[FIDC]], function: {{.*fC.*}}, cpu: {{.*}}, thread: [[THREAD2:[0-9]+]], kind: function-enter, tsc: {{[0-9]+}} }
+// TRACE:     - { type: 0, func-id: [[FIDC]], function: {{.*fC.*}}, cpu: {{.*}}, thread: [[THREAD2]], kind: function-{{exit|tail-exit}}, tsc: {{[0-9]+}} }
+
+// Do the same as above for fB()
+// TRACE-DAG: - { type: 0, func-id: [[FIDB:[0-9]+]], function: {{.*fB.*}}, cpu: {{.*}}, thread: [[THREAD1:[0-9]+]], kind: function-enter, tsc: {{[0-9]+}} }
+// TRACE:     - { type: 0, func-id: [[FIDB]], function: {{.*fB.*}}, cpu: {{.*}}, thread: [[THREAD1]], kind: function-{{exit|tail-exit}}, tsc: {{[0-9]+}} }
+// TRACE-DAG: - { type: 0, func-id: [[FIDB]], function: {{.*fB.*}}, cpu: {{.*}}, thread: [[THREAD2:[0-9]+]], kind: function-enter, tsc: {{[0-9]+}} }
+// TRACE:     - { type: 0, func-id: [[FIDB]], function: {{.*fB.*}}, cpu: {{.*}}, thread: [[THREAD2]], kind: function-{{exit|tail-exit}}, tsc: {{[0-9]+}} }
+
+// TRACE-DAG: - { type: 0, func-id: [[FIDARG:[0-9]+]], function: 'fArg(int)', args: [ 1 ], cpu: {{.*}}, thread: [[THREAD2]], kind: function-enter-arg, tsc: {{[0-9]+}} }
+// TRACE-DAG: - { type: 0, func-id: [[FIDARG]], function: 'fArg(int)', cpu: {{.*}}, thread: [[THREAD2]], kind: function-exit, tsc: {{[0-9]+}} }
+
+// Assert that when unwriting is enabled with a high threshold time, all the function records are erased. A CPU switch could erroneously fail this test, but
+// is unlikely given the test program.
+// Even with a high threshold, arg1 logging is never unwritten.
+// UNWRITE: header:
+// UNWRITE: records:
+// UNWRITE-NEXT: - { type: 0, func-id: [[FIDARG:[0-9]+]], function: 'fArg(int)', args: [ 1 ], cpu: {{.*}}, thread: [[THREAD2:[0-9]+]], kind: function-enter-arg, tsc: {{[0-9]+}} }
+// UNWRITE-NEXT: - { type: 0, func-id: [[FIDARG]], function: 'fArg(int)', cpu: {{.*}}, thread: [[THREAD2]], kind: function-exit, tsc: {{[0-9]+}} }
+// UNWRITE-NOT: function-enter
+// UNWRITE-NOT: function-{{exit|tail-exit}}
diff --git a/test/xray/TestCases/Linux/fdr-single-thread.cc b/test/xray/TestCases/Linux/fdr-single-thread.cc
new file mode 100644
index 0000000..dd50f48
--- /dev/null
+++ b/test/xray/TestCases/Linux/fdr-single-thread.cc
@@ -0,0 +1,38 @@
+// RUN: %clangxx_xray -g -std=c++11 %s -o %t
+// RUN: rm fdr-logging-1thr-* || true
+// RUN: XRAY_OPTIONS=XRAY_OPTIONS="verbosity=1 patch_premain=true \
+// RUN:   xray_naive_log=false xray_fdr_log=true \
+// RUN:   xray_fdr_log_func_duration_threshold_us=0 \
+// RUN:   xray_logfile_base=fdr-logging-1thr-" %run %t 2>&1
+// RUN: %llvm_xray convert --output-format=yaml --symbolize --instr_map=%t \
+// RUN:   "`ls fdr-logging-1thr-* | head -n1`" | FileCheck %s
+// RUN: rm fdr-logging-1thr-*
+//
+// REQUIRES: x86_64-linux
+
+#include "xray/xray_log_interface.h"
+#include <cassert>
+
+constexpr auto kBufferSize = 16384;
+constexpr auto kBufferMax = 10;
+
+[[clang::xray_always_instrument]] void __attribute__((noinline)) fn() { }
+
+int main(int argc, char *argv[]) {
+  using namespace __xray;
+  FDRLoggingOptions Opts;
+
+  auto status = __xray_log_init(kBufferSize, kBufferMax, &Opts, sizeof(Opts));
+  assert(status == XRayLogInitStatus::XRAY_LOG_INITIALIZED);
+
+  __xray_patch();
+  fn();
+  __xray_unpatch();
+  assert(__xray_log_finalize() == XRAY_LOG_FINALIZED);
+  assert(__xray_log_flushLog() == XRAY_LOG_FLUSHED);
+  return 0;
+}
+
+// CHECK: records:
+// CHECK-NEXT: - { type: 0, func-id: [[FID1:[0-9]+]], function: {{.*fn.*}}, cpu: {{.*}}, thread: [[THREAD1:[0-9]+]], kind: function-enter, tsc: {{[0-9]+}} }
+// CHECK-NEXT: - { type: 0, func-id: [[FID1:[0-9]+]], function: {{.*fn.*}}, cpu: {{.*}}, thread: [[THREAD1:[0-9]+]], kind: function-exit, tsc: {{[0-9]+}} }
diff --git a/test/xray/TestCases/Linux/fdr-thread-order.cc b/test/xray/TestCases/Linux/fdr-thread-order.cc
new file mode 100644
index 0000000..8e8c421
--- /dev/null
+++ b/test/xray/TestCases/Linux/fdr-thread-order.cc
@@ -0,0 +1,67 @@
+// RUN: %clangxx_xray -g -std=c++11 %s -o %t
+// RUN: rm fdr-thread-order.* || true
+// RUN: XRAY_OPTIONS="patch_premain=false xray_naive_log=false \
+// RUN:    xray_logfile_base=fdr-thread-order. xray_fdr_log=true verbosity=1 \
+// RUN:    xray_fdr_log_func_duration_threshold_us=0" %run %t 2>&1 | \
+// RUN:    FileCheck %s
+// RUN: %llvm_xray convert --symbolize --output-format=yaml -instr_map=%t \
+// RUN:    "`ls fdr-thread-order.* | head -1`"
+// RUN: %llvm_xray convert --symbolize --output-format=yaml -instr_map=%t \
+// RUN:    "`ls fdr-thread-order.* | head -1`" | \
+// RUN:    FileCheck %s --check-prefix TRACE
+// RUN: rm fdr-thread-order.*
+// FIXME: Make llvm-xray work on non-x86_64 as well.
+// REQUIRES: x86_64-linux
+// REQUIRES: built-in-llvm-tree
+
+#include "xray/xray_log_interface.h"
+#include <atomic>
+#include <cassert>
+#include <thread>
+
+constexpr auto kBufferSize = 16384;
+constexpr auto kBufferMax = 10;
+
+std::atomic<uint64_t> var{0};
+
+[[clang::xray_always_instrument]] void __attribute__((noinline)) f1() {
+  for (auto i = 0; i < 1 << 20; ++i)
+    ++var;
+}
+
+[[clang::xray_always_instrument]] void __attribute__((noinline)) f2() {
+  for (auto i = 0; i < 1 << 20; ++i)
+    ++var;
+}
+
+int main(int argc, char *argv[]) {
+  using namespace __xray;
+  FDRLoggingOptions Options;
+  __xray_patch();
+  assert(__xray_log_init(kBufferSize, kBufferMax, &Options,
+                         sizeof(FDRLoggingOptions)) ==
+         XRayLogInitStatus::XRAY_LOG_INITIALIZED);
+
+  std::atomic_thread_fence(std::memory_order_acq_rel);
+
+  {
+    std::thread t1([] { f1(); });
+    std::thread t2([] { f2(); });
+    t1.join();
+    t2.join();
+  }
+
+  std::atomic_thread_fence(std::memory_order_acq_rel);
+  __xray_log_finalize();
+  __xray_log_flushLog();
+  __xray_unpatch();
+  return var > 0 ? 0 : 1;
+  // CHECK: {{.*}}XRay: Log file in '{{.*}}'
+  // CHECK-NOT: Failed
+}
+
+// We want to make sure that the order of the function log doesn't matter.
+// TRACE-DAG: - { type: 0, func-id: [[FID1:[0-9]+]], function: {{.*f1.*}}, cpu: {{.*}}, thread: [[THREAD1:[0-9]+]], kind: function-enter, tsc: {{[0-9]+}} }
+// TRACE-DAG: - { type: 0, func-id: [[FID2:[0-9]+]], function: {{.*f2.*}}, cpu: {{.*}}, thread: [[THREAD2:[0-9]+]], kind: function-enter, tsc: {{[0-9]+}} }
+// TRACE-DAG: - { type: 0, func-id: [[FID1]], function: {{.*f1.*}}, cpu: {{.*}}, thread: [[THREAD1]], kind: {{function-exit|function-tail-exit}}, tsc: {{[0-9]+}} }
+// TRACE-DAG: - { type: 0, func-id: [[FID2]], function: {{.*f2.*}}, cpu: {{.*}}, thread: [[THREAD2]], kind: {{function-exit|function-tail-exit}}, tsc: {{[0-9]+}} }
diff --git a/test/xray/TestCases/Linux/fixedsize-logging.cc b/test/xray/TestCases/Linux/fixedsize-logging.cc
index eb32afe..a2a41ce 100644
--- a/test/xray/TestCases/Linux/fixedsize-logging.cc
+++ b/test/xray/TestCases/Linux/fixedsize-logging.cc
@@ -7,6 +7,8 @@
 //
 // RUN: rm fixedsize-logging-*
 
+// UNSUPPORTED: target-is-mips64,target-is-mips64el
+
 #include <cstdio>
 
 [[clang::xray_always_instrument]] void foo() {
diff --git a/test/xray/TestCases/Linux/func-id-utils.cc b/test/xray/TestCases/Linux/func-id-utils.cc
new file mode 100644
index 0000000..4127536
--- /dev/null
+++ b/test/xray/TestCases/Linux/func-id-utils.cc
@@ -0,0 +1,44 @@
+// Check that we can turn a function id to a function address, and also get the
+// maximum function id for the current binary.
+//
+// RUN: %clangxx_xray -std=c++11 %s -o %t
+// RUN: XRAY_OPTIONS="patch_premain=false xray_naive_log=false" %run %t
+
+// UNSUPPORTED: target-is-mips64,target-is-mips64el
+
+#include "xray/xray_interface.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdio>
+#include <iterator>
+#include <set>
+
+[[clang::xray_always_instrument]] void bar(){}
+
+[[clang::xray_always_instrument]] void foo() {
+  bar();
+}
+
+[[clang::xray_always_instrument]] int main(int argc, char *argv[]) {
+  assert(__xray_max_function_id() != 0 && "we need xray instrumentation!");
+  std::set<void *> must_be_instrumented = {reinterpret_cast<void *>(&foo),
+                                           reinterpret_cast<void *>(&bar),
+                                           reinterpret_cast<void *>(&main)};
+  std::set<void *> all_instrumented;
+  for (auto i = __xray_max_function_id(); i != 0; --i) {
+    auto addr = __xray_function_address(i);
+    all_instrumented.insert(reinterpret_cast<void *>(addr));
+  }
+  assert(all_instrumented.size() == __xray_max_function_id() &&
+         "each function id must be assigned to a unique function");
+
+  std::set<void *> not_instrumented;
+  std::set_difference(
+      must_be_instrumented.begin(), must_be_instrumented.end(),
+      all_instrumented.begin(), all_instrumented.end(),
+      std::inserter(not_instrumented, not_instrumented.begin()));
+  assert(
+      not_instrumented.empty() &&
+      "we should see all explicitly instrumented functions with function ids");
+  return not_instrumented.empty() ? 0 : 1;
+}
diff --git a/test/xray/TestCases/Linux/optional-inmemory-log.cc b/test/xray/TestCases/Linux/optional-inmemory-log.cc
index f459d5a..feaaa41 100644
--- a/test/xray/TestCases/Linux/optional-inmemory-log.cc
+++ b/test/xray/TestCases/Linux/optional-inmemory-log.cc
@@ -8,6 +8,8 @@
 //
 // RUN: rm -f optional-inmemory-log.xray-*
 
+// UNSUPPORTED: target-is-mips64,target-is-mips64el
+
 #include <cstdio>
 
 [[clang::xray_always_instrument]] void foo() {
diff --git a/test/xray/TestCases/Linux/patching-unpatching.cc b/test/xray/TestCases/Linux/patching-unpatching.cc
index 05478a4..a7ea58f 100644
--- a/test/xray/TestCases/Linux/patching-unpatching.cc
+++ b/test/xray/TestCases/Linux/patching-unpatching.cc
@@ -4,6 +4,8 @@
 // RUN: %clangxx_xray -fxray-instrument -std=c++11 %s -o %t
 // RUN: XRAY_OPTIONS="patch_premain=false" %run %t 2>&1 | FileCheck %s
 
+// UNSUPPORTED: target-is-mips64,target-is-mips64el
+
 #include "xray/xray_interface.h"
 
 #include <cstdio>
diff --git a/test/xray/TestCases/Linux/pic_test.cc b/test/xray/TestCases/Linux/pic_test.cc
index 09c40b9..0e72f5d 100644
--- a/test/xray/TestCases/Linux/pic_test.cc
+++ b/test/xray/TestCases/Linux/pic_test.cc
@@ -1,11 +1,14 @@
 // Test to check if we handle pic code properly.
 
-// RUN: %clangxx_xray -fxray-instrument -std=c++11 -fpic %s -o %t
+// RUN: %clangxx_xray -fxray-instrument -std=c++11 -ffunction-sections \
+// RUN:     -fdata-sections -fpic -fpie -Wl,--gc-sections %s -o %t
 // RUN: XRAY_OPTIONS="patch_premain=true verbosity=1 xray_logfile_base=pic-test-logging-" %run %t 2>&1 | FileCheck %s
 // After all that, clean up the output xray log.
 //
 // RUN: rm pic-test-logging-*
 
+// UNSUPPORTED: target-is-mips64,target-is-mips64el
+
 #include <cstdio>
 
 [[clang::xray_always_instrument]]
diff --git a/test/xray/TestCases/Linux/quiet-start.cc b/test/xray/TestCases/Linux/quiet-start.cc
new file mode 100644
index 0000000..e26fa63
--- /dev/null
+++ b/test/xray/TestCases/Linux/quiet-start.cc
@@ -0,0 +1,26 @@
+// Ensure that we have a quiet startup when we don't have the XRay
+// instrumentation sleds.
+//
+// RUN: %clangxx -std=c++11 %s -o %t %xraylib
+// RUN: XRAY_OPTIONS="patch_premain=true verbosity=1" %run %t 2>&1 | \
+// RUN:    FileCheck %s --check-prefix NOISY
+// RUN: XRAY_OPTIONS="patch_premain=true verbosity=0" %run %t 2>&1 | \
+// RUN:    FileCheck %s --check-prefix QUIET
+// RUN: XRAY_OPTIONS="" %run %t 2>&1 | FileCheck %s --check-prefix DEFAULT
+//
+// FIXME: Understand how to make this work on other platforms
+// REQUIRES: built-in-llvm-tree
+// REQUIRES: x86_64-linux
+#include <iostream>
+
+using namespace std;
+
+int main(int, char**) {
+  // NOISY: {{.*}}XRay instrumentation map missing. Not initializing XRay.
+  // QUIET-NOT: {{.*}}XRay instrumentation map missing. Not initializing XRay.
+  // DEFAULT-NOT: {{.*}}XRay instrumentation map missing. Not initializing XRay.
+  cout << "Hello, XRay!" << endl;
+  // NOISY: Hello, XRay!
+  // QUIET: Hello, XRay!
+  // DEFAULT: Hello, XRay!
+}
diff --git a/test/xray/Unit/lit.site.cfg.in b/test/xray/Unit/lit.site.cfg.in
index 75ea888..be860de 100644
--- a/test/xray/Unit/lit.site.cfg.in
+++ b/test/xray/Unit/lit.site.cfg.in
@@ -13,4 +13,4 @@
 
 # Do not patch the XRay unit tests pre-main, and also make the error logging
 # verbose to get a more accurate error logging mechanism.
-config.environment['XRAY_OPTIONS'] = 'patch_premain=false verbose=1'
+config.environment['XRAY_OPTIONS'] = 'patch_premain=false'
diff --git a/test/xray/lit.cfg b/test/xray/lit.cfg
index 9142ad1..d5e4097 100644
--- a/test/xray/lit.cfg
+++ b/test/xray/lit.cfg
@@ -16,6 +16,9 @@
 def build_invocation(compile_flags):
   return ' ' + ' '.join([config.clang] + compile_flags) + ' '
 
+# Assume that llvm-xray is in the config.llvm_tools_dir.
+llvm_xray = os.path.join(config.llvm_tools_dir, 'llvm-xray')
+
 # Setup substitutions.
 config.substitutions.append(
     ('%clang ', build_invocation([config.target_cflags])))
@@ -26,6 +29,13 @@
     ('%clang_xray ', build_invocation(clang_xray_cflags)))
 config.substitutions.append(
     ('%clangxx_xray', build_invocation(clang_xray_cxxflags)))
+config.substitutions.append(
+    ('%llvm_xray', llvm_xray))
+config.substitutions.append(
+    ('%xraylib',
+        ('-lm -lpthread -ldl -lrt -L%s '
+         '-Wl,-whole-archive -lclang_rt.xray-%s -Wl,-no-whole-archive')
+        % (config.compiler_rt_libdir, config.host_arch)))
 
 # Default test suffixes.
 config.suffixes = ['.c', '.cc', '.cpp']
@@ -38,8 +48,3 @@
       config.unsupported = True
   else:
     config.unsupported = True
-
-# Allow tests to use REQUIRES=stable-runtime.  For use when you cannot use XFAIL
-# e.g. because the test sometimes passes, sometimes fails.
-if config.target_arch != 'aarch64':
-  config.available_features.add('stable-runtime')
diff --git a/test/xray/lit.site.cfg.in b/test/xray/lit.site.cfg.in
index ee0ffca..bc4acb6 100644
--- a/test/xray/lit.site.cfg.in
+++ b/test/xray/lit.site.cfg.in
@@ -5,9 +5,16 @@
 config.xray_lit_source_dir = "@XRAY_LIT_SOURCE_DIR@"
 config.target_cflags = "@XRAY_TEST_TARGET_CFLAGS@"
 config.target_arch = "@XRAY_TEST_TARGET_ARCH@"
+config.built_with_llvm = ("@COMPILER_RT_STANDALONE_BUILD@" != "TRUE")
+
+# TODO: Look into whether we can run a capability test on the standalone build to
+# see whether it can run 'llvm-xray convert' instead of turning off tests for a
+# standalone build.
+if config.built_with_llvm:
+  config.available_features.add('built-in-llvm-tree')
 
 # Load common config for all compiler-rt lit tests
 lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/test/lit.common.configured")
 
 # Load tool-specific config that would do the real work.
-lit_config.load_config(config, "@XRAY_LIT_SOURCE_DIR@/lit.cfg")
+lit_config.load_config(config, "@CMAKE_CURRENT_SOURCE_DIR@/lit.cfg")
diff --git a/unittests/lit.common.unit.cfg b/unittests/lit.common.unit.cfg
index 475b22d..31206e9 100644
--- a/unittests/lit.common.unit.cfg
+++ b/unittests/lit.common.unit.cfg
@@ -16,7 +16,7 @@
 config.suffixes = []
 
 # Tweak PATH to include llvm tools dir.
-llvm_tools_dir = getattr(config, 'llvm_tools_dir', None)
+llvm_tools_dir = config.llvm_tools_dir
 if (not llvm_tools_dir) or (not os.path.exists(llvm_tools_dir)):
   lit_config.fatal("Invalid llvm_tools_dir config attribute: %r" % llvm_tools_dir)
 path = os.path.pathsep.join((llvm_tools_dir, config.environment['PATH']))
@@ -35,6 +35,12 @@
   # of large mmap'd regions (terabytes) by the kernel.
   lit_config.parallelism_groups["darwin-64bit-sanitizer"] = 3
 
-  def darwin_sanitizer_parallelism_group_func(test):
-    return "darwin-64bit-sanitizer" if "x86_64" in test.file_path else ""
-  config.darwin_sanitizer_parallelism_group_func = darwin_sanitizer_parallelism_group_func
+  # The test config gets pickled and sent to multiprocessing workers, and that
+  # only works for code if it is stored at the top level of some module.
+  # Therefore, we have to put the code in a .py file, add it to path, and import
+  # it to store it in the config.
+  import site
+  site.addsitedir(os.path.dirname(__file__))
+  import lit_unittest_cfg_utils
+  config.darwin_sanitizer_parallelism_group_func = \
+    lit_unittest_cfg_utils.darwin_sanitizer_parallelism_group_func
diff --git a/unittests/lit_unittest_cfg_utils.py b/unittests/lit_unittest_cfg_utils.py
new file mode 100644
index 0000000..ff7b1ee
--- /dev/null
+++ b/unittests/lit_unittest_cfg_utils.py
@@ -0,0 +1,4 @@
+# Put all 64-bit sanitizer tests in the darwin-64bit-sanitizer parallelism
+# group. This will only run three of them concurrently.
+def darwin_sanitizer_parallelism_group_func(test):
+  return "darwin-64bit-sanitizer" if "x86_64" in test.file_path else ""