[GWP-ASan] Add public-facing documentation [6].

Summary:
Note: Do not submit this documentation until Scudo support is reviewed and submitted (should be #[5]).

See D60593 for further information.

This patch introduces the public-facing documentation for GWP-ASan, and updates the definition of one of the options, which wasn't properly merged. The document describes the design and features of GWP-ASan, explains how to use GWP-ASan from a user's standpoint, and provides development documentation for supporting allocators.

Reviewers: jfb, morehouse, vlad.tsyrklevich

Reviewed By: morehouse, vlad.tsyrklevich

Subscribers: kcc, dexonsmith, kubamracek, cryptoad, jfb, #sanitizers, llvm-commits, vlad.tsyrklevich, morehouse

Tags: #sanitizers, #llvm

Differential Revision: https://reviews.llvm.org/D62875

llvm-svn: 369552
GitOrigin-RevId: c776f3f3c26f69012456117b5487df2b81ad51e7
diff --git a/options.inc b/options.inc
index 9042b11..df6c46e 100644
--- a/options.inc
+++ b/options.inc
@@ -21,9 +21,9 @@
     "byte buffer-overflows for multibyte allocations at the cost of "
     "performance, and may be incompatible with some architectures.")
 
-GWP_ASAN_OPTION(
-    int, MaxSimultaneousAllocations, 16,
-    "Number of usable guarded slots in the allocation pool. Defaults to 16.")
+GWP_ASAN_OPTION(int, MaxSimultaneousAllocations, 16,
+                "Number of simultaneously-guarded allocations available in the "
+                "pool. Defaults to 16.")
 
 GWP_ASAN_OPTION(int, SampleRate, 5000,
                 "The probability (1 / SampleRate) that an allocation is "
diff --git a/scripts/symbolize.sh b/scripts/symbolize.sh
new file mode 100755
index 0000000..fad9620
--- /dev/null
+++ b/scripts/symbolize.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+# The lines that we're looking to symbolize look like this:
+  #0 ./a.out(_foo+0x3e6) [0x55a52e64c696]
+# ... which come from the backtrace_symbols() symbolisation function used by
+# default in Scudo's implementation of GWP-ASan.
+
+while read -r line; do
+  # Check that this line needs symbolization.
+  should_symbolize="$(echo $line |\
+     grep -E '^[ ]*\#.*\(.*\+0x[0-9a-f]+\) \[0x[0-9a-f]+\]$')"
+
+  if [ -z "$should_symbolize" ]; then
+    echo "$line"
+    continue
+  fi
+
+  # Carve up the input line into sections.
+  binary_name="$(echo $line | grep -oE ' .*\(' | rev | cut -c2- | rev |\
+      cut -c2-)"
+  function_name="$(echo $line | grep -oE '\([^+]*' | cut -c2-)"
+  function_offset="$(echo $line | grep -oE '\(.*\)' | grep -oE '\+.*\)' |\
+      cut -c2- | rev | cut -c2- | rev)"
+  frame_number="$(echo $line | grep -oE '\#[0-9]+ ')"
+
+  if [ -z "$function_name" ]; then
+    # If the offset is binary-relative, just resolve that.
+    symbolized="$(echo $function_offset | addr2line -e $binary_name)"
+  else
+    # Otherwise, the offset is function-relative. Get the address of the
+    # function, and add it to the offset, then symbolize.
+    function_addr="0x$(echo $function_offset |\
+       nm --defined-only $binary_name 2> /dev/null |\
+       grep -E " $function_name$" | cut -d' ' -f1)"
+
+    # Check that we could get the function address from nm.
+    if [ -z "$function_addr" ]; then
+      echo "$line"
+      continue
+    fi
+
+    # Add the function address and offset to get the offset into the binary.
+    binary_offset="$(printf "0x%X" "$((function_addr+function_offset))")"
+    symbolized="$(echo $binary_offset | addr2line -e $binary_name)"
+  fi
+
+  # Check that it symbolized properly. If it didn't, output the old line.
+  echo $symbolized | grep -E ".*\?.*:" > /dev/null
+  if [ "$?" -eq "0" ]; then
+    echo "$line"
+    continue
+  else
+    echo "${frame_number}${symbolized}"
+  fi
+done