NFC: Make the copies of the demangler byte-for-byte identical

With this patch, the copies of the files ItaniumDemangle.h,
StringView.h, and Utility.h are kept byte-for-byte in sync between
libcxxabi and llvm. All differences (namespaces, fallthrough, and
unreachable macros) are defined in each copy's DemangleConfig.h.

This patch also adds a script to copy changes from libcxxabi
(cp-to-llvm.sh), and a README.txt explaining the situation.

Differential revision: https://reviews.llvm.org/D53538

git-svn-id: https://llvm.org/svn/llvm-project/libcxxabi/trunk@351474 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/src/cxa_demangle.cpp b/src/cxa_demangle.cpp
index f227add..7e8f719 100644
--- a/src/cxa_demangle.cpp
+++ b/src/cxa_demangle.cpp
@@ -11,12 +11,8 @@
 // file does not yet support:
 //   - C++ modules TS
 
-#define _LIBCPP_NO_EXCEPTIONS
-
-#include "__cxxabi_config.h"
-
 #include "demangle/ItaniumDemangle.h"
-
+#include "__cxxabi_config.h"
 #include <cassert>
 #include <cctype>
 #include <cstdio>
@@ -25,7 +21,6 @@
 #include <functional>
 #include <numeric>
 #include <utility>
-#include <vector>
 
 using namespace itanium_demangle;
 
diff --git a/src/demangle/Compiler.h b/src/demangle/DemangleConfig.h
similarity index 70%
rename from src/demangle/Compiler.h
rename to src/demangle/DemangleConfig.h
index e5f3c72..3be1ccd 100644
--- a/src/demangle/Compiler.h
+++ b/src/demangle/DemangleConfig.h
@@ -12,6 +12,8 @@
 #ifndef LIBCXX_DEMANGLE_COMPILER_H
 #define LIBCXX_DEMANGLE_COMPILER_H
 
+#include "__config"
+
 #ifdef _MSC_VER
 // snprintf is implemented in VS 2015
 #if _MSC_VER < 1900
@@ -25,10 +27,16 @@
 
 #ifndef NDEBUG
 #if __has_attribute(noinline) && __has_attribute(used)
-#define DUMP_METHOD __attribute__((noinline, used))
+#define DEMANGLE_DUMP_METHOD __attribute__((noinline, used))
 #else
-#define DUMP_METHOD
+#define DEMANGLE_DUMP_METHOD
 #endif
 #endif
 
+#define DEMANGLE_FALLTHROUGH _LIBCPP_FALLTHROUGH()
+#define DEMANGLE_UNREACHABLE _LIBCPP_UNREACHABLE()
+
+#define DEMANGLE_NAMESPACE_BEGIN namespace { namespace itanium_demangle {
+#define DEMANGLE_NAMESPACE_END } }
+
 #endif
diff --git a/src/demangle/ItaniumDemangle.h b/src/demangle/ItaniumDemangle.h
index 9e9d183..53107c9 100644
--- a/src/demangle/ItaniumDemangle.h
+++ b/src/demangle/ItaniumDemangle.h
@@ -7,22 +7,21 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// WARNING: This file defines its contents within an anonymous namespace. It
-// should not be included anywhere other than cxa_demangle.h.
+// Generic itanium demangler library. This file has two byte-for-byte identical
+// copies in the source tree, one in libcxxabi, and the other in llvm.
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LIBCXX_DEMANGLE_ITANIUMDEMANGLE_H
-#define LIBCXX_DEMANGLE_ITANIUMDEMANGLE_H
+#ifndef DEMANGLE_ITANIUMDEMANGLE_H
+#define DEMANGLE_ITANIUMDEMANGLE_H
 
 // FIXME: (possibly) incomplete list of features that clang mangles that this
 // file does not yet support:
 //   - C++ modules TS
 
-#include "Compiler.h"
+#include "DemangleConfig.h"
 #include "StringView.h"
 #include "Utility.h"
-
 #include <cassert>
 #include <cctype>
 #include <cstdio>
@@ -100,8 +99,8 @@
     X(BracedExpr) \
     X(BracedRangeExpr)
 
-namespace {
-namespace itanium_demangle {
+DEMANGLE_NAMESPACE_BEGIN
+
 // Base class of all AST nodes. The AST is built by the parser, then is
 // traversed by the printLeft/Right functions to produce a demangled string.
 class Node {
@@ -199,7 +198,7 @@
   virtual ~Node() = default;
 
 #ifndef NDEBUG
-  DUMP_METHOD void dump() const;
+  DEMANGLE_DUMP_METHOD void dump() const;
 #endif
 };
 
@@ -1283,7 +1282,7 @@
     case SpecialSubKind::iostream:
       return StringView("basic_iostream");
     }
-    _LIBCPP_UNREACHABLE();
+    DEMANGLE_UNREACHABLE;
   }
 
   void printLeft(OutputStream &S) const override {
@@ -1335,7 +1334,7 @@
     case SpecialSubKind::iostream:
       return StringView("iostream");
     }
-    _LIBCPP_UNREACHABLE();
+    DEMANGLE_UNREACHABLE;
   }
 
   void printLeft(OutputStream &S) const override {
@@ -3472,7 +3471,7 @@
       Result = getDerived().parseFunctionType();
       break;
     }
-    _LIBCPP_FALLTHROUGH();
+    DEMANGLE_FALLTHROUGH;
   }
   case 'U': {
     Result = getDerived().parseQualifiedType();
@@ -3759,7 +3758,7 @@
       // substitution table.
       return Sub;
     }
-    _LIBCPP_FALLTHROUGH();
+    DEMANGLE_FALLTHROUGH;
   }
   //        ::= <class-enum-type>
   default: {
@@ -5183,7 +5182,6 @@
                                Alloc>::AbstractManglingParser;
 };
 
-}  // namespace itanium_demangle
-}  // namespace
+DEMANGLE_NAMESPACE_END
 
-#endif // LIBCXX_DEMANGLE_ITANIUMDEMANGLE_H
+#endif // DEMANGLE_ITANIUMDEMANGLE_H
diff --git a/src/demangle/README.txt b/src/demangle/README.txt
new file mode 100644
index 0000000..514ff6d
--- /dev/null
+++ b/src/demangle/README.txt
@@ -0,0 +1,52 @@
+Itanium Name Demangler Library
+==============================
+
+Introduction
+------------
+
+This directory contains the generic itanium name demangler library. The main
+purpose of the library is to demangle C++ symbols, i.e. convert the string
+"_Z1fv" into "f()". You can also use the CRTP base ManglingParser to perform
+some simple analysis on the mangled name, or (in LLVM) use the opaque
+ItaniumPartialDemangler to query the demangled AST.
+
+Why are there multiple copies of this library in the source tree?
+-----------------------------------------------------------------
+
+This directory is mirrored between libcxxabi/src/demangle and
+llvm/include/llvm/Demangle. The simple reason for this is that both projects
+need to demangle symbols, but neither can depend on each other. libcxxabi needs
+the demangler to implement __cxa_demangle, which is part of the itanium ABI
+spec. LLVM needs a copy for a bunch of places, but doesn't want to use the
+system's __cxa_demangle because it a) might not be available (e.g., on Windows),
+and b) probably isn't that up-to-date on the latest language features.
+
+The copy of the demangler in LLVM has some extra pieces that aren't needed in
+libcxxabi (i.e., the MSVC demangler, ItaniumPartialDemangler), which depend on
+the shared generic components. Despite these differences, we want to keep the
+"core" generic demangling library identical between both copies to simplify
+development and testing.
+
+If you're working on the generic library, then do the work first in libcxxabi,
+then run the cp-to-llvm.sh script in src/demangle. This script takes as an
+argument the path to llvm, and re-copies the changes you made to libcxxabi over.
+Note that this script just blindly overwrites all changes to the generic library
+in llvm, so be careful.
+
+Because the core demangler needs to work in libcxxabi, everything needs to be
+declared in an anonymous namespace (see DEMANGLE_NAMESPACE_BEGIN), and you can't
+introduce any code that depends on the libcxx dylib.
+
+Hopefully, when LLVM becomes a monorepo, we can de-duplicate this code, and have
+both LLVM and libcxxabi depend on a shared demangler library.
+
+Testing
+-------
+
+The tests are split up between libcxxabi/test/{unit,}test_demangle.cpp, and
+llvm/unittest/Demangle. The llvm directory should only get tests for stuff not
+included in the core library. In the future though, we should probably move all
+the tests to LLVM.
+
+It is also a really good idea to run libFuzzer after non-trivial changes, see
+libcxxabi/fuzz/cxa_demangle_fuzzer.cpp and https://llvm.org/docs/LibFuzzer.html.
diff --git a/src/demangle/StringView.h b/src/demangle/StringView.h
index 986f2de..9af4e06 100644
--- a/src/demangle/StringView.h
+++ b/src/demangle/StringView.h
@@ -5,23 +5,29 @@
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
+//===----------------------------------------------------------------------===//
 //
-// This file is copied from llvm/lib/Demangle/StringView.h.
+// FIXME: Use std::string_view instead when we support C++17.
+//
 //===----------------------------------------------------------------------===//
 
-#ifndef LIBCXX_DEMANGLE_STRINGVIEW_H
-#define LIBCXX_DEMANGLE_STRINGVIEW_H
+#ifndef DEMANGLE_STRINGVIEW_H
+#define DEMANGLE_STRINGVIEW_H
 
+#include "DemangleConfig.h"
 #include <algorithm>
 #include <cassert>
 #include <cstring>
 
-namespace {
+DEMANGLE_NAMESPACE_BEGIN
+
 class StringView {
   const char *First;
   const char *Last;
 
 public:
+  static const size_t npos = ~size_t(0);
+
   template <size_t N>
   StringView(const char (&Str)[N]) : First(Str), Last(Str + N - 1) {}
   StringView(const char *First_, const char *Last_)
@@ -35,6 +41,17 @@
     return StringView(begin() + From, size() - From);
   }
 
+  size_t find(char C, size_t From = 0) const {
+    size_t FindBegin = std::min(From, size());
+    // Avoid calling memchr with nullptr.
+    if (FindBegin < size()) {
+      // Just forward to memchr, which is faster than a hand-rolled loop.
+      if (const void *P = ::memchr(First + FindBegin, C, size() - FindBegin))
+        return size_t(static_cast<const char *>(P) - First);
+    }
+    return npos;
+  }
+
   StringView substr(size_t From, size_t To) const {
     if (To >= size())
       To = size() - 1;
@@ -45,15 +62,26 @@
 
   StringView dropFront(size_t N = 1) const {
     if (N >= size())
-      N = size() - 1;
+      N = size();
     return StringView(First + N, Last);
   }
 
+  StringView dropBack(size_t N = 1) const {
+    if (N >= size())
+      N = size();
+    return StringView(First, Last - N);
+  }
+
   char front() const {
     assert(!empty());
     return *begin();
   }
 
+  char back() const {
+    assert(!empty());
+    return *(end() - 1);
+  }
+
   char popFront() {
     assert(!empty());
     return *First++;
@@ -93,6 +121,7 @@
   return LHS.size() == RHS.size() &&
          std::equal(LHS.begin(), LHS.end(), RHS.begin());
 }
-} // namespace
+
+DEMANGLE_NAMESPACE_END
 
 #endif
diff --git a/src/demangle/Utility.h b/src/demangle/Utility.h
index b5e9b5e..4d44cd8 100644
--- a/src/demangle/Utility.h
+++ b/src/demangle/Utility.h
@@ -5,22 +5,24 @@
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
+//===----------------------------------------------------------------------===//
 //
-// This file is copied from llvm/lib/Demangle/Utility.h.
+// Provide some utility classes for use in the demangler(s).
+//
 //===----------------------------------------------------------------------===//
 
-#ifndef LIBCXX_DEMANGLE_UTILITY_H
-#define LIBCXX_DEMANGLE_UTILITY_H
+#ifndef DEMANGLE_UTILITY_H
+#define DEMANGLE_UTILITY_H
 
 #include "StringView.h"
-
 #include <cstdint>
 #include <cstdlib>
 #include <cstring>
 #include <iterator>
 #include <limits>
 
-namespace {
+DEMANGLE_NAMESPACE_BEGIN
+
 // Stream that AST nodes write their string representation into after the AST
 // has been parsed.
 class OutputStream {
@@ -185,6 +187,6 @@
   return true;
 }
 
-} // namespace
+DEMANGLE_NAMESPACE_END
 
 #endif
diff --git a/src/demangle/cp-to-llvm.sh b/src/demangle/cp-to-llvm.sh
new file mode 100755
index 0000000..808abbc
--- /dev/null
+++ b/src/demangle/cp-to-llvm.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+# Copies the 'demangle' library, excluding 'DemangleConfig.h', to llvm. If no
+# llvm directory is specified, then assume a monorepo layout.
+
+set -e
+
+FILES="ItaniumDemangle.h StringView.h Utility.h README.txt"
+LLVM_DEMANGLE_DIR=$1
+
+if [[ -z "$LLVM_DEMANGLE_DIR" ]]; then
+    LLVM_DEMANGLE_DIR="../../../llvm/include/llvm/Demangle"
+fi
+
+if [[ ! -d "$LLVM_DEMANGLE_DIR" ]]; then
+    echo "No such directory: $LLVM_DEMANGLE_DIR" >&2
+    exit 1
+fi
+
+read -p "This will overwrite the copies of $FILES in $LLVM_DEMANGLE_DIR; are you sure? [y/N]" -n 1 -r ANSWER
+echo
+
+if [[ $ANSWER =~ ^[Yy]$ ]]; then
+    for I in $FILES ; do
+        cp $I $LLVM_DEMANGLE_DIR/$I
+    done
+fi