[MsgPack] New MsgPackDocument class

Summary:
A class that exposes a simple in-memory representation of a document of
MsgPack objects, that can be read from and written to MsgPack, read from
and written to YAML, and inspected and modified in memory. This is
intended to be a lighter-weight (in terms of memory allocations)
replacement for MsgPackTypes.

Two subsequent changes will:
1. switch AMDGPU HSA metadata to using MsgPackDocument instead of
   MsgPackTypes;
2. add MsgPack AMDGPU PAL metadata via MsgPackDocument.

Differential Revision: https://reviews.llvm.org/D57023

Change-Id: Ie15a054831d5a6467c5867c064c8f8f6b80270e1

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@356080 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/include/llvm/BinaryFormat/MsgPackDocument.h b/include/llvm/BinaryFormat/MsgPackDocument.h
new file mode 100644
index 0000000..824ecc3
--- /dev/null
+++ b/include/llvm/BinaryFormat/MsgPackDocument.h
@@ -0,0 +1,385 @@
+//===-- MsgPackDocument.h - MsgPack Document --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file declares a class that exposes a simple in-memory representation
+/// of a document of MsgPack objects, that can be read from and written to
+/// MsgPack, read from and written to YAML, and inspected and modified in
+/// memory. This is intended to be a lighter-weight (in terms of memory
+/// allocations) replacement for MsgPackTypes.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BINARYFORMAT_MSGPACKDOCUMENT_H
+#define LLVM_BINARYFORMAT_MSGPACKDOCUMENT_H
+
+#include "llvm/BinaryFormat/MsgPackReader.h"
+#include <map>
+
+namespace llvm {
+namespace msgpack {
+
+class ArrayDocNode;
+class Document;
+class MapDocNode;
+
+/// The kind of a DocNode and its owning Document.
+struct KindAndDocument {
+  Document *Doc;
+  Type Kind;
+};
+
+/// A node in a MsgPack Document. This is a simple copyable and
+/// passable-by-value type that does not own any memory.
+class DocNode {
+  friend Document;
+
+public:
+  typedef std::map<DocNode, DocNode> MapTy;
+  typedef std::vector<DocNode> ArrayTy;
+
+private:
+  // Using KindAndDocument allows us to squeeze Kind and a pointer to the
+  // owning Document into the same word. Having a pointer to the owning
+  // Document makes the API of DocNode more convenient, and allows its use in
+  // YAMLIO.
+  const KindAndDocument *KindAndDoc;
+
+protected:
+  // The union of different values.
+  union {
+    int64_t Int;
+    uint64_t UInt;
+    bool Bool;
+    double Float;
+    StringRef Raw;
+    ArrayTy *Array;
+    MapTy *Map;
+  };
+
+public:
+  DocNode() : KindAndDoc(nullptr) {}
+
+  // Type methods
+  bool isMap() const { return getKind() == Type::Map; }
+  bool isArray() const { return getKind() == Type::Array; }
+  bool isScalar() const { return !isMap() && !isArray(); }
+  bool isString() const { return getKind() == Type::String; }
+
+  // Accessors
+  bool isEmpty() const { return !KindAndDoc; }
+  Type getKind() const { return KindAndDoc->Kind; }
+  Document *getDocument() const { return KindAndDoc->Doc; }
+
+  int64_t &getInt() {
+    assert(getKind() == Type::Int);
+    return Int;
+  }
+
+  uint64_t &getUInt() {
+    assert(getKind() == Type::UInt);
+    return UInt;
+  }
+
+  bool &getBool() {
+    assert(getKind() == Type::Boolean);
+    return Bool;
+  }
+
+  double &getFloat() {
+    assert(getKind() == Type::Float);
+    return Float;
+  }
+
+  int64_t getInt() const {
+    assert(getKind() == Type::Int);
+    return Int;
+  }
+
+  uint64_t getUInt() const {
+    assert(getKind() == Type::UInt);
+    return UInt;
+  }
+
+  bool getBool() const {
+    assert(getKind() == Type::Boolean);
+    return Bool;
+  }
+
+  double getFloat() const {
+    assert(getKind() == Type::Float);
+    return Float;
+  }
+
+  StringRef getString() const {
+    assert(getKind() == Type::String);
+    return Raw;
+  }
+
+  /// Get an ArrayDocNode for an array node. If Convert, convert the node to an
+  /// array node if necessary.
+  ArrayDocNode &getArray(bool Convert = false) {
+    if (getKind() != Type::Array) {
+      assert(Convert);
+      convertToArray();
+    }
+    // This could be a static_cast, except ArrayDocNode is a forward reference.
+    return *reinterpret_cast<ArrayDocNode *>(this);
+  }
+
+  /// Get a MapDocNode for a map node. If Convert, convert the node to a map
+  /// node if necessary.
+  MapDocNode &getMap(bool Convert = false) {
+    if (getKind() != Type::Map) {
+      assert(Convert);
+      convertToMap();
+    }
+    // This could be a static_cast, except MapDocNode is a forward reference.
+    return *reinterpret_cast<MapDocNode *>(this);
+  }
+
+  /// Comparison operator, used for map keys. Empty nodes sort first.
+  friend bool operator<(const DocNode &Lhs, const DocNode &Rhs) {
+    // This has to cope with one or both of the nodes being default-constructed,
+    // such that KindAndDoc is not set. Test Rhs first so that two empty nodes
+    // compare equivalent instead of falling through to getKind() on null.
+    if (Rhs.isEmpty())
+      return false;
+    if (Lhs.isEmpty())
+      return true;
+    if (Lhs.KindAndDoc != Rhs.KindAndDoc) // Order by kind when these differ.
+      return (unsigned)Lhs.getKind() < (unsigned)Rhs.getKind();
+    switch (Lhs.getKind()) {
+    case Type::Int:
+      return Lhs.Int < Rhs.Int;
+    case Type::UInt:
+      return Lhs.UInt < Rhs.UInt;
+    case Type::Nil:
+      return false; // All nil nodes of one Document are equivalent.
+    case Type::Boolean:
+      return Lhs.Bool < Rhs.Bool;
+    case Type::Float:
+      return Lhs.Float < Rhs.Float;
+    case Type::String:
+    case Type::Binary:
+      return Lhs.Raw < Rhs.Raw;
+    default:
+      llvm_unreachable("bad map key type"); // Maps and arrays are not keys.
+    }
+  }
+
+  /// Equality operator
+  friend bool operator==(const DocNode &Lhs, const DocNode &Rhs) {
+    return !(Lhs < Rhs) && !(Rhs < Lhs);
+  }
+
+  /// Convert this node to a string, assuming it is scalar.
+  std::string toString() const;
+
+  /// Convert the StringRef and use it to set this DocNode (assuming scalar). If
+  /// it is a string, copy the string into the Document's strings list so we do
+  /// not rely on S having a lifetime beyond this call. Tag is "" or a YAML tag.
+  StringRef fromString(StringRef S, StringRef Tag = "");
+
+private:
+  // Private constructor setting KindAndDoc, used by methods in Document.
+  DocNode(const KindAndDocument *KindAndDoc) : KindAndDoc(KindAndDoc) {}
+
+  void convertToArray();
+  void convertToMap();
+};
+
+/// A DocNode that is a map.
+class MapDocNode : public DocNode {
+public:
+  MapDocNode() {}
+  MapDocNode(DocNode &N) : DocNode(N) { assert(getKind() == Type::Map); }
+
+  // Map access methods.
+  size_t size() const { return Map->size(); }
+  bool empty() const { return !size(); }
+  MapTy::iterator begin() { return Map->begin(); }
+  MapTy::iterator end() { return Map->end(); }
+  MapTy::iterator find(DocNode Key) { return Map->find(Key); }
+  MapTy::iterator find(StringRef Key);
+  /// Member access. The string data must remain valid for the lifetime of the
+  /// Document.
+  DocNode &operator[](StringRef S);
+  /// Member access.
+  DocNode &operator[](DocNode Key);
+};
+
+/// A DocNode that is an array.
+class ArrayDocNode : public DocNode {
+public:
+  ArrayDocNode() {}
+  ArrayDocNode(DocNode &N) : DocNode(N) { assert(getKind() == Type::Array); }
+
+  // Array access methods.
+  size_t size() const { return Array->size(); }
+  bool empty() const { return !size(); }
+  ArrayTy::iterator begin() { return Array->begin(); }
+  ArrayTy::iterator end() { return Array->end(); }
+  void push_back(DocNode N) {
+    assert(N.getDocument() == getDocument());
+    Array->push_back(N);
+  }
+
+  /// Element access. This extends the array if necessary.
+  DocNode &operator[](size_t Index);
+};
+
+/// Simple in-memory representation of a document of msgpack objects with
+/// ability to find and create array and map elements.  Does not currently cope
+/// with any extension types.
+class Document {
+  // Maps, arrays and strings used by nodes in the document. No attempt is made
+  // to free unused ones.
+  std::vector<std::unique_ptr<DocNode::MapTy>> Maps;
+  std::vector<std::unique_ptr<DocNode::ArrayTy>> Arrays;
+  std::vector<std::unique_ptr<char[]>> Strings;
+
+  // The root node of the document.
+  DocNode Root;
+
+  // The KindAndDocument structs pointed to by nodes in the document.
+  KindAndDocument KindAndDocs[size_t(Type::Extension) + 1];
+
+  // Whether YAML output uses hex for UInt.
+  bool HexMode = false;
+
+public:
+  Document() {
+    clear();
+    for (unsigned T = 0; T != size_t(Type::Extension) + 1; ++T)
+      KindAndDocs[T] = {this, Type(T)};
+  }
+
+  /// Get ref to the document's root element.
+  DocNode &getRoot() { return Root; }
+
+  /// Restore the Document to an empty state.
+  void clear() { getRoot() = getNode(); }
+
+  /// Create a nil node associated with this Document.
+  DocNode getNode() {
+    auto N = DocNode(&KindAndDocs[size_t(Type::Nil)]);
+    return N;
+  }
+
+  /// Create an Int node associated with this Document.
+  DocNode getNode(int64_t V) {
+    auto N = DocNode(&KindAndDocs[size_t(Type::Int)]);
+    N.Int = V;
+    return N;
+  }
+
+  /// Create an Int node associated with this Document.
+  DocNode getNode(int V) {
+    auto N = DocNode(&KindAndDocs[size_t(Type::Int)]);
+    N.Int = V;
+    return N;
+  }
+
+  /// Create a UInt node associated with this Document.
+  DocNode getNode(uint64_t V) {
+    auto N = DocNode(&KindAndDocs[size_t(Type::UInt)]);
+    N.UInt = V;
+    return N;
+  }
+
+  /// Create a UInt node associated with this Document.
+  DocNode getNode(unsigned V) {
+    auto N = DocNode(&KindAndDocs[size_t(Type::UInt)]);
+    N.UInt = V;
+    return N;
+  }
+
+  /// Create a Boolean node associated with this Document.
+  DocNode getNode(bool V) {
+    auto N = DocNode(&KindAndDocs[size_t(Type::Boolean)]);
+    N.Bool = V;
+    return N;
+  }
+
+  /// Create a Float node associated with this Document.
+  DocNode getNode(double V) {
+    auto N = DocNode(&KindAndDocs[size_t(Type::Float)]);
+    N.Float = V;
+    return N;
+  }
+
+  /// Create a String node associated with this Document. If !Copy, the passed
+  /// string must remain valid for the lifetime of the Document.
+  DocNode getNode(StringRef V, bool Copy = false) {
+    if (Copy)
+      V = addString(V);
+    auto N = DocNode(&KindAndDocs[size_t(Type::String)]);
+    N.Raw = V;
+    return N;
+  }
+
+  /// Create a String node associated with this Document. If !Copy, the passed
+  /// string must remain valid for the lifetime of the Document.
+  DocNode getNode(const char *V, bool Copy = false) {
+    return getNode(StringRef(V), Copy);
+  }
+
+  /// Create an empty Map node associated with this Document.
+  MapDocNode getMapNode() {
+    auto N = DocNode(&KindAndDocs[size_t(Type::Map)]);
+    Maps.push_back(std::unique_ptr<DocNode::MapTy>(new DocNode::MapTy));
+    N.Map = Maps.back().get();
+    return N.getMap();
+  }
+
+  /// Create an empty Array node associated with this Document.
+  ArrayDocNode getArrayNode() {
+    auto N = DocNode(&KindAndDocs[size_t(Type::Array)]);
+    Arrays.push_back(std::unique_ptr<DocNode::ArrayTy>(new DocNode::ArrayTy));
+    N.Array = Arrays.back().get();
+    return N.getArray();
+  }
+
+  /// Read a MsgPack document from a binary MsgPack blob.
+  /// The blob data must remain valid for the lifetime of this Document (because
+  /// a string object in the document contains a StringRef into the original
+  /// blob).
+  /// If Multi, then this sets root to an array and adds top-level objects to
+  /// it. If !Multi, then it only reads a single top-level object, even if there
+  /// are more, and sets root to that.
+  /// Returns false if failed due to illegal format.
+  bool readFromBlob(StringRef Blob, bool Multi);
+
+  /// Write a MsgPack document to a binary MsgPack blob.
+  void writeToBlob(std::string &Blob);
+
+  /// Copy S into the Document's strings list; return the Document-owned copy.
+  StringRef addString(StringRef S) {
+    Strings.push_back(std::unique_ptr<char[]>(new char[S.size()]));
+    if (S.size() != 0) // An empty S may have null data; never memcpy from it.
+      memcpy(Strings.back().get(), S.data(), S.size());
+    return StringRef(Strings.back().get(), S.size());
+  }
+
+  /// Set whether YAML output uses hex for UInt. Default off.
+  void setHexMode(bool Val = true) { HexMode = Val; }
+
+  /// Get Hexmode flag.
+  bool getHexMode() const { return HexMode; }
+
+  /// Convert MsgPack Document to YAML text.
+  void toYAML(raw_ostream &OS);
+
+  /// Read YAML text into the MsgPack document. Returns false on failure.
+  bool fromYAML(StringRef S);
+};
+
+} // namespace msgpack
+} // namespace llvm
+
+#endif // LLVM_BINARYFORMAT_MSGPACKDOCUMENT_H
diff --git a/lib/BinaryFormat/CMakeLists.txt b/lib/BinaryFormat/CMakeLists.txt
index d645279..88ad036 100644
--- a/lib/BinaryFormat/CMakeLists.txt
+++ b/lib/BinaryFormat/CMakeLists.txt
@@ -2,6 +2,8 @@
   AMDGPUMetadataVerifier.cpp
   Dwarf.cpp
   Magic.cpp
+  MsgPackDocument.cpp
+  MsgPackDocumentYAML.cpp
   MsgPackReader.cpp
   MsgPackTypes.cpp
   MsgPackWriter.cpp
diff --git a/lib/BinaryFormat/MsgPackDocument.cpp b/lib/BinaryFormat/MsgPackDocument.cpp
new file mode 100644
index 0000000..e12c54a
--- /dev/null
+++ b/lib/BinaryFormat/MsgPackDocument.cpp
@@ -0,0 +1,245 @@
+//===-- MsgPackDocument.cpp - MsgPack Document --------------------------*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file implements a class that exposes a simple in-memory representation
+/// of a document of MsgPack objects, that can be read from MsgPack, written to
+/// MsgPack, and inspected and modified in memory. This is intended to be a
+/// lighter-weight (in terms of memory allocations) replacement for
+/// MsgPackTypes.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/BinaryFormat/MsgPackDocument.h"
+#include "llvm/BinaryFormat/MsgPackWriter.h"
+
+using namespace llvm;
+using namespace msgpack;
+
+// Convert this DocNode into an empty array.
+void DocNode::convertToArray() { *this = getDocument()->getArrayNode(); }
+
+// Convert this DocNode into an empty map.
+void DocNode::convertToMap() { *this = getDocument()->getMapNode(); }
+
+/// Find the key in the MapDocNode.
+DocNode::MapTy::iterator MapDocNode::find(StringRef S) {
+  return find(getDocument()->getNode(S));
+}
+
+/// Member access for MapDocNode. The string data must remain valid for the
+/// lifetime of the Document.
+DocNode &MapDocNode::operator[](StringRef S) {
+  return (*this)[getDocument()->getNode(S)];
+}
+
+/// Member access for MapDocNode.
+DocNode &MapDocNode::operator[](DocNode Key) {
+  assert(!Key.isEmpty());
+  MapTy::value_type Entry(Key, DocNode());
+  auto ItAndInserted = Map->insert(Entry);
+  if (ItAndInserted.second) {
+    // Ensure a new element has its KindAndDoc initialized.
+    ItAndInserted.first->second = getDocument()->getNode();
+  }
+  return ItAndInserted.first->second;
+}
+
+/// Array element access. An out-of-range Index grows the array to Index + 1
+/// elements; the added elements are nil nodes so their KindAndDoc is set.
+DocNode &ArrayDocNode::operator[](size_t Index) {
+  ArrayTy &Elements = *Array;
+  if (Index >= Elements.size())
+    Elements.resize(Index + 1, getDocument()->getNode());
+  return Elements[Index];
+}
+
+// A level in the document reading stack.
+struct StackLevel {
+  DocNode Node;
+  size_t Length;
+  // Points to map entry when we have just processed a map key.
+  DocNode *MapEntry;
+};
+
+// Read a document from a binary msgpack blob.
+// The blob data must remain valid for the lifetime of this Document (because a
+// string object in the document contains a StringRef into the original blob).
+// If Multi, then this sets root to an array and adds top-level objects to it.
+// If !Multi, then it only reads a single top-level object, even if there are
+// more, and sets root to that. Returns false if failed due to illegal format,
+// an unsupported object kind, or a map key that is itself a map or array.
+bool Document::readFromBlob(StringRef Blob, bool Multi) {
+  msgpack::Reader MPReader(Blob);
+  SmallVector<StackLevel, 4> Stack;
+  if (Multi) {
+    // Create the array for multiple top-level objects.
+    Root = getArrayNode();
+    Stack.push_back(StackLevel({Root, (size_t)-1, nullptr}));
+  }
+  do {
+    // On to next element (or key if doing a map key next).
+    // Read the value.
+    Object Obj;
+    if (!MPReader.read(Obj)) {
+      if (Multi && Stack.size() == 1) {
+        // OK to finish here as we've just done a top-level element with Multi
+        break;
+      }
+      return false; // Finished too early
+    }
+    // Convert it into a DocNode.
+    DocNode Node;
+    switch (Obj.Kind) {
+    case Type::Nil:
+      Node = getNode();
+      break;
+    case Type::Int:
+      Node = getNode(Obj.Int);
+      break;
+    case Type::UInt:
+      Node = getNode(Obj.UInt);
+      break;
+    case Type::Boolean:
+      Node = getNode(Obj.Bool);
+      break;
+    case Type::Float:
+      Node = getNode(Obj.Float);
+      break;
+    case Type::String:
+      Node = getNode(Obj.Raw);
+      break;
+    case Type::Map:
+      Node = getMapNode();
+      break;
+    case Type::Array:
+      Node = getArrayNode();
+      break;
+    default:
+      return false; // Raw and Extension not supported
+    }
+
+    // Store it.
+    if (Stack.empty())
+      Root = Node;
+    else if (Stack.back().Node.getKind() == Type::Array) {
+      // Reading an array entry.
+      auto &Array = Stack.back().Node.getArray();
+      Array.push_back(Node);
+    } else {
+      auto &Map = Stack.back().Node.getMap();
+      if (!Stack.back().MapEntry) {
+        // Reading a map key. Only scalars can be ordered by DocNode's
+        // operator<, so reject a blob that uses a map or array as a key.
+        if (!Node.isScalar())
+          return false;
+        Stack.back().MapEntry = &Map[Node];
+      } else {
+        // Reading the value for the map key read in the last iteration.
+        *Stack.back().MapEntry = Node;
+        Stack.back().MapEntry = nullptr;
+      }
+    }
+
+    // See if we're starting a new array or map; if so, its Length entries are
+    // read by the following iterations.
+    if (!Node.isScalar())
+      Stack.push_back(StackLevel({Node, Obj.Length, nullptr}));
+
+    // Pop finished stack levels. A level is finished once it holds Length
+    // entries; a map with a key still waiting for its value (MapEntry set) is
+    // never finished, even though the key already counts towards size().
+    while (!Stack.empty()) {
+      if (Stack.back().Node.getKind() == msgpack::Type::Array) {
+        if (Stack.back().Node.getArray().size() != Stack.back().Length)
+          break;
+      } else {
+        if (Stack.back().MapEntry ||
+            Stack.back().Node.getMap().size() != Stack.back().Length)
+          break;
+      }
+      Stack.pop_back();
+    }
+  } while (!Stack.empty());
+  return true;
+}
+
+struct WriterStackLevel {
+  DocNode Node;
+  DocNode::MapTy::iterator MapIt;
+  DocNode::ArrayTy::iterator ArrayIt;
+  bool OnKey;
+};
+
+/// Write a MsgPack document to a binary MsgPack blob. Blob is cleared first,
+/// then the tree under the root node is emitted depth-first without recursion.
+void Document::writeToBlob(std::string &Blob) {
+  Blob.clear();
+  raw_string_ostream OS(Blob);
+  msgpack::Writer MPWriter(OS);
+  SmallVector<WriterStackLevel, 4> Stack;
+  DocNode Node = getRoot();
+  for (;;) {
+    switch (Node.getKind()) {
+    case Type::Array:
+      MPWriter.writeArraySize(Node.getArray().size());
+      Stack.push_back(
+          {Node, DocNode::MapTy::iterator(), Node.getArray().begin(), false});
+      break;
+    case Type::Map:
+      MPWriter.writeMapSize(Node.getMap().size());
+      Stack.push_back(
+          {Node, Node.getMap().begin(), DocNode::ArrayTy::iterator(), true});
+      break;
+    case Type::Nil:
+      MPWriter.writeNil();
+      break;
+    case Type::Boolean:
+      MPWriter.write(Node.getBool());
+      break;
+    case Type::Int:
+      MPWriter.write(Node.getInt());
+      break;
+    case Type::UInt:
+      MPWriter.write(Node.getUInt());
+      break;
+    case Type::Float:
+      // readFromBlob and fromString both create Float nodes.
+      MPWriter.write(Node.getFloat());
+      break;
+    case Type::String:
+      MPWriter.write(Node.getString());
+      break;
+    default:
+      llvm_unreachable("unhandled msgpack object kind");
+    }
+    // Pop finished stack levels.
+    while (!Stack.empty()) {
+      WriterStackLevel &Top = Stack.back();
+      if (Top.Node.getKind() == Type::Map
+              ? Top.MapIt != Top.Node.getMap().end()
+              : Top.ArrayIt != Top.Node.getArray().end())
+        break; // This level still has entries left to write.
+      Stack.pop_back();
+    }
+    if (Stack.empty())
+      break;
+    // Get the next value.
+    WriterStackLevel &Top = Stack.back();
+    if (Top.Node.getKind() != Type::Map) {
+      Node = *Top.ArrayIt++;
+    } else if (Top.OnKey) {
+      // Do the key of a key,value pair in a map.
+      Node = Top.MapIt->first;
+      Top.OnKey = false;
+    } else {
+      // Do the value, then move on to the next pair's key.
+      Node = (Top.MapIt++)->second;
+      Top.OnKey = true;
+    }
+  }
+}
+
diff --git a/lib/BinaryFormat/MsgPackDocumentYAML.cpp b/lib/BinaryFormat/MsgPackDocumentYAML.cpp
new file mode 100644
index 0000000..ae67d7b
--- /dev/null
+++ b/lib/BinaryFormat/MsgPackDocumentYAML.cpp
@@ -0,0 +1,249 @@
+//===-- MsgPackDocumentYAML.cpp - MsgPack Document YAML interface -------*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// This file implements YAMLIO on a msgpack::Document.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/BinaryFormat/MsgPackDocument.h"
+#include "llvm/Support/YAMLTraits.h"
+
+using namespace llvm;
+using namespace msgpack;
+
+namespace {
+
+// Struct used to represent scalar node. (MapDocNode and ArrayDocNode already
+// exist in MsgPackDocument.h.)
+struct ScalarDocNode : DocNode {
+  ScalarDocNode(DocNode N) : DocNode(N) {}
+
+  /// Get the YAML tag for this ScalarDocNode. This normally returns ""; it only
+  /// returns something else if the result of toString would be ambiguous, e.g.
+  /// a string that parses as a number or boolean.
+  StringRef getYAMLTag() const;
+};
+
+} // namespace
+
+/// Convert this DocNode to a string, assuming it is scalar.
+std::string DocNode::toString() const {
+  std::string S;
+  raw_string_ostream OS(S);
+  switch (getKind()) {
+  case msgpack::Type::String:
+    OS << Raw;
+    break;
+  case msgpack::Type::Nil:
+    break;
+  case msgpack::Type::Boolean:
+    OS << (Bool ? "true" : "false");
+    break;
+  case msgpack::Type::Int:
+    OS << Int;
+    break;
+  case msgpack::Type::UInt:
+    if (getDocument()->getHexMode())
+      OS << format("%#llx", (unsigned long long)UInt);
+    else
+      OS << UInt;
+    break;
+  case msgpack::Type::Float:
+    OS << Float;
+    break;
+  default:
+    llvm_unreachable("not scalar");
+    break;
+  }
+  return S;
+}
+
+/// Parse S and set this DocNode to the resulting scalar. A string result is
+/// copied into the Document's strings list, so S need not outlive this call.
+/// Tag is "" or a YAML tag. Returns "" on success, else the parser's error.
+StringRef DocNode::fromString(StringRef S, StringRef Tag) {
+  if (Tag == "tag:yaml.org,2002:str")
+    Tag = ""; // Handled exactly like an untagged scalar.
+  if (Tag == "!int" || Tag == "") {
+    // Try unsigned int then signed int.
+    *this = getDocument()->getNode(uint64_t(0));
+    StringRef Err = yaml::ScalarTraits<uint64_t>::input(S, nullptr, getUInt());
+    if (Err != "") {
+      *this = getDocument()->getNode(int64_t(0));
+      Err = yaml::ScalarTraits<int64_t>::input(S, nullptr, getInt());
+    }
+    if (Err == "" || Tag != "")
+      return Err; // Parsed, or "!int" was explicit: do not try other kinds.
+  }
+  if (Tag == "!nil") {
+    *this = getDocument()->getNode(); // S is ignored for an explicit nil.
+    return "";
+  }
+  if (Tag == "!bool" || Tag == "") {
+    *this = getDocument()->getNode(false);
+    StringRef Err = yaml::ScalarTraits<bool>::input(S, nullptr, getBool());
+    if (Err == "" || Tag != "")
+      return Err; // Untagged input that is not a bool falls through to float.
+  }
+  if (Tag == "!float" || Tag == "") {
+    *this = getDocument()->getNode(0.0);
+    StringRef Err = yaml::ScalarTraits<double>::input(S, nullptr, getFloat());
+    if (Err == "" || Tag != "")
+      return Err; // Untagged input that is not a float falls through to string.
+  }
+  assert((Tag == "!str" || Tag == "") && "unsupported tag");
+  std::string V;
+  StringRef Err = yaml::ScalarTraits<std::string>::input(S, nullptr, V);
+  if (Err == "")
+    *this = getDocument()->getNode(V, /*Copy=*/true); // V is a local: copy it.
+  return Err;
+}
+
+/// Work out which YAML tag, if any, must accompany this scalar on output. The
+/// result is "" unless the text from toString would read back as a different
+/// kind, e.g. a string that parses as a number or boolean.
+StringRef ScalarDocNode::getYAMLTag() const {
+  const msgpack::Type Kind = getKind();
+  if (Kind == msgpack::Type::Nil)
+    return "!nil";
+  // Round-trip through text with no tag and look at the kind that comes back.
+  // If it differs from our own kind, the output needs a tag.
+  ScalarDocNode Reparsed = getDocument()->getNode();
+  Reparsed.fromString(toString(), "");
+  const msgpack::Type ReadBack = Reparsed.getKind();
+  // Int and UInt count as one kind here, as tags do not differentiate between
+  // them anyway.
+  auto IsInteger = [](msgpack::Type T) {
+    return T == msgpack::Type::Int || T == msgpack::Type::UInt;
+  };
+  if (ReadBack == Kind || (IsInteger(ReadBack) && IsInteger(Kind)))
+    return "";
+  // The kind changed, so a tag is required.
+  switch (Kind) {
+  case msgpack::Type::Int:
+  case msgpack::Type::UInt:
+    return "!int";
+  case msgpack::Type::String:
+    return "!str";
+  case msgpack::Type::Boolean:
+    return "!bool";
+  case msgpack::Type::Float:
+    return "!float";
+  default:
+    llvm_unreachable("unrecognized kind");
+  }
+}
+
+namespace llvm {
+namespace yaml {
+
+/// YAMLIO for DocNode
+template <> struct PolymorphicTraits<DocNode> {
+
+  static NodeKind getKind(const DocNode &N) {
+    switch (N.getKind()) {
+    case msgpack::Type::Map:
+      return NodeKind::Map;
+    case msgpack::Type::Array:
+      return NodeKind::Sequence;
+    default:
+      return NodeKind::Scalar;
+    }
+  }
+
+  static MapDocNode &getAsMap(DocNode &N) { return N.getMap(/*Convert=*/true); }
+
+  static ArrayDocNode &getAsSequence(DocNode &N) {
+    N.getArray(/*Convert=*/true);
+    return *static_cast<ArrayDocNode *>(&N);
+  }
+
+  static ScalarDocNode &getAsScalar(DocNode &N) {
+    return *static_cast<ScalarDocNode *>(&N);
+  }
+};
+
+/// YAMLIO for ScalarDocNode
+template <> struct TaggedScalarTraits<ScalarDocNode> {
+
+  static void output(const ScalarDocNode &S, void *Ctxt, raw_ostream &OS,
+                     raw_ostream &TagOS) {
+    TagOS << S.getYAMLTag();
+    OS << S.toString();
+  }
+
+  static StringRef input(StringRef Str, StringRef Tag, void *Ctxt,
+                         ScalarDocNode &S) {
+    return S.fromString(Str, Tag);
+  }
+
+  static QuotingType mustQuote(const ScalarDocNode &S, StringRef ScalarStr) {
+    switch (S.getKind()) {
+    case Type::Int:
+      return ScalarTraits<int64_t>::mustQuote(ScalarStr);
+    case Type::UInt:
+      return ScalarTraits<uint64_t>::mustQuote(ScalarStr);
+    case Type::Nil:
+      return ScalarTraits<StringRef>::mustQuote(ScalarStr);
+    case Type::Boolean:
+      return ScalarTraits<bool>::mustQuote(ScalarStr);
+    case Type::Float:
+      return ScalarTraits<double>::mustQuote(ScalarStr);
+    case Type::Binary:
+    case Type::String:
+      return ScalarTraits<std::string>::mustQuote(ScalarStr);
+    default:
+      llvm_unreachable("unrecognized ScalarKind");
+    }
+  }
+};
+
+/// YAMLIO for MapDocNode
+template <> struct CustomMappingTraits<MapDocNode> {
+  /// Read one entry: parse Key as an untagged scalar and map its value.
+  static void inputOne(IO &IO, StringRef Key, MapDocNode &M) {
+    ScalarDocNode KeyObj = M.getDocument()->getNode();
+    KeyObj.fromString(Key, "");
+    IO.mapRequired(Key.str().c_str(), M.getMap()[KeyObj]);
+  }
+
+  /// Write every entry, using the key's toString text as the YAML key.
+  static void output(IO &IO, MapDocNode &M) {
+    // Iterate by reference so that no map entry is copied.
+    for (auto &I : M.getMap())
+      IO.mapRequired(I.first.toString().c_str(), I.second);
+  }
+};
+
+/// YAMLIO for ArrayNode
+template <> struct SequenceTraits<ArrayDocNode> {
+
+  static size_t size(IO &IO, ArrayDocNode &A) { return A.size(); }
+
+  static DocNode &element(IO &IO, ArrayDocNode &A, size_t Index) {
+    return A[Index];
+  }
+};
+
+} // namespace yaml
+} // namespace llvm
+
+/// Convert MsgPack Document to YAML text.
+void msgpack::Document::toYAML(raw_ostream &OS) {
+  yaml::Output Yout(OS);
+  Yout << getRoot();
+}
+
+/// Read YAML text into the MsgPack document. Returns false on failure.
+bool msgpack::Document::fromYAML(StringRef S) {
+  clear();
+  yaml::Input Yin(S);
+  Yin >> getRoot();
+  return !Yin.error();
+}
+
diff --git a/unittests/BinaryFormat/CMakeLists.txt b/unittests/BinaryFormat/CMakeLists.txt
index 82d76ec..00c2898 100644
--- a/unittests/BinaryFormat/CMakeLists.txt
+++ b/unittests/BinaryFormat/CMakeLists.txt
@@ -5,6 +5,7 @@
 add_llvm_unittest(BinaryFormatTests
   DwarfTest.cpp
   MachOTest.cpp
+  MsgPackDocumentTest.cpp
   MsgPackReaderTest.cpp
   MsgPackTypesTest.cpp
   MsgPackWriterTest.cpp
diff --git a/unittests/BinaryFormat/MsgPackDocumentTest.cpp b/unittests/BinaryFormat/MsgPackDocumentTest.cpp
new file mode 100644
index 0000000..77b7e29
--- /dev/null
+++ b/unittests/BinaryFormat/MsgPackDocumentTest.cpp
@@ -0,0 +1,168 @@
+//===- MsgPackDocumentTest.cpp --------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/BinaryFormat/MsgPackDocument.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace msgpack;
+
+TEST(MsgPackDocument, TestReadInt) {
+  Document Doc;
+  bool Ok = Doc.readFromBlob(StringRef("\xd0\x00", 2), /*Multi=*/false);
+  ASSERT_TRUE(Ok);
+  ASSERT_EQ(Doc.getRoot().getKind(), Type::Int);
+  ASSERT_EQ(Doc.getRoot().getInt(), 0);
+}
+
+TEST(MsgPackDocument, TestReadArray) {
+  Document Doc;
+  bool Ok = Doc.readFromBlob(StringRef("\x92\xd0\x01\xc0"), /*Multi=*/false);
+  ASSERT_TRUE(Ok);
+  ASSERT_EQ(Doc.getRoot().getKind(), Type::Array);
+  auto A = Doc.getRoot().getArray();
+  ASSERT_EQ(A.size(), 2u);
+  auto SI = A[0];
+  ASSERT_EQ(SI.getKind(), Type::Int);
+  ASSERT_EQ(SI.getInt(), 1);
+  auto SN = A[1];
+  ASSERT_EQ(SN.getKind(), Type::Nil);
+}
+
+TEST(MsgPackDocument, TestReadMap) {
+  Document Doc;
+  bool Ok = Doc.readFromBlob(StringRef("\x82\xa3"
+                                       "foo"
+                                       "\xd0\x01\xa3"
+                                       "bar"
+                                       "\xd0\x02"),
+                             /*Multi=*/false);
+  ASSERT_TRUE(Ok);
+  ASSERT_EQ(Doc.getRoot().getKind(), Type::Map);
+  auto M = Doc.getRoot().getMap();
+  ASSERT_EQ(M.size(), 2u);
+  auto FooS = M["foo"];
+  ASSERT_EQ(FooS.getKind(), Type::Int);
+  ASSERT_EQ(FooS.getInt(), 1);
+  auto BarS = M["bar"];
+  ASSERT_EQ(BarS.getKind(), Type::Int);
+  ASSERT_EQ(BarS.getInt(), 2);
+}
+
+TEST(MsgPackDocument, TestWriteInt) {
+  Document Doc;
+  Doc.getRoot() = Doc.getNode(int64_t(1));
+  std::string Buffer;
+  Doc.writeToBlob(Buffer);
+  ASSERT_EQ(Buffer, "\x01");
+}
+
+TEST(MsgPackDocument, TestWriteArray) {
+  Document Doc;
+  auto A = Doc.getRoot().getArray(/*Convert=*/true);
+  A.push_back(Doc.getNode(int64_t(1)));
+  A.push_back(Doc.getNode());
+  std::string Buffer;
+  Doc.writeToBlob(Buffer);
+  ASSERT_EQ(Buffer, "\x92\x01\xc0");
+}
+
+TEST(MsgPackDocument, TestWriteMap) {
+  Document Doc;
+  auto M = Doc.getRoot().getMap(/*Convert=*/true);
+  M["foo"] = Doc.getNode(int64_t(1));
+  M["bar"] = Doc.getNode(int64_t(2));
+  std::string Buffer;
+  Doc.writeToBlob(Buffer);
+  ASSERT_EQ(Buffer, "\x82\xa3"
+                    "bar"
+                    "\x02\xa3"
+                    "foo"
+                    "\x01");
+}
+
+TEST(MsgPackDocument, TestOutputYAMLArray) {
+  Document Doc;
+  auto A = Doc.getRoot().getArray(/*Convert=*/true);
+  A.push_back(Doc.getNode(int64_t(1)));
+  A.push_back(Doc.getNode(int64_t(2)));
+  std::string Buffer;
+  raw_string_ostream OStream(Buffer);
+  Doc.toYAML(OStream);
+  ASSERT_EQ(OStream.str(), "---\n- 1\n- 2\n...\n");
+}
+
+TEST(MsgPackDocument, TestInputYAMLArray) {
+  Document Doc;
+  bool Ok = Doc.fromYAML("---\n- !int 0x1\n- !str 2\n...\n");
+  ASSERT_TRUE(Ok);
+  ASSERT_EQ(Doc.getRoot().getKind(), Type::Array);
+  auto A = Doc.getRoot().getArray();
+  ASSERT_EQ(A.size(), 2u);
+  auto SI = A[0];
+  ASSERT_EQ(SI.getKind(), Type::UInt);
+  ASSERT_EQ(SI.getUInt(), 1u);
+  auto SS = A[1];
+  ASSERT_EQ(SS.getKind(), Type::String);
+  ASSERT_EQ(SS.getString(), "2");
+}
+
+TEST(MsgPackDocument, TestOutputYAMLMap) {
+  Document Doc;
+  auto M = Doc.getRoot().getMap(/*Convert=*/true);
+  M["foo"] = Doc.getNode(int64_t(1));
+  M["bar"] = Doc.getNode(uint64_t(2));
+  auto N = Doc.getMapNode();
+  M["qux"] = N;
+  N["baz"] = Doc.getNode(true);
+  std::string Buffer;
+  raw_string_ostream OStream(Buffer);
+  Doc.toYAML(OStream);
+  ASSERT_EQ(OStream.str(), "---\n"
+                           "bar:             2\n"
+                           "foo:             1\n"
+                           "qux:             \n"
+                           "  baz:             true\n"
+                           "...\n");
+}
+
+TEST(MsgPackDocument, TestOutputYAMLMapHex) {
+  Document Doc;
+  Doc.setHexMode();
+  auto M = Doc.getRoot().getMap(/*Convert=*/true);
+  M["foo"] = Doc.getNode(int64_t(1));
+  M["bar"] = Doc.getNode(uint64_t(2));
+  auto N = Doc.getMapNode();
+  M["qux"] = N;
+  N["baz"] = Doc.getNode(true);
+  std::string Buffer;
+  raw_string_ostream OStream(Buffer);
+  Doc.toYAML(OStream);
+  ASSERT_EQ(OStream.str(), "---\n"
+                           "bar:             0x2\n"
+                           "foo:             1\n"
+                           "qux:             \n"
+                           "  baz:             true\n"
+                           "...\n");
+}
+
+TEST(MsgPackDocument, TestInputYAMLMap) {
+  Document Doc;
+  bool Ok = Doc.fromYAML("---\nfoo: !int 0x1\nbaz: !str 2\n...\n");
+  ASSERT_TRUE(Ok);
+  ASSERT_EQ(Doc.getRoot().getKind(), Type::Map);
+  auto M = Doc.getRoot().getMap();
+  ASSERT_EQ(M.size(), 2u);
+  auto SI = M["foo"];
+  ASSERT_EQ(SI.getKind(), Type::UInt);
+  ASSERT_EQ(SI.getUInt(), 1u);
+  auto SS = M["baz"];
+  ASSERT_EQ(SS.getKind(), Type::String);
+  ASSERT_EQ(SS.getString(), "2");
+}