MinidumpYAML: Add support for ModuleList stream

Summary:
This patch adds support for YAML (de)serialization of the minidump
ModuleList stream. It's a fairly straightforward application of the
existing patterns to the ModuleList structures defined in previous
patches.

One thing that may be worth calling out explicitly is the addition of
"new" allocation functions to the helper BlobAllocator class. The reason
for this is that there was an emerging pattern of needing to allocate
space for entities which do not have a suitable lifetime for use with
the existing allocation functions. A typical example is the "size" of
various lists, which is only available as a temporary returned by the
.size() method of some container. For these cases, one can use the new
set of allocation functions, which take a temporary object and store it
in an allocator-managed buffer until it is written to disk.

Reviewers: amccarth, jhenderson, clayborg, zturner

Subscribers: lldb-commits, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D60405

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@358672 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/include/llvm/BinaryFormat/Minidump.h b/include/llvm/BinaryFormat/Minidump.h
index 2bb75a6..8f3c170 100644
--- a/include/llvm/BinaryFormat/Minidump.h
+++ b/include/llvm/BinaryFormat/Minidump.h
@@ -141,6 +141,10 @@
 };
 static_assert(sizeof(VSFixedFileInfo) == 52, "");
 
+inline bool operator==(const VSFixedFileInfo &LHS, const VSFixedFileInfo &RHS) {
+  return memcmp(&LHS, &RHS, sizeof(VSFixedFileInfo)) == 0;
+}
+
 struct Module {
   support::ulittle64_t BaseOfImage;
   support::ulittle32_t SizeOfImage;
diff --git a/include/llvm/Object/Minidump.h b/include/llvm/Object/Minidump.h
index 0a74fcb..e234f9c 100644
--- a/include/llvm/Object/Minidump.h
+++ b/include/llvm/Object/Minidump.h
@@ -43,6 +43,13 @@
   /// file does not contain a stream of this type.
   Optional<ArrayRef<uint8_t>> getRawStream(minidump::StreamType Type) const;
 
+  /// Returns the raw contents of an object given by the LocationDescriptor. An
+  /// error is returned if the descriptor points outside of the minidump file.
+  Expected<ArrayRef<uint8_t>>
+  getRawData(minidump::LocationDescriptor Desc) const {
+    return getDataSlice(getData(), Desc.RVA, Desc.DataSize);
+  }
+
   /// Returns the minidump string at the given offset. An error is returned if
   /// we fail to parse the string, or the string is invalid UTF16.
   Expected<std::string> getString(size_t Offset) const;
diff --git a/include/llvm/ObjectYAML/MinidumpYAML.h b/include/llvm/ObjectYAML/MinidumpYAML.h
index 0965d64..c4cb2e9 100644
--- a/include/llvm/ObjectYAML/MinidumpYAML.h
+++ b/include/llvm/ObjectYAML/MinidumpYAML.h
@@ -26,6 +26,7 @@
 /// from Types to Kinds is fixed and given by the static getKind function.
 struct Stream {
   enum class StreamKind {
+    ModuleList,
     RawContent,
     SystemInfo,
     TextContent,
@@ -49,6 +50,30 @@
          const object::MinidumpFile &File);
 };
 
+/// A stream representing the list of modules loaded in the process. On disk, it
+/// is represented as a sequence of minidump::Module structures. These contain
+/// pointers to other data structures, like the module's name and CodeView
+/// record. In memory, we represent these as the ParsedModule struct, which
+/// groups minidump::Module with all of its dependent structures in a single
+/// entity.
+struct ModuleListStream : public Stream {
+  struct ParsedModule {
+    minidump::Module Module;
+    std::string Name;
+    yaml::BinaryRef CvRecord;
+    yaml::BinaryRef MiscRecord;
+  };
+  std::vector<ParsedModule> Modules;
+
+  ModuleListStream(std::vector<ParsedModule> Modules = {})
+      : Stream(StreamKind::ModuleList, minidump::StreamType::ModuleList),
+        Modules(std::move(Modules)) {}
+
+  static bool classof(const Stream *S) {
+    return S->Kind == StreamKind::ModuleList;
+  }
+};
+
 /// A minidump stream represented as a sequence of hex bytes. This is used as a
 /// fallback when no other stream kind is suitable.
 struct RawContentStream : public Stream {
@@ -162,8 +187,12 @@
 LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::CPUInfo::ArmInfo)
 LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::CPUInfo::OtherInfo)
 LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::CPUInfo::X86Info)
+LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::VSFixedFileInfo)
+LLVM_YAML_DECLARE_MAPPING_TRAITS(
+    llvm::MinidumpYAML::ModuleListStream::ParsedModule)
 
 LLVM_YAML_IS_SEQUENCE_VECTOR(std::unique_ptr<llvm::MinidumpYAML::Stream>)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MinidumpYAML::ModuleListStream::ParsedModule)
 
 LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::MinidumpYAML::Object)
 
diff --git a/lib/ObjectYAML/MinidumpYAML.cpp b/lib/ObjectYAML/MinidumpYAML.cpp
index db7431b..746c3bb 100644
--- a/lib/ObjectYAML/MinidumpYAML.cpp
+++ b/lib/ObjectYAML/MinidumpYAML.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ObjectYAML/MinidumpYAML.h"
+#include "llvm/Support/Allocator.h"
 #include "llvm/Support/ConvertUTF.h"
 
 using namespace llvm;
@@ -14,6 +15,16 @@
 using namespace llvm::minidump;
 
 namespace {
+/// A helper class to manage the placement of various structures into the final
+/// minidump binary. Space for objects can be allocated via various allocate***
+/// methods, while the final minidump file is written by calling the writeTo
+/// method. The plain versions of allocation functions take a reference to the
+/// data which is to be written (and hence the data must be available until
+/// writeTo is called), while the "New" versions allocate the data in an
+/// allocator-managed buffer, which is available until the allocator object is
+/// destroyed. For both kinds of functions, it is possible to modify the
+/// data for which the space has been "allocated" until the final writeTo call.
+/// This is useful for "linking" the allocated structures via their offsets.
 class BlobAllocator {
 public:
   size_t tell() const { return NextOffset; }
@@ -31,15 +42,31 @@
         Data.size(), [Data](raw_ostream &OS) { OS << toStringRef(Data); });
   }
 
+  size_t allocateBytes(yaml::BinaryRef Data) {
+    return allocateCallback(Data.binary_size(), [Data](raw_ostream &OS) {
+      Data.writeAsBinary(OS);
+    });
+  }
+
   template <typename T> size_t allocateArray(ArrayRef<T> Data) {
     return allocateBytes({reinterpret_cast<const uint8_t *>(Data.data()),
                           sizeof(T) * Data.size()});
   }
 
+  template <typename T, typename RangeType>
+  std::pair<size_t, MutableArrayRef<T>>
+  allocateNewArray(const iterator_range<RangeType> &Range);
+
   template <typename T> size_t allocateObject(const T &Data) {
     return allocateArray(makeArrayRef(Data));
   }
 
+  template <typename T, typename... Types>
+  std::pair<size_t, T *> allocateNewObject(Types &&... Args) {
+    T *Object = new (Temporaries.Allocate<T>()) T(std::forward<Types>(Args)...);
+    return {allocateObject(*Object), Object};
+  }
+
   size_t allocateString(StringRef Str);
 
   void writeTo(raw_ostream &OS) const;
@@ -47,28 +74,33 @@
 private:
   size_t NextOffset = 0;
 
+  BumpPtrAllocator Temporaries;
   std::vector<std::function<void(raw_ostream &)>> Callbacks;
 };
 } // namespace
 
+template <typename T, typename RangeType>
+std::pair<size_t, MutableArrayRef<T>>
+BlobAllocator::allocateNewArray(const iterator_range<RangeType> &Range) {
+  size_t Num = std::distance(Range.begin(), Range.end());
+  MutableArrayRef<T> Array(Temporaries.Allocate<T>(Num), Num);
+  std::uninitialized_copy(Range.begin(), Range.end(), Array.begin());
+  return {allocateArray(Array), Array};
+}
+
 size_t BlobAllocator::allocateString(StringRef Str) {
   SmallVector<UTF16, 32> WStr;
   bool OK = convertUTF8ToUTF16String(Str, WStr);
   assert(OK && "Invalid UTF8 in Str?");
   (void)OK;
 
-  SmallVector<support::ulittle16_t, 32> EndianStr(WStr.size() + 1,
-                                                  support::ulittle16_t());
-  copy(WStr, EndianStr.begin());
-  return allocateCallback(
-      sizeof(uint32_t) + EndianStr.size() * sizeof(support::ulittle16_t),
-      [EndianStr](raw_ostream &OS) {
-        // Length does not include the null-terminator.
-        support::ulittle32_t Length(2 * (EndianStr.size() - 1));
-        OS.write(reinterpret_cast<const char *>(&Length), sizeof(Length));
-        OS.write(reinterpret_cast<const char *>(EndianStr.begin()),
-                 sizeof(support::ulittle16_t) * EndianStr.size());
-      });
+  // The utf16 string is null-terminated, but the terminator is not counted in
+  // the string size.
+  WStr.push_back(0);
+  size_t Result =
+      allocateNewObject<support::ulittle32_t>(2 * (WStr.size() - 1)).first;
+  allocateNewArray<support::ulittle16_t>(make_range(WStr.begin(), WStr.end()));
+  return Result;
 }
 
 void BlobAllocator::writeTo(raw_ostream &OS) const {
@@ -136,6 +168,8 @@
 
 Stream::StreamKind Stream::getKind(StreamType Type) {
   switch (Type) {
+  case StreamType::ModuleList:
+    return StreamKind::ModuleList;
   case StreamType::SystemInfo:
     return StreamKind::SystemInfo;
   case StreamType::LinuxCPUInfo:
@@ -154,6 +188,8 @@
 std::unique_ptr<Stream> Stream::create(StreamType Type) {
   StreamKind Kind = getKind(Type);
   switch (Kind) {
+  case StreamKind::ModuleList:
+    return llvm::make_unique<ModuleListStream>();
   case StreamKind::RawContent:
     return llvm::make_unique<RawContentStream>(Type);
   case StreamKind::SystemInfo:
@@ -270,6 +306,38 @@
   mapOptionalHex(IO, "AMD Extended Features", Info.AMDExtendedFeatures, 0);
 }
 
+void yaml::MappingTraits<VSFixedFileInfo>::mapping(IO &IO,
+                                                   VSFixedFileInfo &Info) {
+  mapOptionalHex(IO, "Signature", Info.Signature, 0);
+  mapOptionalHex(IO, "Struct Version", Info.StructVersion, 0);
+  mapOptionalHex(IO, "File Version High", Info.FileVersionHigh, 0);
+  mapOptionalHex(IO, "File Version Low", Info.FileVersionLow, 0);
+  mapOptionalHex(IO, "Product Version High", Info.ProductVersionHigh, 0);
+  mapOptionalHex(IO, "Product Version Low", Info.ProductVersionLow, 0);
+  mapOptionalHex(IO, "File Flags Mask", Info.FileFlagsMask, 0);
+  mapOptionalHex(IO, "File Flags", Info.FileFlags, 0);
+  mapOptionalHex(IO, "File OS", Info.FileOS, 0);
+  mapOptionalHex(IO, "File Type", Info.FileType, 0);
+  mapOptionalHex(IO, "File Subtype", Info.FileSubtype, 0);
+  mapOptionalHex(IO, "File Date High", Info.FileDateHigh, 0);
+  mapOptionalHex(IO, "File Date Low", Info.FileDateLow, 0);
+}
+
+void yaml::MappingTraits<ModuleListStream::ParsedModule>::mapping(
+    IO &IO, ModuleListStream::ParsedModule &M) {
+  mapRequiredHex(IO, "Base of Image", M.Module.BaseOfImage);
+  mapRequiredHex(IO, "Size of Image", M.Module.SizeOfImage);
+  mapOptionalHex(IO, "Checksum", M.Module.Checksum, 0);
+  IO.mapOptional("Time Date Stamp", M.Module.TimeDateStamp,
+                 support::ulittle32_t(0));
+  IO.mapRequired("Module Name", M.Name);
+  IO.mapOptional("Version Info", M.Module.VersionInfo, VSFixedFileInfo());
+  IO.mapRequired("CodeView Record", M.CvRecord);
+  IO.mapOptional("Misc Record", M.MiscRecord, yaml::BinaryRef());
+  mapOptionalHex(IO, "Reserved0", M.Module.Reserved0, 0);
+  mapOptionalHex(IO, "Reserved1", M.Module.Reserved1, 0);
+}
+
 static void streamMapping(yaml::IO &IO, RawContentStream &Stream) {
   IO.mapOptional("Content", Stream.Content);
   IO.mapOptional("Size", Stream.Size, Stream.Content.binary_size());
@@ -281,6 +349,10 @@
   return "";
 }
 
+static void streamMapping(yaml::IO &IO, ModuleListStream &Stream) {
+  IO.mapRequired("Modules", Stream.Modules);
+}
+
 static void streamMapping(yaml::IO &IO, SystemInfoStream &Stream) {
   SystemInfo &Info = Stream.Info;
   IO.mapRequired("Processor Arch", Info.ProcessorArch);
@@ -324,6 +396,9 @@
   if (!IO.outputting())
     S = MinidumpYAML::Stream::create(Type);
   switch (S->Kind) {
+  case MinidumpYAML::Stream::StreamKind::ModuleList:
+    streamMapping(IO, llvm::cast<ModuleListStream>(*S));
+    break;
   case MinidumpYAML::Stream::StreamKind::RawContent:
     streamMapping(IO, llvm::cast<RawContentStream>(*S));
     break;
@@ -341,6 +416,7 @@
   switch (S->Kind) {
   case MinidumpYAML::Stream::StreamKind::RawContent:
     return streamValidate(cast<RawContentStream>(*S));
+  case MinidumpYAML::Stream::StreamKind::ModuleList:
   case MinidumpYAML::Stream::StreamKind::SystemInfo:
   case MinidumpYAML::Stream::StreamKind::TextContent:
     return "";
@@ -362,6 +438,26 @@
   Result.Location.RVA = File.tell();
   Optional<size_t> DataEnd;
   switch (S.Kind) {
+  case Stream::StreamKind::ModuleList: {
+    ModuleListStream &List = cast<ModuleListStream>(S);
+
+    File.allocateNewObject<support::ulittle32_t>(List.Modules.size());
+    for (ModuleListStream::ParsedModule &M : List.Modules)
+      File.allocateObject(M.Module);
+
+    // Module names and CodeView/Misc records are not a part of the stream.
+    DataEnd = File.tell();
+    for (ModuleListStream::ParsedModule &M : List.Modules) {
+      M.Module.ModuleNameRVA = File.allocateString(M.Name);
+
+      M.Module.CvRecord.RVA = File.allocateBytes(M.CvRecord);
+      M.Module.CvRecord.DataSize = M.CvRecord.binary_size();
+
+      M.Module.MiscRecord.RVA = File.allocateBytes(M.MiscRecord);
+      M.Module.MiscRecord.DataSize = M.MiscRecord.binary_size();
+    }
+    break;
+  }
   case Stream::StreamKind::RawContent: {
     RawContentStream &Raw = cast<RawContentStream>(S);
     File.allocateCallback(Raw.Size, [&Raw](raw_ostream &OS) {
@@ -420,6 +516,26 @@
 Stream::create(const Directory &StreamDesc, const object::MinidumpFile &File) {
   StreamKind Kind = getKind(StreamDesc.Type);
   switch (Kind) {
+  case StreamKind::ModuleList: {
+    auto ExpectedList = File.getModuleList();
+    if (!ExpectedList)
+      return ExpectedList.takeError();
+    std::vector<ModuleListStream::ParsedModule> Modules;
+    for (const Module &M : *ExpectedList) {
+      auto ExpectedName = File.getString(M.ModuleNameRVA);
+      if (!ExpectedName)
+        return ExpectedName.takeError();
+      auto ExpectedCv = File.getRawData(M.CvRecord);
+      if (!ExpectedCv)
+        return ExpectedCv.takeError();
+      auto ExpectedMisc = File.getRawData(M.MiscRecord);
+      if (!ExpectedMisc)
+        return ExpectedMisc.takeError();
+      Modules.push_back(
+          {M, std::move(*ExpectedName), *ExpectedCv, *ExpectedMisc});
+    }
+    return make_unique<ModuleListStream>(std::move(Modules));
+  }
   case StreamKind::RawContent:
     return llvm::make_unique<RawContentStream>(StreamDesc.Type,
                                                File.getRawStream(StreamDesc));
diff --git a/test/tools/obj2yaml/basic-minidump.yaml b/test/tools/obj2yaml/basic-minidump.yaml
index aebaafa..d85b96a 100644
--- a/test/tools/obj2yaml/basic-minidump.yaml
+++ b/test/tools/obj2yaml/basic-minidump.yaml
@@ -15,6 +15,33 @@
       400d9000-400db000 r-xp 00000000 b3:04 227        /system/bin/app_process
       400db000-400dc000 r--p 00001000 b3:04 227        /system/bin/app_process
 
+  - Type:            ModuleList
+    Modules:         
+      - Base of Image:   0x0001020304050607
+        Size of Image:   0x08090A0B
+        Checksum:        0x0C0D0E0F
+        Time Date Stamp: 47
+        Module Name:     a.out
+        Version Info:    
+          Signature:       0x10111213
+          Struct Version:  0x14151617
+          File Version High: 0x18191A1B
+          File Version Low: 0x1C1D1E1F
+          Product Version High: 0x20212223
+          Product Version Low: 0x24252627
+          File Flags Mask: 0x28292A2B
+          File Flags:      0x2C2D2E2F
+          File OS:         0x30313233
+          File Type:       0x34353637
+          File Subtype:    0x38393A3B
+          File Date High:  0x3C3D3E3F
+          File Date Low:   0x40414243
+        CodeView Record: '44454647'
+        Misc Record:     48494A4B
+      - Base of Image:   0x4C4D4E4F50515253
+        Size of Image:   0x54555657
+        Module Name:     libb.so
+        CodeView Record: 58595A5B
 ...
 
 # CHECK:      --- !minidump
@@ -32,4 +59,31 @@
 # CHECK-NEXT:       400d9000-400db000 r-xp 00000000 b3:04 227        /system/bin/app_process
 # CHECK-NEXT:       400db000-400dc000 r--p 00001000 b3:04 227        /system/bin/app_process
 # CHECK-EMPTY:
+# CHECK-NEXT:   - Type:            ModuleList
+# CHECK-NEXT:     Modules:         
+# CHECK-NEXT:       - Base of Image:   0x0001020304050607
+# CHECK-NEXT:         Size of Image:   0x08090A0B
+# CHECK-NEXT:         Checksum:        0x0C0D0E0F
+# CHECK-NEXT:         Time Date Stamp: 47
+# CHECK-NEXT:         Module Name:     a.out
+# CHECK-NEXT:         Version Info:    
+# CHECK-NEXT:           Signature:       0x10111213
+# CHECK-NEXT:           Struct Version:  0x14151617
+# CHECK-NEXT:           File Version High: 0x18191A1B
+# CHECK-NEXT:           File Version Low: 0x1C1D1E1F
+# CHECK-NEXT:           Product Version High: 0x20212223
+# CHECK-NEXT:           Product Version Low: 0x24252627
+# CHECK-NEXT:           File Flags Mask: 0x28292A2B
+# CHECK-NEXT:           File Flags:      0x2C2D2E2F
+# CHECK-NEXT:           File OS:         0x30313233
+# CHECK-NEXT:           File Type:       0x34353637
+# CHECK-NEXT:           File Subtype:    0x38393A3B
+# CHECK-NEXT:           File Date High:  0x3C3D3E3F
+# CHECK-NEXT:           File Date Low:   0x40414243
+# CHECK-NEXT:         CodeView Record: '44454647'
+# CHECK-NEXT:         Misc Record:     48494A4B
+# CHECK-NEXT:       - Base of Image:   0x4C4D4E4F50515253
+# CHECK-NEXT:         Size of Image:   0x54555657
+# CHECK-NEXT:         Module Name:     libb.so
+# CHECK-NEXT:         CodeView Record: 58595A5B
 # CHECK-NEXT: ...