lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp - llvm-project/llvm - Git at Google

 //===- DbiStreamBuilder.cpp - PDB Dbi Stream Creation -----------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 // The data structures defined in this file are based on the reference
 // implementation which is available at
 // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/dbi/gsi.cpp
 //
 //===----------------------------------------------------------------------===//

 #include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h"
 #include "llvm/DebugInfo/CodeView/RecordName.h"
 #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
 #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
 #include "llvm/DebugInfo/CodeView/SymbolSerializer.h"
 #include "llvm/DebugInfo/MSF/MSFBuilder.h"
 #include "llvm/DebugInfo/MSF/MSFCommon.h"
 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
 #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
 #include "llvm/DebugInfo/PDB/Native/Hash.h"
 #include "llvm/Support/BinaryItemStream.h"
 #include "llvm/Support/BinaryStreamWriter.h"
 #include "llvm/Support/Parallel.h"
 #include "llvm/Support/xxhash.h"
 #include <algorithm>
 #include <vector>

 using namespace llvm;
 using namespace llvm::msf;
 using namespace llvm::pdb;
 using namespace llvm::codeview;

 // Helper class for building the public and global PDB hash table buckets.
 struct llvm::pdb::GSIHashStreamBuilder {
   // Sum of the size of all public or global records.
   uint32_t RecordByteSize = 0;

   std::vector<PSHashRecord> HashRecords;

   // The hash bitmap has `ceil((IPHR_HASH + 1) / 32)` words in it. The
   // reference implementation builds a hash table with IPHR_HASH buckets in it.
   // The last bucket is used to link together free hash table cells in a linked
   // list, but it is always empty in the compressed, on-disk format. However,
   // the bitmap must have a bit for it.
   std::array<support::ulittle32_t, (IPHR_HASH + 32) / 32> HashBitmap;

   std::vector<support::ulittle32_t> HashBuckets;

   uint32_t calculateSerializedLength() const;
   Error commit(BinaryStreamWriter &Writer);

   void finalizePublicBuckets();
   void finalizeGlobalBuckets(uint32_t RecordZeroOffset);

   // Assign public and global symbol records into hash table buckets.
   // Modifies the list of records to store the bucket index, but does not
   // change the order.
   void finalizeBuckets(uint32_t RecordZeroOffset,
                        MutableArrayRef<BulkPublic> Globals);
 };

 // DenseMapInfo implementation for deduplicating symbol records.
 struct llvm::pdb::SymbolDenseMapInfo {
   static inline CVSymbol getEmptyKey() {
     static CVSymbol Empty;
     return Empty;
   }
   static inline CVSymbol getTombstoneKey() {
     static CVSymbol Tombstone(
         DenseMapInfo<ArrayRef<uint8_t>>::getTombstoneKey());
     return Tombstone;
   }
   static unsigned getHashValue(const CVSymbol &Val) {
     return xxHash64(Val.RecordData);
   }
   static bool isEqual(const CVSymbol &LHS, const CVSymbol &RHS) {
     return LHS.RecordData == RHS.RecordData;
   }
 };

 namespace {
 LLVM_PACKED_START
 struct PublicSym32Layout {
   RecordPrefix Prefix;
   PublicSym32Header Pub;
   // char Name[];
 };
 LLVM_PACKED_END
 } // namespace

 // Calculate how much memory this public needs when serialized.
 static uint32_t sizeOfPublic(const BulkPublic &Pub) {
   uint32_t NameLen = Pub.NameLen;
   NameLen = std::min(NameLen,
                      uint32_t(MaxRecordLength - sizeof(PublicSym32Layout) - 1));
   return alignTo(sizeof(PublicSym32Layout) + NameLen + 1, 4);
 }

 static CVSymbol serializePublic(uint8_t *Mem, const BulkPublic &Pub) {
   // Assume the caller has allocated sizeOfPublic bytes.
   uint32_t NameLen = std::min(
       Pub.NameLen, uint32_t(MaxRecordLength - sizeof(PublicSym32Layout) - 1));
   size_t Size = alignTo(sizeof(PublicSym32Layout) + NameLen + 1, 4);
   assert(Size == sizeOfPublic(Pub));
   auto *FixedMem = reinterpret_cast<PublicSym32Layout *>(Mem);
   FixedMem->Prefix.RecordKind = static_cast<uint16_t>(codeview::S_PUB32);
   FixedMem->Prefix.RecordLen = static_cast<uint16_t>(Size - 2);
   FixedMem->Pub.Flags = Pub.Flags;
   FixedMem->Pub.Offset = Pub.Offset;
   FixedMem->Pub.Segment = Pub.Segment;
   char *NameMem = reinterpret_cast<char *>(FixedMem + 1);
   memcpy(NameMem, Pub.Name, NameLen);
   // Zero the null terminator and remaining bytes.
   memset(&NameMem[NameLen], 0, Size - sizeof(PublicSym32Layout) - NameLen);
   return CVSymbol(makeArrayRef(reinterpret_cast<uint8_t *>(Mem), Size));
 }

 uint32_t GSIHashStreamBuilder::calculateSerializedLength() const {
   uint32_t Size = sizeof(GSIHashHeader);
   Size += HashRecords.size() * sizeof(PSHashRecord);
   Size += HashBitmap.size() * sizeof(uint32_t);
   Size += HashBuckets.size() * sizeof(uint32_t);
   return Size;
 }

 Error GSIHashStreamBuilder::commit(BinaryStreamWriter &Writer) {
   GSIHashHeader Header;
   Header.VerSignature = GSIHashHeader::HdrSignature;
   Header.VerHdr = GSIHashHeader::HdrVersion;
   Header.HrSize = HashRecords.size() * sizeof(PSHashRecord);
   Header.NumBuckets = HashBitmap.size() * 4 + HashBuckets.size() * 4;

   if (auto EC = Writer.writeObject(Header))
     return EC;

   if (auto EC = Writer.writeArray(makeArrayRef(HashRecords)))
     return EC;
   if (auto EC = Writer.writeArray(makeArrayRef(HashBitmap)))
     return EC;
   if (auto EC = Writer.writeArray(makeArrayRef(HashBuckets)))
     return EC;
   return Error::success();
 }

 static bool isAsciiString(StringRef S) {
   return llvm::all_of(S, [](char C) { return unsigned(C) < 0x80; });
 }

 // See `caseInsensitiveComparePchPchCchCch` in gsi.cpp
 static int gsiRecordCmp(StringRef S1, StringRef S2) {
   size_t LS = S1.size();
   size_t RS = S2.size();
   // Shorter strings always compare less than longer strings.
   if (LS != RS)
     return (LS > RS) - (LS < RS);

   // If either string contains non ascii characters, memcmp them.
   if (LLVM_UNLIKELY(!isAsciiString(S1) || !isAsciiString(S2)))
     return memcmp(S1.data(), S2.data(), LS);

   // Both strings are ascii, perform a case-insensitive comparison.
   return S1.compare_lower(S2.data());
 }

 void GSIStreamBuilder::finalizePublicBuckets() {
   PSH->finalizeBuckets(0, Publics);
 }

 void GSIStreamBuilder::finalizeGlobalBuckets(uint32_t RecordZeroOffset) {
   // Build up a list of globals to be bucketed. Use the BulkPublic data
   // structure for this purpose, even though these are global records, not
   // public records. Most of the same fields are required:
   // - Name
   // - NameLen
   // - SymOffset
   // - BucketIdx
   // The dead fields are Offset, Segment, and Flags.
   std::vector<BulkPublic> Records;
   Records.resize(Globals.size());
   uint32_t SymOffset = RecordZeroOffset;
   for (size_t I = 0, E = Globals.size(); I < E; ++I) {
     StringRef Name = getSymbolName(Globals[I]);
     Records[I].Name = Name.data();
     Records[I].NameLen = Name.size();
     Records[I].SymOffset = SymOffset;
     SymOffset += Globals[I].length();
   }

   GSH->finalizeBuckets(RecordZeroOffset, Records);
 }

 void GSIHashStreamBuilder::finalizeBuckets(
     uint32_t RecordZeroOffset, MutableArrayRef<BulkPublic> Records) {
   // Hash every name in parallel.
   parallelForEachN(0, Records.size(), [&](size_t I) {
     Records[I].setBucketIdx(hashStringV1(Records[I].Name) % IPHR_HASH);
   });

   // Count up the size of each bucket. Then, use an exclusive prefix sum to
   // calculate the bucket start offsets. This is C++17 std::exclusive_scan, but
   // we can't use it yet.
   uint32_t BucketStarts[IPHR_HASH] = {0};
   for (const BulkPublic &P : Records)
     ++BucketStarts[P.BucketIdx];
   uint32_t Sum = 0;
   for (uint32_t &B : BucketStarts) {
     uint32_t Size = B;
     B = Sum;
     Sum += Size;
   }

   // Place globals into the hash table in bucket order. When placing a global,
   // update the bucket start. Every hash table slot should be filled. Always use
   // a refcount of one for now.
   HashRecords.resize(Records.size());
   uint32_t BucketCursors[IPHR_HASH];
   memcpy(BucketCursors, BucketStarts, sizeof(BucketCursors));
   for (int I = 0, E = Records.size(); I < E; ++I) {
     uint32_t HashIdx = BucketCursors[Records[I].BucketIdx]++;
     HashRecords[HashIdx].Off = I;
     HashRecords[HashIdx].CRef = 1;
   }

   // Within the buckets, sort each bucket by memcmp of the symbol's name.  It's
   // important that we use the same sorting algorithm as is used by the
   // reference implementation to ensure that the search for a record within a
   // bucket can properly early-out when it detects the record won't be found.
   // The algorithm used here corresponds to the function
   // caseInsensitiveComparePchPchCchCch in the reference implementation.
   parallelForEachN(0, IPHR_HASH, [&](size_t I) {
     auto B = HashRecords.begin() + BucketStarts[I];
     auto E = HashRecords.begin() + BucketCursors[I];
     if (B == E)
       return;
     auto BucketCmp = [Records](const PSHashRecord &LHash,
                                const PSHashRecord &RHash) {
       const BulkPublic &L = Records[uint32_t(LHash.Off)];
       const BulkPublic &R = Records[uint32_t(RHash.Off)];
       assert(L.BucketIdx == R.BucketIdx);
       int Cmp = gsiRecordCmp(L.getName(), R.getName());
       if (Cmp != 0)
         return Cmp < 0;
       // This comparison is necessary to make the sorting stable in the presence
       // of two static globals with the same name. The easiest way to observe
       // this is with S_LDATA32 records.
       return L.SymOffset < R.SymOffset;
     };
     llvm::sort(B, E, BucketCmp);

     // After we are done sorting, replace the global indices with the stream
     // offsets of each global. Add one when writing symbol offsets to disk.
     // See GSI1::fixSymRecs.
     for (PSHashRecord &HRec : make_range(B, E))
       HRec.Off = Records[uint32_t(HRec.Off)].SymOffset + 1;
   });

   // For each non-empty bucket, push the bucket start offset into HashBuckets
   // and set a bit in the hash bitmap.
   for (uint32_t I = 0; I < HashBitmap.size(); ++I) {
     uint32_t Word = 0;
     for (uint32_t J = 0; J < 32; ++J) {
       // Skip empty buckets.
       uint32_t BucketIdx = I * 32 + J;
       if (BucketIdx >= IPHR_HASH ||
           BucketStarts[BucketIdx] == BucketCursors[BucketIdx])
         continue;
       Word |= (1U << J);

       // Calculate what the offset of the first hash record in the chain would
       // be if it were inflated to contain 32-bit pointers. On a 32-bit system,
       // each record would be 12 bytes. See HROffsetCalc in gsi.h.
       const int SizeOfHROffsetCalc = 12;
       ulittle32_t ChainStartOff =
           ulittle32_t(BucketStarts[BucketIdx] * SizeOfHROffsetCalc);
       HashBuckets.push_back(ChainStartOff);
     }
     HashBitmap[I] = Word;
   }
 }

 GSIStreamBuilder::GSIStreamBuilder(msf::MSFBuilder &Msf)
     : Msf(Msf), PSH(std::make_unique<GSIHashStreamBuilder>()),
       GSH(std::make_unique<GSIHashStreamBuilder>()) {}

 GSIStreamBuilder::~GSIStreamBuilder() {}

 uint32_t GSIStreamBuilder::calculatePublicsHashStreamSize() const {
   uint32_t Size = 0;
   Size += sizeof(PublicsStreamHeader);
   Size += PSH->calculateSerializedLength();
   Size += Publics.size() * sizeof(uint32_t); // AddrMap
   // FIXME: Add thunk map and section offsets for incremental linking.

   return Size;
 }

 uint32_t GSIStreamBuilder::calculateGlobalsHashStreamSize() const {
   return GSH->calculateSerializedLength();
 }

 Error GSIStreamBuilder::finalizeMsfLayout() {
   // First we write public symbol records, then we write global symbol records.
   finalizePublicBuckets();
   finalizeGlobalBuckets(PSH->RecordByteSize);

   Expected<uint32_t> Idx = Msf.addStream(calculateGlobalsHashStreamSize());
   if (!Idx)
     return Idx.takeError();
   GlobalsStreamIndex = *Idx;

   Idx = Msf.addStream(calculatePublicsHashStreamSize());
   if (!Idx)
     return Idx.takeError();
   PublicsStreamIndex = *Idx;

   uint32_t RecordBytes = PSH->RecordByteSize + GSH->RecordByteSize;

   Idx = Msf.addStream(RecordBytes);
   if (!Idx)
     return Idx.takeError();
   RecordStreamIndex = *Idx;
   return Error::success();
 }

 void GSIStreamBuilder::addPublicSymbols(std::vector<BulkPublic> &&PublicsIn) {
   assert(Publics.empty() && PSH->RecordByteSize == 0 &&
          "publics can only be added once");
   Publics = std::move(PublicsIn);

   // Sort the symbols by name. PDBs contain lots of symbols, so use parallelism.
   parallelSort(Publics, [](const BulkPublic &L, const BulkPublic &R) {
     return L.getName() < R.getName();
   });

   // Assign offsets and calculate the length of the public symbol records.
   uint32_t SymOffset = 0;
   for (BulkPublic &Pub : Publics) {
     Pub.SymOffset = SymOffset;
     SymOffset += sizeOfPublic(Pub);
   }

   // Remember the length of the public stream records.
   PSH->RecordByteSize = SymOffset;
 }

 void GSIStreamBuilder::addGlobalSymbol(const ProcRefSym &Sym) {
   serializeAndAddGlobal(Sym);
 }

 void GSIStreamBuilder::addGlobalSymbol(const DataSym &Sym) {
   serializeAndAddGlobal(Sym);
 }

 void GSIStreamBuilder::addGlobalSymbol(const ConstantSym &Sym) {
   serializeAndAddGlobal(Sym);
 }

 template <typename T>
 void GSIStreamBuilder::serializeAndAddGlobal(const T &Symbol) {
   T Copy(Symbol);
   addGlobalSymbol(SymbolSerializer::writeOneSymbol(Copy, Msf.getAllocator(),
                                                    CodeViewContainer::Pdb));
 }

 void GSIStreamBuilder::addGlobalSymbol(const codeview::CVSymbol &Symbol) {
   // Ignore duplicate typedefs and constants.
   if (Symbol.kind() == S_UDT || Symbol.kind() == S_CONSTANT) {
     auto Iter = GlobalsSeen.insert(Symbol);
     if (!Iter.second)
       return;
   }
   GSH->RecordByteSize += Symbol.length();
   Globals.push_back(Symbol);
 }

 // Serialize each public and write it.
 static Error writePublics(BinaryStreamWriter &Writer,
                           ArrayRef<BulkPublic> Publics) {
   std::vector<uint8_t> Storage;
   for (const BulkPublic &Pub : Publics) {
     Storage.resize(sizeOfPublic(Pub));
     serializePublic(Storage.data(), Pub);
     if (Error E = Writer.writeBytes(Storage))
       return E;
   }
   return Error::success();
 }

 static Error writeRecords(BinaryStreamWriter &Writer,
                           ArrayRef<CVSymbol> Records) {
   BinaryItemStream<CVSymbol> ItemStream(support::endianness::little);
   ItemStream.setItems(Records);
   BinaryStreamRef RecordsRef(ItemStream);
   return Writer.writeStreamRef(RecordsRef);
 }

 Error GSIStreamBuilder::commitSymbolRecordStream(
     WritableBinaryStreamRef Stream) {
   BinaryStreamWriter Writer(Stream);

   // Write public symbol records first, followed by global symbol records.  This
   // must match the order that we assume in finalizeMsfLayout when computing
   // PSHZero and GSHZero.
   if (auto EC = writePublics(Writer, Publics))
     return EC;
   if (auto EC = writeRecords(Writer, Globals))
     return EC;

   return Error::success();
 }

 static std::vector<support::ulittle32_t>
 computeAddrMap(ArrayRef<BulkPublic> Publics) {
   // Build a parallel vector of indices into the Publics vector, and sort it by
   // address.
   std::vector<ulittle32_t> PubAddrMap;
   PubAddrMap.reserve(Publics.size());
   for (int I = 0, E = Publics.size(); I < E; ++I)
     PubAddrMap.push_back(ulittle32_t(I));

   auto AddrCmp = [Publics](const ulittle32_t &LIdx, const ulittle32_t &RIdx) {
     const BulkPublic &L = Publics[LIdx];
     const BulkPublic &R = Publics[RIdx];
     if (L.Segment != R.Segment)
       return L.Segment < R.Segment;
     if (L.Offset != R.Offset)
       return L.Offset < R.Offset;
     // parallelSort is unstable, so we have to do name comparison to ensure
     // that two names for the same location come out in a deterministic order.
     return L.getName() < R.getName();
   };
   parallelSort(PubAddrMap, AddrCmp);

   // Rewrite the public symbol indices into symbol offsets.
   for (ulittle32_t &Entry : PubAddrMap)
     Entry = Publics[Entry].SymOffset;
   return PubAddrMap;
 }

 Error GSIStreamBuilder::commitPublicsHashStream(
     WritableBinaryStreamRef Stream) {
   BinaryStreamWriter Writer(Stream);
   PublicsStreamHeader Header;

   // FIXME: Fill these in. They are for incremental linking.
   Header.SymHash = PSH->calculateSerializedLength();
   Header.AddrMap = Publics.size() * 4;
   Header.NumThunks = 0;
   Header.SizeOfThunk = 0;
   Header.ISectThunkTable = 0;
   memset(Header.Padding, 0, sizeof(Header.Padding));
   Header.OffThunkTable = 0;
   Header.NumSections = 0;
   if (auto EC = Writer.writeObject(Header))
     return EC;

   if (auto EC = PSH->commit(Writer))
     return EC;

   std::vector<support::ulittle32_t> PubAddrMap = computeAddrMap(Publics);
   assert(PubAddrMap.size() == Publics.size());
   if (auto EC = Writer.writeArray(makeArrayRef(PubAddrMap)))
     return EC;

   return Error::success();
 }

 Error GSIStreamBuilder::commitGlobalsHashStream(
     WritableBinaryStreamRef Stream) {
   BinaryStreamWriter Writer(Stream);
   return GSH->commit(Writer);
 }

 Error GSIStreamBuilder::commit(const msf::MSFLayout &Layout,
                                WritableBinaryStreamRef Buffer) {
   auto GS = WritableMappedBlockStream::createIndexedStream(
       Layout, Buffer, getGlobalsStreamIndex(), Msf.getAllocator());
   auto PS = WritableMappedBlockStream::createIndexedStream(
       Layout, Buffer, getPublicsStreamIndex(), Msf.getAllocator());
   auto PRS = WritableMappedBlockStream::createIndexedStream(
       Layout, Buffer, getRecordStreamIndex(), Msf.getAllocator());

   if (auto EC = commitSymbolRecordStream(*PRS))
     return EC;
   if (auto EC = commitGlobalsHashStream(*GS))
     return EC;
   if (auto EC = commitPublicsHashStream(*PS))
     return EC;
   return Error::success();
 }
	//===- DbiStreamBuilder.cpp - PDB Dbi Stream Creation ------------ C++ --===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//
	//
	// The data structures defined in this file are based on the reference
	// implementation which is available at
	// https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/dbi/gsi.cpp
	//
	//===----------------------------------------------------------------------===//

	#include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h"
	#include "llvm/DebugInfo/CodeView/RecordName.h"
	#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
	#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
	#include "llvm/DebugInfo/CodeView/SymbolSerializer.h"
	#include "llvm/DebugInfo/MSF/MSFBuilder.h"
	#include "llvm/DebugInfo/MSF/MSFCommon.h"
	#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
	#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
	#include "llvm/DebugInfo/PDB/Native/Hash.h"
	#include "llvm/Support/BinaryItemStream.h"
	#include "llvm/Support/BinaryStreamWriter.h"
	#include "llvm/Support/Parallel.h"
	#include "llvm/Support/xxhash.h"
	#include <algorithm>
	#include <vector>

	using namespace llvm;
	using namespace llvm::msf;
	using namespace llvm::pdb;
	using namespace llvm::codeview;

	// Helper class for building the public and global PDB hash table buckets.
	struct llvm::pdb::GSIHashStreamBuilder {
	// Sum of the size of all public or global records.
	uint32_t RecordByteSize = 0;

	std::vector<PSHashRecord> HashRecords;

	// The hash bitmap has `ceil((IPHR_HASH + 1) / 32)` words in it. The
	// reference implementation builds a hash table with IPHR_HASH buckets in it.
	// The last bucket is used to link together free hash table cells in a linked
	// list, but it is always empty in the compressed, on-disk format. However,
	// the bitmap must have a bit for it.
	std::array<support::ulittle32_t, (IPHR_HASH + 32) / 32> HashBitmap;

	std::vector<support::ulittle32_t> HashBuckets;

	uint32_t calculateSerializedLength() const;
	Error commit(BinaryStreamWriter &Writer);

	void finalizePublicBuckets();
	void finalizeGlobalBuckets(uint32_t RecordZeroOffset);

	// Assign public and global symbol records into hash table buckets.
	// Modifies the list of records to store the bucket index, but does not
	// change the order.
	void finalizeBuckets(uint32_t RecordZeroOffset,
	MutableArrayRef<BulkPublic> Globals);
	};

	// DenseMapInfo implementation for deduplicating symbol records.
	struct llvm::pdb::SymbolDenseMapInfo {
	static inline CVSymbol getEmptyKey() {
	static CVSymbol Empty;
	return Empty;
	}
	static inline CVSymbol getTombstoneKey() {
	static CVSymbol Tombstone(
	DenseMapInfo<ArrayRef<uint8_t>>::getTombstoneKey());
	return Tombstone;
	}
	static unsigned getHashValue(const CVSymbol &Val) {
	return xxHash64(Val.RecordData);
	}
	static bool isEqual(const CVSymbol &LHS, const CVSymbol &RHS) {
	return LHS.RecordData == RHS.RecordData;
	}
	};

	namespace {
	LLVM_PACKED_START
	struct PublicSym32Layout {
	RecordPrefix Prefix;
	PublicSym32Header Pub;
	// char Name[];
	};
	LLVM_PACKED_END
	} // namespace

	// Calculate how much memory this public needs when serialized.
	static uint32_t sizeOfPublic(const BulkPublic &Pub) {
	uint32_t NameLen = Pub.NameLen;
	NameLen = std::min(NameLen,
	uint32_t(MaxRecordLength - sizeof(PublicSym32Layout) - 1));
	return alignTo(sizeof(PublicSym32Layout) + NameLen + 1, 4);
	}

	static CVSymbol serializePublic(uint8_t *Mem, const BulkPublic &Pub) {
	// Assume the caller has allocated sizeOfPublic bytes.
	uint32_t NameLen = std::min(
	Pub.NameLen, uint32_t(MaxRecordLength - sizeof(PublicSym32Layout) - 1));
	size_t Size = alignTo(sizeof(PublicSym32Layout) + NameLen + 1, 4);
	assert(Size == sizeOfPublic(Pub));
	auto FixedMem = reinterpret_cast<PublicSym32Layout >(Mem);
	FixedMem->Prefix.RecordKind = static_cast<uint16_t>(codeview::S_PUB32);
	FixedMem->Prefix.RecordLen = static_cast<uint16_t>(Size - 2);
	FixedMem->Pub.Flags = Pub.Flags;
	FixedMem->Pub.Offset = Pub.Offset;
	FixedMem->Pub.Segment = Pub.Segment;
	char NameMem = reinterpret_cast<char >(FixedMem + 1);
	memcpy(NameMem, Pub.Name, NameLen);
	// Zero the null terminator and remaining bytes.
	memset(&NameMem[NameLen], 0, Size - sizeof(PublicSym32Layout) - NameLen);
	return CVSymbol(makeArrayRef(reinterpret_cast<uint8_t *>(Mem), Size));
	}

	uint32_t GSIHashStreamBuilder::calculateSerializedLength() const {
	uint32_t Size = sizeof(GSIHashHeader);
	Size += HashRecords.size() * sizeof(PSHashRecord);
	Size += HashBitmap.size() * sizeof(uint32_t);
	Size += HashBuckets.size() * sizeof(uint32_t);
	return Size;
	}

	Error GSIHashStreamBuilder::commit(BinaryStreamWriter &Writer) {
	GSIHashHeader Header;
	Header.VerSignature = GSIHashHeader::HdrSignature;
	Header.VerHdr = GSIHashHeader::HdrVersion;
	Header.HrSize = HashRecords.size() * sizeof(PSHashRecord);
	Header.NumBuckets = HashBitmap.size() * 4 + HashBuckets.size() * 4;

	if (auto EC = Writer.writeObject(Header))
	return EC;

	if (auto EC = Writer.writeArray(makeArrayRef(HashRecords)))
	return EC;
	if (auto EC = Writer.writeArray(makeArrayRef(HashBitmap)))
	return EC;
	if (auto EC = Writer.writeArray(makeArrayRef(HashBuckets)))
	return EC;
	return Error::success();
	}

	static bool isAsciiString(StringRef S) {
	return llvm::all_of(S, [](char C) { return unsigned(C) < 0x80; });
	}

	// See `caseInsensitiveComparePchPchCchCch` in gsi.cpp
	static int gsiRecordCmp(StringRef S1, StringRef S2) {
	size_t LS = S1.size();
	size_t RS = S2.size();
	// Shorter strings always compare less than longer strings.
	if (LS != RS)
	return (LS > RS) - (LS < RS);

	// If either string contains non ascii characters, memcmp them.
	if (LLVM_UNLIKELY(!isAsciiString(S1) \|\| !isAsciiString(S2)))
	return memcmp(S1.data(), S2.data(), LS);

	// Both strings are ascii, perform a case-insensitive comparison.
	return S1.compare_lower(S2.data());
	}

	void GSIStreamBuilder::finalizePublicBuckets() {
	PSH->finalizeBuckets(0, Publics);
	}

	void GSIStreamBuilder::finalizeGlobalBuckets(uint32_t RecordZeroOffset) {
	// Build up a list of globals to be bucketed. Use the BulkPublic data
	// structure for this purpose, even though these are global records, not
	// public records. Most of the same fields are required:
	// - Name
	// - NameLen
	// - SymOffset
	// - BucketIdx
	// The dead fields are Offset, Segment, and Flags.
	std::vector<BulkPublic> Records;
	Records.resize(Globals.size());
	uint32_t SymOffset = RecordZeroOffset;
	for (size_t I = 0, E = Globals.size(); I < E; ++I) {
	StringRef Name = getSymbolName(Globals[I]);
	Records[I].Name = Name.data();
	Records[I].NameLen = Name.size();
	Records[I].SymOffset = SymOffset;
	SymOffset += Globals[I].length();
	}

	GSH->finalizeBuckets(RecordZeroOffset, Records);
	}

	void GSIHashStreamBuilder::finalizeBuckets(
	uint32_t RecordZeroOffset, MutableArrayRef<BulkPublic> Records) {
	// Hash every name in parallel.
	parallelForEachN(0, Records.size(), [&](size_t I) {
	Records[I].setBucketIdx(hashStringV1(Records[I].Name) % IPHR_HASH);
	});

	// Count up the size of each bucket. Then, use an exclusive prefix sum to
	// calculate the bucket start offsets. This is C++17 std::exclusive_scan, but
	// we can't use it yet.
	uint32_t BucketStarts[IPHR_HASH] = {0};
	for (const BulkPublic &P : Records)
	++BucketStarts[P.BucketIdx];
	uint32_t Sum = 0;
	for (uint32_t &B : BucketStarts) {
	uint32_t Size = B;
	B = Sum;
	Sum += Size;
	}

	// Place globals into the hash table in bucket order. When placing a global,
	// update the bucket start. Every hash table slot should be filled. Always use
	// a refcount of one for now.
	HashRecords.resize(Records.size());
	uint32_t BucketCursors[IPHR_HASH];
	memcpy(BucketCursors, BucketStarts, sizeof(BucketCursors));
	for (int I = 0, E = Records.size(); I < E; ++I) {
	uint32_t HashIdx = BucketCursors[Records[I].BucketIdx]++;
	HashRecords[HashIdx].Off = I;
	HashRecords[HashIdx].CRef = 1;
	}

	// Within the buckets, sort each bucket by memcmp of the symbol's name. It's
	// important that we use the same sorting algorithm as is used by the
	// reference implementation to ensure that the search for a record within a
	// bucket can properly early-out when it detects the record won't be found.
	// The algorithm used here corresponds to the function
	// caseInsensitiveComparePchPchCchCch in the reference implementation.
	parallelForEachN(0, IPHR_HASH, [&](size_t I) {
	auto B = HashRecords.begin() + BucketStarts[I];
	auto E = HashRecords.begin() + BucketCursors[I];
	if (B == E)
	return;
	auto BucketCmp = [Records](const PSHashRecord &LHash,
	const PSHashRecord &RHash) {
	const BulkPublic &L = Records[uint32_t(LHash.Off)];
	const BulkPublic &R = Records[uint32_t(RHash.Off)];
	assert(L.BucketIdx == R.BucketIdx);
	int Cmp = gsiRecordCmp(L.getName(), R.getName());
	if (Cmp != 0)
	return Cmp < 0;
	// This comparison is necessary to make the sorting stable in the presence
	// of two static globals with the same name. The easiest way to observe
	// this is with S_LDATA32 records.
	return L.SymOffset < R.SymOffset;
	};
	llvm::sort(B, E, BucketCmp);

	// After we are done sorting, replace the global indices with the stream
	// offsets of each global. Add one when writing symbol offsets to disk.
	// See GSI1::fixSymRecs.
	for (PSHashRecord &HRec : make_range(B, E))
	HRec.Off = Records[uint32_t(HRec.Off)].SymOffset + 1;
	});

	// For each non-empty bucket, push the bucket start offset into HashBuckets
	// and set a bit in the hash bitmap.
	for (uint32_t I = 0; I < HashBitmap.size(); ++I) {
	uint32_t Word = 0;
	for (uint32_t J = 0; J < 32; ++J) {
	// Skip empty buckets.
	uint32_t BucketIdx = I * 32 + J;
	if (BucketIdx >= IPHR_HASH \|\|
	BucketStarts[BucketIdx] == BucketCursors[BucketIdx])
	continue;
	Word \|= (1U << J);

	// Calculate what the offset of the first hash record in the chain would
	// be if it were inflated to contain 32-bit pointers. On a 32-bit system,
	// each record would be 12 bytes. See HROffsetCalc in gsi.h.
	const int SizeOfHROffsetCalc = 12;
	ulittle32_t ChainStartOff =
	ulittle32_t(BucketStarts[BucketIdx] * SizeOfHROffsetCalc);
	HashBuckets.push_back(ChainStartOff);
	}
	HashBitmap[I] = Word;
	}
	}

	GSIStreamBuilder::GSIStreamBuilder(msf::MSFBuilder &Msf)
	: Msf(Msf), PSH(std::make_unique<GSIHashStreamBuilder>()),
	GSH(std::make_unique<GSIHashStreamBuilder>()) {}

	GSIStreamBuilder::~GSIStreamBuilder() {}

	uint32_t GSIStreamBuilder::calculatePublicsHashStreamSize() const {
	uint32_t Size = 0;
	Size += sizeof(PublicsStreamHeader);
	Size += PSH->calculateSerializedLength();
	Size += Publics.size() * sizeof(uint32_t); // AddrMap
	// FIXME: Add thunk map and section offsets for incremental linking.

	return Size;
	}

	uint32_t GSIStreamBuilder::calculateGlobalsHashStreamSize() const {
	return GSH->calculateSerializedLength();
	}

	Error GSIStreamBuilder::finalizeMsfLayout() {
	// First we write public symbol records, then we write global symbol records.
	finalizePublicBuckets();
	finalizeGlobalBuckets(PSH->RecordByteSize);

	Expected<uint32_t> Idx = Msf.addStream(calculateGlobalsHashStreamSize());
	if (!Idx)
	return Idx.takeError();
	GlobalsStreamIndex = *Idx;

	Idx = Msf.addStream(calculatePublicsHashStreamSize());
	if (!Idx)
	return Idx.takeError();
	PublicsStreamIndex = *Idx;

	uint32_t RecordBytes = PSH->RecordByteSize + GSH->RecordByteSize;

	Idx = Msf.addStream(RecordBytes);
	if (!Idx)
	return Idx.takeError();
	RecordStreamIndex = *Idx;
	return Error::success();
	}

	void GSIStreamBuilder::addPublicSymbols(std::vector<BulkPublic> &&PublicsIn) {
	assert(Publics.empty() && PSH->RecordByteSize == 0 &&
	"publics can only be added once");
	Publics = std::move(PublicsIn);

	// Sort the symbols by name. PDBs contain lots of symbols, so use parallelism.
	parallelSort(Publics, [](const BulkPublic &L, const BulkPublic &R) {
	return L.getName() < R.getName();
	});

	// Assign offsets and calculate the length of the public symbol records.
	uint32_t SymOffset = 0;
	for (BulkPublic &Pub : Publics) {
	Pub.SymOffset = SymOffset;
	SymOffset += sizeOfPublic(Pub);
	}

	// Remember the length of the public stream records.
	PSH->RecordByteSize = SymOffset;
	}

	void GSIStreamBuilder::addGlobalSymbol(const ProcRefSym &Sym) {
	serializeAndAddGlobal(Sym);
	}

	void GSIStreamBuilder::addGlobalSymbol(const DataSym &Sym) {
	serializeAndAddGlobal(Sym);
	}

	void GSIStreamBuilder::addGlobalSymbol(const ConstantSym &Sym) {
	serializeAndAddGlobal(Sym);
	}

	template <typename T>
	void GSIStreamBuilder::serializeAndAddGlobal(const T &Symbol) {
	T Copy(Symbol);
	addGlobalSymbol(SymbolSerializer::writeOneSymbol(Copy, Msf.getAllocator(),
	CodeViewContainer::Pdb));
	}

	void GSIStreamBuilder::addGlobalSymbol(const codeview::CVSymbol &Symbol) {
	// Ignore duplicate typedefs and constants.
	if (Symbol.kind() == S_UDT \|\| Symbol.kind() == S_CONSTANT) {
	auto Iter = GlobalsSeen.insert(Symbol);
	if (!Iter.second)
	return;
	}
	GSH->RecordByteSize += Symbol.length();
	Globals.push_back(Symbol);
	}

	// Serialize each public and write it.
	static Error writePublics(BinaryStreamWriter &Writer,
	ArrayRef<BulkPublic> Publics) {
	std::vector<uint8_t> Storage;
	for (const BulkPublic &Pub : Publics) {
	Storage.resize(sizeOfPublic(Pub));
	serializePublic(Storage.data(), Pub);
	if (Error E = Writer.writeBytes(Storage))
	return E;
	}
	return Error::success();
	}

	static Error writeRecords(BinaryStreamWriter &Writer,
	ArrayRef<CVSymbol> Records) {
	BinaryItemStream<CVSymbol> ItemStream(support::endianness::little);
	ItemStream.setItems(Records);
	BinaryStreamRef RecordsRef(ItemStream);
	return Writer.writeStreamRef(RecordsRef);
	}

	Error GSIStreamBuilder::commitSymbolRecordStream(
	WritableBinaryStreamRef Stream) {
	BinaryStreamWriter Writer(Stream);

	// Write public symbol records first, followed by global symbol records. This
	// must match the order that we assume in finalizeMsfLayout when computing
	// PSHZero and GSHZero.
	if (auto EC = writePublics(Writer, Publics))
	return EC;
	if (auto EC = writeRecords(Writer, Globals))
	return EC;

	return Error::success();
	}

	static std::vector<support::ulittle32_t>
	computeAddrMap(ArrayRef<BulkPublic> Publics) {
	// Build a parallel vector of indices into the Publics vector, and sort it by
	// address.
	std::vector<ulittle32_t> PubAddrMap;
	PubAddrMap.reserve(Publics.size());
	for (int I = 0, E = Publics.size(); I < E; ++I)
	PubAddrMap.push_back(ulittle32_t(I));

	auto AddrCmp = [Publics](const ulittle32_t &LIdx, const ulittle32_t &RIdx) {
	const BulkPublic &L = Publics[LIdx];
	const BulkPublic &R = Publics[RIdx];
	if (L.Segment != R.Segment)
	return L.Segment < R.Segment;
	if (L.Offset != R.Offset)
	return L.Offset < R.Offset;
	// parallelSort is unstable, so we have to do name comparison to ensure
	// that two names for the same location come out in a deterministic order.
	return L.getName() < R.getName();
	};
	parallelSort(PubAddrMap, AddrCmp);

	// Rewrite the public symbol indices into symbol offsets.
	for (ulittle32_t &Entry : PubAddrMap)
	Entry = Publics[Entry].SymOffset;
	return PubAddrMap;
	}

	Error GSIStreamBuilder::commitPublicsHashStream(
	WritableBinaryStreamRef Stream) {
	BinaryStreamWriter Writer(Stream);
	PublicsStreamHeader Header;

	// FIXME: Fill these in. They are for incremental linking.
	Header.SymHash = PSH->calculateSerializedLength();
	Header.AddrMap = Publics.size() * 4;
	Header.NumThunks = 0;
	Header.SizeOfThunk = 0;
	Header.ISectThunkTable = 0;
	memset(Header.Padding, 0, sizeof(Header.Padding));
	Header.OffThunkTable = 0;
	Header.NumSections = 0;
	if (auto EC = Writer.writeObject(Header))
	return EC;

	if (auto EC = PSH->commit(Writer))
	return EC;

	std::vector<support::ulittle32_t> PubAddrMap = computeAddrMap(Publics);
	assert(PubAddrMap.size() == Publics.size());
	if (auto EC = Writer.writeArray(makeArrayRef(PubAddrMap)))
	return EC;

	return Error::success();
	}

	Error GSIStreamBuilder::commitGlobalsHashStream(
	WritableBinaryStreamRef Stream) {
	BinaryStreamWriter Writer(Stream);
	return GSH->commit(Writer);
	}

	Error GSIStreamBuilder::commit(const msf::MSFLayout &Layout,
	WritableBinaryStreamRef Buffer) {
	auto GS = WritableMappedBlockStream::createIndexedStream(
	Layout, Buffer, getGlobalsStreamIndex(), Msf.getAllocator());
	auto PS = WritableMappedBlockStream::createIndexedStream(
	Layout, Buffer, getPublicsStreamIndex(), Msf.getAllocator());
	auto PRS = WritableMappedBlockStream::createIndexedStream(
	Layout, Buffer, getRecordStreamIndex(), Msf.getAllocator());

	if (auto EC = commitSymbolRecordStream(*PRS))
	return EC;
	if (auto EC = commitGlobalsHashStream(*GS))
	return EC;
	if (auto EC = commitPublicsHashStream(*PS))
	return EC;
	return Error::success();
	}