blob: 67b708c5b36a7b3c6e07eb3d4c1ee718f2168d3d [file] [log] [blame]
//===- DebugTypes.cpp -----------------------------------------------------===//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#include "DebugTypes.h"
#include "COFFLinkerContext.h"
#include "Chunks.h"
#include "Driver.h"
#include "InputFiles.h"
#include "PDB.h"
#include "TypeMerger.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
#include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
#include "llvm/DebugInfo/PDB/GenericError.h"
#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/DebugInfo/PDB/Native/TpiHashing.h"
#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Parallel.h"
#include "llvm/Support/Path.h"
using namespace llvm;
using namespace llvm::codeview;
using namespace lld;
using namespace lld::coff;
namespace {
class TypeServerIpiSource;
// The TypeServerSource class represents a PDB type server, a file referenced by
// OBJ files compiled with MSVC /Zi. A single PDB can be shared by several OBJ
// files, therefore there must be only once instance per OBJ lot. The file path
// is discovered from the dependent OBJ's debug type stream. The
// TypeServerSource object is then queued and loaded by the COFF Driver. The
// debug type stream for such PDB files will be merged first in the final PDB,
// before any dependent OBJ.
class TypeServerSource : public TpiSource {
explicit TypeServerSource(COFFLinkerContext &ctx, PDBInputFile *f)
: TpiSource(ctx, PDB, nullptr), pdbInputFile(f) {
if (f->loadErr && *f->loadErr)
pdb::PDBFile &file = f->session->getPDBFile();
auto expectedInfo = file.getPDBInfoStream();
if (!expectedInfo)
Guid = expectedInfo->getGuid();
auto it = ctx.typeServerSourceMappings.emplace(Guid, this);
Error mergeDebugT(TypeMerger *m) override;
void loadGHashes() override;
void remapTpiWithGHashes(GHashState *g) override;
bool isDependency() const override { return true; }
PDBInputFile *pdbInputFile = nullptr;
// TpiSource for IPI stream.
TypeServerIpiSource *ipiSrc = nullptr;
// The PDB signature GUID.
codeview::GUID Guid;
// Companion to TypeServerSource. Stores the index map for the IPI stream in the
// PDB. Modeling PDBs with two sources for TPI and IPI helps establish the
// invariant of one type index space per source.
class TypeServerIpiSource : public TpiSource {
explicit TypeServerIpiSource(COFFLinkerContext &ctx)
: TpiSource(ctx, PDBIpi, nullptr) {}
friend class TypeServerSource;
// All of the TpiSource methods are no-ops. The parent TypeServerSource
// handles both TPI and IPI.
Error mergeDebugT(TypeMerger *m) override { return Error::success(); }
void loadGHashes() override {}
void remapTpiWithGHashes(GHashState *g) override {}
bool isDependency() const override { return true; }
// This class represents the debug type stream of an OBJ file that depends on a
// PDB type server (see TypeServerSource).
class UseTypeServerSource : public TpiSource {
Expected<TypeServerSource *> getTypeServerSource();
UseTypeServerSource(COFFLinkerContext &ctx, ObjFile *f, TypeServer2Record ts)
: TpiSource(ctx, UsingPDB, f), typeServerDependency(ts) {}
Error mergeDebugT(TypeMerger *m) override;
// No need to load ghashes from /Zi objects.
void loadGHashes() override {}
void remapTpiWithGHashes(GHashState *g) override;
// Information about the PDB type server dependency, that needs to be loaded
// in before merging this OBJ.
TypeServer2Record typeServerDependency;
// This class represents the debug type stream of a Microsoft precompiled
// headers OBJ (PCH OBJ). This OBJ kind needs to be merged first in the output
// PDB, before any other OBJs that depend on this. Note that only MSVC generate
// such files, clang does not.
class PrecompSource : public TpiSource {
PrecompSource(COFFLinkerContext &ctx, ObjFile *f) : TpiSource(ctx, PCH, f) {
if (!f->pchSignature || !*f->pchSignature)
fatal(toString(f) +
" claims to be a PCH object, but does not have a valid signature");
auto it = ctx.precompSourceMappings.emplace(*f->pchSignature, this);
if (!it.second)
fatal("a PCH object with the same signature has already been provided (" +
toString(it.first->second->file) + " and " + toString(file) + ")");
void loadGHashes() override;
bool isDependency() const override { return true; }
// This class represents the debug type stream of an OBJ file that depends on a
// Microsoft precompiled headers OBJ (see PrecompSource).
class UsePrecompSource : public TpiSource {
UsePrecompSource(COFFLinkerContext &ctx, ObjFile *f, PrecompRecord precomp)
: TpiSource(ctx, UsingPCH, f), precompDependency(precomp) {}
Error mergeDebugT(TypeMerger *m) override;
void loadGHashes() override;
void remapTpiWithGHashes(GHashState *g) override;
Error mergeInPrecompHeaderObj();
PrecompSource *findObjByName(StringRef fileNameOnly);
PrecompSource *findPrecompSource(ObjFile *file, PrecompRecord &pr);
Expected<PrecompSource *> findPrecompMap(ObjFile *file, PrecompRecord &pr);
// Information about the Precomp OBJ dependency, that needs to be loaded in
// before merging this OBJ.
PrecompRecord precompDependency;
} // namespace
TpiSource::TpiSource(COFFLinkerContext &ctx, TpiKind k, ObjFile *f)
: ctx(ctx), kind(k), tpiSrcIdx(ctx.tpiSourceList.size()), file(f) {
// Vtable key method.
TpiSource::~TpiSource() {
// Silence any assertions about unchecked errors.
TpiSource *lld::coff::makeTpiSource(COFFLinkerContext &ctx, ObjFile *file) {
return make<TpiSource>(ctx, TpiSource::Regular, file);
TpiSource *lld::coff::makeTypeServerSource(COFFLinkerContext &ctx,
PDBInputFile *pdbInputFile) {
// Type server sources come in pairs: the TPI stream, and the IPI stream.
auto *tpiSource = make<TypeServerSource>(ctx, pdbInputFile);
if (pdbInputFile->session->getPDBFile().hasPDBIpiStream())
tpiSource->ipiSrc = make<TypeServerIpiSource>(ctx);
return tpiSource;
TpiSource *lld::coff::makeUseTypeServerSource(COFFLinkerContext &ctx,
ObjFile *file,
TypeServer2Record ts) {
return make<UseTypeServerSource>(ctx, file, ts);
TpiSource *lld::coff::makePrecompSource(COFFLinkerContext &ctx, ObjFile *file) {
return make<PrecompSource>(ctx, file);
TpiSource *lld::coff::makeUsePrecompSource(COFFLinkerContext &ctx,
ObjFile *file,
PrecompRecord precomp) {
return make<UsePrecompSource>(ctx, file, precomp);
bool TpiSource::remapTypeIndex(TypeIndex &ti, TiRefKind refKind) const {
if (ti.isSimple())
return true;
// This can be an item index or a type index. Choose the appropriate map.
ArrayRef<TypeIndex> tpiOrIpiMap =
(refKind == TiRefKind::IndexRef) ? ipiMap : tpiMap;
if (ti.toArrayIndex() >= tpiOrIpiMap.size())
return false;
ti = tpiOrIpiMap[ti.toArrayIndex()];
return true;
void TpiSource::remapRecord(MutableArrayRef<uint8_t> rec,
ArrayRef<TiReference> typeRefs) {
MutableArrayRef<uint8_t> contents = rec.drop_front(sizeof(RecordPrefix));
for (const TiReference &ref : typeRefs) {
unsigned byteSize = ref.Count * sizeof(TypeIndex);
if (contents.size() < ref.Offset + byteSize)
fatal("symbol record too short");
MutableArrayRef<TypeIndex> indices(
reinterpret_cast<TypeIndex *>( + ref.Offset), ref.Count);
for (TypeIndex &ti : indices) {
if (!remapTypeIndex(ti, ref.Kind)) {
if (config->verbose) {
uint16_t kind =
reinterpret_cast<const RecordPrefix *>(>RecordKind;
StringRef fname = file ? file->getName() : "<unknown PDB>";
log("failed to remap type index in record of kind 0x" +
utohexstr(kind) + " in " + fname + " with bad " +
(ref.Kind == TiRefKind::IndexRef ? "item" : "type") +
" index 0x" + utohexstr(ti.getIndex()));
ti = TypeIndex(SimpleTypeKind::NotTranslated);
void TpiSource::remapTypesInTypeRecord(MutableArrayRef<uint8_t> rec) {
// TODO: Handle errors similar to symbols.
SmallVector<TiReference, 32> typeRefs;
discoverTypeIndices(CVType(rec), typeRefs);
remapRecord(rec, typeRefs);
bool TpiSource::remapTypesInSymbolRecord(MutableArrayRef<uint8_t> rec) {
// Discover type index references in the record. Skip it if we don't
// know where they are.
SmallVector<TiReference, 32> typeRefs;
if (!discoverTypeIndicesInSymbol(rec, typeRefs))
return false;
remapRecord(rec, typeRefs);
return true;
// A COFF .debug$H section is currently a clang extension. This function checks
// if a .debug$H section is in a format that we expect / understand, so that we
// can ignore any sections which are coincidentally also named .debug$H but do
// not contain a format we recognize.
static bool canUseDebugH(ArrayRef<uint8_t> debugH) {
if (debugH.size() < sizeof(object::debug_h_header))
return false;
auto *header =
reinterpret_cast<const object::debug_h_header *>(;
debugH = debugH.drop_front(sizeof(object::debug_h_header));
return header->Magic == COFF::DEBUG_HASHES_SECTION_MAGIC &&
header->Version == 0 &&
header->HashAlgorithm == uint16_t(GlobalTypeHashAlg::SHA1_8) &&
(debugH.size() % 8 == 0);
static Optional<ArrayRef<uint8_t>> getDebugH(ObjFile *file) {
SectionChunk *sec =
SectionChunk::findByName(file->getDebugChunks(), ".debug$H");
if (!sec)
return llvm::None;
ArrayRef<uint8_t> contents = sec->getContents();
if (!canUseDebugH(contents))
return None;
return contents;
static ArrayRef<GloballyHashedType>
getHashesFromDebugH(ArrayRef<uint8_t> debugH) {
debugH = debugH.drop_front(sizeof(object::debug_h_header));
uint32_t count = debugH.size() / sizeof(GloballyHashedType);
return {reinterpret_cast<const GloballyHashedType *>(, count};
// Merge .debug$T for a generic object file.
Error TpiSource::mergeDebugT(TypeMerger *m) {
assert(!config->debugGHashes &&
"use remapTpiWithGHashes when ghash is enabled");
CVTypeArray types;
BinaryStreamReader reader(file->debugTypes, support::little);
cantFail(reader.readArray(types, reader.getLength()));
// When dealing with PCH.OBJ, some indices were already merged.
unsigned nbHeadIndices = indexMapStorage.size();
if (auto err = mergeTypeAndIdRecords(
m->idTable, m->typeTable, indexMapStorage, types, file->pchSignature))
fatal("codeview::mergeTypeAndIdRecords failed: " +
// In an object, there is only one mapping for both types and items.
tpiMap = indexMapStorage;
ipiMap = indexMapStorage;
if (config->showSummary) {
nbTypeRecords = indexMapStorage.size() - nbHeadIndices;
nbTypeRecordsBytes = reader.getLength();
// Count how many times we saw each type record in our input. This
// calculation requires a second pass over the type records to classify each
// record as a type or index. This is slow, but this code executes when
// collecting statistics.
uint32_t srcIdx = nbHeadIndices;
for (const CVType &ty : types) {
TypeIndex dstIdx = tpiMap[srcIdx++];
// Type merging may fail, so a complex source type may become the simple
// NotTranslated type, which cannot be used as an array index.
if (dstIdx.isSimple())
SmallVectorImpl<uint32_t> &counts =
isIdRecord(ty.kind()) ? m->ipiCounts : m->tpiCounts;
return Error::success();
// Merge types from a type server PDB.
Error TypeServerSource::mergeDebugT(TypeMerger *m) {
assert(!config->debugGHashes &&
"use remapTpiWithGHashes when ghash is enabled");
pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
if (auto e = expectedTpi.takeError())
fatal("Type server does not have TPI stream: " + toString(std::move(e)));
pdb::TpiStream *maybeIpi = nullptr;
if (pdbFile.hasPDBIpiStream()) {
Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream();
if (auto e = expectedIpi.takeError())
fatal("Error getting type server IPI stream: " + toString(std::move(e)));
maybeIpi = &*expectedIpi;
// Merge TPI first, because the IPI stream will reference type indices.
if (auto err = mergeTypeRecords(m->typeTable, indexMapStorage,
fatal("codeview::mergeTypeRecords failed: " + toString(std::move(err)));
tpiMap = indexMapStorage;
// Merge IPI.
if (maybeIpi) {
if (auto err = mergeIdRecords(m->idTable, tpiMap, ipiSrc->indexMapStorage,
fatal("codeview::mergeIdRecords failed: " + toString(std::move(err)));
ipiMap = ipiSrc->indexMapStorage;
if (config->showSummary) {
nbTypeRecords = tpiMap.size() + ipiMap.size();
nbTypeRecordsBytes =
expectedTpi->typeArray().getUnderlyingStream().getLength() +
(maybeIpi ? maybeIpi->typeArray().getUnderlyingStream().getLength()
: 0);
// Count how many times we saw each type record in our input. If a
// destination type index is present in the source to destination type index
// map, that means we saw it once in the input. Add it to our histogram.
for (TypeIndex ti : tpiMap)
if (!ti.isSimple())
for (TypeIndex ti : ipiMap)
if (!ti.isSimple())
return Error::success();
Expected<TypeServerSource *> UseTypeServerSource::getTypeServerSource() {
const codeview::GUID &tsId = typeServerDependency.getGuid();
StringRef tsPath = typeServerDependency.getName();
TypeServerSource *tsSrc;
auto it = ctx.typeServerSourceMappings.find(tsId);
if (it != ctx.typeServerSourceMappings.end()) {
tsSrc = (TypeServerSource *)it->second;
} else {
// The file failed to load, lookup by name
PDBInputFile *pdb = PDBInputFile::findFromRecordPath(ctx, tsPath, file);
if (!pdb)
return createFileError(tsPath, errorCodeToError(std::error_code(
ENOENT, std::generic_category())));
// If an error occurred during loading, throw it now
if (pdb->loadErr && *pdb->loadErr)
return createFileError(tsPath, std::move(*pdb->loadErr));
tsSrc = (TypeServerSource *)pdb->debugTypesObj;
// Just because a file with a matching name was found and it was an actual
// PDB file doesn't mean it matches. For it to match the InfoStream's GUID
// must match the GUID specified in the TypeServer2 record.
if (tsSrc->Guid != tsId) {
return createFileError(tsPath,
return tsSrc;
Error UseTypeServerSource::mergeDebugT(TypeMerger *m) {
Expected<TypeServerSource *> tsSrc = getTypeServerSource();
if (!tsSrc)
return tsSrc.takeError();
pdb::PDBFile &pdbSession = (*tsSrc)->pdbInputFile->session->getPDBFile();
auto expectedInfo = pdbSession.getPDBInfoStream();
if (!expectedInfo)
return expectedInfo.takeError();
// Reuse the type index map of the type server.
tpiMap = (*tsSrc)->tpiMap;
ipiMap = (*tsSrc)->ipiMap;
return Error::success();
static bool equalsPath(StringRef path1, StringRef path2) {
#if defined(_WIN32)
return path1.equals_insensitive(path2);
return path1.equals(path2);
// Find by name an OBJ provided on the command line
PrecompSource *UsePrecompSource::findObjByName(StringRef fileNameOnly) {
SmallString<128> currentPath;
for (auto kv : ctx.precompSourceMappings) {
StringRef currentFileName = sys::path::filename(kv.second->file->getName(),
// Compare based solely on the file name (link.exe behavior)
if (equalsPath(currentFileName, fileNameOnly))
return (PrecompSource *)kv.second;
return nullptr;
PrecompSource *UsePrecompSource::findPrecompSource(ObjFile *file,
PrecompRecord &pr) {
// Cross-compile warning: given that Clang doesn't generate LF_PRECOMP
// records, we assume the OBJ comes from a Windows build of cl.exe. Thusly,
// the paths embedded in the OBJs are in the Windows format.
SmallString<128> prFileName =
sys::path::filename(pr.getPrecompFilePath(), sys::path::Style::windows);
auto it = ctx.precompSourceMappings.find(pr.getSignature());
if (it != ctx.precompSourceMappings.end()) {
return (PrecompSource *)it->second;
// Lookup by name
return findObjByName(prFileName);
Expected<PrecompSource *> UsePrecompSource::findPrecompMap(ObjFile *file,
PrecompRecord &pr) {
PrecompSource *precomp = findPrecompSource(file, pr);
if (!precomp)
return createFileError(
if (pr.getSignature() != file->pchSignature)
return createFileError(
if (pr.getSignature() != *precomp->file->pchSignature)
return createFileError(
return precomp;
/// Merges a precompiled headers TPI map into the current TPI map. The
/// precompiled headers object will also be loaded and remapped in the
/// process.
Error UsePrecompSource::mergeInPrecompHeaderObj() {
auto e = findPrecompMap(file, precompDependency);
if (!e)
return e.takeError();
PrecompSource *precompSrc = *e;
if (precompSrc->tpiMap.empty())
return Error::success();
assert(precompDependency.getStartTypeIndex() ==
assert(precompDependency.getTypesCount() <= precompSrc->tpiMap.size());
// Use the previously remapped index map from the precompiled headers.
indexMapStorage.insert(indexMapStorage.begin(), precompSrc->tpiMap.begin(),
precompSrc->tpiMap.begin() +
return Error::success();
Error UsePrecompSource::mergeDebugT(TypeMerger *m) {
// This object was compiled with /Yu, so process the corresponding
// precompiled headers object (/Yc) first. Some type indices in the current
// object are referencing data in the precompiled headers object, so we need
// both to be loaded.
if (Error e = mergeInPrecompHeaderObj())
return e;
return TpiSource::mergeDebugT(m);
// Parellel GHash type merging implementation.
void TpiSource::loadGHashes() {
if (Optional<ArrayRef<uint8_t>> debugH = getDebugH(file)) {
ghashes = getHashesFromDebugH(*debugH);
ownedGHashes = false;
} else {
CVTypeArray types;
BinaryStreamReader reader(file->debugTypes, support::little);
cantFail(reader.readArray(types, reader.getLength()));
// Copies ghashes from a vector into an array. These are long lived, so it's
// worth the time to copy these into an appropriately sized vector to reduce
// memory usage.
void TpiSource::assignGHashesFromVector(
std::vector<GloballyHashedType> &&hashVec) {
if (hashVec.empty())
GloballyHashedType *hashes = new GloballyHashedType[hashVec.size()];
memcpy(hashes,, hashVec.size() * sizeof(GloballyHashedType));
ghashes = makeArrayRef(hashes, hashVec.size());
ownedGHashes = true;
// Faster way to iterate type records. forEachTypeChecked is faster than
// iterating CVTypeArray. It avoids virtual readBytes calls in inner loops.
static void forEachTypeChecked(ArrayRef<uint8_t> types,
function_ref<void(const CVType &)> fn) {
forEachCodeViewRecord<CVType>(types, [fn](const CVType &ty) -> Error {
return Error::success();
// Walk over file->debugTypes and fill in the isItemIndex bit vector.
// TODO: Store this information in .debug$H so that we don't have to recompute
// it. This is the main bottleneck slowing down parallel ghashing with one
// thread over single-threaded ghashing.
void TpiSource::fillIsItemIndexFromDebugT() {
uint32_t index = 0;
forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
if (isIdRecord(ty.kind()))
void TpiSource::mergeTypeRecord(TypeIndex curIndex, CVType ty) {
// Decide if the merged type goes into TPI or IPI.
bool isItem = isIdRecord(ty.kind());
MergedInfo &merged = isItem ? mergedIpi : mergedTpi;
// Copy the type into our mutable buffer.
assert(ty.length() <= codeview::MaxRecordLength);
size_t offset = merged.recs.size();
size_t newSize = alignTo(ty.length(), 4);
merged.recs.resize(offset + newSize);
auto newRec = makeMutableArrayRef(&merged.recs[offset], newSize);
memcpy(,, newSize);
// Fix up the record prefix and padding bytes if it required resizing.
if (newSize != ty.length()) {
reinterpret_cast<RecordPrefix *>(>RecordLen = newSize - 2;
for (size_t i = ty.length(); i < newSize; ++i)
newRec[i] = LF_PAD0 + (newSize - i);
// Remap the type indices in the new record.
uint32_t pdbHash = check(pdb::hashTypeRecord(CVType(newRec)));
// Retain a mapping from PDB function id to PDB function type. This mapping is
// used during symbol processing to rewrite S_GPROC32_ID symbols to S_GPROC32
// symbols.
if (ty.kind() == LF_FUNC_ID || ty.kind() == LF_MFUNC_ID) {
bool success = ty.length() >= 12;
TypeIndex funcId = curIndex;
if (success)
success &= remapTypeIndex(funcId, TiRefKind::IndexRef);
TypeIndex funcType =
*reinterpret_cast<const TypeIndex *>(&[8]);
if (success) {
funcIdToType.push_back({funcId, funcType});
} else {
StringRef fname = file ? file->getName() : "<unknown PDB>";
warn("corrupt LF_[M]FUNC_ID record 0x" + utohexstr(curIndex.getIndex()) +
" in " + fname);
void TpiSource::mergeUniqueTypeRecords(ArrayRef<uint8_t> typeRecords,
TypeIndex beginIndex) {
// Re-sort the list of unique types by index.
if (kind == PDB)
assert(std::is_sorted(uniqueTypes.begin(), uniqueTypes.end()));
// Accumulate all the unique types into one buffer in mergedTypes.
uint32_t ghashIndex = 0;
auto nextUniqueIndex = uniqueTypes.begin();
// Pre-compute the number of elements in advance to avoid std::vector resizes.
unsigned nbTpiRecs = 0;
unsigned nbIpiRecs = 0;
forEachTypeChecked(typeRecords, [&](const CVType &ty) {
if (nextUniqueIndex != uniqueTypes.end() &&
*nextUniqueIndex == ghashIndex) {
assert(ty.length() <= codeview::MaxRecordLength);
size_t newSize = alignTo(ty.length(), 4);
(isIdRecord(ty.kind()) ? nbIpiRecs : nbTpiRecs) += newSize;
// Do the actual type merge.
ghashIndex = 0;
nextUniqueIndex = uniqueTypes.begin();
forEachTypeChecked(typeRecords, [&](const CVType &ty) {
if (nextUniqueIndex != uniqueTypes.end() &&
*nextUniqueIndex == ghashIndex) {
mergeTypeRecord(beginIndex + ghashIndex, ty);
assert(nextUniqueIndex == uniqueTypes.end() &&
"failed to merge all desired records");
assert(uniqueTypes.size() ==
mergedTpi.recSizes.size() + mergedIpi.recSizes.size() &&
"missing desired record");
void TpiSource::remapTpiWithGHashes(GHashState *g) {
assert(config->debugGHashes && "ghashes must be enabled");
tpiMap = indexMapStorage;
ipiMap = indexMapStorage;
// TODO: Free all unneeded ghash resources now that we have a full index map.
if (config->showSummary) {
nbTypeRecords = ghashes.size();
nbTypeRecordsBytes = file->debugTypes.size();
// PDBs do not actually store global hashes, so when merging a type server
// PDB we have to synthesize global hashes. To do this, we first synthesize
// global hashes for the TPI stream, since it is independent, then we
// synthesize hashes for the IPI stream, using the hashes for the TPI stream
// as inputs.
void TypeServerSource::loadGHashes() {
// Don't hash twice.
if (!ghashes.empty())
pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
// Hash TPI stream.
Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
if (auto e = expectedTpi.takeError())
fatal("Type server does not have TPI stream: " + toString(std::move(e)));
// Hash IPI stream, which depends on TPI ghashes.
if (!pdbFile.hasPDBIpiStream())
Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream();
if (auto e = expectedIpi.takeError())
fatal("error retrieving IPI stream: " + toString(std::move(e)));
GloballyHashedType::hashIds(expectedIpi->typeArray(), ghashes));
// The IPI stream isItemIndex bitvector should be all ones.
ipiSrc->isItemIndex.set(0, ipiSrc->ghashes.size());
// Flatten discontiguous PDB type arrays to bytes so that we can use
// forEachTypeChecked instead of CVTypeArray iteration. Copying all types from
// type servers is faster than iterating all object files compiled with /Z7 with
// CVTypeArray, which has high overheads due to the virtual interface of
// BinaryStream::readBytes.
static ArrayRef<uint8_t> typeArrayToBytes(const CVTypeArray &types) {
BinaryStreamRef stream = types.getUnderlyingStream();
ArrayRef<uint8_t> debugTypes;
checkError(stream.readBytes(0, stream.getLength(), debugTypes));
return debugTypes;
// Merge types from a type server PDB.
void TypeServerSource::remapTpiWithGHashes(GHashState *g) {
assert(config->debugGHashes && "ghashes must be enabled");
// IPI merging depends on TPI, so do TPI first, then do IPI. No need to
// propagate errors, those should've been handled during ghash loading.
pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
pdb::TpiStream &tpi = check(pdbFile.getPDBTpiStream());
tpiMap = indexMapStorage;
if (pdbFile.hasPDBIpiStream()) {
pdb::TpiStream &ipi = check(pdbFile.getPDBIpiStream());
ipiMap = ipiSrc->indexMapStorage;
ipiSrc->tpiMap = tpiMap;
ipiSrc->ipiMap = ipiMap;
if (config->showSummary) {
nbTypeRecords = ipiSrc->ghashes.size();
nbTypeRecordsBytes = ipi.typeArray().getUnderlyingStream().getLength();
if (config->showSummary) {
nbTypeRecords += ghashes.size();
nbTypeRecordsBytes += tpi.typeArray().getUnderlyingStream().getLength();
void UseTypeServerSource::remapTpiWithGHashes(GHashState *g) {
// No remapping to do with /Zi objects. Simply use the index map from the type
// server. Errors should have been reported earlier. Symbols from this object
// will be ignored.
Expected<TypeServerSource *> maybeTsSrc = getTypeServerSource();
if (!maybeTsSrc) {
typeMergingError =
joinErrors(std::move(typeMergingError), maybeTsSrc.takeError());
TypeServerSource *tsSrc = *maybeTsSrc;
tpiMap = tsSrc->tpiMap;
ipiMap = tsSrc->ipiMap;
void PrecompSource::loadGHashes() {
if (getDebugH(file)) {
warn("ignoring .debug$H section; pch with ghash is not implemented");
uint32_t ghashIdx = 0;
std::vector<GloballyHashedType> hashVec;
forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
// Remember the index of the LF_ENDPRECOMP record so it can be excluded from
// the PDB. There must be an entry in the list of ghashes so that the type
// indexes of the following records in the /Yc PCH object line up.
if (ty.kind() == LF_ENDPRECOMP)
endPrecompGHashIdx = ghashIdx;
hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec));
void UsePrecompSource::loadGHashes() {
PrecompSource *pchSrc = findPrecompSource(file, precompDependency);
if (!pchSrc)
// To compute ghashes of a /Yu object file, we need to build on the the
// ghashes of the /Yc PCH object. After we are done hashing, discard the
// ghashes from the PCH source so we don't unnecessarily try to deduplicate
// them.
std::vector<GloballyHashedType> hashVec =
forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec));
hashVec.begin() + precompDependency.getTypesCount());
void UsePrecompSource::remapTpiWithGHashes(GHashState *g) {
// This object was compiled with /Yu, so process the corresponding
// precompiled headers object (/Yc) first. Some type indices in the current
// object are referencing data in the precompiled headers object, so we need
// both to be loaded.
if (Error e = mergeInPrecompHeaderObj()) {
typeMergingError = joinErrors(std::move(typeMergingError), std::move(e));
tpiMap = indexMapStorage;
ipiMap = indexMapStorage;
TypeIndex(precompDependency.getStartTypeIndex() +
if (config->showSummary) {
nbTypeRecords = ghashes.size();
nbTypeRecordsBytes = file->debugTypes.size();
namespace {
/// A concurrent hash table for global type hashing. It is based on this paper:
/// Concurrent Hash Tables: Fast and General(?)!
/// This hash table is meant to be used in two phases:
/// 1. concurrent insertions
/// 2. concurrent reads
/// It does not support lookup, deletion, or rehashing. It uses linear probing.
/// The paper describes storing a key-value pair in two machine words.
/// Generally, the values stored in this map are type indices, and we can use
/// those values to recover the ghash key from a side table. This allows us to
/// shrink the table entries further at the cost of some loads, and sidesteps
/// the need for a 128 bit atomic compare-and-swap operation.
/// During insertion, a priority function is used to decide which insertion
/// should be preferred. This ensures that the output is deterministic. For
/// ghashing, lower tpiSrcIdx values (earlier inputs) are preferred.
class GHashCell;
struct GHashTable {
GHashCell *table = nullptr;
uint32_t tableSize = 0;
GHashTable() = default;
/// Initialize the table with the given size. Because the table cannot be
/// resized, the initial size of the table must be large enough to contain all
/// inputs, or insertion may not be able to find an empty cell.
void init(uint32_t newTableSize);
/// Insert the cell with the given ghash into the table. Return the insertion
/// position in the table. It is safe for the caller to store the insertion
/// position because the table cannot be resized.
uint32_t insert(COFFLinkerContext &ctx, GloballyHashedType ghash,
GHashCell newCell);
/// A ghash table cell for deduplicating types from TpiSources.
class GHashCell {
uint64_t data = 0;
GHashCell() = default;
// Construct data most to least significant so that sorting works well:
// - isItem
// - tpiSrcIdx
// - ghashIdx
// Add one to the tpiSrcIdx so that the 0th record from the 0th source has a
// non-zero representation.
GHashCell(bool isItem, uint32_t tpiSrcIdx, uint32_t ghashIdx)
: data((uint64_t(isItem) << 63U) | (uint64_t(tpiSrcIdx + 1) << 32ULL) |
ghashIdx) {
assert(tpiSrcIdx == getTpiSrcIdx() && "round trip failure");
assert(ghashIdx == getGHashIdx() && "round trip failure");
explicit GHashCell(uint64_t data) : data(data) {}
// The empty cell is all zeros.
bool isEmpty() const { return data == 0ULL; }
/// Extract the tpiSrcIdx.
uint32_t getTpiSrcIdx() const {
return ((uint32_t)(data >> 32U) & 0x7FFFFFFF) - 1;
/// Extract the index into the ghash array of the TpiSource.
uint32_t getGHashIdx() const { return (uint32_t)data; }
bool isItem() const { return data & (1ULL << 63U); }
/// Get the ghash key for this cell.
GloballyHashedType getGHash(const COFFLinkerContext &ctx) const {
return ctx.tpiSourceList[getTpiSrcIdx()]->ghashes[getGHashIdx()];
/// The priority function for the cell. The data is stored such that lower
/// tpiSrcIdx and ghashIdx values are preferred, which means that type record
/// from earlier sources are more likely to prevail.
friend inline bool operator<(const GHashCell &l, const GHashCell &r) {
return <;
} // namespace
namespace lld {
namespace coff {
/// This type is just a wrapper around GHashTable with external linkage so it
/// can be used from a header.
struct GHashState {
GHashTable table;
} // namespace coff
} // namespace lld
GHashTable::~GHashTable() { delete[] table; }
void GHashTable::init(uint32_t newTableSize) {
table = new GHashCell[newTableSize];
memset(table, 0, newTableSize * sizeof(GHashCell));
tableSize = newTableSize;
uint32_t GHashTable::insert(COFFLinkerContext &ctx, GloballyHashedType ghash,
GHashCell newCell) {
assert(!newCell.isEmpty() && "cannot insert empty cell value");
// FIXME: The low bytes of SHA1 have low entropy for short records, which
// type records are. Swap the byte order for better entropy. A better ghash
// won't need this.
uint32_t startIdx =
ByteSwap_64(*reinterpret_cast<uint64_t *>(&ghash)) % tableSize;
// Do a linear probe starting at startIdx.
uint32_t idx = startIdx;
while (true) {
// Run a compare and swap loop. There are four cases:
// - cell is empty: CAS into place and return
// - cell has matching key, earlier priority: do nothing, return
// - cell has matching key, later priority: CAS into place and return
// - cell has non-matching key: hash collision, probe next cell
auto *cellPtr = reinterpret_cast<std::atomic<GHashCell> *>(&table[idx]);
GHashCell oldCell(cellPtr->load());
while (oldCell.isEmpty() || oldCell.getGHash(ctx) == ghash) {
// Check if there is an existing ghash entry with a higher priority
// (earlier ordering). If so, this is a duplicate, we are done.
if (!oldCell.isEmpty() && oldCell < newCell)
return idx;
// Either the cell is empty, or our value is higher priority. Try to
// compare and swap. If it succeeds, we are done.
if (cellPtr->compare_exchange_weak(oldCell, newCell))
return idx;
// If the CAS failed, check this cell again.
// Advance the probe. Wrap around to the beginning if we run off the end.
idx = idx == tableSize ? 0 : idx;
if (idx == startIdx) {
// If this becomes an issue, we could mark failure and rehash from the
// beginning with a bigger table. There is no difference between rehashing
// internally and starting over.
report_fatal_error("ghash table is full");
llvm_unreachable("left infloop");
TypeMerger::TypeMerger(COFFLinkerContext &c, llvm::BumpPtrAllocator &alloc)
: typeTable(alloc), idTable(alloc), ctx(c) {}
TypeMerger::~TypeMerger() = default;
void TypeMerger::mergeTypesWithGHash() {
// Load ghashes. Do type servers and PCH objects first.
ScopedTimer t1(ctx.loadGHashTimer);
[&](TpiSource *source) { source->loadGHashes(); });
[&](TpiSource *source) { source->loadGHashes(); });
ScopedTimer t2(ctx.mergeGHashTimer);
GHashState ghashState;
// Estimate the size of hash table needed to deduplicate ghashes. This *must*
// be larger than the number of unique types, or hash table insertion may not
// be able to find a vacant slot. Summing the input types guarantees this, but
// it is a gross overestimate. The table size could be reduced to save memory,
// but it would require implementing rehashing, and this table is generally
// small compared to total memory usage, at eight bytes per input type record,
// and most input type records are larger than eight bytes.
size_t tableSize = 0;
for (TpiSource *source : ctx.tpiSourceList)
tableSize += source->ghashes.size();
// Cap the table size so that we can use 32-bit cell indices. Type indices are
// also 32-bit, so this is an inherent PDB file format limit anyway.
tableSize =
std::min(size_t(INT32_MAX) - TypeIndex::FirstNonSimpleIndex, tableSize);
// Insert ghashes in parallel. During concurrent insertion, we cannot observe
// the contents of the hash table cell, but we can remember the insertion
// position. Because the table does not rehash, the position will not change
// under insertion. After insertion is done, the value of the cell can be read
// to retrieve the final PDB type index.
parallelForEachN(0, ctx.tpiSourceList.size(), [&](size_t tpiSrcIdx) {
TpiSource *source = ctx.tpiSourceList[tpiSrcIdx];
for (uint32_t i = 0, e = source->ghashes.size(); i < e; i++) {
if (source->shouldOmitFromPdb(i)) {
source->indexMapStorage[i] = TypeIndex(SimpleTypeKind::NotTranslated);
GloballyHashedType ghash = source->ghashes[i];
bool isItem = source->isItemIndex.test(i);
uint32_t cellIdx =
ghashState.table.insert(ctx, ghash, GHashCell(isItem, tpiSrcIdx, i));
// Store the ghash cell index as a type index in indexMapStorage. Later
// we will replace it with the PDB type index.
source->indexMapStorage[i] = TypeIndex::fromArrayIndex(cellIdx);
// Collect all non-empty cells and sort them. This will implicitly assign
// destination type indices, and partition the entries into type records and
// item records. It arranges types in this order:
// - type records
// - source 0, type 0...
// - source 1, type 1...
// - item records
// - source 0, type 1...
// - source 1, type 0...
std::vector<GHashCell> entries;
for (const GHashCell &cell :
makeArrayRef(ghashState.table.table, tableSize)) {
if (!cell.isEmpty())
parallelSort(entries, std::less<GHashCell>());
log(formatv("ghash table load factor: {0:p} (size {1} / capacity {2})\n",
tableSize ? double(entries.size()) / tableSize : 0,
entries.size(), tableSize));
// Find out how many type and item indices there are.
auto mid =
std::lower_bound(entries.begin(), entries.end(), GHashCell(true, 0, 0));
assert((mid == entries.end() || mid->isItem()) &&
(mid == entries.begin() || !std::prev(mid)->isItem()) &&
"midpoint is not midpoint");
uint32_t numTypes = std::distance(entries.begin(), mid);
uint32_t numItems = std::distance(mid, entries.end());
log("Tpi record count: " + Twine(numTypes));
log("Ipi record count: " + Twine(numItems));
// Make a list of the "unique" type records to merge for each tpi source. Type
// merging will skip indices not on this list. Store the destination PDB type
// index for these unique types in the tpiMap for each source. The entries for
// non-unique types will be filled in prior to type merging.
for (uint32_t i = 0, e = entries.size(); i < e; ++i) {
auto &cell = entries[i];
uint32_t tpiSrcIdx = cell.getTpiSrcIdx();
TpiSource *source = ctx.tpiSourceList[tpiSrcIdx];
// Update the ghash table to store the destination PDB type index in the
// table.
uint32_t pdbTypeIndex = i < numTypes ? i : i - numTypes;
uint32_t ghashCellIndex =
ghashState.table.table[ghashCellIndex] =
GHashCell(cell.isItem(), cell.getTpiSrcIdx(), pdbTypeIndex);
// In parallel, remap all types.
for_each(dependencySources, [&](TpiSource *source) {
parallelForEach(objectSources, [&](TpiSource *source) {
// Build a global map of from function ID to function type.
for (TpiSource *source : ctx.tpiSourceList) {
for (auto idToType : source->funcIdToType)
void TypeMerger::sortDependencies() {
// Order dependencies first, but preserve the existing order.
std::vector<TpiSource *> deps;
std::vector<TpiSource *> objs;
for (TpiSource *s : ctx.tpiSourceList)
(s->isDependency() ? deps : objs).push_back(s);
uint32_t numDeps = deps.size();
uint32_t numObjs = objs.size();
ctx.tpiSourceList = std::move(deps);
ctx.tpiSourceList.insert(ctx.tpiSourceList.end(), objs.begin(), objs.end());
for (uint32_t i = 0, e = ctx.tpiSourceList.size(); i < e; ++i)
ctx.tpiSourceList[i]->tpiSrcIdx = i;
dependencySources = makeArrayRef(, numDeps);
objectSources = makeArrayRef( + numDeps, numObjs);
/// Given the index into the ghash table for a particular type, return the type
/// index for that type in the output PDB.
static TypeIndex loadPdbTypeIndexFromCell(GHashState *g,
uint32_t ghashCellIdx) {
GHashCell cell = g->table.table[ghashCellIdx];
return TypeIndex::fromArrayIndex(cell.getGHashIdx());
/// Free heap allocated ghashes.
void TypeMerger::clearGHashes() {
for (TpiSource *src : ctx.tpiSourceList) {
if (src->ownedGHashes)
delete[] src->;
src->ghashes = {};
// Fill in a TPI or IPI index map using ghashes. For each source type, use its
// ghash to lookup its final type index in the PDB, and store that in the map.
void TpiSource::fillMapFromGHashes(GHashState *g) {
for (size_t i = 0, e = ghashes.size(); i < e; ++i) {
TypeIndex fakeCellIndex = indexMapStorage[i];
if (fakeCellIndex.isSimple())
indexMapStorage[i] = fakeCellIndex;
indexMapStorage[i] =
loadPdbTypeIndexFromCell(g, fakeCellIndex.toArrayIndex());