//===- IndexedMemProfData.cpp - MemProf format support ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// MemProf data is serialized by writeMemProf and deserialized by the
// IndexedMemProfReader methods, both provided in this file.
//
//===----------------------------------------------------------------------===//
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/OnDiskHashTable.h"

namespace llvm {

// Serialize the MemProf schema: the number of entries followed by each entry
// encoded as a uint64_t.
static void writeMemProfSchema(ProfOStream &OS,
const memprof::MemProfSchema &Schema) {
OS.write(static_cast<uint64_t>(Schema.size()));
for (const auto Id : Schema)
OS.write(static_cast<uint64_t>(Id));
}

// Serialize MemProfRecordData. Return RecordTableOffset.
static uint64_t writeMemProfRecords(
ProfOStream &OS,
llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord>
&MemProfRecordData,
memprof::MemProfSchema *Schema, memprof::IndexedVersion Version,
llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
*MemProfCallStackIndexes = nullptr) {
memprof::RecordWriterTrait RecordWriter(Schema, Version,
MemProfCallStackIndexes);
OnDiskChainedHashTableGenerator<memprof::RecordWriterTrait>
RecordTableGenerator;
for (auto &[GUID, Record] : MemProfRecordData) {
// Insert the key (func hash) and value (memprof record).
RecordTableGenerator.insert(GUID, Record, RecordWriter);
}
// Release the memory of this MapVector as it is no longer needed.
MemProfRecordData.clear();
// The call to Emit invokes RecordWriterTrait::EmitData which destructs
// the memprof record copies owned by the RecordTableGenerator. This works
// because the RecordTableGenerator is not used after this point.
return RecordTableGenerator.Emit(OS.OS, RecordWriter);
}

// Serialize MemProfFrameData. Return FrameTableOffset.
static uint64_t writeMemProfFrames(
ProfOStream &OS,
llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData) {
OnDiskChainedHashTableGenerator<memprof::FrameWriterTrait>
FrameTableGenerator;
for (auto &[FrameId, Frame] : MemProfFrameData) {
// Insert the key (frame id) and value (frame contents).
FrameTableGenerator.insert(FrameId, Frame);
}
// Release the memory of this MapVector as it is no longer needed.
MemProfFrameData.clear();
return FrameTableGenerator.Emit(OS.OS);
}

// Serialize MemProfFrameData as a flat array of Frames. Return the mapping
// from FrameIds to their indexes within the frame array.
static llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId>
writeMemProfFrameArray(
ProfOStream &OS,
llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData,
llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram) {
// Mappings from FrameIds to array indexes.
llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes;
// Compute the order in which we serialize Frames. The order does not matter
// in terms of correctness, but we still compute it for deserialization
// performance. Specifically, if we serialize frequently used Frames one
// after another, we have better cache utilization. For two Frames that
// appear equally frequently, we break the tie by serializing the one that
// tends to appear earlier in call stacks. We implement the tie-breaking
// mechanism by computing the sum of indexes within call stacks for each
// Frame. If we still have a tie, we fall back to comparing the two FrameIds,
// purely for output stability.
std::vector<std::pair<memprof::FrameId, const memprof::Frame *>> FrameIdOrder;
FrameIdOrder.reserve(MemProfFrameData.size());
for (const auto &[Id, Frame] : MemProfFrameData)
FrameIdOrder.emplace_back(Id, &Frame);
assert(MemProfFrameData.size() == FrameIdOrder.size());
llvm::sort(FrameIdOrder,
[&](const std::pair<memprof::FrameId, const memprof::Frame *> &L,
const std::pair<memprof::FrameId, const memprof::Frame *> &R) {
const auto &SL = FrameHistogram[L.first];
const auto &SR = FrameHistogram[R.first];
// Popular FrameIds should come first.
if (SL.Count != SR.Count)
return SL.Count > SR.Count;
// If they are equally popular, then the one that tends to appear
// earlier in call stacks should come first.
if (SL.PositionSum != SR.PositionSum)
return SL.PositionSum < SR.PositionSum;
// Compare their FrameIds for sort stability.
return L.first < R.first;
});
// Serialize all frames while creating a mapping from each FrameId to its
// linear index within the frame array.
uint64_t Index = 0;
MemProfFrameIndexes.reserve(FrameIdOrder.size());
for (const auto &[Id, F] : FrameIdOrder) {
F->serialize(OS.OS);
MemProfFrameIndexes.insert({Id, Index});
++Index;
}
assert(MemProfFrameData.size() == Index);
assert(MemProfFrameData.size() == MemProfFrameIndexes.size());
// Release the memory of this MapVector as it is no longer needed.
MemProfFrameData.clear();
return MemProfFrameIndexes;
}
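
// Serialize MemProfCallStackData. Return CallStackTableOffset.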
static uint64_t writeMemProfCallStacks(
ProfOStream &OS,
llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
&MemProfCallStackData) {
OnDiskChainedHashTableGenerator<memprof::CallStackWriterTrait>
CallStackTableGenerator;
for (auto &[CSId, CallStack] : MemProfCallStackData)
CallStackTableGenerator.insert(CSId, CallStack);
// Release the memory of this MapVector as it is no longer needed.
MemProfCallStackData.clear();
return CallStackTableGenerator.Emit(OS.OS);
}
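
// Serialize the call stacks as a radix tree array. Return the mapping from
// CallStackIds to their indexes within that array.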
static llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
writeMemProfCallStackArray(
ProfOStream &OS,
llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
&MemProfCallStackData,
llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId>
&MemProfFrameIndexes,
llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram,
unsigned &NumElements) {
llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
MemProfCallStackIndexes;
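// Build the radix tree from the call stacks, remapping each FrameId to the
// linear index computed by writeMemProfFrameArray.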
memprof::CallStackRadixTreeBuilder<memprof::FrameId> Builder;
Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes,
FrameHistogram);
for (auto I : Builder.getRadixArray())
OS.write32(I);
NumElements = Builder.getRadixArray().size();
MemProfCallStackIndexes = Builder.takeCallStackPos();
// MemProfCallStackData was moved into the radix tree builder above; clear the
// moved-from MapVector to release any remaining storage.
MemProfCallStackData.clear();
return MemProfCallStackIndexes;
}

// Write out MemProf Version2 as follows:
// uint64_t Version
// uint64_t RecordTableOffset = RecordTableGenerator.Emit
// uint64_t FramePayloadOffset = Offset for the frame payload
// uint64_t FrameTableOffset = FrameTableGenerator.Emit
// uint64_t CallStackPayloadOffset = Offset for the call stack payload (NEW in V2)
// uint64_t CallStackTableOffset = CallStackTableGenerator.Emit (NEW in V2)
// uint64_t Num schema entries
// uint64_t Schema entry 0
// uint64_t Schema entry 1
// ....
// uint64_t Schema entry N - 1
// OnDiskChainedHashTable MemProfRecordData
// OnDiskChainedHashTable MemProfFrameData
// OnDiskChainedHashTable MemProfCallStackData (NEW in V2)
static Error writeMemProfV2(ProfOStream &OS,
memprof::IndexedMemProfData &MemProfData,
bool MemProfFullSchema) {
OS.write(memprof::Version2);
uint64_t HeaderUpdatePos = OS.tell();
OS.write(0ULL); // Reserve space for the memprof record table offset.
OS.write(0ULL); // Reserve space for the memprof frame payload offset.
OS.write(0ULL); // Reserve space for the memprof frame table offset.
OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
OS.write(0ULL); // Reserve space for the memprof call stack table offset.
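// Default to the compact hot/cold schema; emit the full schema only when
// requested.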
auto Schema = memprof::getHotColdSchema();
if (MemProfFullSchema)
Schema = memprof::getFullSchema();
writeMemProfSchema(OS, Schema);
uint64_t RecordTableOffset =
writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version2);
uint64_t FramePayloadOffset = OS.tell();
uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfData.Frames);
uint64_t CallStackPayloadOffset = OS.tell();
uint64_t CallStackTableOffset =
writeMemProfCallStacks(OS, MemProfData.CallStacks);
uint64_t Header[] = {
RecordTableOffset, FramePayloadOffset, FrameTableOffset,
CallStackPayloadOffset, CallStackTableOffset,
};
OS.patch({{HeaderUpdatePos, Header}});
return Error::success();
}

// Write out MemProf Version3 as follows:
// uint64_t Version
// uint64_t CallStackPayloadOffset = Offset for the call stack payload
// uint64_t RecordPayloadOffset = Offset for the record payload
// uint64_t RecordTableOffset = RecordTableGenerator.Emit
// uint64_t Num schema entries
// uint64_t Schema entry 0
// uint64_t Schema entry 1
// ....
// uint64_t Schema entry N - 1
// Frames serialized one after another
// Call stacks encoded as a radix tree
// OnDiskChainedHashTable MemProfRecordData
static Error writeMemProfV3(ProfOStream &OS,
memprof::IndexedMemProfData &MemProfData,
bool MemProfFullSchema) {
OS.write(memprof::Version3);
uint64_t HeaderUpdatePos = OS.tell();
OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
OS.write(0ULL); // Reserve space for the memprof record payload offset.
OS.write(0ULL); // Reserve space for the memprof record table offset.
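// As in V2, default to the hot/cold schema unless the full schema is
// requested.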
auto Schema = memprof::getHotColdSchema();
if (MemProfFullSchema)
Schema = memprof::getFullSchema();
writeMemProfSchema(OS, Schema);
llvm::DenseMap<memprof::FrameId, memprof::FrameStat> FrameHistogram =
memprof::computeFrameHistogram(MemProfData.CallStacks);
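// Every Frame should appear in at least one call stack, so the histogram
// computed from the call stacks covers all frames.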
assert(MemProfData.Frames.size() == FrameHistogram.size());
llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes =
writeMemProfFrameArray(OS, MemProfData.Frames, FrameHistogram);
uint64_t CallStackPayloadOffset = OS.tell();
// The number of elements in the call stack array.
unsigned NumElements = 0;
llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
MemProfCallStackIndexes =
writeMemProfCallStackArray(OS, MemProfData.CallStacks,
MemProfFrameIndexes, FrameHistogram,
NumElements);
uint64_t RecordPayloadOffset = OS.tell();
uint64_t RecordTableOffset =
writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version3,
&MemProfCallStackIndexes);
// IndexedMemProfReader::deserializeV3 computes the number of elements in the
// call stack array from the difference between CallStackPayloadOffset and
// RecordPayloadOffset. Verify that the computation works.
assert(CallStackPayloadOffset +
NumElements * sizeof(memprof::LinearFrameId) ==
RecordPayloadOffset);
uint64_t Header[] = {
CallStackPayloadOffset,
RecordPayloadOffset,
RecordTableOffset,
};
OS.patch({{HeaderUpdatePos, Header}});
return Error::success();
}

// Write out the MemProf data in the requested version.
Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
memprof::IndexedVersion MemProfVersionRequested,
bool MemProfFullSchema) {
switch (MemProfVersionRequested) {
case memprof::Version2:
return writeMemProfV2(OS, MemProfData, MemProfFullSchema);
case memprof::Version3:
return writeMemProfV3(OS, MemProfData, MemProfFullSchema);
}
return make_error<InstrProfError>(
instrprof_error::unsupported_version,
formatv("MemProf version {} not supported; "
"requires version between {} and {}, inclusive",
MemProfVersionRequested, memprof::MinimumSupportedVersion,
memprof::MaximumSupportedVersion));
}
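
// Example caller (an illustrative sketch only; ProfOStream construction and
// error handling at the real call site differ):
//
//   ProfOStream OS(...);
//   if (Error E = writeMemProf(OS, MemProfData, memprof::Version3,
//                              /*MemProfFullSchema=*/false))
//     return E;
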
Error IndexedMemProfReader::deserializeV2(const unsigned char *Start,
const unsigned char *Ptr) {
// The value returned from RecordTableGenerator.Emit.
const uint64_t RecordTableOffset =
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
// The offset in the stream right before invoking
// FrameTableGenerator.Emit.
const uint64_t FramePayloadOffset =
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
// The value returned from FrameTableGenerator.Emit.
const uint64_t FrameTableOffset =
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
// The offset in the stream right before invoking
// CallStackTableGenerator.Emit.
uint64_t CallStackPayloadOffset = 0;
// The value returned from CallStackTableGenerator.Emit.
uint64_t CallStackTableOffset = 0;
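// The call stack payload and table were introduced in Version2.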
if (Version >= memprof::Version2) {
CallStackPayloadOffset =
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
CallStackTableOffset =
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
}
// Read the schema.
auto SchemaOr = memprof::readMemProfSchema(Ptr);
if (!SchemaOr)
return SchemaOr.takeError();
Schema = SchemaOr.get();
// Now initialize the table reader with a pointer into the data buffer.
MemProfRecordTable.reset(MemProfRecordHashTable::Create(
/*Buckets=*/Start + RecordTableOffset,
/*Payload=*/Ptr,
/*Base=*/Start, memprof::RecordLookupTrait(Version, Schema)));
// Initialize the frame table reader with the payload and bucket offsets.
MemProfFrameTable.reset(MemProfFrameHashTable::Create(
/*Buckets=*/Start + FrameTableOffset,
/*Payload=*/Start + FramePayloadOffset,
/*Base=*/Start));
if (Version >= memprof::Version2)
MemProfCallStackTable.reset(MemProfCallStackHashTable::Create(
/*Buckets=*/Start + CallStackTableOffset,
/*Payload=*/Start + CallStackPayloadOffset,
/*Base=*/Start));
return Error::success();
}

Error IndexedMemProfReader::deserializeV3(const unsigned char *Start,
const unsigned char *Ptr) {
// The offset in the stream right before invoking
// CallStackTableGenerator.Emit.
const uint64_t CallStackPayloadOffset =
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
// The offset in the stream right before invoking RecordTableGenerator.Emit.
const uint64_t RecordPayloadOffset =
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
// The value returned from RecordTableGenerator.Emit.
const uint64_t RecordTableOffset =
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
// Read the schema.
auto SchemaOr = memprof::readMemProfSchema(Ptr);
if (!SchemaOr)
return SchemaOr.takeError();
Schema = SchemaOr.get();
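// In V3, the frame array begins immediately after the schema.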
FrameBase = Ptr;
CallStackBase = Start + CallStackPayloadOffset;
// Compute the number of elements in the radix tree array. Since we use this
// only to reserve enough bits in a BitVector, it is fine to slightly
// overestimate the count due to padding just before the next section.
RadixTreeSize = (RecordPayloadOffset - CallStackPayloadOffset) /
sizeof(memprof::LinearFrameId);
// Now initialize the table reader with a pointer into the data buffer.
MemProfRecordTable.reset(MemProfRecordHashTable::Create(
/*Buckets=*/Start + RecordTableOffset,
/*Payload=*/Start + RecordPayloadOffset,
/*Base=*/Start, memprof::RecordLookupTrait(memprof::Version3, Schema)));
return Error::success();
}

Error IndexedMemProfReader::deserialize(const unsigned char *Start,
uint64_t MemProfOffset) {
const unsigned char *Ptr = Start + MemProfOffset;
// Read the MemProf version number.
const uint64_t FirstWord =
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
if (FirstWord == memprof::Version2 || FirstWord == memprof::Version3) {
// Everything is good. We can proceed to deserialize the rest.
Version = static_cast<memprof::IndexedVersion>(FirstWord);
} else {
return make_error<InstrProfError>(
instrprof_error::unsupported_version,
formatv("MemProf version {} not supported; "
"requires version between {} and {}, inclusive",
FirstWord, memprof::MinimumSupportedVersion,
memprof::MaximumSupportedVersion));
}
switch (Version) {
case memprof::Version2:
if (Error E = deserializeV2(Start, Ptr))
return E;
break;
case memprof::Version3:
if (Error E = deserializeV3(Start, Ptr))
return E;
break;
}
return Error::success();
}
} // namespace llvm