| #ifndef LLVM_PROFILEDATA_MEMPROF_H_ |
| #define LLVM_PROFILEDATA_MEMPROF_H_ |
| |
| #include "llvm/ADT/MapVector.h" |
| #include "llvm/ADT/STLForwardCompat.h" |
| #include "llvm/ADT/STLFunctionalExtras.h" |
| #include "llvm/ADT/SmallVector.h" |
| #include "llvm/IR/GlobalValue.h" |
| #include "llvm/ProfileData/MemProfData.inc" |
| #include "llvm/Support/Endian.h" |
| #include "llvm/Support/EndianStream.h" |
| #include "llvm/Support/raw_ostream.h" |
| |
| #include <bitset> |
| #include <cstdint> |
| #include <optional> |
| |
| namespace llvm { |
| namespace memprof { |
| |
| struct MemProfRecord; |
| |
| // The versions of the indexed MemProf format |
| enum IndexedVersion : uint64_t { |
| // Version 0: This version didn't have a version field. |
| Version0 = 0, |
| // Version 1: Added a version field to the header. |
| Version1 = 1, |
| // Version 2: Added a call stack table. |
| Version2 = 2, |
| // Version 3: Added a radix tree for call stacks. Switched to linear IDs for |
| // frames and call stacks. |
| Version3 = 3, |
| }; |
| |
| constexpr uint64_t MinimumSupportedVersion = Version0; |
| constexpr uint64_t MaximumSupportedVersion = Version3; |
| |
| // Verify that the minimum and maximum satisfy the obvious constraint. |
| static_assert(MinimumSupportedVersion <= MaximumSupportedVersion); |
| |
| enum class Meta : uint64_t { |
| Start = 0, |
| #define MIBEntryDef(NameTag, Name, Type) NameTag, |
| #include "llvm/ProfileData/MIBEntryDef.inc" |
| #undef MIBEntryDef |
| Size |
| }; |
| |
| using MemProfSchema = llvm::SmallVector<Meta, static_cast<int>(Meta::Size)>; |
| |
| // Returns the full schema currently in use. |
| MemProfSchema getFullSchema(); |
| |
| // Returns the schema consisting of the fields used for hot cold memory hinting. |
| MemProfSchema getHotColdSchema(); |
| |
| // Holds the actual MemInfoBlock data with all fields. Contents may be read or |
| // written partially by providing an appropriate schema to the serialize and |
| // deserialize methods. |
| struct PortableMemInfoBlock { |
| PortableMemInfoBlock() = default; |
| explicit PortableMemInfoBlock(const MemInfoBlock &Block, |
| const MemProfSchema &IncomingSchema) { |
| for (const Meta Id : IncomingSchema) |
| Schema.set(llvm::to_underlying(Id)); |
| #define MIBEntryDef(NameTag, Name, Type) Name = Block.Name; |
| #include "llvm/ProfileData/MIBEntryDef.inc" |
| #undef MIBEntryDef |
| } |
| |
| PortableMemInfoBlock(const MemProfSchema &Schema, const unsigned char *Ptr) { |
| deserialize(Schema, Ptr); |
| } |
| |
| // Read the contents of \p Ptr based on the \p Schema to populate the |
| // MemInfoBlock member. |
| void deserialize(const MemProfSchema &IncomingSchema, |
| const unsigned char *Ptr) { |
| using namespace support; |
| |
| Schema.reset(); |
| for (const Meta Id : IncomingSchema) { |
| switch (Id) { |
| #define MIBEntryDef(NameTag, Name, Type) \ |
| case Meta::Name: { \ |
| Name = endian::readNext<Type, llvm::endianness::little>(Ptr); \ |
| } break; |
| #include "llvm/ProfileData/MIBEntryDef.inc" |
| #undef MIBEntryDef |
| default: |
| llvm_unreachable("Unknown meta type id, is the profile collected from " |
| "a newer version of the runtime?"); |
| } |
| |
| Schema.set(llvm::to_underlying(Id)); |
| } |
| } |
| |
| // Write the contents of the MemInfoBlock based on the \p Schema provided to |
| // the raw_ostream \p OS. |
| void serialize(const MemProfSchema &Schema, raw_ostream &OS) const { |
| using namespace support; |
| |
| endian::Writer LE(OS, llvm::endianness::little); |
| for (const Meta Id : Schema) { |
| switch (Id) { |
| #define MIBEntryDef(NameTag, Name, Type) \ |
| case Meta::Name: { \ |
| LE.write<Type>(Name); \ |
| } break; |
| #include "llvm/ProfileData/MIBEntryDef.inc" |
| #undef MIBEntryDef |
| default: |
| llvm_unreachable("Unknown meta type id, invalid input?"); |
| } |
| } |
| } |
| |
| // Print out the contents of the MemInfoBlock in YAML format. |
| void printYAML(raw_ostream &OS) const { |
| OS << " MemInfoBlock:\n"; |
| #define MIBEntryDef(NameTag, Name, Type) \ |
| OS << " " << #Name << ": " << Name << "\n"; |
| #include "llvm/ProfileData/MIBEntryDef.inc" |
| #undef MIBEntryDef |
| } |
| |
| // Return the schema, only for unit tests. |
| std::bitset<llvm::to_underlying(Meta::Size)> getSchema() const { |
| return Schema; |
| } |
| |
| // Define getters for each type which can be called by analyses. |
| #define MIBEntryDef(NameTag, Name, Type) \ |
| Type get##Name() const { \ |
| assert(Schema[llvm::to_underlying(Meta::Name)]); \ |
| return Name; \ |
| } |
| #include "llvm/ProfileData/MIBEntryDef.inc" |
| #undef MIBEntryDef |
| |
| void clear() { *this = PortableMemInfoBlock(); } |
| |
| bool operator==(const PortableMemInfoBlock &Other) const { |
| if (Other.Schema != Schema) |
| return false; |
| |
| #define MIBEntryDef(NameTag, Name, Type) \ |
| if (Schema[llvm::to_underlying(Meta::Name)] && \ |
| Other.get##Name() != get##Name()) \ |
| return false; |
| #include "llvm/ProfileData/MIBEntryDef.inc" |
| #undef MIBEntryDef |
| return true; |
| } |
| |
| bool operator!=(const PortableMemInfoBlock &Other) const { |
| return !operator==(Other); |
| } |
| |
| static size_t serializedSize(const MemProfSchema &Schema) { |
| size_t Result = 0; |
| |
| for (const Meta Id : Schema) { |
| switch (Id) { |
| #define MIBEntryDef(NameTag, Name, Type) \ |
| case Meta::Name: { \ |
| Result += sizeof(Type); \ |
| } break; |
| #include "llvm/ProfileData/MIBEntryDef.inc" |
| #undef MIBEntryDef |
| default: |
| llvm_unreachable("Unknown meta type id, invalid input?"); |
| } |
| } |
| |
| return Result; |
| } |
| |
| private: |
| // The set of available fields, indexed by Meta::Name. |
| std::bitset<llvm::to_underlying(Meta::Size)> Schema; |
| |
| #define MIBEntryDef(NameTag, Name, Type) Type Name = Type(); |
| #include "llvm/ProfileData/MIBEntryDef.inc" |
| #undef MIBEntryDef |
| }; |
| |
| // A type representing the id generated by hashing the contents of the Frame. |
| using FrameId = uint64_t; |
| // A type representing the id to index into the frame array. |
| using LinearFrameId = uint32_t; |
| // Describes a call frame for a dynamic allocation context. The contents of |
| // the frame are populated by symbolizing the stack depot call frame from the |
| // compiler runtime. |
| struct Frame { |
| // A uuid (uint64_t) identifying the function. It is obtained by |
| // llvm::md5(FunctionName) which returns the lower 64 bits. |
| GlobalValue::GUID Function; |
| // The symbol name for the function. Only populated in the Frame by the reader |
| // if requested during initialization. This field should not be serialized. |
| std::unique_ptr<std::string> SymbolName; |
| // The source line offset of the call from the beginning of parent function. |
| uint32_t LineOffset; |
| // The source column number of the call to help distinguish multiple calls |
| // on the same line. |
| uint32_t Column; |
| // Whether the current frame is inlined. |
| bool IsInlineFrame; |
| |
| Frame(const Frame &Other) { |
| Function = Other.Function; |
| SymbolName = Other.SymbolName |
| ? std::make_unique<std::string>(*Other.SymbolName) |
| : nullptr; |
| LineOffset = Other.LineOffset; |
| Column = Other.Column; |
| IsInlineFrame = Other.IsInlineFrame; |
| } |
| |
| Frame(GlobalValue::GUID Hash, uint32_t Off, uint32_t Col, bool Inline) |
| : Function(Hash), LineOffset(Off), Column(Col), IsInlineFrame(Inline) {} |
| |
| bool operator==(const Frame &Other) const { |
| // Ignore the SymbolName field to avoid a string compare. Comparing the |
| // function hash serves the same purpose. |
| return Other.Function == Function && Other.LineOffset == LineOffset && |
| Other.Column == Column && Other.IsInlineFrame == IsInlineFrame; |
| } |
| |
| Frame &operator=(const Frame &Other) { |
| Function = Other.Function; |
| SymbolName = Other.SymbolName |
| ? std::make_unique<std::string>(*Other.SymbolName) |
| : nullptr; |
| LineOffset = Other.LineOffset; |
| Column = Other.Column; |
| IsInlineFrame = Other.IsInlineFrame; |
| return *this; |
| } |
| |
| bool operator!=(const Frame &Other) const { return !operator==(Other); } |
| |
| bool hasSymbolName() const { return !!SymbolName; } |
| |
| StringRef getSymbolName() const { |
| assert(hasSymbolName()); |
| return *SymbolName; |
| } |
| |
| std::string getSymbolNameOr(StringRef Alt) const { |
| return std::string(hasSymbolName() ? getSymbolName() : Alt); |
| } |
| |
| // Write the contents of the frame to the ostream \p OS. |
| void serialize(raw_ostream &OS) const { |
| using namespace support; |
| |
| endian::Writer LE(OS, llvm::endianness::little); |
| |
| // If the type of the GlobalValue::GUID changes, then we need to update |
| // the reader and the writer. |
| static_assert(std::is_same<GlobalValue::GUID, uint64_t>::value, |
| "Expect GUID to be uint64_t."); |
| LE.write<uint64_t>(Function); |
| |
| LE.write<uint32_t>(LineOffset); |
| LE.write<uint32_t>(Column); |
| LE.write<bool>(IsInlineFrame); |
| } |
| |
| // Read a frame from char data which has been serialized as little endian. |
| static Frame deserialize(const unsigned char *Ptr) { |
| using namespace support; |
| |
| const uint64_t F = |
| endian::readNext<uint64_t, llvm::endianness::little>(Ptr); |
| const uint32_t L = |
| endian::readNext<uint32_t, llvm::endianness::little>(Ptr); |
| const uint32_t C = |
| endian::readNext<uint32_t, llvm::endianness::little>(Ptr); |
| const bool I = endian::readNext<bool, llvm::endianness::little>(Ptr); |
| return Frame(/*Function=*/F, /*LineOffset=*/L, /*Column=*/C, |
| /*IsInlineFrame=*/I); |
| } |
| |
| // Returns the size of the frame information. |
| static constexpr size_t serializedSize() { |
| return sizeof(Frame::Function) + sizeof(Frame::LineOffset) + |
| sizeof(Frame::Column) + sizeof(Frame::IsInlineFrame); |
| } |
| |
| // Print the frame information in YAML format. |
| void printYAML(raw_ostream &OS) const { |
| OS << " -\n" |
| << " Function: " << Function << "\n" |
| << " SymbolName: " << getSymbolNameOr("<None>") << "\n" |
| << " LineOffset: " << LineOffset << "\n" |
| << " Column: " << Column << "\n" |
| << " Inline: " << IsInlineFrame << "\n"; |
| } |
| |
| // Return a hash value based on the contents of the frame. Here we don't use |
| // hashing from llvm ADT since we are going to persist the hash id, the hash |
| // combine algorithm in ADT uses a new randomized seed each time. |
| inline FrameId hash() const { |
| auto HashCombine = [](auto Value, size_t Seed) { |
| std::hash<decltype(Value)> Hasher; |
| // The constant used below is the 64 bit representation of the fractional |
| // part of the golden ratio. Used here for the randomness in their bit |
| // pattern. |
| return Hasher(Value) + 0x9e3779b97f4a7c15 + (Seed << 6) + (Seed >> 2); |
| }; |
| |
| size_t Result = 0; |
| Result ^= HashCombine(Function, Result); |
| Result ^= HashCombine(LineOffset, Result); |
| Result ^= HashCombine(Column, Result); |
| Result ^= HashCombine(IsInlineFrame, Result); |
| return static_cast<FrameId>(Result); |
| } |
| }; |
| |
| // A type representing the index into the table of call stacks. |
| using CallStackId = uint64_t; |
| |
| // A type representing the index into the call stack array. |
| using LinearCallStackId = uint32_t; |
| |
| // Holds allocation information in a space efficient format where frames are |
| // represented using unique identifiers. |
| struct IndexedAllocationInfo { |
| // The dynamic calling context for the allocation in bottom-up (leaf-to-root) |
| // order. Frame contents are stored out-of-line. |
| // TODO: Remove once we fully transition to CSId. |
| llvm::SmallVector<FrameId> CallStack; |
| // Conceptually the same as above. We are going to keep both CallStack and |
| // CallStackId while we are transitioning from CallStack to CallStackId. |
| CallStackId CSId = 0; |
| // The statistics obtained from the runtime for the allocation. |
| PortableMemInfoBlock Info; |
| |
| IndexedAllocationInfo() = default; |
| IndexedAllocationInfo(ArrayRef<FrameId> CS, CallStackId CSId, |
| const MemInfoBlock &MB, |
| const MemProfSchema &Schema = getFullSchema()) |
| : CallStack(CS.begin(), CS.end()), CSId(CSId), Info(MB, Schema) {} |
| |
| // Returns the size in bytes when this allocation info struct is serialized. |
| size_t serializedSize(const MemProfSchema &Schema, |
| IndexedVersion Version) const; |
| |
| bool operator==(const IndexedAllocationInfo &Other) const { |
| if (Other.Info != Info) |
| return false; |
| |
| if (Other.CSId != CSId) |
| return false; |
| return true; |
| } |
| |
| bool operator!=(const IndexedAllocationInfo &Other) const { |
| return !operator==(Other); |
| } |
| }; |
| |
| // Holds allocation information with frame contents inline. The type should |
| // be used for temporary in-memory instances. |
| struct AllocationInfo { |
| // Same as IndexedAllocationInfo::CallStack with the frame contents inline. |
| std::vector<Frame> CallStack; |
| // Same as IndexedAllocationInfo::Info; |
| PortableMemInfoBlock Info; |
| |
| AllocationInfo() = default; |
| AllocationInfo( |
| const IndexedAllocationInfo &IndexedAI, |
| llvm::function_ref<const Frame(const FrameId)> IdToFrameCallback) { |
| for (const FrameId &Id : IndexedAI.CallStack) { |
| CallStack.push_back(IdToFrameCallback(Id)); |
| } |
| Info = IndexedAI.Info; |
| } |
| |
| void printYAML(raw_ostream &OS) const { |
| OS << " -\n"; |
| OS << " Callstack:\n"; |
| // TODO: Print out the frame on one line with to make it easier for deep |
| // callstacks once we have a test to check valid YAML is generated. |
| for (const Frame &F : CallStack) { |
| F.printYAML(OS); |
| } |
| Info.printYAML(OS); |
| } |
| }; |
| |
| // Holds the memprof profile information for a function. The internal |
| // representation stores frame ids for efficiency. This representation should |
| // be used in the profile conversion and manipulation tools. |
| struct IndexedMemProfRecord { |
| // Memory allocation sites in this function for which we have memory |
| // profiling data. |
| llvm::SmallVector<IndexedAllocationInfo> AllocSites; |
| // Holds call sites in this function which are part of some memory |
| // allocation context. We store this as a list of locations, each with its |
| // list of inline locations in bottom-up order i.e. from leaf to root. The |
| // inline location list may include additional entries, users should pick |
| // the last entry in the list with the same function GUID. |
| llvm::SmallVector<llvm::SmallVector<FrameId>> CallSites; |
| // Conceptually the same as above. We are going to keep both CallSites and |
| // CallSiteIds while we are transitioning from CallSites to CallSiteIds. |
| llvm::SmallVector<CallStackId> CallSiteIds; |
| |
| void clear() { |
| AllocSites.clear(); |
| CallSites.clear(); |
| } |
| |
| void merge(const IndexedMemProfRecord &Other) { |
| // TODO: Filter out duplicates which may occur if multiple memprof |
| // profiles are merged together using llvm-profdata. |
| AllocSites.append(Other.AllocSites); |
| CallSites.append(Other.CallSites); |
| } |
| |
| size_t serializedSize(const MemProfSchema &Schema, |
| IndexedVersion Version) const; |
| |
| bool operator==(const IndexedMemProfRecord &Other) const { |
| if (Other.AllocSites != AllocSites) |
| return false; |
| |
| if (Other.CallSiteIds != CallSiteIds) |
| return false; |
| return true; |
| } |
| |
| // Serializes the memprof records in \p Records to the ostream \p OS based |
| // on the schema provided in \p Schema. |
| void serialize(const MemProfSchema &Schema, raw_ostream &OS, |
| IndexedVersion Version, |
| llvm::DenseMap<CallStackId, LinearCallStackId> |
| *MemProfCallStackIndexes = nullptr) const; |
| |
| // Deserializes memprof records from the Buffer. |
| static IndexedMemProfRecord deserialize(const MemProfSchema &Schema, |
| const unsigned char *Buffer, |
| IndexedVersion Version); |
| |
| // Convert IndexedMemProfRecord to MemProfRecord. Callback is used to |
| // translate CallStackId to call stacks with frames inline. |
| MemProfRecord toMemProfRecord( |
| llvm::function_ref<std::vector<Frame>(const CallStackId)> Callback) const; |
| |
| // Returns the GUID for the function name after canonicalization. For |
| // memprof, we remove any .llvm suffix added by LTO. MemProfRecords are |
| // mapped to functions using this GUID. |
| static GlobalValue::GUID getGUID(const StringRef FunctionName); |
| }; |
| |
| // Holds the memprof profile information for a function. The internal |
| // representation stores frame contents inline. This representation should |
| // be used for small amount of temporary, in memory instances. |
| struct MemProfRecord { |
| // Same as IndexedMemProfRecord::AllocSites with frame contents inline. |
| llvm::SmallVector<AllocationInfo> AllocSites; |
| // Same as IndexedMemProfRecord::CallSites with frame contents inline. |
| llvm::SmallVector<std::vector<Frame>> CallSites; |
| |
| MemProfRecord() = default; |
| MemProfRecord( |
| const IndexedMemProfRecord &Record, |
| llvm::function_ref<const Frame(const FrameId Id)> IdToFrameCallback) { |
| for (const IndexedAllocationInfo &IndexedAI : Record.AllocSites) { |
| AllocSites.emplace_back(IndexedAI, IdToFrameCallback); |
| } |
| for (const ArrayRef<FrameId> Site : Record.CallSites) { |
| std::vector<Frame> Frames; |
| for (const FrameId Id : Site) { |
| Frames.push_back(IdToFrameCallback(Id)); |
| } |
| CallSites.push_back(Frames); |
| } |
| } |
| |
| // Prints out the contents of the memprof record in YAML. |
| void print(llvm::raw_ostream &OS) const { |
| if (!AllocSites.empty()) { |
| OS << " AllocSites:\n"; |
| for (const AllocationInfo &N : AllocSites) |
| N.printYAML(OS); |
| } |
| |
| if (!CallSites.empty()) { |
| OS << " CallSites:\n"; |
| for (const std::vector<Frame> &Frames : CallSites) { |
| for (const Frame &F : Frames) { |
| OS << " -\n"; |
| F.printYAML(OS); |
| } |
| } |
| } |
| } |
| }; |
| |
| // Reads a memprof schema from a buffer. All entries in the buffer are |
| // interpreted as uint64_t. The first entry in the buffer denotes the number of |
| // ids in the schema. Subsequent entries are integers which map to memprof::Meta |
| // enum class entries. After successfully reading the schema, the pointer is one |
| // byte past the schema contents. |
| Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer); |
| |
| // Trait for reading IndexedMemProfRecord data from the on-disk hash table. |
| class RecordLookupTrait { |
| public: |
| using data_type = const IndexedMemProfRecord &; |
| using internal_key_type = uint64_t; |
| using external_key_type = uint64_t; |
| using hash_value_type = uint64_t; |
| using offset_type = uint64_t; |
| |
| RecordLookupTrait() = delete; |
| RecordLookupTrait(IndexedVersion V, const MemProfSchema &S) |
| : Version(V), Schema(S) {} |
| |
| static bool EqualKey(uint64_t A, uint64_t B) { return A == B; } |
| static uint64_t GetInternalKey(uint64_t K) { return K; } |
| static uint64_t GetExternalKey(uint64_t K) { return K; } |
| |
| hash_value_type ComputeHash(uint64_t K) { return K; } |
| |
| static std::pair<offset_type, offset_type> |
| ReadKeyDataLength(const unsigned char *&D) { |
| using namespace support; |
| |
| offset_type KeyLen = |
| endian::readNext<offset_type, llvm::endianness::little>(D); |
| offset_type DataLen = |
| endian::readNext<offset_type, llvm::endianness::little>(D); |
| return std::make_pair(KeyLen, DataLen); |
| } |
| |
| uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) { |
| using namespace support; |
| return endian::readNext<external_key_type, llvm::endianness::little>(D); |
| } |
| |
| data_type ReadData(uint64_t K, const unsigned char *D, |
| offset_type /*Unused*/) { |
| Record = IndexedMemProfRecord::deserialize(Schema, D, Version); |
| return Record; |
| } |
| |
| private: |
| // Holds the MemProf version. |
| IndexedVersion Version; |
| // Holds the memprof schema used to deserialize records. |
| MemProfSchema Schema; |
| // Holds the records from one function deserialized from the indexed format. |
| IndexedMemProfRecord Record; |
| }; |
| |
| // Trait for writing IndexedMemProfRecord data to the on-disk hash table. |
| class RecordWriterTrait { |
| public: |
| using key_type = uint64_t; |
| using key_type_ref = uint64_t; |
| |
| using data_type = IndexedMemProfRecord; |
| using data_type_ref = IndexedMemProfRecord &; |
| |
| using hash_value_type = uint64_t; |
| using offset_type = uint64_t; |
| |
| private: |
| // Pointer to the memprof schema to use for the generator. |
| const MemProfSchema *Schema; |
| // The MemProf version to use for the serialization. |
| IndexedVersion Version; |
| |
| // Mappings from CallStackId to the indexes into the call stack array. |
| llvm::DenseMap<CallStackId, LinearCallStackId> *MemProfCallStackIndexes; |
| |
| public: |
| // We do not support the default constructor, which does not set Version. |
| RecordWriterTrait() = delete; |
| RecordWriterTrait( |
| const MemProfSchema *Schema, IndexedVersion V, |
| llvm::DenseMap<CallStackId, LinearCallStackId> *MemProfCallStackIndexes) |
| : Schema(Schema), Version(V), |
| MemProfCallStackIndexes(MemProfCallStackIndexes) {} |
| |
| static hash_value_type ComputeHash(key_type_ref K) { return K; } |
| |
| std::pair<offset_type, offset_type> |
| EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) { |
| using namespace support; |
| |
| endian::Writer LE(Out, llvm::endianness::little); |
| offset_type N = sizeof(K); |
| LE.write<offset_type>(N); |
| offset_type M = V.serializedSize(*Schema, Version); |
| LE.write<offset_type>(M); |
| return std::make_pair(N, M); |
| } |
| |
| void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) { |
| using namespace support; |
| endian::Writer LE(Out, llvm::endianness::little); |
| LE.write<uint64_t>(K); |
| } |
| |
| void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V, |
| offset_type /*Unused*/) { |
| assert(Schema != nullptr && "MemProf schema is not initialized!"); |
| V.serialize(*Schema, Out, Version, MemProfCallStackIndexes); |
| // Clear the IndexedMemProfRecord which results in clearing/freeing its |
| // vectors of allocs and callsites. This is owned by the associated on-disk |
| // hash table, but unused after this point. See also the comment added to |
| // the client which constructs the on-disk hash table for this trait. |
| V.clear(); |
| } |
| }; |
| |
| // Trait for writing frame mappings to the on-disk hash table. |
| class FrameWriterTrait { |
| public: |
| using key_type = FrameId; |
| using key_type_ref = FrameId; |
| |
| using data_type = Frame; |
| using data_type_ref = Frame &; |
| |
| using hash_value_type = FrameId; |
| using offset_type = uint64_t; |
| |
| static hash_value_type ComputeHash(key_type_ref K) { return K; } |
| |
| static std::pair<offset_type, offset_type> |
| EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) { |
| using namespace support; |
| endian::Writer LE(Out, llvm::endianness::little); |
| offset_type N = sizeof(K); |
| LE.write<offset_type>(N); |
| offset_type M = V.serializedSize(); |
| LE.write<offset_type>(M); |
| return std::make_pair(N, M); |
| } |
| |
| void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) { |
| using namespace support; |
| endian::Writer LE(Out, llvm::endianness::little); |
| LE.write<key_type>(K); |
| } |
| |
| void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V, |
| offset_type /*Unused*/) { |
| V.serialize(Out); |
| } |
| }; |
| |
| // Trait for reading frame mappings from the on-disk hash table. |
| class FrameLookupTrait { |
| public: |
| using data_type = const Frame; |
| using internal_key_type = FrameId; |
| using external_key_type = FrameId; |
| using hash_value_type = FrameId; |
| using offset_type = uint64_t; |
| |
| static bool EqualKey(internal_key_type A, internal_key_type B) { |
| return A == B; |
| } |
| static uint64_t GetInternalKey(internal_key_type K) { return K; } |
| static uint64_t GetExternalKey(external_key_type K) { return K; } |
| |
| hash_value_type ComputeHash(internal_key_type K) { return K; } |
| |
| static std::pair<offset_type, offset_type> |
| ReadKeyDataLength(const unsigned char *&D) { |
| using namespace support; |
| |
| offset_type KeyLen = |
| endian::readNext<offset_type, llvm::endianness::little>(D); |
| offset_type DataLen = |
| endian::readNext<offset_type, llvm::endianness::little>(D); |
| return std::make_pair(KeyLen, DataLen); |
| } |
| |
| uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) { |
| using namespace support; |
| return endian::readNext<external_key_type, llvm::endianness::little>(D); |
| } |
| |
| data_type ReadData(uint64_t K, const unsigned char *D, |
| offset_type /*Unused*/) { |
| return Frame::deserialize(D); |
| } |
| }; |
| |
| // Trait for writing call stacks to the on-disk hash table. |
| class CallStackWriterTrait { |
| public: |
| using key_type = CallStackId; |
| using key_type_ref = CallStackId; |
| |
| using data_type = llvm::SmallVector<FrameId>; |
| using data_type_ref = llvm::SmallVector<FrameId> &; |
| |
| using hash_value_type = CallStackId; |
| using offset_type = uint64_t; |
| |
| static hash_value_type ComputeHash(key_type_ref K) { return K; } |
| |
| static std::pair<offset_type, offset_type> |
| EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) { |
| using namespace support; |
| endian::Writer LE(Out, llvm::endianness::little); |
| // We do not explicitly emit the key length because it is a constant. |
| offset_type N = sizeof(K); |
| offset_type M = sizeof(FrameId) * V.size(); |
| LE.write<offset_type>(M); |
| return std::make_pair(N, M); |
| } |
| |
| void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) { |
| using namespace support; |
| endian::Writer LE(Out, llvm::endianness::little); |
| LE.write<key_type>(K); |
| } |
| |
| void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V, |
| offset_type /*Unused*/) { |
| using namespace support; |
| endian::Writer LE(Out, llvm::endianness::little); |
| // Emit the frames. We do not explicitly emit the length of the vector |
| // because it can be inferred from the data length. |
| for (FrameId F : V) |
| LE.write<FrameId>(F); |
| } |
| }; |
| |
| // Trait for reading call stack mappings from the on-disk hash table. |
| class CallStackLookupTrait { |
| public: |
| using data_type = const llvm::SmallVector<FrameId>; |
| using internal_key_type = CallStackId; |
| using external_key_type = CallStackId; |
| using hash_value_type = CallStackId; |
| using offset_type = uint64_t; |
| |
| static bool EqualKey(internal_key_type A, internal_key_type B) { |
| return A == B; |
| } |
| static uint64_t GetInternalKey(internal_key_type K) { return K; } |
| static uint64_t GetExternalKey(external_key_type K) { return K; } |
| |
| hash_value_type ComputeHash(internal_key_type K) { return K; } |
| |
| static std::pair<offset_type, offset_type> |
| ReadKeyDataLength(const unsigned char *&D) { |
| using namespace support; |
| |
| // We do not explicitly read the key length because it is a constant. |
| offset_type KeyLen = sizeof(external_key_type); |
| offset_type DataLen = |
| endian::readNext<offset_type, llvm::endianness::little>(D); |
| return std::make_pair(KeyLen, DataLen); |
| } |
| |
| uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) { |
| using namespace support; |
| return endian::readNext<external_key_type, llvm::endianness::little>(D); |
| } |
| |
| data_type ReadData(uint64_t K, const unsigned char *D, offset_type Length) { |
| using namespace support; |
| llvm::SmallVector<FrameId> CS; |
| // Derive the number of frames from the data length. |
| uint64_t NumFrames = Length / sizeof(FrameId); |
| assert(Length % sizeof(FrameId) == 0); |
| CS.reserve(NumFrames); |
| for (size_t I = 0; I != NumFrames; ++I) { |
| FrameId F = endian::readNext<FrameId, llvm::endianness::little>(D); |
| CS.push_back(F); |
| } |
| return CS; |
| } |
| }; |
| |
| // Compute a CallStackId for a given call stack. |
| CallStackId hashCallStack(ArrayRef<FrameId> CS); |
| |
| namespace detail { |
| // "Dereference" the iterator from DenseMap or OnDiskChainedHashTable. We have |
| // to do so in one of two different ways depending on the type of the hash |
| // table. |
| template <typename value_type, typename IterTy> |
| value_type DerefIterator(IterTy Iter) { |
| using deref_type = llvm::remove_cvref_t<decltype(*Iter)>; |
| if constexpr (std::is_same_v<deref_type, value_type>) |
| return *Iter; |
| else |
| return Iter->second; |
| } |
| } // namespace detail |
| |
| // A function object that returns a frame for a given FrameId. |
| template <typename MapTy> struct FrameIdConverter { |
| std::optional<FrameId> LastUnmappedId; |
| MapTy ⤅ |
| |
| FrameIdConverter() = delete; |
| FrameIdConverter(MapTy &Map) : Map(Map) {} |
| |
| // Delete the copy constructor and copy assignment operator to avoid a |
| // situation where a copy of FrameIdConverter gets an error in LastUnmappedId |
| // while the original instance doesn't. |
| FrameIdConverter(const FrameIdConverter &) = delete; |
| FrameIdConverter &operator=(const FrameIdConverter &) = delete; |
| |
| Frame operator()(FrameId Id) { |
| auto Iter = Map.find(Id); |
| if (Iter == Map.end()) { |
| LastUnmappedId = Id; |
| return Frame(0, 0, 0, false); |
| } |
| return detail::DerefIterator<Frame>(Iter); |
| } |
| }; |
| |
| // A function object that returns a call stack for a given CallStackId. |
| template <typename MapTy> struct CallStackIdConverter { |
| std::optional<CallStackId> LastUnmappedId; |
| MapTy ⤅ |
| llvm::function_ref<Frame(FrameId)> FrameIdToFrame; |
| |
| CallStackIdConverter() = delete; |
| CallStackIdConverter(MapTy &Map, |
| llvm::function_ref<Frame(FrameId)> FrameIdToFrame) |
| : Map(Map), FrameIdToFrame(FrameIdToFrame) {} |
| |
| // Delete the copy constructor and copy assignment operator to avoid a |
| // situation where a copy of CallStackIdConverter gets an error in |
| // LastUnmappedId while the original instance doesn't. |
| CallStackIdConverter(const CallStackIdConverter &) = delete; |
| CallStackIdConverter &operator=(const CallStackIdConverter &) = delete; |
| |
| std::vector<Frame> operator()(CallStackId CSId) { |
| std::vector<Frame> Frames; |
| auto CSIter = Map.find(CSId); |
| if (CSIter == Map.end()) { |
| LastUnmappedId = CSId; |
| } else { |
| llvm::SmallVector<FrameId> CS = |
| detail::DerefIterator<llvm::SmallVector<FrameId>>(CSIter); |
| Frames.reserve(CS.size()); |
| for (FrameId Id : CS) |
| Frames.push_back(FrameIdToFrame(Id)); |
| } |
| return Frames; |
| } |
| }; |
| |
| // A function object that returns a Frame stored at a given index into the Frame |
| // array in the profile. |
| struct LinearFrameIdConverter { |
| const unsigned char *FrameBase; |
| |
| LinearFrameIdConverter() = delete; |
| LinearFrameIdConverter(const unsigned char *FrameBase) |
| : FrameBase(FrameBase) {} |
| |
| Frame operator()(LinearFrameId LinearId) { |
| uint64_t Offset = static_cast<uint64_t>(LinearId) * Frame::serializedSize(); |
| return Frame::deserialize(FrameBase + Offset); |
| } |
| }; |
| |
| // A function object that returns a call stack stored at a given index into the |
| // call stack array in the profile. |
| struct LinearCallStackIdConverter { |
| const unsigned char *CallStackBase; |
| std::function<Frame(LinearFrameId)> FrameIdToFrame; |
| |
| LinearCallStackIdConverter() = delete; |
| LinearCallStackIdConverter(const unsigned char *CallStackBase, |
| std::function<Frame(LinearFrameId)> FrameIdToFrame) |
| : CallStackBase(CallStackBase), FrameIdToFrame(FrameIdToFrame) {} |
| |
| std::vector<Frame> operator()(LinearCallStackId LinearCSId) { |
| std::vector<Frame> Frames; |
| |
| const unsigned char *Ptr = |
| CallStackBase + |
| static_cast<uint64_t>(LinearCSId) * sizeof(LinearFrameId); |
| uint32_t NumFrames = |
| support::endian::readNext<uint32_t, llvm::endianness::little>(Ptr); |
| Frames.reserve(NumFrames); |
| for (; NumFrames; --NumFrames) { |
| LinearFrameId Elem = |
| support::endian::read<LinearFrameId, llvm::endianness::little>(Ptr); |
| // Follow a pointer to the parent, if any. See comments below on |
| // CallStackRadixTreeBuilder for the description of the radix tree format. |
| if (static_cast<std::make_signed_t<LinearFrameId>>(Elem) < 0) { |
| Ptr += (-Elem) * sizeof(LinearFrameId); |
| Elem = |
| support::endian::read<LinearFrameId, llvm::endianness::little>(Ptr); |
| } |
| // We shouldn't encounter another pointer. |
| assert(static_cast<std::make_signed_t<LinearFrameId>>(Elem) >= 0); |
| Frames.push_back(FrameIdToFrame(Elem)); |
| Ptr += sizeof(LinearFrameId); |
| } |
| |
| return Frames; |
| } |
| }; |
| |
| struct IndexedMemProfData { |
| // A map to hold memprof data per function. The lower 64 bits obtained from |
| // the md5 hash of the function name is used to index into the map. |
| llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> RecordData; |
| |
| // A map to hold frame id to frame mappings. The mappings are used to |
| // convert IndexedMemProfRecord to MemProfRecords with frame information |
| // inline. |
| llvm::MapVector<FrameId, Frame> FrameData; |
| |
| // A map to hold call stack id to call stacks. |
| llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>> CallStackData; |
| }; |
| |
| struct FrameStat { |
| // The number of occurrences of a given FrameId. |
| uint64_t Count = 0; |
| // The sum of indexes where a given FrameId shows up. |
| uint64_t PositionSum = 0; |
| }; |
| |
| // Compute a histogram of Frames in call stacks. |
| llvm::DenseMap<FrameId, FrameStat> |
| computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>> |
| &MemProfCallStackData); |
| |
| // Construct a radix tree of call stacks. |
| // |
| // A set of call stacks might look like: |
| // |
| // CallStackId 1: f1 -> f2 -> f3 |
| // CallStackId 2: f1 -> f2 -> f4 -> f5 |
| // CallStackId 3: f1 -> f2 -> f4 -> f6 |
| // CallStackId 4: f7 -> f8 -> f9 |
| // |
| // where each fn refers to a stack frame. |
| // |
| // Since we expect a lot of common prefixes, we can compress the call stacks |
| // into a radix tree like: |
| // |
| // CallStackId 1: f1 -> f2 -> f3 |
| // | |
| // CallStackId 2: +---> f4 -> f5 |
| // | |
| // CallStackId 3: +---> f6 |
| // |
| // CallStackId 4: f7 -> f8 -> f9 |
| // |
| // Now, we are interested in retrieving call stacks for a given CallStackId, so |
| // we just need a pointer from a given call stack to its parent. For example, |
| // CallStackId 2 would point to CallStackId 1 as a parent. |
| // |
| // We serialize the radix tree above into a single array along with the length |
| // of each call stack and pointers to the parent call stacks. |
| // |
| // Index: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 |
| // Array: L3 f9 f8 f7 L4 f6 J3 L4 f5 f4 J3 L3 f3 f2 f1 |
| // ^ ^ ^ ^ |
| // | | | | |
| // CallStackId 4: 0 --+ | | | |
| // CallStackId 3: 4 --------------+ | | |
| // CallStackId 2: 7 -----------------------+ | |
| // CallStackId 1: 11 -----------------------------------+ |
| // |
| // - LN indicates the length of a call stack, encoded as ordinary integer N. |
| // |
| // - JN indicates a pointer to the parent, encoded as -N. |
| // |
| // The radix tree allows us to reconstruct call stacks in the leaf-to-root |
| // order as we scan the array from left ro right while following pointers to |
| // parents along the way |
| // |
| // For example, if we are decoding CallStackId 2, we start a forward traversal |
| // at Index 7, noting the call stack length of 4 and obtaining f5 and f4. When |
| // we see J3 at Index 10, we resume a forward traversal at Index 13 = 10 + 3, |
| // picking up f2 and f1. We are done after collecting 4 frames as indicated at |
| // the beginning of the traversal. |
| // |
| // On-disk IndexedMemProfRecord will refer to call stacks by their indexes into |
| // the radix tree array, so we do not explicitly encode mappings like: |
| // "CallStackId 1 -> 11". |
| class CallStackRadixTreeBuilder { |
| // The radix tree array. |
| std::vector<LinearFrameId> RadixArray; |
| |
| // Mapping from CallStackIds to indexes into RadixArray. |
| llvm::DenseMap<CallStackId, LinearCallStackId> CallStackPos; |
| |
| // In build, we partition a given call stack into two parts -- the prefix |
| // that's common with the previously encoded call stack and the frames beyond |
| // the common prefix -- the unique portion. Then we want to find out where |
| // the common prefix is stored in RadixArray so that we can link the unique |
| // portion to the common prefix. Indexes, declared below, helps with our |
| // needs. Intuitively, Indexes tells us where each of the previously encoded |
| // call stack is stored in RadixArray. More formally, Indexes satisfies: |
| // |
| // RadixArray[Indexes[I]] == Prev[I] |
| // |
| // for every I, where Prev is the the call stack in the root-to-leaf order |
| // previously encoded by build. (Note that Prev, as passed to |
| // encodeCallStack, is in the leaf-to-root order.) |
| // |
| // For example, if the call stack being encoded shares 5 frames at the root of |
| // the call stack with the previously encoded call stack, |
| // RadixArray[Indexes[0]] is the root frame of the common prefix. |
| // RadixArray[Indexes[5 - 1]] is the last frame of the common prefix. |
| std::vector<LinearCallStackId> Indexes; |
| |
| using CSIdPair = std::pair<CallStackId, llvm::SmallVector<FrameId>>; |
| |
| // Encode a call stack into RadixArray. Return the starting index within |
| // RadixArray. |
| LinearCallStackId encodeCallStack( |
| const llvm::SmallVector<FrameId> *CallStack, |
| const llvm::SmallVector<FrameId> *Prev, |
| const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes); |
| |
| public: |
| CallStackRadixTreeBuilder() = default; |
| |
| // Build a radix tree array. |
| void build(llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>> |
| &&MemProfCallStackData, |
| const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes, |
| llvm::DenseMap<FrameId, FrameStat> &FrameHistogram); |
| |
| const std::vector<LinearFrameId> &getRadixArray() const { return RadixArray; } |
| |
| llvm::DenseMap<CallStackId, LinearCallStackId> takeCallStackPos() { |
| return std::move(CallStackPos); |
| } |
| }; |
| |
| // Verify that each CallStackId is computed with hashCallStack. This function |
| // is intended to help transition from CallStack to CSId in |
| // IndexedAllocationInfo. |
| void verifyIndexedMemProfRecord(const IndexedMemProfRecord &Record); |
| |
| // Verify that each CallStackId is computed with hashCallStack. This function |
| // is intended to help transition from CallStack to CSId in |
| // IndexedAllocationInfo. |
| void verifyFunctionProfileData( |
| const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> |
| &FunctionProfileData); |
| } // namespace memprof |
| } // namespace llvm |
| |
| #endif // LLVM_PROFILEDATA_MEMPROF_H_ |