| //===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file implements the class that reads LLVM sample profiles. It |
| // supports three file formats: text, binary and gcov. |
| // |
| // The textual representation is useful for debugging and testing purposes. The |
| // binary representation is more compact, resulting in smaller file sizes. |
| // |
| // The gcov encoding is the one generated by GCC's AutoFDO profile creation |
| // tool (https://github.com/google/autofdo) |
| // |
| // All three encodings can be used interchangeably as an input sample profile. |
| // |
| //===----------------------------------------------------------------------===// |
| |
#include "llvm/ProfileData/SampleProfReader.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/ProfileSummary.h"
#include "llvm/ProfileData/ProfileCommon.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <memory>
#include <set>
#include <system_error>
#include <vector>
| |
| using namespace llvm; |
| using namespace sampleprof; |
| |
| #define DEBUG_TYPE "samplepgo-reader" |
| |
| // This internal option specifies if the profile uses FS discriminators. |
| // It only applies to text, binary and compact binary format profiles. |
| // For ext-binary format profiles, the flag is set in the summary. |
| static cl::opt<bool> ProfileIsFSDisciminator( |
| "profile-isfs", cl::Hidden, cl::init(false), |
| cl::desc("Profile uses flow sensitive discriminators")); |
| |
| /// Dump the function profile for \p FName. |
| /// |
| /// \param FContext Name + context of the function to print. |
| /// \param OS Stream to emit the output to. |
| void SampleProfileReader::dumpFunctionProfile(SampleContext FContext, |
| raw_ostream &OS) { |
| OS << "Function: " << FContext.toString() << ": " << Profiles[FContext]; |
| } |
| |
| /// Dump all the function profiles found on stream \p OS. |
| void SampleProfileReader::dump(raw_ostream &OS) { |
| std::vector<NameFunctionSamples> V; |
| sortFuncProfiles(Profiles, V); |
| for (const auto &I : V) |
| dumpFunctionProfile(I.first, OS); |
| } |
| |
| /// Parse \p Input as function head. |
| /// |
| /// Parse one line of \p Input, and update function name in \p FName, |
| /// function's total sample count in \p NumSamples, function's entry |
| /// count in \p NumHeadSamples. |
| /// |
| /// \returns true if parsing is successful. |
| static bool ParseHead(const StringRef &Input, StringRef &FName, |
| uint64_t &NumSamples, uint64_t &NumHeadSamples) { |
| if (Input[0] == ' ') |
| return false; |
| size_t n2 = Input.rfind(':'); |
| size_t n1 = Input.rfind(':', n2 - 1); |
| FName = Input.substr(0, n1); |
| if (Input.substr(n1 + 1, n2 - n1 - 1).getAsInteger(10, NumSamples)) |
| return false; |
| if (Input.substr(n2 + 1).getAsInteger(10, NumHeadSamples)) |
| return false; |
| return true; |
| } |
| |
/// Returns true if line offset \p L fits in 16 bits, i.e. is at most 0xffff.
static bool isOffsetLegal(unsigned L) {
  const unsigned MaxLegalOffset = 0xffffu;
  return L <= MaxLegalOffset;
}
| |
| /// Parse \p Input that contains metadata. |
| /// Possible metadata: |
| /// - CFG Checksum information: |
| /// !CFGChecksum: 12345 |
| /// - CFG Checksum information: |
| /// !Attributes: 1 |
| /// Stores the FunctionHash (a.k.a. CFG Checksum) into \p FunctionHash. |
| static bool parseMetadata(const StringRef &Input, uint64_t &FunctionHash, |
| uint32_t &Attributes) { |
| if (Input.startswith("!CFGChecksum:")) { |
| StringRef CFGInfo = Input.substr(strlen("!CFGChecksum:")).trim(); |
| return !CFGInfo.getAsInteger(10, FunctionHash); |
| } |
| |
| if (Input.startswith("!Attributes:")) { |
| StringRef Attrib = Input.substr(strlen("!Attributes:")).trim(); |
| return !Attrib.getAsInteger(10, Attributes); |
| } |
| |
| return false; |
| } |
| |
/// Kind of an indented line inside a function profile in the text format.
enum class LineType {
  /// An inlined callsite record: `offset[.discriminator]: callee:count`.
  CallSiteProfile = 0,
  /// A body sample record: `offset[.discriminator]: count [target:count]*`.
  BodyProfile = 1,
  /// A `!`-prefixed metadata record.
  Metadata = 2,
};
| |
| /// Parse \p Input as line sample. |
| /// |
| /// \param Input input line. |
| /// \param LineTy Type of this line. |
| /// \param Depth the depth of the inline stack. |
| /// \param NumSamples total samples of the line/inlined callsite. |
| /// \param LineOffset line offset to the start of the function. |
| /// \param Discriminator discriminator of the line. |
| /// \param TargetCountMap map from indirect call target to count. |
| /// \param FunctionHash the function's CFG hash, used by pseudo probe. |
| /// |
| /// returns true if parsing is successful. |
| static bool ParseLine(const StringRef &Input, LineType &LineTy, uint32_t &Depth, |
| uint64_t &NumSamples, uint32_t &LineOffset, |
| uint32_t &Discriminator, StringRef &CalleeName, |
| DenseMap<StringRef, uint64_t> &TargetCountMap, |
| uint64_t &FunctionHash, uint32_t &Attributes) { |
| for (Depth = 0; Input[Depth] == ' '; Depth++) |
| ; |
| if (Depth == 0) |
| return false; |
| |
| if (Depth == 1 && Input[Depth] == '!') { |
| LineTy = LineType::Metadata; |
| return parseMetadata(Input.substr(Depth), FunctionHash, Attributes); |
| } |
| |
| size_t n1 = Input.find(':'); |
| StringRef Loc = Input.substr(Depth, n1 - Depth); |
| size_t n2 = Loc.find('.'); |
| if (n2 == StringRef::npos) { |
| if (Loc.getAsInteger(10, LineOffset) || !isOffsetLegal(LineOffset)) |
| return false; |
| Discriminator = 0; |
| } else { |
| if (Loc.substr(0, n2).getAsInteger(10, LineOffset)) |
| return false; |
| if (Loc.substr(n2 + 1).getAsInteger(10, Discriminator)) |
| return false; |
| } |
| |
| StringRef Rest = Input.substr(n1 + 2); |
| if (isDigit(Rest[0])) { |
| LineTy = LineType::BodyProfile; |
| size_t n3 = Rest.find(' '); |
| if (n3 == StringRef::npos) { |
| if (Rest.getAsInteger(10, NumSamples)) |
| return false; |
| } else { |
| if (Rest.substr(0, n3).getAsInteger(10, NumSamples)) |
| return false; |
| } |
| // Find call targets and their sample counts. |
| // Note: In some cases, there are symbols in the profile which are not |
| // mangled. To accommodate such cases, use colon + integer pairs as the |
| // anchor points. |
| // An example: |
| // _M_construct<char *>:1000 string_view<std::allocator<char> >:437 |
| // ":1000" and ":437" are used as anchor points so the string above will |
| // be interpreted as |
| // target: _M_construct<char *> |
| // count: 1000 |
| // target: string_view<std::allocator<char> > |
| // count: 437 |
| while (n3 != StringRef::npos) { |
| n3 += Rest.substr(n3).find_first_not_of(' '); |
| Rest = Rest.substr(n3); |
| n3 = Rest.find_first_of(':'); |
| if (n3 == StringRef::npos || n3 == 0) |
| return false; |
| |
| StringRef Target; |
| uint64_t count, n4; |
| while (true) { |
| // Get the segment after the current colon. |
| StringRef AfterColon = Rest.substr(n3 + 1); |
| // Get the target symbol before the current colon. |
| Target = Rest.substr(0, n3); |
| // Check if the word after the current colon is an integer. |
| n4 = AfterColon.find_first_of(' '); |
| n4 = (n4 != StringRef::npos) ? n3 + n4 + 1 : Rest.size(); |
| StringRef WordAfterColon = Rest.substr(n3 + 1, n4 - n3 - 1); |
| if (!WordAfterColon.getAsInteger(10, count)) |
| break; |
| |
| // Try to find the next colon. |
| uint64_t n5 = AfterColon.find_first_of(':'); |
| if (n5 == StringRef::npos) |
| return false; |
| n3 += n5 + 1; |
| } |
| |
| // An anchor point is found. Save the {target, count} pair |
| TargetCountMap[Target] = count; |
| if (n4 == Rest.size()) |
| break; |
| // Change n3 to the next blank space after colon + integer pair. |
| n3 = n4; |
| } |
| } else { |
| LineTy = LineType::CallSiteProfile; |
| size_t n3 = Rest.find_last_of(':'); |
| CalleeName = Rest.substr(0, n3); |
| if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples)) |
| return false; |
| } |
| return true; |
| } |
| |
| /// Load samples from a text file. |
| /// |
| /// See the documentation at the top of the file for an explanation of |
| /// the expected format. |
| /// |
| /// \returns true if the file was loaded successfully, false otherwise. |
| std::error_code SampleProfileReaderText::readImpl() { |
| line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#'); |
| sampleprof_error Result = sampleprof_error::success; |
| |
| InlineCallStack InlineStack; |
| uint32_t ProbeProfileCount = 0; |
| |
| // SeenMetadata tracks whether we have processed metadata for the current |
| // top-level function profile. |
| bool SeenMetadata = false; |
| |
| ProfileIsFS = ProfileIsFSDisciminator; |
| FunctionSamples::ProfileIsFS = ProfileIsFS; |
| for (; !LineIt.is_at_eof(); ++LineIt) { |
| if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#') |
| continue; |
| // Read the header of each function. |
| // |
| // Note that for function identifiers we are actually expecting |
| // mangled names, but we may not always get them. This happens when |
| // the compiler decides not to emit the function (e.g., it was inlined |
| // and removed). In this case, the binary will not have the linkage |
| // name for the function, so the profiler will emit the function's |
| // unmangled name, which may contain characters like ':' and '>' in its |
| // name (member functions, templates, etc). |
| // |
| // The only requirement we place on the identifier, then, is that it |
| // should not begin with a number. |
| if ((*LineIt)[0] != ' ') { |
| uint64_t NumSamples, NumHeadSamples; |
| StringRef FName; |
| if (!ParseHead(*LineIt, FName, NumSamples, NumHeadSamples)) { |
| reportError(LineIt.line_number(), |
| "Expected 'mangled_name:NUM:NUM', found " + *LineIt); |
| return sampleprof_error::malformed; |
| } |
| SeenMetadata = false; |
| SampleContext FContext(FName, CSNameTable); |
| if (FContext.hasContext()) |
| ++CSProfileCount; |
| Profiles[FContext] = FunctionSamples(); |
| FunctionSamples &FProfile = Profiles[FContext]; |
| FProfile.setContext(FContext); |
| MergeResult(Result, FProfile.addTotalSamples(NumSamples)); |
| MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples)); |
| InlineStack.clear(); |
| InlineStack.push_back(&FProfile); |
| } else { |
| uint64_t NumSamples; |
| StringRef FName; |
| DenseMap<StringRef, uint64_t> TargetCountMap; |
| uint32_t Depth, LineOffset, Discriminator; |
| LineType LineTy; |
| uint64_t FunctionHash = 0; |
| uint32_t Attributes = 0; |
| if (!ParseLine(*LineIt, LineTy, Depth, NumSamples, LineOffset, |
| Discriminator, FName, TargetCountMap, FunctionHash, |
| Attributes)) { |
| reportError(LineIt.line_number(), |
| "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " + |
| *LineIt); |
| return sampleprof_error::malformed; |
| } |
| if (SeenMetadata && LineTy != LineType::Metadata) { |
| // Metadata must be put at the end of a function profile. |
| reportError(LineIt.line_number(), |
| "Found non-metadata after metadata: " + *LineIt); |
| return sampleprof_error::malformed; |
| } |
| |
| // Here we handle FS discriminators. |
| Discriminator &= getDiscriminatorMask(); |
| |
| while (InlineStack.size() > Depth) { |
| InlineStack.pop_back(); |
| } |
| switch (LineTy) { |
| case LineType::CallSiteProfile: { |
| FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt( |
| LineLocation(LineOffset, Discriminator))[std::string(FName)]; |
| FSamples.setName(FName); |
| MergeResult(Result, FSamples.addTotalSamples(NumSamples)); |
| InlineStack.push_back(&FSamples); |
| break; |
| } |
| case LineType::BodyProfile: { |
| while (InlineStack.size() > Depth) { |
| InlineStack.pop_back(); |
| } |
| FunctionSamples &FProfile = *InlineStack.back(); |
| for (const auto &name_count : TargetCountMap) { |
| MergeResult(Result, FProfile.addCalledTargetSamples( |
| LineOffset, Discriminator, name_count.first, |
| name_count.second)); |
| } |
| MergeResult(Result, FProfile.addBodySamples(LineOffset, Discriminator, |
| NumSamples)); |
| break; |
| } |
| case LineType::Metadata: { |
| FunctionSamples &FProfile = *InlineStack.back(); |
| if (FunctionHash) { |
| FProfile.setFunctionHash(FunctionHash); |
| ++ProbeProfileCount; |
| } |
| if (Attributes) |
| FProfile.getContext().setAllAttributes(Attributes); |
| SeenMetadata = true; |
| break; |
| } |
| } |
| } |
| } |
| |
| assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) && |
| "Cannot have both context-sensitive and regular profile"); |
| ProfileIsCS = (CSProfileCount > 0); |
| assert((ProbeProfileCount == 0 || ProbeProfileCount == Profiles.size()) && |
| "Cannot have both probe-based profiles and regular profiles"); |
| ProfileIsProbeBased = (ProbeProfileCount > 0); |
| FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased; |
| FunctionSamples::ProfileIsCS = ProfileIsCS; |
| |
| if (Result == sampleprof_error::success) |
| computeSummary(); |
| |
| return Result; |
| } |
| |
| bool SampleProfileReaderText::hasFormat(const MemoryBuffer &Buffer) { |
| bool result = false; |
| |
| // Check that the first non-comment line is a valid function header. |
| line_iterator LineIt(Buffer, /*SkipBlanks=*/true, '#'); |
| if (!LineIt.is_at_eof()) { |
| if ((*LineIt)[0] != ' ') { |
| uint64_t NumSamples, NumHeadSamples; |
| StringRef FName; |
| result = ParseHead(*LineIt, FName, NumSamples, NumHeadSamples); |
| } |
| } |
| |
| return result; |
| } |
| |
| template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() { |
| unsigned NumBytesRead = 0; |
| std::error_code EC; |
| uint64_t Val = decodeULEB128(Data, &NumBytesRead); |
| |
| if (Val > std::numeric_limits<T>::max()) |
| EC = sampleprof_error::malformed; |
| else if (Data + NumBytesRead > End) |
| EC = sampleprof_error::truncated; |
| else |
| EC = sampleprof_error::success; |
| |
| if (EC) { |
| reportError(0, EC.message()); |
| return EC; |
| } |
| |
| Data += NumBytesRead; |
| return static_cast<T>(Val); |
| } |
| |
| ErrorOr<StringRef> SampleProfileReaderBinary::readString() { |
| std::error_code EC; |
| StringRef Str(reinterpret_cast<const char *>(Data)); |
| if (Data + Str.size() + 1 > End) { |
| EC = sampleprof_error::truncated; |
| reportError(0, EC.message()); |
| return EC; |
| } |
| |
| Data += Str.size() + 1; |
| return Str; |
| } |
| |
| template <typename T> |
| ErrorOr<T> SampleProfileReaderBinary::readUnencodedNumber() { |
| std::error_code EC; |
| |
| if (Data + sizeof(T) > End) { |
| EC = sampleprof_error::truncated; |
| reportError(0, EC.message()); |
| return EC; |
| } |
| |
| using namespace support; |
| T Val = endian::readNext<T, little, unaligned>(Data); |
| return Val; |
| } |
| |
| template <typename T> |
| inline ErrorOr<uint32_t> SampleProfileReaderBinary::readStringIndex(T &Table) { |
| std::error_code EC; |
| auto Idx = readNumber<uint32_t>(); |
| if (std::error_code EC = Idx.getError()) |
| return EC; |
| if (*Idx >= Table.size()) |
| return sampleprof_error::truncated_name_table; |
| return *Idx; |
| } |
| |
| ErrorOr<StringRef> SampleProfileReaderBinary::readStringFromTable() { |
| auto Idx = readStringIndex(NameTable); |
| if (std::error_code EC = Idx.getError()) |
| return EC; |
| |
| return NameTable[*Idx]; |
| } |
| |
| ErrorOr<SampleContext> SampleProfileReaderBinary::readSampleContextFromTable() { |
| auto FName(readStringFromTable()); |
| if (std::error_code EC = FName.getError()) |
| return EC; |
| return SampleContext(*FName); |
| } |
| |
| ErrorOr<StringRef> SampleProfileReaderExtBinaryBase::readStringFromTable() { |
| if (!FixedLengthMD5) |
| return SampleProfileReaderBinary::readStringFromTable(); |
| |
| // read NameTable index. |
| auto Idx = readStringIndex(NameTable); |
| if (std::error_code EC = Idx.getError()) |
| return EC; |
| |
| // Check whether the name to be accessed has been accessed before, |
| // if not, read it from memory directly. |
| StringRef &SR = NameTable[*Idx]; |
| if (SR.empty()) { |
| const uint8_t *SavedData = Data; |
| Data = MD5NameMemStart + ((*Idx) * sizeof(uint64_t)); |
| auto FID = readUnencodedNumber<uint64_t>(); |
| if (std::error_code EC = FID.getError()) |
| return EC; |
| // Save the string converted from uint64_t in MD5StringBuf. All the |
| // references to the name are all StringRefs refering to the string |
| // in MD5StringBuf. |
| MD5StringBuf->push_back(std::to_string(*FID)); |
| SR = MD5StringBuf->back(); |
| Data = SavedData; |
| } |
| return SR; |
| } |
| |
| ErrorOr<StringRef> SampleProfileReaderCompactBinary::readStringFromTable() { |
| auto Idx = readStringIndex(NameTable); |
| if (std::error_code EC = Idx.getError()) |
| return EC; |
| |
| return StringRef(NameTable[*Idx]); |
| } |
| |
| std::error_code |
| SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) { |
| auto NumSamples = readNumber<uint64_t>(); |
| if (std::error_code EC = NumSamples.getError()) |
| return EC; |
| FProfile.addTotalSamples(*NumSamples); |
| |
| // Read the samples in the body. |
| auto NumRecords = readNumber<uint32_t>(); |
| if (std::error_code EC = NumRecords.getError()) |
| return EC; |
| |
| for (uint32_t I = 0; I < *NumRecords; ++I) { |
| auto LineOffset = readNumber<uint64_t>(); |
| if (std::error_code EC = LineOffset.getError()) |
| return EC; |
| |
| if (!isOffsetLegal(*LineOffset)) { |
| return std::error_code(); |
| } |
| |
| auto Discriminator = readNumber<uint64_t>(); |
| if (std::error_code EC = Discriminator.getError()) |
| return EC; |
| |
| auto NumSamples = readNumber<uint64_t>(); |
| if (std::error_code EC = NumSamples.getError()) |
| return EC; |
| |
| auto NumCalls = readNumber<uint32_t>(); |
| if (std::error_code EC = NumCalls.getError()) |
| return EC; |
| |
| // Here we handle FS discriminators: |
| uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask(); |
| |
| for (uint32_t J = 0; J < *NumCalls; ++J) { |
| auto CalledFunction(readStringFromTable()); |
| if (std::error_code EC = CalledFunction.getError()) |
| return EC; |
| |
| auto CalledFunctionSamples = readNumber<uint64_t>(); |
| if (std::error_code EC = CalledFunctionSamples.getError()) |
| return EC; |
| |
| FProfile.addCalledTargetSamples(*LineOffset, DiscriminatorVal, |
| *CalledFunction, *CalledFunctionSamples); |
| } |
| |
| FProfile.addBodySamples(*LineOffset, DiscriminatorVal, *NumSamples); |
| } |
| |
| // Read all the samples for inlined function calls. |
| auto NumCallsites = readNumber<uint32_t>(); |
| if (std::error_code EC = NumCallsites.getError()) |
| return EC; |
| |
| for (uint32_t J = 0; J < *NumCallsites; ++J) { |
| auto LineOffset = readNumber<uint64_t>(); |
| if (std::error_code EC = LineOffset.getError()) |
| return EC; |
| |
| auto Discriminator = readNumber<uint64_t>(); |
| if (std::error_code EC = Discriminator.getError()) |
| return EC; |
| |
| auto FName(readStringFromTable()); |
| if (std::error_code EC = FName.getError()) |
| return EC; |
| |
| // Here we handle FS discriminators: |
| uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask(); |
| |
| FunctionSamples &CalleeProfile = FProfile.functionSamplesAt( |
| LineLocation(*LineOffset, DiscriminatorVal))[std::string(*FName)]; |
| CalleeProfile.setName(*FName); |
| if (std::error_code EC = readProfile(CalleeProfile)) |
| return EC; |
| } |
| |
| return sampleprof_error::success; |
| } |
| |
| std::error_code |
| SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) { |
| Data = Start; |
| auto NumHeadSamples = readNumber<uint64_t>(); |
| if (std::error_code EC = NumHeadSamples.getError()) |
| return EC; |
| |
| ErrorOr<SampleContext> FContext(readSampleContextFromTable()); |
| if (std::error_code EC = FContext.getError()) |
| return EC; |
| |
| Profiles[*FContext] = FunctionSamples(); |
| FunctionSamples &FProfile = Profiles[*FContext]; |
| FProfile.setContext(*FContext); |
| FProfile.addHeadSamples(*NumHeadSamples); |
| |
| if (FContext->hasContext()) |
| CSProfileCount++; |
| |
| if (std::error_code EC = readProfile(FProfile)) |
| return EC; |
| return sampleprof_error::success; |
| } |
| |
| std::error_code SampleProfileReaderBinary::readImpl() { |
| ProfileIsFS = ProfileIsFSDisciminator; |
| FunctionSamples::ProfileIsFS = ProfileIsFS; |
| while (!at_eof()) { |
| if (std::error_code EC = readFuncProfile(Data)) |
| return EC; |
| } |
| |
| return sampleprof_error::success; |
| } |
| |
| ErrorOr<SampleContextFrames> |
| SampleProfileReaderExtBinaryBase::readContextFromTable() { |
| auto ContextIdx = readNumber<uint32_t>(); |
| if (std::error_code EC = ContextIdx.getError()) |
| return EC; |
| if (*ContextIdx >= CSNameTable->size()) |
| return sampleprof_error::truncated_name_table; |
| return (*CSNameTable)[*ContextIdx]; |
| } |
| |
| ErrorOr<SampleContext> |
| SampleProfileReaderExtBinaryBase::readSampleContextFromTable() { |
| if (ProfileIsCS) { |
| auto FContext(readContextFromTable()); |
| if (std::error_code EC = FContext.getError()) |
| return EC; |
| return SampleContext(*FContext); |
| } else { |
| auto FName(readStringFromTable()); |
| if (std::error_code EC = FName.getError()) |
| return EC; |
| return SampleContext(*FName); |
| } |
| } |
| |
| std::error_code SampleProfileReaderExtBinaryBase::readOneSection( |
| const uint8_t *Start, uint64_t Size, const SecHdrTableEntry &Entry) { |
| Data = Start; |
| End = Start + Size; |
| switch (Entry.Type) { |
| case SecProfSummary: |
| if (std::error_code EC = readSummary()) |
| return EC; |
| if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial)) |
| Summary->setPartialProfile(true); |
| if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext)) |
| FunctionSamples::ProfileIsCS = ProfileIsCS = true; |
| if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator)) |
| FunctionSamples::ProfileIsFS = ProfileIsFS = true; |
| break; |
| case SecNameTable: { |
| FixedLengthMD5 = |
| hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5); |
| bool UseMD5 = hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name); |
| assert((!FixedLengthMD5 || UseMD5) && |
| "If FixedLengthMD5 is true, UseMD5 has to be true"); |
| FunctionSamples::HasUniqSuffix = |
| hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix); |
| if (std::error_code EC = readNameTableSec(UseMD5)) |
| return EC; |
| break; |
| } |
| case SecCSNameTable: { |
| if (std::error_code EC = readCSNameTableSec()) |
| return EC; |
| break; |
| } |
| case SecLBRProfile: |
| if (std::error_code EC = readFuncProfiles()) |
| return EC; |
| break; |
| case SecFuncOffsetTable: |
| FuncOffsetsOrdered = hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered); |
| if (std::error_code EC = readFuncOffsetTable()) |
| return EC; |
| break; |
| case SecFuncMetadata: { |
| ProfileIsProbeBased = |
| hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased); |
| FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased; |
| bool HasAttribute = |
| hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute); |
| if (std::error_code EC = readFuncMetadata(HasAttribute)) |
| return EC; |
| break; |
| } |
| case SecProfileSymbolList: |
| if (std::error_code EC = readProfileSymbolList()) |
| return EC; |
| break; |
| default: |
| if (std::error_code EC = readCustomSection(Entry)) |
| return EC; |
| break; |
| } |
| return sampleprof_error::success; |
| } |
| |
| bool SampleProfileReaderExtBinaryBase::collectFuncsFromModule() { |
| if (!M) |
| return false; |
| FuncsToUse.clear(); |
| for (auto &F : *M) |
| FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F)); |
| return true; |
| } |
| |
| std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() { |
| // If there are more than one FuncOffsetTable, the profile read associated |
| // with previous FuncOffsetTable has to be done before next FuncOffsetTable |
| // is read. |
| FuncOffsetTable.clear(); |
| |
| auto Size = readNumber<uint64_t>(); |
| if (std::error_code EC = Size.getError()) |
| return EC; |
| |
| FuncOffsetTable.reserve(*Size); |
| |
| if (FuncOffsetsOrdered) { |
| OrderedFuncOffsets = |
| std::make_unique<std::vector<std::pair<SampleContext, uint64_t>>>(); |
| OrderedFuncOffsets->reserve(*Size); |
| } |
| |
| for (uint32_t I = 0; I < *Size; ++I) { |
| auto FContext(readSampleContextFromTable()); |
| if (std::error_code EC = FContext.getError()) |
| return EC; |
| |
| auto Offset = readNumber<uint64_t>(); |
| if (std::error_code EC = Offset.getError()) |
| return EC; |
| |
| FuncOffsetTable[*FContext] = *Offset; |
| if (FuncOffsetsOrdered) |
| OrderedFuncOffsets->emplace_back(*FContext, *Offset); |
| } |
| |
| return sampleprof_error::success; |
| } |
| |
| std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() { |
| // Collect functions used by current module if the Reader has been |
| // given a module. |
| // collectFuncsFromModule uses FunctionSamples::getCanonicalFnName |
| // which will query FunctionSamples::HasUniqSuffix, so it has to be |
| // called after FunctionSamples::HasUniqSuffix is set, i.e. after |
| // NameTable section is read. |
| bool LoadFuncsToBeUsed = collectFuncsFromModule(); |
| |
| // When LoadFuncsToBeUsed is false, load all the function profiles. |
| const uint8_t *Start = Data; |
| if (!LoadFuncsToBeUsed) { |
| while (Data < End) { |
| if (std::error_code EC = readFuncProfile(Data)) |
| return EC; |
| } |
| assert(Data == End && "More data is read than expected"); |
| } else { |
| // Load function profiles on demand. |
| if (Remapper) { |
| for (auto Name : FuncsToUse) { |
| Remapper->insert(Name); |
| } |
| } |
| |
| if (ProfileIsCS) { |
| DenseSet<uint64_t> FuncGuidsToUse; |
| if (useMD5()) { |
| for (auto Name : FuncsToUse) |
| FuncGuidsToUse.insert(Function::getGUID(Name)); |
| } |
| |
| // For each function in current module, load all context profiles for |
| // the function as well as their callee contexts which can help profile |
| // guided importing for ThinLTO. This can be achieved by walking |
| // through an ordered context container, where contexts are laid out |
| // as if they were walked in preorder of a context trie. While |
| // traversing the trie, a link to the highest common ancestor node is |
| // kept so that all of its decendants will be loaded. |
| assert(OrderedFuncOffsets.get() && |
| "func offset table should always be sorted in CS profile"); |
| const SampleContext *CommonContext = nullptr; |
| for (const auto &NameOffset : *OrderedFuncOffsets) { |
| const auto &FContext = NameOffset.first; |
| auto FName = FContext.getName(); |
| // For function in the current module, keep its farthest ancestor |
| // context. This can be used to load itself and its child and |
| // sibling contexts. |
| if ((useMD5() && FuncGuidsToUse.count(std::stoull(FName.data()))) || |
| (!useMD5() && (FuncsToUse.count(FName) || |
| (Remapper && Remapper->exist(FName))))) { |
| if (!CommonContext || !CommonContext->IsPrefixOf(FContext)) |
| CommonContext = &FContext; |
| } |
| |
| if (CommonContext == &FContext || |
| (CommonContext && CommonContext->IsPrefixOf(FContext))) { |
| // Load profile for the current context which originated from |
| // the common ancestor. |
| const uint8_t *FuncProfileAddr = Start + NameOffset.second; |
| assert(FuncProfileAddr < End && "out of LBRProfile section"); |
| if (std::error_code EC = readFuncProfile(FuncProfileAddr)) |
| return EC; |
| } |
| } |
| } else { |
| if (useMD5()) { |
| for (auto Name : FuncsToUse) { |
| auto GUID = std::to_string(MD5Hash(Name)); |
| auto iter = FuncOffsetTable.find(StringRef(GUID)); |
| if (iter == FuncOffsetTable.end()) |
| continue; |
| const uint8_t *FuncProfileAddr = Start + iter->second; |
| assert(FuncProfileAddr < End && "out of LBRProfile section"); |
| if (std::error_code EC = readFuncProfile(FuncProfileAddr)) |
| return EC; |
| } |
| } else { |
| for (auto NameOffset : FuncOffsetTable) { |
| SampleContext FContext(NameOffset.first); |
| auto FuncName = FContext.getName(); |
| if (!FuncsToUse.count(FuncName) && |
| (!Remapper || !Remapper->exist(FuncName))) |
| continue; |
| const uint8_t *FuncProfileAddr = Start + NameOffset.second; |
| assert(FuncProfileAddr < End && "out of LBRProfile section"); |
| if (std::error_code EC = readFuncProfile(FuncProfileAddr)) |
| return EC; |
| } |
| } |
| } |
| Data = End; |
| } |
| assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) && |
| "Cannot have both context-sensitive and regular profile"); |
| assert((!CSProfileCount || ProfileIsCS) && |
| "Section flag should be consistent with actual profile"); |
| return sampleprof_error::success; |
| } |
| |
| std::error_code SampleProfileReaderExtBinaryBase::readProfileSymbolList() { |
| if (!ProfSymList) |
| ProfSymList = std::make_unique<ProfileSymbolList>(); |
| |
| if (std::error_code EC = ProfSymList->read(Data, End - Data)) |
| return EC; |
| |
| Data = End; |
| return sampleprof_error::success; |
| } |
| |
| std::error_code SampleProfileReaderExtBinaryBase::decompressSection( |
| const uint8_t *SecStart, const uint64_t SecSize, |
| const uint8_t *&DecompressBuf, uint64_t &DecompressBufSize) { |
| Data = SecStart; |
| End = SecStart + SecSize; |
| auto DecompressSize = readNumber<uint64_t>(); |
| if (std::error_code EC = DecompressSize.getError()) |
| return EC; |
| DecompressBufSize = *DecompressSize; |
| |
| auto CompressSize = readNumber<uint64_t>(); |
| if (std::error_code EC = CompressSize.getError()) |
| return EC; |
| |
| if (!llvm::zlib::isAvailable()) |
| return sampleprof_error::zlib_unavailable; |
| |
| StringRef CompressedStrings(reinterpret_cast<const char *>(Data), |
| *CompressSize); |
| char *Buffer = Allocator.Allocate<char>(DecompressBufSize); |
| size_t UCSize = DecompressBufSize; |
| llvm::Error E = |
| zlib::uncompress(CompressedStrings, Buffer, UCSize); |
| if (E) |
| return sampleprof_error::uncompress_failed; |
| DecompressBuf = reinterpret_cast<const uint8_t *>(Buffer); |
| return sampleprof_error::success; |
| } |
| |
| std::error_code SampleProfileReaderExtBinaryBase::readImpl() { |
| const uint8_t *BufStart = |
| reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()); |
| |
| for (auto &Entry : SecHdrTable) { |
| // Skip empty section. |
| if (!Entry.Size) |
| continue; |
| |
| // Skip sections without context when SkipFlatProf is true. |
| if (SkipFlatProf && hasSecFlag(Entry, SecCommonFlags::SecFlagFlat)) |
| continue; |
| |
| const uint8_t *SecStart = BufStart + Entry.Offset; |
| uint64_t SecSize = Entry.Size; |
| |
| // If the section is compressed, decompress it into a buffer |
| // DecompressBuf before reading the actual data. The pointee of |
| // 'Data' will be changed to buffer hold by DecompressBuf |
| // temporarily when reading the actual data. |
| bool isCompressed = hasSecFlag(Entry, SecCommonFlags::SecFlagCompress); |
| if (isCompressed) { |
| const uint8_t *DecompressBuf; |
| uint64_t DecompressBufSize; |
| if (std::error_code EC = decompressSection( |
| SecStart, SecSize, DecompressBuf, DecompressBufSize)) |
| return EC; |
| SecStart = DecompressBuf; |
| SecSize = DecompressBufSize; |
| } |
| |
| if (std::error_code EC = readOneSection(SecStart, SecSize, Entry)) |
| return EC; |
| if (Data != SecStart + SecSize) |
| return sampleprof_error::malformed; |
| |
| // Change the pointee of 'Data' from DecompressBuf to original Buffer. |
| if (isCompressed) { |
| Data = BufStart + Entry.Offset; |
| End = BufStart + Buffer->getBufferSize(); |
| } |
| } |
| |
| return sampleprof_error::success; |
| } |
| |
| std::error_code SampleProfileReaderCompactBinary::readImpl() { |
| // Collect functions used by current module if the Reader has been |
| // given a module. |
| bool LoadFuncsToBeUsed = collectFuncsFromModule(); |
| ProfileIsFS = ProfileIsFSDisciminator; |
| FunctionSamples::ProfileIsFS = ProfileIsFS; |
| std::vector<uint64_t> OffsetsToUse; |
| if (!LoadFuncsToBeUsed) { |
| // load all the function profiles. |
| for (auto FuncEntry : FuncOffsetTable) { |
| OffsetsToUse.push_back(FuncEntry.second); |
| } |
| } else { |
| // load function profiles on demand. |
| for (auto Name : FuncsToUse) { |
| auto GUID = std::to_string(MD5Hash(Name)); |
| auto iter = FuncOffsetTable.find(StringRef(GUID)); |
| if (iter == FuncOffsetTable.end()) |
| continue; |
| OffsetsToUse.push_back(iter->second); |
| } |
| } |
| |
| for (auto Offset : OffsetsToUse) { |
| const uint8_t *SavedData = Data; |
| if (std::error_code EC = readFuncProfile( |
| reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) + |
| Offset)) |
| return EC; |
| Data = SavedData; |
| } |
| return sampleprof_error::success; |
| } |
| |
| std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) { |
| if (Magic == SPMagic()) |
| return sampleprof_error::success; |
| return sampleprof_error::bad_magic; |
| } |
| |
| std::error_code SampleProfileReaderExtBinary::verifySPMagic(uint64_t Magic) { |
| if (Magic == SPMagic(SPF_Ext_Binary)) |
| return sampleprof_error::success; |
| return sampleprof_error::bad_magic; |
| } |
| |
| std::error_code |
| SampleProfileReaderCompactBinary::verifySPMagic(uint64_t Magic) { |
| if (Magic == SPMagic(SPF_Compact_Binary)) |
| return sampleprof_error::success; |
| return sampleprof_error::bad_magic; |
| } |
| |
| std::error_code SampleProfileReaderBinary::readNameTable() { |
| auto Size = readNumber<uint32_t>(); |
| if (std::error_code EC = Size.getError()) |
| return EC; |
| NameTable.reserve(*Size + NameTable.size()); |
| for (uint32_t I = 0; I < *Size; ++I) { |
| auto Name(readString()); |
| if (std::error_code EC = Name.getError()) |
| return EC; |
| NameTable.push_back(*Name); |
| } |
| |
| return sampleprof_error::success; |
| } |
| |
| std::error_code SampleProfileReaderExtBinaryBase::readMD5NameTable() { |
| auto Size = readNumber<uint64_t>(); |
| if (std::error_code EC = Size.getError()) |
| return EC; |
| MD5StringBuf = std::make_unique<std::vector<std::string>>(); |
| MD5StringBuf->reserve(*Size); |
| if (FixedLengthMD5) { |
| // Preallocate and initialize NameTable so we can check whether a name |
| // index has been read before by checking whether the element in the |
| // NameTable is empty, meanwhile readStringIndex can do the boundary |
| // check using the size of NameTable. |
| NameTable.resize(*Size + NameTable.size()); |
| |
| MD5NameMemStart = Data; |
| Data = Data + (*Size) * sizeof(uint64_t); |
| return sampleprof_error::success; |
| } |
| NameTable.reserve(*Size); |
| for (uint32_t I = 0; I < *Size; ++I) { |
| auto FID = readNumber<uint64_t>(); |
| if (std::error_code EC = FID.getError()) |
| return EC; |
| MD5StringBuf->push_back(std::to_string(*FID)); |
| // NameTable is a vector of StringRef. Here it is pushing back a |
| // StringRef initialized with the last string in MD5stringBuf. |
| NameTable.push_back(MD5StringBuf->back()); |
| } |
| return sampleprof_error::success; |
| } |
| |
| std::error_code SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5) { |
| if (IsMD5) |
| return readMD5NameTable(); |
| return SampleProfileReaderBinary::readNameTable(); |
| } |
| |
| // Read in the CS name table section, which basically contains a list of context |
| // vectors. Each element of a context vector, aka a frame, refers to the |
| // underlying raw function names that are stored in the name table, as well as |
| // a callsite identifier that only makes sense for non-leaf frames. |
| std::error_code SampleProfileReaderExtBinaryBase::readCSNameTableSec() { |
| auto Size = readNumber<uint32_t>(); |
| if (std::error_code EC = Size.getError()) |
| return EC; |
| |
| std::vector<SampleContextFrameVector> *PNameVec = |
| new std::vector<SampleContextFrameVector>(); |
| PNameVec->reserve(*Size); |
| for (uint32_t I = 0; I < *Size; ++I) { |
| PNameVec->emplace_back(SampleContextFrameVector()); |
| auto ContextSize = readNumber<uint32_t>(); |
| if (std::error_code EC = ContextSize.getError()) |
| return EC; |
| for (uint32_t J = 0; J < *ContextSize; ++J) { |
| auto FName(readStringFromTable()); |
| if (std::error_code EC = FName.getError()) |
| return EC; |
| auto LineOffset = readNumber<uint64_t>(); |
| if (std::error_code EC = LineOffset.getError()) |
| return EC; |
| |
| if (!isOffsetLegal(*LineOffset)) |
| return std::error_code(); |
| |
| auto Discriminator = readNumber<uint64_t>(); |
| if (std::error_code EC = Discriminator.getError()) |
| return EC; |
| |
| PNameVec->back().emplace_back( |
| FName.get(), LineLocation(LineOffset.get(), Discriminator.get())); |
| } |
| } |
| |
| // From this point the underlying object of CSNameTable should be immutable. |
| CSNameTable.reset(PNameVec); |
| return sampleprof_error::success; |
| } |
| |
| std::error_code |
| SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) { |
| while (Data < End) { |
| auto FContext(readSampleContextFromTable()); |
| if (std::error_code EC = FContext.getError()) |
| return EC; |
| |
| bool ProfileInMap = Profiles.count(*FContext); |
| if (ProfileIsProbeBased) { |
| auto Checksum = readNumber<uint64_t>(); |
| if (std::error_code EC = Checksum.getError()) |
| return EC; |
| if (ProfileInMap) |
| Profiles[*FContext].setFunctionHash(*Checksum); |
| } |
| |
| if (ProfileHasAttribute) { |
| auto Attributes = readNumber<uint32_t>(); |
| if (std::error_code EC = Attributes.getError()) |
| return EC; |
| if (ProfileInMap) |
| Profiles[*FContext].getContext().setAllAttributes(*Attributes); |
| } |
| } |
| |
| assert(Data == End && "More data is read than expected"); |
| return sampleprof_error::success; |
| } |
| |
| std::error_code SampleProfileReaderCompactBinary::readNameTable() { |
| auto Size = readNumber<uint64_t>(); |
| if (std::error_code EC = Size.getError()) |
| return EC; |
| NameTable.reserve(*Size); |
| for (uint32_t I = 0; I < *Size; ++I) { |
| auto FID = readNumber<uint64_t>(); |
| if (std::error_code EC = FID.getError()) |
| return EC; |
| NameTable.push_back(std::to_string(*FID)); |
| } |
| return sampleprof_error::success; |
| } |
| |
| std::error_code |
| SampleProfileReaderExtBinaryBase::readSecHdrTableEntry(uint32_t Idx) { |
| SecHdrTableEntry Entry; |
| auto Type = readUnencodedNumber<uint64_t>(); |
| if (std::error_code EC = Type.getError()) |
| return EC; |
| Entry.Type = static_cast<SecType>(*Type); |
| |
| auto Flags = readUnencodedNumber<uint64_t>(); |
| if (std::error_code EC = Flags.getError()) |
| return EC; |
| Entry.Flags = *Flags; |
| |
| auto Offset = readUnencodedNumber<uint64_t>(); |
| if (std::error_code EC = Offset.getError()) |
| return EC; |
| Entry.Offset = *Offset; |
| |
| auto Size = readUnencodedNumber<uint64_t>(); |
| if (std::error_code EC = Size.getError()) |
| return EC; |
| Entry.Size = *Size; |
| |
| Entry.LayoutIndex = Idx; |
| SecHdrTable.push_back(std::move(Entry)); |
| return sampleprof_error::success; |
| } |
| |
| std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTable() { |
| auto EntryNum = readUnencodedNumber<uint64_t>(); |
| if (std::error_code EC = EntryNum.getError()) |
| return EC; |
| |
| for (uint32_t i = 0; i < (*EntryNum); i++) |
| if (std::error_code EC = readSecHdrTableEntry(i)) |
| return EC; |
| |
| return sampleprof_error::success; |
| } |
| |
| std::error_code SampleProfileReaderExtBinaryBase::readHeader() { |
| const uint8_t *BufStart = |
| reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()); |
| Data = BufStart; |
| End = BufStart + Buffer->getBufferSize(); |
| |
| if (std::error_code EC = readMagicIdent()) |
| return EC; |
| |
| if (std::error_code EC = readSecHdrTable()) |
| return EC; |
| |
| return sampleprof_error::success; |
| } |
| |
| uint64_t SampleProfileReaderExtBinaryBase::getSectionSize(SecType Type) { |
| uint64_t Size = 0; |
| for (auto &Entry : SecHdrTable) { |
| if (Entry.Type == Type) |
| Size += Entry.Size; |
| } |
| return Size; |
| } |
| |
| uint64_t SampleProfileReaderExtBinaryBase::getFileSize() { |
| // Sections in SecHdrTable is not necessarily in the same order as |
| // sections in the profile because section like FuncOffsetTable needs |
| // to be written after section LBRProfile but needs to be read before |
| // section LBRProfile, so we cannot simply use the last entry in |
| // SecHdrTable to calculate the file size. |
| uint64_t FileSize = 0; |
| for (auto &Entry : SecHdrTable) { |
| FileSize = std::max(Entry.Offset + Entry.Size, FileSize); |
| } |
| return FileSize; |
| } |
| |
| static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) { |
| std::string Flags; |
| if (hasSecFlag(Entry, SecCommonFlags::SecFlagCompress)) |
| Flags.append("{compressed,"); |
| else |
| Flags.append("{"); |
| |
| if (hasSecFlag(Entry, SecCommonFlags::SecFlagFlat)) |
| Flags.append("flat,"); |
| |
| switch (Entry.Type) { |
| case SecNameTable: |
| if (hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5)) |
| Flags.append("fixlenmd5,"); |
| else if (hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name)) |
| Flags.append("md5,"); |
| if (hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix)) |
| Flags.append("uniq,"); |
| break; |
| case SecProfSummary: |
| if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial)) |
| Flags.append("partial,"); |
| if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext)) |
| Flags.append("context,"); |
| if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator)) |
| Flags.append("fs-discriminator,"); |
| break; |
| case SecFuncOffsetTable: |
| if (hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered)) |
| Flags.append("ordered,"); |
| break; |
| case SecFuncMetadata: |
| if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased)) |
| Flags.append("probe,"); |
| if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute)) |
| Flags.append("attr,"); |
| break; |
| default: |
| break; |
| } |
| char &last = Flags.back(); |
| if (last == ',') |
| last = '}'; |
| else |
| Flags.append("}"); |
| return Flags; |
| } |
| |
| bool SampleProfileReaderExtBinaryBase::dumpSectionInfo(raw_ostream &OS) { |
| uint64_t TotalSecsSize = 0; |
| for (auto &Entry : SecHdrTable) { |
| OS << getSecName(Entry.Type) << " - Offset: " << Entry.Offset |
| << ", Size: " << Entry.Size << ", Flags: " << getSecFlagsStr(Entry) |
| << "\n"; |
| ; |
| TotalSecsSize += Entry.Size; |
| } |
| uint64_t HeaderSize = SecHdrTable.front().Offset; |
| assert(HeaderSize + TotalSecsSize == getFileSize() && |
| "Size of 'header + sections' doesn't match the total size of profile"); |
| |
| OS << "Header Size: " << HeaderSize << "\n"; |
| OS << "Total Sections Size: " << TotalSecsSize << "\n"; |
| OS << "File Size: " << getFileSize() << "\n"; |
| return true; |
| } |
| |
| std::error_code SampleProfileReaderBinary::readMagicIdent() { |
| // Read and check the magic identifier. |
| auto Magic = readNumber<uint64_t>(); |
| if (std::error_code EC = Magic.getError()) |
| return EC; |
| else if (std::error_code EC = verifySPMagic(*Magic)) |
| return EC; |
| |
| // Read the version number. |
| auto Version = readNumber<uint64_t>(); |
| if (std::error_code EC = Version.getError()) |
| return EC; |
| else if (*Version != SPVersion()) |
| return sampleprof_error::unsupported_version; |
| |
| return sampleprof_error::success; |
| } |
| |
| std::error_code SampleProfileReaderBinary::readHeader() { |
| Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()); |
| End = Data + Buffer->getBufferSize(); |
| |
| if (std::error_code EC = readMagicIdent()) |
| return EC; |
| |
| if (std::error_code EC = readSummary()) |
| return EC; |
| |
| if (std::error_code EC = readNameTable()) |
| return EC; |
| return sampleprof_error::success; |
| } |
| |
| std::error_code SampleProfileReaderCompactBinary::readHeader() { |
| SampleProfileReaderBinary::readHeader(); |
| if (std::error_code EC = readFuncOffsetTable()) |
| return EC; |
| return sampleprof_error::success; |
| } |
| |
| std::error_code SampleProfileReaderCompactBinary::readFuncOffsetTable() { |
| auto TableOffset = readUnencodedNumber<uint64_t>(); |
| if (std::error_code EC = TableOffset.getError()) |
| return EC; |
| |
| const uint8_t *SavedData = Data; |
| const uint8_t *TableStart = |
| reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) + |
| *TableOffset; |
| Data = TableStart; |
| |
| auto Size = readNumber<uint64_t>(); |
| if (std::error_code EC = Size.getError()) |
| return EC; |
| |
| FuncOffsetTable.reserve(*Size); |
| for (uint32_t I = 0; I < *Size; ++I) { |
| auto FName(readStringFromTable()); |
| if (std::error_code EC = FName.getError()) |
| return EC; |
| |
| auto Offset = readNumber<uint64_t>(); |
| if (std::error_code EC = Offset.getError()) |
| return EC; |
| |
| FuncOffsetTable[*FName] = *Offset; |
| } |
| End = TableStart; |
| Data = SavedData; |
| return sampleprof_error::success; |
| } |
| |
| bool SampleProfileReaderCompactBinary::collectFuncsFromModule() { |
| if (!M) |
| return false; |
| FuncsToUse.clear(); |
| for (auto &F : *M) |
| FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F)); |
| return true; |
| } |
| |
| std::error_code SampleProfileReaderBinary::readSummaryEntry( |
| std::vector<ProfileSummaryEntry> &Entries) { |
| auto Cutoff = readNumber<uint64_t>(); |
| if (std::error_code EC = Cutoff.getError()) |
| return EC; |
| |
| auto MinBlockCount = readNumber<uint64_t>(); |
| if (std::error_code EC = MinBlockCount.getError()) |
| return EC; |
| |
| auto NumBlocks = readNumber<uint64_t>(); |
| if (std::error_code EC = NumBlocks.getError()) |
| return EC; |
| |
| Entries.emplace_back(*Cutoff, *MinBlockCount, *NumBlocks); |
| return sampleprof_error::success; |
| } |
| |
| std::error_code SampleProfileReaderBinary::readSummary() { |
| auto TotalCount = readNumber<uint64_t>(); |
| if (std::error_code EC = TotalCount.getError()) |
| return EC; |
| |
| auto MaxBlockCount = readNumber<uint64_t>(); |
| if (std::error_code EC = MaxBlockCount.getError()) |
| return EC; |
| |
| auto MaxFunctionCount = readNumber<uint64_t>(); |
| if (std::error_code EC = MaxFunctionCount.getError()) |
| return EC; |
| |
| auto NumBlocks = readNumber<uint64_t>(); |
| if (std::error_code EC = NumBlocks.getError()) |
| return EC; |
| |
| auto NumFunctions = readNumber<uint64_t>(); |
| if (std::error_code EC = NumFunctions.getError()) |
| return EC; |
| |
| auto NumSummaryEntries = readNumber<uint64_t>(); |
| if (std::error_code EC = NumSummaryEntries.getError()) |
| return EC; |
| |
| std::vector<ProfileSummaryEntry> Entries; |
| for (unsigned i = 0; i < *NumSummaryEntries; i++) { |
| std::error_code EC = readSummaryEntry(Entries); |
| if (EC != sampleprof_error::success) |
| return EC; |
| } |
| Summary = std::make_unique<ProfileSummary>( |
| ProfileSummary::PSK_Sample, Entries, *TotalCount, *MaxBlockCount, 0, |
| *MaxFunctionCount, *NumBlocks, *NumFunctions); |
| |
| return sampleprof_error::success; |
| } |
| |
| bool SampleProfileReaderRawBinary::hasFormat(const MemoryBuffer &Buffer) { |
| const uint8_t *Data = |
| reinterpret_cast<const uint8_t *>(Buffer.getBufferStart()); |
| uint64_t Magic = decodeULEB128(Data); |
| return Magic == SPMagic(); |
| } |
| |
| bool SampleProfileReaderExtBinary::hasFormat(const MemoryBuffer &Buffer) { |
| const uint8_t *Data = |
| reinterpret_cast<const uint8_t *>(Buffer.getBufferStart()); |
| uint64_t Magic = decodeULEB128(Data); |
| return Magic == SPMagic(SPF_Ext_Binary); |
| } |
| |
| bool SampleProfileReaderCompactBinary::hasFormat(const MemoryBuffer &Buffer) { |
| const uint8_t *Data = |
| reinterpret_cast<const uint8_t *>(Buffer.getBufferStart()); |
| uint64_t Magic = decodeULEB128(Data); |
| return Magic == SPMagic(SPF_Compact_Binary); |
| } |
| |
| std::error_code SampleProfileReaderGCC::skipNextWord() { |
| uint32_t dummy; |
| if (!GcovBuffer.readInt(dummy)) |
| return sampleprof_error::truncated; |
| return sampleprof_error::success; |
| } |
| |
| template <typename T> ErrorOr<T> SampleProfileReaderGCC::readNumber() { |
| if (sizeof(T) <= sizeof(uint32_t)) { |
| uint32_t Val; |
| if (GcovBuffer.readInt(Val) && Val <= std::numeric_limits<T>::max()) |
| return static_cast<T>(Val); |
| } else if (sizeof(T) <= sizeof(uint64_t)) { |
| uint64_t Val; |
| if (GcovBuffer.readInt64(Val) && Val <= std::numeric_limits<T>::max()) |
| return static_cast<T>(Val); |
| } |
| |
| std::error_code EC = sampleprof_error::malformed; |
| reportError(0, EC.message()); |
| return EC; |
| } |
| |
| ErrorOr<StringRef> SampleProfileReaderGCC::readString() { |
| StringRef Str; |
| if (!GcovBuffer.readString(Str)) |
| return sampleprof_error::truncated; |
| return Str; |
| } |
| |
| std::error_code SampleProfileReaderGCC::readHeader() { |
| // Read the magic identifier. |
| if (!GcovBuffer.readGCDAFormat()) |
| return sampleprof_error::unrecognized_format; |
| |
| // Read the version number. Note - the GCC reader does not validate this |
| // version, but the profile creator generates v704. |
| GCOV::GCOVVersion version; |
| if (!GcovBuffer.readGCOVVersion(version)) |
| return sampleprof_error::unrecognized_format; |
| |
| if (version != GCOV::V407) |
| return sampleprof_error::unsupported_version; |
| |
| // Skip the empty integer. |
| if (std::error_code EC = skipNextWord()) |
| return EC; |
| |
| return sampleprof_error::success; |
| } |
| |
| std::error_code SampleProfileReaderGCC::readSectionTag(uint32_t Expected) { |
| uint32_t Tag; |
| if (!GcovBuffer.readInt(Tag)) |
| return sampleprof_error::truncated; |
| |
| if (Tag != Expected) |
| return sampleprof_error::malformed; |
| |
| if (std::error_code EC = skipNextWord()) |
| return EC; |
| |
| return sampleprof_error::success; |
| } |
| |
| std::error_code SampleProfileReaderGCC::readNameTable() { |
| if (std::error_code EC = readSectionTag(GCOVTagAFDOFileNames)) |
| return EC; |
| |
| uint32_t Size; |
| if (!GcovBuffer.readInt(Size)) |
| return sampleprof_error::truncated; |
| |
| for (uint32_t I = 0; I < Size; ++I) { |
| StringRef Str; |
| if (!GcovBuffer.readString(Str)) |
| return sampleprof_error::truncated; |
| Names.push_back(std::string(Str)); |
| } |
| |
| return sampleprof_error::success; |
| } |
| |
| std::error_code SampleProfileReaderGCC::readFunctionProfiles() { |
| if (std::error_code EC = readSectionTag(GCOVTagAFDOFunction)) |
| return EC; |
| |
| uint32_t NumFunctions; |
| if (!GcovBuffer.readInt(NumFunctions)) |
| return sampleprof_error::truncated; |
| |
| InlineCallStack Stack; |
| for (uint32_t I = 0; I < NumFunctions; ++I) |
| if (std::error_code EC = readOneFunctionProfile(Stack, true, 0)) |
| return EC; |
| |
| computeSummary(); |
| return sampleprof_error::success; |
| } |
| |
| std::error_code SampleProfileReaderGCC::readOneFunctionProfile( |
| const InlineCallStack &InlineStack, bool Update, uint32_t Offset) { |
| uint64_t HeadCount = 0; |
| if (InlineStack.size() == 0) |
| if (!GcovBuffer.readInt64(HeadCount)) |
| return sampleprof_error::truncated; |
| |
| uint32_t NameIdx; |
| if (!GcovBuffer.readInt(NameIdx)) |
| return sampleprof_error::truncated; |
| |
| StringRef Name(Names[NameIdx]); |
| |
| uint32_t NumPosCounts; |
| if (!GcovBuffer.readInt(NumPosCounts)) |
| return sampleprof_error::truncated; |
| |
| uint32_t NumCallsites; |
| if (!GcovBuffer.readInt(NumCallsites)) |
| return sampleprof_error::truncated; |
| |
| FunctionSamples *FProfile = nullptr; |
| if (InlineStack.size() == 0) { |
| // If this is a top function that we have already processed, do not |
| // update its profile again. This happens in the presence of |
| // function aliases. Since these aliases share the same function |
| // body, there will be identical replicated profiles for the |
| // original function. In this case, we simply not bother updating |
| // the profile of the original function. |
| FProfile = &Profiles[Name]; |
| FProfile->addHeadSamples(HeadCount); |
| if (FProfile->getTotalSamples() > 0) |
| Update = false; |
| } else { |
| // Otherwise, we are reading an inlined instance. The top of the |
| // inline stack contains the profile of the caller. Insert this |
| // callee in the caller's CallsiteMap. |
| FunctionSamples *CallerProfile = InlineStack.front(); |
| uint32_t LineOffset = Offset >> 16; |
| uint32_t Discriminator = Offset & 0xffff; |
| FProfile = &CallerProfile->functionSamplesAt( |
| LineLocation(LineOffset, Discriminator))[std::string(Name)]; |
| } |
| FProfile->setName(Name); |
| |
| for (uint32_t I = 0; I < NumPosCounts; ++I) { |
| uint32_t Offset; |
| if (!GcovBuffer.readInt(Offset)) |
| return sampleprof_error::truncated; |
| |
| uint32_t NumTargets; |
| if (!GcovBuffer.readInt(NumTargets)) |
| return sampleprof_error::truncated; |
| |
| uint64_t Count; |
| if (!GcovBuffer.readInt64(Count)) |
| return sampleprof_error::truncated; |
| |
| // The line location is encoded in the offset as: |
| // high 16 bits: line offset to the start of the function. |
| // low 16 bits: discriminator. |
| uint32_t LineOffset = Offset >> 16; |
| uint32_t Discriminator = Offset & 0xffff; |
| |
| InlineCallStack NewStack; |
| NewStack.push_back(FProfile); |
| llvm::append_range(NewStack, InlineStack); |
| if (Update) { |
| // Walk up the inline stack, adding the samples on this line to |
| // the total sample count of the callers in the chain. |
| for (auto CallerProfile : NewStack) |
| CallerProfile->addTotalSamples(Count); |
| |
| // Update the body samples for the current profile. |
| FProfile->addBodySamples(LineOffset, Discriminator, Count); |
| } |
| |
| // Process the list of functions called at an indirect call site. |
| // These are all the targets that a function pointer (or virtual |
| // function) resolved at runtime. |
| for (uint32_t J = 0; J < NumTargets; J++) { |
| uint32_t HistVal; |
| if (!GcovBuffer.readInt(HistVal)) |
| return sampleprof_error::truncated; |
| |
| if (HistVal != HIST_TYPE_INDIR_CALL_TOPN) |
| return sampleprof_error::malformed; |
| |
| uint64_t TargetIdx; |
| if (!GcovBuffer.readInt64(TargetIdx)) |
| return sampleprof_error::truncated; |
| StringRef TargetName(Names[TargetIdx]); |
| |
| uint64_t TargetCount; |
| if (!GcovBuffer.readInt64(TargetCount)) |
| return sampleprof_error::truncated; |
| |
| if (Update) |
| FProfile->addCalledTargetSamples(LineOffset, Discriminator, |
| TargetName, TargetCount); |
| } |
| } |
| |
| // Process all the inlined callers into the current function. These |
| // are all the callsites that were inlined into this function. |
| for (uint32_t I = 0; I < NumCallsites; I++) { |
| // The offset is encoded as: |
| // high 16 bits: line offset to the start of the function. |
| // low 16 bits: discriminator. |
| uint32_t Offset; |
| if (!GcovBuffer.readInt(Offset)) |
| return sampleprof_error::truncated; |
| InlineCallStack NewStack; |
| NewStack.push_back(FProfile); |
| llvm::append_range(NewStack, InlineStack); |
| if (std::error_code EC = readOneFunctionProfile(NewStack, Update, Offset)) |
| return EC; |
| } |
| |
| return sampleprof_error::success; |
| } |
| |
| /// Read a GCC AutoFDO profile. |
| /// |
| /// This format is generated by the Linux Perf conversion tool at |
| /// https://github.com/google/autofdo. |
| std::error_code SampleProfileReaderGCC::readImpl() { |
| assert(!ProfileIsFSDisciminator && "Gcc profiles not support FSDisciminator"); |
| // Read the string table. |
| if (std::error_code EC = readNameTable()) |
| return EC; |
| |
| // Read the source profile. |
| if (std::error_code EC = readFunctionProfiles()) |
| return EC; |
| |
| return sampleprof_error::success; |
| } |
| |
| bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) { |
| StringRef Magic(reinterpret_cast<const char *>(Buffer.getBufferStart())); |
| return Magic == "adcg*704"; |
| } |
| |
| void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext &Ctx) { |
| // If the reader uses MD5 to represent string, we can't remap it because |
| // we don't know what the original function names were. |
| if (Reader.useMD5()) { |
| Ctx.diagnose(DiagnosticInfoSampleProfile( |
| Reader.getBuffer()->getBufferIdentifier(), |
| "Profile data remapping cannot be applied to profile data " |
| "in compact format (original mangled names are not available).", |
| DS_Warning)); |
| return; |
| } |
| |
| // CSSPGO-TODO: Remapper is not yet supported. |
| // We will need to remap the entire context string. |
| assert(Remappings && "should be initialized while creating remapper"); |
| for (auto &Sample : Reader.getProfiles()) { |
| DenseSet<StringRef> NamesInSample; |
| Sample.second.findAllNames(NamesInSample); |
| for (auto &Name : NamesInSample) |
| if (auto Key = Remappings->insert(Name)) |
| NameMap.insert({Key, Name}); |
| } |
| |
| RemappingApplied = true; |
| } |
| |
| Optional<StringRef> |
| SampleProfileReaderItaniumRemapper::lookUpNameInProfile(StringRef Fname) { |
| if (auto Key = Remappings->lookup(Fname)) |
| return NameMap.lookup(Key); |
| return None; |
| } |
| |
| /// Prepare a memory buffer for the contents of \p Filename. |
| /// |
| /// \returns an error code indicating the status of the buffer. |
| static ErrorOr<std::unique_ptr<MemoryBuffer>> |
| setupMemoryBuffer(const Twine &Filename) { |
| auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(Filename, /*IsText=*/true); |
| if (std::error_code EC = BufferOrErr.getError()) |
| return EC; |
| auto Buffer = std::move(BufferOrErr.get()); |
| |
| // Check the file. |
| if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint32_t>::max()) |
| return sampleprof_error::too_large; |
| |
| return std::move(Buffer); |
| } |
| |
| /// Create a sample profile reader based on the format of the input file. |
| /// |
| /// \param Filename The file to open. |
| /// |
| /// \param C The LLVM context to use to emit diagnostics. |
| /// |
| /// \param P The FSDiscriminatorPass. |
| /// |
| /// \param RemapFilename The file used for profile remapping. |
| /// |
| /// \returns an error code indicating the status of the created reader. |
| ErrorOr<std::unique_ptr<SampleProfileReader>> |
| SampleProfileReader::create(const std::string Filename, LLVMContext &C, |
| FSDiscriminatorPass P, |
| const std::string RemapFilename) { |
| auto BufferOrError = setupMemoryBuffer(Filename); |
| if (std::error_code EC = BufferOrError.getError()) |
| return EC; |
| return create(BufferOrError.get(), C, P, RemapFilename); |
| } |
| |
| /// Create a sample profile remapper from the given input, to remap the |
| /// function names in the given profile data. |
| /// |
| /// \param Filename The file to open. |
| /// |
| /// \param Reader The profile reader the remapper is going to be applied to. |
| /// |
| /// \param C The LLVM context to use to emit diagnostics. |
| /// |
| /// \returns an error code indicating the status of the created reader. |
| ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>> |
| SampleProfileReaderItaniumRemapper::create(const std::string Filename, |
| SampleProfileReader &Reader, |
| LLVMContext &C) { |
| auto BufferOrError = setupMemoryBuffer(Filename); |
| if (std::error_code EC = BufferOrError.getError()) |
| return EC; |
| return create(BufferOrError.get(), Reader, C); |
| } |
| |
| /// Create a sample profile remapper from the given input, to remap the |
| /// function names in the given profile data. |
| /// |
| /// \param B The memory buffer to create the reader from (assumes ownership). |
| /// |
| /// \param C The LLVM context to use to emit diagnostics. |
| /// |
| /// \param Reader The profile reader the remapper is going to be applied to. |
| /// |
| /// \returns an error code indicating the status of the created reader. |
| ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>> |
| SampleProfileReaderItaniumRemapper::create(std::unique_ptr<MemoryBuffer> &B, |
| SampleProfileReader &Reader, |
| LLVMContext &C) { |
| auto Remappings = std::make_unique<SymbolRemappingReader>(); |
| if (Error E = Remappings->read(*B.get())) { |
| handleAllErrors( |
| std::move(E), [&](const SymbolRemappingParseError &ParseError) { |
| C.diagnose(DiagnosticInfoSampleProfile(B->getBufferIdentifier(), |
| ParseError.getLineNum(), |
| ParseError.getMessage())); |
| }); |
| return sampleprof_error::malformed; |
| } |
| |
| return std::make_unique<SampleProfileReaderItaniumRemapper>( |
| std::move(B), std::move(Remappings), Reader); |
| } |
| |
| /// Create a sample profile reader based on the format of the input data. |
| /// |
| /// \param B The memory buffer to create the reader from (assumes ownership). |
| /// |
| /// \param C The LLVM context to use to emit diagnostics. |
| /// |
| /// \param P The FSDiscriminatorPass. |
| /// |
| /// \param RemapFilename The file used for profile remapping. |
| /// |
| /// \returns an error code indicating the status of the created reader. |
| ErrorOr<std::unique_ptr<SampleProfileReader>> |
| SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C, |
| FSDiscriminatorPass P, |
| const std::string RemapFilename) { |
| std::unique_ptr<SampleProfileReader> Reader; |
| if (SampleProfileReaderRawBinary::hasFormat(*B)) |
| Reader.reset(new SampleProfileReaderRawBinary(std::move(B), C)); |
| else if (SampleProfileReaderExtBinary::hasFormat(*B)) |
| Reader.reset(new SampleProfileReaderExtBinary(std::move(B), C)); |
| else if (SampleProfileReaderCompactBinary::hasFormat(*B)) |
| Reader.reset(new SampleProfileReaderCompactBinary(std::move(B), C)); |
| else if (SampleProfileReaderGCC::hasFormat(*B)) |
| Reader.reset(new SampleProfileReaderGCC(std::move(B), C)); |
| else if (SampleProfileReaderText::hasFormat(*B)) |
| Reader.reset(new SampleProfileReaderText(std::move(B), C)); |
| else |
| return sampleprof_error::unrecognized_format; |
| |
| if (!RemapFilename.empty()) { |
| auto ReaderOrErr = |
| SampleProfileReaderItaniumRemapper::create(RemapFilename, *Reader, C); |
| if (std::error_code EC = ReaderOrErr.getError()) { |
| std::string Msg = "Could not create remapper: " + EC.message(); |
| C.diagnose(DiagnosticInfoSampleProfile(RemapFilename, Msg)); |
| return EC; |
| } |
| Reader->Remapper = std::move(ReaderOrErr.get()); |
| } |
| |
| FunctionSamples::Format = Reader->getFormat(); |
| if (std::error_code EC = Reader->readHeader()) { |
| return EC; |
| } |
| |
| Reader->setDiscriminatorMaskedBitFrom(P); |
| |
| return std::move(Reader); |
| } |
| |
| // For text and GCC file formats, we compute the summary after reading the |
| // profile. Binary format has the profile summary in its header. |
| void SampleProfileReader::computeSummary() { |
| SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); |
| Summary = Builder.computeSummaryForProfiles(Profiles); |
| } |