|  | //===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===// | 
|  | // | 
|  | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|  | // See https://llvm.org/LICENSE.txt for license information. | 
|  | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  | // | 
|  | // This file implements the class that reads LLVM sample profiles. It | 
|  | // supports three file formats: text, binary and gcov. | 
|  | // | 
|  | // The textual representation is useful for debugging and testing purposes. The | 
|  | // binary representation is more compact, resulting in smaller file sizes. | 
|  | // | 
|  | // The gcov encoding is the one generated by GCC's AutoFDO profile creation | 
|  | // tool (https://github.com/google/autofdo) | 
|  | // | 
|  | // All three encodings can be used interchangeably as an input sample profile. | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | #include "llvm/ProfileData/SampleProfReader.h" | 
|  | #include "llvm/ADT/DenseMap.h" | 
|  | #include "llvm/ADT/STLExtras.h" | 
|  | #include "llvm/ADT/StringRef.h" | 
|  | #include "llvm/IR/Module.h" | 
|  | #include "llvm/IR/ProfileSummary.h" | 
|  | #include "llvm/ProfileData/ProfileCommon.h" | 
|  | #include "llvm/ProfileData/SampleProf.h" | 
|  | #include "llvm/Support/CommandLine.h" | 
|  | #include "llvm/Support/Compression.h" | 
|  | #include "llvm/Support/ErrorOr.h" | 
|  | #include "llvm/Support/JSON.h" | 
|  | #include "llvm/Support/LEB128.h" | 
|  | #include "llvm/Support/LineIterator.h" | 
|  | #include "llvm/Support/MD5.h" | 
|  | #include "llvm/Support/MemoryBuffer.h" | 
|  | #include "llvm/Support/VirtualFileSystem.h" | 
|  | #include "llvm/Support/raw_ostream.h" | 
|  | #include <algorithm> | 
|  | #include <cstddef> | 
|  | #include <cstdint> | 
|  | #include <limits> | 
|  | #include <memory> | 
|  | #include <system_error> | 
|  | #include <vector> | 
|  |  | 
|  | using namespace llvm; | 
|  | using namespace sampleprof; | 
|  |  | 
|  | #define DEBUG_TYPE "samplepgo-reader" | 
|  |  | 
|  | // This internal option specifies if the profile uses FS discriminators. | 
|  | // It only applies to text, and binary format profiles. | 
|  | // For ext-binary format profiles, the flag is set in the summary. | 
|  | static cl::opt<bool> ProfileIsFSDisciminator( | 
|  | "profile-isfs", cl::Hidden, cl::init(false), | 
|  | cl::desc("Profile uses flow sensitive discriminators")); | 
|  |  | 
|  | /// Dump the function profile for \p FName. | 
|  | /// | 
|  | /// \param FContext Name + context of the function to print. | 
|  | /// \param OS Stream to emit the output to. | 
|  | void SampleProfileReader::dumpFunctionProfile(const FunctionSamples &FS, | 
|  | raw_ostream &OS) { | 
|  | OS << "Function: " << FS.getContext().toString() << ": " << FS; | 
|  | } | 
|  |  | 
|  | /// Dump all the function profiles found on stream \p OS. | 
|  | void SampleProfileReader::dump(raw_ostream &OS) { | 
|  | std::vector<NameFunctionSamples> V; | 
|  | sortFuncProfiles(Profiles, V); | 
|  | for (const auto &I : V) | 
|  | dumpFunctionProfile(*I.second, OS); | 
|  | } | 
|  |  | 
|  | static void dumpFunctionProfileJson(const FunctionSamples &S, | 
|  | json::OStream &JOS, bool TopLevel = false) { | 
|  | auto DumpBody = [&](const BodySampleMap &BodySamples) { | 
|  | for (const auto &I : BodySamples) { | 
|  | const LineLocation &Loc = I.first; | 
|  | const SampleRecord &Sample = I.second; | 
|  | JOS.object([&] { | 
|  | JOS.attribute("line", Loc.LineOffset); | 
|  | if (Loc.Discriminator) | 
|  | JOS.attribute("discriminator", Loc.Discriminator); | 
|  | JOS.attribute("samples", Sample.getSamples()); | 
|  |  | 
|  | auto CallTargets = Sample.getSortedCallTargets(); | 
|  | if (!CallTargets.empty()) { | 
|  | JOS.attributeArray("calls", [&] { | 
|  | for (const auto &J : CallTargets) { | 
|  | JOS.object([&] { | 
|  | JOS.attribute("function", J.first.str()); | 
|  | JOS.attribute("samples", J.second); | 
|  | }); | 
|  | } | 
|  | }); | 
|  | } | 
|  | }); | 
|  | } | 
|  | }; | 
|  |  | 
|  | auto DumpCallsiteSamples = [&](const CallsiteSampleMap &CallsiteSamples) { | 
|  | for (const auto &I : CallsiteSamples) | 
|  | for (const auto &FS : I.second) { | 
|  | const LineLocation &Loc = I.first; | 
|  | const FunctionSamples &CalleeSamples = FS.second; | 
|  | JOS.object([&] { | 
|  | JOS.attribute("line", Loc.LineOffset); | 
|  | if (Loc.Discriminator) | 
|  | JOS.attribute("discriminator", Loc.Discriminator); | 
|  | JOS.attributeArray( | 
|  | "samples", [&] { dumpFunctionProfileJson(CalleeSamples, JOS); }); | 
|  | }); | 
|  | } | 
|  | }; | 
|  |  | 
|  | JOS.object([&] { | 
|  | JOS.attribute("name", S.getFunction().str()); | 
|  | JOS.attribute("total", S.getTotalSamples()); | 
|  | if (TopLevel) | 
|  | JOS.attribute("head", S.getHeadSamples()); | 
|  |  | 
|  | const auto &BodySamples = S.getBodySamples(); | 
|  | if (!BodySamples.empty()) | 
|  | JOS.attributeArray("body", [&] { DumpBody(BodySamples); }); | 
|  |  | 
|  | const auto &CallsiteSamples = S.getCallsiteSamples(); | 
|  | if (!CallsiteSamples.empty()) | 
|  | JOS.attributeArray("callsites", | 
|  | [&] { DumpCallsiteSamples(CallsiteSamples); }); | 
|  | }); | 
|  | } | 
|  |  | 
|  | /// Dump all the function profiles found on stream \p OS in the JSON format. | 
|  | void SampleProfileReader::dumpJson(raw_ostream &OS) { | 
|  | std::vector<NameFunctionSamples> V; | 
|  | sortFuncProfiles(Profiles, V); | 
|  | json::OStream JOS(OS, 2); | 
|  | JOS.arrayBegin(); | 
|  | for (const auto &F : V) | 
|  | dumpFunctionProfileJson(*F.second, JOS, true); | 
|  | JOS.arrayEnd(); | 
|  |  | 
|  | // Emit a newline character at the end as json::OStream doesn't emit one. | 
|  | OS << "\n"; | 
|  | } | 
|  |  | 
|  | /// Parse \p Input as function head. | 
|  | /// | 
|  | /// Parse one line of \p Input, and update function name in \p FName, | 
|  | /// function's total sample count in \p NumSamples, function's entry | 
|  | /// count in \p NumHeadSamples. | 
|  | /// | 
|  | /// \returns true if parsing is successful. | 
|  | static bool ParseHead(const StringRef &Input, StringRef &FName, | 
|  | uint64_t &NumSamples, uint64_t &NumHeadSamples) { | 
|  | if (Input[0] == ' ') | 
|  | return false; | 
|  | size_t n2 = Input.rfind(':'); | 
|  | size_t n1 = Input.rfind(':', n2 - 1); | 
|  | FName = Input.substr(0, n1); | 
|  | if (Input.substr(n1 + 1, n2 - n1 - 1).getAsInteger(10, NumSamples)) | 
|  | return false; | 
|  | if (Input.substr(n2 + 1).getAsInteger(10, NumHeadSamples)) | 
|  | return false; | 
|  | return true; | 
|  | } | 
|  |  | 
/// Returns true if line offset \p L fits in 16 bits, i.e. is a legal offset.
static bool isOffsetLegal(unsigned L) {
  constexpr unsigned MaxLegalOffset = 0xffffu;
  return L <= MaxLegalOffset;
}
|  |  | 
|  | /// Parse \p Input that contains metadata. | 
|  | /// Possible metadata: | 
|  | /// - CFG Checksum information: | 
|  | ///     !CFGChecksum: 12345 | 
|  | /// - CFG Checksum information: | 
|  | ///     !Attributes: 1 | 
|  | /// Stores the FunctionHash (a.k.a. CFG Checksum) into \p FunctionHash. | 
|  | static bool parseMetadata(const StringRef &Input, uint64_t &FunctionHash, | 
|  | uint32_t &Attributes) { | 
|  | if (Input.starts_with("!CFGChecksum:")) { | 
|  | StringRef CFGInfo = Input.substr(strlen("!CFGChecksum:")).trim(); | 
|  | return !CFGInfo.getAsInteger(10, FunctionHash); | 
|  | } | 
|  |  | 
|  | if (Input.starts_with("!Attributes:")) { | 
|  | StringRef Attrib = Input.substr(strlen("!Attributes:")).trim(); | 
|  | return !Attrib.getAsInteger(10, Attributes); | 
|  | } | 
|  |  | 
|  | return false; | 
|  | } | 
|  |  | 
/// Kind of an indented (non-header) line in a text profile.
enum class LineType {
  /// A line that starts the profile of an inlined callsite.
  CallSiteProfile = 0,
  /// A line that carries body samples and optional call targets.
  BodyProfile = 1,
  /// A line that starts with '!' after its indentation.
  Metadata = 2,
};
|  |  | 
|  | /// Parse \p Input as line sample. | 
|  | /// | 
|  | /// \param Input input line. | 
|  | /// \param LineTy Type of this line. | 
|  | /// \param Depth the depth of the inline stack. | 
|  | /// \param NumSamples total samples of the line/inlined callsite. | 
|  | /// \param LineOffset line offset to the start of the function. | 
|  | /// \param Discriminator discriminator of the line. | 
|  | /// \param TargetCountMap map from indirect call target to count. | 
|  | /// \param FunctionHash the function's CFG hash, used by pseudo probe. | 
|  | /// | 
|  | /// returns true if parsing is successful. | 
|  | static bool ParseLine(const StringRef &Input, LineType &LineTy, uint32_t &Depth, | 
|  | uint64_t &NumSamples, uint32_t &LineOffset, | 
|  | uint32_t &Discriminator, StringRef &CalleeName, | 
|  | DenseMap<StringRef, uint64_t> &TargetCountMap, | 
|  | uint64_t &FunctionHash, uint32_t &Attributes, | 
|  | bool &IsFlat) { | 
|  | for (Depth = 0; Input[Depth] == ' '; Depth++) | 
|  | ; | 
|  | if (Depth == 0) | 
|  | return false; | 
|  |  | 
|  | if (Input[Depth] == '!') { | 
|  | LineTy = LineType::Metadata; | 
|  | // This metadata is only for manual inspection only. We already created a | 
|  | // FunctionSamples and put it in the profile map, so there is no point | 
|  | // to skip profiles even they have no use for ThinLTO. | 
|  | if (Input == StringRef(" !Flat")) { | 
|  | IsFlat = true; | 
|  | return true; | 
|  | } | 
|  | return parseMetadata(Input.substr(Depth), FunctionHash, Attributes); | 
|  | } | 
|  |  | 
|  | size_t n1 = Input.find(':'); | 
|  | StringRef Loc = Input.substr(Depth, n1 - Depth); | 
|  | size_t n2 = Loc.find('.'); | 
|  | if (n2 == StringRef::npos) { | 
|  | if (Loc.getAsInteger(10, LineOffset) || !isOffsetLegal(LineOffset)) | 
|  | return false; | 
|  | Discriminator = 0; | 
|  | } else { | 
|  | if (Loc.substr(0, n2).getAsInteger(10, LineOffset)) | 
|  | return false; | 
|  | if (Loc.substr(n2 + 1).getAsInteger(10, Discriminator)) | 
|  | return false; | 
|  | } | 
|  |  | 
|  | StringRef Rest = Input.substr(n1 + 2); | 
|  | if (isDigit(Rest[0])) { | 
|  | LineTy = LineType::BodyProfile; | 
|  | size_t n3 = Rest.find(' '); | 
|  | if (n3 == StringRef::npos) { | 
|  | if (Rest.getAsInteger(10, NumSamples)) | 
|  | return false; | 
|  | } else { | 
|  | if (Rest.substr(0, n3).getAsInteger(10, NumSamples)) | 
|  | return false; | 
|  | } | 
|  | // Find call targets and their sample counts. | 
|  | // Note: In some cases, there are symbols in the profile which are not | 
|  | // mangled. To accommodate such cases, use colon + integer pairs as the | 
|  | // anchor points. | 
|  | // An example: | 
|  | // _M_construct<char *>:1000 string_view<std::allocator<char> >:437 | 
|  | // ":1000" and ":437" are used as anchor points so the string above will | 
|  | // be interpreted as | 
|  | // target: _M_construct<char *> | 
|  | // count: 1000 | 
|  | // target: string_view<std::allocator<char> > | 
|  | // count: 437 | 
|  | while (n3 != StringRef::npos) { | 
|  | n3 += Rest.substr(n3).find_first_not_of(' '); | 
|  | Rest = Rest.substr(n3); | 
|  | n3 = Rest.find_first_of(':'); | 
|  | if (n3 == StringRef::npos || n3 == 0) | 
|  | return false; | 
|  |  | 
|  | StringRef Target; | 
|  | uint64_t count, n4; | 
|  | while (true) { | 
|  | // Get the segment after the current colon. | 
|  | StringRef AfterColon = Rest.substr(n3 + 1); | 
|  | // Get the target symbol before the current colon. | 
|  | Target = Rest.substr(0, n3); | 
|  | // Check if the word after the current colon is an integer. | 
|  | n4 = AfterColon.find_first_of(' '); | 
|  | n4 = (n4 != StringRef::npos) ? n3 + n4 + 1 : Rest.size(); | 
|  | StringRef WordAfterColon = Rest.substr(n3 + 1, n4 - n3 - 1); | 
|  | if (!WordAfterColon.getAsInteger(10, count)) | 
|  | break; | 
|  |  | 
|  | // Try to find the next colon. | 
|  | uint64_t n5 = AfterColon.find_first_of(':'); | 
|  | if (n5 == StringRef::npos) | 
|  | return false; | 
|  | n3 += n5 + 1; | 
|  | } | 
|  |  | 
|  | // An anchor point is found. Save the {target, count} pair | 
|  | TargetCountMap[Target] = count; | 
|  | if (n4 == Rest.size()) | 
|  | break; | 
|  | // Change n3 to the next blank space after colon + integer pair. | 
|  | n3 = n4; | 
|  | } | 
|  | } else { | 
|  | LineTy = LineType::CallSiteProfile; | 
|  | size_t n3 = Rest.find_last_of(':'); | 
|  | CalleeName = Rest.substr(0, n3); | 
|  | if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples)) | 
|  | return false; | 
|  | } | 
|  | return true; | 
|  | } | 
|  |  | 
|  | /// Load samples from a text file. | 
|  | /// | 
|  | /// See the documentation at the top of the file for an explanation of | 
|  | /// the expected format. | 
|  | /// | 
|  | /// \returns true if the file was loaded successfully, false otherwise. | 
|  | std::error_code SampleProfileReaderText::readImpl() { | 
|  | line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#'); | 
|  | sampleprof_error Result = sampleprof_error::success; | 
|  |  | 
|  | InlineCallStack InlineStack; | 
|  | uint32_t TopLevelProbeProfileCount = 0; | 
|  |  | 
|  | // DepthMetadata tracks whether we have processed metadata for the current | 
|  | // top-level or nested function profile. | 
|  | uint32_t DepthMetadata = 0; | 
|  |  | 
|  | std::vector<SampleContext *> FlatSamples; | 
|  |  | 
|  | ProfileIsFS = ProfileIsFSDisciminator; | 
|  | FunctionSamples::ProfileIsFS = ProfileIsFS; | 
|  | for (; !LineIt.is_at_eof(); ++LineIt) { | 
|  | size_t pos = LineIt->find_first_not_of(' '); | 
|  | if (pos == LineIt->npos || (*LineIt)[pos] == '#') | 
|  | continue; | 
|  | // Read the header of each function. | 
|  | // | 
|  | // Note that for function identifiers we are actually expecting | 
|  | // mangled names, but we may not always get them. This happens when | 
|  | // the compiler decides not to emit the function (e.g., it was inlined | 
|  | // and removed). In this case, the binary will not have the linkage | 
|  | // name for the function, so the profiler will emit the function's | 
|  | // unmangled name, which may contain characters like ':' and '>' in its | 
|  | // name (member functions, templates, etc). | 
|  | // | 
|  | // The only requirement we place on the identifier, then, is that it | 
|  | // should not begin with a number. | 
|  | if ((*LineIt)[0] != ' ') { | 
|  | uint64_t NumSamples, NumHeadSamples; | 
|  | StringRef FName; | 
|  | if (!ParseHead(*LineIt, FName, NumSamples, NumHeadSamples)) { | 
|  | reportError(LineIt.line_number(), | 
|  | "Expected 'mangled_name:NUM:NUM', found " + *LineIt); | 
|  | return sampleprof_error::malformed; | 
|  | } | 
|  | DepthMetadata = 0; | 
|  | SampleContext FContext(FName, CSNameTable); | 
|  | if (FContext.hasContext()) | 
|  | ++CSProfileCount; | 
|  | FunctionSamples &FProfile = Profiles.create(FContext); | 
|  | mergeSampleProfErrors(Result, FProfile.addTotalSamples(NumSamples)); | 
|  | mergeSampleProfErrors(Result, FProfile.addHeadSamples(NumHeadSamples)); | 
|  | InlineStack.clear(); | 
|  | InlineStack.push_back(&FProfile); | 
|  | } else { | 
|  | uint64_t NumSamples; | 
|  | StringRef FName; | 
|  | DenseMap<StringRef, uint64_t> TargetCountMap; | 
|  | uint32_t Depth, LineOffset, Discriminator; | 
|  | LineType LineTy; | 
|  | uint64_t FunctionHash = 0; | 
|  | uint32_t Attributes = 0; | 
|  | bool IsFlat = false; | 
|  | if (!ParseLine(*LineIt, LineTy, Depth, NumSamples, LineOffset, | 
|  | Discriminator, FName, TargetCountMap, FunctionHash, | 
|  | Attributes, IsFlat)) { | 
|  | reportError(LineIt.line_number(), | 
|  | "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " + | 
|  | *LineIt); | 
|  | return sampleprof_error::malformed; | 
|  | } | 
|  | if (LineTy != LineType::Metadata && Depth == DepthMetadata) { | 
|  | // Metadata must be put at the end of a function profile. | 
|  | reportError(LineIt.line_number(), | 
|  | "Found non-metadata after metadata: " + *LineIt); | 
|  | return sampleprof_error::malformed; | 
|  | } | 
|  |  | 
|  | // Here we handle FS discriminators. | 
|  | Discriminator &= getDiscriminatorMask(); | 
|  |  | 
|  | while (InlineStack.size() > Depth) { | 
|  | InlineStack.pop_back(); | 
|  | } | 
|  | switch (LineTy) { | 
|  | case LineType::CallSiteProfile: { | 
|  | FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt( | 
|  | LineLocation(LineOffset, Discriminator))[FunctionId(FName)]; | 
|  | FSamples.setFunction(FunctionId(FName)); | 
|  | mergeSampleProfErrors(Result, FSamples.addTotalSamples(NumSamples)); | 
|  | InlineStack.push_back(&FSamples); | 
|  | DepthMetadata = 0; | 
|  | break; | 
|  | } | 
|  | case LineType::BodyProfile: { | 
|  | while (InlineStack.size() > Depth) { | 
|  | InlineStack.pop_back(); | 
|  | } | 
|  | FunctionSamples &FProfile = *InlineStack.back(); | 
|  | for (const auto &name_count : TargetCountMap) { | 
|  | mergeSampleProfErrors(Result, FProfile.addCalledTargetSamples( | 
|  | LineOffset, Discriminator, | 
|  | FunctionId(name_count.first), | 
|  | name_count.second)); | 
|  | } | 
|  | mergeSampleProfErrors( | 
|  | Result, | 
|  | FProfile.addBodySamples(LineOffset, Discriminator, NumSamples)); | 
|  | break; | 
|  | } | 
|  | case LineType::Metadata: { | 
|  | FunctionSamples &FProfile = *InlineStack.back(); | 
|  | if (FunctionHash) { | 
|  | FProfile.setFunctionHash(FunctionHash); | 
|  | if (Depth == 1) | 
|  | ++TopLevelProbeProfileCount; | 
|  | } | 
|  | FProfile.getContext().setAllAttributes(Attributes); | 
|  | if (Attributes & (uint32_t)ContextShouldBeInlined) | 
|  | ProfileIsPreInlined = true; | 
|  | DepthMetadata = Depth; | 
|  | if (IsFlat) { | 
|  | if (Depth == 1) | 
|  | FlatSamples.push_back(&FProfile.getContext()); | 
|  | else | 
|  | Ctx.diagnose(DiagnosticInfoSampleProfile( | 
|  | Buffer->getBufferIdentifier(), LineIt.line_number(), | 
|  | "!Flat may only be used at top level function.", DS_Warning)); | 
|  | } | 
|  | break; | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | // Honor the option to skip flat functions. Since they are already added to | 
|  | // the profile map, remove them all here. | 
|  | if (SkipFlatProf) | 
|  | for (SampleContext *FlatSample : FlatSamples) | 
|  | Profiles.erase(*FlatSample); | 
|  |  | 
|  | assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) && | 
|  | "Cannot have both context-sensitive and regular profile"); | 
|  | ProfileIsCS = (CSProfileCount > 0); | 
|  | assert((TopLevelProbeProfileCount == 0 || | 
|  | TopLevelProbeProfileCount == Profiles.size()) && | 
|  | "Cannot have both probe-based profiles and regular profiles"); | 
|  | ProfileIsProbeBased = (TopLevelProbeProfileCount > 0); | 
|  | FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased; | 
|  | FunctionSamples::ProfileIsCS = ProfileIsCS; | 
|  | FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined; | 
|  |  | 
|  | if (Result == sampleprof_error::success) | 
|  | computeSummary(); | 
|  |  | 
|  | return Result; | 
|  | } | 
|  |  | 
|  | bool SampleProfileReaderText::hasFormat(const MemoryBuffer &Buffer) { | 
|  | bool result = false; | 
|  |  | 
|  | // Check that the first non-comment line is a valid function header. | 
|  | line_iterator LineIt(Buffer, /*SkipBlanks=*/true, '#'); | 
|  | if (!LineIt.is_at_eof()) { | 
|  | if ((*LineIt)[0] != ' ') { | 
|  | uint64_t NumSamples, NumHeadSamples; | 
|  | StringRef FName; | 
|  | result = ParseHead(*LineIt, FName, NumSamples, NumHeadSamples); | 
|  | } | 
|  | } | 
|  |  | 
|  | return result; | 
|  | } | 
|  |  | 
|  | template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() { | 
|  | unsigned NumBytesRead = 0; | 
|  | uint64_t Val = decodeULEB128(Data, &NumBytesRead); | 
|  |  | 
|  | if (Val > std::numeric_limits<T>::max()) { | 
|  | std::error_code EC = sampleprof_error::malformed; | 
|  | reportError(0, EC.message()); | 
|  | return EC; | 
|  | } else if (Data + NumBytesRead > End) { | 
|  | std::error_code EC = sampleprof_error::truncated; | 
|  | reportError(0, EC.message()); | 
|  | return EC; | 
|  | } | 
|  |  | 
|  | Data += NumBytesRead; | 
|  | return static_cast<T>(Val); | 
|  | } | 
|  |  | 
|  | ErrorOr<StringRef> SampleProfileReaderBinary::readString() { | 
|  | StringRef Str(reinterpret_cast<const char *>(Data)); | 
|  | if (Data + Str.size() + 1 > End) { | 
|  | std::error_code EC = sampleprof_error::truncated; | 
|  | reportError(0, EC.message()); | 
|  | return EC; | 
|  | } | 
|  |  | 
|  | Data += Str.size() + 1; | 
|  | return Str; | 
|  | } | 
|  |  | 
|  | template <typename T> | 
|  | ErrorOr<T> SampleProfileReaderBinary::readUnencodedNumber() { | 
|  | if (Data + sizeof(T) > End) { | 
|  | std::error_code EC = sampleprof_error::truncated; | 
|  | reportError(0, EC.message()); | 
|  | return EC; | 
|  | } | 
|  |  | 
|  | using namespace support; | 
|  | T Val = endian::readNext<T, llvm::endianness::little>(Data); | 
|  | return Val; | 
|  | } | 
|  |  | 
|  | template <typename T> | 
|  | inline ErrorOr<size_t> SampleProfileReaderBinary::readStringIndex(T &Table) { | 
|  | auto Idx = readNumber<size_t>(); | 
|  | if (std::error_code EC = Idx.getError()) | 
|  | return EC; | 
|  | if (*Idx >= Table.size()) | 
|  | return sampleprof_error::truncated_name_table; | 
|  | return *Idx; | 
|  | } | 
|  |  | 
|  | ErrorOr<FunctionId> | 
|  | SampleProfileReaderBinary::readStringFromTable(size_t *RetIdx) { | 
|  | auto Idx = readStringIndex(NameTable); | 
|  | if (std::error_code EC = Idx.getError()) | 
|  | return EC; | 
|  | if (RetIdx) | 
|  | *RetIdx = *Idx; | 
|  | return NameTable[*Idx]; | 
|  | } | 
|  |  | 
|  | ErrorOr<SampleContextFrames> | 
|  | SampleProfileReaderBinary::readContextFromTable(size_t *RetIdx) { | 
|  | auto ContextIdx = readNumber<size_t>(); | 
|  | if (std::error_code EC = ContextIdx.getError()) | 
|  | return EC; | 
|  | if (*ContextIdx >= CSNameTable.size()) | 
|  | return sampleprof_error::truncated_name_table; | 
|  | if (RetIdx) | 
|  | *RetIdx = *ContextIdx; | 
|  | return CSNameTable[*ContextIdx]; | 
|  | } | 
|  |  | 
|  | ErrorOr<std::pair<SampleContext, uint64_t>> | 
|  | SampleProfileReaderBinary::readSampleContextFromTable() { | 
|  | SampleContext Context; | 
|  | size_t Idx; | 
|  | if (ProfileIsCS) { | 
|  | auto FContext(readContextFromTable(&Idx)); | 
|  | if (std::error_code EC = FContext.getError()) | 
|  | return EC; | 
|  | Context = SampleContext(*FContext); | 
|  | } else { | 
|  | auto FName(readStringFromTable(&Idx)); | 
|  | if (std::error_code EC = FName.getError()) | 
|  | return EC; | 
|  | Context = SampleContext(*FName); | 
|  | } | 
|  | // Since MD5SampleContextStart may point to the profile's file data, need to | 
|  | // make sure it is reading the same value on big endian CPU. | 
|  | uint64_t Hash = support::endian::read64le(MD5SampleContextStart + Idx); | 
|  | // Lazy computing of hash value, write back to the table to cache it. Only | 
|  | // compute the context's hash value if it is being referenced for the first | 
|  | // time. | 
|  | if (Hash == 0) { | 
|  | assert(MD5SampleContextStart == MD5SampleContextTable.data()); | 
|  | Hash = Context.getHashCode(); | 
|  | support::endian::write64le(&MD5SampleContextTable[Idx], Hash); | 
|  | } | 
|  | return std::make_pair(Context, Hash); | 
|  | } | 
|  |  | 
|  | std::error_code | 
|  | SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) { | 
|  | auto NumSamples = readNumber<uint64_t>(); | 
|  | if (std::error_code EC = NumSamples.getError()) | 
|  | return EC; | 
|  | FProfile.addTotalSamples(*NumSamples); | 
|  |  | 
|  | // Read the samples in the body. | 
|  | auto NumRecords = readNumber<uint32_t>(); | 
|  | if (std::error_code EC = NumRecords.getError()) | 
|  | return EC; | 
|  |  | 
|  | for (uint32_t I = 0; I < *NumRecords; ++I) { | 
|  | auto LineOffset = readNumber<uint64_t>(); | 
|  | if (std::error_code EC = LineOffset.getError()) | 
|  | return EC; | 
|  |  | 
|  | if (!isOffsetLegal(*LineOffset)) { | 
|  | return std::error_code(); | 
|  | } | 
|  |  | 
|  | auto Discriminator = readNumber<uint64_t>(); | 
|  | if (std::error_code EC = Discriminator.getError()) | 
|  | return EC; | 
|  |  | 
|  | auto NumSamples = readNumber<uint64_t>(); | 
|  | if (std::error_code EC = NumSamples.getError()) | 
|  | return EC; | 
|  |  | 
|  | auto NumCalls = readNumber<uint32_t>(); | 
|  | if (std::error_code EC = NumCalls.getError()) | 
|  | return EC; | 
|  |  | 
|  | // Here we handle FS discriminators: | 
|  | uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask(); | 
|  |  | 
|  | for (uint32_t J = 0; J < *NumCalls; ++J) { | 
|  | auto CalledFunction(readStringFromTable()); | 
|  | if (std::error_code EC = CalledFunction.getError()) | 
|  | return EC; | 
|  |  | 
|  | auto CalledFunctionSamples = readNumber<uint64_t>(); | 
|  | if (std::error_code EC = CalledFunctionSamples.getError()) | 
|  | return EC; | 
|  |  | 
|  | FProfile.addCalledTargetSamples(*LineOffset, DiscriminatorVal, | 
|  | *CalledFunction, *CalledFunctionSamples); | 
|  | } | 
|  |  | 
|  | FProfile.addBodySamples(*LineOffset, DiscriminatorVal, *NumSamples); | 
|  | } | 
|  |  | 
|  | // Read all the samples for inlined function calls. | 
|  | auto NumCallsites = readNumber<uint32_t>(); | 
|  | if (std::error_code EC = NumCallsites.getError()) | 
|  | return EC; | 
|  |  | 
|  | for (uint32_t J = 0; J < *NumCallsites; ++J) { | 
|  | auto LineOffset = readNumber<uint64_t>(); | 
|  | if (std::error_code EC = LineOffset.getError()) | 
|  | return EC; | 
|  |  | 
|  | auto Discriminator = readNumber<uint64_t>(); | 
|  | if (std::error_code EC = Discriminator.getError()) | 
|  | return EC; | 
|  |  | 
|  | auto FName(readStringFromTable()); | 
|  | if (std::error_code EC = FName.getError()) | 
|  | return EC; | 
|  |  | 
|  | // Here we handle FS discriminators: | 
|  | uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask(); | 
|  |  | 
|  | FunctionSamples &CalleeProfile = FProfile.functionSamplesAt( | 
|  | LineLocation(*LineOffset, DiscriminatorVal))[*FName]; | 
|  | CalleeProfile.setFunction(*FName); | 
|  | if (std::error_code EC = readProfile(CalleeProfile)) | 
|  | return EC; | 
|  | } | 
|  |  | 
|  | return sampleprof_error::success; | 
|  | } | 
|  |  | 
|  | std::error_code | 
|  | SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start, | 
|  | SampleProfileMap &Profiles) { | 
|  | Data = Start; | 
|  | auto NumHeadSamples = readNumber<uint64_t>(); | 
|  | if (std::error_code EC = NumHeadSamples.getError()) | 
|  | return EC; | 
|  |  | 
|  | auto FContextHash(readSampleContextFromTable()); | 
|  | if (std::error_code EC = FContextHash.getError()) | 
|  | return EC; | 
|  |  | 
|  | auto &[FContext, Hash] = *FContextHash; | 
|  | // Use the cached hash value for insertion instead of recalculating it. | 
|  | auto Res = Profiles.try_emplace(Hash, FContext, FunctionSamples()); | 
|  | FunctionSamples &FProfile = Res.first->second; | 
|  | FProfile.setContext(FContext); | 
|  | FProfile.addHeadSamples(*NumHeadSamples); | 
|  |  | 
|  | if (FContext.hasContext()) | 
|  | CSProfileCount++; | 
|  |  | 
|  | if (std::error_code EC = readProfile(FProfile)) | 
|  | return EC; | 
|  | return sampleprof_error::success; | 
|  | } | 
|  |  | 
|  | std::error_code | 
|  | SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) { | 
|  | return readFuncProfile(Start, Profiles); | 
|  | } | 
|  |  | 
|  | std::error_code SampleProfileReaderBinary::readImpl() { | 
|  | ProfileIsFS = ProfileIsFSDisciminator; | 
|  | FunctionSamples::ProfileIsFS = ProfileIsFS; | 
|  | while (Data < End) { | 
|  | if (std::error_code EC = readFuncProfile(Data)) | 
|  | return EC; | 
|  | } | 
|  |  | 
|  | return sampleprof_error::success; | 
|  | } | 
|  |  | 
|  | std::error_code SampleProfileReaderExtBinaryBase::readOneSection( | 
|  | const uint8_t *Start, uint64_t Size, const SecHdrTableEntry &Entry) { | 
|  | Data = Start; | 
|  | End = Start + Size; | 
|  | switch (Entry.Type) { | 
|  | case SecProfSummary: | 
|  | if (std::error_code EC = readSummary()) | 
|  | return EC; | 
|  | if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial)) | 
|  | Summary->setPartialProfile(true); | 
|  | if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext)) | 
|  | FunctionSamples::ProfileIsCS = ProfileIsCS = true; | 
|  | if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsPreInlined)) | 
|  | FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined = true; | 
|  | if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator)) | 
|  | FunctionSamples::ProfileIsFS = ProfileIsFS = true; | 
|  | break; | 
|  | case SecNameTable: { | 
|  | bool FixedLengthMD5 = | 
|  | hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5); | 
|  | bool UseMD5 = hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name); | 
|  | // UseMD5 means if THIS section uses MD5, ProfileIsMD5 means if the entire | 
|  | // profile uses MD5 for function name matching in IPO passes. | 
|  | ProfileIsMD5 = ProfileIsMD5 || UseMD5; | 
|  | FunctionSamples::HasUniqSuffix = | 
|  | hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix); | 
|  | if (std::error_code EC = readNameTableSec(UseMD5, FixedLengthMD5)) | 
|  | return EC; | 
|  | break; | 
|  | } | 
|  | case SecCSNameTable: { | 
|  | if (std::error_code EC = readCSNameTableSec()) | 
|  | return EC; | 
|  | break; | 
|  | } | 
|  | case SecLBRProfile: | 
|  | ProfileSecRange = std::make_pair(Data, End); | 
|  | if (std::error_code EC = readFuncProfiles()) | 
|  | return EC; | 
|  | break; | 
|  | case SecFuncOffsetTable: | 
|  | // If module is absent, we are using LLVM tools, and need to read all | 
|  | // profiles, so skip reading the function offset table. | 
|  | if (!M) { | 
|  | Data = End; | 
|  | } else { | 
|  | assert((!ProfileIsCS || | 
|  | hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered)) && | 
|  | "func offset table should always be sorted in CS profile"); | 
|  | if (std::error_code EC = readFuncOffsetTable()) | 
|  | return EC; | 
|  | } | 
|  | break; | 
|  | case SecFuncMetadata: { | 
|  | ProfileIsProbeBased = | 
|  | hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased); | 
|  | FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased; | 
|  | ProfileHasAttribute = | 
|  | hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute); | 
|  | if (std::error_code EC = readFuncMetadata(ProfileHasAttribute)) | 
|  | return EC; | 
|  | break; | 
|  | } | 
|  | case SecProfileSymbolList: | 
|  | if (std::error_code EC = readProfileSymbolList()) | 
|  | return EC; | 
|  | break; | 
|  | default: | 
|  | if (std::error_code EC = readCustomSection(Entry)) | 
|  | return EC; | 
|  | break; | 
|  | } | 
|  | return sampleprof_error::success; | 
|  | } | 
|  |  | 
|  | bool SampleProfileReaderExtBinaryBase::useFuncOffsetList() const { | 
|  | // If profile is CS, the function offset section is expected to consist of | 
|  | // sequences of contexts in pre-order layout | 
|  | // (e.g. [A, A:1 @ B, A:1 @ B:2.3 @ C] [D, D:1 @ E]), so that when a matched | 
|  | // context in the module is found, the profiles of all its callees are | 
|  | // recursively loaded. A list is needed since the order of profiles matters. | 
|  | if (ProfileIsCS) | 
|  | return true; | 
|  |  | 
|  | // If the profile is MD5, use the map container to lookup functions in | 
|  | // the module. A remapper has no use on MD5 names. | 
|  | if (useMD5()) | 
|  | return false; | 
|  |  | 
|  | // Profile is not MD5 and if a remapper is present, the remapped name of | 
|  | // every function needed to be matched against the module, so use the list | 
|  | // container since each entry is accessed. | 
|  | if (Remapper) | 
|  | return true; | 
|  |  | 
|  | // Otherwise use the map container for faster lookup. | 
|  | // TODO: If the cardinality of the function offset section is much smaller | 
|  | // than the number of functions in the module, using the list container can | 
|  | // be always faster, but we need to figure out the constant factor to | 
|  | // determine the cutoff. | 
|  | return false; | 
|  | } | 
|  |  | 
|  | std::error_code | 
|  | SampleProfileReaderExtBinaryBase::read(const DenseSet<StringRef> &FuncsToUse, | 
|  | SampleProfileMap &Profiles) { | 
|  | if (FuncsToUse.empty()) | 
|  | return sampleprof_error::success; | 
|  |  | 
|  | Data = ProfileSecRange.first; | 
|  | End = ProfileSecRange.second; | 
|  | if (std::error_code EC = readFuncProfiles(FuncsToUse, Profiles)) | 
|  | return EC; | 
|  | End = Data; | 
|  | DenseSet<FunctionSamples *> ProfilesToReadMetadata; | 
|  | for (auto FName : FuncsToUse) { | 
|  | auto I = Profiles.find(FName); | 
|  | if (I != Profiles.end()) | 
|  | ProfilesToReadMetadata.insert(&I->second); | 
|  | } | 
|  |  | 
|  | if (std::error_code EC = | 
|  | readFuncMetadata(ProfileHasAttribute, ProfilesToReadMetadata)) | 
|  | return EC; | 
|  | return sampleprof_error::success; | 
|  | } | 
|  |  | 
|  | bool SampleProfileReaderExtBinaryBase::collectFuncsFromModule() { | 
|  | if (!M) | 
|  | return false; | 
|  | FuncsToUse.clear(); | 
|  | for (auto &F : *M) | 
|  | FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F)); | 
|  | return true; | 
|  | } | 
|  |  | 
|  | std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() { | 
|  | // If there are more than one function offset section, the profile associated | 
|  | // with the previous section has to be done reading before next one is read. | 
|  | FuncOffsetTable.clear(); | 
|  | FuncOffsetList.clear(); | 
|  |  | 
|  | auto Size = readNumber<uint64_t>(); | 
|  | if (std::error_code EC = Size.getError()) | 
|  | return EC; | 
|  |  | 
|  | bool UseFuncOffsetList = useFuncOffsetList(); | 
|  | if (UseFuncOffsetList) | 
|  | FuncOffsetList.reserve(*Size); | 
|  | else | 
|  | FuncOffsetTable.reserve(*Size); | 
|  |  | 
|  | for (uint64_t I = 0; I < *Size; ++I) { | 
|  | auto FContextHash(readSampleContextFromTable()); | 
|  | if (std::error_code EC = FContextHash.getError()) | 
|  | return EC; | 
|  |  | 
|  | auto &[FContext, Hash] = *FContextHash; | 
|  | auto Offset = readNumber<uint64_t>(); | 
|  | if (std::error_code EC = Offset.getError()) | 
|  | return EC; | 
|  |  | 
|  | if (UseFuncOffsetList) | 
|  | FuncOffsetList.emplace_back(FContext, *Offset); | 
|  | else | 
|  | // Because Porfiles replace existing value with new value if collision | 
|  | // happens, we also use the latest offset so that they are consistent. | 
|  | FuncOffsetTable[Hash] = *Offset; | 
|  | } | 
|  |  | 
|  | return sampleprof_error::success; | 
|  | } | 
|  |  | 
|  | std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles( | 
|  | const DenseSet<StringRef> &FuncsToUse, SampleProfileMap &Profiles) { | 
|  | const uint8_t *Start = Data; | 
|  |  | 
|  | if (Remapper) { | 
|  | for (auto Name : FuncsToUse) { | 
|  | Remapper->insert(Name); | 
|  | } | 
|  | } | 
|  |  | 
|  | if (ProfileIsCS) { | 
|  | assert(useFuncOffsetList()); | 
|  | DenseSet<uint64_t> FuncGuidsToUse; | 
|  | if (useMD5()) { | 
|  | for (auto Name : FuncsToUse) | 
|  | FuncGuidsToUse.insert(Function::getGUID(Name)); | 
|  | } | 
|  |  | 
|  | // For each function in current module, load all context profiles for | 
|  | // the function as well as their callee contexts which can help profile | 
|  | // guided importing for ThinLTO. This can be achieved by walking | 
|  | // through an ordered context container, where contexts are laid out | 
|  | // as if they were walked in preorder of a context trie. While | 
|  | // traversing the trie, a link to the highest common ancestor node is | 
|  | // kept so that all of its decendants will be loaded. | 
|  | const SampleContext *CommonContext = nullptr; | 
|  | for (const auto &NameOffset : FuncOffsetList) { | 
|  | const auto &FContext = NameOffset.first; | 
|  | FunctionId FName = FContext.getFunction(); | 
|  | StringRef FNameString; | 
|  | if (!useMD5()) | 
|  | FNameString = FName.stringRef(); | 
|  |  | 
|  | // For function in the current module, keep its farthest ancestor | 
|  | // context. This can be used to load itself and its child and | 
|  | // sibling contexts. | 
|  | if ((useMD5() && FuncGuidsToUse.count(FName.getHashCode())) || | 
|  | (!useMD5() && (FuncsToUse.count(FNameString) || | 
|  | (Remapper && Remapper->exist(FNameString))))) { | 
|  | if (!CommonContext || !CommonContext->isPrefixOf(FContext)) | 
|  | CommonContext = &FContext; | 
|  | } | 
|  |  | 
|  | if (CommonContext == &FContext || | 
|  | (CommonContext && CommonContext->isPrefixOf(FContext))) { | 
|  | // Load profile for the current context which originated from | 
|  | // the common ancestor. | 
|  | const uint8_t *FuncProfileAddr = Start + NameOffset.second; | 
|  | if (std::error_code EC = readFuncProfile(FuncProfileAddr)) | 
|  | return EC; | 
|  | } | 
|  | } | 
|  | } else if (useMD5()) { | 
|  | assert(!useFuncOffsetList()); | 
|  | for (auto Name : FuncsToUse) { | 
|  | auto GUID = MD5Hash(Name); | 
|  | auto iter = FuncOffsetTable.find(GUID); | 
|  | if (iter == FuncOffsetTable.end()) | 
|  | continue; | 
|  | const uint8_t *FuncProfileAddr = Start + iter->second; | 
|  | if (std::error_code EC = readFuncProfile(FuncProfileAddr, Profiles)) | 
|  | return EC; | 
|  | } | 
|  | } else if (Remapper) { | 
|  | assert(useFuncOffsetList()); | 
|  | for (auto NameOffset : FuncOffsetList) { | 
|  | SampleContext FContext(NameOffset.first); | 
|  | auto FuncName = FContext.getFunction(); | 
|  | StringRef FuncNameStr = FuncName.stringRef(); | 
|  | if (!FuncsToUse.count(FuncNameStr) && !Remapper->exist(FuncNameStr)) | 
|  | continue; | 
|  | const uint8_t *FuncProfileAddr = Start + NameOffset.second; | 
|  | if (std::error_code EC = readFuncProfile(FuncProfileAddr, Profiles)) | 
|  | return EC; | 
|  | } | 
|  | } else { | 
|  | assert(!useFuncOffsetList()); | 
|  | for (auto Name : FuncsToUse) { | 
|  |  | 
|  | auto iter = FuncOffsetTable.find(MD5Hash(Name)); | 
|  | if (iter == FuncOffsetTable.end()) | 
|  | continue; | 
|  | const uint8_t *FuncProfileAddr = Start + iter->second; | 
|  | if (std::error_code EC = readFuncProfile(FuncProfileAddr, Profiles)) | 
|  | return EC; | 
|  | } | 
|  | } | 
|  |  | 
|  | return sampleprof_error::success; | 
|  | } | 
|  |  | 
|  | std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() { | 
|  | // Collect functions used by current module if the Reader has been | 
|  | // given a module. | 
|  | // collectFuncsFromModule uses FunctionSamples::getCanonicalFnName | 
|  | // which will query FunctionSamples::HasUniqSuffix, so it has to be | 
|  | // called after FunctionSamples::HasUniqSuffix is set, i.e. after | 
|  | // NameTable section is read. | 
|  | bool LoadFuncsToBeUsed = collectFuncsFromModule(); | 
|  |  | 
|  | // When LoadFuncsToBeUsed is false, we are using LLVM tool, need to read all | 
|  | // profiles. | 
|  | if (!LoadFuncsToBeUsed) { | 
|  | while (Data < End) { | 
|  | if (std::error_code EC = readFuncProfile(Data)) | 
|  | return EC; | 
|  | } | 
|  | assert(Data == End && "More data is read than expected"); | 
|  | } else { | 
|  | // Load function profiles on demand. | 
|  | if (std::error_code EC = readFuncProfiles(FuncsToUse, Profiles)) | 
|  | return EC; | 
|  | Data = End; | 
|  | } | 
|  | assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) && | 
|  | "Cannot have both context-sensitive and regular profile"); | 
|  | assert((!CSProfileCount || ProfileIsCS) && | 
|  | "Section flag should be consistent with actual profile"); | 
|  | return sampleprof_error::success; | 
|  | } | 
|  |  | 
|  | std::error_code SampleProfileReaderExtBinaryBase::readProfileSymbolList() { | 
|  | if (!ProfSymList) | 
|  | ProfSymList = std::make_unique<ProfileSymbolList>(); | 
|  |  | 
|  | if (std::error_code EC = ProfSymList->read(Data, End - Data)) | 
|  | return EC; | 
|  |  | 
|  | Data = End; | 
|  | return sampleprof_error::success; | 
|  | } | 
|  |  | 
|  | std::error_code SampleProfileReaderExtBinaryBase::decompressSection( | 
|  | const uint8_t *SecStart, const uint64_t SecSize, | 
|  | const uint8_t *&DecompressBuf, uint64_t &DecompressBufSize) { | 
|  | Data = SecStart; | 
|  | End = SecStart + SecSize; | 
|  | auto DecompressSize = readNumber<uint64_t>(); | 
|  | if (std::error_code EC = DecompressSize.getError()) | 
|  | return EC; | 
|  | DecompressBufSize = *DecompressSize; | 
|  |  | 
|  | auto CompressSize = readNumber<uint64_t>(); | 
|  | if (std::error_code EC = CompressSize.getError()) | 
|  | return EC; | 
|  |  | 
|  | if (!llvm::compression::zlib::isAvailable()) | 
|  | return sampleprof_error::zlib_unavailable; | 
|  |  | 
|  | uint8_t *Buffer = Allocator.Allocate<uint8_t>(DecompressBufSize); | 
|  | size_t UCSize = DecompressBufSize; | 
|  | llvm::Error E = compression::zlib::decompress(ArrayRef(Data, *CompressSize), | 
|  | Buffer, UCSize); | 
|  | if (E) | 
|  | return sampleprof_error::uncompress_failed; | 
|  | DecompressBuf = reinterpret_cast<const uint8_t *>(Buffer); | 
|  | return sampleprof_error::success; | 
|  | } | 
|  |  | 
|  | std::error_code SampleProfileReaderExtBinaryBase::readImpl() { | 
|  | const uint8_t *BufStart = | 
|  | reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()); | 
|  |  | 
|  | for (auto &Entry : SecHdrTable) { | 
|  | // Skip empty section. | 
|  | if (!Entry.Size) | 
|  | continue; | 
|  |  | 
|  | // Skip sections without inlined functions when SkipFlatProf is true. | 
|  | if (SkipFlatProf && hasSecFlag(Entry, SecCommonFlags::SecFlagFlat)) | 
|  | continue; | 
|  |  | 
|  | const uint8_t *SecStart = BufStart + Entry.Offset; | 
|  | uint64_t SecSize = Entry.Size; | 
|  |  | 
|  | // If the section is compressed, decompress it into a buffer | 
|  | // DecompressBuf before reading the actual data. The pointee of | 
|  | // 'Data' will be changed to buffer hold by DecompressBuf | 
|  | // temporarily when reading the actual data. | 
|  | bool isCompressed = hasSecFlag(Entry, SecCommonFlags::SecFlagCompress); | 
|  | if (isCompressed) { | 
|  | const uint8_t *DecompressBuf; | 
|  | uint64_t DecompressBufSize; | 
|  | if (std::error_code EC = decompressSection( | 
|  | SecStart, SecSize, DecompressBuf, DecompressBufSize)) | 
|  | return EC; | 
|  | SecStart = DecompressBuf; | 
|  | SecSize = DecompressBufSize; | 
|  | } | 
|  |  | 
|  | if (std::error_code EC = readOneSection(SecStart, SecSize, Entry)) | 
|  | return EC; | 
|  | if (Data != SecStart + SecSize) | 
|  | return sampleprof_error::malformed; | 
|  |  | 
|  | // Change the pointee of 'Data' from DecompressBuf to original Buffer. | 
|  | if (isCompressed) { | 
|  | Data = BufStart + Entry.Offset; | 
|  | End = BufStart + Buffer->getBufferSize(); | 
|  | } | 
|  | } | 
|  |  | 
|  | return sampleprof_error::success; | 
|  | } | 
|  |  | 
|  | std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) { | 
|  | if (Magic == SPMagic()) | 
|  | return sampleprof_error::success; | 
|  | return sampleprof_error::bad_magic; | 
|  | } | 
|  |  | 
|  | std::error_code SampleProfileReaderExtBinary::verifySPMagic(uint64_t Magic) { | 
|  | if (Magic == SPMagic(SPF_Ext_Binary)) | 
|  | return sampleprof_error::success; | 
|  | return sampleprof_error::bad_magic; | 
|  | } | 
|  |  | 
|  | std::error_code SampleProfileReaderBinary::readNameTable() { | 
|  | auto Size = readNumber<size_t>(); | 
|  | if (std::error_code EC = Size.getError()) | 
|  | return EC; | 
|  |  | 
|  | // Normally if useMD5 is true, the name table should have MD5 values, not | 
|  | // strings, however in the case that ExtBinary profile has multiple name | 
|  | // tables mixing string and MD5, all of them have to be normalized to use MD5, | 
|  | // because optimization passes can only handle either type. | 
|  | bool UseMD5 = useMD5(); | 
|  |  | 
|  | NameTable.clear(); | 
|  | NameTable.reserve(*Size); | 
|  | if (!ProfileIsCS) { | 
|  | MD5SampleContextTable.clear(); | 
|  | if (UseMD5) | 
|  | MD5SampleContextTable.reserve(*Size); | 
|  | else | 
|  | // If we are using strings, delay MD5 computation since only a portion of | 
|  | // names are used by top level functions. Use 0 to indicate MD5 value is | 
|  | // to be calculated as no known string has a MD5 value of 0. | 
|  | MD5SampleContextTable.resize(*Size); | 
|  | } | 
|  | for (size_t I = 0; I < *Size; ++I) { | 
|  | auto Name(readString()); | 
|  | if (std::error_code EC = Name.getError()) | 
|  | return EC; | 
|  | if (UseMD5) { | 
|  | FunctionId FID(*Name); | 
|  | if (!ProfileIsCS) | 
|  | MD5SampleContextTable.emplace_back(FID.getHashCode()); | 
|  | NameTable.emplace_back(FID); | 
|  | } else | 
|  | NameTable.push_back(FunctionId(*Name)); | 
|  | } | 
|  | if (!ProfileIsCS) | 
|  | MD5SampleContextStart = MD5SampleContextTable.data(); | 
|  | return sampleprof_error::success; | 
|  | } | 
|  |  | 
|  | std::error_code | 
|  | SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5, | 
|  | bool FixedLengthMD5) { | 
|  | if (FixedLengthMD5) { | 
|  | if (!IsMD5) | 
|  | errs() << "If FixedLengthMD5 is true, UseMD5 has to be true"; | 
|  | auto Size = readNumber<size_t>(); | 
|  | if (std::error_code EC = Size.getError()) | 
|  | return EC; | 
|  |  | 
|  | assert(Data + (*Size) * sizeof(uint64_t) == End && | 
|  | "Fixed length MD5 name table does not contain specified number of " | 
|  | "entries"); | 
|  | if (Data + (*Size) * sizeof(uint64_t) > End) | 
|  | return sampleprof_error::truncated; | 
|  |  | 
|  | NameTable.clear(); | 
|  | NameTable.reserve(*Size); | 
|  | for (size_t I = 0; I < *Size; ++I) { | 
|  | using namespace support; | 
|  | uint64_t FID = endian::read<uint64_t, endianness::little, unaligned>( | 
|  | Data + I * sizeof(uint64_t)); | 
|  | NameTable.emplace_back(FunctionId(FID)); | 
|  | } | 
|  | if (!ProfileIsCS) | 
|  | MD5SampleContextStart = reinterpret_cast<const uint64_t *>(Data); | 
|  | Data = Data + (*Size) * sizeof(uint64_t); | 
|  | return sampleprof_error::success; | 
|  | } | 
|  |  | 
|  | if (IsMD5) { | 
|  | assert(!FixedLengthMD5 && "FixedLengthMD5 should be unreachable here"); | 
|  | auto Size = readNumber<size_t>(); | 
|  | if (std::error_code EC = Size.getError()) | 
|  | return EC; | 
|  |  | 
|  | NameTable.clear(); | 
|  | NameTable.reserve(*Size); | 
|  | if (!ProfileIsCS) | 
|  | MD5SampleContextTable.resize(*Size); | 
|  | for (size_t I = 0; I < *Size; ++I) { | 
|  | auto FID = readNumber<uint64_t>(); | 
|  | if (std::error_code EC = FID.getError()) | 
|  | return EC; | 
|  | if (!ProfileIsCS) | 
|  | support::endian::write64le(&MD5SampleContextTable[I], *FID); | 
|  | NameTable.emplace_back(FunctionId(*FID)); | 
|  | } | 
|  | if (!ProfileIsCS) | 
|  | MD5SampleContextStart = MD5SampleContextTable.data(); | 
|  | return sampleprof_error::success; | 
|  | } | 
|  |  | 
|  | return SampleProfileReaderBinary::readNameTable(); | 
|  | } | 
|  |  | 
|  | // Read in the CS name table section, which basically contains a list of context | 
|  | // vectors. Each element of a context vector, aka a frame, refers to the | 
|  | // underlying raw function names that are stored in the name table, as well as | 
|  | // a callsite identifier that only makes sense for non-leaf frames. | 
|  | std::error_code SampleProfileReaderExtBinaryBase::readCSNameTableSec() { | 
|  | auto Size = readNumber<size_t>(); | 
|  | if (std::error_code EC = Size.getError()) | 
|  | return EC; | 
|  |  | 
|  | CSNameTable.clear(); | 
|  | CSNameTable.reserve(*Size); | 
|  | if (ProfileIsCS) { | 
|  | // Delay MD5 computation of CS context until they are needed. Use 0 to | 
|  | // indicate MD5 value is to be calculated as no known string has a MD5 | 
|  | // value of 0. | 
|  | MD5SampleContextTable.clear(); | 
|  | MD5SampleContextTable.resize(*Size); | 
|  | MD5SampleContextStart = MD5SampleContextTable.data(); | 
|  | } | 
|  | for (size_t I = 0; I < *Size; ++I) { | 
|  | CSNameTable.emplace_back(SampleContextFrameVector()); | 
|  | auto ContextSize = readNumber<uint32_t>(); | 
|  | if (std::error_code EC = ContextSize.getError()) | 
|  | return EC; | 
|  | for (uint32_t J = 0; J < *ContextSize; ++J) { | 
|  | auto FName(readStringFromTable()); | 
|  | if (std::error_code EC = FName.getError()) | 
|  | return EC; | 
|  | auto LineOffset = readNumber<uint64_t>(); | 
|  | if (std::error_code EC = LineOffset.getError()) | 
|  | return EC; | 
|  |  | 
|  | if (!isOffsetLegal(*LineOffset)) | 
|  | return std::error_code(); | 
|  |  | 
|  | auto Discriminator = readNumber<uint64_t>(); | 
|  | if (std::error_code EC = Discriminator.getError()) | 
|  | return EC; | 
|  |  | 
|  | CSNameTable.back().emplace_back( | 
|  | FName.get(), LineLocation(LineOffset.get(), Discriminator.get())); | 
|  | } | 
|  | } | 
|  |  | 
|  | return sampleprof_error::success; | 
|  | } | 
|  |  | 
|  | std::error_code | 
|  | SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute, | 
|  | FunctionSamples *FProfile) { | 
|  | if (Data < End) { | 
|  | if (ProfileIsProbeBased) { | 
|  | auto Checksum = readNumber<uint64_t>(); | 
|  | if (std::error_code EC = Checksum.getError()) | 
|  | return EC; | 
|  | if (FProfile) | 
|  | FProfile->setFunctionHash(*Checksum); | 
|  | } | 
|  |  | 
|  | if (ProfileHasAttribute) { | 
|  | auto Attributes = readNumber<uint32_t>(); | 
|  | if (std::error_code EC = Attributes.getError()) | 
|  | return EC; | 
|  | if (FProfile) | 
|  | FProfile->getContext().setAllAttributes(*Attributes); | 
|  | } | 
|  |  | 
|  | if (!ProfileIsCS) { | 
|  | // Read all the attributes for inlined function calls. | 
|  | auto NumCallsites = readNumber<uint32_t>(); | 
|  | if (std::error_code EC = NumCallsites.getError()) | 
|  | return EC; | 
|  |  | 
|  | for (uint32_t J = 0; J < *NumCallsites; ++J) { | 
|  | auto LineOffset = readNumber<uint64_t>(); | 
|  | if (std::error_code EC = LineOffset.getError()) | 
|  | return EC; | 
|  |  | 
|  | auto Discriminator = readNumber<uint64_t>(); | 
|  | if (std::error_code EC = Discriminator.getError()) | 
|  | return EC; | 
|  |  | 
|  | auto FContextHash(readSampleContextFromTable()); | 
|  | if (std::error_code EC = FContextHash.getError()) | 
|  | return EC; | 
|  |  | 
|  | auto &[FContext, Hash] = *FContextHash; | 
|  | FunctionSamples *CalleeProfile = nullptr; | 
|  | if (FProfile) { | 
|  | CalleeProfile = const_cast<FunctionSamples *>( | 
|  | &FProfile->functionSamplesAt(LineLocation( | 
|  | *LineOffset, | 
|  | *Discriminator))[FContext.getFunction()]); | 
|  | } | 
|  | if (std::error_code EC = | 
|  | readFuncMetadata(ProfileHasAttribute, CalleeProfile)) | 
|  | return EC; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | return sampleprof_error::success; | 
|  | } | 
|  |  | 
|  | std::error_code SampleProfileReaderExtBinaryBase::readFuncMetadata( | 
|  | bool ProfileHasAttribute, DenseSet<FunctionSamples *> &Profiles) { | 
|  | if (FuncMetadataIndex.empty()) | 
|  | return sampleprof_error::success; | 
|  |  | 
|  | for (auto *FProfile : Profiles) { | 
|  | auto R = FuncMetadataIndex.find(FProfile->getContext().getHashCode()); | 
|  | if (R == FuncMetadataIndex.end()) | 
|  | continue; | 
|  |  | 
|  | Data = R->second.first; | 
|  | End = R->second.second; | 
|  | if (std::error_code EC = readFuncMetadata(ProfileHasAttribute, FProfile)) | 
|  | return EC; | 
|  | assert(Data == End && "More data is read than expected"); | 
|  | } | 
|  | return sampleprof_error::success; | 
|  | } | 
|  |  | 
|  | std::error_code | 
|  | SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) { | 
|  | while (Data < End) { | 
|  | auto FContextHash(readSampleContextFromTable()); | 
|  | if (std::error_code EC = FContextHash.getError()) | 
|  | return EC; | 
|  | auto &[FContext, Hash] = *FContextHash; | 
|  | FunctionSamples *FProfile = nullptr; | 
|  | auto It = Profiles.find(FContext); | 
|  | if (It != Profiles.end()) | 
|  | FProfile = &It->second; | 
|  |  | 
|  | const uint8_t *Start = Data; | 
|  | if (std::error_code EC = readFuncMetadata(ProfileHasAttribute, FProfile)) | 
|  | return EC; | 
|  |  | 
|  | FuncMetadataIndex[FContext.getHashCode()] = {Start, Data}; | 
|  | } | 
|  |  | 
|  | assert(Data == End && "More data is read than expected"); | 
|  | return sampleprof_error::success; | 
|  | } | 
|  |  | 
|  | std::error_code | 
|  | SampleProfileReaderExtBinaryBase::readSecHdrTableEntry(uint64_t Idx) { | 
|  | SecHdrTableEntry Entry; | 
|  | auto Type = readUnencodedNumber<uint64_t>(); | 
|  | if (std::error_code EC = Type.getError()) | 
|  | return EC; | 
|  | Entry.Type = static_cast<SecType>(*Type); | 
|  |  | 
|  | auto Flags = readUnencodedNumber<uint64_t>(); | 
|  | if (std::error_code EC = Flags.getError()) | 
|  | return EC; | 
|  | Entry.Flags = *Flags; | 
|  |  | 
|  | auto Offset = readUnencodedNumber<uint64_t>(); | 
|  | if (std::error_code EC = Offset.getError()) | 
|  | return EC; | 
|  | Entry.Offset = *Offset; | 
|  |  | 
|  | auto Size = readUnencodedNumber<uint64_t>(); | 
|  | if (std::error_code EC = Size.getError()) | 
|  | return EC; | 
|  | Entry.Size = *Size; | 
|  |  | 
|  | Entry.LayoutIndex = Idx; | 
|  | SecHdrTable.push_back(std::move(Entry)); | 
|  | return sampleprof_error::success; | 
|  | } | 
|  |  | 
|  | std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTable() { | 
|  | auto EntryNum = readUnencodedNumber<uint64_t>(); | 
|  | if (std::error_code EC = EntryNum.getError()) | 
|  | return EC; | 
|  |  | 
|  | for (uint64_t i = 0; i < (*EntryNum); i++) | 
|  | if (std::error_code EC = readSecHdrTableEntry(i)) | 
|  | return EC; | 
|  |  | 
|  | return sampleprof_error::success; | 
|  | } | 
|  |  | 
|  | std::error_code SampleProfileReaderExtBinaryBase::readHeader() { | 
|  | const uint8_t *BufStart = | 
|  | reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()); | 
|  | Data = BufStart; | 
|  | End = BufStart + Buffer->getBufferSize(); | 
|  |  | 
|  | if (std::error_code EC = readMagicIdent()) | 
|  | return EC; | 
|  |  | 
|  | if (std::error_code EC = readSecHdrTable()) | 
|  | return EC; | 
|  |  | 
|  | return sampleprof_error::success; | 
|  | } | 
|  |  | 
|  | uint64_t SampleProfileReaderExtBinaryBase::getSectionSize(SecType Type) { | 
|  | uint64_t Size = 0; | 
|  | for (auto &Entry : SecHdrTable) { | 
|  | if (Entry.Type == Type) | 
|  | Size += Entry.Size; | 
|  | } | 
|  | return Size; | 
|  | } | 
|  |  | 
|  | uint64_t SampleProfileReaderExtBinaryBase::getFileSize() { | 
|  | // Sections in SecHdrTable is not necessarily in the same order as | 
|  | // sections in the profile because section like FuncOffsetTable needs | 
|  | // to be written after section LBRProfile but needs to be read before | 
|  | // section LBRProfile, so we cannot simply use the last entry in | 
|  | // SecHdrTable to calculate the file size. | 
|  | uint64_t FileSize = 0; | 
|  | for (auto &Entry : SecHdrTable) { | 
|  | FileSize = std::max(Entry.Offset + Entry.Size, FileSize); | 
|  | } | 
|  | return FileSize; | 
|  | } | 
|  |  | 
|  | static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) { | 
|  | std::string Flags; | 
|  | if (hasSecFlag(Entry, SecCommonFlags::SecFlagCompress)) | 
|  | Flags.append("{compressed,"); | 
|  | else | 
|  | Flags.append("{"); | 
|  |  | 
|  | if (hasSecFlag(Entry, SecCommonFlags::SecFlagFlat)) | 
|  | Flags.append("flat,"); | 
|  |  | 
|  | switch (Entry.Type) { | 
|  | case SecNameTable: | 
|  | if (hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5)) | 
|  | Flags.append("fixlenmd5,"); | 
|  | else if (hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name)) | 
|  | Flags.append("md5,"); | 
|  | if (hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix)) | 
|  | Flags.append("uniq,"); | 
|  | break; | 
|  | case SecProfSummary: | 
|  | if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial)) | 
|  | Flags.append("partial,"); | 
|  | if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext)) | 
|  | Flags.append("context,"); | 
|  | if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsPreInlined)) | 
|  | Flags.append("preInlined,"); | 
|  | if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator)) | 
|  | Flags.append("fs-discriminator,"); | 
|  | break; | 
|  | case SecFuncOffsetTable: | 
|  | if (hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered)) | 
|  | Flags.append("ordered,"); | 
|  | break; | 
|  | case SecFuncMetadata: | 
|  | if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased)) | 
|  | Flags.append("probe,"); | 
|  | if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute)) | 
|  | Flags.append("attr,"); | 
|  | break; | 
|  | default: | 
|  | break; | 
|  | } | 
|  | char &last = Flags.back(); | 
|  | if (last == ',') | 
|  | last = '}'; | 
|  | else | 
|  | Flags.append("}"); | 
|  | return Flags; | 
|  | } | 
|  |  | 
|  | bool SampleProfileReaderExtBinaryBase::dumpSectionInfo(raw_ostream &OS) { | 
|  | uint64_t TotalSecsSize = 0; | 
|  | for (auto &Entry : SecHdrTable) { | 
|  | OS << getSecName(Entry.Type) << " - Offset: " << Entry.Offset | 
|  | << ", Size: " << Entry.Size << ", Flags: " << getSecFlagsStr(Entry) | 
|  | << "\n"; | 
|  | ; | 
|  | TotalSecsSize += Entry.Size; | 
|  | } | 
|  | uint64_t HeaderSize = SecHdrTable.front().Offset; | 
|  | assert(HeaderSize + TotalSecsSize == getFileSize() && | 
|  | "Size of 'header + sections' doesn't match the total size of profile"); | 
|  |  | 
|  | OS << "Header Size: " << HeaderSize << "\n"; | 
|  | OS << "Total Sections Size: " << TotalSecsSize << "\n"; | 
|  | OS << "File Size: " << getFileSize() << "\n"; | 
|  | return true; | 
|  | } | 
|  |  | 
|  | std::error_code SampleProfileReaderBinary::readMagicIdent() { | 
|  | // Read and check the magic identifier. | 
|  | auto Magic = readNumber<uint64_t>(); | 
|  | if (std::error_code EC = Magic.getError()) | 
|  | return EC; | 
|  | else if (std::error_code EC = verifySPMagic(*Magic)) | 
|  | return EC; | 
|  |  | 
|  | // Read the version number. | 
|  | auto Version = readNumber<uint64_t>(); | 
|  | if (std::error_code EC = Version.getError()) | 
|  | return EC; | 
|  | else if (*Version != SPVersion()) | 
|  | return sampleprof_error::unsupported_version; | 
|  |  | 
|  | return sampleprof_error::success; | 
|  | } | 
|  |  | 
|  | std::error_code SampleProfileReaderBinary::readHeader() { | 
|  | Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()); | 
|  | End = Data + Buffer->getBufferSize(); | 
|  |  | 
|  | if (std::error_code EC = readMagicIdent()) | 
|  | return EC; | 
|  |  | 
|  | if (std::error_code EC = readSummary()) | 
|  | return EC; | 
|  |  | 
|  | if (std::error_code EC = readNameTable()) | 
|  | return EC; | 
|  | return sampleprof_error::success; | 
|  | } | 
|  |  | 
|  | std::error_code SampleProfileReaderBinary::readSummaryEntry( | 
|  | std::vector<ProfileSummaryEntry> &Entries) { | 
|  | auto Cutoff = readNumber<uint64_t>(); | 
|  | if (std::error_code EC = Cutoff.getError()) | 
|  | return EC; | 
|  |  | 
|  | auto MinBlockCount = readNumber<uint64_t>(); | 
|  | if (std::error_code EC = MinBlockCount.getError()) | 
|  | return EC; | 
|  |  | 
|  | auto NumBlocks = readNumber<uint64_t>(); | 
|  | if (std::error_code EC = NumBlocks.getError()) | 
|  | return EC; | 
|  |  | 
|  | Entries.emplace_back(*Cutoff, *MinBlockCount, *NumBlocks); | 
|  | return sampleprof_error::success; | 
|  | } | 
|  |  | 
|  | std::error_code SampleProfileReaderBinary::readSummary() { | 
|  | auto TotalCount = readNumber<uint64_t>(); | 
|  | if (std::error_code EC = TotalCount.getError()) | 
|  | return EC; | 
|  |  | 
|  | auto MaxBlockCount = readNumber<uint64_t>(); | 
|  | if (std::error_code EC = MaxBlockCount.getError()) | 
|  | return EC; | 
|  |  | 
|  | auto MaxFunctionCount = readNumber<uint64_t>(); | 
|  | if (std::error_code EC = MaxFunctionCount.getError()) | 
|  | return EC; | 
|  |  | 
|  | auto NumBlocks = readNumber<uint64_t>(); | 
|  | if (std::error_code EC = NumBlocks.getError()) | 
|  | return EC; | 
|  |  | 
|  | auto NumFunctions = readNumber<uint64_t>(); | 
|  | if (std::error_code EC = NumFunctions.getError()) | 
|  | return EC; | 
|  |  | 
|  | auto NumSummaryEntries = readNumber<uint64_t>(); | 
|  | if (std::error_code EC = NumSummaryEntries.getError()) | 
|  | return EC; | 
|  |  | 
|  | std::vector<ProfileSummaryEntry> Entries; | 
|  | for (unsigned i = 0; i < *NumSummaryEntries; i++) { | 
|  | std::error_code EC = readSummaryEntry(Entries); | 
|  | if (EC != sampleprof_error::success) | 
|  | return EC; | 
|  | } | 
|  | Summary = std::make_unique<ProfileSummary>( | 
|  | ProfileSummary::PSK_Sample, Entries, *TotalCount, *MaxBlockCount, 0, | 
|  | *MaxFunctionCount, *NumBlocks, *NumFunctions); | 
|  |  | 
|  | return sampleprof_error::success; | 
|  | } | 
|  |  | 
|  | bool SampleProfileReaderRawBinary::hasFormat(const MemoryBuffer &Buffer) { | 
|  | const uint8_t *Data = | 
|  | reinterpret_cast<const uint8_t *>(Buffer.getBufferStart()); | 
|  | uint64_t Magic = decodeULEB128(Data); | 
|  | return Magic == SPMagic(); | 
|  | } | 
|  |  | 
|  | bool SampleProfileReaderExtBinary::hasFormat(const MemoryBuffer &Buffer) { | 
|  | const uint8_t *Data = | 
|  | reinterpret_cast<const uint8_t *>(Buffer.getBufferStart()); | 
|  | uint64_t Magic = decodeULEB128(Data); | 
|  | return Magic == SPMagic(SPF_Ext_Binary); | 
|  | } | 
|  |  | 
|  | std::error_code SampleProfileReaderGCC::skipNextWord() { | 
|  | uint32_t dummy; | 
|  | if (!GcovBuffer.readInt(dummy)) | 
|  | return sampleprof_error::truncated; | 
|  | return sampleprof_error::success; | 
|  | } | 
|  |  | 
|  | template <typename T> ErrorOr<T> SampleProfileReaderGCC::readNumber() { | 
|  | if (sizeof(T) <= sizeof(uint32_t)) { | 
|  | uint32_t Val; | 
|  | if (GcovBuffer.readInt(Val) && Val <= std::numeric_limits<T>::max()) | 
|  | return static_cast<T>(Val); | 
|  | } else if (sizeof(T) <= sizeof(uint64_t)) { | 
|  | uint64_t Val; | 
|  | if (GcovBuffer.readInt64(Val) && Val <= std::numeric_limits<T>::max()) | 
|  | return static_cast<T>(Val); | 
|  | } | 
|  |  | 
|  | std::error_code EC = sampleprof_error::malformed; | 
|  | reportError(0, EC.message()); | 
|  | return EC; | 
|  | } | 
|  |  | 
|  | ErrorOr<StringRef> SampleProfileReaderGCC::readString() { | 
|  | StringRef Str; | 
|  | if (!GcovBuffer.readString(Str)) | 
|  | return sampleprof_error::truncated; | 
|  | return Str; | 
|  | } | 
|  |  | 
|  | std::error_code SampleProfileReaderGCC::readHeader() { | 
|  | // Read the magic identifier. | 
|  | if (!GcovBuffer.readGCDAFormat()) | 
|  | return sampleprof_error::unrecognized_format; | 
|  |  | 
|  | // Read the version number. Note - the GCC reader does not validate this | 
|  | // version, but the profile creator generates v704. | 
|  | GCOV::GCOVVersion version; | 
|  | if (!GcovBuffer.readGCOVVersion(version)) | 
|  | return sampleprof_error::unrecognized_format; | 
|  |  | 
|  | if (version != GCOV::V407) | 
|  | return sampleprof_error::unsupported_version; | 
|  |  | 
|  | // Skip the empty integer. | 
|  | if (std::error_code EC = skipNextWord()) | 
|  | return EC; | 
|  |  | 
|  | return sampleprof_error::success; | 
|  | } | 
|  |  | 
|  | std::error_code SampleProfileReaderGCC::readSectionTag(uint32_t Expected) { | 
|  | uint32_t Tag; | 
|  | if (!GcovBuffer.readInt(Tag)) | 
|  | return sampleprof_error::truncated; | 
|  |  | 
|  | if (Tag != Expected) | 
|  | return sampleprof_error::malformed; | 
|  |  | 
|  | if (std::error_code EC = skipNextWord()) | 
|  | return EC; | 
|  |  | 
|  | return sampleprof_error::success; | 
|  | } | 
|  |  | 
|  | std::error_code SampleProfileReaderGCC::readNameTable() { | 
|  | if (std::error_code EC = readSectionTag(GCOVTagAFDOFileNames)) | 
|  | return EC; | 
|  |  | 
|  | uint32_t Size; | 
|  | if (!GcovBuffer.readInt(Size)) | 
|  | return sampleprof_error::truncated; | 
|  |  | 
|  | for (uint32_t I = 0; I < Size; ++I) { | 
|  | StringRef Str; | 
|  | if (!GcovBuffer.readString(Str)) | 
|  | return sampleprof_error::truncated; | 
|  | Names.push_back(std::string(Str)); | 
|  | } | 
|  |  | 
|  | return sampleprof_error::success; | 
|  | } | 
|  |  | 
|  | std::error_code SampleProfileReaderGCC::readFunctionProfiles() { | 
|  | if (std::error_code EC = readSectionTag(GCOVTagAFDOFunction)) | 
|  | return EC; | 
|  |  | 
|  | uint32_t NumFunctions; | 
|  | if (!GcovBuffer.readInt(NumFunctions)) | 
|  | return sampleprof_error::truncated; | 
|  |  | 
|  | InlineCallStack Stack; | 
|  | for (uint32_t I = 0; I < NumFunctions; ++I) | 
|  | if (std::error_code EC = readOneFunctionProfile(Stack, true, 0)) | 
|  | return EC; | 
|  |  | 
|  | computeSummary(); | 
|  | return sampleprof_error::success; | 
|  | } | 
|  |  | 
|  | std::error_code SampleProfileReaderGCC::readOneFunctionProfile( | 
|  | const InlineCallStack &InlineStack, bool Update, uint32_t Offset) { | 
|  | uint64_t HeadCount = 0; | 
|  | if (InlineStack.size() == 0) | 
|  | if (!GcovBuffer.readInt64(HeadCount)) | 
|  | return sampleprof_error::truncated; | 
|  |  | 
|  | uint32_t NameIdx; | 
|  | if (!GcovBuffer.readInt(NameIdx)) | 
|  | return sampleprof_error::truncated; | 
|  |  | 
|  | StringRef Name(Names[NameIdx]); | 
|  |  | 
|  | uint32_t NumPosCounts; | 
|  | if (!GcovBuffer.readInt(NumPosCounts)) | 
|  | return sampleprof_error::truncated; | 
|  |  | 
|  | uint32_t NumCallsites; | 
|  | if (!GcovBuffer.readInt(NumCallsites)) | 
|  | return sampleprof_error::truncated; | 
|  |  | 
|  | FunctionSamples *FProfile = nullptr; | 
|  | if (InlineStack.size() == 0) { | 
|  | // If this is a top function that we have already processed, do not | 
|  | // update its profile again.  This happens in the presence of | 
|  | // function aliases.  Since these aliases share the same function | 
|  | // body, there will be identical replicated profiles for the | 
|  | // original function.  In this case, we simply not bother updating | 
|  | // the profile of the original function. | 
|  | FProfile = &Profiles[FunctionId(Name)]; | 
|  | FProfile->addHeadSamples(HeadCount); | 
|  | if (FProfile->getTotalSamples() > 0) | 
|  | Update = false; | 
|  | } else { | 
|  | // Otherwise, we are reading an inlined instance. The top of the | 
|  | // inline stack contains the profile of the caller. Insert this | 
|  | // callee in the caller's CallsiteMap. | 
|  | FunctionSamples *CallerProfile = InlineStack.front(); | 
|  | uint32_t LineOffset = Offset >> 16; | 
|  | uint32_t Discriminator = Offset & 0xffff; | 
|  | FProfile = &CallerProfile->functionSamplesAt( | 
|  | LineLocation(LineOffset, Discriminator))[FunctionId(Name)]; | 
|  | } | 
|  | FProfile->setFunction(FunctionId(Name)); | 
|  |  | 
|  | for (uint32_t I = 0; I < NumPosCounts; ++I) { | 
|  | uint32_t Offset; | 
|  | if (!GcovBuffer.readInt(Offset)) | 
|  | return sampleprof_error::truncated; | 
|  |  | 
|  | uint32_t NumTargets; | 
|  | if (!GcovBuffer.readInt(NumTargets)) | 
|  | return sampleprof_error::truncated; | 
|  |  | 
|  | uint64_t Count; | 
|  | if (!GcovBuffer.readInt64(Count)) | 
|  | return sampleprof_error::truncated; | 
|  |  | 
|  | // The line location is encoded in the offset as: | 
|  | //   high 16 bits: line offset to the start of the function. | 
|  | //   low 16 bits: discriminator. | 
|  | uint32_t LineOffset = Offset >> 16; | 
|  | uint32_t Discriminator = Offset & 0xffff; | 
|  |  | 
|  | InlineCallStack NewStack; | 
|  | NewStack.push_back(FProfile); | 
|  | llvm::append_range(NewStack, InlineStack); | 
|  | if (Update) { | 
|  | // Walk up the inline stack, adding the samples on this line to | 
|  | // the total sample count of the callers in the chain. | 
|  | for (auto *CallerProfile : NewStack) | 
|  | CallerProfile->addTotalSamples(Count); | 
|  |  | 
|  | // Update the body samples for the current profile. | 
|  | FProfile->addBodySamples(LineOffset, Discriminator, Count); | 
|  | } | 
|  |  | 
|  | // Process the list of functions called at an indirect call site. | 
|  | // These are all the targets that a function pointer (or virtual | 
|  | // function) resolved at runtime. | 
|  | for (uint32_t J = 0; J < NumTargets; J++) { | 
|  | uint32_t HistVal; | 
|  | if (!GcovBuffer.readInt(HistVal)) | 
|  | return sampleprof_error::truncated; | 
|  |  | 
|  | if (HistVal != HIST_TYPE_INDIR_CALL_TOPN) | 
|  | return sampleprof_error::malformed; | 
|  |  | 
|  | uint64_t TargetIdx; | 
|  | if (!GcovBuffer.readInt64(TargetIdx)) | 
|  | return sampleprof_error::truncated; | 
|  | StringRef TargetName(Names[TargetIdx]); | 
|  |  | 
|  | uint64_t TargetCount; | 
|  | if (!GcovBuffer.readInt64(TargetCount)) | 
|  | return sampleprof_error::truncated; | 
|  |  | 
|  | if (Update) | 
|  | FProfile->addCalledTargetSamples(LineOffset, Discriminator, | 
|  | FunctionId(TargetName), | 
|  | TargetCount); | 
|  | } | 
|  | } | 
|  |  | 
|  | // Process all the inlined callers into the current function. These | 
|  | // are all the callsites that were inlined into this function. | 
|  | for (uint32_t I = 0; I < NumCallsites; I++) { | 
|  | // The offset is encoded as: | 
|  | //   high 16 bits: line offset to the start of the function. | 
|  | //   low 16 bits: discriminator. | 
|  | uint32_t Offset; | 
|  | if (!GcovBuffer.readInt(Offset)) | 
|  | return sampleprof_error::truncated; | 
|  | InlineCallStack NewStack; | 
|  | NewStack.push_back(FProfile); | 
|  | llvm::append_range(NewStack, InlineStack); | 
|  | if (std::error_code EC = readOneFunctionProfile(NewStack, Update, Offset)) | 
|  | return EC; | 
|  | } | 
|  |  | 
|  | return sampleprof_error::success; | 
|  | } | 
|  |  | 
|  | /// Read a GCC AutoFDO profile. | 
|  | /// | 
|  | /// This format is generated by the Linux Perf conversion tool at | 
|  | /// https://github.com/google/autofdo. | 
|  | std::error_code SampleProfileReaderGCC::readImpl() { | 
|  | assert(!ProfileIsFSDisciminator && "Gcc profiles not support FSDisciminator"); | 
|  | // Read the string table. | 
|  | if (std::error_code EC = readNameTable()) | 
|  | return EC; | 
|  |  | 
|  | // Read the source profile. | 
|  | if (std::error_code EC = readFunctionProfiles()) | 
|  | return EC; | 
|  |  | 
|  | return sampleprof_error::success; | 
|  | } | 
|  |  | 
|  | bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) { | 
|  | StringRef Magic(reinterpret_cast<const char *>(Buffer.getBufferStart())); | 
|  | return Magic == "adcg*704"; | 
|  | } | 
|  |  | 
|  | void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext &Ctx) { | 
|  | // If the reader uses MD5 to represent string, we can't remap it because | 
|  | // we don't know what the original function names were. | 
|  | if (Reader.useMD5()) { | 
|  | Ctx.diagnose(DiagnosticInfoSampleProfile( | 
|  | Reader.getBuffer()->getBufferIdentifier(), | 
|  | "Profile data remapping cannot be applied to profile data " | 
|  | "using MD5 names (original mangled names are not available).", | 
|  | DS_Warning)); | 
|  | return; | 
|  | } | 
|  |  | 
|  | // CSSPGO-TODO: Remapper is not yet supported. | 
|  | // We will need to remap the entire context string. | 
|  | assert(Remappings && "should be initialized while creating remapper"); | 
|  | for (auto &Sample : Reader.getProfiles()) { | 
|  | DenseSet<FunctionId> NamesInSample; | 
|  | Sample.second.findAllNames(NamesInSample); | 
|  | for (auto &Name : NamesInSample) { | 
|  | StringRef NameStr = Name.stringRef(); | 
|  | if (auto Key = Remappings->insert(NameStr)) | 
|  | NameMap.insert({Key, NameStr}); | 
|  | } | 
|  | } | 
|  |  | 
|  | RemappingApplied = true; | 
|  | } | 
|  |  | 
|  | std::optional<StringRef> | 
|  | SampleProfileReaderItaniumRemapper::lookUpNameInProfile(StringRef Fname) { | 
|  | if (auto Key = Remappings->lookup(Fname)) { | 
|  | StringRef Result = NameMap.lookup(Key); | 
|  | if (!Result.empty()) | 
|  | return Result; | 
|  | } | 
|  | return std::nullopt; | 
|  | } | 
|  |  | 
|  | /// Prepare a memory buffer for the contents of \p Filename. | 
|  | /// | 
|  | /// \returns an error code indicating the status of the buffer. | 
|  | static ErrorOr<std::unique_ptr<MemoryBuffer>> | 
|  | setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) { | 
|  | auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN() | 
|  | : FS.getBufferForFile(Filename); | 
|  | if (std::error_code EC = BufferOrErr.getError()) | 
|  | return EC; | 
|  | auto Buffer = std::move(BufferOrErr.get()); | 
|  |  | 
|  | return std::move(Buffer); | 
|  | } | 
|  |  | 
|  | /// Create a sample profile reader based on the format of the input file. | 
|  | /// | 
|  | /// \param Filename The file to open. | 
|  | /// | 
|  | /// \param C The LLVM context to use to emit diagnostics. | 
|  | /// | 
|  | /// \param P The FSDiscriminatorPass. | 
|  | /// | 
|  | /// \param RemapFilename The file used for profile remapping. | 
|  | /// | 
|  | /// \returns an error code indicating the status of the created reader. | 
|  | ErrorOr<std::unique_ptr<SampleProfileReader>> | 
|  | SampleProfileReader::create(StringRef Filename, LLVMContext &C, | 
|  | vfs::FileSystem &FS, FSDiscriminatorPass P, | 
|  | StringRef RemapFilename) { | 
|  | auto BufferOrError = setupMemoryBuffer(Filename, FS); | 
|  | if (std::error_code EC = BufferOrError.getError()) | 
|  | return EC; | 
|  | return create(BufferOrError.get(), C, FS, P, RemapFilename); | 
|  | } | 
|  |  | 
|  | /// Create a sample profile remapper from the given input, to remap the | 
|  | /// function names in the given profile data. | 
|  | /// | 
|  | /// \param Filename The file to open. | 
|  | /// | 
|  | /// \param Reader The profile reader the remapper is going to be applied to. | 
|  | /// | 
|  | /// \param C The LLVM context to use to emit diagnostics. | 
|  | /// | 
|  | /// \returns an error code indicating the status of the created reader. | 
|  | ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>> | 
|  | SampleProfileReaderItaniumRemapper::create(StringRef Filename, | 
|  | vfs::FileSystem &FS, | 
|  | SampleProfileReader &Reader, | 
|  | LLVMContext &C) { | 
|  | auto BufferOrError = setupMemoryBuffer(Filename, FS); | 
|  | if (std::error_code EC = BufferOrError.getError()) | 
|  | return EC; | 
|  | return create(BufferOrError.get(), Reader, C); | 
|  | } | 
|  |  | 
|  | /// Create a sample profile remapper from the given input, to remap the | 
|  | /// function names in the given profile data. | 
|  | /// | 
|  | /// \param B The memory buffer to create the reader from (assumes ownership). | 
|  | /// | 
|  | /// \param C The LLVM context to use to emit diagnostics. | 
|  | /// | 
|  | /// \param Reader The profile reader the remapper is going to be applied to. | 
|  | /// | 
|  | /// \returns an error code indicating the status of the created reader. | 
|  | ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>> | 
|  | SampleProfileReaderItaniumRemapper::create(std::unique_ptr<MemoryBuffer> &B, | 
|  | SampleProfileReader &Reader, | 
|  | LLVMContext &C) { | 
|  | auto Remappings = std::make_unique<SymbolRemappingReader>(); | 
|  | if (Error E = Remappings->read(*B)) { | 
|  | handleAllErrors( | 
|  | std::move(E), [&](const SymbolRemappingParseError &ParseError) { | 
|  | C.diagnose(DiagnosticInfoSampleProfile(B->getBufferIdentifier(), | 
|  | ParseError.getLineNum(), | 
|  | ParseError.getMessage())); | 
|  | }); | 
|  | return sampleprof_error::malformed; | 
|  | } | 
|  |  | 
|  | return std::make_unique<SampleProfileReaderItaniumRemapper>( | 
|  | std::move(B), std::move(Remappings), Reader); | 
|  | } | 
|  |  | 
|  | /// Create a sample profile reader based on the format of the input data. | 
|  | /// | 
|  | /// \param B The memory buffer to create the reader from (assumes ownership). | 
|  | /// | 
|  | /// \param C The LLVM context to use to emit diagnostics. | 
|  | /// | 
|  | /// \param P The FSDiscriminatorPass. | 
|  | /// | 
|  | /// \param RemapFilename The file used for profile remapping. | 
|  | /// | 
|  | /// \returns an error code indicating the status of the created reader. | 
|  | ErrorOr<std::unique_ptr<SampleProfileReader>> | 
|  | SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C, | 
|  | vfs::FileSystem &FS, FSDiscriminatorPass P, | 
|  | StringRef RemapFilename) { | 
|  | std::unique_ptr<SampleProfileReader> Reader; | 
|  | if (SampleProfileReaderRawBinary::hasFormat(*B)) | 
|  | Reader.reset(new SampleProfileReaderRawBinary(std::move(B), C)); | 
|  | else if (SampleProfileReaderExtBinary::hasFormat(*B)) | 
|  | Reader.reset(new SampleProfileReaderExtBinary(std::move(B), C)); | 
|  | else if (SampleProfileReaderGCC::hasFormat(*B)) | 
|  | Reader.reset(new SampleProfileReaderGCC(std::move(B), C)); | 
|  | else if (SampleProfileReaderText::hasFormat(*B)) | 
|  | Reader.reset(new SampleProfileReaderText(std::move(B), C)); | 
|  | else | 
|  | return sampleprof_error::unrecognized_format; | 
|  |  | 
|  | if (!RemapFilename.empty()) { | 
|  | auto ReaderOrErr = SampleProfileReaderItaniumRemapper::create( | 
|  | RemapFilename, FS, *Reader, C); | 
|  | if (std::error_code EC = ReaderOrErr.getError()) { | 
|  | std::string Msg = "Could not create remapper: " + EC.message(); | 
|  | C.diagnose(DiagnosticInfoSampleProfile(RemapFilename, Msg)); | 
|  | return EC; | 
|  | } | 
|  | Reader->Remapper = std::move(ReaderOrErr.get()); | 
|  | } | 
|  |  | 
|  | if (std::error_code EC = Reader->readHeader()) { | 
|  | return EC; | 
|  | } | 
|  |  | 
|  | Reader->setDiscriminatorMaskedBitFrom(P); | 
|  |  | 
|  | return std::move(Reader); | 
|  | } | 
|  |  | 
|  | // For text and GCC file formats, we compute the summary after reading the | 
|  | // profile. Binary format has the profile summary in its header. | 
|  | void SampleProfileReader::computeSummary() { | 
|  | SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); | 
|  | Summary = Builder.computeSummaryForProfiles(Profiles); | 
|  | } |