llvm/lib/ProfileData/SampleProfReader.cpp - llvm-project - Git at Google

 //===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 // This file implements the class that reads LLVM sample profiles. It
 // supports three file formats: text, binary and gcov.
 //
 // The textual representation is useful for debugging and testing purposes. The
 // binary representation is more compact, resulting in smaller file sizes.
 //
 // The gcov encoding is the one generated by GCC's AutoFDO profile creation
 // tool (https://github.com/google/autofdo)
 //
 // All three encodings can be used interchangeably as an input sample profile.
 //
 //===----------------------------------------------------------------------===//

 #include "llvm/ProfileData/SampleProfReader.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/ProfileSummary.h"
 #include "llvm/ProfileData/ProfileCommon.h"
 #include "llvm/ProfileData/SampleProf.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compression.h"
 #include "llvm/Support/ErrorOr.h"
 #include "llvm/Support/JSON.h"
 #include "llvm/Support/LEB128.h"
 #include "llvm/Support/LineIterator.h"
 #include "llvm/Support/MD5.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cstddef>
 #include <cstdint>
 #include <limits>
 #include <memory>
 #include <system_error>
 #include <vector>

 using namespace llvm;
 using namespace sampleprof;

 #define DEBUG_TYPE "samplepgo-reader"

 // This internal option specifies whether the profile uses flow-sensitive (FS)
 // discriminators. It only applies to text and binary format profiles; for
 // ext-binary format profiles the flag is taken from the summary section.
 static cl::opt<bool> ProfileIsFSDisciminator(
     "profile-isfs", cl::Hidden, cl::init(false),
     cl::desc("Profile uses flow sensitive discriminators"));

 /// Print one function profile to a stream.
 ///
 /// The output is "Function: <context>: " followed by the streamed profile.
 ///
 /// \param FS Function profile to print; its context string labels the output.
 /// \param OS Stream to emit the output to.
 void SampleProfileReader::dumpFunctionProfile(const FunctionSamples &FS,
                                               raw_ostream &OS) {
   OS << "Function: ";
   OS << FS.getContext().toString();
   OS << ": ";
   OS << FS;
 }

 /// Dump every loaded function profile to \p OS, visiting the profiles in the
 /// order produced by sortFuncProfiles.
 void SampleProfileReader::dump(raw_ostream &OS) {
   std::vector<NameFunctionSamples> Sorted;
   sortFuncProfiles(Profiles, Sorted);
   for (const auto &Entry : Sorted) {
     const FunctionSamples &FS = *Entry.second;
     dumpFunctionProfile(FS, OS);
   }
 }

 /// Emit the profile \p S as one JSON object on \p JOS.
 ///
 /// The object carries "name" and "total", plus "head" when \p TopLevel is
 /// set. Non-empty body samples go into a "body" array and non-empty inlined
 /// callsite samples into a "callsites" array; callee profiles are emitted
 /// recursively with \p TopLevel left at its default.
 static void dumpFunctionProfileJson(const FunctionSamples &S,
                                     json::OStream &JOS, bool TopLevel = false) {
   // Writes one JSON object per body sample record.
   auto EmitBodyRecords = [&](const BodySampleMap &Records) {
     for (const auto &Entry : Records) {
       const LineLocation &Where = Entry.first;
       const SampleRecord &Record = Entry.second;
       JOS.object([&] {
         JOS.attribute("line", Where.LineOffset);
         if (Where.Discriminator)
           JOS.attribute("discriminator", Where.Discriminator);
         JOS.attribute("samples", Record.getSamples());

         auto Targets = Record.getSortedCallTargets();
         if (Targets.empty())
           return;
         JOS.attributeArray("calls", [&] {
           for (const auto &Target : Targets)
             JOS.object([&] {
               JOS.attribute("function", Target.first.str());
               JOS.attribute("samples", Target.second);
             });
         });
       });
     }
   };

   // Writes one JSON object per (location, callee) pair; the callee profile is
   // nested inside a "samples" array.
   auto EmitCallsites = [&](const CallsiteSampleMap &Callsites) {
     for (const auto &Site : Callsites) {
       const LineLocation &Where = Site.first;
       for (const auto &Callee : Site.second) {
         const FunctionSamples &CalleeProfile = Callee.second;
         JOS.object([&] {
           JOS.attribute("line", Where.LineOffset);
           if (Where.Discriminator)
             JOS.attribute("discriminator", Where.Discriminator);
           JOS.attributeArray(
               "samples", [&] { dumpFunctionProfileJson(CalleeProfile, JOS); });
         });
       }
     }
   };

   JOS.object([&] {
     JOS.attribute("name", S.getFunction().str());
     JOS.attribute("total", S.getTotalSamples());
     if (TopLevel)
       JOS.attribute("head", S.getHeadSamples());

     const auto &Body = S.getBodySamples();
     if (!Body.empty())
       JOS.attributeArray("body", [&] { EmitBodyRecords(Body); });

     const auto &Callsites = S.getCallsiteSamples();
     if (!Callsites.empty())
       JOS.attributeArray("callsites", [&] { EmitCallsites(Callsites); });
   });
 }

 /// Dump all function profiles to \p OS as a JSON array (indent width 2), one
 /// top-level object per profile, in the order produced by sortFuncProfiles.
 void SampleProfileReader::dumpJson(raw_ostream &OS) {
   std::vector<NameFunctionSamples> Sorted;
   sortFuncProfiles(Profiles, Sorted);

   json::OStream JOS(OS, 2);
   JOS.arrayBegin();
   for (const auto &Entry : Sorted) {
     const FunctionSamples &FS = *Entry.second;
     dumpFunctionProfileJson(FS, JOS, /*TopLevel=*/true);
   }
   JOS.arrayEnd();

   // json::OStream does not terminate its output with a newline; add one.
   OS << "\n";
 }

 /// Parse \p Input as function head.
 ///
 /// Parse one line of \p Input, and update function name in \p FName,
 /// function's total sample count in \p NumSamples, function's entry
 /// count in \p NumHeadSamples.
 ///
 /// \returns true if parsing is successful.
 static bool ParseHead(const StringRef &Input, StringRef &FName,
                       uint64_t &NumSamples, uint64_t &NumHeadSamples) {
   if (Input[0] == ' ')
     return false;
   size_t n2 = Input.rfind(':');
   size_t n1 = Input.rfind(':', n2 - 1);
   FName = Input.substr(0, n1);
   if (Input.substr(n1 + 1, n2 - n1 - 1).getAsInteger(10, NumSamples))
     return false;
   if (Input.substr(n2 + 1).getAsInteger(10, NumHeadSamples))
     return false;
   return true;
 }

 /// Returns true if line offset \p L is legal, i.e. it fits in 16 bits.
 static bool isOffsetLegal(unsigned L) {
   constexpr unsigned MaxLegalOffset = 0xffff;
   return L <= MaxLegalOffset;
 }

 /// Parse \p Input that contains metadata.
 /// Possible metadata:
 /// - CFG Checksum information:
 ///     !CFGChecksum: 12345
 /// - CFG Checksum information:
 ///     !Attributes: 1
 /// Stores the FunctionHash (a.k.a. CFG Checksum) into \p FunctionHash.
 static bool parseMetadata(const StringRef &Input, uint64_t &FunctionHash,
                           uint32_t &Attributes) {
   if (Input.starts_with("!CFGChecksum:")) {
     StringRef CFGInfo = Input.substr(strlen("!CFGChecksum:")).trim();
     return !CFGInfo.getAsInteger(10, FunctionHash);
   }

   if (Input.starts_with("!Attributes:")) {
     StringRef Attrib = Input.substr(strlen("!Attributes:")).trim();
     return !Attrib.getAsInteger(10, Attributes);
   }

   return false;
 }

 /// Kind of an indented (non-header) profile line, as classified by ParseLine.
 enum class LineType {
   // Text after "LOC: " does not start with a digit: "callee:NUM".
   CallSiteProfile,
   // Text after "LOC: " starts with a digit: "NUM[ target:NUM]*".
   BodyProfile,
   // First non-space character is '!'.
   Metadata,
 };

 /// Parse \p Input as line sample.
 ///
 /// \param Input input line.
 /// \param LineTy Type of this line.
 /// \param Depth the depth of the inline stack.
 /// \param NumSamples total samples of the line/inlined callsite.
 /// \param LineOffset line offset to the start of the function.
 /// \param Discriminator discriminator of the line.
 /// \param TargetCountMap map from indirect call target to count.
 /// \param FunctionHash the function's CFG hash, used by pseudo probe.
 ///
 /// returns true if parsing is successful.
 static bool ParseLine(const StringRef &Input, LineType &LineTy, uint32_t &Depth,
                       uint64_t &NumSamples, uint32_t &LineOffset,
                       uint32_t &Discriminator, StringRef &CalleeName,
                       DenseMap<StringRef, uint64_t> &TargetCountMap,
                       uint64_t &FunctionHash, uint32_t &Attributes,
                       bool &IsFlat) {
   for (Depth = 0; Input[Depth] == ' '; Depth++)
     ;
   if (Depth == 0)
     return false;

   if (Input[Depth] == '!') {
     LineTy = LineType::Metadata;
     // This metadata is only for manual inspection only. We already created a
     // FunctionSamples and put it in the profile map, so there is no point
     // to skip profiles even they have no use for ThinLTO.
     if (Input == StringRef(" !Flat")) {
       IsFlat = true;
       return true;
     }
     return parseMetadata(Input.substr(Depth), FunctionHash, Attributes);
   }

   size_t n1 = Input.find(':');
   StringRef Loc = Input.substr(Depth, n1 - Depth);
   size_t n2 = Loc.find('.');
   if (n2 == StringRef::npos) {
     if (Loc.getAsInteger(10, LineOffset) || !isOffsetLegal(LineOffset))
       return false;
     Discriminator = 0;
   } else {
     if (Loc.substr(0, n2).getAsInteger(10, LineOffset))
       return false;
     if (Loc.substr(n2 + 1).getAsInteger(10, Discriminator))
       return false;
   }

   StringRef Rest = Input.substr(n1 + 2);
   if (isDigit(Rest[0])) {
     LineTy = LineType::BodyProfile;
     size_t n3 = Rest.find(' ');
     if (n3 == StringRef::npos) {
       if (Rest.getAsInteger(10, NumSamples))
         return false;
     } else {
       if (Rest.substr(0, n3).getAsInteger(10, NumSamples))
         return false;
     }
     // Find call targets and their sample counts.
     // Note: In some cases, there are symbols in the profile which are not
     // mangled. To accommodate such cases, use colon + integer pairs as the
     // anchor points.
     // An example:
     // _M_construct<char *>:1000 string_view<std::allocator<char> >:437
     // ":1000" and ":437" are used as anchor points so the string above will
     // be interpreted as
     // target: _M_construct<char *>
     // count: 1000
     // target: string_view<std::allocator<char> >
     // count: 437
     while (n3 != StringRef::npos) {
       n3 += Rest.substr(n3).find_first_not_of(' ');
       Rest = Rest.substr(n3);
       n3 = Rest.find_first_of(':');
       if (n3 == StringRef::npos || n3 == 0)
         return false;

       StringRef Target;
       uint64_t count, n4;
       while (true) {
         // Get the segment after the current colon.
         StringRef AfterColon = Rest.substr(n3 + 1);
         // Get the target symbol before the current colon.
         Target = Rest.substr(0, n3);
         // Check if the word after the current colon is an integer.
         n4 = AfterColon.find_first_of(' ');
         n4 = (n4 != StringRef::npos) ? n3 + n4 + 1 : Rest.size();
         StringRef WordAfterColon = Rest.substr(n3 + 1, n4 - n3 - 1);
         if (!WordAfterColon.getAsInteger(10, count))
           break;

         // Try to find the next colon.
         uint64_t n5 = AfterColon.find_first_of(':');
         if (n5 == StringRef::npos)
           return false;
         n3 += n5 + 1;
       }

       // An anchor point is found. Save the {target, count} pair
       TargetCountMap[Target] = count;
       if (n4 == Rest.size())
         break;
       // Change n3 to the next blank space after colon + integer pair.
       n3 = n4;
     }
   } else {
     LineTy = LineType::CallSiteProfile;
     size_t n3 = Rest.find_last_of(':');
     CalleeName = Rest.substr(0, n3);
     if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples))
       return false;
   }
   return true;
 }

 /// Load samples from a text file.
 ///
 /// See the documentation at the top of the file for an explanation of
 /// the expected format.
 ///
 /// Lines that do not start with a space are parsed as function headers
 /// (ParseHead); indented lines are parsed with ParseLine and attached to the
 /// function on top of the inline stack for their depth.
 ///
 /// \returns sampleprof_error::malformed as soon as a line fails to parse or a
 /// non-metadata line follows metadata at the same depth; otherwise the result
 /// accumulated through mergeSampleProfErrors from the sample-adding calls.
 std::error_code SampleProfileReaderText::readImpl() {
   line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#');
   sampleprof_error Result = sampleprof_error::success;

   // Stack of profiles being filled: index 0 is the current top-level
   // function, deeper entries are inlined callees.
   InlineCallStack InlineStack;
   // Number of depth-1 metadata lines that carried a non-zero function hash.
   uint32_t TopLevelProbeProfileCount = 0;

   // DepthMetadata tracks whether we have processed metadata for the current
   // top-level or nested function profile.
   uint32_t DepthMetadata = 0;

   // Contexts of top-level profiles marked with "!Flat".
   std::vector<SampleContext *> FlatSamples;

   ProfileIsFS = ProfileIsFSDisciminator;
   FunctionSamples::ProfileIsFS = ProfileIsFS;
   for (; !LineIt.is_at_eof(); ++LineIt) {
     // Skip lines that are all spaces or whose first non-space is '#'.
     size_t pos = LineIt->find_first_not_of(' ');
     if (pos == LineIt->npos || (*LineIt)[pos] == '#')
       continue;
     // Read the header of each function.
     //
     // Note that for function identifiers we are actually expecting
     // mangled names, but we may not always get them. This happens when
     // the compiler decides not to emit the function (e.g., it was inlined
     // and removed). In this case, the binary will not have the linkage
     // name for the function, so the profiler will emit the function's
     // unmangled name, which may contain characters like ':' and '>' in its
     // name (member functions, templates, etc).
     //
     // The only requirement we place on the identifier, then, is that it
     // should not begin with a number.
     if ((*LineIt)[0] != ' ') {
       uint64_t NumSamples, NumHeadSamples;
       StringRef FName;
       if (!ParseHead(*LineIt, FName, NumSamples, NumHeadSamples)) {
         reportError(LineIt.line_number(),
                     "Expected 'mangled_name:NUM:NUM', found " + *LineIt);
         return sampleprof_error::malformed;
       }
       DepthMetadata = 0;
       SampleContext FContext(FName, CSNameTable);
       if (FContext.hasContext())
         ++CSProfileCount;
       FunctionSamples &FProfile = Profiles.create(FContext);
       mergeSampleProfErrors(Result, FProfile.addTotalSamples(NumSamples));
       mergeSampleProfErrors(Result, FProfile.addHeadSamples(NumHeadSamples));
       // A new top-level function starts a fresh inline stack.
       InlineStack.clear();
       InlineStack.push_back(&FProfile);
     } else {
       uint64_t NumSamples;
       StringRef FName;
       DenseMap<StringRef, uint64_t> TargetCountMap;
       uint32_t Depth, LineOffset, Discriminator;
       LineType LineTy;
       uint64_t FunctionHash = 0;
       uint32_t Attributes = 0;
       bool IsFlat = false;
       if (!ParseLine(*LineIt, LineTy, Depth, NumSamples, LineOffset,
                      Discriminator, FName, TargetCountMap, FunctionHash,
                      Attributes, IsFlat)) {
         reportError(LineIt.line_number(),
                     "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " +
                         *LineIt);
         return sampleprof_error::malformed;
       }
       if (LineTy != LineType::Metadata && Depth == DepthMetadata) {
         // Metadata must be put at the end of a function profile.
         reportError(LineIt.line_number(),
                     "Found non-metadata after metadata: " + *LineIt);
         return sampleprof_error::malformed;
       }

       // Here we handle FS discriminators.
       Discriminator &= getDiscriminatorMask();

       // Drop stack entries deeper than this line's indentation so that
       // InlineStack.back() is the profile this line belongs to.
       while (InlineStack.size() > Depth) {
         InlineStack.pop_back();
       }
       switch (LineTy) {
       case LineType::CallSiteProfile: {
         // Create (or find) the inlined callee profile and make it the new
         // top of the stack for the lines nested under it.
         FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt(
             LineLocation(LineOffset, Discriminator))[FunctionId(FName)];
         FSamples.setFunction(FunctionId(FName));
         mergeSampleProfErrors(Result, FSamples.addTotalSamples(NumSamples));
         InlineStack.push_back(&FSamples);
         DepthMetadata = 0;
         break;
       }
       case LineType::BodyProfile: {
         // The stack was already trimmed above, so this loop does not pop
         // anything further.
         while (InlineStack.size() > Depth) {
           InlineStack.pop_back();
         }
         FunctionSamples &FProfile = *InlineStack.back();
         for (const auto &name_count : TargetCountMap) {
           mergeSampleProfErrors(Result, FProfile.addCalledTargetSamples(
                                             LineOffset, Discriminator,
                                             FunctionId(name_count.first),
                                             name_count.second));
         }
         mergeSampleProfErrors(
             Result,
             FProfile.addBodySamples(LineOffset, Discriminator, NumSamples));
         break;
       }
       case LineType::Metadata: {
         FunctionSamples &FProfile = *InlineStack.back();
         if (FunctionHash) {
           FProfile.setFunctionHash(FunctionHash);
           if (Depth == 1)
             ++TopLevelProbeProfileCount;
         }
         FProfile.getContext().setAllAttributes(Attributes);
         if (Attributes & (uint32_t)ContextShouldBeInlined)
           ProfileIsPreInlined = true;
         // Remember the depth so later non-metadata lines at the same depth
         // are rejected.
         DepthMetadata = Depth;
         if (IsFlat) {
           if (Depth == 1)
             FlatSamples.push_back(&FProfile.getContext());
           else
             Ctx.diagnose(DiagnosticInfoSampleProfile(
                 Buffer->getBufferIdentifier(), LineIt.line_number(),
                 "!Flat may only be used at top level function.", DS_Warning));
         }
         break;
       }
       }
     }
   }

   // Honor the option to skip flat functions. Since they are already added to
   // the profile map, remove them all here.
   if (SkipFlatProf)
     for (SampleContext *FlatSample : FlatSamples)
       Profiles.erase(*FlatSample);

   assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
          "Cannot have both context-sensitive and regular profile");
   ProfileIsCS = (CSProfileCount > 0);
   assert((TopLevelProbeProfileCount == 0 ||
           TopLevelProbeProfileCount == Profiles.size()) &&
          "Cannot have both probe-based profiles and regular profiles");
   ProfileIsProbeBased = (TopLevelProbeProfileCount > 0);
   // Publish the detected profile kind through the FunctionSamples statics.
   FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
   FunctionSamples::ProfileIsCS = ProfileIsCS;
   FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined;

   // The summary is only computed when no error was recorded.
   if (Result == sampleprof_error::success)
     computeSummary();

   return Result;
 }

 /// Returns true if \p Buffer looks like a text profile, i.e. its first
 /// non-blank, non-comment line does not start with a space and parses as a
 /// function header.
 bool SampleProfileReaderText::hasFormat(const MemoryBuffer &Buffer) {
   line_iterator LineIt(Buffer, /*SkipBlanks=*/true, '#');

   // Nothing but blanks and comments.
   if (LineIt.is_at_eof())
     return false;

   // An indented first line cannot be a function header.
   if ((*LineIt)[0] == ' ')
     return false;

   uint64_t NumSamples, NumHeadSamples;
   StringRef FName;
   return ParseHead(*LineIt, FName, NumSamples, NumHeadSamples);
 }

 /// Read one ULEB128-encoded number at Data and advance Data past it.
 ///
 /// Decoding is bounded by End so a truncated or corrupt profile cannot make
 /// the decoder read outside the buffer.
 ///
 /// \returns the value converted to \p T; sampleprof_error::truncated if the
 /// encoding runs past End; sampleprof_error::malformed if the value does not
 /// fit in 64 bits or in \p T. Data is left unchanged on error.
 template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() {
   unsigned NumBytesRead = 0;
   const char *DecodeError = nullptr;
   uint64_t Val = decodeULEB128(Data, &NumBytesRead, End, &DecodeError);

   std::error_code EC;
   if (DecodeError) {
     // The decoder fails either when it reaches End before the final byte of
     // the encoding (it then stops at End) or when the value overflows 64
     // bits (it then stops before End).
     EC = (Data + NumBytesRead >= End) ? sampleprof_error::truncated
                                       : sampleprof_error::malformed;
   } else if (Val > std::numeric_limits<T>::max()) {
     EC = sampleprof_error::malformed;
   }

   if (EC) {
     reportError(0, EC.message());
     return EC;
   }

   Data += NumBytesRead;
   return static_cast<T>(Val);
 }

 /// Read a NUL-terminated string at Data and advance Data past the terminator.
 ///
 /// The terminator is searched for only inside [Data, End), so a string that
 /// is missing its terminator cannot cause a read beyond the buffer.
 ///
 /// \returns the string (without the terminator) referencing the buffer, or
 /// sampleprof_error::truncated if no terminator is found before End.
 ErrorOr<StringRef> SampleProfileReaderBinary::readString() {
   const uint8_t *Terminator = End;
   if (Data < End)
     Terminator = std::find(Data, End, static_cast<uint8_t>(0));

   if (Terminator == End) {
     std::error_code EC = sampleprof_error::truncated;
     reportError(0, EC.message());
     return EC;
   }

   StringRef Str(reinterpret_cast<const char *>(Data),
                 static_cast<size_t>(Terminator - Data));
   // Skip the string and its terminator.
   Data = Terminator + 1;
   return Str;
 }

 /// Read a fixed-width little-endian value of type \p T at Data and advance
 /// Data past it.
 ///
 /// \returns the value, or sampleprof_error::truncated if fewer than
 /// sizeof(T) bytes remain before End.
 template <typename T>
 ErrorOr<T> SampleProfileReaderBinary::readUnencodedNumber() {
   const bool FitsInBuffer = !(Data + sizeof(T) > End);
   if (!FitsInBuffer) {
     std::error_code EC = sampleprof_error::truncated;
     reportError(0, EC.message());
     return EC;
   }

   // readNext advances Data by sizeof(T).
   T Value = support::endian::readNext<T, llvm::endianness::little>(Data);
   return Value;
 }

 /// Read an index from the stream and validate it against \p Table.
 ///
 /// \returns the index, the error from reading the number, or
 /// sampleprof_error::truncated_name_table if the index is not smaller than
 /// Table.size().
 template <typename T>
 inline ErrorOr<size_t> SampleProfileReaderBinary::readStringIndex(T &Table) {
   auto Index = readNumber<size_t>();
   if (std::error_code EC = Index.getError())
     return EC;

   const bool InRange = *Index < Table.size();
   if (InRange)
     return *Index;
   return sampleprof_error::truncated_name_table;
 }

 /// Read a name-table index from the stream and return the NameTable entry it
 /// refers to.
 ///
 /// \param RetIdx if non-null, receives the index that was read.
 ErrorOr<FunctionId>
 SampleProfileReaderBinary::readStringFromTable(size_t *RetIdx) {
   auto Slot = readStringIndex(NameTable);
   if (std::error_code EC = Slot.getError())
     return EC;

   const size_t Index = *Slot;
   if (RetIdx != nullptr)
     *RetIdx = Index;
   return NameTable[Index];
 }

 /// Read a context-table index from the stream and return the CSNameTable
 /// entry it refers to.
 ///
 /// \param RetIdx if non-null, receives the index that was read.
 /// \returns the context frames, the error from reading the index, or
 /// sampleprof_error::truncated_name_table if the index is out of range.
 ErrorOr<SampleContextFrames>
 SampleProfileReaderBinary::readContextFromTable(size_t *RetIdx) {
   // readStringIndex performs the same read-and-bounds-check against
   // whichever table it is given.
   auto Slot = readStringIndex(CSNameTable);
   if (std::error_code EC = Slot.getError())
     return EC;

   const size_t Index = *Slot;
   if (RetIdx != nullptr)
     *RetIdx = Index;
   return CSNameTable[Index];
 }

 /// Read a table index from the stream and return the sample context it names
 /// together with that context's hash.
 ///
 /// For CS profiles the index refers to the context table, otherwise to the
 /// name table. The hash comes from the MD5 context table and is computed and
 /// cached there when the stored value is 0.
 ErrorOr<std::pair<SampleContext, uint64_t>>
 SampleProfileReaderBinary::readSampleContextFromTable() {
   size_t TableIdx;
   SampleContext Context;
   if (!ProfileIsCS) {
     auto Name(readStringFromTable(&TableIdx));
     if (std::error_code EC = Name.getError())
       return EC;
     Context = SampleContext(*Name);
   } else {
     auto Frames(readContextFromTable(&TableIdx));
     if (std::error_code EC = Frames.getError())
       return EC;
     Context = SampleContext(*Frames);
   }

   // MD5SampleContextStart may point into the profile's file data, so read it
   // as little endian to get the same value on a big endian CPU.
   uint64_t Hash = support::endian::read64le(MD5SampleContextStart + TableIdx);
   if (Hash != 0)
     return std::make_pair(Context, Hash);

   // First reference to this context: compute the hash lazily and write it
   // back to the table so later lookups find it cached.
   assert(MD5SampleContextStart == MD5SampleContextTable.data());
   Hash = Context.getHashCode();
   support::endian::write64le(&MD5SampleContextTable[TableIdx], Hash);
   return std::make_pair(Context, Hash);
 }

 /// Read the body of one function profile from the stream into \p FProfile.
 ///
 /// The encoding is: total sample count, the body records (line offset,
 /// discriminator, sample count and a list of call targets with counts), then
 /// the inlined callsites (line offset, discriminator, callee name and a
 /// nested profile read recursively).
 ///
 /// \returns the first read error encountered, or success.
 std::error_code
 SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
   auto TotalSamples = readNumber<uint64_t>();
   if (std::error_code EC = TotalSamples.getError())
     return EC;
   FProfile.addTotalSamples(*TotalSamples);

   // Read the samples in the body.
   auto NumRecords = readNumber<uint32_t>();
   if (std::error_code EC = NumRecords.getError())
     return EC;

   for (uint32_t RecordIdx = 0; RecordIdx < *NumRecords; ++RecordIdx) {
     auto LineOffset = readNumber<uint64_t>();
     if (std::error_code EC = LineOffset.getError())
       return EC;

     // NOTE(review): an illegal offset stops reading this profile but reports
     // success, leaving Data in the middle of the record — confirm this is
     // intended rather than an error condition.
     if (!isOffsetLegal(*LineOffset))
       return std::error_code();

     auto Discriminator = readNumber<uint64_t>();
     if (std::error_code EC = Discriminator.getError())
       return EC;

     auto RecordSamples = readNumber<uint64_t>();
     if (std::error_code EC = RecordSamples.getError())
       return EC;

     auto NumCalls = readNumber<uint32_t>();
     if (std::error_code EC = NumCalls.getError())
       return EC;

     // Here we handle FS discriminators:
     const uint32_t MaskedDiscriminator =
         (*Discriminator) & getDiscriminatorMask();

     for (uint32_t CallIdx = 0; CallIdx < *NumCalls; ++CallIdx) {
       auto Callee(readStringFromTable());
       if (std::error_code EC = Callee.getError())
         return EC;

       auto CalleeSamples = readNumber<uint64_t>();
       if (std::error_code EC = CalleeSamples.getError())
         return EC;

       FProfile.addCalledTargetSamples(*LineOffset, MaskedDiscriminator,
                                       *Callee, *CalleeSamples);
     }

     FProfile.addBodySamples(*LineOffset, MaskedDiscriminator, *RecordSamples);
   }

   // Read all the samples for inlined function calls.
   auto NumCallsites = readNumber<uint32_t>();
   if (std::error_code EC = NumCallsites.getError())
     return EC;

   for (uint32_t SiteIdx = 0; SiteIdx < *NumCallsites; ++SiteIdx) {
     auto LineOffset = readNumber<uint64_t>();
     if (std::error_code EC = LineOffset.getError())
       return EC;

     auto Discriminator = readNumber<uint64_t>();
     if (std::error_code EC = Discriminator.getError())
       return EC;

     auto CalleeName(readStringFromTable());
     if (std::error_code EC = CalleeName.getError())
       return EC;

     // Here we handle FS discriminators:
     const uint32_t MaskedDiscriminator =
         (*Discriminator) & getDiscriminatorMask();

     // Find or create the callee profile at this callsite, then fill it from
     // the nested encoding.
     const LineLocation CallsiteLoc(*LineOffset, MaskedDiscriminator);
     FunctionSamples &CalleeProfile =
         FProfile.functionSamplesAt(CallsiteLoc)[*CalleeName];
     CalleeProfile.setFunction(*CalleeName);
     if (std::error_code EC = readProfile(CalleeProfile))
       return EC;
   }

   return sampleprof_error::success;
 }

 /// Read one top-level function profile starting at \p Start and add it to
 /// \p Profiles.
 ///
 /// The encoding is the head sample count, the function's context index and
 /// then the profile body. The entry is inserted under the context's hash; an
 /// existing entry with the same hash is reused.
 std::error_code
 SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start,
                                            SampleProfileMap &Profiles) {
   Data = Start;

   auto HeadSamples = readNumber<uint64_t>();
   if (std::error_code EC = HeadSamples.getError())
     return EC;

   auto ContextAndHash(readSampleContextFromTable());
   if (std::error_code EC = ContextAndHash.getError())
     return EC;
   auto &[FContext, Hash] = *ContextAndHash;

   // Insert with the cached hash value rather than recomputing it.
   auto Inserted = Profiles.try_emplace(Hash, FContext, FunctionSamples());
   FunctionSamples &FProfile = Inserted.first->second;
   FProfile.setContext(FContext);
   FProfile.addHeadSamples(*HeadSamples);

   if (FContext.hasContext())
     ++CSProfileCount;

   if (std::error_code EC = readProfile(FProfile))
     return EC;
   return sampleprof_error::success;
 }

 /// Read one top-level function profile starting at \p Start into this
 /// reader's own profile map (Profiles).
 std::error_code
 SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
   return readFuncProfile(Start, Profiles);
 }

 /// Read function profiles back to back until Data reaches End.
 ///
 /// The FS-discriminator setting is taken from the -profile-isfs option.
 ///
 /// \returns the first error reported while reading a profile, or success.
 std::error_code SampleProfileReaderBinary::readImpl() {
   FunctionSamples::ProfileIsFS = ProfileIsFS = ProfileIsFSDisciminator;

   // readFuncProfile advances Data past each profile it consumes.
   while (Data < End) {
     std::error_code EC = readFuncProfile(Data);
     if (EC)
       return EC;
   }

   return sampleprof_error::success;
 }

 /// Read one section of an ext-binary profile.
 ///
 /// Sets Data/End to the byte range [\p Start, \p Start + \p Size) and
 /// dispatches on the section type in \p Entry. Section flags are applied to
 /// the reader state (and the matching FunctionSamples statics) before or
 /// after the section payload is read, as shown per case below.
 ///
 /// \returns the error from the section-specific reader, or success.
 std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
     const uint8_t *Start, uint64_t Size, const SecHdrTableEntry &Entry) {
   Data = Start;
   End = Start + Size;
   switch (Entry.Type) {
   case SecProfSummary:
     // The summary must be read first: the partial-profile flag is stored on
     // the Summary object.
     if (std::error_code EC = readSummary())
       return EC;
     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
       Summary->setPartialProfile(true);
     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext))
       FunctionSamples::ProfileIsCS = ProfileIsCS = true;
     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsPreInlined))
       FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined = true;
     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
       FunctionSamples::ProfileIsFS = ProfileIsFS = true;
     break;
   case SecNameTable: {
     bool FixedLengthMD5 =
         hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5);
     bool UseMD5 = hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name);
     // UseMD5 means if THIS section uses MD5, ProfileIsMD5 means if the entire
     // profile uses MD5 for function name matching in IPO passes.
     ProfileIsMD5 = ProfileIsMD5 || UseMD5;
     FunctionSamples::HasUniqSuffix =
         hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix);
     if (std::error_code EC = readNameTableSec(UseMD5, FixedLengthMD5))
       return EC;
     break;
   }
   case SecCSNameTable: {
     if (std::error_code EC = readCSNameTableSec())
       return EC;
     break;
   }
   case SecLBRProfile:
     // Remember the section's range; read() uses it later to load profiles
     // for a given set of functions.
     ProfileSecRange = std::make_pair(Data, End);
     if (std::error_code EC = readFuncProfiles())
       return EC;
     break;
   case SecFuncOffsetTable:
     // If module is absent, we are using LLVM tools, and need to read all
     // profiles, so skip reading the function offset table.
     if (!M) {
       Data = End;
     } else {
       assert((!ProfileIsCS ||
               hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered)) &&
              "func offset table should always be sorted in CS profile");
       if (std::error_code EC = readFuncOffsetTable())
         return EC;
     }
     break;
   case SecFuncMetadata: {
     ProfileIsProbeBased =
         hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased);
     FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
     ProfileHasAttribute =
         hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute);
     if (std::error_code EC = readFuncMetadata(ProfileHasAttribute))
       return EC;
     break;
   }
   case SecProfileSymbolList:
     if (std::error_code EC = readProfileSymbolList())
       return EC;
     break;
   default:
     // Any other section type is handed to readCustomSection.
     if (std::error_code EC = readCustomSection(Entry))
       return EC;
     break;
   }
   return sampleprof_error::success;
 }

 /// Decide whether function offsets are kept in the ordered list container
 /// (true) or in the map container (false).
 bool SampleProfileReaderExtBinaryBase::useFuncOffsetList() const {
   // A CS profile lays out its function offset section as sequences of
   // contexts in pre-order
   // (e.g. [A, A:1 @ B, A:1 @ B:2.3 @ C] [D, D:1 @ E]), so that once a context
   // matching the module is found, the profiles of all its callees are loaded
   // recursively. The order of profiles matters, hence the list.
   if (ProfileIsCS)
     return true;

   // With MD5 names, functions in the module are looked up through the map
   // container. A remapper has no use on MD5 names.
   if (useMD5())
     return false;

   // Not MD5: when a remapper is present, the remapped name of every function
   // has to be matched against the module, so each entry is visited and the
   // list container is used. Otherwise the map container gives faster lookup.
   // TODO: If the cardinality of the function offset section is much smaller
   // than the number of functions in the module, using the list container can
   // be always faster, but we need to figure out the constant factor to
   // determine the cutoff.
   return Remapper != nullptr;
 }

 /// Load the profiles for the functions named in \p FuncsToUse into
 /// \p Profiles, then read metadata for those of them that were found.
 ///
 /// Does nothing and returns success when \p FuncsToUse is empty.
 std::error_code
 SampleProfileReaderExtBinaryBase::read(const DenseSet<StringRef> &FuncsToUse,
                                        SampleProfileMap &Profiles) {
   if (FuncsToUse.empty())
     return sampleprof_error::success;

   // Rewind to the profile section recorded when the sections were read.
   Data = ProfileSecRange.first;
   End = ProfileSecRange.second;
   if (std::error_code EC = readFuncProfiles(FuncsToUse, Profiles))
     return EC;
   End = Data;

   // Collect the requested functions that now have a profile.
   DenseSet<FunctionSamples *> ProfilesToReadMetadata;
   for (auto FName : FuncsToUse) {
     auto Found = Profiles.find(FName);
     if (Found == Profiles.end())
       continue;
     ProfilesToReadMetadata.insert(&Found->second);
   }

   if (std::error_code EC =
           readFuncMetadata(ProfileHasAttribute, ProfilesToReadMetadata))
     return EC;
   return sampleprof_error::success;
 }

 /// Rebuild FuncsToUse from the canonical names of all functions in the
 /// module.
 ///
 /// \returns false (leaving FuncsToUse untouched) when there is no module,
 /// true otherwise.
 bool SampleProfileReaderExtBinaryBase::collectFuncsFromModule() {
   if (M == nullptr)
     return false;

   FuncsToUse.clear();
   for (const auto &Fn : *M) {
     FuncsToUse.insert(FunctionSamples::getCanonicalFnName(Fn));
   }
   return true;
 }

 std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() {
   // If there are more than one function offset section, the profile associated
   // with the previous section has to be done reading before next one is read.
   FuncOffsetTable.clear();
   FuncOffsetList.clear();

   auto Size = readNumber<uint64_t>();
   if (std::error_code EC = Size.getError())
     return EC;

   bool UseFuncOffsetList = useFuncOffsetList();
   if (UseFuncOffsetList)
     FuncOffsetList.reserve(*Size);
   else
     FuncOffsetTable.reserve(*Size);

   for (uint64_t I = 0; I < *Size; ++I) {
     auto FContextHash(readSampleContextFromTable());
     if (std::error_code EC = FContextHash.getError())
       return EC;

     auto &[FContext, Hash] = *FContextHash;
     auto Offset = readNumber<uint64_t>();
     if (std::error_code EC = Offset.getError())
       return EC;

     if (UseFuncOffsetList)
       FuncOffsetList.emplace_back(FContext, *Offset);
     else
       // Because Porfiles replace existing value with new value if collision
       // happens, we also use the latest offset so that they are consistent.
       FuncOffsetTable[Hash] = *Offset;
  }

  return sampleprof_error::success;
 }

 /// Load the profiles needed for the functions named in \p FuncsToUse into
 /// \p Profiles.
 ///
 /// Offsets taken from FuncOffsetList / FuncOffsetTable are applied relative to
 /// the value of Data on entry. When a Remapper is present, every name in
 /// \p FuncsToUse is inserted into it first so that names it reports as
 /// existing are loaded as well.
 ///
 /// \returns the first error reported while reading a profile, or
 /// sampleprof_error::success.
 std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles(
     const DenseSet<StringRef> &FuncsToUse, SampleProfileMap &Profiles) {
   const uint8_t *Start = Data;

   if (Remapper) {
     for (auto Name : FuncsToUse)
       Remapper->insert(Name);
   }

   if (ProfileIsCS) {
     assert(useFuncOffsetList());
     DenseSet<uint64_t> FuncGuidsToUse;
     if (useMD5()) {
       for (auto Name : FuncsToUse)
         FuncGuidsToUse.insert(Function::getGUID(Name));
     }

     // For each function in current module, load all context profiles for
     // the function as well as their callee contexts which can help profile
     // guided importing for ThinLTO. This can be achieved by walking
     // through an ordered context container, where contexts are laid out
     // as if they were walked in preorder of a context trie. While
     // traversing the trie, a link to the highest common ancestor node is
     // kept so that all of its descendants will be loaded.
     const SampleContext *CommonContext = nullptr;
     for (const auto &NameOffset : FuncOffsetList) {
       const auto &FContext = NameOffset.first;
       FunctionId FName = FContext.getFunction();
       StringRef FNameString;
       if (!useMD5())
         FNameString = FName.stringRef();

       // For function in the current module, keep its farthest ancestor
       // context. This can be used to load itself and its child and
       // sibling contexts.
       if ((useMD5() && FuncGuidsToUse.count(FName.getHashCode())) ||
           (!useMD5() && (FuncsToUse.count(FNameString) ||
                          (Remapper && Remapper->exist(FNameString))))) {
         if (!CommonContext || !CommonContext->isPrefixOf(FContext))
           CommonContext = &FContext;
       }

       if (CommonContext == &FContext ||
           (CommonContext && CommonContext->isPrefixOf(FContext))) {
         // Load profile for the current context which originated from
         // the common ancestor. The destination map is passed explicitly,
         // as in the non-CS branches below, so that the Profiles argument
         // is honoured for context-sensitive profiles too.
         const uint8_t *FuncProfileAddr = Start + NameOffset.second;
         if (std::error_code EC = readFuncProfile(FuncProfileAddr, Profiles))
           return EC;
       }
     }
   } else if (!useMD5() && Remapper) {
     // String names with a remapper: walk the ordered list so that names the
     // remapper reports as existing are matched in addition to exact names.
     assert(useFuncOffsetList());
     for (const auto &NameOffset : FuncOffsetList) {
       const SampleContext &FContext = NameOffset.first;
       FunctionId FuncName = FContext.getFunction();
       StringRef FuncNameStr = FuncName.stringRef();
       if (!FuncsToUse.count(FuncNameStr) && !Remapper->exist(FuncNameStr))
         continue;
       const uint8_t *FuncProfileAddr = Start + NameOffset.second;
       if (std::error_code EC = readFuncProfile(FuncProfileAddr, Profiles))
         return EC;
     }
   } else {
     // MD5 names, or string names without a remapper: both look the function
     // up in the offset table by the MD5 hash of its name.
     assert(!useFuncOffsetList());
     for (auto Name : FuncsToUse) {
       auto Iter = FuncOffsetTable.find(MD5Hash(Name));
       if (Iter == FuncOffsetTable.end())
         continue;
       const uint8_t *FuncProfileAddr = Start + Iter->second;
       if (std::error_code EC = readFuncProfile(FuncProfileAddr, Profiles))
         return EC;
     }
   }

   return sampleprof_error::success;
 }

 /// Read the function profiles of the current section, either all of them or
 /// only those for the functions collected from the attached module.
 std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
   // collectFuncsFromModule uses FunctionSamples::getCanonicalFnName, which
   // queries FunctionSamples::HasUniqSuffix. It therefore has to run after
   // FunctionSamples::HasUniqSuffix is set, i.e. after the NameTable section
   // has been read.
   const bool HaveModuleFuncs = collectFuncsFromModule();

   if (HaveModuleFuncs) {
     // The reader was given a module: load function profiles on demand.
     if (std::error_code EC = readFuncProfiles(FuncsToUse, Profiles))
       return EC;
     Data = End;
   } else {
     // No module means we are running inside an LLVM tool, so every profile
     // has to be read.
     while (Data < End)
       if (std::error_code EC = readFuncProfile(Data))
         return EC;
     assert(Data == End && "More data is read than expected");
   }

   assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
          "Cannot have both context-sensitive and regular profile");
   assert((!CSProfileCount || ProfileIsCS) &&
          "Section flag should be consistent with actual profile");
   return sampleprof_error::success;
 }

 /// Read the profile symbol list occupying the rest of the current section,
 /// creating ProfSymList first if it does not exist yet.
 std::error_code SampleProfileReaderExtBinaryBase::readProfileSymbolList() {
   if (ProfSymList == nullptr)
     ProfSymList = std::make_unique<ProfileSymbolList>();

   std::error_code EC = ProfSymList->read(Data, End - Data);
   if (EC)
     return EC;

   // The list consumes everything up to the end of the section.
   Data = End;
   return sampleprof_error::success;
 }

 std::error_code SampleProfileReaderExtBinaryBase::decompressSection(
     const uint8_t *SecStart, const uint64_t SecSize,
     const uint8_t *&DecompressBuf, uint64_t &DecompressBufSize) {
   Data = SecStart;
   End = SecStart + SecSize;
   auto DecompressSize = readNumber<uint64_t>();
   if (std::error_code EC = DecompressSize.getError())
     return EC;
   DecompressBufSize = *DecompressSize;

   auto CompressSize = readNumber<uint64_t>();
   if (std::error_code EC = CompressSize.getError())
     return EC;

   if (!llvm::compression::zlib::isAvailable())
     return sampleprof_error::zlib_unavailable;

   uint8_t *Buffer = Allocator.Allocate<uint8_t>(DecompressBufSize);
   size_t UCSize = DecompressBufSize;
   llvm::Error E = compression::zlib::decompress(ArrayRef(Data, *CompressSize),
                                                 Buffer, UCSize);
   if (E)
     return sampleprof_error::uncompress_failed;
   DecompressBuf = reinterpret_cast<const uint8_t *>(Buffer);
   return sampleprof_error::success;
 }

 /// Read every section listed in SecHdrTable, in table order.
 ///
 /// Empty sections are skipped, as are flat sections when SkipFlatProf is set.
 /// Compressed sections are decompressed before being read.
 ///
 /// \returns the first error encountered; truncated when a section does not
 /// fit in the buffer; malformed when a section is not consumed exactly; or
 /// sampleprof_error::success.
 std::error_code SampleProfileReaderExtBinaryBase::readImpl() {
   const uint8_t *BufStart =
       reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
   const uint64_t BufSize = Buffer->getBufferSize();

   for (auto &Entry : SecHdrTable) {
     // Skip empty section.
     if (!Entry.Size)
       continue;

     // Skip sections without inlined functions when SkipFlatProf is true.
     if (SkipFlatProf && hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
       continue;

     // Offset and Size come from the section header table in the file.
     // Reject a section that does not lie inside the buffer (written so the
     // check itself cannot overflow).
     if (Entry.Offset > BufSize || Entry.Size > BufSize - Entry.Offset)
       return sampleprof_error::truncated;

     const uint8_t *SecStart = BufStart + Entry.Offset;
     uint64_t SecSize = Entry.Size;

     // If the section is compressed, decompress it into a buffer
     // DecompressBuf before reading the actual data. The pointee of
     // 'Data' will be changed to the buffer held by DecompressBuf
     // temporarily when reading the actual data.
     bool isCompressed = hasSecFlag(Entry, SecCommonFlags::SecFlagCompress);
     if (isCompressed) {
       const uint8_t *DecompressBuf;
       uint64_t DecompressBufSize;
       if (std::error_code EC = decompressSection(
               SecStart, SecSize, DecompressBuf, DecompressBufSize))
         return EC;
       SecStart = DecompressBuf;
       SecSize = DecompressBufSize;
     }

     if (std::error_code EC = readOneSection(SecStart, SecSize, Entry))
       return EC;
     // The section reader must consume the section exactly.
     if (Data != SecStart + SecSize)
       return sampleprof_error::malformed;

     // Change the pointee of 'Data' from DecompressBuf to original Buffer.
     if (isCompressed) {
       Data = BufStart + Entry.Offset;
       End = BufStart + BufSize;
     }
   }

   return sampleprof_error::success;
 }

 /// Check \p Magic against the value returned by SPMagic().
 std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) {
   if (Magic != SPMagic())
     return sampleprof_error::bad_magic;
   return sampleprof_error::success;
 }

 /// Check \p Magic against the value returned by SPMagic(SPF_Ext_Binary).
 std::error_code SampleProfileReaderExtBinary::verifySPMagic(uint64_t Magic) {
   if (Magic != SPMagic(SPF_Ext_Binary))
     return sampleprof_error::bad_magic;
   return sampleprof_error::success;
 }

 std::error_code SampleProfileReaderBinary::readNameTable() {
   auto Size = readNumber<size_t>();
   if (std::error_code EC = Size.getError())
     return EC;

   // Normally if useMD5 is true, the name table should have MD5 values, not
   // strings, however in the case that ExtBinary profile has multiple name
   // tables mixing string and MD5, all of them have to be normalized to use MD5,
   // because optimization passes can only handle either type.
   bool UseMD5 = useMD5();

   NameTable.clear();
   NameTable.reserve(*Size);
   if (!ProfileIsCS) {
     MD5SampleContextTable.clear();
     if (UseMD5)
       MD5SampleContextTable.reserve(*Size);
     else
       // If we are using strings, delay MD5 computation since only a portion of
       // names are used by top level functions. Use 0 to indicate MD5 value is
       // to be calculated as no known string has a MD5 value of 0.
       MD5SampleContextTable.resize(*Size);
   }
   for (size_t I = 0; I < *Size; ++I) {
     auto Name(readString());
     if (std::error_code EC = Name.getError())
       return EC;
     if (UseMD5) {
       FunctionId FID(*Name);
       if (!ProfileIsCS)
         MD5SampleContextTable.emplace_back(FID.getHashCode());
       NameTable.emplace_back(FID);
     } else
       NameTable.push_back(FunctionId(*Name));
   }
   if (!ProfileIsCS)
     MD5SampleContextStart = MD5SampleContextTable.data();
   return sampleprof_error::success;
 }

 /// Read a name table section.
 ///
 /// \param IsMD5 the table stores MD5 values rather than strings.
 /// \param FixedLengthMD5 the MD5 values are stored as fixed-width 64-bit
 /// little-endian integers rather than through readNumber.
 /// \returns a read error, truncated when a fixed-length table does not fit in
 /// the section, or sampleprof_error::success.
 std::error_code
 SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5,
                                                    bool FixedLengthMD5) {
   if (FixedLengthMD5) {
     if (!IsMD5)
       errs() << "If FixedLengthMD5 is true, UseMD5 has to be true";
     auto Size = readNumber<size_t>();
     if (std::error_code EC = Size.getError())
       return EC;

     // Size comes from the file. Compare entry counts instead of forming
     // Data + Size * sizeof(uint64_t), which could overflow for a bogus Size.
     const size_t MaxEntries =
         static_cast<size_t>(End - Data) / sizeof(uint64_t);
     if (*Size > MaxEntries)
       return sampleprof_error::truncated;

     assert(Data + (*Size) * sizeof(uint64_t) == End &&
            "Fixed length MD5 name table does not contain specified number of "
            "entries");

     NameTable.clear();
     NameTable.reserve(*Size);
     for (size_t I = 0; I < *Size; ++I) {
       using namespace support;
       uint64_t FID = endian::read<uint64_t, endianness::little, unaligned>(
           Data + I * sizeof(uint64_t));
       NameTable.emplace_back(FunctionId(FID));
     }
     // The fixed-width values are referenced in place in the section data.
     if (!ProfileIsCS)
       MD5SampleContextStart = reinterpret_cast<const uint64_t *>(Data);
     Data = Data + (*Size) * sizeof(uint64_t);
     return sampleprof_error::success;
   }

   if (IsMD5) {
     assert(!FixedLengthMD5 && "FixedLengthMD5 should be unreachable here");
     auto Size = readNumber<size_t>();
     if (std::error_code EC = Size.getError())
       return EC;

     NameTable.clear();
     NameTable.reserve(*Size);
     if (!ProfileIsCS)
       MD5SampleContextTable.resize(*Size);
     for (size_t I = 0; I < *Size; ++I) {
       auto FID = readNumber<uint64_t>();
       if (std::error_code EC = FID.getError())
         return EC;
       // Stored little-endian, the same byte layout the fixed-length path
       // above reads from the section data.
       if (!ProfileIsCS)
         support::endian::write64le(&MD5SampleContextTable[I], *FID);
       NameTable.emplace_back(FunctionId(*FID));
     }
     if (!ProfileIsCS)
       MD5SampleContextStart = MD5SampleContextTable.data();
     return sampleprof_error::success;
   }

   // Plain string name table.
   return SampleProfileReaderBinary::readNameTable();
 }

 // Read in the CS name table section, which basically contains a list of context
 // vectors. Each element of a context vector, aka a frame, refers to the
 // underlying raw function names that are stored in the name table, as well as
 // a callsite identifier that only makes sense for non-leaf frames.
 //
 // Returns the first read error, sampleprof_error::malformed when a frame
 // carries an illegal line offset, or sampleprof_error::success.
 std::error_code SampleProfileReaderExtBinaryBase::readCSNameTableSec() {
   auto Size = readNumber<size_t>();
   if (std::error_code EC = Size.getError())
     return EC;

   CSNameTable.clear();
   CSNameTable.reserve(*Size);
   if (ProfileIsCS) {
     // Delay MD5 computation of CS context until they are needed. Use 0 to
     // indicate MD5 value is to be calculated as no known string has a MD5
     // value of 0.
     MD5SampleContextTable.clear();
     MD5SampleContextTable.resize(*Size);
     MD5SampleContextStart = MD5SampleContextTable.data();
   }
   for (size_t I = 0; I < *Size; ++I) {
     CSNameTable.emplace_back(SampleContextFrameVector());
     auto ContextSize = readNumber<uint32_t>();
     if (std::error_code EC = ContextSize.getError())
       return EC;
     for (uint32_t J = 0; J < *ContextSize; ++J) {
       auto FName(readStringFromTable());
       if (std::error_code EC = FName.getError())
         return EC;
       auto LineOffset = readNumber<uint64_t>();
       if (std::error_code EC = LineOffset.getError())
         return EC;

       // An illegal offset means the section cannot be decoded any further.
       // Report it as an error: a default-constructed std::error_code would
       // signal success while leaving the table half read.
       if (!isOffsetLegal(*LineOffset))
         return sampleprof_error::malformed;

       auto Discriminator = readNumber<uint64_t>();
       if (std::error_code EC = Discriminator.getError())
         return EC;

       CSNameTable.back().emplace_back(
           FName.get(), LineLocation(LineOffset.get(), Discriminator.get()));
     }
   }

   return sampleprof_error::success;
 }

 /// Read the metadata of one function, and recursively of its inlined
 /// callees, from the current position.
 ///
 /// \param ProfileHasAttribute whether an attribute word is stored per function.
 /// \param FProfile profile that receives the metadata; may be null, in which
 /// case the data is still consumed but not stored.
 std::error_code
 SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute,
                                                    FunctionSamples *FProfile) {
   // Nothing left in the current range: there is no metadata to decode.
   if (Data >= End)
     return sampleprof_error::success;

   if (ProfileIsProbeBased) {
     auto Checksum = readNumber<uint64_t>();
     if (std::error_code EC = Checksum.getError())
       return EC;
     if (FProfile)
       FProfile->setFunctionHash(*Checksum);
   }

   if (ProfileHasAttribute) {
     auto Attributes = readNumber<uint32_t>();
     if (std::error_code EC = Attributes.getError())
       return EC;
     if (FProfile)
       FProfile->getContext().setAllAttributes(*Attributes);
   }

   // Only non-CS profiles store per-callsite metadata after the function's own.
   if (ProfileIsCS)
     return sampleprof_error::success;

   // Read all the attributes for inlined function calls.
   auto CallsiteCount = readNumber<uint32_t>();
   if (std::error_code EC = CallsiteCount.getError())
     return EC;

   for (uint32_t Idx = 0; Idx != *CallsiteCount; ++Idx) {
     auto LineOffset = readNumber<uint64_t>();
     if (std::error_code EC = LineOffset.getError())
       return EC;

     auto Discriminator = readNumber<uint64_t>();
     if (std::error_code EC = Discriminator.getError())
       return EC;

     auto ContextAndHash(readSampleContextFromTable());
     if (std::error_code EC = ContextAndHash.getError())
       return EC;

     auto &[FContext, Hash] = *ContextAndHash;
     FunctionSamples *CalleeProfile = nullptr;
     if (FProfile) {
       CalleeProfile = const_cast<FunctionSamples *>(
           &FProfile->functionSamplesAt(LineLocation(
               *LineOffset, *Discriminator))[FContext.getFunction()]);
     }

     // Recurse even without a destination so that the data is still consumed.
     if (std::error_code EC =
             readFuncMetadata(ProfileHasAttribute, CalleeProfile))
       return EC;
   }

   return sampleprof_error::success;
 }

 /// Read the metadata of each profile in \p Profiles from the range recorded
 /// for it in FuncMetadataIndex. Profiles with no recorded range are skipped.
 std::error_code SampleProfileReaderExtBinaryBase::readFuncMetadata(
     bool ProfileHasAttribute, DenseSet<FunctionSamples *> &Profiles) {
   if (FuncMetadataIndex.empty())
     return sampleprof_error::success;

   for (auto *FProfile : Profiles) {
     auto Found = FuncMetadataIndex.find(FProfile->getContext().getHashCode());
     if (Found == FuncMetadataIndex.end())
       continue;

     // Point the reader at the byte range recorded for this function.
     const auto &Range = Found->second;
     Data = Range.first;
     End = Range.second;
     if (std::error_code EC = readFuncMetadata(ProfileHasAttribute, FProfile))
       return EC;
     assert(Data == End && "More data is read than expected");
   }
   return sampleprof_error::success;
 }

 std::error_code
 SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) {
   while (Data < End) {
     auto FContextHash(readSampleContextFromTable());
     if (std::error_code EC = FContextHash.getError())
       return EC;
     auto &[FContext, Hash] = *FContextHash;
     FunctionSamples *FProfile = nullptr;
     auto It = Profiles.find(FContext);
     if (It != Profiles.end())
       FProfile = &It->second;

     const uint8_t *Start = Data;
     if (std::error_code EC = readFuncMetadata(ProfileHasAttribute, FProfile))
       return EC;

     FuncMetadataIndex[FContext.getHashCode()] = {Start, Data};
   }

   assert(Data == End && "More data is read than expected");
   return sampleprof_error::success;
 }

 /// Read one section header entry (type, flags, offset and size, in that
 /// order) and append it to SecHdrTable with LayoutIndex set to \p Idx.
 std::error_code
 SampleProfileReaderExtBinaryBase::readSecHdrTableEntry(uint64_t Idx) {
   auto RawType = readUnencodedNumber<uint64_t>();
   if (std::error_code EC = RawType.getError())
     return EC;

   auto RawFlags = readUnencodedNumber<uint64_t>();
   if (std::error_code EC = RawFlags.getError())
     return EC;

   auto RawOffset = readUnencodedNumber<uint64_t>();
   if (std::error_code EC = RawOffset.getError())
     return EC;

   auto RawSize = readUnencodedNumber<uint64_t>();
   if (std::error_code EC = RawSize.getError())
     return EC;

   // All four fields were read successfully; assemble the entry.
   SecHdrTableEntry Entry;
   Entry.Type = static_cast<SecType>(*RawType);
   Entry.Flags = *RawFlags;
   Entry.Offset = *RawOffset;
   Entry.Size = *RawSize;
   Entry.LayoutIndex = Idx;
   SecHdrTable.push_back(std::move(Entry));
   return sampleprof_error::success;
 }

 /// Read the section header table: an entry count followed by that many
 /// entries.
 std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTable() {
   auto NumEntries = readUnencodedNumber<uint64_t>();
   if (std::error_code EC = NumEntries.getError())
     return EC;

   for (uint64_t Idx = 0; Idx != *NumEntries; ++Idx) {
     if (std::error_code EC = readSecHdrTableEntry(Idx))
       return EC;
   }

   return sampleprof_error::success;
 }

 /// Read the header: the magic/version identifier followed by the section
 /// header table. Data and End are reset to span the whole buffer first.
 std::error_code SampleProfileReaderExtBinaryBase::readHeader() {
   Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
   End = Data + Buffer->getBufferSize();

   std::error_code EC = readMagicIdent();
   if (!EC)
     EC = readSecHdrTable();
   if (EC)
     return EC;

   return sampleprof_error::success;
 }

 /// \returns the summed size of every section in SecHdrTable whose type is
 /// \p Type.
 uint64_t SampleProfileReaderExtBinaryBase::getSectionSize(SecType Type) {
   uint64_t Total = 0;
   for (auto &Sec : SecHdrTable)
     Total += (Sec.Type == Type) ? Sec.Size : 0;
   return Total;
 }

 /// \returns the largest Offset + Size over all entries of SecHdrTable, or 0
 /// when the table is empty.
 uint64_t SampleProfileReaderExtBinaryBase::getFileSize() {
   // Sections in SecHdrTable are not necessarily in the same order as the
   // sections in the profile: a section like FuncOffsetTable needs to be
   // written after section LBRProfile but read before it. The last entry in
   // SecHdrTable therefore cannot simply be used to calculate the file size.
   uint64_t FurthestEnd = 0;
   for (auto &Sec : SecHdrTable) {
     const uint64_t SecEnd = Sec.Offset + Sec.Size;
     if (SecEnd > FurthestEnd)
       FurthestEnd = SecEnd;
   }
   return FurthestEnd;
 }

 /// Render the flags set on \p Entry as a brace-enclosed, comma-separated
 /// list, e.g. "{compressed,md5}", or "{}" when no known flag is set.
 static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) {
   std::string Flags = "{";

   // Flags common to every section type.
   if (hasSecFlag(Entry, SecCommonFlags::SecFlagCompress))
     Flags += "compressed,";
   if (hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
     Flags += "flat,";

   // Flags specific to the section type.
   switch (Entry.Type) {
   case SecNameTable:
     if (hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5))
       Flags += "fixlenmd5,";
     else if (hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name))
       Flags += "md5,";
     if (hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix))
       Flags += "uniq,";
     break;
   case SecProfSummary:
     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
       Flags += "partial,";
     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext))
       Flags += "context,";
     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsPreInlined))
       Flags += "preInlined,";
     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
       Flags += "fs-discriminator,";
     break;
   case SecFuncOffsetTable:
     if (hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered))
       Flags += "ordered,";
     break;
   case SecFuncMetadata:
     if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased))
       Flags += "probe,";
     if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute))
       Flags += "attr,";
     break;
   default:
     break;
   }

   // Turn the trailing separator into the closing brace; with no flag at all
   // the string is still just "{" and the brace is appended instead.
   if (Flags.back() == ',')
     Flags.back() = '}';
   else
     Flags += "}";
   return Flags;
 }

 /// Print one line per section (name, offset, size, flags) to \p OS, followed
 /// by the header size, the total size of all sections and the file size.
 ///
 /// \returns true.
 bool SampleProfileReaderExtBinaryBase::dumpSectionInfo(raw_ostream &OS) {
   uint64_t TotalSecsSize = 0;
   for (auto &Entry : SecHdrTable) {
     OS << getSecName(Entry.Type) << " - Offset: " << Entry.Offset
        << ", Size: " << Entry.Size << ", Flags: " << getSecFlagsStr(Entry)
        << "\n";
     TotalSecsSize += Entry.Size;
   }

   // The header size is taken to be the offset of the first table entry.
   // front() is undefined on an empty table, so report 0 in that case.
   const uint64_t HeaderSize =
       SecHdrTable.empty() ? 0 : SecHdrTable.front().Offset;
   const uint64_t FileSize = getFileSize();
   assert(HeaderSize + TotalSecsSize == FileSize &&
          "Size of 'header + sections' doesn't match the total size of profile");

   OS << "Header Size: " << HeaderSize << "\n";
   OS << "Total Sections Size: " << TotalSecsSize << "\n";
   OS << "File Size: " << FileSize << "\n";
   return true;
 }

 /// Read the magic identifier and the version number, validating both.
 ///
 /// \returns a read error, the error from verifySPMagic, unsupported_version
 /// when the version differs from SPVersion(), or sampleprof_error::success.
 std::error_code SampleProfileReaderBinary::readMagicIdent() {
   // Read and check the magic identifier.
   auto Magic = readNumber<uint64_t>();
   if (std::error_code EC = Magic.getError())
     return EC;
   if (std::error_code EC = verifySPMagic(*Magic))
     return EC;

   // Read and check the version number.
   auto Version = readNumber<uint64_t>();
   if (std::error_code EC = Version.getError())
     return EC;
   if (*Version != SPVersion())
     return sampleprof_error::unsupported_version;

   return sampleprof_error::success;
 }

 /// Read the header: magic/version identifier, summary and name table, in
 /// that order. Data and End are reset to span the whole buffer first.
 std::error_code SampleProfileReaderBinary::readHeader() {
   const uint8_t *BufStart =
       reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
   Data = BufStart;
   End = BufStart + Buffer->getBufferSize();

   std::error_code EC = readMagicIdent();
   if (!EC)
     EC = readSummary();
   if (!EC)
     EC = readNameTable();
   if (EC)
     return EC;

   return sampleprof_error::success;
 }

 /// Read one summary entry (cutoff, minimum block count, number of blocks)
 /// and append it to \p Entries.
 std::error_code SampleProfileReaderBinary::readSummaryEntry(
     std::vector<ProfileSummaryEntry> &Entries) {
   auto CutoffOrErr = readNumber<uint64_t>();
   if (std::error_code EC = CutoffOrErr.getError())
     return EC;

   auto MinCountOrErr = readNumber<uint64_t>();
   if (std::error_code EC = MinCountOrErr.getError())
     return EC;

   auto NumBlocksOrErr = readNumber<uint64_t>();
   if (std::error_code EC = NumBlocksOrErr.getError())
     return EC;

   Entries.emplace_back(*CutoffOrErr, *MinCountOrErr, *NumBlocksOrErr);
   return sampleprof_error::success;
 }

 /// Read the profile summary and store it in Summary.
 ///
 /// The fields are read in this order: total count, max block count, max
 /// function count, number of blocks, number of functions, number of summary
 /// entries, then the entries themselves.
 ///
 /// \returns the first read error, or sampleprof_error::success.
 std::error_code SampleProfileReaderBinary::readSummary() {
   auto TotalCount = readNumber<uint64_t>();
   if (std::error_code EC = TotalCount.getError())
     return EC;

   auto MaxBlockCount = readNumber<uint64_t>();
   if (std::error_code EC = MaxBlockCount.getError())
     return EC;

   auto MaxFunctionCount = readNumber<uint64_t>();
   if (std::error_code EC = MaxFunctionCount.getError())
     return EC;

   auto NumBlocks = readNumber<uint64_t>();
   if (std::error_code EC = NumBlocks.getError())
     return EC;

   auto NumFunctions = readNumber<uint64_t>();
   if (std::error_code EC = NumFunctions.getError())
     return EC;

   auto NumSummaryEntries = readNumber<uint64_t>();
   if (std::error_code EC = NumSummaryEntries.getError())
     return EC;

   std::vector<ProfileSummaryEntry> Entries;
   // The counter has the same width as the 64-bit count read from the file,
   // so it cannot wrap around before reaching the count.
   for (uint64_t I = 0; I < *NumSummaryEntries; ++I) {
     if (std::error_code EC = readSummaryEntry(Entries))
       return EC;
   }
   Summary = std::make_unique<ProfileSummary>(
       ProfileSummary::PSK_Sample, Entries, *TotalCount, *MaxBlockCount, 0,
       *MaxFunctionCount, *NumBlocks, *NumFunctions);

   return sampleprof_error::success;
 }

 bool SampleProfileReaderRawBinary::hasFormat(const MemoryBuffer &Buffer) {
   const uint8_t *Data =
       reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
   uint64_t Magic = decodeULEB128(Data);
   return Magic == SPMagic();
 }

 bool SampleProfileReaderExtBinary::hasFormat(const MemoryBuffer &Buffer) {
   const uint8_t *Data =
       reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
   uint64_t Magic = decodeULEB128(Data);
   return Magic == SPMagic(SPF_Ext_Binary);
 }

 /// Read one 32-bit integer from GcovBuffer and discard it.
 std::error_code SampleProfileReaderGCC::skipNextWord() {
   uint32_t Ignored;
   if (GcovBuffer.readInt(Ignored))
     return sampleprof_error::success;
   return sampleprof_error::truncated;
 }

 /// Read an integer of type T from GcovBuffer.
 ///
 /// Types no wider than 32 bits are read as a 32-bit value, wider types up to
 /// 64 bits as a 64-bit value. A failed read, or a value larger than T can
 /// hold, is reported through reportError and returned as malformed.
 template <typename T> ErrorOr<T> SampleProfileReaderGCC::readNumber() {
   const uint64_t Limit = std::numeric_limits<T>::max();

   if (sizeof(T) <= sizeof(uint32_t)) {
     uint32_t Narrow;
     if (GcovBuffer.readInt(Narrow) && Narrow <= Limit)
       return static_cast<T>(Narrow);
   } else if (sizeof(T) <= sizeof(uint64_t)) {
     uint64_t Wide;
     if (GcovBuffer.readInt64(Wide) && Wide <= Limit)
       return static_cast<T>(Wide);
   }

   std::error_code EC = sampleprof_error::malformed;
   reportError(0, EC.message());
   return EC;
 }

 /// Read a string from GcovBuffer; \returns truncated when the read fails.
 ErrorOr<StringRef> SampleProfileReaderGCC::readString() {
   StringRef Result;
   if (GcovBuffer.readString(Result))
     return Result;
   return sampleprof_error::truncated;
 }

 std::error_code SampleProfileReaderGCC::readHeader() {
   // Read the magic identifier.
   if (!GcovBuffer.readGCDAFormat())
     return sampleprof_error::unrecognized_format;

   // Read the version number. Note - the GCC reader does not validate this
   // version, but the profile creator generates v704.
   GCOV::GCOVVersion version;
   if (!GcovBuffer.readGCOVVersion(version))
     return sampleprof_error::unrecognized_format;

   if (version != GCOV::V407)
     return sampleprof_error::unsupported_version;

   // Skip the empty integer.
   if (std::error_code EC = skipNextWord())
     return EC;

   return sampleprof_error::success;
 }

 /// Read a section tag and check that it equals \p Expected, then skip the
 /// word that follows it.
 ///
 /// \returns truncated on a failed read, malformed on a tag mismatch, or
 /// sampleprof_error::success.
 std::error_code SampleProfileReaderGCC::readSectionTag(uint32_t Expected) {
   uint32_t ActualTag;
   if (!GcovBuffer.readInt(ActualTag))
     return sampleprof_error::truncated;
   if (ActualTag != Expected)
     return sampleprof_error::malformed;

   std::error_code EC = skipNextWord();
   if (EC)
     return EC;

   return sampleprof_error::success;
 }

 std::error_code SampleProfileReaderGCC::readNameTable() {
   if (std::error_code EC = readSectionTag(GCOVTagAFDOFileNames))
     return EC;

   uint32_t Size;
   if (!GcovBuffer.readInt(Size))
     return sampleprof_error::truncated;

   for (uint32_t I = 0; I < Size; ++I) {
     StringRef Str;
     if (!GcovBuffer.readString(Str))
       return sampleprof_error::truncated;
     Names.push_back(std::string(Str));
   }

   return sampleprof_error::success;
 }

 std::error_code SampleProfileReaderGCC::readFunctionProfiles() {
   if (std::error_code EC = readSectionTag(GCOVTagAFDOFunction))
     return EC;

   uint32_t NumFunctions;
   if (!GcovBuffer.readInt(NumFunctions))
     return sampleprof_error::truncated;

   InlineCallStack Stack;
   for (uint32_t I = 0; I < NumFunctions; ++I)
     if (std::error_code EC = readOneFunctionProfile(Stack, true, 0))
       return EC;

   computeSummary();
   return sampleprof_error::success;
 }

 std::error_code SampleProfileReaderGCC::readOneFunctionProfile(
     const InlineCallStack &InlineStack, bool Update, uint32_t Offset) {
   uint64_t HeadCount = 0;
   if (InlineStack.size() == 0)
     if (!GcovBuffer.readInt64(HeadCount))
       return sampleprof_error::truncated;

   uint32_t NameIdx;
   if (!GcovBuffer.readInt(NameIdx))
     return sampleprof_error::truncated;

   StringRef Name(Names[NameIdx]);

   uint32_t NumPosCounts;
   if (!GcovBuffer.readInt(NumPosCounts))
     return sampleprof_error::truncated;

   uint32_t NumCallsites;
   if (!GcovBuffer.readInt(NumCallsites))
     return sampleprof_error::truncated;

   FunctionSamples *FProfile = nullptr;
   if (InlineStack.size() == 0) {
     // If this is a top function that we have already processed, do not
     // update its profile again.  This happens in the presence of
     // function aliases.  Since these aliases share the same function
     // body, there will be identical replicated profiles for the
     // original function.  In this case, we simply not bother updating
     // the profile of the original function.
     FProfile = &Profiles[FunctionId(Name)];
     FProfile->addHeadSamples(HeadCount);
     if (FProfile->getTotalSamples() > 0)
       Update = false;
   } else {
     // Otherwise, we are reading an inlined instance. The top of the
     // inline stack contains the profile of the caller. Insert this
     // callee in the caller's CallsiteMap.
     FunctionSamples *CallerProfile = InlineStack.front();
     uint32_t LineOffset = Offset >> 16;
     uint32_t Discriminator = Offset & 0xffff;
     FProfile = &CallerProfile->functionSamplesAt(
         LineLocation(LineOffset, Discriminator))[FunctionId(Name)];
   }
   FProfile->setFunction(FunctionId(Name));

   for (uint32_t I = 0; I < NumPosCounts; ++I) {
     uint32_t Offset;
     if (!GcovBuffer.readInt(Offset))
       return sampleprof_error::truncated;

     uint32_t NumTargets;
     if (!GcovBuffer.readInt(NumTargets))
       return sampleprof_error::truncated;

     uint64_t Count;
     if (!GcovBuffer.readInt64(Count))
       return sampleprof_error::truncated;

     // The line location is encoded in the offset as:
     //   high 16 bits: line offset to the start of the function.
     //   low 16 bits: discriminator.
     uint32_t LineOffset = Offset >> 16;
     uint32_t Discriminator = Offset & 0xffff;

     InlineCallStack NewStack;
     NewStack.push_back(FProfile);
     llvm::append_range(NewStack, InlineStack);
     if (Update) {
       // Walk up the inline stack, adding the samples on this line to
       // the total sample count of the callers in the chain.
       for (auto *CallerProfile : NewStack)
         CallerProfile->addTotalSamples(Count);

       // Update the body samples for the current profile.
       FProfile->addBodySamples(LineOffset, Discriminator, Count);
     }

     // Process the list of functions called at an indirect call site.
     // These are all the targets that a function pointer (or virtual
     // function) resolved at runtime.
     for (uint32_t J = 0; J < NumTargets; J++) {
       uint32_t HistVal;
       if (!GcovBuffer.readInt(HistVal))
         return sampleprof_error::truncated;

       if (HistVal != HIST_TYPE_INDIR_CALL_TOPN)
         return sampleprof_error::malformed;

       uint64_t TargetIdx;
       if (!GcovBuffer.readInt64(TargetIdx))
         return sampleprof_error::truncated;
       StringRef TargetName(Names[TargetIdx]);

       uint64_t TargetCount;
       if (!GcovBuffer.readInt64(TargetCount))
         return sampleprof_error::truncated;

       if (Update)
         FProfile->addCalledTargetSamples(LineOffset, Discriminator,
                                          FunctionId(TargetName),
                                          TargetCount);
     }
   }

   // Process all the inlined callers into the current function. These
   // are all the callsites that were inlined into this function.
   for (uint32_t I = 0; I < NumCallsites; I++) {
     // The offset is encoded as:
     //   high 16 bits: line offset to the start of the function.
     //   low 16 bits: discriminator.
     uint32_t Offset;
     if (!GcovBuffer.readInt(Offset))
       return sampleprof_error::truncated;
     InlineCallStack NewStack;
     NewStack.push_back(FProfile);
     llvm::append_range(NewStack, InlineStack);
     if (std::error_code EC = readOneFunctionProfile(NewStack, Update, Offset))
       return EC;
   }

   return sampleprof_error::success;
 }

 /// Read a GCC AutoFDO profile: the name table first, then the function
 /// profiles.
 ///
 /// This format is generated by the Linux Perf conversion tool at
 /// https://github.com/google/autofdo.
 std::error_code SampleProfileReaderGCC::readImpl() {
   assert(!ProfileIsFSDisciminator && "Gcc profiles not support FSDisciminator");

   // Read the string table, then the source profile that refers to it.
   std::error_code EC = readNameTable();
   if (!EC)
     EC = readFunctionProfiles();
   if (EC)
     return EC;

   return sampleprof_error::success;
 }

 bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) {
   StringRef Magic(reinterpret_cast<const char *>(Buffer.getBufferStart()));
   return Magic == "adcg*704";
 }

 /// Register every name found in the reader's profiles with the remapper.
 ///
 /// Each name that the remapping table accepts is recorded in NameMap under
 /// the key the table returned for it. If the reader uses MD5 names, a warning
 /// is emitted through \p Ctx and nothing is recorded.
 ///
 /// \param Ctx The LLVM context used to emit the warning diagnostic.
 void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext &Ctx) {
   // Profiles keyed by MD5 do not carry the original function names, so there
   // is nothing to remap; warn and leave the remapper untouched.
   if (Reader.useMD5()) {
     Ctx.diagnose(DiagnosticInfoSampleProfile(
         Reader.getBuffer()->getBufferIdentifier(),
         "Profile data remapping cannot be applied to profile data "
         "using MD5 names (original mangled names are not available).",
         DS_Warning));
     return;
   }

   // CSSPGO-TODO: Remapper is not yet supported.
   // We will need to remap the entire context string.
   assert(Remappings && "should be initialized while creating remapper");
   for (auto &ProfileEntry : Reader.getProfiles()) {
     // Gather all names referenced by this profile before registering them.
     DenseSet<FunctionId> Collected;
     ProfileEntry.second.findAllNames(Collected);

     for (auto &Id : Collected) {
       StringRef Spelling = Id.stringRef();
       auto Key = Remappings->insert(Spelling);
       // A null key means the table did not accept this name; skip it.
       if (!Key)
         continue;
       NameMap.insert({Key, Spelling});
     }
   }

   RemappingApplied = true;
 }

 /// Find the profile name that \p Fname remaps to.
 ///
 /// \param Fname The function name to look up in the remapping table.
 ///
 /// \returns the non-empty name stored in NameMap for the key of \p Fname, or
 /// std::nullopt when the table yields no key or no name is stored for it.
 std::optional<StringRef>
 SampleProfileReaderItaniumRemapper::lookUpNameInProfile(StringRef Fname) {
   auto Key = Remappings->lookup(Fname);
   if (!Key)
     return std::nullopt;

   StringRef Mapped = NameMap.lookup(Key);
   if (Mapped.empty())
     return std::nullopt;

   return Mapped;
 }

 /// Load the contents of \p Filename into a memory buffer.
 ///
 /// The name "-" selects standard input; any other name is opened through
 /// \p FS.
 ///
 /// \param Filename The file to read, or "-" for standard input.
 ///
 /// \param FS The file system used to open \p Filename.
 ///
 /// \returns the loaded buffer, or the error code reported while loading it.
 static ErrorOr<std::unique_ptr<MemoryBuffer>>
 setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) {
   auto Loaded = Filename.str() == "-" ? MemoryBuffer::getSTDIN()
                                       : FS.getBufferForFile(Filename);

   const std::error_code LoadEC = Loaded.getError();
   if (LoadEC)
     return LoadEC;

   // Hand ownership of the loaded buffer to the caller.
   return std::move(Loaded.get());
 }

 /// Open \p Filename and build the sample profile reader matching its format.
 ///
 /// \param Filename The file to open.
 ///
 /// \param C The LLVM context to use to emit diagnostics.
 ///
 /// \param FS The file system used to open the input file.
 ///
 /// \param P The FSDiscriminatorPass.
 ///
 /// \param RemapFilename The file used for profile remapping.
 ///
 /// \returns the created reader, or the error code describing why it could
 /// not be created.
 ErrorOr<std::unique_ptr<SampleProfileReader>>
 SampleProfileReader::create(StringRef Filename, LLVMContext &C,
                             vfs::FileSystem &FS, FSDiscriminatorPass P,
                             StringRef RemapFilename) {
   auto Loaded = setupMemoryBuffer(Filename, FS);

   const std::error_code LoadEC = Loaded.getError();
   if (LoadEC)
     return LoadEC;

   // Delegate to the buffer-based overload, which takes over the buffer.
   return create(Loaded.get(), C, FS, P, RemapFilename);
 }

 /// Open \p Filename and build a sample profile remapper from its contents,
 /// to remap the function names in the given profile data.
 ///
 /// \param Filename The file to open.
 ///
 /// \param FS The file system used to open \p Filename.
 ///
 /// \param Reader The profile reader the remapper is going to be applied to.
 ///
 /// \param C The LLVM context to use to emit diagnostics.
 ///
 /// \returns the created remapper, or the error code describing why it could
 /// not be created.
 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
 SampleProfileReaderItaniumRemapper::create(StringRef Filename,
                                            vfs::FileSystem &FS,
                                            SampleProfileReader &Reader,
                                            LLVMContext &C) {
   auto Loaded = setupMemoryBuffer(Filename, FS);

   const std::error_code LoadEC = Loaded.getError();
   if (LoadEC)
     return LoadEC;

   // Delegate to the buffer-based overload, which takes over the buffer.
   return create(Loaded.get(), Reader, C);
 }

 /// Build a sample profile remapper from an in-memory remapping file, to
 /// remap the function names in the given profile data.
 ///
 /// \param B The memory buffer to create the remapper from (assumes
 /// ownership on success).
 ///
 /// \param Reader The profile reader the remapper is going to be applied to.
 ///
 /// \param C The LLVM context to use to emit diagnostics.
 ///
 /// \returns the created remapper, or sampleprof_error::malformed when the
 /// remapping data cannot be read.
 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
 SampleProfileReaderItaniumRemapper::create(std::unique_ptr<MemoryBuffer> &B,
                                            SampleProfileReader &Reader,
                                            LLVMContext &C) {
   auto Remappings = std::make_unique<SymbolRemappingReader>();

   Error ReadErr = Remappings->read(*B);
   if (ReadErr) {
     // Surface each parse error as a diagnostic that carries the buffer name
     // and the offending line number.
     handleAllErrors(
         std::move(ReadErr), [&](const SymbolRemappingParseError &ParseError) {
           C.diagnose(DiagnosticInfoSampleProfile(B->getBufferIdentifier(),
                                                  ParseError.getLineNum(),
                                                  ParseError.getMessage()));
         });
     return sampleprof_error::malformed;
   }

   // The remapper takes over both the buffer and the parsed remappings.
   return std::make_unique<SampleProfileReaderItaniumRemapper>(
       std::move(B), std::move(Remappings), Reader);
 }

 /// Build the sample profile reader that matches the format of the data in
 /// \p B.
 ///
 /// The formats are probed in this order: raw binary, extensible binary, GCC
 /// and text. When \p RemapFilename is non-empty a remapper is created and
 /// attached to the reader before the profile header is read.
 ///
 /// \param B The memory buffer to create the reader from (assumes ownership).
 ///
 /// \param C The LLVM context to use to emit diagnostics.
 ///
 /// \param FS The file system used to open the remapping file.
 ///
 /// \param P The FSDiscriminatorPass.
 ///
 /// \param RemapFilename The file used for profile remapping.
 ///
 /// \returns the created reader, or the error code describing why it could
 /// not be created.
 ErrorOr<std::unique_ptr<SampleProfileReader>>
 SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C,
                             vfs::FileSystem &FS, FSDiscriminatorPass P,
                             StringRef RemapFilename) {
   // Pick the concrete reader from the first format that recognizes the data.
   std::unique_ptr<SampleProfileReader> Reader;
   if (SampleProfileReaderRawBinary::hasFormat(*B))
     Reader = std::make_unique<SampleProfileReaderRawBinary>(std::move(B), C);
   else if (SampleProfileReaderExtBinary::hasFormat(*B))
     Reader = std::make_unique<SampleProfileReaderExtBinary>(std::move(B), C);
   else if (SampleProfileReaderGCC::hasFormat(*B))
     Reader = std::make_unique<SampleProfileReaderGCC>(std::move(B), C);
   else if (SampleProfileReaderText::hasFormat(*B))
     Reader = std::make_unique<SampleProfileReaderText>(std::move(B), C);
   else
     return sampleprof_error::unrecognized_format;

   // Attach a remapper when a remapping file was requested; a failure here is
   // reported as a diagnostic and aborts the creation of the reader.
   if (!RemapFilename.empty()) {
     auto RemapperOrErr = SampleProfileReaderItaniumRemapper::create(
         RemapFilename, FS, *Reader, C);
     const std::error_code RemapEC = RemapperOrErr.getError();
     if (RemapEC) {
       std::string Msg = "Could not create remapper: " + RemapEC.message();
       C.diagnose(DiagnosticInfoSampleProfile(RemapFilename, Msg));
       return RemapEC;
     }
     Reader->Remapper = std::move(RemapperOrErr.get());
   }

   const std::error_code HeaderEC = Reader->readHeader();
   if (HeaderEC)
     return HeaderEC;

   Reader->setDiscriminatorMaskedBitFrom(P);

   return std::move(Reader);
 }

 // For text and GCC file formats, we compute the summary after reading the
 // profile. Binary format has the profile summary in its header.
 void SampleProfileReader::computeSummary() {
   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
   Summary = Builder.computeSummaryForProfiles(Profiles);
 }