|  | #include "llvm/ProfileData/DataAccessProf.h" | 
|  | #include "llvm/ADT/STLExtras.h" | 
|  | #include "llvm/ProfileData/InstrProf.h" | 
|  | #include "llvm/Support/Compression.h" | 
|  | #include "llvm/Support/Endian.h" | 
|  | #include "llvm/Support/Errc.h" | 
|  | #include "llvm/Support/Error.h" | 
|  | #include "llvm/Support/StringSaver.h" | 
|  | #include "llvm/Support/raw_ostream.h" | 
|  |  | 
|  | namespace llvm { | 
|  | namespace memprof { | 
|  |  | 
|  | // If `Map` has an entry keyed by `Str`, returns the entry iterator. Otherwise, | 
|  | // creates an owned copy of `Str`, adds a map entry for it and returns the | 
|  | // iterator. | 
|  | static std::pair<StringRef, uint64_t> | 
|  | saveStringToMap(DataAccessProfData::StringToIndexMap &Map, | 
|  | llvm::UniqueStringSaver &Saver, StringRef Str) { | 
|  | auto [Iter, Inserted] = Map.try_emplace(Saver.save(Str), Map.size()); | 
|  | return *Iter; | 
|  | } | 
|  |  | 
|  | // Returns the canonical name or error. | 
|  | static Expected<StringRef> getCanonicalName(StringRef Name) { | 
|  | if (Name.empty()) | 
|  | return make_error<StringError>("Empty symbol name", | 
|  | llvm::errc::invalid_argument); | 
|  | return InstrProfSymtab::getCanonicalName(Name); | 
|  | } | 
|  |  | 
|  | std::optional<DataAccessProfRecord> | 
|  | DataAccessProfData::getProfileRecord(const SymbolHandleRef SymbolID) const { | 
|  | auto Key = SymbolID; | 
|  | if (std::holds_alternative<StringRef>(SymbolID)) { | 
|  | auto NameOrErr = getCanonicalName(std::get<StringRef>(SymbolID)); | 
|  | // If name canonicalization fails, suppress the error inside. | 
|  | if (!NameOrErr) { | 
|  | assert( | 
|  | std::get<StringRef>(SymbolID).empty() && | 
|  | "Name canonicalization only fails when stringified string is empty."); | 
|  | return std::nullopt; | 
|  | } | 
|  | Key = *NameOrErr; | 
|  | } | 
|  |  | 
|  | auto It = Records.find(Key); | 
|  | if (It != Records.end()) { | 
|  | return DataAccessProfRecord(Key, It->second.AccessCount, | 
|  | It->second.Locations); | 
|  | } | 
|  |  | 
|  | return std::nullopt; | 
|  | } | 
|  |  | 
|  | bool DataAccessProfData::isKnownColdSymbol(const SymbolHandleRef SymID) const { | 
|  | if (std::holds_alternative<uint64_t>(SymID)) | 
|  | return KnownColdHashes.contains(std::get<uint64_t>(SymID)); | 
|  | return KnownColdSymbols.contains(std::get<StringRef>(SymID)); | 
|  | } | 
|  |  | 
|  | Error DataAccessProfData::setDataAccessProfile(SymbolHandleRef Symbol, | 
|  | uint64_t AccessCount) { | 
|  | uint64_t RecordID = -1; | 
|  | const bool IsStringLiteral = std::holds_alternative<uint64_t>(Symbol); | 
|  | SymbolHandleRef Key; | 
|  | if (IsStringLiteral) { | 
|  | RecordID = std::get<uint64_t>(Symbol); | 
|  | Key = RecordID; | 
|  | } else { | 
|  | auto CanonicalName = getCanonicalName(std::get<StringRef>(Symbol)); | 
|  | if (!CanonicalName) | 
|  | return CanonicalName.takeError(); | 
|  | std::tie(Key, RecordID) = | 
|  | saveStringToMap(StrToIndexMap, Saver, *CanonicalName); | 
|  | } | 
|  |  | 
|  | auto [Iter, Inserted] = | 
|  | Records.try_emplace(Key, RecordID, AccessCount, IsStringLiteral); | 
|  | if (!Inserted) | 
|  | return make_error<StringError>("Duplicate symbol or string literal added. " | 
|  | "User of DataAccessProfData should " | 
|  | "aggregate count for the same symbol. ", | 
|  | llvm::errc::invalid_argument); | 
|  |  | 
|  | return Error::success(); | 
|  | } | 
|  |  | 
|  | Error DataAccessProfData::setDataAccessProfile( | 
|  | SymbolHandleRef SymbolID, uint64_t AccessCount, | 
|  | ArrayRef<SourceLocation> Locations) { | 
|  | if (Error E = setDataAccessProfile(SymbolID, AccessCount)) | 
|  | return E; | 
|  |  | 
|  | auto &Record = Records.back().second; | 
|  | for (const auto &Location : Locations) | 
|  | Record.Locations.push_back( | 
|  | {saveStringToMap(StrToIndexMap, Saver, Location.FileName).first, | 
|  | Location.Line}); | 
|  |  | 
|  | return Error::success(); | 
|  | } | 
|  |  | 
|  | Error DataAccessProfData::addKnownSymbolWithoutSamples( | 
|  | SymbolHandleRef SymbolID) { | 
|  | if (std::holds_alternative<uint64_t>(SymbolID)) { | 
|  | KnownColdHashes.insert(std::get<uint64_t>(SymbolID)); | 
|  | return Error::success(); | 
|  | } | 
|  | auto CanonicalName = getCanonicalName(std::get<StringRef>(SymbolID)); | 
|  | if (!CanonicalName) | 
|  | return CanonicalName.takeError(); | 
|  | KnownColdSymbols.insert( | 
|  | saveStringToMap(StrToIndexMap, Saver, *CanonicalName).first); | 
|  | return Error::success(); | 
|  | } | 
|  |  | 
|  | Error DataAccessProfData::deserialize(const unsigned char *&Ptr) { | 
|  | uint64_t NumSampledSymbols = | 
|  | support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr); | 
|  | uint64_t NumColdKnownSymbols = | 
|  | support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr); | 
|  | if (Error E = deserializeSymbolsAndFilenames(Ptr, NumSampledSymbols, | 
|  | NumColdKnownSymbols)) | 
|  | return E; | 
|  |  | 
|  | uint64_t Num = | 
|  | support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr); | 
|  | for (uint64_t I = 0; I < Num; ++I) | 
|  | KnownColdHashes.insert( | 
|  | support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr)); | 
|  |  | 
|  | return deserializeRecords(Ptr); | 
|  | } | 
|  |  | 
|  | Error DataAccessProfData::serializeSymbolsAndFilenames(ProfOStream &OS) const { | 
|  | OS.write(StrToIndexMap.size()); | 
|  | OS.write(KnownColdSymbols.size()); | 
|  |  | 
|  | std::vector<std::string> Strs; | 
|  | Strs.reserve(StrToIndexMap.size() + KnownColdSymbols.size()); | 
|  | for (const auto &Str : StrToIndexMap) | 
|  | Strs.push_back(Str.first.str()); | 
|  | for (const auto &Str : KnownColdSymbols) | 
|  | Strs.push_back(Str.str()); | 
|  |  | 
|  | std::string CompressedStrings; | 
|  | if (!Strs.empty()) | 
|  | if (Error E = collectGlobalObjectNameStrings( | 
|  | Strs, compression::zlib::isAvailable(), CompressedStrings)) | 
|  | return E; | 
|  | const uint64_t CompressedStringLen = CompressedStrings.length(); | 
|  | // Record the length of compressed string. | 
|  | OS.write(CompressedStringLen); | 
|  | // Write the chars in compressed strings. | 
|  | for (char C : CompressedStrings) | 
|  | OS.writeByte(static_cast<uint8_t>(C)); | 
|  | // Pad up to a multiple of 8. | 
|  | // InstrProfReader could read bytes according to 'CompressedStringLen'. | 
|  | const uint64_t PaddedLength = alignTo(CompressedStringLen, 8); | 
|  | for (uint64_t K = CompressedStringLen; K < PaddedLength; K++) | 
|  | OS.writeByte(0); | 
|  | return Error::success(); | 
|  | } | 
|  |  | 
|  | uint64_t | 
|  | DataAccessProfData::getEncodedIndex(const SymbolHandleRef SymbolID) const { | 
|  | if (std::holds_alternative<uint64_t>(SymbolID)) | 
|  | return std::get<uint64_t>(SymbolID); | 
|  |  | 
|  | auto Iter = StrToIndexMap.find(std::get<StringRef>(SymbolID)); | 
|  | assert(Iter != StrToIndexMap.end() && | 
|  | "String literals not found in StrToIndexMap"); | 
|  | return Iter->second; | 
|  | } | 
|  |  | 
|  | Error DataAccessProfData::serialize(ProfOStream &OS) const { | 
|  | if (Error E = serializeSymbolsAndFilenames(OS)) | 
|  | return E; | 
|  | OS.write(KnownColdHashes.size()); | 
|  | for (const auto &Hash : KnownColdHashes) | 
|  | OS.write(Hash); | 
|  | OS.write((uint64_t)(Records.size())); | 
|  | for (const auto &[Key, Rec] : Records) { | 
|  | OS.write(getEncodedIndex(Rec.SymbolID)); | 
|  | OS.writeByte(Rec.IsStringLiteral); | 
|  | OS.write(Rec.AccessCount); | 
|  | OS.write(Rec.Locations.size()); | 
|  | for (const auto &Loc : Rec.Locations) { | 
|  | OS.write(getEncodedIndex(Loc.FileName)); | 
|  | OS.write32(Loc.Line); | 
|  | } | 
|  | } | 
|  | return Error::success(); | 
|  | } | 
|  |  | 
|  | Error DataAccessProfData::deserializeSymbolsAndFilenames( | 
|  | const unsigned char *&Ptr, const uint64_t NumSampledSymbols, | 
|  | const uint64_t NumColdKnownSymbols) { | 
|  | uint64_t Len = | 
|  | support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr); | 
|  |  | 
|  | // The first NumSampledSymbols strings are symbols with samples, and next | 
|  | // NumColdKnownSymbols strings are known cold symbols. | 
|  | uint64_t StringCnt = 0; | 
|  | std::function<Error(StringRef)> addName = [&](StringRef Name) { | 
|  | if (StringCnt < NumSampledSymbols) | 
|  | saveStringToMap(StrToIndexMap, Saver, Name); | 
|  | else | 
|  | KnownColdSymbols.insert(Saver.save(Name)); | 
|  | ++StringCnt; | 
|  | return Error::success(); | 
|  | }; | 
|  | if (Error E = | 
|  | readAndDecodeStrings(StringRef((const char *)Ptr, Len), addName)) | 
|  | return E; | 
|  |  | 
|  | Ptr += alignTo(Len, 8); | 
|  | return Error::success(); | 
|  | } | 
|  |  | 
|  | Error DataAccessProfData::deserializeRecords(const unsigned char *&Ptr) { | 
|  | SmallVector<StringRef> Strings = | 
|  | llvm::to_vector(llvm::make_first_range(getStrToIndexMapRef())); | 
|  |  | 
|  | uint64_t NumRecords = | 
|  | support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr); | 
|  |  | 
|  | for (uint64_t I = 0; I < NumRecords; ++I) { | 
|  | uint64_t ID = | 
|  | support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr); | 
|  |  | 
|  | bool IsStringLiteral = | 
|  | support::endian::readNext<uint8_t, llvm::endianness::little>(Ptr); | 
|  |  | 
|  | uint64_t AccessCount = | 
|  | support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr); | 
|  |  | 
|  | SymbolHandleRef SymbolID; | 
|  | if (IsStringLiteral) | 
|  | SymbolID = ID; | 
|  | else | 
|  | SymbolID = Strings[ID]; | 
|  | if (Error E = setDataAccessProfile(SymbolID, AccessCount)) | 
|  | return E; | 
|  |  | 
|  | auto &Record = Records.back().second; | 
|  |  | 
|  | uint64_t NumLocations = | 
|  | support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr); | 
|  |  | 
|  | Record.Locations.reserve(NumLocations); | 
|  | for (uint64_t J = 0; J < NumLocations; ++J) { | 
|  | uint64_t FileNameIndex = | 
|  | support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr); | 
|  | uint32_t Line = | 
|  | support::endian::readNext<uint32_t, llvm::endianness::little>(Ptr); | 
|  | Record.Locations.push_back({Strings[FileNameIndex], Line}); | 
|  | } | 
|  | } | 
|  | return Error::success(); | 
|  | } | 
|  | } // namespace memprof | 
|  | } // namespace llvm |