| //===-- BasicBlockSectionsProfileReader.cpp -------------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // Implementation of the basic block sections profile reader pass. It parses |
| // and stores the basic block sections profile file (which is specified via the |
| // `-basic-block-sections` flag). |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" |
| #include "llvm/ADT/DenseSet.h" |
| #include "llvm/ADT/SmallSet.h" |
| #include "llvm/ADT/SmallString.h" |
| #include "llvm/ADT/SmallVector.h" |
| #include "llvm/ADT/StringMap.h" |
| #include "llvm/ADT/StringRef.h" |
| #include "llvm/IR/DebugInfoMetadata.h" |
| #include "llvm/Pass.h" |
| #include "llvm/Support/Error.h" |
| #include "llvm/Support/ErrorHandling.h" |
| #include "llvm/Support/LineIterator.h" |
| #include "llvm/Support/MemoryBuffer.h" |
| #include "llvm/Support/Path.h" |
| #include <llvm/ADT/STLExtras.h> |
| |
| using namespace llvm; |
| |
| char BasicBlockSectionsProfileReaderWrapperPass::ID = 0; |
| INITIALIZE_PASS(BasicBlockSectionsProfileReaderWrapperPass, |
| "bbsections-profile-reader", |
| "Reads and parses a basic block sections profile.", false, |
| false) |
| |
| Expected<UniqueBBID> |
| BasicBlockSectionsProfileReader::parseUniqueBBID(StringRef S) const { |
| SmallVector<StringRef, 2> Parts; |
| S.split(Parts, '.'); |
| if (Parts.size() > 2) |
| return createProfileParseError(Twine("unable to parse basic block id: '") + |
| S + "'"); |
| unsigned long long BaseBBID; |
| if (getAsUnsignedInteger(Parts[0], 10, BaseBBID)) |
| return createProfileParseError( |
| Twine("unable to parse BB id: '" + Parts[0]) + |
| "': unsigned integer expected"); |
| unsigned long long CloneID = 0; |
| if (Parts.size() > 1 && getAsUnsignedInteger(Parts[1], 10, CloneID)) |
| return createProfileParseError(Twine("unable to parse clone id: '") + |
| Parts[1] + "': unsigned integer expected"); |
| return UniqueBBID{static_cast<unsigned>(BaseBBID), |
| static_cast<unsigned>(CloneID)}; |
| } |
| |
| bool BasicBlockSectionsProfileReader::isFunctionHot(StringRef FuncName) const { |
| return getClusterInfoForFunction(FuncName).first; |
| } |
| |
| std::pair<bool, SmallVector<BBClusterInfo>> |
| BasicBlockSectionsProfileReader::getClusterInfoForFunction( |
| StringRef FuncName) const { |
| auto R = ProgramPathAndClusterInfo.find(getAliasName(FuncName)); |
| return R != ProgramPathAndClusterInfo.end() |
| ? std::pair(true, R->second.ClusterInfo) |
| : std::pair(false, SmallVector<BBClusterInfo>()); |
| } |
| |
| SmallVector<SmallVector<unsigned>> |
| BasicBlockSectionsProfileReader::getClonePathsForFunction( |
| StringRef FuncName) const { |
| return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).ClonePaths; |
| } |
| |
| // Reads the version 1 basic block sections profile. Profile for each function |
| // is encoded as follows: |
| // m <module_name> |
| // f <function_name_1> <function_name_2> ... |
| // c <bb_id_1> <bb_id_2> <bb_id_3> |
| // c <bb_id_4> <bb_id_5> |
| // ... |
| // Module name specifier (starting with 'm') is optional and allows |
| // distinguishing profile for internal-linkage functions with the same name. If |
| // not specified, it will apply to any function with the same name. Function |
| // name specifier (starting with 'f') can specify multiple function name |
| // aliases. Basic block clusters are specified by 'c' and specify the cluster of |
| // basic blocks, and the internal order in which they must be placed in the same |
| // section. |
| // This profile can also specify cloning paths which instruct the compiler to |
| // clone basic blocks along a path. The cloned blocks are then specified in the |
| // cluster information. |
| // The following profile lists two cloning paths (starting with 'p') for |
| // function bar and places the total 9 blocks within two clusters. The first two |
| // blocks of a cloning path specify the edge along which the path is cloned. For |
| // instance, path 1 (1 -> 3 -> 4) instructs that 3 and 4 must be cloned along |
| // the edge 1->3. Within the given clusters, each cloned block is identified by |
| // "<original block id>.<clone id>". For instance, 3.1 represents the first |
| // clone of block 3. Original blocks are specified just with their block ids. A |
| // block cloned multiple times appears with distinct clone ids. The CFG for bar |
| // is shown below before and after cloning with its final clusters labeled. |
| // |
| // f main |
| // f bar |
| // p 1 3 4 # cloning path 1 |
| // p 4 2 # cloning path 2 |
| // c 1 3.1 4.1 6 # basic block cluster 1 |
| // c 0 2 3 4 2.1 5 # basic block cluster 2 |
| // **************************************************************************** |
| // function bar before and after cloning with basic block clusters shown. |
| // **************************************************************************** |
| // .... .............. |
| // 0 -------+ : 0 :---->: 1 ---> 3.1 : |
| // | | : | : :........ | : |
| // v v : v : : v : |
| // +--> 2 --> 5 1 ~~~~~~> +---: 2 : : 4.1: clsuter 1 |
| // | | | | : | : : | : |
| // | v | | : v ....... : v : |
| // | 3 <------+ | : 3 <--+ : : 6 : |
| // | | | : | | : :....: |
| // | v | : v | : |
| // +--- 4 ---> 6 | : 4 | : |
| // | : | | : |
| // | : v | : |
| // | :2.1---+ : cluster 2 |
| // | : | ......: |
| // | : v : |
| // +-->: 5 : |
| // .... |
| // **************************************************************************** |
| Error BasicBlockSectionsProfileReader::ReadV1Profile() { |
| auto FI = ProgramPathAndClusterInfo.end(); |
| |
| // Current cluster ID corresponding to this function. |
| unsigned CurrentCluster = 0; |
| // Current position in the current cluster. |
| unsigned CurrentPosition = 0; |
| |
| // Temporary set to ensure every basic block ID appears once in the clusters |
| // of a function. |
| DenseSet<UniqueBBID> FuncBBIDs; |
| |
| // Debug-info-based module filename for the current function. Empty string |
| // means no filename. |
| StringRef DIFilename; |
| |
| for (; !LineIt.is_at_eof(); ++LineIt) { |
| StringRef S(*LineIt); |
| char Specifier = S[0]; |
| S = S.drop_front().trim(); |
| SmallVector<StringRef, 4> Values; |
| S.split(Values, ' '); |
| switch (Specifier) { |
| case '@': |
| continue; |
| case 'm': // Module name speicifer. |
| if (Values.size() != 1) { |
| return createProfileParseError(Twine("invalid module name value: '") + |
| S + "'"); |
| } |
| DIFilename = sys::path::remove_leading_dotslash(Values[0]); |
| continue; |
| case 'f': { // Function names specifier. |
| bool FunctionFound = any_of(Values, [&](StringRef Alias) { |
| auto It = FunctionNameToDIFilename.find(Alias); |
| // No match if this function name is not found in this module. |
| if (It == FunctionNameToDIFilename.end()) |
| return false; |
| // Return a match if debug-info-filename is not specified. Otherwise, |
| // check for equality. |
| return DIFilename.empty() || It->second.equals(DIFilename); |
| }); |
| if (!FunctionFound) { |
| // Skip the following profile by setting the profile iterator (FI) to |
| // the past-the-end element. |
| FI = ProgramPathAndClusterInfo.end(); |
| DIFilename = ""; |
| continue; |
| } |
| for (size_t i = 1; i < Values.size(); ++i) |
| FuncAliasMap.try_emplace(Values[i], Values.front()); |
| |
| // Prepare for parsing clusters of this function name. |
| // Start a new cluster map for this function name. |
| auto R = ProgramPathAndClusterInfo.try_emplace(Values.front()); |
| // Report error when multiple profiles have been specified for the same |
| // function. |
| if (!R.second) |
| return createProfileParseError("duplicate profile for function '" + |
| Values.front() + "'"); |
| FI = R.first; |
| CurrentCluster = 0; |
| FuncBBIDs.clear(); |
| // We won't need DIFilename anymore. Clean it up to avoid its application |
| // on the next function. |
| DIFilename = ""; |
| continue; |
| } |
| case 'c': // Basic block cluster specifier. |
| // Skip the profile when we the profile iterator (FI) refers to the |
| // past-the-end element. |
| if (FI == ProgramPathAndClusterInfo.end()) |
| continue; |
| // Reset current cluster position. |
| CurrentPosition = 0; |
| for (auto BasicBlockIDStr : Values) { |
| auto BasicBlockID = parseUniqueBBID(BasicBlockIDStr); |
| if (!BasicBlockID) |
| return BasicBlockID.takeError(); |
| if (!FuncBBIDs.insert(*BasicBlockID).second) |
| return createProfileParseError( |
| Twine("duplicate basic block id found '") + BasicBlockIDStr + |
| "'"); |
| |
| FI->second.ClusterInfo.emplace_back(BBClusterInfo{ |
| *std::move(BasicBlockID), CurrentCluster, CurrentPosition++}); |
| } |
| CurrentCluster++; |
| continue; |
| case 'p': { // Basic block cloning path specifier. |
| // Skip the profile when we the profile iterator (FI) refers to the |
| // past-the-end element. |
| if (FI == ProgramPathAndClusterInfo.end()) |
| continue; |
| SmallSet<unsigned, 5> BBsInPath; |
| FI->second.ClonePaths.push_back({}); |
| for (size_t I = 0; I < Values.size(); ++I) { |
| auto BaseBBIDStr = Values[I]; |
| unsigned long long BaseBBID = 0; |
| if (getAsUnsignedInteger(BaseBBIDStr, 10, BaseBBID)) |
| return createProfileParseError(Twine("unsigned integer expected: '") + |
| BaseBBIDStr + "'"); |
| if (I != 0 && !BBsInPath.insert(BaseBBID).second) |
| return createProfileParseError( |
| Twine("duplicate cloned block in path: '") + BaseBBIDStr + "'"); |
| FI->second.ClonePaths.back().push_back(BaseBBID); |
| } |
| continue; |
| } |
| default: |
| return createProfileParseError(Twine("invalid specifier: '") + |
| Twine(Specifier) + "'"); |
| } |
| llvm_unreachable("should not break from this switch statement"); |
| } |
| return Error::success(); |
| } |
| |
| Error BasicBlockSectionsProfileReader::ReadV0Profile() { |
| auto FI = ProgramPathAndClusterInfo.end(); |
| // Current cluster ID corresponding to this function. |
| unsigned CurrentCluster = 0; |
| // Current position in the current cluster. |
| unsigned CurrentPosition = 0; |
| |
| // Temporary set to ensure every basic block ID appears once in the clusters |
| // of a function. |
| SmallSet<unsigned, 4> FuncBBIDs; |
| |
| for (; !LineIt.is_at_eof(); ++LineIt) { |
| StringRef S(*LineIt); |
| if (S[0] == '@') |
| continue; |
| // Check for the leading "!" |
| if (!S.consume_front("!") || S.empty()) |
| break; |
| // Check for second "!" which indicates a cluster of basic blocks. |
| if (S.consume_front("!")) { |
| // Skip the profile when we the profile iterator (FI) refers to the |
| // past-the-end element. |
| if (FI == ProgramPathAndClusterInfo.end()) |
| continue; |
| SmallVector<StringRef, 4> BBIDs; |
| S.split(BBIDs, ' '); |
| // Reset current cluster position. |
| CurrentPosition = 0; |
| for (auto BBIDStr : BBIDs) { |
| unsigned long long BBID; |
| if (getAsUnsignedInteger(BBIDStr, 10, BBID)) |
| return createProfileParseError(Twine("unsigned integer expected: '") + |
| BBIDStr + "'"); |
| if (!FuncBBIDs.insert(BBID).second) |
| return createProfileParseError( |
| Twine("duplicate basic block id found '") + BBIDStr + "'"); |
| |
| FI->second.ClusterInfo.emplace_back( |
| BBClusterInfo({{static_cast<unsigned>(BBID), 0}, |
| CurrentCluster, |
| CurrentPosition++})); |
| } |
| CurrentCluster++; |
| } else { |
| // This is a function name specifier. It may include a debug info filename |
| // specifier starting with `M=`. |
| auto [AliasesStr, DIFilenameStr] = S.split(' '); |
| SmallString<128> DIFilename; |
| if (DIFilenameStr.starts_with("M=")) { |
| DIFilename = |
| sys::path::remove_leading_dotslash(DIFilenameStr.substr(2)); |
| if (DIFilename.empty()) |
| return createProfileParseError("empty module name specifier"); |
| } else if (!DIFilenameStr.empty()) { |
| return createProfileParseError("unknown string found: '" + |
| DIFilenameStr + "'"); |
| } |
| // Function aliases are separated using '/'. We use the first function |
| // name for the cluster info mapping and delegate all other aliases to |
| // this one. |
| SmallVector<StringRef, 4> Aliases; |
| AliasesStr.split(Aliases, '/'); |
| bool FunctionFound = any_of(Aliases, [&](StringRef Alias) { |
| auto It = FunctionNameToDIFilename.find(Alias); |
| // No match if this function name is not found in this module. |
| if (It == FunctionNameToDIFilename.end()) |
| return false; |
| // Return a match if debug-info-filename is not specified. Otherwise, |
| // check for equality. |
| return DIFilename.empty() || It->second.equals(DIFilename); |
| }); |
| if (!FunctionFound) { |
| // Skip the following profile by setting the profile iterator (FI) to |
| // the past-the-end element. |
| FI = ProgramPathAndClusterInfo.end(); |
| continue; |
| } |
| for (size_t i = 1; i < Aliases.size(); ++i) |
| FuncAliasMap.try_emplace(Aliases[i], Aliases.front()); |
| |
| // Prepare for parsing clusters of this function name. |
| // Start a new cluster map for this function name. |
| auto R = ProgramPathAndClusterInfo.try_emplace(Aliases.front()); |
| // Report error when multiple profiles have been specified for the same |
| // function. |
| if (!R.second) |
| return createProfileParseError("duplicate profile for function '" + |
| Aliases.front() + "'"); |
| FI = R.first; |
| CurrentCluster = 0; |
| FuncBBIDs.clear(); |
| } |
| } |
| return Error::success(); |
| } |
| |
| // Basic Block Sections can be enabled for a subset of machine basic blocks. |
| // This is done by passing a file containing names of functions for which basic |
| // block sections are desired. Additionally, machine basic block ids of the |
| // functions can also be specified for a finer granularity. Moreover, a cluster |
| // of basic blocks could be assigned to the same section. |
| // Optionally, a debug-info filename can be specified for each function to allow |
| // distinguishing internal-linkage functions of the same name. |
| // A file with basic block sections for all of function main and three blocks |
| // for function foo (of which 1 and 2 are placed in a cluster) looks like this: |
| // (Profile for function foo is only loaded when its debug-info filename |
| // matches 'path/to/foo_file.cc'). |
| // ---------------------------- |
| // list.txt: |
| // !main |
| // !foo M=path/to/foo_file.cc |
| // !!1 2 |
| // !!4 |
| Error BasicBlockSectionsProfileReader::ReadProfile() { |
| assert(MBuf); |
| |
| unsigned long long Version = 0; |
| StringRef FirstLine(*LineIt); |
| if (FirstLine.consume_front("v")) { |
| if (getAsUnsignedInteger(FirstLine, 10, Version)) { |
| return createProfileParseError(Twine("version number expected: '") + |
| FirstLine + "'"); |
| } |
| if (Version > 1) { |
| return createProfileParseError(Twine("invalid profile version: ") + |
| Twine(Version)); |
| } |
| ++LineIt; |
| } |
| |
| switch (Version) { |
| case 0: |
| // TODO: Deprecate V0 once V1 is fully integrated downstream. |
| return ReadV0Profile(); |
| case 1: |
| return ReadV1Profile(); |
| default: |
| llvm_unreachable("Invalid profile version."); |
| } |
| } |
| |
| bool BasicBlockSectionsProfileReaderWrapperPass::doInitialization(Module &M) { |
| if (!BBSPR.MBuf) |
| return false; |
| // Get the function name to debug info filename mapping. |
| BBSPR.FunctionNameToDIFilename.clear(); |
| for (const Function &F : M) { |
| SmallString<128> DIFilename; |
| if (F.isDeclaration()) |
| continue; |
| DISubprogram *Subprogram = F.getSubprogram(); |
| if (Subprogram) { |
| llvm::DICompileUnit *CU = Subprogram->getUnit(); |
| if (CU) |
| DIFilename = sys::path::remove_leading_dotslash(CU->getFilename()); |
| } |
| [[maybe_unused]] bool inserted = |
| BBSPR.FunctionNameToDIFilename.try_emplace(F.getName(), DIFilename) |
| .second; |
| assert(inserted); |
| } |
| if (auto Err = BBSPR.ReadProfile()) |
| report_fatal_error(std::move(Err)); |
| return false; |
| } |
| |
| AnalysisKey BasicBlockSectionsProfileReaderAnalysis::Key; |
| |
| BasicBlockSectionsProfileReader |
| BasicBlockSectionsProfileReaderAnalysis::run(Function &F, |
| FunctionAnalysisManager &AM) { |
| return BasicBlockSectionsProfileReader(TM->getBBSectionsFuncListBuf()); |
| } |
| |
| bool BasicBlockSectionsProfileReaderWrapperPass::isFunctionHot( |
| StringRef FuncName) const { |
| return BBSPR.isFunctionHot(FuncName); |
| } |
| |
| std::pair<bool, SmallVector<BBClusterInfo>> |
| BasicBlockSectionsProfileReaderWrapperPass::getClusterInfoForFunction( |
| StringRef FuncName) const { |
| return BBSPR.getClusterInfoForFunction(FuncName); |
| } |
| |
| SmallVector<SmallVector<unsigned>> |
| BasicBlockSectionsProfileReaderWrapperPass::getClonePathsForFunction( |
| StringRef FuncName) const { |
| return BBSPR.getClonePathsForFunction(FuncName); |
| } |
| |
| BasicBlockSectionsProfileReader & |
| BasicBlockSectionsProfileReaderWrapperPass::getBBSPR() { |
| return BBSPR; |
| } |
| |
| ImmutablePass *llvm::createBasicBlockSectionsProfileReaderWrapperPass( |
| const MemoryBuffer *Buf) { |
| return new BasicBlockSectionsProfileReaderWrapperPass(Buf); |
| } |