| //===-- ProfiledBinary.h - Binary decoder -----------------------*- C++ -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #ifndef LLVM_TOOLS_LLVM_PROFGEN_PROFILEDBINARY_H |
| #define LLVM_TOOLS_LLVM_PROFGEN_PROFILEDBINARY_H |
| |
| #include "CallContext.h" |
| #include "ErrorHandling.h" |
| #include "llvm/ADT/DenseMap.h" |
| #include "llvm/ADT/StringRef.h" |
| #include "llvm/ADT/StringSet.h" |
| #include "llvm/DebugInfo/DWARF/DWARFContext.h" |
| #include "llvm/DebugInfo/Symbolize/Symbolize.h" |
| #include "llvm/MC/MCAsmInfo.h" |
| #include "llvm/MC/MCContext.h" |
| #include "llvm/MC/MCDisassembler/MCDisassembler.h" |
| #include "llvm/MC/MCInst.h" |
| #include "llvm/MC/MCInstPrinter.h" |
| #include "llvm/MC/MCInstrAnalysis.h" |
| #include "llvm/MC/MCInstrInfo.h" |
| #include "llvm/MC/MCObjectFileInfo.h" |
| #include "llvm/MC/MCPseudoProbe.h" |
| #include "llvm/MC/MCRegisterInfo.h" |
| #include "llvm/MC/MCSubtargetInfo.h" |
| #include "llvm/MC/MCTargetOptions.h" |
| #include "llvm/Object/ELFObjectFile.h" |
| #include "llvm/ProfileData/SampleProf.h" |
| #include "llvm/Support/CommandLine.h" |
| #include "llvm/Support/Path.h" |
| #include "llvm/Transforms/IPO/SampleContextTracker.h" |
| #include <list> |
| #include <map> |
| #include <set> |
| #include <sstream> |
| #include <string> |
| #include <unordered_map> |
| #include <unordered_set> |
| #include <vector> |
| |
| namespace llvm { |
| extern cl::opt<bool> EnableCSPreInliner; |
| extern cl::opt<bool> UseContextCostForPreInliner; |
| } // namespace llvm |
| |
| using namespace llvm; |
| using namespace sampleprof; |
| using namespace llvm::object; |
| |
| namespace llvm { |
| namespace sampleprof { |
| |
| class ProfiledBinary; |
| class MissingFrameInferrer; |
| |
| struct InstructionPointer { |
| const ProfiledBinary *Binary; |
| // Address of the executable segment of the binary. |
| uint64_t Address; |
| // Index to the sorted code address array of the binary. |
| uint64_t Index = 0; |
| InstructionPointer(const ProfiledBinary *Binary, uint64_t Address, |
| bool RoundToNext = false); |
| bool advance(); |
| bool backward(); |
| void update(uint64_t Addr); |
| }; |
| |
| // The special frame addresses. |
| enum SpecialFrameAddr { |
| // Dummy root of frame trie. |
| DummyRoot = 0, |
| // Represent all the addresses outside of current binary. |
| // This's also used to indicate the call stack should be truncated since this |
| // isn't a real call context the compiler will see. |
| ExternalAddr = 1, |
| }; |
| |
| using RangesTy = std::vector<std::pair<uint64_t, uint64_t>>; |
| |
| struct BinaryFunction { |
| StringRef FuncName; |
| // End of range is an exclusive bound. |
| RangesTy Ranges; |
| |
| uint64_t getFuncSize() { |
| uint64_t Sum = 0; |
| for (auto &R : Ranges) { |
| Sum += R.second - R.first; |
| } |
| return Sum; |
| } |
| }; |
| |
| // Info about function range. A function can be split into multiple |
| // non-continuous ranges, each range corresponds to one FuncRange. |
| struct FuncRange { |
| uint64_t StartAddress; |
| // EndAddress is an exclusive bound. |
| uint64_t EndAddress; |
| // Function the range belongs to |
| BinaryFunction *Func; |
| // Whether the start address is the real entry of the function. |
| bool IsFuncEntry = false; |
| |
| StringRef getFuncName() { return Func->FuncName; } |
| }; |
| |
| // PrologEpilog address tracker, used to filter out broken stack samples |
| // Currently we use a heuristic size (two) to infer prolog and epilog |
| // based on the start address and return address. In the future, |
| // we will switch to Dwarf CFI based tracker |
| struct PrologEpilogTracker { |
| // A set of prolog and epilog addresses. Used by virtual unwinding. |
| std::unordered_set<uint64_t> PrologEpilogSet; |
| ProfiledBinary *Binary; |
| PrologEpilogTracker(ProfiledBinary *Bin) : Binary(Bin){}; |
| |
| // Take the two addresses from the start of function as prolog |
| void |
| inferPrologAddresses(std::map<uint64_t, FuncRange> &FuncStartAddressMap) { |
| for (auto I : FuncStartAddressMap) { |
| PrologEpilogSet.insert(I.first); |
| InstructionPointer IP(Binary, I.first); |
| if (!IP.advance()) |
| break; |
| PrologEpilogSet.insert(IP.Address); |
| } |
| } |
| |
| // Take the last two addresses before the return address as epilog |
| void inferEpilogAddresses(std::unordered_set<uint64_t> &RetAddrs) { |
| for (auto Addr : RetAddrs) { |
| PrologEpilogSet.insert(Addr); |
| InstructionPointer IP(Binary, Addr); |
| if (!IP.backward()) |
| break; |
| PrologEpilogSet.insert(IP.Address); |
| } |
| } |
| }; |
| |
| // Track function byte size under different context (outlined version as well as |
| // various inlined versions). It also provides query support to get function |
| // size with the best matching context, which is used to help pre-inliner use |
| // accurate post-optimization size to make decisions. |
| // TODO: If an inlinee is completely optimized away, ideally we should have zero |
| // for its context size, currently we would misss such context since it doesn't |
| // have instructions. To fix this, we need to mark all inlinee with entry probe |
| // but without instructions as having zero size. |
| class BinarySizeContextTracker { |
| public: |
| // Add instruction with given size to a context |
| void addInstructionForContext(const SampleContextFrameVector &Context, |
| uint32_t InstrSize); |
| |
| // Get function size with a specific context. When there's no exact match |
| // for the given context, try to retrieve the size of that function from |
| // closest matching context. |
| uint32_t getFuncSizeForContext(const ContextTrieNode *Context); |
| |
| // For inlinees that are full optimized away, we can establish zero size using |
| // their remaining probes. |
| void trackInlineesOptimizedAway(MCPseudoProbeDecoder &ProbeDecoder); |
| |
| using ProbeFrameStack = SmallVector<std::pair<StringRef, uint32_t>>; |
| void trackInlineesOptimizedAway(MCPseudoProbeDecoder &ProbeDecoder, |
| MCDecodedPseudoProbeInlineTree &ProbeNode, |
| ProbeFrameStack &Context); |
| |
| void dump() { RootContext.dumpTree(); } |
| |
| private: |
| // Root node for context trie tree, node that this is a reverse context trie |
| // with callee as parent and caller as child. This way we can traverse from |
| // root to find the best/longest matching context if an exact match does not |
| // exist. It gives us the best possible estimate for function's post-inline, |
| // post-optimization byte size. |
| ContextTrieNode RootContext; |
| }; |
| |
| using AddressRange = std::pair<uint64_t, uint64_t>; |
| |
| class ProfiledBinary { |
| // Absolute path of the executable binary. |
| std::string Path; |
| // Path of the debug info binary. |
| std::string DebugBinaryPath; |
| // The target triple. |
| Triple TheTriple; |
| // Path of symbolizer path which should be pointed to binary with debug info. |
| StringRef SymbolizerPath; |
| // Options used to configure the symbolizer |
| symbolize::LLVMSymbolizer::Options SymbolizerOpts; |
| // The runtime base address that the first executable segment is loaded at. |
| uint64_t BaseAddress = 0; |
| // The runtime base address that the first loadabe segment is loaded at. |
| uint64_t FirstLoadableAddress = 0; |
| // The preferred load address of each executable segment. |
| std::vector<uint64_t> PreferredTextSegmentAddresses; |
| // The file offset of each executable segment. |
| std::vector<uint64_t> TextSegmentOffsets; |
| |
| // Mutiple MC component info |
| std::unique_ptr<const MCRegisterInfo> MRI; |
| std::unique_ptr<const MCAsmInfo> AsmInfo; |
| std::unique_ptr<const MCSubtargetInfo> STI; |
| std::unique_ptr<const MCInstrInfo> MII; |
| std::unique_ptr<MCDisassembler> DisAsm; |
| std::unique_ptr<const MCInstrAnalysis> MIA; |
| std::unique_ptr<MCInstPrinter> IPrinter; |
| // A list of text sections sorted by start RVA and size. Used to check |
| // if a given RVA is a valid code address. |
| std::set<std::pair<uint64_t, uint64_t>> TextSections; |
| |
| // A map of mapping function name to BinaryFunction info. |
| std::unordered_map<std::string, BinaryFunction> BinaryFunctions; |
| |
| // A list of binary functions that have samples. |
| std::unordered_set<const BinaryFunction *> ProfiledFunctions; |
| |
| // GUID to Elf symbol start address map |
| DenseMap<uint64_t, uint64_t> SymbolStartAddrs; |
| |
| // Start address to Elf symbol GUID map |
| std::unordered_multimap<uint64_t, uint64_t> StartAddrToSymMap; |
| |
| // An ordered map of mapping function's start address to function range |
| // relevant info. Currently to determine if the offset of ELF is the start of |
| // a real function, we leverage the function range info from DWARF. |
| std::map<uint64_t, FuncRange> StartAddrToFuncRangeMap; |
| |
| // Address to context location map. Used to expand the context. |
| std::unordered_map<uint64_t, SampleContextFrameVector> AddressToLocStackMap; |
| |
| // Address to instruction size map. Also used for quick Address lookup. |
| std::unordered_map<uint64_t, uint64_t> AddressToInstSizeMap; |
| |
| // An array of Addresses of all instructions sorted in increasing order. The |
| // sorting is needed to fast advance to the next forward/backward instruction. |
| std::vector<uint64_t> CodeAddressVec; |
| // A set of call instruction addresses. Used by virtual unwinding. |
| std::unordered_set<uint64_t> CallAddressSet; |
| // A set of return instruction addresses. Used by virtual unwinding. |
| std::unordered_set<uint64_t> RetAddressSet; |
| // An ordered set of unconditional branch instruction addresses. |
| std::set<uint64_t> UncondBranchAddrSet; |
| // A set of branch instruction addresses. |
| std::unordered_set<uint64_t> BranchAddressSet; |
| |
| // Estimate and track function prolog and epilog ranges. |
| PrologEpilogTracker ProEpilogTracker; |
| |
| // Infer missing frames due to compiler optimizations such as tail call |
| // elimination. |
| std::unique_ptr<MissingFrameInferrer> MissingContextInferrer; |
| |
| // Track function sizes under different context |
| BinarySizeContextTracker FuncSizeTracker; |
| |
| // The symbolizer used to get inline context for an instruction. |
| std::unique_ptr<symbolize::LLVMSymbolizer> Symbolizer; |
| |
| // String table owning function name strings created from the symbolizer. |
| std::unordered_set<std::string> NameStrings; |
| |
| // A collection of functions to print disassembly for. |
| StringSet<> DisassembleFunctionSet; |
| |
| // Pseudo probe decoder |
| MCPseudoProbeDecoder ProbeDecoder; |
| |
| // Function name to probe frame map for top-level outlined functions. |
| StringMap<MCDecodedPseudoProbeInlineTree *> TopLevelProbeFrameMap; |
| |
| bool UsePseudoProbes = false; |
| |
| bool UseFSDiscriminator = false; |
| |
| // Whether we need to symbolize all instructions to get function context size. |
| bool TrackFuncContextSize = false; |
| |
| // Indicate if the base loading address is parsed from the mmap event or uses |
| // the preferred address |
| bool IsLoadedByMMap = false; |
| // Use to avoid redundant warning. |
| bool MissingMMapWarned = false; |
| |
| void setPreferredTextSegmentAddresses(const ELFObjectFileBase *O); |
| |
| template <class ELFT> |
| void setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj, |
| StringRef FileName); |
| |
| void checkPseudoProbe(const ELFObjectFileBase *Obj); |
| |
| void decodePseudoProbe(const ELFObjectFileBase *Obj); |
| |
| void |
| checkUseFSDiscriminator(const ELFObjectFileBase *Obj, |
| std::map<SectionRef, SectionSymbolsTy> &AllSymbols); |
| |
| // Set up disassembler and related components. |
| void setUpDisassembler(const ELFObjectFileBase *Obj); |
| symbolize::LLVMSymbolizer::Options getSymbolizerOpts() const; |
| |
| // Load debug info of subprograms from DWARF section. |
| void loadSymbolsFromDWARF(ObjectFile &Obj); |
| |
| // Load debug info from DWARF unit. |
| void loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit); |
| |
| // Create elf symbol to its start address mapping. |
| void populateElfSymbolAddressList(const ELFObjectFileBase *O); |
| |
| // A function may be spilt into multiple non-continuous address ranges. We use |
| // this to set whether start a function range is the real entry of the |
| // function and also set false to the non-function label. |
| void setIsFuncEntry(FuncRange *FRange, StringRef RangeSymName); |
| |
| // Warn if no entry range exists in the function. |
| void warnNoFuncEntry(); |
| |
| /// Dissassemble the text section and build various address maps. |
| void disassemble(const ELFObjectFileBase *O); |
| |
| /// Helper function to dissassemble the symbol and extract info for unwinding |
| bool dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes, |
| SectionSymbolsTy &Symbols, const SectionRef &Section); |
| /// Symbolize a given instruction pointer and return a full call context. |
| SampleContextFrameVector symbolize(const InstructionPointer &IP, |
| bool UseCanonicalFnName = false, |
| bool UseProbeDiscriminator = false); |
| /// Decode the interesting parts of the binary and build internal data |
| /// structures. On high level, the parts of interest are: |
| /// 1. Text sections, including the main code section and the PLT |
| /// entries that will be used to handle cross-module call transitions. |
| /// 2. The .debug_line section, used by Dwarf-based profile generation. |
| /// 3. Pseudo probe related sections, used by probe-based profile |
| /// generation. |
| void load(); |
| |
| public: |
| ProfiledBinary(const StringRef ExeBinPath, const StringRef DebugBinPath); |
| ~ProfiledBinary(); |
| |
| void decodePseudoProbe(); |
| |
| StringRef getPath() const { return Path; } |
| StringRef getName() const { return llvm::sys::path::filename(Path); } |
| uint64_t getBaseAddress() const { return BaseAddress; } |
| void setBaseAddress(uint64_t Address) { BaseAddress = Address; } |
| |
| // Canonicalize to use preferred load address as base address. |
| uint64_t canonicalizeVirtualAddress(uint64_t Address) { |
| return Address - BaseAddress + getPreferredBaseAddress(); |
| } |
| // Return the preferred load address for the first executable segment. |
| uint64_t getPreferredBaseAddress() const { |
| return PreferredTextSegmentAddresses[0]; |
| } |
| // Return the preferred load address for the first loadable segment. |
| uint64_t getFirstLoadableAddress() const { return FirstLoadableAddress; } |
| // Return the file offset for the first executable segment. |
| uint64_t getTextSegmentOffset() const { return TextSegmentOffsets[0]; } |
| const std::vector<uint64_t> &getPreferredTextSegmentAddresses() const { |
| return PreferredTextSegmentAddresses; |
| } |
| const std::vector<uint64_t> &getTextSegmentOffsets() const { |
| return TextSegmentOffsets; |
| } |
| |
| uint64_t getInstSize(uint64_t Address) const { |
| auto I = AddressToInstSizeMap.find(Address); |
| if (I == AddressToInstSizeMap.end()) |
| return 0; |
| return I->second; |
| } |
| |
| bool addressIsCode(uint64_t Address) const { |
| return AddressToInstSizeMap.find(Address) != AddressToInstSizeMap.end(); |
| } |
| |
| bool addressIsCall(uint64_t Address) const { |
| return CallAddressSet.count(Address); |
| } |
| bool addressIsReturn(uint64_t Address) const { |
| return RetAddressSet.count(Address); |
| } |
| bool addressInPrologEpilog(uint64_t Address) const { |
| return ProEpilogTracker.PrologEpilogSet.count(Address); |
| } |
| |
| bool addressIsTransfer(uint64_t Address) { |
| return BranchAddressSet.count(Address) || RetAddressSet.count(Address) || |
| CallAddressSet.count(Address); |
| } |
| |
| bool rangeCrossUncondBranch(uint64_t Start, uint64_t End) { |
| if (Start >= End) |
| return false; |
| auto R = UncondBranchAddrSet.lower_bound(Start); |
| return R != UncondBranchAddrSet.end() && *R < End; |
| } |
| |
| uint64_t getAddressforIndex(uint64_t Index) const { |
| return CodeAddressVec[Index]; |
| } |
| |
| size_t getCodeAddrVecSize() const { return CodeAddressVec.size(); } |
| |
| bool usePseudoProbes() const { return UsePseudoProbes; } |
| bool useFSDiscriminator() const { return UseFSDiscriminator; } |
| // Get the index in CodeAddressVec for the address |
| // As we might get an address which is not the code |
| // here it would round to the next valid code address by |
| // using lower bound operation |
| uint32_t getIndexForAddr(uint64_t Address) const { |
| auto Low = llvm::lower_bound(CodeAddressVec, Address); |
| return Low - CodeAddressVec.begin(); |
| } |
| |
| uint64_t getCallAddrFromFrameAddr(uint64_t FrameAddr) const { |
| if (FrameAddr == ExternalAddr) |
| return ExternalAddr; |
| auto I = getIndexForAddr(FrameAddr); |
| FrameAddr = I ? getAddressforIndex(I - 1) : 0; |
| if (FrameAddr && addressIsCall(FrameAddr)) |
| return FrameAddr; |
| return 0; |
| } |
| |
| FuncRange *findFuncRangeForStartAddr(uint64_t Address) { |
| auto I = StartAddrToFuncRangeMap.find(Address); |
| if (I == StartAddrToFuncRangeMap.end()) |
| return nullptr; |
| return &I->second; |
| } |
| |
| // Binary search the function range which includes the input address. |
| FuncRange *findFuncRange(uint64_t Address) { |
| auto I = StartAddrToFuncRangeMap.upper_bound(Address); |
| if (I == StartAddrToFuncRangeMap.begin()) |
| return nullptr; |
| I--; |
| |
| if (Address >= I->second.EndAddress) |
| return nullptr; |
| |
| return &I->second; |
| } |
| |
| // Get all ranges of one function. |
| RangesTy getRanges(uint64_t Address) { |
| auto *FRange = findFuncRange(Address); |
| // Ignore the range which falls into plt section or system lib. |
| if (!FRange) |
| return RangesTy(); |
| |
| return FRange->Func->Ranges; |
| } |
| |
| const std::unordered_map<std::string, BinaryFunction> & |
| getAllBinaryFunctions() { |
| return BinaryFunctions; |
| } |
| |
| std::unordered_set<const BinaryFunction *> &getProfiledFunctions() { |
| return ProfiledFunctions; |
| } |
| |
| void setProfiledFunctions(std::unordered_set<const BinaryFunction *> &Funcs) { |
| ProfiledFunctions = Funcs; |
| } |
| |
| BinaryFunction *getBinaryFunction(StringRef FName) { |
| auto I = BinaryFunctions.find(FName.str()); |
| if (I == BinaryFunctions.end()) |
| return nullptr; |
| return &I->second; |
| } |
| |
| uint32_t getFuncSizeForContext(const ContextTrieNode *ContextNode) { |
| return FuncSizeTracker.getFuncSizeForContext(ContextNode); |
| } |
| |
| void inferMissingFrames(const SmallVectorImpl<uint64_t> &Context, |
| SmallVectorImpl<uint64_t> &NewContext); |
| |
| // Load the symbols from debug table and populate into symbol list. |
| void populateSymbolListFromDWARF(ProfileSymbolList &SymbolList); |
| |
| SampleContextFrameVector |
| getFrameLocationStack(uint64_t Address, bool UseProbeDiscriminator = false) { |
| InstructionPointer IP(this, Address); |
| return symbolize(IP, SymbolizerOpts.UseSymbolTable, UseProbeDiscriminator); |
| } |
| |
| const SampleContextFrameVector & |
| getCachedFrameLocationStack(uint64_t Address, |
| bool UseProbeDiscriminator = false) { |
| auto I = AddressToLocStackMap.emplace(Address, SampleContextFrameVector()); |
| if (I.second) { |
| I.first->second = getFrameLocationStack(Address, UseProbeDiscriminator); |
| } |
| return I.first->second; |
| } |
| |
| std::optional<SampleContextFrame> getInlineLeafFrameLoc(uint64_t Address) { |
| const auto &Stack = getCachedFrameLocationStack(Address); |
| if (Stack.empty()) |
| return {}; |
| return Stack.back(); |
| } |
| |
| void flushSymbolizer() { Symbolizer.reset(); } |
| |
| MissingFrameInferrer* getMissingContextInferrer() { |
| return MissingContextInferrer.get(); |
| } |
| |
| // Compare two addresses' inline context |
| bool inlineContextEqual(uint64_t Add1, uint64_t Add2); |
| |
| // Get the full context of the current stack with inline context filled in. |
| // It will search the disassembling info stored in AddressToLocStackMap. This |
| // is used as the key of function sample map |
| SampleContextFrameVector |
| getExpandedContext(const SmallVectorImpl<uint64_t> &Stack, |
| bool &WasLeafInlined); |
| // Go through instructions among the given range and record its size for the |
| // inline context. |
| void computeInlinedContextSizeForRange(uint64_t StartAddress, |
| uint64_t EndAddress); |
| |
| void computeInlinedContextSizeForFunc(const BinaryFunction *Func); |
| |
| const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const { |
| return ProbeDecoder.getCallProbeForAddr(Address); |
| } |
| |
| void getInlineContextForProbe(const MCDecodedPseudoProbe *Probe, |
| SampleContextFrameVector &InlineContextStack, |
| bool IncludeLeaf = false) const { |
| SmallVector<MCPseduoProbeFrameLocation, 16> ProbeInlineContext; |
| ProbeDecoder.getInlineContextForProbe(Probe, ProbeInlineContext, |
| IncludeLeaf); |
| for (uint32_t I = 0; I < ProbeInlineContext.size(); I++) { |
| auto &Callsite = ProbeInlineContext[I]; |
| // Clear the current context for an unknown probe. |
| if (Callsite.second == 0 && I != ProbeInlineContext.size() - 1) { |
| InlineContextStack.clear(); |
| continue; |
| } |
| InlineContextStack.emplace_back(Callsite.first, |
| LineLocation(Callsite.second, 0)); |
| } |
| } |
| const AddressProbesMap &getAddress2ProbesMap() const { |
| return ProbeDecoder.getAddress2ProbesMap(); |
| } |
| const MCPseudoProbeFuncDesc *getFuncDescForGUID(uint64_t GUID) { |
| return ProbeDecoder.getFuncDescForGUID(GUID); |
| } |
| |
| const MCPseudoProbeFuncDesc * |
| getInlinerDescForProbe(const MCDecodedPseudoProbe *Probe) { |
| return ProbeDecoder.getInlinerDescForProbe(Probe); |
| } |
| |
| bool getTrackFuncContextSize() { return TrackFuncContextSize; } |
| |
| bool getIsLoadedByMMap() { return IsLoadedByMMap; } |
| |
| void setIsLoadedByMMap(bool Value) { IsLoadedByMMap = Value; } |
| |
| bool getMissingMMapWarned() { return MissingMMapWarned; } |
| |
| void setMissingMMapWarned(bool Value) { MissingMMapWarned = Value; } |
| }; |
| |
| } // end namespace sampleprof |
| } // end namespace llvm |
| |
| #endif |