| //==-- MemProfContextDisambiguation.cpp - Disambiguate contexts ------------==// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file implements support for context disambiguation of allocation |
| // calls for profile guided heap optimization. Specifically, it uses Memprof |
| // profiles which indicate context specific allocation behavior (currently |
| // distinguishing cold vs hot memory allocations). Cloning is performed to |
| // expose the cold allocation call contexts, and the allocation calls are |
| // subsequently annotated with an attribute for later transformation. |
| // |
| // The transformations can be performed either directly on IR (regular LTO), or |
| // on a ThinLTO index (and later applied to the IR during the ThinLTO backend). |
| // Both types of LTO operate on the same base graph representation, which |
| // uses CRTP to support either IR or Index formats. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "llvm/Transforms/IPO/MemProfContextDisambiguation.h" |
| #include "llvm/ADT/DenseMap.h" |
| #include "llvm/ADT/DenseSet.h" |
| #include "llvm/ADT/SetOperations.h" |
| #include "llvm/ADT/SmallPtrSet.h" |
| #include "llvm/ADT/SmallSet.h" |
| #include "llvm/ADT/SmallVector.h" |
| #include "llvm/Analysis/MemoryProfileInfo.h" |
| #include "llvm/Analysis/ModuleSummaryAnalysis.h" |
| #include "llvm/IR/Constants.h" |
| #include "llvm/IR/Instructions.h" |
| #include "llvm/IR/Module.h" |
| #include "llvm/IR/ModuleSummaryIndex.h" |
| #include "llvm/Pass.h" |
| #include "llvm/Support/CommandLine.h" |
| #include "llvm/Support/FileSystem.h" |
| #include "llvm/Support/GraphWriter.h" |
| #include "llvm/Support/raw_ostream.h" |
| #include "llvm/Transforms/IPO.h" |
| #include <sstream> |
| #include <vector> |
| using namespace llvm; |
| using namespace llvm::memprof; |
| |
| #define DEBUG_TYPE "memprof-context-disambiguation" |
| |
| static cl::opt<std::string> DotFilePathPrefix( |
| "memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, |
| cl::value_desc("filename"), |
| cl::desc("Specify the path prefix of the MemProf dot files.")); |
| |
| static cl::opt<bool> ExportToDot("memprof-export-to-dot", cl::init(false), |
| cl::Hidden, |
| cl::desc("Export graph to dot files.")); |
| |
| static cl::opt<bool> |
| DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, |
| cl::desc("Dump CallingContextGraph to stdout after each stage.")); |
| |
| static cl::opt<bool> |
| VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, |
| cl::desc("Perform verification checks on CallingContextGraph.")); |
| |
| static cl::opt<bool> |
| VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, |
| cl::desc("Perform frequent verification checks on nodes.")); |
| |
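| // Returns true if the given AllocTypes value encodes exactly one allocation |
| // type. For example, NotCold alone yields true, while NotCold|Cold yields |
| // false; None is not expected here and asserts. |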
| inline bool hasSingleAllocType(uint8_t AllocTypes) { |
| switch (AllocTypes) { |
| case (uint8_t)AllocationType::Cold: |
| case (uint8_t)AllocationType::NotCold: |
| return true; |
| case (uint8_t)AllocationType::None: |
| assert(false); |
| break; |
| default: |
| return false; |
| } |
| llvm_unreachable("invalid alloc type"); |
| } |
| |
| /// CRTP base for graphs built from either IR or ThinLTO summary index. |
| /// |
| /// The graph represents the call contexts in all memprof metadata on allocation |
| /// calls, with nodes for the allocations themselves, as well as for the calls |
| /// in each context. The graph is initially built from the allocation memprof |
| /// metadata (or summary) MIBs. Calls with callsite metadata are then matched |
| /// onto the graph's nodes, and the graph is updated to reflect any inlining |
| /// performed on those calls. |
| /// |
| /// Each MIB (representing an allocation's call context with allocation |
| /// behavior) is assigned a unique context id during the graph build. The edges |
| /// and nodes in the graph are decorated with the context ids they carry. This |
| /// is used to correctly update the graph when cloning is performed so that we |
| /// can uniquify the context for a single (possibly cloned) allocation. |
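| /// |
| /// For example (a sketch with hypothetical ids), an allocation in function C |
| /// reached via callers A and B, with MIB contexts [C, A] (cold, context id 1) |
| /// and [C, B] (notcold, context id 2), produces an allocation node for C with |
| /// ContextIds {1, 2}, stack nodes for A (id 1) and B (id 2), and caller edges |
| /// from C's node to each of them decorated with the corresponding context id |
| /// and alloc type. |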
| template <typename DerivedCCG, typename FuncTy, typename CallTy> |
| class CallsiteContextGraph { |
| public: |
| CallsiteContextGraph() = default; |
| CallsiteContextGraph(const CallsiteContextGraph &) = default; |
| CallsiteContextGraph(CallsiteContextGraph &&) = default; |
| |
| /// Main entry point to perform analysis and transformations on graph. |
| bool process(); |
| |
| void dump() const; |
| void print(raw_ostream &OS) const; |
| |
| friend raw_ostream &operator<<(raw_ostream &OS, |
| const CallsiteContextGraph &CCG) { |
| CCG.print(OS); |
| return OS; |
| } |
| |
| friend struct GraphTraits< |
| const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>; |
| friend struct DOTGraphTraits< |
| const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>; |
| |
| void exportToDot(std::string Label) const; |
| |
| /// Represents a function clone via FuncTy pointer and clone number pair. |
| struct FuncInfo final |
| : public std::pair<FuncTy *, unsigned /*Clone number*/> { |
| using Base = std::pair<FuncTy *, unsigned>; |
| FuncInfo(const Base &B) : Base(B) {} |
| FuncInfo(FuncTy *F = nullptr, unsigned CloneNo = 0) : Base(F, CloneNo) {} |
| explicit operator bool() const { return this->first != nullptr; } |
| FuncTy *func() const { return this->first; } |
| unsigned cloneNo() const { return this->second; } |
| }; |
| |
| /// Represents a callsite clone via CallTy and clone number pair. |
| struct CallInfo final : public std::pair<CallTy, unsigned /*Clone number*/> { |
| using Base = std::pair<CallTy, unsigned>; |
| CallInfo(const Base &B) : Base(B) {} |
| CallInfo(CallTy Call = nullptr, unsigned CloneNo = 0) |
| : Base(Call, CloneNo) {} |
| explicit operator bool() const { return (bool)this->first; } |
| CallTy call() const { return this->first; } |
| unsigned cloneNo() const { return this->second; } |
| void setCloneNo(unsigned N) { this->second = N; } |
| void print(raw_ostream &OS) const { |
| if (!operator bool()) { |
| assert(!cloneNo()); |
| OS << "null Call"; |
| return; |
| } |
| call()->print(OS); |
| OS << "\t(clone " << cloneNo() << ")"; |
| } |
| void dump() const { |
| print(dbgs()); |
| dbgs() << "\n"; |
| } |
| friend raw_ostream &operator<<(raw_ostream &OS, const CallInfo &Call) { |
| Call.print(OS); |
| return OS; |
| } |
| }; |
| |
| struct ContextEdge; |
| |
| /// Node in the Callsite Context Graph |
| struct ContextNode { |
| // Keep this for now since in the IR case, where we have an Instruction*, it |
| // is not as immediately discoverable. Used for printing richer information |
| // when dumping the graph. |
| bool IsAllocation; |
| |
| // Keeps track of when the Call was reset to null because there was |
| // recursion. |
| bool Recursive = false; |
| |
| // The corresponding allocation or interior call. |
| CallInfo Call; |
| |
| // For alloc nodes this is a unique id assigned when constructed, and for |
| // callsite stack nodes it is the original stack id when the node is |
| // constructed from the memprof MIB metadata on the alloc nodes. Note that |
| // this is only used when matching callsite metadata onto the stack nodes |
| // created when processing the allocation memprof MIBs, and for labeling |
| // nodes in the dot graph. Therefore we don't bother to assign a value for |
| // clones. |
| uint64_t OrigStackOrAllocId = 0; |
| |
| // This will be formed by ORing together the AllocationType enum values |
| // for contexts including this node. |
| uint8_t AllocTypes = 0; |
| |
| // Edges to all callees in the profiled call stacks. |
| // TODO: Should this be a map (from Callee node) for more efficient lookup? |
| std::vector<std::shared_ptr<ContextEdge>> CalleeEdges; |
| |
| // Edges to all callers in the profiled call stacks. |
| // TODO: Should this be a map (from Caller node) for more efficient lookup? |
| std::vector<std::shared_ptr<ContextEdge>> CallerEdges; |
| |
| // The set of IDs for contexts including this node. |
| DenseSet<uint32_t> ContextIds; |
| |
| // List of clones of this ContextNode, initially empty. |
| std::vector<ContextNode *> Clones; |
| |
| // If a clone, points to the original uncloned node. |
| ContextNode *CloneOf = nullptr; |
| |
| ContextNode(bool IsAllocation) : IsAllocation(IsAllocation), Call() {} |
| |
| ContextNode(bool IsAllocation, CallInfo C) |
| : IsAllocation(IsAllocation), Call(C) {} |
| |
| std::unique_ptr<ContextNode> clone() { |
| auto Clone = std::make_unique<ContextNode>(IsAllocation, Call); |
| if (CloneOf) { |
| CloneOf->Clones.push_back(Clone.get()); |
| Clone->CloneOf = CloneOf; |
| } else { |
| Clones.push_back(Clone.get()); |
| Clone->CloneOf = this; |
| } |
| return Clone; |
| } |
| |
| ContextNode *getOrigNode() { |
| if (!CloneOf) |
| return this; |
| return CloneOf; |
| } |
| |
| void addOrUpdateCallerEdge(ContextNode *Caller, AllocationType AllocType, |
| unsigned int ContextId); |
| |
| ContextEdge *findEdgeFromCallee(const ContextNode *Callee); |
| ContextEdge *findEdgeFromCaller(const ContextNode *Caller); |
| void eraseCalleeEdge(const ContextEdge *Edge); |
| void eraseCallerEdge(const ContextEdge *Edge); |
| |
| void setCall(CallInfo C) { Call = C; } |
| |
| bool hasCall() const { return (bool)Call.call(); } |
| |
| void printCall(raw_ostream &OS) const { Call.print(OS); } |
| |
| // True if this node was effectively removed from the graph, in which case |
| // its context id set, caller edges, and callee edges should all be empty. |
| bool isRemoved() const { |
| assert(ContextIds.empty() == |
| (CalleeEdges.empty() && CallerEdges.empty())); |
| return ContextIds.empty(); |
| } |
| |
| void dump() const; |
| void print(raw_ostream &OS) const; |
| |
| friend raw_ostream &operator<<(raw_ostream &OS, const ContextNode &Node) { |
| Node.print(OS); |
| return OS; |
| } |
| }; |
| |
| /// Edge in the Callsite Context Graph from a ContextNode N to a caller or |
| /// callee. |
| struct ContextEdge { |
| ContextNode *Callee; |
| ContextNode *Caller; |
| |
| // This will be formed by ORing together the AllocationType enum values |
| // for contexts including this edge. |
| uint8_t AllocTypes = 0; |
| |
| // The set of IDs for contexts including this edge. |
| DenseSet<uint32_t> ContextIds; |
| |
| ContextEdge(ContextNode *Callee, ContextNode *Caller, uint8_t AllocType, |
| DenseSet<uint32_t> ContextIds) |
| : Callee(Callee), Caller(Caller), AllocTypes(AllocType), |
| ContextIds(ContextIds) {} |
| |
| DenseSet<uint32_t> &getContextIds() { return ContextIds; } |
| |
| void dump() const; |
| void print(raw_ostream &OS) const; |
| |
| friend raw_ostream &operator<<(raw_ostream &OS, const ContextEdge &Edge) { |
| Edge.print(OS); |
| return OS; |
| } |
| }; |
| |
| protected: |
| /// Get a list of nodes corresponding to the stack ids in the given callsite |
| /// context. |
| template <class NodeT, class IteratorT> |
| std::vector<uint64_t> |
| getStackIdsWithContextNodes(CallStack<NodeT, IteratorT> &CallsiteContext); |
| |
| /// Adds nodes for the given allocation and any stack ids on its memprof MIB |
| /// metadata (or summary). |
| ContextNode *addAllocNode(CallInfo Call, const FuncTy *F); |
| |
| /// Adds nodes for the given MIB stack ids. |
| template <class NodeT, class IteratorT> |
| void addStackNodesForMIB(ContextNode *AllocNode, |
| CallStack<NodeT, IteratorT> &StackContext, |
| CallStack<NodeT, IteratorT> &CallsiteContext, |
| AllocationType AllocType); |
| |
| /// Matches all callsite metadata (or summary) to the nodes created for |
| /// allocation memprof MIB metadata, synthesizing new nodes to reflect any |
| /// inlining performed on those callsite instructions. |
| void updateStackNodes(); |
| |
| /// Update the graph to conservatively handle any callsite stack nodes whose |
| /// calls target multiple different callee functions. |
| void handleCallsitesWithMultipleTargets(); |
| |
| /// Save lists of calls with MemProf metadata in each function, for faster |
| /// iteration. |
| std::vector<std::pair<FuncTy *, std::vector<CallInfo>>> |
| FuncToCallsWithMetadata; |
| |
| /// Map from callsite node to the enclosing caller function. |
| std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc; |
| |
| private: |
| using EdgeIter = typename std::vector<std::shared_ptr<ContextEdge>>::iterator; |
| |
| using CallContextInfo = std::tuple<CallTy, std::vector<uint64_t>, |
| const FuncTy *, DenseSet<uint32_t>>; |
| |
| /// Assigns the given Node to calls at or inlined into the location with |
| /// the Node's stack id, after post order traversing and processing its |
| /// caller nodes. Uses the call information recorded in the given |
| /// StackIdToMatchingCalls map, and creates new nodes for inlined sequences |
| /// as needed. Called by updateStackNodes which sets up the given |
| /// StackIdToMatchingCalls map. |
| void assignStackNodesPostOrder( |
| ContextNode *Node, DenseSet<const ContextNode *> &Visited, |
| DenseMap<uint64_t, std::vector<CallContextInfo>> &StackIdToMatchingCalls); |
| |
| /// Duplicates the given set of context ids, updating the provided |
| /// map from each original id with the newly generated context ids, |
| /// and returning the new duplicated id set. |
| DenseSet<uint32_t> duplicateContextIds( |
| const DenseSet<uint32_t> &StackSequenceContextIds, |
| DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds); |
| |
| /// Propagates all duplicated context ids across the graph. |
| void propagateDuplicateContextIds( |
| const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds); |
| |
| /// Connect the NewNode to OrigNode's callees if TowardsCallee is true, |
| /// else to its callers. Also updates OrigNode's edges to remove any context |
| /// ids moved to the newly created edge. |
| void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode, |
| bool TowardsCallee); |
| |
| /// Get the stack id corresponding to the given Id or Index (for IR this will |
| /// return itself, for a summary index this will return the id recorded in the |
| /// index for that stack id index value). |
| uint64_t getStackId(uint64_t IdOrIndex) const { |
| return static_cast<const DerivedCCG *>(this)->getStackId(IdOrIndex); |
| } |
| |
| /// Returns true if the given call targets the given function. |
| bool calleeMatchesFunc(CallTy Call, const FuncTy *Func) { |
| return static_cast<DerivedCCG *>(this)->calleeMatchesFunc(Call, Func); |
| } |
| |
| /// Get a list of nodes corresponding to the stack ids in the given |
| /// callsite's context. |
| std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy Call) { |
| return static_cast<DerivedCCG *>(this)->getStackIdsWithContextNodesForCall( |
| Call); |
| } |
| |
| /// Get the last stack id in the context for callsite. |
| uint64_t getLastStackId(CallTy Call) { |
| return static_cast<DerivedCCG *>(this)->getLastStackId(Call); |
| } |
| |
| /// Gets a label to use in the dot graph for the given call clone in the given |
| /// function. |
| std::string getLabel(const FuncTy *Func, const CallTy Call, |
| unsigned CloneNo) const { |
| return static_cast<const DerivedCCG *>(this)->getLabel(Func, Call, CloneNo); |
| } |
| |
| /// Helpers to find the node corresponding to the given call or stackid. |
| ContextNode *getNodeForInst(const CallInfo &C); |
| ContextNode *getNodeForAlloc(const CallInfo &C); |
| ContextNode *getNodeForStackId(uint64_t StackId); |
| |
| /// Removes the node information recorded for the given call. |
| void unsetNodeForInst(const CallInfo &C); |
| |
| /// Computes the alloc type corresponding to the given context ids, by |
| /// unioning their recorded alloc types. |
| uint8_t computeAllocType(DenseSet<uint32_t> &ContextIds); |
| |
| /// Map from each context ID to the AllocationType assigned to that context. |
| std::map<uint32_t, AllocationType> ContextIdToAllocationType; |
| |
| /// Identifies the context node created for a stack id when adding the MIB |
| /// contexts to the graph. This is used to locate the context nodes when |
| /// trying to assign the corresponding callsites with those stack ids to these |
| /// nodes. |
| std::map<uint64_t, ContextNode *> StackEntryIdToContextNodeMap; |
| |
| /// Maps to track the calls to their corresponding nodes in the graph. |
| std::map<const CallInfo, ContextNode *> AllocationCallToContextNodeMap; |
| std::map<const CallInfo, ContextNode *> NonAllocationCallToContextNodeMap; |
| |
| /// Owner of all ContextNode unique_ptrs. |
| std::vector<std::unique_ptr<ContextNode>> NodeOwner; |
| |
| /// Perform sanity checks on graph when requested. |
| void check() const; |
| |
| /// Keeps track of the last unique context id assigned. |
| unsigned int LastContextId = 0; |
| }; |
| |
| template <typename DerivedCCG, typename FuncTy, typename CallTy> |
| using ContextNode = |
| typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode; |
| template <typename DerivedCCG, typename FuncTy, typename CallTy> |
| using ContextEdge = |
| typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge; |
| template <typename DerivedCCG, typename FuncTy, typename CallTy> |
| using FuncInfo = |
| typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo; |
| template <typename DerivedCCG, typename FuncTy, typename CallTy> |
| using CallInfo = |
| typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo; |
| |
| /// CRTP derived class for graphs built from IR (regular LTO). |
| class ModuleCallsiteContextGraph |
| : public CallsiteContextGraph<ModuleCallsiteContextGraph, Function, |
| Instruction *> { |
| public: |
| ModuleCallsiteContextGraph(Module &M); |
| |
| private: |
| friend CallsiteContextGraph<ModuleCallsiteContextGraph, Function, |
| Instruction *>; |
| |
| uint64_t getStackId(uint64_t IdOrIndex) const; |
| bool calleeMatchesFunc(Instruction *Call, const Function *Func); |
| uint64_t getLastStackId(Instruction *Call); |
| std::vector<uint64_t> getStackIdsWithContextNodesForCall(Instruction *Call); |
| std::string getLabel(const Function *Func, const Instruction *Call, |
| unsigned CloneNo) const; |
| |
| const Module &Mod; |
| }; |
| |
| /// Represents a call in the summary index graph, which can either be an |
| /// allocation or an interior callsite node in an allocation's context. |
| /// Holds a pointer to the corresponding data structure in the index. |
| struct IndexCall : public PointerUnion<CallsiteInfo *, AllocInfo *> { |
| IndexCall() : PointerUnion() {} |
| IndexCall(std::nullptr_t) : IndexCall() {} |
| IndexCall(CallsiteInfo *StackNode) : PointerUnion(StackNode) {} |
| IndexCall(AllocInfo *AllocNode) : PointerUnion(AllocNode) {} |
| |
| IndexCall *operator->() { return this; } |
| |
| PointerUnion<CallsiteInfo *, AllocInfo *> getBase() const { return *this; } |
| |
| void print(raw_ostream &OS) const { |
| if (auto *AI = llvm::dyn_cast_if_present<AllocInfo *>(getBase())) { |
| OS << *AI; |
| } else { |
| auto *CI = llvm::dyn_cast_if_present<CallsiteInfo *>(getBase()); |
| assert(CI); |
| OS << *CI; |
| } |
| } |
| }; |
| |
| /// CRTP derived class for graphs built from summary index (ThinLTO). |
| class IndexCallsiteContextGraph |
| : public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary, |
| IndexCall> { |
| public: |
| IndexCallsiteContextGraph( |
| ModuleSummaryIndex &Index, |
| function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> |
| isPrevailing); |
| |
| private: |
| friend CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary, |
| IndexCall>; |
| |
| uint64_t getStackId(uint64_t IdOrIndex) const; |
| bool calleeMatchesFunc(IndexCall &Call, const FunctionSummary *Func); |
| uint64_t getLastStackId(IndexCall &Call); |
| std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &Call); |
| std::string getLabel(const FunctionSummary *Func, const IndexCall &Call, |
| unsigned CloneNo) const; |
| |
| // Saves the mapping from function summaries containing memprof records back |
| // to their VI, for use in checking and debugging. |
| std::map<const FunctionSummary *, ValueInfo> FSToVIMap; |
| |
| const ModuleSummaryIndex &Index; |
| }; |
| |
| namespace { |
| |
| struct FieldSeparator { |
| bool Skip = true; |
| const char *Sep; |
| |
| FieldSeparator(const char *Sep = ", ") : Sep(Sep) {} |
| }; |
| |
| raw_ostream &operator<<(raw_ostream &OS, FieldSeparator &FS) { |
| if (FS.Skip) { |
| FS.Skip = false; |
| return OS; |
| } |
| return OS << FS.Sep; |
| } |
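| |
| // For example, a minimal usage sketch with some raw_ostream OS: |
| //   FieldSeparator FS; |
| //   for (int Id : {1, 2, 3}) |
| //     OS << FS << Id;  // prints "1, 2, 3" |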
| |
| // Map the uint8_t alloc types (which may contain NotCold|Cold) to the alloc |
| // type we should actually use on the corresponding allocation. |
| // If we can't clone a node that has NotCold+Cold alloc type, we will fall |
| // back to using NotCold. So don't bother cloning to distinguish NotCold+Cold |
| // from NotCold. |
| AllocationType allocTypeToUse(uint8_t AllocTypes) { |
| assert(AllocTypes != (uint8_t)AllocationType::None); |
| if (AllocTypes == |
| ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold)) |
| return AllocationType::NotCold; |
| else |
| return (AllocationType)AllocTypes; |
| } |
| |
| } // end anonymous namespace |
| |
| template <typename DerivedCCG, typename FuncTy, typename CallTy> |
| typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode * |
| CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst( |
| const CallInfo &C) { |
| ContextNode *Node = getNodeForAlloc(C); |
| if (Node) |
| return Node; |
| |
| auto NonAllocCallNode = NonAllocationCallToContextNodeMap.find(C); |
| if (NonAllocCallNode != NonAllocationCallToContextNodeMap.end()) { |
| return NonAllocCallNode->second; |
| } |
| return nullptr; |
| } |
| |
| template <typename DerivedCCG, typename FuncTy, typename CallTy> |
| typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode * |
| CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc( |
| const CallInfo &C) { |
| auto AllocCallNode = AllocationCallToContextNodeMap.find(C); |
| if (AllocCallNode != AllocationCallToContextNodeMap.end()) { |
| return AllocCallNode->second; |
| } |
| return nullptr; |
| } |
| |
| template <typename DerivedCCG, typename FuncTy, typename CallTy> |
| typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode * |
| CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId( |
| uint64_t StackId) { |
| auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId); |
| if (StackEntryNode != StackEntryIdToContextNodeMap.end()) |
| return StackEntryNode->second; |
| return nullptr; |
| } |
| |
| template <typename DerivedCCG, typename FuncTy, typename CallTy> |
| void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::unsetNodeForInst( |
| const CallInfo &C) { |
| AllocationCallToContextNodeMap.erase(C) || |
| NonAllocationCallToContextNodeMap.erase(C); |
| assert(!AllocationCallToContextNodeMap.count(C) && |
| !NonAllocationCallToContextNodeMap.count(C)); |
| } |
| |
| template <typename DerivedCCG, typename FuncTy, typename CallTy> |
| void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode:: |
| addOrUpdateCallerEdge(ContextNode *Caller, AllocationType AllocType, |
| unsigned int ContextId) { |
| for (auto &Edge : CallerEdges) { |
| if (Edge->Caller == Caller) { |
| Edge->AllocTypes |= (uint8_t)AllocType; |
| Edge->getContextIds().insert(ContextId); |
| return; |
| } |
| } |
| std::shared_ptr<ContextEdge> Edge = std::make_shared<ContextEdge>( |
| this, Caller, (uint8_t)AllocType, DenseSet<uint32_t>({ContextId})); |
| CallerEdges.push_back(Edge); |
| Caller->CalleeEdges.push_back(Edge); |
| } |
| |
| template <typename DerivedCCG, typename FuncTy, typename CallTy> |
| typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge * |
| CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode:: |
| findEdgeFromCallee(const ContextNode *Callee) { |
| for (const auto &Edge : CalleeEdges) |
| if (Edge->Callee == Callee) |
| return Edge.get(); |
| return nullptr; |
| } |
| |
| template <typename DerivedCCG, typename FuncTy, typename CallTy> |
| typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge * |
| CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode:: |
| findEdgeFromCaller(const ContextNode *Caller) { |
| for (const auto &Edge : CallerEdges) |
| if (Edge->Caller == Caller) |
| return Edge.get(); |
| return nullptr; |
| } |
| |
| template <typename DerivedCCG, typename FuncTy, typename CallTy> |
| void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode:: |
| eraseCalleeEdge(const ContextEdge *Edge) { |
| auto EI = |
| std::find_if(CalleeEdges.begin(), CalleeEdges.end(), |
| [Edge](const std::shared_ptr<ContextEdge> &CalleeEdge) { |
| return CalleeEdge.get() == Edge; |
| }); |
| assert(EI != CalleeEdges.end()); |
| CalleeEdges.erase(EI); |
| } |
| |
| template <typename DerivedCCG, typename FuncTy, typename CallTy> |
| void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode:: |
| eraseCallerEdge(const ContextEdge *Edge) { |
| auto EI = |
| std::find_if(CallerEdges.begin(), CallerEdges.end(), |
| [Edge](const std::shared_ptr<ContextEdge> &CallerEdge) { |
| return CallerEdge.get() == Edge; |
| }); |
| assert(EI != CallerEdges.end()); |
| CallerEdges.erase(EI); |
| } |
| |
| template <typename DerivedCCG, typename FuncTy, typename CallTy> |
| uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType( |
| DenseSet<uint32_t> &ContextIds) { |
| uint8_t BothTypes = |
| (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold; |
| uint8_t AllocType = (uint8_t)AllocationType::None; |
| for (auto Id : ContextIds) { |
| AllocType |= (uint8_t)ContextIdToAllocationType[Id]; |
| // Bail out early if the alloc type has reached both; no further refinement |
| // is possible. |
| if (AllocType == BothTypes) |
| return AllocType; |
| } |
| return AllocType; |
| } |
| |
| template <typename DerivedCCG, typename FuncTy, typename CallTy> |
| typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode * |
| CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode( |
| CallInfo Call, const FuncTy *F) { |
| assert(!getNodeForAlloc(Call)); |
| NodeOwner.push_back( |
| std::make_unique<ContextNode>(/*IsAllocation=*/true, Call)); |
| ContextNode *AllocNode = NodeOwner.back().get(); |
| AllocationCallToContextNodeMap[Call] = AllocNode; |
| NodeToCallingFunc[AllocNode] = F; |
| // Use LastContextId as a unique id for MIB allocation nodes. |
| AllocNode->OrigStackOrAllocId = LastContextId; |
| // Alloc type should be updated as we add in the MIBs. We should assert |
| // afterwards that it is not still None. |
| AllocNode->AllocTypes = (uint8_t)AllocationType::None; |
| |
| return AllocNode; |
| } |
| |
| template <typename DerivedCCG, typename FuncTy, typename CallTy> |
| template <class NodeT, class IteratorT> |
| void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB( |
| ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext, |
| CallStack<NodeT, IteratorT> &CallsiteContext, AllocationType AllocType) { |
| ContextIdToAllocationType[++LastContextId] = AllocType; |
| |
| // Update alloc type and context ids for this MIB. |
| AllocNode->AllocTypes |= (uint8_t)AllocType; |
| AllocNode->ContextIds.insert(LastContextId); |
| |
| // Now add or update nodes for each stack id in alloc's context. |
| // Later when processing the stack ids on non-alloc callsites we will adjust |
| // for any inlining in the context. |
| ContextNode *PrevNode = AllocNode; |
| // Look for recursion (direct recursion should have been collapsed by |
| // module summary analysis, here we should just be detecting mutual |
| // recursion). Mark these nodes so we don't try to clone. |
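| // For example, a context whose remaining stack ids are [X, Y, X] (the same |
| // callsite X appearing twice due to mutual recursion) fails the set insert |
| // for the second X, marking X's node Recursive below. |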
| SmallSet<uint64_t, 8> StackIdSet; |
| // Skip any stack ids already covered by the allocation call's own callsite |
| // context (i.e. frames inlined at the allocation). |
| for (auto ContextIter = StackContext.beginAfterSharedPrefix(CallsiteContext); |
| ContextIter != StackContext.end(); ++ContextIter) { |
| auto StackId = getStackId(*ContextIter); |
| ContextNode *StackNode = getNodeForStackId(StackId); |
| if (!StackNode) { |
| NodeOwner.push_back( |
| std::make_unique<ContextNode>(/*IsAllocation=*/false)); |
| StackNode = NodeOwner.back().get(); |
| StackEntryIdToContextNodeMap[StackId] = StackNode; |
| StackNode->OrigStackOrAllocId = StackId; |
| } |
| auto Ins = StackIdSet.insert(StackId); |
| if (!Ins.second) |
| StackNode->Recursive = true; |
| StackNode->ContextIds.insert(LastContextId); |
| StackNode->AllocTypes |= (uint8_t)AllocType; |
| PrevNode->addOrUpdateCallerEdge(StackNode, AllocType, LastContextId); |
| PrevNode = StackNode; |
| } |
| } |
| |
| template <typename DerivedCCG, typename FuncTy, typename CallTy> |
| DenseSet<uint32_t> |
| CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds( |
| const DenseSet<uint32_t> &StackSequenceContextIds, |
| DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) { |
| DenseSet<uint32_t> NewContextIds; |
| for (auto OldId : StackSequenceContextIds) { |
| NewContextIds.insert(++LastContextId); |
| OldToNewContextIds[OldId].insert(LastContextId); |
| assert(ContextIdToAllocationType.count(OldId)); |
| // The new context has the same allocation type as original. |
| ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId]; |
| } |
| return NewContextIds; |
| } |
| |
| template <typename DerivedCCG, typename FuncTy, typename CallTy> |
| void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>:: |
| propagateDuplicateContextIds( |
| const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) { |
| // Build a set of duplicated context ids corresponding to the input id set. |
| auto GetNewIds = [&OldToNewContextIds](const DenseSet<uint32_t> &ContextIds) { |
| DenseSet<uint32_t> NewIds; |
| for (auto Id : ContextIds) |
| if (auto NewId = OldToNewContextIds.find(Id); |
| NewId != OldToNewContextIds.end()) |
| NewIds.insert(NewId->second.begin(), NewId->second.end()); |
| return NewIds; |
| }; |
| |
| // Recursively update context ids sets along caller edges. |
| auto UpdateCallers = [&](ContextNode *Node, |
| DenseSet<const ContextEdge *> &Visited, |
| auto &&UpdateCallers) -> void { |
| for (const auto &Edge : Node->CallerEdges) { |
| auto Inserted = Visited.insert(Edge.get()); |
| if (!Inserted.second) |
| continue; |
| ContextNode *NextNode = Edge->Caller; |
| DenseSet<uint32_t> NewIdsToAdd = GetNewIds(Edge->getContextIds()); |
| // Only need to recursively iterate to NextNode via this caller edge if |
| // it resulted in any added ids to NextNode. |
| if (!NewIdsToAdd.empty()) { |
| Edge->getContextIds().insert(NewIdsToAdd.begin(), NewIdsToAdd.end()); |
| NextNode->ContextIds.insert(NewIdsToAdd.begin(), NewIdsToAdd.end()); |
| UpdateCallers(NextNode, Visited, UpdateCallers); |
| } |
| } |
| }; |
| |
| DenseSet<const ContextEdge *> Visited; |
| for (auto &Entry : AllocationCallToContextNodeMap) { |
| auto *Node = Entry.second; |
| // Update ids on the allocation nodes before calling the recursive |
| // update along caller edges, since this simplifies the logic during |
| // that traversal. |
| DenseSet<uint32_t> NewIdsToAdd = GetNewIds(Node->ContextIds); |
| Node->ContextIds.insert(NewIdsToAdd.begin(), NewIdsToAdd.end()); |
| UpdateCallers(Node, Visited, UpdateCallers); |
| } |
| } |
| |
| template <typename DerivedCCG, typename FuncTy, typename CallTy> |
| void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode( |
| ContextNode *NewNode, ContextNode *OrigNode, bool TowardsCallee) { |
| // Make a copy of the context ids, since this will be adjusted below as they |
| // are moved. |
| DenseSet<uint32_t> RemainingContextIds = NewNode->ContextIds; |
| auto &OrigEdges = |
| TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges; |
| // Increment iterator in loop so that we can remove edges as needed. |
| for (auto EI = OrigEdges.begin(); EI != OrigEdges.end();) { |
| auto Edge = *EI; |
| // Remove any matching context ids from Edge and return the set that were |
| // found and removed; these become the new edge's context ids. Also update |
| // the remaining (not found) ids. |
| DenseSet<uint32_t> NewEdgeContextIds, NotFoundContextIds; |
| set_subtract(Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds, |
| NotFoundContextIds); |
| RemainingContextIds.swap(NotFoundContextIds); |
| // If no matching context ids for this edge, skip it. |
| if (NewEdgeContextIds.empty()) { |
| ++EI; |
| continue; |
| } |
| if (TowardsCallee) { |
| auto NewEdge = std::make_shared<ContextEdge>( |
| Edge->Callee, NewNode, computeAllocType(NewEdgeContextIds), |
| NewEdgeContextIds); |
| NewNode->CalleeEdges.push_back(NewEdge); |
| NewEdge->Callee->CallerEdges.push_back(NewEdge); |
| } else { |
| auto NewEdge = std::make_shared<ContextEdge>( |
| NewNode, Edge->Caller, computeAllocType(NewEdgeContextIds), |
| NewEdgeContextIds); |
| NewNode->CallerEdges.push_back(NewEdge); |
| NewEdge->Caller->CalleeEdges.push_back(NewEdge); |
| } |
| // Remove old edge if context ids empty. |
| if (Edge->getContextIds().empty()) { |
| if (TowardsCallee) { |
| Edge->Callee->eraseCallerEdge(Edge.get()); |
| EI = OrigNode->CalleeEdges.erase(EI); |
| } else { |
| Edge->Caller->eraseCalleeEdge(Edge.get()); |
| EI = OrigNode->CallerEdges.erase(EI); |
| } |
| continue; |
| } |
| ++EI; |
| } |
| } |
| |
| template <typename DerivedCCG, typename FuncTy, typename CallTy> |
| void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>:: |
| assignStackNodesPostOrder(ContextNode *Node, |
| DenseSet<const ContextNode *> &Visited, |
| DenseMap<uint64_t, std::vector<CallContextInfo>> |
| &StackIdToMatchingCalls) { |
| auto Inserted = Visited.insert(Node); |
| if (!Inserted.second) |
| return; |
| // Post order traversal. Iterate over a copy since we may add nodes and |
| // therefore new callers during the recursive call, invalidating any |
| // iterator over the original edge vector. We don't need to process these |
| // new nodes as they were already processed on creation. |
| auto CallerEdges = Node->CallerEdges; |
| for (auto &Edge : CallerEdges) { |
| // Skip any that have been removed during the recursion. |
| if (!Edge) |
| continue; |
| assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls); |
| } |
| |
| // If this node's stack id is in the map, update the graph to contain new |
| // nodes representing any inlining at interior callsites. Note we move the |
| // associated context ids over to the new nodes. |
| |
| // Ignore this node if it is for an allocation or we didn't record any |
| // stack id lists ending at it. |
| if (Node->IsAllocation || |
| !StackIdToMatchingCalls.count(Node->OrigStackOrAllocId)) |
| return; |
| |
| auto &Calls = StackIdToMatchingCalls[Node->OrigStackOrAllocId]; |
| // Handle the simple case first. A single call with a single stack id. |
| // In this case there is no need to create any new context nodes; simply |
| // assign the context node for the stack id to this Call. |
| if (Calls.size() == 1) { |
| auto &[Call, Ids, Func, SavedContextIds] = Calls[0]; |
| if (Ids.size() == 1) { |
| assert(SavedContextIds.empty()); |
| // It should be this Node |
| assert(Node == getNodeForStackId(Ids[0])); |
| if (Node->Recursive) |
| return; |
| Node->setCall(Call); |
| NonAllocationCallToContextNodeMap[Call] = Node; |
| NodeToCallingFunc[Node] = Func; |
| return; |
| } |
| } |
| |
| // Find the node for the last stack id, which should be the same |
| // across all calls recorded for this id, and is this node's id. |
| uint64_t LastId = Node->OrigStackOrAllocId; |
| ContextNode *LastNode = getNodeForStackId(LastId); |
| // We should only have kept stack ids that had nodes. |
| assert(LastNode); |
| |
| for (unsigned I = 0; I < Calls.size(); I++) { |
| auto &[Call, Ids, Func, SavedContextIds] = Calls[I]; |
| // Skip any for which we didn't assign any ids; these don't get a node in |
| // the graph. |
| if (SavedContextIds.empty()) |
| continue; |
| |
| assert(LastId == Ids.back()); |
| |
| ContextNode *FirstNode = getNodeForStackId(Ids[0]); |
| assert(FirstNode); |
| |
| // Recompute the context ids for this stack id sequence (the |
| // intersection of the context ids of the corresponding nodes). |
| // Start with the ids we saved in the map for this call, which could be |
| // duplicated context ids. We have to recompute as we might have overlap |
| // between the saved context ids for different last nodes, and removed them |
| // already during the post order traversal. |
| set_intersect(SavedContextIds, FirstNode->ContextIds); |
| ContextNode *PrevNode = nullptr; |
| for (auto Id : Ids) { |
| ContextNode *CurNode = getNodeForStackId(Id); |
| // We should only have kept stack ids that had nodes and weren't |
| // recursive. |
| assert(CurNode); |
| assert(!CurNode->Recursive); |
| if (!PrevNode) { |
| PrevNode = CurNode; |
| continue; |
| } |
| auto *Edge = CurNode->findEdgeFromCallee(PrevNode); |
| if (!Edge) { |
| SavedContextIds.clear(); |
| break; |
| } |
| PrevNode = CurNode; |
| set_intersect(SavedContextIds, Edge->getContextIds()); |
| |
| // If we now have no context ids for the clone, skip this call. |
| if (SavedContextIds.empty()) |
| break; |
| } |
| if (SavedContextIds.empty()) |
| continue; |
| |
| // Create new context node. |
| NodeOwner.push_back( |
| std::make_unique<ContextNode>(/*IsAllocation=*/false, Call)); |
| ContextNode *NewNode = NodeOwner.back().get(); |
| NodeToCallingFunc[NewNode] = Func; |
| NonAllocationCallToContextNodeMap[Call] = NewNode; |
| NewNode->ContextIds = SavedContextIds; |
| NewNode->AllocTypes = computeAllocType(NewNode->ContextIds); |
| |
| // Connect to callees of innermost stack frame in inlined call chain. |
| // This updates context ids for FirstNode's callees to reflect those |
| // moved to NewNode. |
| connectNewNode(NewNode, FirstNode, /*TowardsCallee=*/true); |
| |
| // Connect to callers of outermost stack frame in inlined call chain. |
| // This updates context ids for LastNode's callers to reflect those |
| // moved to NewNode. |
| connectNewNode(NewNode, LastNode, /*TowardsCallee=*/false); |
| |
| // Now we need to remove context ids from edges/nodes between First and |
| // Last Node. |
| PrevNode = nullptr; |
| for (auto Id : Ids) { |
| ContextNode *CurNode = getNodeForStackId(Id); |
| // We should only have kept stack ids that had nodes. |
| assert(CurNode); |
| |
| // Remove the context ids moved to NewNode from CurNode, and the |
| // edge from the prior node. |
| set_subtract(CurNode->ContextIds, NewNode->ContextIds); |
| if (PrevNode) { |
| auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode); |
| assert(PrevEdge); |
| set_subtract(PrevEdge->getContextIds(), NewNode->ContextIds); |
| if (PrevEdge->getContextIds().empty()) { |
| PrevNode->eraseCallerEdge(PrevEdge); |
| CurNode->eraseCalleeEdge(PrevEdge); |
| } |
| } |
| PrevNode = CurNode; |
| } |
| } |
| } |
| |
| template <typename DerivedCCG, typename FuncTy, typename CallTy> |
| void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() { |
| // Map of stack id to all calls with that as the last (outermost caller) |
| // callsite id that has a context node (some might not due to pruning |
| // performed during matching of the allocation profile contexts). |
| // The CallContextInfo contains the Call and a list of its stack ids with |
| // ContextNodes, the function containing Call, and the set of context ids |
| // the analysis will eventually identify for use in any new node created |
| // for that callsite. |
| DenseMap<uint64_t, std::vector<CallContextInfo>> StackIdToMatchingCalls; |
| for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) { |
| for (auto &Call : CallsWithMetadata) { |
| // Ignore allocations, already handled. |
| if (AllocationCallToContextNodeMap.count(Call)) |
| continue; |
| auto StackIdsWithContextNodes = |
| getStackIdsWithContextNodesForCall(Call.call()); |
| // If there were no nodes created for MIBs on allocs (maybe this was in |
| // the unambiguous part of the MIB stack that was pruned), ignore. |
| if (StackIdsWithContextNodes.empty()) |
| continue; |
| // Otherwise, record this Call along with the list of ids for the last |
| // (outermost caller) stack id with a node. |
| StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back( |
| {Call.call(), StackIdsWithContextNodes, Func, {}}); |
| } |
| } |
| |
| // First make a pass through all stack ids that correspond to a call, |
| // as identified in the above loop. Compute the context ids corresponding to |
| // each of these calls when they correspond to multiple stack ids due to |
| // inlining. Perform any duplication of context ids required when |
| // there is more than one call with the same stack ids. Their (possibly newly |
| // duplicated) context ids are saved in the StackIdToMatchingCalls map. |
| DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds; |
| for (auto &It : StackIdToMatchingCalls) { |
| auto &Calls = It.getSecond(); |
| // Skip single calls with a single stack id. These don't need a new node. |
| if (Calls.size() == 1) { |
| auto &Ids = std::get<1>(Calls[0]); |
| if (Ids.size() == 1) |
| continue; |
| } |
| // To get the best and most complete matching of inlined calls to context |
| // node sequences, sort the vectors of stack ids in descending order of |
| // length, and within each length, lexicographically by stack id. The latter |
| // is so that we can specially handle calls that have identical stack id |
| // sequences (either due to cloning or artificially because of the MIB |
| // context pruning). |
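| // For example, sequences [1, 2, 9], [4, 9], and [1, 9] (all ending in this |
| // entry's stack id, here 9) sort as [1, 2, 9], [1, 9], [4, 9]: longest |
| // first, then lexicographically within each length. |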
| std::stable_sort(Calls.begin(), Calls.end(), |
| [](const CallContextInfo &A, const CallContextInfo &B) { |
| auto &IdsA = std::get<1>(A); |
| auto &IdsB = std::get<1>(B); |
| return IdsA.size() > IdsB.size() || |
| (IdsA.size() == IdsB.size() && IdsA < IdsB); |
| }); |
| |
| // Find the node for the last stack id, which should be the same |
| // across all calls recorded for this id, and is the id for this |
| // entry in the StackIdToMatchingCalls map. |
| uint64_t LastId = It.getFirst(); |
| ContextNode *LastNode = getNodeForStackId(LastId); |
| // We should only have kept stack ids that had nodes. |
| assert(LastNode); |
| |
| if (LastNode->Recursive) |
| continue; |
| |
| // Initialize the context ids with the last node's. We will subsequently |
| // refine the context ids by computing the intersection along all edges. |
| DenseSet<uint32_t> LastNodeContextIds = LastNode->ContextIds; |
| assert(!LastNodeContextIds.empty()); |
| |
| for (unsigned I = 0; I < Calls.size(); I++) { |
| auto &[Call, Ids, Func, SavedContextIds] = Calls[I]; |
| assert(SavedContextIds.empty()); |
| assert(LastId == Ids.back()); |
| |
| // First compute the context ids for this stack id sequence (the |
| // intersection of the context ids of the corresponding nodes). |
| // Start with the remaining saved ids for the last node. |
| assert(!LastNodeContextIds.empty()); |
| DenseSet<uint32_t> StackSequenceContextIds = LastNodeContextIds; |
| |
| ContextNode *PrevNode = LastNode; |
| ContextNode *CurNode = LastNode; |
| bool Skip = false; |
| |
| // Iterate backwards through the stack Ids, starting after the last Id |
| // in the list, which was handled once outside for all Calls. |
| for (auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) { |
| auto Id = *IdIter; |
| CurNode = getNodeForStackId(Id); |
| // We should only have kept stack ids that had nodes. |
| assert(CurNode); |
| |
| if (CurNode->Recursive) { |
| Skip = true; |
| break; |
| } |
| |
| auto *Edge = CurNode->findEdgeFromCaller(PrevNode); |
| // If there is no edge then the nodes belong to different MIB contexts, |
| // and we should skip this inlined context sequence. For example, this |
| // particular inlined context may include stack ids A->B, and we may |
| // indeed have nodes for both A and B, but it is possible that they were |
| // never profiled in sequence in a single MIB for any allocation (i.e. |
| // we might have profiled an allocation that involves the callsite A, |
| // but through a different one of its callee callsites, and we might |
| // have profiled an allocation that involves callsite B, but reached |
| // from a different caller callsite). |
| if (!Edge) { |
| Skip = true; |
| break; |
| } |
| PrevNode = CurNode; |
| |
| // Update the context ids, which is the intersection of the ids along |
| // all edges in the sequence. |
| set_intersect(StackSequenceContextIds, Edge->getContextIds()); |
| |
| // If we now have no context ids for the clone, skip this call. |
| if (StackSequenceContextIds.empty()) { |
| Skip = true; |
| break; |
| } |
| } |
| if (Skip) |
| continue; |
| |
| // If some of this call's stack ids did not have corresponding nodes (due |
| // to pruning), don't include any context ids for contexts that extend |
| // beyond these nodes. Otherwise we would be matching part of unrelated or |
| // only partially matching stack contexts. To do this, subtract any context |
| // ids found in caller nodes of the last node found above. |
| if (Ids.back() != getLastStackId(Call)) { |
| for (const auto &PE : LastNode->CallerEdges) { |
| set_subtract(StackSequenceContextIds, PE->getContextIds()); |
| if (StackSequenceContextIds.empty()) |
| break; |
| } |
| // If we now have no context ids for clone, skip this call. |
| if (StackSequenceContextIds.empty()) |
| continue; |
| } |
| |
| // Check if the next set of stack ids is the same (since the Calls vector |
| // of tuples is sorted by the stack ids, we can just look at the next one). |
| bool DuplicateContextIds = false; |
| if (I + 1 < Calls.size()) { |
| auto NextIds = std::get<1>(Calls[I + 1]); |
| DuplicateContextIds = Ids == NextIds; |
| } |
| |
| // If we don't have duplicate context ids, then we can assign all the |
| // context ids computed for the original node sequence to this call. |
| // If there are duplicate calls with the same stack ids then we synthesize |
| // new context ids that are duplicates of the originals. These are |
| // assigned to SavedContextIds, which is a reference into the map entry |
| // for this call, allowing us to access these ids later on. |
| OldToNewContextIds.reserve(OldToNewContextIds.size() + |
| StackSequenceContextIds.size()); |
| SavedContextIds = |
| DuplicateContextIds |
| ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds) |
| : StackSequenceContextIds; |
| assert(!SavedContextIds.empty()); |
| |
| if (!DuplicateContextIds) { |
| // Update saved last node's context ids to remove those that are |
| // assigned to other calls, so that it is ready for the next call at |
| // this stack id. |
| set_subtract(LastNodeContextIds, StackSequenceContextIds); |
| if (LastNodeContextIds.empty()) |
| break; |
| } |
| } |
| } |
| |
| // Propagate the duplicate context ids over the graph. |
| propagateDuplicateContextIds(OldToNewContextIds); |
| |
| if (VerifyCCG) |
| check(); |
| |
| // Now perform a post-order traversal over the graph, starting with the |
| // allocation nodes, essentially processing nodes from callers to callees. |
| // For any that contains an id in the map, update the graph to contain new |
| // nodes representing any inlining at interior callsites. Note we move the |
| // associated context ids over to the new nodes. |
| DenseSet<const ContextNode *> Visited; |
| for (auto &Entry : AllocationCallToContextNodeMap) |
| assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls); |
| } |
| |
| uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *Call) { |
| CallStack<MDNode, MDNode::op_iterator> CallsiteContext( |
| Call->getMetadata(LLVMContext::MD_callsite)); |
| return CallsiteContext.back(); |
| } |
| |
| uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &Call) { |
| assert(isa<CallsiteInfo *>(Call.getBase())); |
| CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator> |
| CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(Call.getBase())); |
| // Need to convert index into stack id. |
| return Index.getStackIdAtIndex(CallsiteContext.back()); |
| } |
| |
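| // Returns the name to use for the given function clone: clone 0 (the |
| // original) keeps the base name, while e.g. clone 2 of "foo" becomes |
| // "foo.memprof.2". |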
| static std::string getMemProfFuncName(Twine Base, unsigned CloneNo) { |
| if (!CloneNo) |
| return Base.str(); |
| return (Base + ".memprof." + Twine(CloneNo)).str(); |
| } |
| |
| std::string ModuleCallsiteContextGraph::getLabel(const Function *Func, |
| const Instruction *Call, |
| unsigned CloneNo) const { |
| return (Twine(Call->getFunction()->getName()) + " -> " + |
| cast<CallBase>(Call)->getCalledFunction()->getName()) |
| .str(); |
| } |
| |
| std::string IndexCallsiteContextGraph::getLabel(const FunctionSummary *Func, |
| const IndexCall &Call, |
| unsigned CloneNo) const { |
| auto VI = FSToVIMap.find(Func); |
| assert(VI != FSToVIMap.end()); |
| if (isa<AllocInfo *>(Call.getBase())) |
| return (VI->second.name() + " -> alloc").str(); |
| else { |
| auto *Callsite = dyn_cast_if_present<CallsiteInfo *>(Call.getBase()); |
| return (VI->second.name() + " -> " + |
| getMemProfFuncName(Callsite->Callee.name(), |
| Callsite->Clones[CloneNo])) |
| .str(); |
| } |
| } |
| |
| std::vector<uint64_t> |
| ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall( |
| Instruction *Call) { |
| CallStack<MDNode, MDNode::op_iterator> CallsiteContext( |
| Call->getMetadata(LLVMContext::MD_callsite)); |
| return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>( |
| CallsiteContext); |
| } |
| |
| std::vector<uint64_t> |
| IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &Call) { |
| assert(isa<CallsiteInfo *>(Call.getBase())); |
| CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator> |
| CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(Call.getBase())); |
| return getStackIdsWithContextNodes<CallsiteInfo, |
| SmallVector<unsigned>::const_iterator>( |
| CallsiteContext); |
| } |
| |
| template <typename DerivedCCG, typename FuncTy, typename CallTy> |
| template <class NodeT, class IteratorT> |
| std::vector<uint64_t> |
| CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes( |
| CallStack<NodeT, IteratorT> &CallsiteContext) { |
| std::vector<uint64_t> StackIds; |
| for (auto IdOrIndex : CallsiteContext) { |
| auto StackId = getStackId(IdOrIndex); |
| ContextNode *Node = getNodeForStackId(StackId); |
| if (!Node) |
| break; |
| StackIds.push_back(StackId); |
| } |
| return StackIds; |
| } |
| |
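| // Builds the graph from the !memprof and !callsite metadata in the module. |
| // For illustration (a sketch; the ids are hypothetical), an allocation call |
| // carries metadata of roughly this shape: |
| //   %call = call ptr @malloc(i64 8), !memprof !0, !callsite !4 |
| //   !0 = !{!1}                ; list of MIB nodes |
| //   !1 = !{!2, !"cold"}       ; one MIB: stack node + alloc type |
| //   !2 = !{i64 123, i64 456}  ; leaf-first call stack ids |
| //   !4 = !{i64 123}           ; callsite context of the call itself |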
| ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(Module &M) : Mod(M) { |
| for (auto &F : M) { |
| std::vector<CallInfo> CallsWithMetadata; |
| for (auto &BB : F) { |
| for (auto &I : BB) { |
| if (!isa<CallBase>(I)) |
| continue; |
| if (auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof)) { |
| CallsWithMetadata.push_back(&I); |
| auto *AllocNode = addAllocNode(&I, &F); |
| auto *CallsiteMD = I.getMetadata(LLVMContext::MD_callsite); |
| assert(CallsiteMD); |
| CallStack<MDNode, MDNode::op_iterator> CallsiteContext(CallsiteMD); |
| // Add all of the MIBs and their stack nodes. |
| for (auto &MDOp : MemProfMD->operands()) { |
| auto *MIBMD = cast<const MDNode>(MDOp); |
| MDNode *StackNode = getMIBStackNode(MIBMD); |
| assert(StackNode); |
| CallStack<MDNode, MDNode::op_iterator> StackContext(StackNode); |
| addStackNodesForMIB<MDNode, MDNode::op_iterator>( |
| AllocNode, StackContext, CallsiteContext, |
| getMIBAllocType(MIBMD)); |
| } |
| assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None); |
| // Memprof and callsite metadata on memory allocations are no longer |
| // needed. |
| I.setMetadata(LLVMContext::MD_memprof, nullptr); |
| I.setMetadata(LLVMContext::MD_callsite, nullptr); |
| } |
| // For callsite metadata, add to list for this function for later use. |
| else if (I.getMetadata(LLVMContext::MD_callsite)) |
| CallsWithMetadata.push_back(&I); |
| } |
| } |
| if (!CallsWithMetadata.empty()) |
| FuncToCallsWithMetadata.push_back({&F, CallsWithMetadata}); |
| } |
| |
| if (DumpCCG) { |
| dbgs() << "CCG before updating call stack chains:\n"; |
| dbgs() << *this; |
| } |
| |
| if (ExportToDot) |
| exportToDot("prestackupdate"); |
| |
| updateStackNodes(); |
| |
| handleCallsitesWithMultipleTargets(); |
| |
| // Strip off remaining callsite metadata, no longer needed. |
| for (auto &FuncEntry : FuncToCallsWithMetadata) |
| for (auto &Call : FuncEntry.second) |
| Call.call()->setMetadata(LLVMContext::MD_callsite, nullptr); |
| } |
| |
| IndexCallsiteContextGraph::IndexCallsiteContextGraph( |
| ModuleSummaryIndex &Index, |
| function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> |
| isPrevailing) |
| : Index(Index) { |
| for (auto &I : Index) { |
| auto VI = Index.getValueInfo(I); |
| for (auto &S : VI.getSummaryList()) { |
| // We should only add the prevailing nodes. Otherwise we may try to clone |
| // in a weak copy that won't be linked (and may be different from the |
| // prevailing version). |
| // We now only keep the memprof summary on the prevailing copy when |
| // building the combined index, as a space optimization; however, don't |
| // rely on this optimization. The linker doesn't resolve local linkage |
| // values, so don't check whether those are prevailing. |
| if (!GlobalValue::isLocalLinkage(S->linkage()) && |
| !isPrevailing(VI.getGUID(), S.get())) |
| continue; |
| auto *FS = dyn_cast<FunctionSummary>(S.get()); |
| if (!FS) |
| continue; |
| std::vector<CallInfo> CallsWithMetadata; |
| if (!FS->allocs().empty()) { |
| for (auto &AN : FS->mutableAllocs()) { |
| // This can happen because of recursion elimination handling that |
| // currently exists in ModuleSummaryAnalysis. Skip these for now. |
| // We still added them to the summary because we need to be able to |
| // correlate properly in applyImport in the backends. |
| if (AN.MIBs.empty()) |
| continue; |
| CallsWithMetadata.push_back({&AN}); |
| auto *AllocNode = addAllocNode({&AN}, FS); |
| // Pass an empty CallStack to the CallsiteContext (second) |
| // parameter, since for ThinLTO we already collapsed out the inlined |
| // stack ids on the allocation call during ModuleSummaryAnalysis. |
| CallStack<MIBInfo, SmallVector<unsigned>::const_iterator> |
| EmptyContext; |
| // Now add all of the MIBs and their stack nodes. |
| for (auto &MIB : AN.MIBs) { |
| CallStack<MIBInfo, SmallVector<unsigned>::const_iterator> |
| StackContext(&MIB); |
| addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>( |
| AllocNode, StackContext, EmptyContext, MIB.AllocType); |
| } |
| assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None); |
| // Initialize version 0 on the summary alloc node to the current alloc |
| // type, unless it has both types, in which case make it the default, so |
| // that when we aren't able to clone, the original version always ends up |
| // with the default allocation behavior. |
| AN.Versions[0] = (uint8_t)allocTypeToUse(AllocNode->AllocTypes); |
| } |
| } |
| // For callsite metadata, add to list for this function for later use. |
| if (!FS->callsites().empty()) |
| for (auto &SN : FS->mutableCallsites()) |
| CallsWithMetadata.push_back({&SN}); |
| |
| if (!CallsWithMetadata.empty()) |
| FuncToCallsWithMetadata.push_back({FS, CallsWithMetadata}); |
| |
| if (!FS->allocs().empty() || !FS->callsites().empty()) |
| FSToVIMap[FS] = VI; |
| } |
| } |
| |
| if (DumpCCG) { |
| dbgs() << "CCG before updating call stack chains:\n"; |
| dbgs() << *this; |
| } |
| |
| if (ExportToDot) |
| exportToDot("prestackupdate"); |
| |
| updateStackNodes(); |
| |
| handleCallsitesWithMultipleTargets(); |
| } |
| |
| template <typename DerivedCCG, typename FuncTy, typename CallTy> |
| void CallsiteContextGraph<DerivedCCG, FuncTy, |
| CallTy>::handleCallsitesWithMultipleTargets() { |
| // Look for and work around callsites that call multiple functions. |
| // This can happen for indirect calls, which need better handling, and in |
| // rarer cases (e.g. macro expansion). |
| // TODO: To fix this for indirect calls we will want to perform speculative |
| // devirtualization using either the normal PGO info with ICP, or using the |
| // information in the profiled MemProf contexts. We can do this prior to |
| // this transformation for regular LTO, and for ThinLTO we can simulate that |
| // effect in the summary and perform the actual speculative devirtualization |
| // while cloning in the ThinLTO backend. |
| for (auto Entry = NonAllocationCallToContextNodeMap.begin(); |
| Entry != NonAllocationCallToContextNodeMap.end();) { |
| auto *Node = Entry->second; |
| assert(Node->Clones.empty()); |
| // Check all the node's callees and see if the call matches the function |
| // containing each callee node. |
| bool Removed = false; |
| auto Call = Node->Call.call(); |
| for (auto &Edge : Node->CalleeEdges) { |
| if (!Edge->Callee->hasCall()) |
| continue; |
| assert(NodeToCallingFunc.count(Edge->Callee)); |
| // Check if the called function matches that of the callee node. |
| if (calleeMatchesFunc(Call, NodeToCallingFunc[Edge->Callee])) |
| continue; |
| // Work around by setting Node to have a null call, so it gets |
| // skipped during cloning. Otherwise assignFunctions will assert |
| // because its data structures are not designed to handle this case. |
| Entry = NonAllocationCallToContextNodeMap.erase(Entry); |
| Node->setCall(CallInfo()); |
| Removed = true; |
| break; |
| } |
| if (!Removed) |
| Entry++; |
| } |
| } |
| |
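// The two getStackId overloads below normalize the two representations to the
// full 64-bit stack id: IR metadata already carries the full id, while the
// summary index stores an index into its stack id list.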
| uint64_t ModuleCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const { |
| // In the Module (IR) case this is already the Id. |
| return IdOrIndex; |
| } |
| |
| uint64_t IndexCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const { |
| // In the Index case this is an index into the stack id list in the summary |
| // index, convert it to an Id. |
| return Index.getStackIdAtIndex(IdOrIndex); |
| } |
| |
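// Check whether the IR call's target, after stripping pointer casts and
// looking through a one-level alias, matches the given function.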
bool ModuleCallsiteContextGraph::calleeMatchesFunc(Instruction *Call,
                                                   const Function *Func) {
  auto *CB = dyn_cast<CallBase>(Call);
  // Guard against a non-call instruction or a missing called operand.
  if (!CB || !CB->getCalledOperand())
    return false;
| auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts(); |
| auto *CalleeFunc = dyn_cast<Function>(CalleeVal); |
| if (CalleeFunc == Func) |
| return true; |
| auto *Alias = dyn_cast<GlobalAlias>(CalleeVal); |
| return Alias && Alias->getAliasee() == Func; |
| } |
| |
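// Check whether the summary callee recorded for this call refers to the given
// function summary, either directly via its ValueInfo or through an alias
// summary to it.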
| bool IndexCallsiteContextGraph::calleeMatchesFunc(IndexCall &Call, |
| const FunctionSummary *Func) { |
| ValueInfo Callee = |
| dyn_cast_if_present<CallsiteInfo *>(Call.getBase())->Callee; |
| // If there is no summary list then this is a call to an externally defined |
| // symbol. |
| AliasSummary *Alias = |
| Callee.getSummaryList().empty() |
| ? nullptr |
| : dyn_cast<AliasSummary>(Callee.getSummaryList()[0].get()); |
| assert(FSToVIMap.count(Func)); |
| return Callee == FSToVIMap[Func] || |
| // If callee is an alias, check the aliasee, since only function |
| // summary base objects will contain the stack node summaries and thus |
| // get a context node. |
| (Alias && Alias->getAliaseeVI() == FSToVIMap[Func]); |
| } |
| |
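// Helper to print the allocation type bits as a string. Note that when both
// the NotCold and Cold bits are set this yields the concatenation
// "NotColdCold".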
| static std::string getAllocTypeString(uint8_t AllocTypes) { |
| if (!AllocTypes) |
| return "None"; |
| std::string Str; |
| if (AllocTypes & (uint8_t)AllocationType::NotCold) |
| Str += "NotCold"; |
| if (AllocTypes & (uint8_t)AllocationType::Cold) |
| Str += "Cold"; |
| return Str; |
| } |
| |
| template <typename DerivedCCG, typename FuncTy, typename CallTy> |
| void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump() |
| const { |
| print(dbgs()); |
| dbgs() << "\n"; |
| } |
| |
| template <typename DerivedCCG, typename FuncTy, typename CallTy> |
| void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print( |
| raw_ostream &OS) const { |
| OS << "Node " << this << "\n"; |
| OS << "\t"; |
| printCall(OS); |
| if (Recursive) |
| OS << " (recursive)"; |
| OS << "\n"; |
| OS << "\tAllocTypes: " << getAllocTypeString(AllocTypes) << "\n"; |
| OS << "\tContextIds:"; |
| std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end()); |
| std::sort(SortedIds.begin(), SortedIds.end()); |
| for (auto Id : SortedIds) |
| OS << " " << Id; |
| OS << "\n"; |
| OS << "\tCalleeEdges:\n"; |
| for (auto &Edge : CalleeEdges) |
| OS << "\t\t" << *Edge << "\n"; |
| OS << "\tCallerEdges:\n"; |
| for (auto &Edge : CallerEdges) |
| OS << "\t\t" << *Edge << "\n"; |
| if (!Clones.empty()) { |
| OS << "\tClones: "; |
| FieldSeparator FS; |
| for (auto *Clone : Clones) |
| OS << FS << Clone; |
| OS << "\n"; |
| } else if (CloneOf) { |
| OS << "\tClone of " << CloneOf << "\n"; |
| } |
| } |
| |
| template <typename DerivedCCG, typename FuncTy, typename CallTy> |
| void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump() |
| const { |
| print(dbgs()); |
| dbgs() << "\n"; |
| } |
| |
| template <typename DerivedCCG, typename FuncTy, typename CallTy> |
| void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print( |
| raw_ostream &OS) const { |
| OS << "Edge from Callee " << Callee << " to Caller: " << Caller |
| << " AllocTypes: " << getAllocTypeString(AllocTypes); |
| OS << " ContextIds:"; |
| std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end()); |
| std::sort(SortedIds.begin(), SortedIds.end()); |
| for (auto Id : SortedIds) |
| OS << " " << Id; |
| } |
| |
| template <typename DerivedCCG, typename FuncTy, typename CallTy> |
| void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump() const { |
| print(dbgs()); |
| } |
| |
| template <typename DerivedCCG, typename FuncTy, typename CallTy> |
| void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print( |
| raw_ostream &OS) const { |
| OS << "Callsite Context Graph:\n"; |
| using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *; |
| for (const auto Node : nodes<GraphType>(this)) { |
| if (Node->isRemoved()) |
| continue; |
| Node->print(OS); |
| OS << "\n"; |
| } |
| } |
| |
| template <typename DerivedCCG, typename FuncTy, typename CallTy> |
| static void checkEdge( |
| const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) { |
| // Confirm that alloc type is not None and that we have at least one context |
| // id. |
| assert(Edge->AllocTypes != (uint8_t)AllocationType::None); |
| assert(!Edge->ContextIds.empty()); |
| } |
| |
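// Verify the context id invariants on a node: its id set must equal the union
// of its callee edges' ids, and must be a (possibly proper) superset of the
// union of its caller edges' ids, since some contexts may terminate here.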
| template <typename DerivedCCG, typename FuncTy, typename CallTy> |
| static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node) { |
| if (Node->isRemoved()) |
| return; |
| // Node's context ids should be the union of both its callee and caller edge |
| // context ids. |
  if (!Node->CallerEdges.empty()) {
| auto EI = Node->CallerEdges.begin(); |
| auto &FirstEdge = *EI; |
| EI++; |
| DenseSet<uint32_t> CallerEdgeContextIds(FirstEdge->ContextIds); |
| for (; EI != Node->CallerEdges.end(); EI++) { |
| const auto &Edge = *EI; |
| set_union(CallerEdgeContextIds, Edge->ContextIds); |
| } |
    // Node can have more context ids than its caller edges if some contexts
    // terminate at this node while others continue on to callers.
| assert(Node->ContextIds == CallerEdgeContextIds || |
| set_is_subset(CallerEdgeContextIds, Node->ContextIds)); |
| } |
  if (!Node->CalleeEdges.empty()) {
| auto EI = Node->CalleeEdges.begin(); |
| auto &FirstEdge = *EI; |
| EI++; |
| DenseSet<uint32_t> CalleeEdgeContextIds(FirstEdge->ContextIds); |
| for (; EI != Node->CalleeEdges.end(); EI++) { |
| const auto &Edge = *EI; |
| set_union(CalleeEdgeContextIds, Edge->ContextIds); |
| } |
| assert(Node->ContextIds == CalleeEdgeContextIds); |
| } |
| } |
| |
| template <typename DerivedCCG, typename FuncTy, typename CallTy> |
| void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check() const { |
| using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *; |
| for (const auto Node : nodes<GraphType>(this)) { |
| checkNode<DerivedCCG, FuncTy, CallTy>(Node); |
| for (auto &Edge : Node->CallerEdges) |
| checkEdge<DerivedCCG, FuncTy, CallTy>(Edge); |
| } |
| } |
| |
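// GraphTraits specialization so that generic graph iteration (the nodes()
// range used by print and check above) and GraphWriter-based dot export work
// on the CallsiteContextGraph. Children are reached via the callee edges.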
| template <typename DerivedCCG, typename FuncTy, typename CallTy> |
| struct GraphTraits<const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *> { |
| using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *; |
| using NodeRef = const ContextNode<DerivedCCG, FuncTy, CallTy> *; |
| |
| using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>; |
| static NodeRef getNode(const NodePtrTy &P) { return P.get(); } |
| |
| using nodes_iterator = |
| mapped_iterator<typename std::vector<NodePtrTy>::const_iterator, |
| decltype(&getNode)>; |
| |
| static nodes_iterator nodes_begin(GraphType G) { |
| return nodes_iterator(G->NodeOwner.begin(), &getNode); |
| } |
| |
| static nodes_iterator nodes_end(GraphType G) { |
| return nodes_iterator(G->NodeOwner.end(), &getNode); |
| } |
| |
| static NodeRef getEntryNode(GraphType G) { |
| return G->NodeOwner.begin()->get(); |
| } |
| |
| using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>; |
| static const ContextNode<DerivedCCG, FuncTy, CallTy> * |
| GetCallee(const EdgePtrTy &P) { |
| return P->Callee; |
| } |
| |
| using ChildIteratorType = |
| mapped_iterator<typename std::vector<std::shared_ptr<ContextEdge< |
| DerivedCCG, FuncTy, CallTy>>>::const_iterator, |
| decltype(&GetCallee)>; |
| |
| static ChildIteratorType child_begin(NodeRef N) { |
| return ChildIteratorType(N->CalleeEdges.begin(), &GetCallee); |
| } |
| |
| static ChildIteratorType child_end(NodeRef N) { |
| return ChildIteratorType(N->CalleeEdges.end(), &GetCallee); |
| } |
| }; |
| |
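// DOTGraphTraits specialization controlling the labels, tooltips and colors
// used when exporting the graph to a dot file: nodes and edges are colored by
// allocation type (see getColor below), and clone nodes additionally get a
// blue, dashed outline.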
| template <typename DerivedCCG, typename FuncTy, typename CallTy> |
| struct DOTGraphTraits<const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *> |
| : public DefaultDOTGraphTraits { |
| DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {} |
| |
| using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *; |
| using GTraits = GraphTraits<GraphType>; |
| using NodeRef = typename GTraits::NodeRef; |
| using ChildIteratorType = typename GTraits::ChildIteratorType; |
| |
| static std::string getNodeLabel(NodeRef Node, GraphType G) { |
| std::string LabelString = |
| (Twine("OrigId: ") + (Node->IsAllocation ? "Alloc" : "") + |
| Twine(Node->OrigStackOrAllocId)) |
| .str(); |
| LabelString += "\n"; |
| if (Node->hasCall()) { |
| auto Func = G->NodeToCallingFunc.find(Node); |
| assert(Func != G->NodeToCallingFunc.end()); |
| LabelString += |
| G->getLabel(Func->second, Node->Call.call(), Node->Call.cloneNo()); |
| } else { |
| LabelString += "null call"; |
| if (Node->Recursive) |
| LabelString += " (recursive)"; |
| else |
| LabelString += " (external)"; |
| } |
| return LabelString; |
| } |
| |
| static std::string getNodeAttributes(NodeRef Node, GraphType) { |
| std::string AttributeString = (Twine("tooltip=\"") + getNodeId(Node) + " " + |
| getContextIds(Node->ContextIds) + "\"") |
| .str(); |
| AttributeString += |
| (Twine(",fillcolor=\"") + getColor(Node->AllocTypes) + "\"").str(); |
| AttributeString += ",style=\"filled\""; |
| if (Node->CloneOf) { |
| AttributeString += ",color=\"blue\""; |
| AttributeString += ",style=\"filled,bold,dashed\""; |
| } else |
| AttributeString += ",style=\"filled\""; |
| return AttributeString; |
| } |
| |
| static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, |
| GraphType) { |
| auto &Edge = *(ChildIter.getCurrent()); |
| return (Twine("tooltip=\"") + getContextIds(Edge->ContextIds) + "\"" + |
| Twine(",fillcolor=\"") + getColor(Edge->AllocTypes) + "\"") |
| .str(); |
| } |
| |
  // Since the NodeOwner list includes nodes that are no longer connected to
  // the graph, skip them here.
| static bool isNodeHidden(NodeRef Node, GraphType) { |
| return Node->isRemoved(); |
| } |
| |
| private: |
| static std::string getContextIds(const DenseSet<uint32_t> &ContextIds) { |
| std::string IdString = "ContextIds:"; |
| if (ContextIds.size() < 100) { |
| std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end()); |
| std::sort(SortedIds.begin(), SortedIds.end()); |
| for (auto Id : SortedIds) |
| IdString += (" " + Twine(Id)).str(); |
| } else { |
| IdString += (" (" + Twine(ContextIds.size()) + " ids)").str(); |
| } |
| return IdString; |
| } |
| |
| static std::string getColor(uint8_t AllocTypes) { |
| if (AllocTypes == (uint8_t)AllocationType::NotCold) |
| // Color "brown1" actually looks like a lighter red. |
| return "brown1"; |
| if (AllocTypes == (uint8_t)AllocationType::Cold) |
| return "cyan"; |
| if (AllocTypes == |
| ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold)) |
| // Lighter purple. |
| return "mediumorchid1"; |
| return "gray"; |
| } |
| |
| static std::string getNodeId(NodeRef Node) { |
| std::stringstream SStream; |
| SStream << std::hex << "N0x" << (unsigned long long)Node; |
    return SStream.str();
| } |
| }; |
| |
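// Write the graph to a dot file whose name is built from the
// -memprof-dot-file-path-prefix option value, a fixed "ccg." component, and
// the per-stage Label. For example (with a hypothetical /tmp/ prefix),
// running with -memprof-export-to-dot -memprof-dot-file-path-prefix=/tmp/
// produces /tmp/ccg.postbuild.dot after the graph is built.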
| template <typename DerivedCCG, typename FuncTy, typename CallTy> |
| void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot( |
| std::string Label) const { |
| WriteGraph(this, "", false, Label, |
| DotFilePathPrefix + "ccg." + Label + ".dot"); |
| } |
| |
| template <typename DerivedCCG, typename FuncTy, typename CallTy> |
| bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() { |
| if (DumpCCG) { |
| dbgs() << "CCG before cloning:\n"; |
| dbgs() << *this; |
| } |
| if (ExportToDot) |
| exportToDot("postbuild"); |
| |
  if (VerifyCCG)
    check();
| |
| return false; |
| } |
| |
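// Build the callsite context graph from the module IR and run the
// disambiguation on it, returning true if the module was changed.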
| bool MemProfContextDisambiguation::processModule(Module &M) { |
| bool Changed = false; |
| |
| ModuleCallsiteContextGraph CCG(M); |
| Changed = CCG.process(); |
| |
| return Changed; |
| } |
| |
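// New pass manager entry point for the regular LTO (IR) case.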
| PreservedAnalyses MemProfContextDisambiguation::run(Module &M, |
| ModuleAnalysisManager &AM) { |
| if (!processModule(M)) |
| return PreservedAnalyses::all(); |
| return PreservedAnalyses::none(); |
| } |
| |
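// Entry point for the ThinLTO case, invoked on the combined summary index
// during the thin link.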
| void MemProfContextDisambiguation::run( |
| ModuleSummaryIndex &Index, |
| function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> |
| isPrevailing) { |
| IndexCallsiteContextGraph CCG(Index, isPrevailing); |
| CCG.process(); |
| } |