| //===- bolt/Passes/BinaryFunctionCallGraph.cpp ----------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file implements the BinaryFunctionCallGraph class. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "bolt/Passes/BinaryFunctionCallGraph.h" |
| #include "bolt/Core/BinaryContext.h" |
| #include "bolt/Core/BinaryFunction.h" |
| #include "llvm/Support/CommandLine.h" |
| #include "llvm/Support/Timer.h" |
| #include <stack> |
| |
| #define DEBUG_TYPE "callgraph" |
| |
| namespace opts { |
| extern llvm::cl::opt<bool> TimeOpts; |
| extern llvm::cl::opt<unsigned> Verbosity; |
| } // namespace opts |
| |
| namespace llvm { |
| namespace bolt { |
| |
| CallGraph::NodeId BinaryFunctionCallGraph::addNode(BinaryFunction *BF, |
| uint32_t Size, |
| uint64_t Samples) { |
| NodeId Id = CallGraph::addNode(Size, Samples); |
| assert(size_t(Id) == Funcs.size()); |
| Funcs.push_back(BF); |
| FuncToNodeId[BF] = Id; |
| assert(Funcs[Id] == BF); |
| return Id; |
| } |
| |
| std::deque<BinaryFunction *> BinaryFunctionCallGraph::buildTraversalOrder() { |
| NamedRegionTimer T1("buildcgorder", "Build cg traversal order", |
| "CG breakdown", "CG breakdown", opts::TimeOpts); |
| std::deque<BinaryFunction *> TopologicalOrder; |
| enum NodeStatus { NEW, VISITING, VISITED }; |
| std::vector<NodeStatus> NodeStatus(Funcs.size()); |
| std::stack<NodeId> Worklist; |
| |
| for (BinaryFunction *Func : Funcs) { |
| const NodeId Id = FuncToNodeId.at(Func); |
| Worklist.push(Id); |
| NodeStatus[Id] = NEW; |
| } |
| |
| while (!Worklist.empty()) { |
| const NodeId FuncId = Worklist.top(); |
| Worklist.pop(); |
| |
| if (NodeStatus[FuncId] == VISITED) |
| continue; |
| |
| if (NodeStatus[FuncId] == VISITING) { |
| TopologicalOrder.push_back(Funcs[FuncId]); |
| NodeStatus[FuncId] = VISITED; |
| continue; |
| } |
| |
| assert(NodeStatus[FuncId] == NEW); |
| NodeStatus[FuncId] = VISITING; |
| Worklist.push(FuncId); |
| for (const NodeId Callee : successors(FuncId)) { |
| if (NodeStatus[Callee] == VISITING || NodeStatus[Callee] == VISITED) |
| continue; |
| Worklist.push(Callee); |
| } |
| } |
| |
| return TopologicalOrder; |
| } |
| |
| BinaryFunctionCallGraph |
| buildCallGraph(BinaryContext &BC, CgFilterFunction Filter, bool CgFromPerfData, |
| bool IncludeSplitCalls, bool UseFunctionHotSize, |
| bool UseSplitHotSize, bool UseEdgeCounts, |
| bool IgnoreRecursiveCalls) { |
| NamedRegionTimer T1("buildcg", "Callgraph construction", "CG breakdown", |
| "CG breakdown", opts::TimeOpts); |
| BinaryFunctionCallGraph Cg; |
| static constexpr uint64_t COUNT_NO_PROFILE = |
| BinaryBasicBlock::COUNT_NO_PROFILE; |
| |
| // Compute function size |
| auto functionSize = [&](const BinaryFunction *Function) { |
| return UseFunctionHotSize && Function->isSplit() |
| ? Function->estimateHotSize(UseSplitHotSize) |
| : Function->estimateSize(); |
| }; |
| |
| // Add call graph nodes. |
| auto lookupNode = [&](BinaryFunction *Function) { |
| const CallGraph::NodeId Id = Cg.maybeGetNodeId(Function); |
| if (Id == CallGraph::InvalidId) { |
| // It's ok to use the hot size here when the function is split. This is |
| // because emitFunctions will emit the hot part first in the order that is |
| // computed by ReorderFunctions. The cold part will be emitted with the |
| // rest of the cold functions and code. |
| const size_t Size = functionSize(Function); |
| // NOTE: for functions without a profile, we set the number of samples |
| // to zero. This will keep these functions from appearing in the hot |
| // section. This is a little weird because we wouldn't be trying to |
| // create a node for a function unless it was the target of a call from |
| // a hot block. The alternative would be to set the count to one or |
| // accumulate the number of calls from the callsite into the function |
| // samples. Results from perfomance testing seem to favor the zero |
| // count though, so I'm leaving it this way for now. |
| return Cg.addNode(Function, Size, Function->getKnownExecutionCount()); |
| } |
| return Id; |
| }; |
| |
| // Add call graph edges. |
| uint64_t NotProcessed = 0; |
| uint64_t TotalCallsites = 0; |
| uint64_t NoProfileCallsites = 0; |
| uint64_t NumFallbacks = 0; |
| uint64_t RecursiveCallsites = 0; |
| for (auto &It : BC.getBinaryFunctions()) { |
| BinaryFunction *Function = &It.second; |
| |
| if (Filter(*Function)) |
| continue; |
| |
| const CallGraph::NodeId SrcId = lookupNode(Function); |
| // Offset of the current basic block from the beginning of the function |
| uint64_t Offset = 0; |
| |
| auto recordCall = [&](const MCSymbol *DestSymbol, const uint64_t Count) { |
| if (BinaryFunction *DstFunc = |
| DestSymbol ? BC.getFunctionForSymbol(DestSymbol) : nullptr) { |
| if (DstFunc == Function) { |
| LLVM_DEBUG(dbgs() << "BOLT-INFO: recursive call detected in " |
| << *DstFunc << "\n"); |
| ++RecursiveCallsites; |
| if (IgnoreRecursiveCalls) |
| return false; |
| } |
| if (Filter(*DstFunc)) |
| return false; |
| |
| const CallGraph::NodeId DstId = lookupNode(DstFunc); |
| const bool IsValidCount = Count != COUNT_NO_PROFILE; |
| const uint64_t AdjCount = UseEdgeCounts && IsValidCount ? Count : 1; |
| if (!IsValidCount) |
| ++NoProfileCallsites; |
| Cg.incArcWeight(SrcId, DstId, AdjCount, Offset); |
| LLVM_DEBUG(if (opts::Verbosity > 1) { |
| dbgs() << "BOLT-DEBUG: buildCallGraph: call " << *Function << " -> " |
| << *DstFunc << " @ " << Offset << "\n"; |
| }); |
| return true; |
| } |
| |
| return false; |
| }; |
| |
| // Pairs of (symbol, count) for each target at this callsite. |
| using TargetDesc = std::pair<const MCSymbol *, uint64_t>; |
| using CallInfoTy = std::vector<TargetDesc>; |
| |
| // Get pairs of (symbol, count) for each target at this callsite. |
| // If the call is to an unknown function the symbol will be nullptr. |
| // If there is no profiling data the count will be COUNT_NO_PROFILE. |
| auto getCallInfo = [&](const BinaryBasicBlock *BB, const MCInst &Inst) { |
| CallInfoTy Counts; |
| const MCSymbol *DstSym = BC.MIB->getTargetSymbol(Inst); |
| |
| // If this is an indirect call use perf data directly. |
| if (!DstSym && BC.MIB->hasAnnotation(Inst, "CallProfile")) { |
| const auto &ICSP = BC.MIB->getAnnotationAs<IndirectCallSiteProfile>( |
| Inst, "CallProfile"); |
| for (const IndirectCallProfile &CSI : ICSP) |
| if (CSI.Symbol) |
| Counts.emplace_back(CSI.Symbol, CSI.Count); |
| } else { |
| const uint64_t Count = BB->getExecutionCount(); |
| Counts.emplace_back(DstSym, Count); |
| } |
| |
| return Counts; |
| }; |
| |
| // If the function has an invalid profile, try to use the perf data |
| // directly (if requested). If there is no perf data for this function, |
| // fall back to the CFG walker which attempts to handle missing data. |
| if (!Function->hasValidProfile() && CgFromPerfData && |
| !Function->getAllCallSites().empty()) { |
| LLVM_DEBUG( |
| dbgs() << "BOLT-DEBUG: buildCallGraph: Falling back to perf data" |
| << " for " << *Function << "\n"); |
| ++NumFallbacks; |
| const size_t Size = functionSize(Function); |
| for (const IndirectCallProfile &CSI : Function->getAllCallSites()) { |
| ++TotalCallsites; |
| |
| if (!CSI.Symbol) |
| continue; |
| |
| // The computed offset may exceed the hot part of the function; hence, |
| // bound it by the size. |
| Offset = CSI.Offset; |
| if (Offset > Size) |
| Offset = Size; |
| |
| if (!recordCall(CSI.Symbol, CSI.Count)) |
| ++NotProcessed; |
| } |
| } else { |
| for (BinaryBasicBlock *BB : Function->getLayout().blocks()) { |
| // Don't count calls from split blocks unless requested. |
| if (BB->isSplit() && !IncludeSplitCalls) |
| continue; |
| |
| // Determine whether the block is included in Function's (hot) size |
| // See BinaryFunction::estimateHotSize |
| bool BBIncludedInFunctionSize = false; |
| if (UseFunctionHotSize && Function->isSplit()) { |
| if (UseSplitHotSize) |
| BBIncludedInFunctionSize = !BB->isSplit(); |
| else |
| BBIncludedInFunctionSize = BB->getKnownExecutionCount() != 0; |
| } else { |
| BBIncludedInFunctionSize = true; |
| } |
| |
| for (MCInst &Inst : *BB) { |
| // Find call instructions and extract target symbols from each one. |
| if (BC.MIB->isCall(Inst)) { |
| const CallInfoTy CallInfo = getCallInfo(BB, Inst); |
| |
| if (!CallInfo.empty()) { |
| for (const TargetDesc &CI : CallInfo) { |
| ++TotalCallsites; |
| if (!recordCall(CI.first, CI.second)) |
| ++NotProcessed; |
| } |
| } else { |
| ++TotalCallsites; |
| ++NotProcessed; |
| } |
| } |
| // Increase Offset if needed |
| if (BBIncludedInFunctionSize) |
| Offset += BC.computeCodeSize(&Inst, &Inst + 1); |
| } |
| } |
| } |
| } |
| |
| #ifndef NDEBUG |
| bool PrintInfo = DebugFlag && isCurrentDebugType("callgraph"); |
| #else |
| bool PrintInfo = false; |
| #endif |
| if (PrintInfo || opts::Verbosity > 0) |
| outs() << format("BOLT-INFO: buildCallGraph: %u nodes, %u callsites " |
| "(%u recursive), density = %.6lf, %u callsites not " |
| "processed, %u callsites with invalid profile, " |
| "used perf data for %u stale functions.\n", |
| Cg.numNodes(), TotalCallsites, RecursiveCallsites, |
| Cg.density(), NotProcessed, NoProfileCallsites, |
| NumFallbacks); |
| |
| return Cg; |
| } |
| |
| } // namespace bolt |
| } // namespace llvm |