| //===- bolt/Passes/InstrumentationSummary.h ---------------------*- C++ -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // InstrumentationSummary holds all the data generated during |
| // the Instrumentation pass, which will be needed later for runtime library |
| // binary emit and linking. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #ifndef BOLT_PASSES_INSTRUMENTATION_SUMMARY_H |
| #define BOLT_PASSES_INSTRUMENTATION_SUMMARY_H |
| |
#include "llvm/ADT/DenseSet.h"
#include <cassert>
#include <cstdint>
#include <string>
#include <utility>
#include <vector>
| |
| namespace llvm { |
| |
| class MCSymbol; |
| |
| namespace bolt { |
| |
| class BinaryFunction; |
| |
| // All structs here are part of the program metadata serialization format and |
| // consist of POD types or array of POD types that are trivially mapped from |
| // disk to memory. This provides the runtime library with a basic |
| // understanding of the program structure, so it can build a CFG for each |
| // function and deduce execution counts for edges that don't require explicit |
| // counters. It also provides function names and offsets used when writing the |
| // fdata file. |
| |
/// Location information -- analogous to the concept of the same name in fdata
/// writing/reading. Unlike fdata, the function name is not stored inline: it
/// is referenced by an index into a string table that is written separately.
struct LocDescription {
  /// Index of the function name in the separately written string table.
  uint32_t FuncString;

  /// Offset component of the location.
  uint32_t Offset;
};
| |
| // Inter-function control flow transfer instrumentation |
| struct CallDescription { |
| LocDescription FromLoc; |
| uint32_t FromNode; // Node refers to the CFG node index of the call site |
| LocDescription ToLoc; |
| uint32_t Counter; |
| const BinaryFunction *Target; |
| }; |
| |
| // Spans multiple counters during runtime - this is an indirect call site |
| struct IndCallDescription { |
| LocDescription FromLoc; |
| }; |
| |
| // This is an indirect call target (any entry point from any function). This |
| // is stored sorted in the binary for fast lookups during data writing. |
| struct IndCallTargetDescription { |
| LocDescription ToLoc; |
| const BinaryFunction *Target; |
| }; |
| |
| // Intra-function control flow transfer instrumentation |
| struct EdgeDescription { |
| LocDescription FromLoc; |
| uint32_t FromNode; |
| LocDescription ToLoc; |
| uint32_t ToNode; |
| uint32_t Counter; |
| }; |
| |
/// Basic block frequency instrumentation for a CFG node. Only used for the
/// leaf nodes of the spanning tree.
struct InstrumentedNode {
  /// The instrumented CFG node.
  uint32_t Node;

  /// Counter associated with this node.
  uint32_t Counter;
};
| |
/// An entry basic block of a function. The output address is recorded so the
/// frequency of that address (looked up via the node number) can be checked
/// against all tracked calls to it, which reveals traffic coming from
/// uninstrumented code.
struct EntryNode {
  /// Node number of the entry basic block.
  uint64_t Node;

  /// Output address of the entry basic block.
  uint64_t Address;
};
| |
| // Base struct organizing all metadata pertaining to a single function |
| struct FunctionDescription { |
| const BinaryFunction *Function; |
| std::vector<InstrumentedNode> LeafNodes; |
| std::vector<EdgeDescription> Edges; |
| DenseSet<std::pair<uint32_t, uint32_t>> EdgesSet; |
| std::vector<CallDescription> Calls; |
| std::vector<EntryNode> EntryNodes; |
| }; |
| |
| /// Holds the summary of the data generated by the Instrumentation Pass. |
| /// These information will be needed for binary emit. |
| struct InstrumentationSummary { |
| /// Identify all counters used in runtime while instrumentation is running |
| std::vector<MCSymbol *> Counters; |
| |
| /// Stores function names, to be emitted to the runtime |
| std::string StringTable; |
| |
| /// Pointer to runtime instrumentation handlers |
| MCSymbol *IndCallCounterFuncPtr; |
| MCSymbol *IndTailCallCounterFuncPtr; |
| |
| /// Intra-function control flow and direct calls |
| std::vector<FunctionDescription> FunctionDescriptions; |
| |
| /// Inter-function control flow via indirect calls |
| std::vector<IndCallDescription> IndCallDescriptions; |
| std::vector<IndCallTargetDescription> IndCallTargetDescriptions; |
| |
| static constexpr uint64_t NUM_SERIALIZED_CONTAINERS = 4; |
| static constexpr uint64_t SERIALIZED_CONTAINER_SIZE = |
| sizeof(uint32_t) * NUM_SERIALIZED_CONTAINERS; |
| |
| uint32_t getFDSize() const { |
| uint32_t FuncDescSize = 0; |
| for (const FunctionDescription &Func : FunctionDescriptions) { |
| // A function description consists of containers of different |
| // descriptions. We use vectors to store them and when serializing them, |
| // we first output a uint32_t-sized field for the number of elements of |
| // the vector and then we write each element, so a simple parser know |
| // where to stop. |
| FuncDescSize += SERIALIZED_CONTAINER_SIZE + |
| Func.Edges.size() * sizeof(EdgeDescription) + |
| Func.LeafNodes.size() * sizeof(InstrumentedNode) + |
| Func.Calls.size() * sizeof(CallDescription) + |
| Func.EntryNodes.size() * sizeof(EntryNode); |
| } |
| return FuncDescSize; |
| } |
| }; |
| |
| } // namespace bolt |
| } // namespace llvm |
| |
| #endif |