| //===- InlineSizeEstimatorAnalysis.cpp - IR to native size from ML model --===// |
| // |
| // The LLVM Compiler Infrastructure |
| // |
| // This file is distributed under the University of Illinois Open Source |
| // License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This implements feature and label extraction for offline supervised learning |
| // of a IR to native size model. |
| // |
| //===----------------------------------------------------------------------===// |
| #include "llvm/Analysis/InlineSizeEstimatorAnalysis.h" |
| |
| #ifdef LLVM_HAVE_TF_API |
| #include "llvm/Analysis/Utils/TFUtils.h" |
| #endif |
| #include "llvm/Analysis/LoopInfo.h" |
| #include "llvm/Analysis/TargetLibraryInfo.h" |
| #include "llvm/Analysis/TargetTransformInfo.h" |
| #include "llvm/IR/BasicBlock.h" |
| #include "llvm/IR/Dominators.h" |
| #include "llvm/IR/Function.h" |
| #include "llvm/IR/Instructions.h" |
| #include "llvm/IR/PassManager.h" |
| #include "llvm/MC/MCAsmLayout.h" |
| #include "llvm/Support/Casting.h" |
| #include "llvm/Support/CommandLine.h" |
| #include "llvm/Support/raw_ostream.h" |
| |
| #include <algorithm> |
| #include <deque> |
| |
| using namespace llvm; |
| |
| AnalysisKey InlineSizeEstimatorAnalysis::Key; |
| |
| #define DEBUG_TYPE "inline-size-estimator" |
| |
| #ifdef LLVM_HAVE_TF_API |
| cl::opt<std::string> TFIR2NativeModelPath( |
| "ml-inliner-ir2native-model", cl::Hidden, |
| cl::desc("Path to saved model evaluating native size from IR.")); |
| |
| namespace { |
| unsigned getMaxInstructionID() { |
| #define LAST_OTHER_INST(NR) return NR; |
| #include "llvm/IR/Instruction.def" |
| } |
| |
| class IRToNativeSizeLearning { |
| public: |
| enum class NamedFeatureIndex : size_t { |
| InitialSize, |
| Blocks, |
| Calls, |
| IsLocal, |
| IsLinkOnceODR, |
| IsLinkOnce, |
| Loops, |
| MaxLoopDepth, |
| MaxDomTreeLevel, |
| |
| NumNamedFeatures |
| }; |
| static const size_t NumNamedFeatures = |
| static_cast<size_t>(NamedFeatureIndex::NumNamedFeatures); |
| struct FunctionFeatures { |
| static std::vector<std::pair<size_t, size_t>> |
| ImportantInstructionSuccessions; |
| static const size_t FeatureCount; |
| |
| std::array<int32_t, NumNamedFeatures> NamedFeatures = {0}; |
| std::vector<int32_t> InstructionHistogram; |
| std::vector<int32_t> InstructionPairHistogram; |
| |
| void fillTensor(int32_t *Ptr) const; |
| int32_t &operator[](NamedFeatureIndex Pos) { |
| return NamedFeatures[static_cast<size_t>(Pos)]; |
| } |
| }; |
| IRToNativeSizeLearning() = default; |
| |
| static FunctionFeatures getFunctionFeatures(Function &F, |
| FunctionAnalysisManager &FAM); |
| |
| private: |
| /// Sort once the feature tuples. |
| struct SortFeatureTuples { |
| bool IsSorted = false; |
| SortFeatureTuples() { |
| std::sort(FunctionFeatures::ImportantInstructionSuccessions.begin(), |
| FunctionFeatures::ImportantInstructionSuccessions.end()); |
| IsSorted = true; |
| } |
| }; |
| |
| static llvm::ManagedStatic<SortFeatureTuples> TupleSorter; |
| |
| static bool ensureSortedTuples() { return TupleSorter->IsSorted; } |
| }; |
| llvm::ManagedStatic<IRToNativeSizeLearning::SortFeatureTuples> |
| IRToNativeSizeLearning::TupleSorter; |
| |
| // This is a point in time - we determined including these pairs of |
| // consecutive instructions (in the IR layout available at inline time) as |
| // features improves the model performance. We want to move away from manual |
| // feature selection. |
| // The vector is given in opcode pairs rather than labels because 1) labels |
| // weren't readily available, and 2) the successions were hand - extracted |
| std::vector<std::pair<size_t, size_t>> |
| IRToNativeSizeLearning::FunctionFeatures::ImportantInstructionSuccessions = |
| {{1, 34}, {15, 27}, {53, 53}, {53, 34}, {1, 11}, {32, 2}, {2, 48}, |
| {28, 48}, {1, 45}, {49, 32}, {57, 56}, {55, 53}, {1, 28}, {57, 34}, |
| {1, 1}, {32, 28}, {32, 15}, {49, 28}, {53, 1}, {2, 53}, {48, 34}, |
| {28, 53}, {2, 32}, {1, 40}, {32, 48}, {29, 56}, {56, 32}, {55, 56}, |
| {48, 56}, {1, 31}, {33, 34}, {2, 28}, {1, 12}, {55, 1}, {31, 31}, |
| {65, 1}, {33, 56}, {32, 32}, {13, 13}, {1, 26}, {13, 26}, {2, 1}, |
| {1, 33}, {47, 49}, {64, 1}, {2, 38}, {34, 53}, {48, 2}, {55, 34}, |
| {34, 32}, {1, 5}, {56, 13}, {2, 2}, {2, 49}, {33, 2}, {49, 39}, |
| {56, 49}, {33, 49}, {32, 39}, {39, 57}, {29, 33}, {31, 34}, {32, 29}, |
| {47, 15}, {13, 34}, {2, 33}, {32, 49}, {49, 34}, {56, 33}, {1, 30}, |
| {33, 33}, {31, 33}, {2, 29}, {56, 7}, {32, 13}, {2, 55}, {56, 56}, |
| {2, 34}, {1, 42}, {34, 49}, {1, 20}, {32, 33}, {1, 25}, {53, 28}, |
| {1, 14}, {31, 49}, {28, 2}, {2, 13}, {2, 56}, {1, 32}, {56, 53}, |
| {65, 65}, {33, 53}, {64, 64}, {13, 2}, {34, 33}, {1, 4}, {49, 2}, |
| {1, 9}, {56, 1}, {33, 1}, {53, 57}, {32, 53}, {13, 56}, {32, 56}, |
| {55, 55}, {1, 18}, {49, 56}, {34, 34}, {1, 7}, {56, 64}, {32, 1}, |
| {13, 33}, {55, 28}, {49, 33}, {57, 57}, {56, 34}, {34, 56}, {33, 32}, |
| {32, 40}, {1, 29}, {53, 2}, {34, 1}, {32, 34}, {49, 49}, {1, 24}, |
| {40, 34}, {1, 13}, {38, 34}, {29, 2}, {34, 2}, {1, 39}, {1, 22}, |
| {1, 27}, {49, 1}, {1, 8}, {56, 2}}; |
| |
| // We have: 9 calculated features (the features here); 1 feature for each |
| // instruction opcode; and 1 feature for each manually-identified sequence. |
| // For the latter 2, we build a histogram: we count the number of |
| // occurrences of each instruction opcode or succession of instructions, |
| // respectively. |
| // Note that instruction opcodes start from 1. For convenience, we also have an |
| // always 0 feature for the '0' opcode, hence the extra 1. |
| const size_t IRToNativeSizeLearning::FunctionFeatures::FeatureCount = |
| IRToNativeSizeLearning::FunctionFeatures::ImportantInstructionSuccessions |
| .size() + |
| getMaxInstructionID() + 1 + IRToNativeSizeLearning::NumNamedFeatures; |
| |
| size_t getSize(Function &F, TargetTransformInfo &TTI) { |
| size_t Ret = 0; |
| for (auto &BB : F) |
| for (auto &I : BB) |
| Ret += TTI.getInstructionCost( |
| &I, TargetTransformInfo::TargetCostKind::TCK_CodeSize); |
| return Ret; |
| } |
| |
| size_t getSize(Function &F, FunctionAnalysisManager &FAM) { |
| auto &TTI = FAM.getResult<TargetIRAnalysis>(F); |
| return getSize(F, TTI); |
| } |
| |
| unsigned getMaxDominatorTreeDepth(const Function &F, |
| const DominatorTree &Tree) { |
| unsigned Ret = 0; |
| for (auto &BB : F) |
| if (auto *TN = Tree.getNode(&BB)) |
| Ret = std::max(Ret, TN->getLevel()); |
| return Ret; |
| } |
| } // namespace |
| |
| IRToNativeSizeLearning::FunctionFeatures |
| IRToNativeSizeLearning::getFunctionFeatures(Function &F, |
| FunctionAnalysisManager &FAM) { |
| assert(ensureSortedTuples() && "expected lazy initialization"); |
| |
| auto &DomTree = FAM.getResult<DominatorTreeAnalysis>(F); |
| FunctionFeatures FF; |
| size_t InstrCount = getMaxInstructionID() + 1; |
| FF.InstructionHistogram.resize(InstrCount); |
| |
| FF.InstructionPairHistogram.resize( |
| FunctionFeatures::ImportantInstructionSuccessions.size()); |
| |
| auto StartID = 0; |
| auto LastID = StartID; |
| auto getPairIndex = [](size_t a, size_t b) { |
| auto I = |
| std::find(FunctionFeatures::ImportantInstructionSuccessions.begin(), |
| FunctionFeatures::ImportantInstructionSuccessions.end(), |
| std::make_pair(a, b)); |
| if (I == FunctionFeatures::ImportantInstructionSuccessions.end()) |
| return -1; |
| return static_cast<int>(std::distance( |
| FunctionFeatures::ImportantInstructionSuccessions.begin(), I)); |
| }; |
| |
| // We don't want debug calls, because they'd just add noise. |
| for (auto &BB : F) { |
| for (auto I = BB.instructionsWithoutDebug().begin(), |
| E = BB.instructionsWithoutDebug().end(); |
| I != E; ++I) { |
| auto ID = I->getOpcode(); |
| |
| ++FF.InstructionHistogram[ID]; |
| int PairIndex = getPairIndex(LastID, ID); |
| if (PairIndex >= 0) |
| ++FF.InstructionPairHistogram[PairIndex]; |
| LastID = ID; |
| if (isa<CallBase>(*I)) |
| ++FF[NamedFeatureIndex::Calls]; |
| } |
| } |
| |
| FF[NamedFeatureIndex::InitialSize] = getSize(F, FAM); |
| FF[NamedFeatureIndex::IsLocal] = F.hasLocalLinkage(); |
| FF[NamedFeatureIndex::IsLinkOnceODR] = F.hasLinkOnceODRLinkage(); |
| FF[NamedFeatureIndex::IsLinkOnce] = F.hasLinkOnceLinkage(); |
| FF[NamedFeatureIndex::Blocks] = |
| std::distance(F.getBasicBlockList().begin(), F.getBasicBlockList().end()); |
| auto &LI = FAM.getResult<LoopAnalysis>(F); |
| FF[NamedFeatureIndex::Loops] = std::distance(LI.begin(), LI.end()); |
| for (auto &L : LI) |
| FF[NamedFeatureIndex::MaxLoopDepth] = |
| std::max(FF[NamedFeatureIndex::MaxLoopDepth], |
| static_cast<int32_t>(L->getLoopDepth())); |
| FF[NamedFeatureIndex::MaxDomTreeLevel] = getMaxDominatorTreeDepth(F, DomTree); |
| return FF; |
| } |
| |
| void IRToNativeSizeLearning::FunctionFeatures::fillTensor(int32_t *Ptr) const { |
| std::copy(NamedFeatures.begin(), NamedFeatures.end(), Ptr); |
| Ptr += NamedFeatures.size(); |
| std::copy(InstructionHistogram.begin(), InstructionHistogram.end(), Ptr); |
| Ptr += InstructionHistogram.size(); |
| std::copy(InstructionPairHistogram.begin(), InstructionPairHistogram.end(), |
| Ptr); |
| } |
| |
| bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { |
| return !TFIR2NativeModelPath.empty(); |
| } |
| |
| InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() { |
| if (!isEvaluatorRequested()) { |
| return; |
| } |
| std::vector<std::string> InputNames{"serving_default_input_1"}; |
| std::vector<std::string> OutputName{"StatefulPartitionedCall"}; |
| Evaluator = std::make_unique<TFModelEvaluator>( |
| TFIR2NativeModelPath.getValue().c_str(), InputNames, OutputName); |
| if (!Evaluator || !Evaluator->isValid()) { |
| Evaluator.reset(); |
| return; |
| } |
| static const std::vector<int64_t> Dim{ |
| 1, static_cast<int64_t>( |
| IRToNativeSizeLearning::FunctionFeatures::FeatureCount)}; |
| |
| Evaluator->initInput<int32_t>(0, Dim); |
| } |
| |
| InlineSizeEstimatorAnalysis::Result |
| InlineSizeEstimatorAnalysis::run(const Function &F, |
| FunctionAnalysisManager &FAM) { |
| if (!Evaluator) |
| return None; |
| auto Features = IRToNativeSizeLearning::getFunctionFeatures( |
| const_cast<Function &>(F), FAM); |
| int32_t *V = Evaluator->getInput<int32_t>(0); |
| Features.fillTensor(V); |
| auto ER = Evaluator->evaluate(); |
| if (!ER) |
| return None; |
| float Ret = *ER->getTensorValue<float>(0); |
| if (Ret < 0.0) |
| Ret = 0.0; |
| return static_cast<size_t>(Ret); |
| } |
| |
| InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {} |
| InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis( |
| InlineSizeEstimatorAnalysis &&Other) |
| : Evaluator(std::move(Other.Evaluator)) {} |
| |
| #else |
| namespace llvm { |
| class TFModelEvaluator {}; |
| } // namespace llvm |
| InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {} |
| InlineSizeEstimatorAnalysis ::InlineSizeEstimatorAnalysis( |
| InlineSizeEstimatorAnalysis &&) {} |
| InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {} |
| InlineSizeEstimatorAnalysis::Result |
| InlineSizeEstimatorAnalysis::run(const Function &F, |
| FunctionAnalysisManager &FAM) { |
| return None; |
| } |
| bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { return false; } |
| #endif |