//===- InlineSizeEstimatorAnalysis.cpp - IR to native size from ML model --===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This implements feature and label extraction for offline supervised learning // of a IR to native size model. // //===----------------------------------------------------------------------===// #include "llvm/Analysis/InlineSizeEstimatorAnalysis.h" #ifdef LLVM_HAVE_TF_API #include "llvm/Analysis/Utils/TFUtils.h" #endif #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/PassManager.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" #include #include using namespace llvm; AnalysisKey InlineSizeEstimatorAnalysis::Key; #define DEBUG_TYPE "inline-size-estimator" #ifdef LLVM_HAVE_TF_API cl::opt TFIR2NativeModelPath( "ml-inliner-ir2native-model", cl::Hidden, cl::desc("Path to saved model evaluating native size from IR.")); namespace { unsigned getMaxInstructionID() { #define LAST_OTHER_INST(NR) return NR; #include "llvm/IR/Instruction.def" } class IRToNativeSizeLearning { public: enum class NamedFeatureIndex : size_t { InitialSize, Blocks, Calls, IsLocal, IsLinkOnceODR, IsLinkOnce, Loops, MaxLoopDepth, MaxDomTreeLevel, NumNamedFeatures }; static const size_t NumNamedFeatures = static_cast(NamedFeatureIndex::NumNamedFeatures); struct FunctionFeatures { static std::vector> ImportantInstructionSuccessions; static const size_t FeatureCount; std::array NamedFeatures = {0}; std::vector InstructionHistogram; std::vector InstructionPairHistogram; void fillTensor(int32_t *Ptr) const; int32_t &operator[](NamedFeatureIndex Pos) { return NamedFeatures[static_cast(Pos)]; } }; IRToNativeSizeLearning() = default; static FunctionFeatures getFunctionFeatures(Function &F, FunctionAnalysisManager &FAM); private: /// Sort once the feature tuples. struct SortFeatureTuples { bool IsSorted = false; SortFeatureTuples() { std::sort(FunctionFeatures::ImportantInstructionSuccessions.begin(), FunctionFeatures::ImportantInstructionSuccessions.end()); IsSorted = true; } }; static llvm::ManagedStatic TupleSorter; static bool ensureSortedTuples() { return TupleSorter->IsSorted; } }; llvm::ManagedStatic IRToNativeSizeLearning::TupleSorter; // This is a point in time - we determined including these pairs of // consecutive instructions (in the IR layout available at inline time) as // features improves the model performance. We want to move away from manual // feature selection. // The vector is given in opcode pairs rather than labels because 1) labels // weren't readily available, and 2) the successions were hand - extracted std::vector> IRToNativeSizeLearning::FunctionFeatures::ImportantInstructionSuccessions = {{1, 34}, {15, 27}, {53, 53}, {53, 34}, {1, 11}, {32, 2}, {2, 48}, {28, 48}, {1, 45}, {49, 32}, {57, 56}, {55, 53}, {1, 28}, {57, 34}, {1, 1}, {32, 28}, {32, 15}, {49, 28}, {53, 1}, {2, 53}, {48, 34}, {28, 53}, {2, 32}, {1, 40}, {32, 48}, {29, 56}, {56, 32}, {55, 56}, {48, 56}, {1, 31}, {33, 34}, {2, 28}, {1, 12}, {55, 1}, {31, 31}, {65, 1}, {33, 56}, {32, 32}, {13, 13}, {1, 26}, {13, 26}, {2, 1}, {1, 33}, {47, 49}, {64, 1}, {2, 38}, {34, 53}, {48, 2}, {55, 34}, {34, 32}, {1, 5}, {56, 13}, {2, 2}, {2, 49}, {33, 2}, {49, 39}, {56, 49}, {33, 49}, {32, 39}, {39, 57}, {29, 33}, {31, 34}, {32, 29}, {47, 15}, {13, 34}, {2, 33}, {32, 49}, {49, 34}, {56, 33}, {1, 30}, {33, 33}, {31, 33}, {2, 29}, {56, 7}, {32, 13}, {2, 55}, {56, 56}, {2, 34}, {1, 42}, {34, 49}, {1, 20}, {32, 33}, {1, 25}, {53, 28}, {1, 14}, {31, 49}, {28, 2}, {2, 13}, {2, 56}, {1, 32}, {56, 53}, {65, 65}, {33, 53}, {64, 64}, {13, 2}, {34, 33}, {1, 4}, {49, 2}, {1, 9}, {56, 1}, {33, 1}, {53, 57}, {32, 53}, {13, 56}, {32, 56}, {55, 55}, {1, 18}, {49, 56}, {34, 34}, {1, 7}, {56, 64}, {32, 1}, {13, 33}, {55, 28}, {49, 33}, {57, 57}, {56, 34}, {34, 56}, {33, 32}, {32, 40}, {1, 29}, {53, 2}, {34, 1}, {32, 34}, {49, 49}, {1, 24}, {40, 34}, {1, 13}, {38, 34}, {29, 2}, {34, 2}, {1, 39}, {1, 22}, {1, 27}, {49, 1}, {1, 8}, {56, 2}}; // We have: 9 calculated features (the features here); 1 feature for each // instruction opcode; and 1 feature for each manually-identified sequence. // For the latter 2, we build a histogram: we count the number of // occurrences of each instruction opcode or succession of instructions, // respectively. // Note that instruction opcodes start from 1. For convenience, we also have an // always 0 feature for the '0' opcode, hence the extra 1. const size_t IRToNativeSizeLearning::FunctionFeatures::FeatureCount = IRToNativeSizeLearning::FunctionFeatures::ImportantInstructionSuccessions .size() + getMaxInstructionID() + 1 + IRToNativeSizeLearning::NumNamedFeatures; size_t getSize(Function &F, TargetTransformInfo &TTI) { size_t Ret = 0; for (auto &BB : F) for (auto &I : BB) Ret += TTI.getInstructionCost( &I, TargetTransformInfo::TargetCostKind::TCK_CodeSize); return Ret; } size_t getSize(Function &F, FunctionAnalysisManager &FAM) { auto &TTI = FAM.getResult(F); return getSize(F, TTI); } unsigned getMaxDominatorTreeDepth(const Function &F, const DominatorTree &Tree) { unsigned Ret = 0; for (auto &BB : F) if (auto *TN = Tree.getNode(&BB)) Ret = std::max(Ret, TN->getLevel()); return Ret; } } // namespace IRToNativeSizeLearning::FunctionFeatures IRToNativeSizeLearning::getFunctionFeatures(Function &F, FunctionAnalysisManager &FAM) { assert(ensureSortedTuples() && "expected lazy initialization"); auto &DomTree = FAM.getResult(F); FunctionFeatures FF; size_t InstrCount = getMaxInstructionID() + 1; FF.InstructionHistogram.resize(InstrCount); FF.InstructionPairHistogram.resize( FunctionFeatures::ImportantInstructionSuccessions.size()); auto StartID = 0; auto LastID = StartID; auto getPairIndex = [](size_t a, size_t b) { auto I = std::find(FunctionFeatures::ImportantInstructionSuccessions.begin(), FunctionFeatures::ImportantInstructionSuccessions.end(), std::make_pair(a, b)); if (I == FunctionFeatures::ImportantInstructionSuccessions.end()) return -1; return static_cast(std::distance( FunctionFeatures::ImportantInstructionSuccessions.begin(), I)); }; // We don't want debug calls, because they'd just add noise. for (auto &BB : F) { for (auto I = BB.instructionsWithoutDebug().begin(), E = BB.instructionsWithoutDebug().end(); I != E; ++I) { auto ID = I->getOpcode(); ++FF.InstructionHistogram[ID]; int PairIndex = getPairIndex(LastID, ID); if (PairIndex >= 0) ++FF.InstructionPairHistogram[PairIndex]; LastID = ID; if (isa(*I)) ++FF[NamedFeatureIndex::Calls]; } } FF[NamedFeatureIndex::InitialSize] = getSize(F, FAM); FF[NamedFeatureIndex::IsLocal] = F.hasLocalLinkage(); FF[NamedFeatureIndex::IsLinkOnceODR] = F.hasLinkOnceODRLinkage(); FF[NamedFeatureIndex::IsLinkOnce] = F.hasLinkOnceLinkage(); FF[NamedFeatureIndex::Blocks] = std::distance(F.getBasicBlockList().begin(), F.getBasicBlockList().end()); auto &LI = FAM.getResult(F); FF[NamedFeatureIndex::Loops] = std::distance(LI.begin(), LI.end()); for (auto &L : LI) FF[NamedFeatureIndex::MaxLoopDepth] = std::max(FF[NamedFeatureIndex::MaxLoopDepth], static_cast(L->getLoopDepth())); FF[NamedFeatureIndex::MaxDomTreeLevel] = getMaxDominatorTreeDepth(F, DomTree); return FF; } void IRToNativeSizeLearning::FunctionFeatures::fillTensor(int32_t *Ptr) const { std::copy(NamedFeatures.begin(), NamedFeatures.end(), Ptr); Ptr += NamedFeatures.size(); std::copy(InstructionHistogram.begin(), InstructionHistogram.end(), Ptr); Ptr += InstructionHistogram.size(); std::copy(InstructionPairHistogram.begin(), InstructionPairHistogram.end(), Ptr); } bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { return !TFIR2NativeModelPath.empty(); } InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() { if (!isEvaluatorRequested()) { return; } std::vector InputNames{"serving_default_input_1"}; std::vector OutputName{"StatefulPartitionedCall"}; Evaluator = std::make_unique( TFIR2NativeModelPath.getValue().c_str(), InputNames, OutputName); if (!Evaluator || !Evaluator->isValid()) { Evaluator.reset(); return; } static const std::vector Dim{ 1, static_cast( IRToNativeSizeLearning::FunctionFeatures::FeatureCount)}; Evaluator->initInput(0, Dim); } InlineSizeEstimatorAnalysis::Result InlineSizeEstimatorAnalysis::run(const Function &F, FunctionAnalysisManager &FAM) { if (!Evaluator) return None; auto Features = IRToNativeSizeLearning::getFunctionFeatures( const_cast(F), FAM); int32_t *V = Evaluator->getInput(0); Features.fillTensor(V); auto ER = Evaluator->evaluate(); if (!ER) return None; float Ret = *ER->getTensorValue(0); if (Ret < 0.0) Ret = 0.0; return static_cast(Ret); } InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {} InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis( InlineSizeEstimatorAnalysis &&Other) : Evaluator(std::move(Other.Evaluator)) {} #else namespace llvm { class TFModelEvaluator {}; } // namespace llvm InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {} InlineSizeEstimatorAnalysis ::InlineSizeEstimatorAnalysis( InlineSizeEstimatorAnalysis &&) {} InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {} InlineSizeEstimatorAnalysis::Result InlineSizeEstimatorAnalysis::run(const Function &F, FunctionAnalysisManager &FAM) { return None; } bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { return false; } #endif PreservedAnalyses InlineSizeEstimatorAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &AM) { OS << "[InlineSizeEstimatorAnalysis] size estimate for " << F.getName() << ": " << AM.getResult(F) << "\n"; return PreservedAnalyses::all(); }