Reapply "[llvm] Native size estimator for training -Oz inliner"

This reverts commit 9908a3b9f521c954cbf6adcec35b14b2f6c8da49. The fix was to exclude the content of TFUtils.h (automatically included in the LLVM_Analysis module, when LLVM_ENABLE_MODULES is enabled). Differential Revision: https://reviews.llvm.org/D82817
2024-11-22 02:33:06 +01:00 · 2020-07-13 14:12:32 -07:00 · 2020-07-13 14:12:32 -07:00 · d4fa8385c7
commit d4fa8385c7
parent 58cf4b3a2d
14 changed files with 11466 additions and 10 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -981,6 +981,18 @@ if (NOT TENSORFLOW_AOT_PATH STREQUAL "")
    ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}/tf_runtime)
 endif()
 # User-provided install prefix of the TensorFlow C library. Empty by default.
 set(TENSORFLOW_C_LIB_PATH "" CACHE PATH "Path to TensorFlow C library install")
 # Look for a library named "tensorflow" under <prefix>/lib; the result is
 # stored in tensorflow_c_api.
 find_library(tensorflow_c_api tensorflow PATHS ${TENSORFLOW_C_LIB_PATH}/lib)
 # Similar to the above Tensorflow dependency, please refer to the same script.
 # In this case, the latest C API library is available for download from
 # https://www.tensorflow.org/install/lang_c
 # When the library was found: record LLVM_HAVE_TF_API in the cache, define the
 # LLVM_HAVE_TF_API preprocessor macro, and add <prefix>/include to the include
 # path.
 if (tensorflow_c_api)
  set(LLVM_HAVE_TF_API "ON" CACHE BOOL "Full Tensorflow API available")
  add_definitions("-DLLVM_HAVE_TF_API")
  include_directories(${TENSORFLOW_C_LIB_PATH}/include)
 endif()
 # Put this before tblgen. Else we have a circular dependence.
 add_subdirectory(lib/Demangle)
 add_subdirectory(lib/Support)
--- a/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h
+++ b/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h
@ -0,0 +1,35 @@
 //===- InlineSizeEstimatorAnalysis.h - ML size estimator --------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 #ifndef LLVM_ANALYSIS_INLINESIZEESTIMATORANALYSIS_H
 #define LLVM_ANALYSIS_INLINESIZEESTIMATORANALYSIS_H
 #include "llvm/IR/PassManager.h"
 namespace llvm {
 class Function;
 class TFModelEvaluator;
 /// Function analysis producing an optional size estimate for a function.
 /// The result is empty (None) when no evaluator is held by the analysis.
 class InlineSizeEstimatorAnalysis
    : public AnalysisInfoMixin<InlineSizeEstimatorAnalysis> {
 public:
  // The special members are only declared here: TFModelEvaluator is merely
  // forward-declared in this header, and it is held through a unique_ptr, so
  // their definitions must live where the type is complete.
  InlineSizeEstimatorAnalysis();
  InlineSizeEstimatorAnalysis(InlineSizeEstimatorAnalysis &&);
  ~InlineSizeEstimatorAnalysis();
  static AnalysisKey Key;
  /// The estimate, or None when it could not be computed.
  using Result = Optional<size_t>;
  /// Compute the estimate for \p F, using other analyses obtained from \p FAM.
  Result run(const Function &F, FunctionAnalysisManager &FAM);
  /// Whether an evaluator was requested by the user.
  static bool isEvaluatorRequested();
 private:
  /// The model evaluator; null when none was requested or it failed to load.
  std::unique_ptr<TFModelEvaluator> Evaluator;
 };
 } // namespace llvm
 #endif // LLVM_ANALYSIS_INLINESIZEESTIMATORANALYSIS_H
--- a/include/llvm/Analysis/Utils/TFUtils.h
+++ b/include/llvm/Analysis/Utils/TFUtils.h
@ -0,0 +1,138 @@
 //===- TFUtils.h - utilities for tensorflow C API ---------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 #ifndef LLVM_ANALYSIS_UTILS_TFUTILS_H
 #define LLVM_ANALYSIS_UTILS_TFUTILS_H
 #ifdef LLVM_HAVE_TF_API
 #include "tensorflow/c/c_api.h"
 #include "llvm/IR/LLVMContext.h"
 #include <memory>
 #include <vector>
 namespace llvm {
 /// Load a SavedModel, find the given inputs and outputs, and setup storage
 /// for input tensors. The user is responsible for correctly dimensioning the
 /// input tensors and setting their values before calling evaluate().
 /// To initialize:
 /// - construct the object
 /// - initialize the input tensors using initInput. Indices must correspond to
 ///   indices in the InputNames used at construction.
 /// To use:
 /// - set input values by using getInput to get each input tensor, and then
 ///   setting internal scalars, for all dimensions (tensors are row-major:
 ///   https://github.com/tensorflow/tensorflow/blob/r1.5/tensorflow/c/c_api.h#L205)
 /// - call evaluate. The input tensors' values are not consumed after this, and
 ///   may still be read.
 /// - read the outputs through the returned EvaluationResult, using
 ///   getTensorValue with the index of the output in OutputNames.
 /// - the output tensors are deallocated by the EvaluationResult's destructor;
 ///   the caller must not delete them.
 class TFModelEvaluator final {
 public:
  /// The result of a model evaluation. Handles the lifetime of the output
  /// TF_Tensor objects, which means that their values need to be used before
  /// the EvaluationResult's dtor is called.
  class EvaluationResult {
  public:
    /// Deletes every non-null output tensor held by this result.
    ~EvaluationResult() {
      for (auto *P : Output)
        if (P)
          TF_DeleteTensor(P);
    }
    EvaluationResult(const EvaluationResult &) = delete;
    /// Takes over the tensors of \p Other; \p Other's vector is explicitly
    /// emptied so its destructor has nothing left to delete.
    EvaluationResult(EvaluationResult &&Other)
        : OutputSize(Other.OutputSize), Output(std::move(Other.Output)) {
      Other.Output.clear();
    };
    /// Get a pointer to the first element of the tensor at Index.
    /// No bounds or type checking is performed.
    template <typename T> T *getTensorValue(size_t Index) {
      return static_cast<T *>(TF_TensorData(Output[Index]));
    }
  private:
    friend class TFModelEvaluator;
    /// Only TFModelEvaluator creates results; the output slots start as null.
    EvaluationResult(size_t OutputSize)
        : OutputSize(OutputSize), Output(OutputSize){};
    const size_t OutputSize;
    std::vector<TF_Tensor *> Output;
  };
  // Owning handles for TensorFlow C objects, each paired with its C deleter.
  using TFGraphPtr = std::unique_ptr<TF_Graph, decltype(&TF_DeleteGraph)>;
  using TFSessionOptionsPtr =
      std::unique_ptr<TF_SessionOptions, decltype(&TF_DeleteSessionOptions)>;
  using TFStatusPtr = std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)>;
  /// Load the SavedModel at \p SavedModelPath using the given \p Tags, and bind
  /// \p InputNames and \p OutputNames to nodes of its graph. Check isValid()
  /// afterwards to find out whether this succeeded.
  TFModelEvaluator(StringRef SavedModelPath,
                   const std::vector<std::string> &InputNames,
                   const std::vector<std::string> &OutputNames,
                   const char *Tags = "serve");
  ~TFModelEvaluator();
  TFModelEvaluator(const TFModelEvaluator &) = delete;
  TFModelEvaluator(TFModelEvaluator &&) = delete;
  /// Evaluate the model, assuming it is valid. Returns None if the evaluation
  /// fails or the model is invalid, or an EvaluationResult otherwise. The
  /// inputs are assumed to have been already provided via getInput(). When
  /// returning None, it also marks the object invalid. The returned
  /// EvaluationResult holds one tensor per entry of OutputNames, matching by
  /// index, and owns those tensors.
  Optional<EvaluationResult> evaluate();
  /// Provides access to the input vector. It has one slot per input name; the
  /// tensor in each slot needs to be allocated by the user via initInput().
  std::vector<TF_Tensor *> &getInput() { return Input; }
  /// Returns true if the tensorflow model was loaded successfully, false
  /// otherwise.
  bool isValid() const { return !!Session; }
  /// Initialize the input at Index as a tensor of the given type and dimensions
  void initInput(int Index, TF_DataType Type,
                 const std::vector<int64_t> &Dimensions);
 private:
  /// The objects necessary for carrying out an evaluation of the SavedModel.
  /// They are expensive to set up, and we maintain them across all the
  /// evaluations of the model.
  TF_Session *Session = nullptr;
  TFGraphPtr Graph;
  TFSessionOptionsPtr Options;
  /// The specification of the input nodes.
  std::vector<TF_Output> InputFeed;
  /// The input tensors. They must match by index of the corresponding InputFeed
  /// value. We set up the tensors once and just mutate their scalars before
  /// each evaluation. The input tensors keep their value after an evaluation.
  std::vector<TF_Tensor *> Input;
  /// The specification of the output nodes. When evaluating, the tensors in the
  /// output tensor vector must match by index the corresponding element in the
  /// OutputFeed.
  std::vector<TF_Output> OutputFeed;
  /// Reusable utility for deleting the session.
  void deleteSession();
  /// Reusable utility for ensuring we can bind the requested Name to a node in
  /// the SavedModel Graph.
  bool checkReportAndReset(const TF_Output &Output, StringRef Name);
 };
 } // namespace llvm
 #endif // LLVM_HAVE_TF_API
 #endif // LLVM_ANALYSIS_UTILS_TFUTILS_H
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@ -1,17 +1,35 @@
 set(CommonMLSources MLInlineAdvisor.cpp)
 set(ReleaseModeMLSources ReleaseModeModelRunner.cpp)
 set(DevelopmentModeMLSources TFUtils.cpp)
-if (DEFINED LLVM_HAVE_TF_AOT)
+if (DEFINED LLVM_HAVE_TF_AOT OR DEFINED LLVM_HAVE_TF_API)
-  include(TensorFlowCompile)
+  set(MLPolicySources ${CommonMLSources})
-  tfcompile(models/inliner serve action InlinerSizeModel llvm::InlinerSizeModel)
+  if (DEFINED LLVM_HAVE_TF_AOT)
-  list(APPEND ReleaseModeMLSources
+    include(TensorFlowCompile)
-    $<TARGET_OBJECTS:tf_xla_runtime_objects>
+    tfcompile(models/inliner serve action InlinerSizeModel llvm::InlinerSizeModel)
-    ${GENERATED_OBJS}
+    list(APPEND ReleaseModeMLSources
-  )
+      $<TARGET_OBJECTS:tf_xla_runtime_objects>
-  set(MLPolicySources ${CommonMLSources} ${ReleaseModeMLSources})
+      ${GENERATED_OBJS}
    )
    LIST(APPEND MLPolicySources ${ReleaseModeMLSources})
  else()
    LIST(APPEND LLVM_OPTIONAL_SOURCES ${ReleaseModeMLSources})
  endif()
  if (DEFINED LLVM_HAVE_TF_API)
    LIST(APPEND MLPolicySources ${DevelopmentModeMLSources})
    LIST(APPEND MLLinkDeps ${tensorflow_c_api})
  else()
    LIST(APPEND LLVM_OPTIONAL_SOURCES ${DevelopmentModeMLSources})
  endif()
 else()
-  set(LLVM_OPTIONAL_SOURCES ${CommonMLSources} ${ReleaseModeMLSources})
+  LIST(APPEND LLVM_OPTIONAL_SOURCES 
    ${CommonMLSources}
    ${DevelopmentModeMLSources}
    ${ReleaseModeMLSources}
    )
 endif()
 add_llvm_component_library(LLVMAnalysis
  AliasAnalysis.cpp
@ -57,6 +75,7 @@ add_llvm_component_library(LLVMAnalysis
  InlineCost.cpp
  InlineAdvisor.cpp
  InlineFeaturesAnalysis.cpp
  InlineSizeEstimatorAnalysis.cpp
  InstCount.cpp
  InstructionPrecedenceTracking.cpp
  InstructionSimplify.cpp
@ -124,4 +143,7 @@ add_llvm_component_library(LLVMAnalysis
  DEPENDS
  intrinsics_gen
  LINK_LIBS
  ${MLLinkDeps}
  )
--- a/lib/Analysis/InlineSizeEstimatorAnalysis.cpp
+++ b/lib/Analysis/InlineSizeEstimatorAnalysis.cpp
@ -0,0 +1,299 @@
 //===- InlineSizeEstimatorAnalysis.cpp - IR to native size from ML model --===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 // This implements feature and label extraction for offline supervised learning
 // of a IR to native size model.
 //
 //===----------------------------------------------------------------------===//
 #include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
 #ifdef LLVM_HAVE_TF_API
 #include "llvm/Analysis/Utils/TFUtils.h"
 #endif
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/MC/MCAsmLayout.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <deque>
 using namespace llvm;
 AnalysisKey InlineSizeEstimatorAnalysis::Key;
 #define DEBUG_TYPE "inline-size-estimator"
 #ifdef LLVM_HAVE_TF_API
 // Hidden command line option giving the location of the saved model. The
 // evaluator is considered requested only when this is non-empty. The option
 // has external linkage; the unit test in this change refers to it via
 // 'extern'.
 cl::opt<std::string> TFIR2NativeModelPath(
    "ml-inliner-ir2native-model", cl::Hidden,
    cl::desc("Path to saved model evaluating native size from IR."));
 namespace {
 // Returns the value Instruction.def passes to LAST_OTHER_INST. The function
 // body is produced entirely by the include below: the macro turns that entry
 // into a return statement.
 // NOTE(review): this relies on Instruction.def expanding LAST_OTHER_INST
 // exactly once, with the highest opcode number - confirm against the .def.
 unsigned getMaxInstructionID() {
 #define LAST_OTHER_INST(NR) return NR;
 #include "llvm/IR/Instruction.def"
 }
 /// Feature extraction for the IR-to-native-size model.
 class IRToNativeSizeLearning {
 public:
  /// Positions of the individually computed ("named") features inside
  /// FunctionFeatures::NamedFeatures. NumNamedFeatures is not a feature; it is
  /// the count of the enumerators before it.
  enum class NamedFeatureIndex : size_t {
    InitialSize,
    Blocks,
    Calls,
    IsLocal,
    IsLinkOnceODR,
    IsLinkOnce,
    Loops,
    MaxLoopDepth,
    MaxDomTreeLevel,
    NumNamedFeatures
  };
  static const size_t NumNamedFeatures =
      static_cast<size_t>(NamedFeatureIndex::NumNamedFeatures);
  /// All the features of one function: the named features, a per-opcode
  /// histogram, and a histogram of selected consecutive opcode pairs.
  struct FunctionFeatures {
    /// The opcode pairs that are counted. The position of a pair in this
    /// vector is its position in InstructionPairHistogram.
    static std::vector<std::pair<size_t, size_t>>
        ImportantInstructionSuccessions;
    /// Total number of scalars written by fillTensor.
    static const size_t FeatureCount;
    std::array<int32_t, NumNamedFeatures> NamedFeatures = {0};
    std::vector<int32_t> InstructionHistogram;
    std::vector<int32_t> InstructionPairHistogram;
    /// Write the features contiguously starting at \p Ptr: named features,
    /// then the opcode histogram, then the pair histogram. The caller must
    /// provide enough room.
    void fillTensor(int32_t *Ptr) const;
    /// Mutable access to a named feature.
    int32_t &operator[](NamedFeatureIndex Pos) {
      return NamedFeatures[static_cast<size_t>(Pos)];
    }
  };
  IRToNativeSizeLearning() = default;
  /// Compute the features of \p F, obtaining the required analyses from
  /// \p FAM.
  static FunctionFeatures getFunctionFeatures(Function &F,
                                              FunctionAnalysisManager &FAM);
 private:
  /// Sort once the feature tuples.
  struct SortFeatureTuples {
    bool IsSorted = false;
    SortFeatureTuples() {
      std::sort(FunctionFeatures::ImportantInstructionSuccessions.begin(),
                FunctionFeatures::ImportantInstructionSuccessions.end());
      IsSorted = true;
    }
  };
  /// Lazily constructed; the sort above runs when this is first dereferenced.
  static llvm::ManagedStatic<SortFeatureTuples> TupleSorter;
  /// Dereferences TupleSorter, which triggers the sort on first use, and
  /// reports that the tuples are sorted. The sort is a side effect of this
  /// call, so it must actually be evaluated for the sort to happen.
  static bool ensureSortedTuples() { return TupleSorter->IsSorted; }
 };
 llvm::ManagedStatic<IRToNativeSizeLearning::SortFeatureTuples>
    IRToNativeSizeLearning::TupleSorter;
 // This is a point in time - we determined including these pairs of
 // consecutive instructions (in the IR layout available at inline time) as
 // features improves the model performance. We want to move away from manual
 // feature selection.
 // The vector is given in opcode pairs rather than labels because 1) labels
 // weren't readily available, and 2) the successions were hand-extracted.
 std::vector<std::pair<size_t, size_t>>
    IRToNativeSizeLearning::FunctionFeatures::ImportantInstructionSuccessions =
        {{1, 34},  {15, 27}, {53, 53}, {53, 34}, {1, 11},  {32, 2},  {2, 48},
         {28, 48}, {1, 45},  {49, 32}, {57, 56}, {55, 53}, {1, 28},  {57, 34},
         {1, 1},   {32, 28}, {32, 15}, {49, 28}, {53, 1},  {2, 53},  {48, 34},
         {28, 53}, {2, 32},  {1, 40},  {32, 48}, {29, 56}, {56, 32}, {55, 56},
         {48, 56}, {1, 31},  {33, 34}, {2, 28},  {1, 12},  {55, 1},  {31, 31},
         {65, 1},  {33, 56}, {32, 32}, {13, 13}, {1, 26},  {13, 26}, {2, 1},
         {1, 33},  {47, 49}, {64, 1},  {2, 38},  {34, 53}, {48, 2},  {55, 34},
         {34, 32}, {1, 5},   {56, 13}, {2, 2},   {2, 49},  {33, 2},  {49, 39},
         {56, 49}, {33, 49}, {32, 39}, {39, 57}, {29, 33}, {31, 34}, {32, 29},
         {47, 15}, {13, 34}, {2, 33},  {32, 49}, {49, 34}, {56, 33}, {1, 30},
         {33, 33}, {31, 33}, {2, 29},  {56, 7},  {32, 13}, {2, 55},  {56, 56},
         {2, 34},  {1, 42},  {34, 49}, {1, 20},  {32, 33}, {1, 25},  {53, 28},
         {1, 14},  {31, 49}, {28, 2},  {2, 13},  {2, 56},  {1, 32},  {56, 53},
         {65, 65}, {33, 53}, {64, 64}, {13, 2},  {34, 33}, {1, 4},   {49, 2},
         {1, 9},   {56, 1},  {33, 1},  {53, 57}, {32, 53}, {13, 56}, {32, 56},
         {55, 55}, {1, 18},  {49, 56}, {34, 34}, {1, 7},   {56, 64}, {32, 1},
         {13, 33}, {55, 28}, {49, 33}, {57, 57}, {56, 34}, {34, 56}, {33, 32},
         {32, 40}, {1, 29},  {53, 2},  {34, 1},  {32, 34}, {49, 49}, {1, 24},
         {40, 34}, {1, 13},  {38, 34}, {29, 2},  {34, 2},  {1, 39},  {1, 22},
         {1, 27},  {49, 1},  {1, 8},   {56, 2}};
 // The total feature count is the sum of three groups:
 // - the named features (one per NamedFeatureIndex entry, NumNamedFeatures in
 //   total);
 // - 1 feature for each instruction opcode;
 // - 1 feature for each manually-identified succession of two instructions.
 // For the latter 2, we build a histogram: we count the number of
 // occurrences of each instruction opcode or succession of instructions,
 // respectively.
 // Note that instruction opcodes start from 1. For convenience, we also have an
 // always 0 feature for the '0' opcode, hence the extra 1.
 const size_t IRToNativeSizeLearning::FunctionFeatures::FeatureCount =
    IRToNativeSizeLearning::FunctionFeatures::ImportantInstructionSuccessions
        .size() +
    getMaxInstructionID() + 1 + IRToNativeSizeLearning::NumNamedFeatures;
 // Sum, over every instruction of F, the code-size cost reported by TTI.
 size_t getSize(Function &F, TargetTransformInfo &TTI) {
  size_t TotalCost = 0;
  for (BasicBlock &Block : F) {
    for (Instruction &Inst : Block) {
      TotalCost += TTI.getInstructionCost(
          &Inst, TargetTransformInfo::TargetCostKind::TCK_CodeSize);
    }
  }
  return TotalCost;
 }
 // Convenience overload: fetch the TargetTransformInfo for F from FAM and
 // delegate to the overload above.
 size_t getSize(Function &F, FunctionAnalysisManager &FAM) {
  return getSize(F, FAM.getResult<TargetIRAnalysis>(F));
 }
 unsigned getMaxDominatorTreeDepth(const Function &F,
                                  const DominatorTree &Tree) {
  unsigned Ret = 0;
  for (auto &BB : F)
    if (auto *TN = Tree.getNode(&BB))
      Ret = std::max(Ret, TN->getLevel());
  return Ret;
 }
 } // namespace
 /// Compute the feature set of \p F: the named features, the opcode histogram
 /// and the histogram of the selected consecutive opcode pairs. Debug
 /// instructions are skipped. Required analyses are fetched from \p FAM.
 IRToNativeSizeLearning::FunctionFeatures
 IRToNativeSizeLearning::getFunctionFeatures(Function &F,
                                             FunctionAnalysisManager &FAM) {
  // The one-time sort of the succession table is a side effect of
  // ensureSortedTuples(). It must be evaluated outside of assert(), otherwise
  // builds with NDEBUG never sort the table and end up with pair indices that
  // differ from those of assert-enabled builds.
  const bool TuplesSorted = ensureSortedTuples();
  (void)TuplesSorted;
  assert(TuplesSorted && "expected lazy initialization");

  const auto &Successions = FunctionFeatures::ImportantInstructionSuccessions;
  auto &DomTree = FAM.getResult<DominatorTreeAnalysis>(F);
  FunctionFeatures FF;
  size_t InstrCount = getMaxInstructionID() + 1;
  FF.InstructionHistogram.resize(InstrCount);
  FF.InstructionPairHistogram.resize(Successions.size());

  // Position of the pair (a, b) in the sorted succession table, or -1 if the
  // pair is not one we track. The table is sorted (see above), so a binary
  // search finds the same position a linear scan would.
  auto getPairIndex = [&Successions](size_t a, size_t b) {
    const auto Key = std::make_pair(a, b);
    auto I = std::lower_bound(Successions.begin(), Successions.end(), Key);
    if (I == Successions.end() || *I != Key)
      return -1;
    return static_cast<int>(std::distance(Successions.begin(), I));
  };

  // Opcode of the previously visited instruction. It starts at 0 (not a real
  // opcode) and is carried across basic block boundaries.
  unsigned LastID = 0;
  // We don't want debug calls, because they'd just add noise.
  for (auto &BB : F) {
    for (const auto &I : BB.instructionsWithoutDebug()) {
      auto ID = I.getOpcode();
      ++FF.InstructionHistogram[ID];
      int PairIndex = getPairIndex(LastID, ID);
      if (PairIndex >= 0)
        ++FF.InstructionPairHistogram[PairIndex];
      LastID = ID;
      if (isa<CallBase>(I))
        ++FF[NamedFeatureIndex::Calls];
    }
  }

  FF[NamedFeatureIndex::InitialSize] = getSize(F, FAM);
  FF[NamedFeatureIndex::IsLocal] = F.hasLocalLinkage();
  FF[NamedFeatureIndex::IsLinkOnceODR] = F.hasLinkOnceODRLinkage();
  FF[NamedFeatureIndex::IsLinkOnce] = F.hasLinkOnceLinkage();
  FF[NamedFeatureIndex::Blocks] =
      std::distance(F.getBasicBlockList().begin(), F.getBasicBlockList().end());
  auto &LI = FAM.getResult<LoopAnalysis>(F);
  // NOTE(review): iterating LI directly presumably visits top-level loops
  // only, so Loops and MaxLoopDepth would not account for nested loops -
  // confirm this matches how the model was trained before changing it.
  FF[NamedFeatureIndex::Loops] = std::distance(LI.begin(), LI.end());
  for (auto &L : LI)
    FF[NamedFeatureIndex::MaxLoopDepth] =
        std::max(FF[NamedFeatureIndex::MaxLoopDepth],
                 static_cast<int32_t>(L->getLoopDepth()));
  FF[NamedFeatureIndex::MaxDomTreeLevel] = getMaxDominatorTreeDepth(F, DomTree);
  return FF;
 }
 // Serialize the features back to back into the buffer at Ptr: the named
 // features first, then the opcode histogram, then the pair histogram.
 void IRToNativeSizeLearning::FunctionFeatures::fillTensor(int32_t *Ptr) const {
  // std::copy returns the position one past the last element written, which
  // is where the next group starts.
  int32_t *Cursor = Ptr;
  Cursor = std::copy(NamedFeatures.begin(), NamedFeatures.end(), Cursor);
  Cursor = std::copy(InstructionHistogram.begin(), InstructionHistogram.end(),
                     Cursor);
  std::copy(InstructionPairHistogram.begin(), InstructionPairHistogram.end(),
            Cursor);
 }
 // The evaluator is requested when a model path was given on the command line
 // (i.e. the option value is not empty).
 bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() {
  return !TFIR2NativeModelPath.empty();
 }
 // Load the model if one was requested. Evaluator is left null when no model
 // was requested or when the model could not be loaded.
 InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {
  if (!isEvaluatorRequested())
    return;

  const std::vector<std::string> InputNames{"serving_default_input_1"};
  const std::vector<std::string> OutputName{"StatefulPartitionedCall"};
  auto Loaded = std::make_unique<TFModelEvaluator>(
      TFIR2NativeModelPath.getValue().c_str(), InputNames, OutputName);
  if (!Loaded || !Loaded->isValid())
    return;

  // The single input is a 1 x FeatureCount tensor of 32-bit integers.
  static const std::vector<int64_t> Dim{
      1, static_cast<int64_t>(
             IRToNativeSizeLearning::FunctionFeatures::FeatureCount)};
  Loaded->initInput(0, TF_INT32, Dim);
  Evaluator = std::move(Loaded);
 }
 // Extract the features of F, feed them to the model and return the predicted
 // size. Negative predictions are reported as 0. Returns None when there is no
 // evaluator or the evaluation fails.
 InlineSizeEstimatorAnalysis::Result
 InlineSizeEstimatorAnalysis::run(const Function &F,
                                  FunctionAnalysisManager &FAM) {
  if (!Evaluator)
    return None;

  // Feature extraction takes a non-const Function.
  Function &MutableF = const_cast<Function &>(F);
  auto Features = IRToNativeSizeLearning::getFunctionFeatures(MutableF, FAM);
  auto *InputBuffer =
      static_cast<int32_t *>(TF_TensorData(Evaluator->getInput()[0]));
  Features.fillTensor(InputBuffer);

  auto Outcome = Evaluator->evaluate();
  if (!Outcome)
    return None;

  const float Predicted = *Outcome->getTensorValue<float>(0);
  const float Clamped = (Predicted < 0.0) ? 0.0f : Predicted;
  return static_cast<size_t>(Clamped);
 }
 // Defined in this file, where TFModelEvaluator is a complete type, so that
 // the unique_ptr member can be destroyed.
 InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {}
 // Moving transfers ownership of the evaluator (if any) from Other.
 InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis(
    InlineSizeEstimatorAnalysis &&Other)
    : Evaluator(std::move(Other.Evaluator)) {}
 #else
 // Fallback used when LLVM_HAVE_TF_API is not defined: the analysis exists but
 // never produces an estimate.
 namespace llvm {
 // Empty placeholder so that the unique_ptr<TFModelEvaluator> member of the
 // analysis has a complete type in this translation unit.
 class TFModelEvaluator {};
 } // namespace llvm
 InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {}
 InlineSizeEstimatorAnalysis ::InlineSizeEstimatorAnalysis(
    InlineSizeEstimatorAnalysis &&) {}
 InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {}
 // Always None: there is no model to evaluate.
 InlineSizeEstimatorAnalysis::Result
 InlineSizeEstimatorAnalysis::run(const Function &F,
                                  FunctionAnalysisManager &FAM) {
  return None;
 }
 // An evaluator can never be requested in this configuration.
 bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { return false; }
 #endif
--- a/lib/Analysis/TFUtils.cpp
+++ b/lib/Analysis/TFUtils.cpp
@ -0,0 +1,143 @@
 //===- TFUtils.cpp - tensorflow evaluation utilities ----------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 // This file implements utilities for interfacing with tensorflow C APIs.
 //
 //===----------------------------------------------------------------------===//
 #include "llvm/Analysis/Utils/TFUtils.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/raw_ostream.h"
 #include "tensorflow/c/c_api_experimental.h"
 #include <cassert>
 using namespace llvm;
 namespace {
 // Performs the TensorFlow library initialization from its constructor, and
 // records that it did so.
 struct TFInitializer {
  TFInitializer() {
    assert(!IsInitialized && "TFInitialized should be called only once");
    // TF_InitMain takes a usage string plus argc/argv. We hand it an argument
    // vector made of one empty string.
    int Argc = 1;
    const char *Name = "";
    const char **NamePtr = &Name;
    TF_InitMain(Name, &Argc, const_cast<char ***>(&NamePtr));
    IsInitialized = true;
  }
  bool IsInitialized = false;
 };
 // Constructed lazily, on first dereference.
 llvm::ManagedStatic<TFInitializer> TFLibInitializer;
 // Dereferencing TFLibInitializer runs the initialization above on first use;
 // returns whether TensorFlow has been initialized.
 bool ensureInitTF() { return TFLibInitializer->IsInitialized; }
 // Create a new TF graph, owned by a smart pointer that deletes it with
 // TF_DeleteGraph.
 TFModelEvaluator::TFGraphPtr createTFGraph() {
  TF_Graph *RawGraph = TF_NewGraph();
  return TFModelEvaluator::TFGraphPtr(RawGraph, &TF_DeleteGraph);
 }
 // Create a new TF status object, owned by a smart pointer that deletes it
 // with TF_DeleteStatus.
 TFModelEvaluator::TFStatusPtr createTFStatus() {
  TF_Status *RawStatus = TF_NewStatus();
  return TFModelEvaluator::TFStatusPtr(RawStatus, &TF_DeleteStatus);
 }
 // Create new TF session options, owned by a smart pointer that deletes them
 // with TF_DeleteSessionOptions.
 TFModelEvaluator::TFSessionOptionsPtr createTFSessionOptions() {
  TF_SessionOptions *RawOptions = TF_NewSessionOptions();
  return TFModelEvaluator::TFSessionOptionsPtr(RawOptions,
                                               &TF_DeleteSessionOptions);
 }
 } // namespace
 /// Load the SavedModel found at \p SavedModelPath with the given \p Tags and
 /// resolve \p InputNames / \p OutputNames to operations of its graph. On any
 /// failure an error is printed to errs() and the object is left invalid
 /// (isValid() returns false).
 TFModelEvaluator::TFModelEvaluator(StringRef SavedModelPath,
                                    const std::vector<std::string> &InputNames,
                                    const std::vector<std::string> &OutputNames,
                                    const char *Tags)
    : Graph(createTFGraph()), Options(createTFSessionOptions()),
      InputFeed(InputNames.size()), Input(InputNames.size()),
      OutputFeed(OutputNames.size()) {
  if (!ensureInitTF()) {
    errs() << "Tensorflow should have been initialized";
    return;
  }
  auto Status = createTFStatus();
  Session = TF_LoadSessionFromSavedModel(Options.get(), nullptr,
                                         SavedModelPath.str().c_str(), &Tags, 1,
                                         Graph.get(), nullptr, Status.get());
  if (TF_GetCode(Status.get()) != TF_Code::TF_OK) {
    errs() << TF_Message(Status.get());
    deleteSession();
    // Nothing was loaded, so there is no point in looking up names: doing so
    // would only add a misleading "Could not find TF_Output" error on top of
    // the load failure reported above.
    return;
  }
  // Each name is bound to output 0 of the operation carrying that name.
  for (size_t I = 0; I < InputNames.size(); ++I) {
    InputFeed[I] = {
        TF_GraphOperationByName(Graph.get(), (InputNames[I]).c_str()), 0};
    if (!checkReportAndReset(InputFeed[I], InputNames[I]))
      return;
  }
  for (size_t I = 0; I < OutputNames.size(); ++I) {
    OutputFeed[I] = {
        TF_GraphOperationByName(Graph.get(), (OutputNames[I]).c_str()), 0};
    if (!checkReportAndReset(OutputFeed[I], OutputNames[I]))
      return;
  }
 }
 // Release the input tensors first, then the session.
 TFModelEvaluator::~TFModelEvaluator() {
  for (TF_Tensor *InputTensor : Input)
    TF_DeleteTensor(InputTensor);
  deleteSession();
 }
 // Returns true if Output was bound to an operation. Otherwise reports the
 // missing Name, invalidates the evaluator by deleting the session, and
 // returns false.
 bool TFModelEvaluator::checkReportAndReset(const TF_Output &Output,
                                            StringRef Name) {
  const bool IsBound = Output.oper != nullptr;
  if (!IsBound) {
    errs() << "Could not find TF_Output named: " + Name;
    deleteSession();
  }
  return IsBound;
 }
 // Delete the session, if there is one. Afterwards Session is null, which is
 // what marks the evaluator as invalid. A failure to delete is only reported.
 void TFModelEvaluator::deleteSession() {
  if (!Session)
    return;
  auto DeleteStatus = createTFStatus();
  TF_DeleteSession(Session, DeleteStatus.get());
  Session = nullptr;
  const bool Deleted = TF_GetCode(DeleteStatus.get()) == TF_Code::TF_OK;
  if (!Deleted)
    errs() << "Could not delete TF session";
 }
 // Run the session on the current input tensors. On success, returns the
 // output tensors wrapped in an EvaluationResult. On failure, prints the TF
 // error, invalidates the evaluator and returns None.
 Optional<TFModelEvaluator::EvaluationResult> TFModelEvaluator::evaluate() {
  if (!isValid())
    return None;
  EvaluationResult Outcome(OutputFeed.size());
  auto RunStatus = createTFStatus();
  TF_SessionRun(Session, nullptr, InputFeed.data(), Input.data(), Input.size(),
                OutputFeed.data(), Outcome.Output.data(),
                Outcome.Output.size(), nullptr, 0, nullptr, RunStatus.get());
  const bool Succeeded = TF_GetCode(RunStatus.get()) == TF_Code::TF_OK;
  if (!Succeeded) {
    errs() << TF_Message(RunStatus.get());
    deleteSession();
    return None;
  }
  return Outcome;
 }
 void TFModelEvaluator::initInput(int Index, TF_DataType Type,
                                 const std::vector<int64_t> &Dimensions) {
  int64_t TotalSize = TF_DataTypeSize(Type);
  for (auto &D : Dimensions)
    TotalSize *= D;
  Input[Index] =
      TF_AllocateTensor(Type, Dimensions.data(), Dimensions.size(), TotalSize);
  std::memset(TF_TensorData(Input[Index]), 0, TotalSize);
 }
--- a/lib/Passes/PassBuilder.cpp
+++ b/lib/Passes/PassBuilder.cpp
@ -35,6 +35,7 @@
 #include "llvm/Analysis/IVUsers.h"
 #include "llvm/Analysis/InlineAdvisor.h"
 #include "llvm/Analysis/InlineFeaturesAnalysis.h"
 #include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
 #include "llvm/Analysis/LazyCallGraph.h"
 #include "llvm/Analysis/LazyValueInfo.h"
 #include "llvm/Analysis/LoopAccessAnalysis.h"
--- a/lib/Passes/PassRegistry.def
+++ b/lib/Passes/PassRegistry.def
@ -133,6 +133,7 @@ FUNCTION_ANALYSIS("loops", LoopAnalysis())
 FUNCTION_ANALYSIS("lazy-value-info", LazyValueAnalysis())
 FUNCTION_ANALYSIS("da", DependenceAnalysis())
 FUNCTION_ANALYSIS("inliner-features", InlineFeaturesAnalysis())
 FUNCTION_ANALYSIS("inliner-size-estimator", InlineSizeEstimatorAnalysis())
 FUNCTION_ANALYSIS("memdep", MemoryDependenceAnalysis())
 FUNCTION_ANALYSIS("memoryssa", MemorySSAAnalysis())
 FUNCTION_ANALYSIS("phi-values", PhiValuesAnalysis())
--- a/unittests/Analysis/CMakeLists.txt
+++ b/unittests/Analysis/CMakeLists.txt
@ -6,7 +6,13 @@ set(LLVM_LINK_COMPONENTS
  TransformUtils
  )
-add_llvm_unittest(AnalysisTests
+if (DEFINED LLVM_HAVE_TF_API)
  LIST(APPEND EXTRA_TESTS TFUtilsTest.cpp)
 else()
  LIST(APPEND LLVM_OPTIONAL_SOURCES TFUtilsTest.cpp)
 endif()
 add_llvm_unittest_with_input_files(AnalysisTests
  AliasAnalysisTest.cpp
  AliasSetTrackerTest.cpp
  AssumeBundleQueriesTest.cpp
@ -22,6 +28,7 @@ add_llvm_unittest(AnalysisTests
  DomTreeUpdaterTest.cpp
  GlobalsModRefTest.cpp
  InlineFeaturesAnalysisTest.cpp
  InlineSizeEstimatorAnalysisTest.cpp
  IVDescriptorsTest.cpp
  LazyCallGraphTest.cpp
  LoadsTest.cpp
@ -40,4 +47,7 @@ add_llvm_unittest(AnalysisTests
  ValueLatticeTest.cpp
  ValueTrackingTest.cpp
  VectorUtilsTest.cpp
  ${EXTRA_TESTS}
  )
 target_link_libraries(AnalysisTests PRIVATE LLVMTestingSupport)
--- a/unittests/Analysis/InlineSizeEstimatorAnalysisTest.cpp
+++ b/unittests/Analysis/InlineSizeEstimatorAnalysisTest.cpp
@ -0,0 +1,101 @@
 //===- InlineSizeEstimatorAnalysisTest.cpp - test for ir2native -----------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 #include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/AsmParser/Parser.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Testing/Support/SupportHelpers.h"
 #include "gtest/gtest.h"
 using namespace llvm;
 extern const char *TestMainArgv0;
 extern cl::opt<std::string> TFIR2NativeModelPath;
 #if LLVM_HAVE_TF_API
 static std::string getModelPath() {
  SmallString<128> InputsDir = unittest::getInputFileDirectory(TestMainArgv0);
  llvm::sys::path::append(InputsDir, "ir2native_x86_64_model");
  return std::string(InputsDir);
 }
 #endif
 // Parse the textual IR into a module owned by context C. On failure, the
 // diagnostic is printed to errs() and a null pointer is returned.
 static std::unique_ptr<Module> parseIR(LLVMContext &C, const char *IR) {
  SMDiagnostic Diag;
  auto Parsed = parseAssemblyString(IR, Diag, C);
  if (Parsed)
    return Parsed;
  Diag.print("MLAnalysisTests", errs());
  return nullptr;
 }
 // Build a function analysis manager with the analyses registered that the
 // test needs.
 static FunctionAnalysisManager buildFAM() {
  FunctionAnalysisManager Manager;
  // The factories capture nothing; each just default-constructs its analysis.
  Manager.registerPass([] { return DominatorTreeAnalysis(); });
  Manager.registerPass([] { return PassInstrumentationAnalysis(); });
  Manager.registerPass([] { return TargetIRAnalysis(); });
  Manager.registerPass([] { return LoopAnalysis(); });
  return Manager;
 }
 // Test model loading and evaluation. With the TF API available, the estimate
 // for @branches must exist and be positive; without it, the analysis must
 // produce no estimate.
 TEST(InlineSizeEstimatorAnalysis, SizeIsValidTest) {
  LLVMContext C;
  std::unique_ptr<Module> M = parseIR(C,
                                      R"IR(
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-pc-linux-gnu"
 declare i32 @f1(i32)
 declare i32 @f2(i32)
 define i32 @branches(i32) {
  %cond = icmp slt i32 %0, 3
  br i1 %cond, label %then, label %else
 then:
  %ret.1 = call i32 @f1(i32 %0)
  br label %last.block
 else:
  %ret.2 = call i32 @f2(i32 %0)
  br label %last.block
 last.block:
  %ret = phi i32 [%ret.1, %then], [%ret.2, %else]
  ret i32 %ret
 }
 define internal i32 @top() {
  %1 = call i32 @branches(i32 2)
  %2 = call i32 @f1(i32 %1)
  ret i32 %2
 }
 )IR");
  // Stop here if the IR did not parse, rather than dereferencing a null module.
  ASSERT_TRUE(M != nullptr);
  FunctionAnalysisManager FAM = buildFAM();
 #if LLVM_HAVE_TF_API
  TFIR2NativeModelPath = getModelPath();
 #endif
  InlineSizeEstimatorAnalysis FA;
  Function *Branches = M->getFunction("branches");
  ASSERT_TRUE(Branches != nullptr);
  auto SizeEstimate = FA.run(*Branches, FAM);
 #if LLVM_HAVE_TF_API
  // The estimate must be present before it can be dereferenced.
  ASSERT_TRUE(SizeEstimate.hasValue());
  EXPECT_GT(*SizeEstimate, 0u);
 #else
  EXPECT_FALSE(SizeEstimate.hasValue());
 #endif
 }
--- a/unittests/Analysis/Inputs/ir2native_x86_64_model/saved_model.pbtxt
+++ b/unittests/Analysis/Inputs/ir2native_x86_64_model/saved_model.pbtxt
--- a/unittests/Analysis/Inputs/ir2native_x86_64_model/variables/variables.data-00000-of-00001
+++ b/unittests/Analysis/Inputs/ir2native_x86_64_model/variables/variables.data-00000-of-00001
--- a/unittests/Analysis/Inputs/ir2native_x86_64_model/variables/variables.index
+++ b/unittests/Analysis/Inputs/ir2native_x86_64_model/variables/variables.index
--- a/unittests/Analysis/TFUtilsTest.cpp
+++ b/unittests/Analysis/TFUtilsTest.cpp
@ -0,0 +1,98 @@
 //===- TFUtilsTest.cpp - test for TFUtils ---------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 #include "llvm/Analysis/Utils/TFUtils.h"
 #include "llvm/AsmParser/Parser.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Testing/Support/SupportHelpers.h"
 #include "gtest/gtest.h"
 using namespace llvm;
 extern const char *TestMainArgv0;
 static std::string getModelPath() {
  SmallString<128> InputsDir = unittest::getInputFileDirectory(TestMainArgv0);
  llvm::sys::path::append(InputsDir, "ir2native_x86_64_model");
  return std::string(InputsDir);
 }
 // Test observable behavior when no model is provided: constructing an
 // evaluator with an empty path and no inputs/outputs must yield an invalid
 // evaluator.
 TEST(TFUtilsTest, NoModel) {
  TFModelEvaluator Evaluator("", {}, {});
  EXPECT_FALSE(Evaluator.isValid());
 }
 // Test we can correctly load a savedmodel and evaluate it.
 TEST(TFUtilsTest, LoadAndExecuteTest) {
  // We use the ir2native model for test. We know it has one feature of
  // dimension (1, 214)
  std::vector<std::string> InputNames{"serving_default_input_1"};
  std::vector<std::string> OutputName{"StatefulPartitionedCall"};
  const static int64_t KnownSize = 214;
  TFModelEvaluator Evaluator(getModelPath(), InputNames, OutputName);
  static const std::vector<int64_t> Dim{1, KnownSize};
  // Everything below needs a loaded model, so a load failure is fatal.
  ASSERT_TRUE(Evaluator.isValid());
  Evaluator.initInput(0, TF_INT32, Dim);
  int32_t *V = static_cast<int32_t *>(TF_TensorData(Evaluator.getInput()[0]));
  // Fill it up with 1's, we know the output.
  for (int64_t I = 0; I < KnownSize; ++I) {
    V[I] = 1;
  }
  {
    auto ER = Evaluator.evaluate();
    // Fatal check: the result is dereferenced right after.
    ASSERT_TRUE(ER.hasValue());
    float Ret = *ER->getTensorValue<float>(0);
    EXPECT_EQ(static_cast<size_t>(Ret), 80);
  }
  // The input vector should be unchanged
  for (int64_t I = 0; I < KnownSize; ++I) {
    EXPECT_EQ(V[I], 1);
  }
  // Zero-out the unused position '0' of the instruction histogram, which is
  // after the first 9 calculated values. Should be the same result.
  V[9] = 0;
  {
    auto ER = Evaluator.evaluate();
    ASSERT_TRUE(ER.hasValue());
    float Ret = *ER->getTensorValue<float>(0);
    EXPECT_EQ(static_cast<size_t>(Ret), 80);
  }
 }
 // Test incorrect input setup: the input tensor is deliberately given the
 // wrong size, so the evaluation must fail and invalidate the evaluator.
 TEST(TFUtilsTest, EvalError) {
  // We use the ir2native model for test. We know it has one feature of
  // dimension (1, 214)
  std::vector<std::string> InputNames{"serving_default_input_1"};
  std::vector<std::string> OutputName{"StatefulPartitionedCall"};
  // One less than the 214 elements the model expects.
  const static int64_t KnownSize = 213;
  TFModelEvaluator Evaluator(getModelPath(), InputNames, OutputName);
  static const std::vector<int64_t> Dim{1, KnownSize};
  // Loading the model itself is expected to succeed.
  EXPECT_TRUE(Evaluator.isValid());
  Evaluator.initInput(0, TF_INT32, Dim);
  int32_t *V = static_cast<int32_t *>(TF_TensorData(Evaluator.getInput()[0]));
  // Fill it up with 1's. The values do not matter here, since the evaluation
  // is expected to fail.
  for (auto I = 0; I < KnownSize; ++I) {
    V[I] = 1;
  }
  auto ER = Evaluator.evaluate();
  EXPECT_FALSE(ER.hasValue());
  EXPECT_FALSE(Evaluator.isValid());
 }