
[NFC][TFUtils] Extract out the output spec loader

It's generic for the 'development mode', not specific to the inliner
case.

Differential Revision: https://reviews.llvm.org/D91751
Mircea Trofin 2020-11-18 16:16:10 -08:00
parent 4038ba0242
commit 504ced25f2
4 changed files with 110 additions and 92 deletions
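Concretely, a 'development mode' client can now load its output specs with the shared helper and hand them to TFModelEvaluator through the new function_ref-based constructor. A condensed sketch of that usage, with placeholder variables (Ctx, SpecPath, ModelPath, InputSpecs, DecisionName) standing in for whatever the caller already has:

```cpp
// Condensed sketch, not verbatim from the patch: load the specs once, then
// let the evaluator pull each TensorSpec out of its LoggedFeatureSpec wrapper
// by index. All variable names here are placeholders.
std::vector<LoggedFeatureSpec> OutputSpecs;
if (!loadOutputSpecs(Ctx, SpecPath, DecisionName, OutputSpecs))
  return;
auto Evaluator = std::make_unique<TFModelEvaluator>(
    ModelPath, InputSpecs,
    [&](size_t I) { return OutputSpecs[I].Spec; }, OutputSpecs.size());
```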


@@ -100,6 +100,15 @@ private:
Optional<TensorSpec> getTensorSpecFromJSON(LLVMContext &Ctx,
const json::Value &Value);
struct LoggedFeatureSpec {
TensorSpec Spec;
Optional<std::string> LoggingName;
};
bool loadOutputSpecs(LLVMContext &Ctx, StringRef FileName,
StringRef ExpectedDecisionName,
std::vector<LoggedFeatureSpec> &Ret);
/// Logging utility - given an ordered specification of features, and assuming
/// a scalar reward, allow logging feature values and rewards, and then print
/// as tf.train.SequenceExample text protobuf.
@@ -121,11 +130,6 @@ Optional<TensorSpec> getTensorSpecFromJSON(LLVMContext &Ctx,
/// At the end, call print to generate the protobuf.
class Logger final {
public:
struct LoggedFeatureSpec {
TensorSpec Spec;
Optional<std::string> LoggingName;
};
/// Construct a Logger. If IncludeReward is false, then logReward shouldn't
/// be called, and the reward feature won't be printed out.
Logger(const std::vector<LoggedFeatureSpec> &FeatureSpecs,
@@ -201,6 +205,11 @@ public:
const std::vector<TensorSpec> &InputSpecs,
const std::vector<TensorSpec> &OutputSpecs,
const char *Tags = "serve");
TFModelEvaluator(StringRef SavedModelPath,
const std::vector<TensorSpec> &InputSpecs,
function_ref<TensorSpec(size_t)> GetOutputSpecs,
size_t OutputSpecsSize, const char *Tags = "serve");
~TFModelEvaluator();
TFModelEvaluator(const TFModelEvaluator &) = delete;
TFModelEvaluator(TFModelEvaluator &&) = delete;
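For reference, loadOutputSpecs (declared above) consumes a JSON file holding an array of {tensor_spec, logging_name} dictionaries whose first entry must describe the decision tensor; the error messages in its implementation further below spell this out. A minimal illustrative file, embedded here as a C++ string literal; the field names inside tensor_spec and the concrete values are assumptions for illustration, not taken from this patch:

```cpp
// Illustrative only: roughly what an output spec file consumed by
// loadOutputSpecs could look like. The first entry must carry the expected
// decision name as its logging_name; the tensor_spec field names are assumed
// to mirror TensorSpec's JSON form and are not spelled out in this diff.
static const char *ExampleOutputSpecJSON = R"json([
  {
    "logging_name": "inlining_decision",
    "tensor_spec": {"name": "output_0", "port": 0,
                    "type": "int64_t", "shape": [1]}
  }
])json";
```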


@@ -298,9 +298,9 @@ public:
int64_t getFeature(int Index) const override;
bool isValid() const { return !!Evaluator; }
const std::vector<std::string> &outputNames() const { return OutputNames; }
const std::vector<TensorSpec> &outputSpecs() const { return OutputSpecs; }
const std::vector<LoggedFeatureSpec> &outputLoggedFeatureSpecs() const {
return OutputSpecs;
}
const Optional<TFModelEvaluator::EvaluationResult> &
lastEvaluationResult() const {
@@ -309,12 +309,9 @@ public:
private:
std::unique_ptr<TFModelEvaluator> Evaluator;
std::vector<std::string> OutputNames;
std::vector<TensorSpec> OutputSpecs;
std::vector<LoggedFeatureSpec> OutputSpecs;
Optional<TFModelEvaluator::EvaluationResult> LastEvaluationResult;
bool loadOutputSpecs(LLVMContext &Ctx, StringRef FileName);
// The training framework needs some additional features.
const std::vector<TensorSpec> TrainingOnlyFeatures{
TensorSpec::createSpec<int64_t>(TFFeedPrefix + "inlining_default", {1}),
@@ -329,14 +326,15 @@ TrainingLogger::TrainingLogger(StringRef LogFileName,
: LogFileName(LogFileName), MUTR(MUTR) {
// The first output is the inlining decision.
if (MUTR)
OutputCount = MUTR->outputSpecs().size();
std::vector<Logger::LoggedFeatureSpec> FT;
OutputCount = MUTR->outputLoggedFeatureSpecs().size();
std::vector<LoggedFeatureSpec> FT;
for (size_t I = 0; I < NumberOfFeatures; ++I)
FT.push_back(
{TensorSpec::createSpec<int64_t>(FeatureNameMap.at(I), {1}), None});
for (size_t I = 1; I < OutputCount; ++I)
FT.push_back({MUTR->outputSpecs()[I], MUTR->outputNames()[I]});
if (MUTR && MUTR->outputLoggedFeatureSpecs().size() > 1)
FT.insert(FT.end(), MUTR->outputLoggedFeatureSpecs().begin() + 1,
MUTR->outputLoggedFeatureSpecs().end());
DefaultDecisionPos = FT.size();
FT.push_back(
@@ -361,7 +359,7 @@ void TrainingLogger::logInlineEvent(const InlineEvent &Event,
for (size_t I = 1; I < OutputCount; ++I) {
const auto &Result = *MUTR->lastEvaluationResult();
auto &Spec = MUTR->outputSpecs()[I];
auto &Spec = MUTR->outputLoggedFeatureSpecs()[I].Spec;
const char *RawData =
reinterpret_cast<const char *>(Result.getUntypedTensorValue(I));
L->logTensorValue(CurrentFeature, RawData,
@@ -480,11 +478,13 @@ ModelUnderTrainingRunner::ModelUnderTrainingRunner(LLVMContext &Ctx,
llvm::sys::path::append(OutputSpecsPath, ModelPath, "output_spec.json");
OutputSpecPath = {OutputSpecsPath.data(), OutputSpecsPath.size()};
}
if (!loadOutputSpecs(Ctx, OutputSpecPath))
if (!loadOutputSpecs(Ctx, OutputSpecPath, DecisionName, OutputSpecs))
return;
Evaluator =
std::make_unique<TFModelEvaluator>(ModelPath, InputSpecs, OutputSpecs);
Evaluator = std::make_unique<TFModelEvaluator>(
ModelPath, InputSpecs, [&](size_t I) { return OutputSpecs[I].Spec; },
OutputSpecs.size());
if (!Evaluator || !Evaluator->isValid()) {
Ctx.emitError("Failed to create inliner saved model evaluator");
Evaluator.reset();
@@ -492,63 +492,6 @@ ModelUnderTrainingRunner::ModelUnderTrainingRunner(LLVMContext &Ctx,
}
}
bool ModelUnderTrainingRunner::loadOutputSpecs(LLVMContext &Ctx,
StringRef FileName) {
auto BufferOrError = MemoryBuffer::getFileOrSTDIN(FileName);
if (!BufferOrError) {
Ctx.emitError("Error opening output specs file: " + FileName + " : " +
BufferOrError.getError().message());
return false;
}
auto ParsedJSONValues = json::parse(BufferOrError.get()->getBuffer());
if (!ParsedJSONValues) {
Ctx.emitError("Could not parse specs file: " + FileName);
return false;
}
auto ValuesArray = ParsedJSONValues->getAsArray();
if (!ValuesArray) {
Ctx.emitError("Expected an array of {tensor_spec:<TensorSpec>, "
"logging_name:<name>} dictionaries");
return false;
}
for (const auto &Value : *ValuesArray)
if (const auto *Obj = Value.getAsObject())
if (const auto *SpecPart = Obj->get("tensor_spec"))
if (auto TensorSpec = getTensorSpecFromJSON(Ctx, *SpecPart))
if (auto LoggingName = Obj->getString("logging_name")) {
if (!TensorSpec->isElementType<int64_t>() &&
!TensorSpec->isElementType<int32_t>() &&
!TensorSpec->isElementType<float>()) {
Ctx.emitError(
"Only int64, int32, and float tensors are supported. "
"Found unsupported type for tensor named " +
TensorSpec->name());
return false;
}
OutputNames.push_back(LoggingName->str());
OutputSpecs.push_back(*TensorSpec);
}
if (ValuesArray->size() != OutputNames.size()) {
Ctx.emitError(
"Unable to parse output spec. It should be a json file containing an "
"array of dictionaries. Each dictionary must have a 'tensor_spec' key, "
"with a json object describing a TensorSpec; and a 'logging_name' key, "
"which is a string to use as name when logging this tensor in the "
"training log.");
return false;
}
assert(OutputNames.size() == OutputSpecs.size());
if (OutputNames.empty() || OutputNames[0] != DecisionName) {
Ctx.emitError("The first output spec must describe the decision tensor, "
"and must have the logging_name " +
StringRef(DecisionName));
return false;
}
return true;
}
bool ModelUnderTrainingRunner::run() {
LastEvaluationResult = Evaluator->evaluate();
if (!LastEvaluationResult.hasValue()) {
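The net effect in this file is that a tensor's spec and its logging name now travel together in one LoggedFeatureSpec rather than in the former parallel OutputNames/OutputSpecs vectors. A small sketch of the resulting access pattern (MUTR and the index I as in the code above):

```cpp
// Sketch of the new access pattern: spec and logging name are carried
// together. Per the logging convention, the name written to the training log
// is the LoggingName when present, otherwise the tensor's own name.
const LoggedFeatureSpec &LFS = MUTR->outputLoggedFeatureSpecs()[I];
const TensorSpec &Spec = LFS.Spec;
const std::string &LogName = LFS.LoggingName ? *LFS.LoggingName : Spec.name();
```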


@@ -18,6 +18,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include "tensorflow/c/c_api.h"
@@ -83,7 +84,7 @@ void writeTensorValues(raw_ostream &OutFile, const char *TensorData,
/// feature name in the output is either the provided LoggingName, if
/// specified, otherwise it's the name of the tensor (as given by Spec).
void writeRawTensorsAsFeatureLists(raw_ostream &OutFile,
const Logger::LoggedFeatureSpec &LoggedSpec,
const LoggedFeatureSpec &LoggedSpec,
const char *TensorData, size_t TensorCount,
bool FinalReward = false) {
const char *FieldName = "<invalid>";
@@ -215,12 +216,68 @@ Optional<TensorSpec> getTensorSpecFromJSON(LLVMContext &Ctx,
return None;
}
bool loadOutputSpecs(LLVMContext &Ctx, StringRef FileName,
StringRef ExpectedDecisionName,
std::vector<LoggedFeatureSpec> &Ret) {
auto BufferOrError = MemoryBuffer::getFileOrSTDIN(FileName);
if (!BufferOrError) {
Ctx.emitError("Error opening output specs file: " + FileName + " : " +
BufferOrError.getError().message());
return false;
}
auto ParsedJSONValues = json::parse(BufferOrError.get()->getBuffer());
if (!ParsedJSONValues) {
Ctx.emitError("Could not parse specs file: " + FileName);
return false;
}
auto ValuesArray = ParsedJSONValues->getAsArray();
if (!ValuesArray) {
Ctx.emitError("Expected an array of {tensor_spec:<TensorSpec>, "
"logging_name:<name>} dictionaries");
return false;
}
for (const auto &Value : *ValuesArray)
if (const auto *Obj = Value.getAsObject())
if (const auto *SpecPart = Obj->get("tensor_spec"))
if (auto TensorSpec = getTensorSpecFromJSON(Ctx, *SpecPart))
if (auto LoggingName = Obj->getString("logging_name")) {
if (!TensorSpec->isElementType<int64_t>() &&
!TensorSpec->isElementType<int32_t>() &&
!TensorSpec->isElementType<float>()) {
Ctx.emitError(
"Only int64, int32, and float tensors are supported. "
"Found unsupported type for tensor named " +
TensorSpec->name());
return false;
}
Ret.push_back({*TensorSpec, LoggingName->str()});
}
if (ValuesArray->size() != Ret.size()) {
Ctx.emitError(
"Unable to parse output spec. It should be a json file containing an "
"array of dictionaries. Each dictionary must have a 'tensor_spec' key, "
"with a json object describing a TensorSpec; and a 'logging_name' key, "
"which is a string to use as name when logging this tensor in the "
"training log.");
return false;
}
if (Ret.empty() || *Ret[0].LoggingName != ExpectedDecisionName) {
Ctx.emitError("The first output spec must describe the decision tensor, "
"and must have the logging_name " +
StringRef(ExpectedDecisionName));
return false;
}
return true;
}
class TFModelEvaluatorImpl {
public:
TFModelEvaluatorImpl(StringRef SavedModelPath,
const std::vector<TensorSpec> &InputSpecs,
const std::vector<TensorSpec> &OutputSpecs,
const char *Tags);
function_ref<TensorSpec(size_t)> GetOutputSpecs,
size_t OutputSpecsSize, const char *Tags);
bool isValid() const { return IsValid; }
size_t OutputSize() const { return OutputFeed.size(); }
@@ -271,10 +328,11 @@ private:
TFModelEvaluatorImpl::TFModelEvaluatorImpl(
StringRef SavedModelPath, const std::vector<TensorSpec> &InputSpecs,
const std::vector<TensorSpec> &OutputSpecs, const char *Tags)
function_ref<TensorSpec(size_t)> GetOutputSpecs, size_t OutputSpecsSize,
const char *Tags = "serve")
: Graph(createTFGraph()), Options(createTFSessionOptions()),
InputFeed(InputSpecs.size()), Input(InputSpecs.size()),
OutputFeed(OutputSpecs.size()) {
OutputFeed(OutputSpecsSize) {
if (!ensureInitTF()) {
errs() << "Tensorflow should have been initialized";
return;
@@ -298,8 +356,8 @@ TFModelEvaluatorImpl::TFModelEvaluatorImpl(
initInput(I, static_cast<TF_DataType>(InputSpec.typeIndex()),
InputSpec.shape());
}
for (size_t I = 0; I < OutputSpecs.size(); ++I) {
auto &OutputSpec = OutputSpecs[I];
for (size_t I = 0; I < OutputSpecsSize; ++I) {
auto OutputSpec = GetOutputSpecs(I);
OutputFeed[I] = {
TF_GraphOperationByName(Graph.get(), (OutputSpec.name()).c_str()),
OutputSpec.port()};
@@ -308,15 +366,23 @@ TFModelEvaluatorImpl::TFModelEvaluatorImpl(
}
}
TFModelEvaluator::TFModelEvaluator(
StringRef SavedModelPath, const std::vector<TensorSpec> &InputSpecs,
function_ref<TensorSpec(size_t)> GetOutputSpecs, size_t OutputSpecsSize,
const char *Tags)
: Impl(new TFModelEvaluatorImpl(SavedModelPath, InputSpecs, GetOutputSpecs,
OutputSpecsSize, Tags)) {
if (!Impl->isValid())
Impl.reset();
}
TFModelEvaluator::TFModelEvaluator(StringRef SavedModelPath,
const std::vector<TensorSpec> &InputSpecs,
const std::vector<TensorSpec> &OutputSpecs,
const char *Tags)
: Impl(new TFModelEvaluatorImpl(SavedModelPath, InputSpecs, OutputSpecs,
Tags)) {
if (!Impl->isValid())
Impl.reset();
}
: TFModelEvaluator(
SavedModelPath, InputSpecs, [&](size_t I) { return OutputSpecs[I]; },
OutputSpecs.size(), Tags) {}
TFModelEvaluatorImpl::~TFModelEvaluatorImpl() {
for (auto *T : Input) {
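Callers that only have plain TensorSpecs are unaffected: the original overload survives and, as shown above, just forwards through an index lambda. A hedged sketch of that unchanged call shape, with a placeholder path and specs:

```cpp
// Placeholder example: existing call sites keep passing a
// std::vector<TensorSpec>; the delegating overload above adapts it to the
// function_ref form internally.
std::vector<TensorSpec> Outputs{
    TensorSpec::createSpec<int64_t>("some_output", {1})};
TFModelEvaluator Eval("path/to/saved_model", InputSpecs, Outputs);
```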


@@ -144,7 +144,7 @@ TEST(TFUtilsTest, TensorSpecSizesAndTypes) {
}
TEST(TFUtilsTest, Logger) {
std::vector<Logger::LoggedFeatureSpec> Features;
std::vector<LoggedFeatureSpec> Features;
Features.push_back(
{TensorSpec::createSpec<float>("the_float", {2, 3}), None});
Features.push_back({TensorSpec::createSpec<int64_t>("the_int", {2}),
@@ -191,7 +191,7 @@ TEST(TFUtilsTest, Logger) {
}
TEST(TFUtilsTest, LoggerNoReward) {
std::vector<Logger::LoggedFeatureSpec> Features;
std::vector<LoggedFeatureSpec> Features;
Features.push_back(
{TensorSpec::createSpec<float>("the_float", {2, 3}), None});
Features.push_back({TensorSpec::createSpec<int64_t>("the_int", {2}),
@@ -230,7 +230,7 @@ TEST(TFUtilsTest, LoggerNoReward) {
}
TEST(TFUtilsTest, LoggerFinalReward) {
std::vector<Logger::LoggedFeatureSpec> Features;
std::vector<LoggedFeatureSpec> Features;
Features.push_back({TensorSpec::createSpec<float>("the_float", {1}), None});
Features.push_back({TensorSpec::createSpec<int64_t>("the_int", {1}), None});