From 4559a486149c23f2d941b57c3a362bc625db7ba7 Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin@google.com>
Date: Thu, 22 Jul 2021 12:45:32 -0700
Subject: [PATCH] [NFC][MLGO] Just use the underlying protobuf object for
 logging

Avoid buffering just to copy the buffered data, in 'development mode',
when logging. Instead, just populate the underlying protobuf.

Differential Revision: https://reviews.llvm.org/D106592
---
 include/llvm/Analysis/Utils/TFUtils.h         |  69 +++---
 lib/Analysis/DevelopmentModeInlineAdvisor.cpp |  12 +-
 lib/Analysis/TFUtils.cpp                      | 214 +++++++++++-------
 unittests/Analysis/TFUtilsTest.cpp            |  64 ++++--
 4 files changed, 229 insertions(+), 130 deletions(-)

diff --git a/include/llvm/Analysis/Utils/TFUtils.h b/include/llvm/Analysis/Utils/TFUtils.h
index ea6bc2cf19e..47ee23e0600 100644
--- a/include/llvm/Analysis/Utils/TFUtils.h
+++ b/include/llvm/Analysis/Utils/TFUtils.h
@@ -12,6 +12,7 @@
 #include "llvm/Config/llvm-config.h"
 
 #ifdef LLVM_HAVE_TF_API
+#include "llvm/ADT/StringMap.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/Support/JSON.h"
 
@@ -120,56 +121,62 @@ loadOutputSpecs(LLVMContext &Ctx, StringRef ExpectedDecisionName,
 /// The assumption is that, for an event to be logged (i.e. a set of feature
 /// values and a reward), the user calls the log* API for each feature exactly
 /// once, providing the index matching the position in the feature spec list
-/// provided at construction:
+/// provided at construction. The example assumes the first feature's element
+/// type is float, the second is int64, and the reward is float:
+///
 /// event 0:
-///   logTensorValue(0, ...)
-///   logTensorValue(1, ...)
+///   logFloatValue(0, ...)
+///   logInt64Value(1, ...)
 ///   ...
-///   logReward(...)
+///   logFloatReward(...)
 /// event 1:
-///   logTensorValue(0, ...)
-///   logTensorValue(1, ...)
+///   logFloatValue(0, ...)
+///   logInt64Value(1, ...)
 ///   ...
-///   logReward(...)
+///   logFloatReward(...)
 ///
 /// At the end, call print to generate the protobuf.
+/// Alternatively, don't call logReward at the end of each event, just
+/// log{Float|Int32|Int64}FinalReward at the end.
+class LoggerDataImpl;
 class Logger final {
 public:
-  /// Construct a Logger. If IncludeReward is false, then logReward shouldn't
-  /// be called, and the reward feature won't be printed out.
+  /// Construct a Logger. If IncludeReward is false, then logReward or
+  /// logFinalReward shouldn't be called, and the reward feature won't be
+  /// printed out.
   Logger(const std::vector<LoggedFeatureSpec> &FeatureSpecs,
-         const TensorSpec &RewardSpec, bool IncludeReward)
-      : FeatureSpecs(FeatureSpecs), RewardSpec(RewardSpec),
-        RawLogData(FeatureSpecs.size() + IncludeReward),
-        IncludeReward(IncludeReward) {}
+         const TensorSpec &RewardSpec, bool IncludeReward);
 
-  template <typename T> void logReward(T Value) {
-    assert(IncludeReward);
-    logTensorValue(RawLogData.size() - 1, &Value);
-  }
+  ~Logger();
 
-  template <typename T> void logFinalReward(T Value) {
-    assert(RawLogData.back().empty());
-    logReward(Value);
-  }
+  void logFloatReward(float Value);
+  void logInt32Reward(int32_t Value);
+  void logInt64Reward(int64_t Value);
 
-  template <typename T>
-  void logTensorValue(size_t FeatureID, const T *Value, size_t Size = 1) {
-    const char *Start = reinterpret_cast<const char *>(Value);
-    const char *End = Start + sizeof(T) * Size;
-    RawLogData[FeatureID].insert(RawLogData[FeatureID].end(), Start, End);
-  }
+  void logFloatFinalReward(float Value);
+  void logInt32FinalReward(int32_t Value);
+  void logInt64FinalReward(int64_t Value);
+
+  void logFloatValue(size_t FeatureID, const float *Value);
+  void logInt32Value(size_t FeatureID, const int32_t *Value);
+  void logInt64Value(size_t FeatureID, const int64_t *Value);
+
+  void logSpecifiedTensorValue(size_t FeatureID, const char *RawData);
+
+  // Warning! For int32_t, the return is set up for int64_t, so the caller needs
+  // to piecemeal cast their int32_t values.
+  // FIXME: let's drop int32_t support. While it's supported by evaluator, it's
+  // not supported by the tensorflow::SequenceExample proto. For small values,
+  // we can consider using bytes.
+  char *addEntryAndGetFloatOrInt64Buffer(size_t FeatureID);
 
   void print(raw_ostream &OS);
 
 private:
   std::vector<LoggedFeatureSpec> FeatureSpecs;
   TensorSpec RewardSpec;
-  /// RawData has one entry per feature, plus one more for the reward.
-  /// Each feature's values are then stored in a vector, in succession.
-  /// This means the ith event is stored at [*][i]
-  std::vector<std::vector<char>> RawLogData;
   const bool IncludeReward;
+  std::unique_ptr<LoggerDataImpl> LoggerData;
 };
 
 class TFModelEvaluator final {

diff --git a/lib/Analysis/DevelopmentModeInlineAdvisor.cpp b/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
index 940eb01f760..ecfefa36918 100644
--- a/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
+++ b/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
@@ -353,24 +353,22 @@ void TrainingLogger::logInlineEvent(const InlineEvent &Event,
   size_t CurrentFeature = 0;
   for (; CurrentFeature < NumberOfFeatures; ++CurrentFeature) {
     int64_t F = ModelRunner.getFeature(CurrentFeature);
-    L->logTensorValue(CurrentFeature, &F);
+    L->logInt64Value(CurrentFeature, &F);
   }
 
   for (size_t I = 1; I < OutputCount; ++I) {
     const auto &Result = *MUTR->lastEvaluationResult();
-    auto &Spec = MUTR->outputLoggedFeatureSpecs()[I].Spec;
     const char *RawData =
         reinterpret_cast<const char *>(Result.getUntypedTensorValue(I));
-    L->logTensorValue(CurrentFeature, RawData,
-                      Spec.getElementCount() * Spec.getElementByteSize());
+    L->logSpecifiedTensorValue(CurrentFeature, RawData);
     ++CurrentFeature;
   }
 
   assert(CurrentFeature == DefaultDecisionPos);
-  L->logTensorValue(DefaultDecisionPos, &Event.DefaultDecision);
-  L->logTensorValue(DecisionPos, &Event.AdvisedDecision);
+  L->logInt64Value(DefaultDecisionPos, &Event.DefaultDecision);
+  L->logInt64Value(DecisionPos, &Event.AdvisedDecision);
   if (InlineSizeEstimatorAnalysis::isEvaluatorRequested())
-    L->logReward(Event.Reward);
+    L->logInt64Reward(Event.Reward);
 
   // For debugging / later use
   Effects.push_back(Event.Effect);

diff --git a/lib/Analysis/TFUtils.cpp b/lib/Analysis/TFUtils.cpp
index d09668528a6..e93dc303ae6 100644
--- a/lib/Analysis/TFUtils.cpp
+++ b/lib/Analysis/TFUtils.cpp
@@ -32,6 +32,9 @@
 using namespace llvm;
 
+using google::protobuf::Message;
+using google::protobuf::TextFormat;
+
 static cl::opt<bool>
     ProtobufTextMode("tfutils-text-log", cl::init(false), cl::Hidden,
                      cl::desc("Output textual (human-readable) protobuf."));
 
@@ -70,55 +73,6 @@ TFStatusPtr createTFStatus() {
 TFSessionOptionsPtr createTFSessionOptions() {
   return TFSessionOptionsPtr(TF_NewSessionOptions(), &TF_DeleteSessionOptions);
 }
-
-/// Write a list of tensors as a sequence of TensorFlow FeatureList protobufs.
-/// The tensors are assumed to be stored contiguously, in row-major format,
-/// in the TensorData buffer. Each tensor has the shape given by Spec. The
-/// feature name in the output is either the provided LoggingName, if
-/// specified, otherwise it's the name of the tensor (as given by Spec).
-void writeRawTensorsAsFeatureLists(tensorflow::FeatureLists *FE,
-                                   const LoggedFeatureSpec &LoggedSpec,
-                                   const char *TensorData, size_t TensorCount,
-                                   bool FinalReward = false) {
-  const auto &Spec = LoggedSpec.Spec;
-  // The 'Feature' protobuf only has 3 possible fields: float_list,
-  // int64_list, or bytes_list, so we capture int32 values as int64. We don't
-  // support any other types.
-  tensorflow::FeatureList &FL = (*FE->mutable_feature_list())[(
-      LoggedSpec.LoggingName ?
-          *LoggedSpec.LoggingName : Spec.name())];
-
-  const char *CurrentTensor = TensorData;
-  const size_t TensorByteSize =
-      Spec.getElementCount() * Spec.getElementByteSize();
-  const size_t ElemCount = Spec.getElementCount();
-  for (size_t E = 0; E < TensorCount; ++E) {
-    const bool ShouldWrite = E + 1 == TensorCount || !FinalReward;
-
-    if (Spec.isElementType<int64_t>()) {
-      auto *MF = FL.add_feature()->mutable_int64_list()->mutable_value();
-      MF->Resize(ElemCount, 0);
-      if (ShouldWrite)
-        memcpy(MF->mutable_data(), CurrentTensor, TensorByteSize);
-    } else if (Spec.isElementType<int32_t>()) {
-      auto *MF = FL.add_feature()->mutable_int64_list()->mutable_value();
-      MF->Resize(ElemCount, 0);
-      if (ShouldWrite) {
-        const int32_t *TD = reinterpret_cast<const int32_t *>(CurrentTensor);
-        for (size_t I = 0; I < ElemCount; ++I)
-          (*MF)[I] = TD[I];
-      }
-    } else if (Spec.isElementType<float>()) {
-      auto *MF = FL.add_feature()->mutable_float_list()->mutable_value();
-      MF->Resize(ElemCount, 0.0);
-      if (ShouldWrite)
-        memcpy(MF->mutable_data(), CurrentTensor, TensorByteSize);
-    } else {
-      llvm_unreachable("Unsupported tensor type.");
-    }
-    if (ShouldWrite)
-      CurrentTensor += TensorByteSize;
-  }
-}
 } // namespace
 
 namespace llvm {
@@ -304,6 +258,76 @@ private:
   bool checkReportAndInvalidate(const TF_Output &Output,
                                 const TensorSpec &OutputSpec);
 };
+
+class LoggerDataImpl {
+  const std::vector<LoggedFeatureSpec> LoggedFeatureSpecs;
+  const TensorSpec RewardSpec;
+
+  tensorflow::SequenceExample SE;
+  std::vector<tensorflow::FeatureList *> FeatureLists;
+  tensorflow::FeatureList *Reward = nullptr;
+
+public:
+  LoggerDataImpl(const std::vector<LoggedFeatureSpec> &LoggedSpecs,
+                 const TensorSpec &RewardSpec, bool IncludeReward)
+      : LoggedFeatureSpecs(LoggedSpecs), RewardSpec(RewardSpec) {
+    auto *FL = SE.mutable_feature_lists()->mutable_feature_list();
+    if (IncludeReward)
+      Reward = &(*FL)[RewardSpec.name()];
+    // Allocate first the map entries, then capture their address. We will not
+    // mutate the set of features after this (i.e. the pointers won't dangle).
+    for (const auto &LFS : LoggedSpecs) {
+      (*FL)[LFS.LoggingName ? *LFS.LoggingName : LFS.Spec.name()] = {};
+    }
+    for (const auto &LFS : LoggedSpecs)
+      FeatureLists.push_back(
+          &(*FL)[LFS.LoggingName ? *LFS.LoggingName : LFS.Spec.name()]);
+  }
+
+  void print(raw_ostream &OS) {
+    std::string OutStr;
+    if (ProtobufTextMode)
+      google::protobuf::TextFormat::PrintToString(SE, &OutStr);
+    else
+      OutStr = SE.SerializeAsString();
+
+    OS << OutStr;
+  }
+
+  char *addNewTensor(size_t FeatureID) {
+    const auto &Spec = LoggedFeatureSpecs[FeatureID].Spec;
+    if (Spec.isElementType<float>()) {
+      auto *RF = FeatureLists[FeatureID]
+                     ->add_feature()
+                     ->mutable_float_list()
+                     ->mutable_value();
+      RF->Resize(Spec.getElementCount(), 0.0);
+      return reinterpret_cast<char *>(RF->mutable_data());
+    } else if (Spec.isElementType<int32_t>() ||
+               Spec.isElementType<int64_t>()) {
+      auto *RF = FeatureLists[FeatureID]
+                     ->add_feature()
+                     ->mutable_int64_list()
+                     ->mutable_value();
+      RF->Resize(Spec.getElementCount(), 0);
+      return reinterpret_cast<char *>(RF->mutable_data());
+    }
+    llvm_unreachable("Unsupported tensor type.");
+  }
+
+  template <typename T> void logReward(T Value) {
+    if (RewardSpec.isElementType<float>())
+      Reward->add_feature()->mutable_float_list()->add_value(Value);
+    else if (RewardSpec.isElementType<int32_t>() ||
+             RewardSpec.isElementType<int64_t>())
+      Reward->add_feature()->mutable_int64_list()->add_value(Value);
+    else
+      llvm_unreachable("Unsupported tensor type.");
+  }
+
+  size_t getNrRecords() const {
+    return FeatureLists.empty() ? 0 : FeatureLists[0]->feature().size();
+  }
+};
 } // namespace llvm
 
 TFModelEvaluatorImpl::TFModelEvaluatorImpl(
@@ -448,37 +472,71 @@ TFUTILS_SUPPORTED_TYPES(TFUTILS_GETDATATYPE_IMPL)
 TFModelEvaluator::EvaluationResult::~EvaluationResult() {}
 TFModelEvaluator::~TFModelEvaluator() {}
 
-void Logger::print(raw_ostream &OS) {
-  tensorflow::SequenceExample SE;
+Logger::Logger(const std::vector<LoggedFeatureSpec> &FeatureSpecs,
+               const TensorSpec &RewardSpec, bool IncludeReward)
+    : FeatureSpecs(FeatureSpecs), RewardSpec(RewardSpec),
+      IncludeReward(IncludeReward),
+      LoggerData(std::make_unique<LoggerDataImpl>(FeatureSpecs, RewardSpec,
+                                                  IncludeReward)) {}
 
-  if (RawLogData.empty())
-    return;
-  if (RawLogData[0].empty())
-    return;
-  size_t Tensor0Size = FeatureSpecs[0].Spec.getElementCount() *
-                       FeatureSpecs[0].Spec.getElementByteSize();
-  size_t NumberOfRecords = RawLogData[0].size() / Tensor0Size;
-  if (NumberOfRecords == 0)
-    return;
-  size_t RewardSize =
-      RewardSpec.getElementCount() * RewardSpec.getElementByteSize();
-  size_t NumberOfRewards = RawLogData.back().size() / RewardSize;
+Logger::~Logger() {}
 
-  tensorflow::FeatureLists *FE = SE.mutable_feature_lists();
-  for (size_t I = 0; I < FeatureSpecs.size(); ++I)
-    writeRawTensorsAsFeatureLists(FE, FeatureSpecs[I], RawLogData[I].data(),
-                                  NumberOfRecords);
-
-  if (IncludeReward)
-    writeRawTensorsAsFeatureLists(FE, {RewardSpec, None},
-                                  RawLogData.back().data(), NumberOfRecords,
-                                  NumberOfRewards == 1);
-  std::string OutStr;
-  if (ProtobufTextMode) {
-    google::protobuf::TextFormat::PrintToString(SE, &OutStr);
-  } else {
-    OutStr = SE.SerializeAsString();
+#define LOG_REWARD(NAME, TYPE)                                                 \
+  void Logger::log##NAME##Reward(TYPE Value) {                                 \
+    assert(IncludeReward);                                                     \
+    LoggerData->logReward(Value);                                              \
   }
-  OS << OutStr;
+
+LOG_REWARD(Float, float)
+LOG_REWARD(Int32, int32_t)
+LOG_REWARD(Int64, int64_t)
+#undef LOG_REWARD
+
+#define LOG_FINAL_REWARD(NAME, TYPE)                                           \
+  void Logger::log##NAME##FinalReward(TYPE Value) {                            \
+    assert(RewardSpec.isElementType<TYPE>());                                  \
+    for (size_t I = 1; I < LoggerData->getNrRecords(); ++I)                    \
+      log##NAME##Reward(0);                                                    \
+    log##NAME##Reward(Value);                                                  \
+  }
+
+LOG_FINAL_REWARD(Float, float)
+LOG_FINAL_REWARD(Int32, int32_t)
+LOG_FINAL_REWARD(Int64, int64_t)
+#undef LOG_FINAL_REWARD
+
+void Logger::logFloatValue(size_t FeatureID, const float *Value) {
+  assert(FeatureSpecs[FeatureID].Spec.isElementType<float>());
+  logSpecifiedTensorValue(FeatureID, reinterpret_cast<const char *>(Value));
 }
+
+void Logger::logInt64Value(size_t FeatureID, const int64_t *Value) {
+  assert(FeatureSpecs[FeatureID].Spec.isElementType<int64_t>());
+  logSpecifiedTensorValue(FeatureID, reinterpret_cast<const char *>(Value));
+}
+
+void Logger::logInt32Value(size_t FeatureID, const int32_t *Value) {
+  assert(FeatureSpecs[FeatureID].Spec.isElementType<int32_t>());
+  logSpecifiedTensorValue(FeatureID, reinterpret_cast<const char *>(Value));
+}
+
+void Logger::logSpecifiedTensorValue(size_t FeatureID, const char *RawData) {
+  const auto &Spec = FeatureSpecs[FeatureID].Spec;
+  char *Buff = addEntryAndGetFloatOrInt64Buffer(FeatureID);
+  if (Spec.isElementType<int32_t>())
+    for (size_t I = 0; I < Spec.getElementCount(); ++I)
+      (reinterpret_cast<int64_t *>(Buff))[I] =
+          static_cast<int64_t>((reinterpret_cast<const int32_t *>(RawData))[I]);
+  else if (Spec.isElementType<int64_t>() || Spec.isElementType<float>())
+    std::memcpy(Buff, RawData,
+                Spec.getElementCount() * Spec.getElementByteSize());
+  else
+    llvm_unreachable("Unsupported tensor type");
+}
+
+char *Logger::addEntryAndGetFloatOrInt64Buffer(size_t FeatureID) {
+  return reinterpret_cast<char *>(LoggerData->addNewTensor(FeatureID));
+}
+
+void Logger::print(raw_ostream &OS) {
+  LoggerData->print(OS);
+}
 
 #endif // defined(LLVM_HAVE_TF_API)

diff --git a/unittests/Analysis/TFUtilsTest.cpp b/unittests/Analysis/TFUtilsTest.cpp
index a2146633790..1dfa8e10b73 100644
--- a/unittests/Analysis/TFUtilsTest.cpp
+++ b/unittests/Analysis/TFUtilsTest.cpp
@@ -169,14 +169,14 @@ TEST(TFUtilsTest, Logger) {
   const float F00[]{0.0, 0.1, 0.2, 0.3, 0.4, 0.5};
   const int64_t F01[]{2, 3};
 
-  L.logTensorValue(0, F00, 6);
-  L.logTensorValue(1, F01, 2);
-  L.logReward<float>(3.4);
+  L.logFloatValue(0, F00);
+  L.logInt64Value(1, F01);
+  L.logFloatReward(3.4);
   const float F10[]{0.0, 1.0, 2.0, 3.0, 4.0, 5.0};
   const int64_t F11[]{-2, -3};
-  L.logTensorValue(0, F10, 6);
-  L.logTensorValue(1, F11, 2);
-  L.logReward<float>(-3.0);
+  L.logFloatValue(0, F10);
+  L.logInt64Value(1, F11);
+  L.logFloatReward(-3.0);
   std::string Result;
   raw_string_ostream OS(Result);
   L.print(OS);
@@ -193,6 +193,42 @@ TEST(TFUtilsTest, Logger) {
   PROTO_CHECKER("reward", float_list, 1, R1);
 }
 
+TEST(TFUtilsTest, LoggerInt32FeaturesAndReward) {
+  std::vector<LoggedFeatureSpec> Features;
+  Features.push_back(
+      {TensorSpec::createSpec<float>("the_float", {2, 3}), None});
+  Features.push_back({TensorSpec::createSpec<int32_t>("the_int", {2}),
+                      std::string("alternate_name")});
+
+  auto Rewards = TensorSpec::createSpec<int32_t>("reward", {1});
+  Logger L(Features, Rewards, true);
+  const float F00[]{0.0, 0.1, 0.2, 0.3, 0.4, 0.5};
+  const int32_t F01[]{2, 3};
+
+  L.logFloatValue(0, F00);
+  L.logInt32Value(1, F01);
+  L.logInt32Reward(3);
+  const float F10[]{0.0, 1.0, 2.0, 3.0, 4.0, 5.0};
+  const int32_t F11[]{-2, -3};
+  L.logFloatValue(0, F10);
+  L.logInt32Value(1, F11);
+  L.logInt32Reward(-3);
+  std::string Result;
+  raw_string_ostream OS(Result);
+  L.print(OS);
+
+  tensorflow::SequenceExample Expected;
+  EXPECT_TRUE(Expected.ParseFromString(Result));
+  PROTO_CHECKER("the_float", float_list, 0, F00);
+  PROTO_CHECKER("the_float", float_list, 1, F10);
+  PROTO_CHECKER("alternate_name", int64_list, 0, F01);
+  PROTO_CHECKER("alternate_name", int64_list, 1, F11);
+  int32_t R0[]{3};
+  int32_t R1[]{-3};
+  PROTO_CHECKER("reward", int64_list, 0, R0);
+  PROTO_CHECKER("reward", int64_list, 1, R1);
+}
+
 TEST(TFUtilsTest, LoggerNoReward) {
   std::vector<LoggedFeatureSpec> Features;
   Features.push_back(
@@ -205,12 +241,12 @@ TEST(TFUtilsTest, LoggerNoReward) {
   const float F00[]{0.0, 0.1, 0.2, 0.3, 0.4, 0.5};
   const int64_t F01[]{2, 3};
 
-  L.logTensorValue(0, F00, 6);
-  L.logTensorValue(1, F01, 2);
+  L.logFloatValue(0, F00);
+  L.logInt64Value(1, F01);
   const float F10[]{0.0, 1.0, 2.0, 3.0, 4.0, 5.0};
   const int64_t F11[]{-2, -3};
-  L.logTensorValue(0, F10, 6);
-  L.logTensorValue(1, F11, 2);
+  L.logFloatValue(0, F10);
+  L.logInt64Value(1, F11);
 
   std::string Result;
   raw_string_ostream OS(Result);
@@ -230,12 +266,12 @@ TEST(TFUtilsTest, LoggerFinalReward) {
   auto Rewards = TensorSpec::createSpec<float>("reward", {1});
   Logger L(Features, Rewards, true);
-  for (size_t I = 0; I < 3; ++I) {
+  for (int64_t I = 0; I < 3; ++I) {
     float F = static_cast<float>(I);
-    L.logTensorValue(0, &F);
-    L.logTensorValue(1, &I);
+    L.logFloatValue(0, &F);
+    L.logInt64Value(1, &I);
   }
-  L.logFinalReward<float>(3.14);
+  L.logFloatFinalReward(3.14);
   std::string Result;
   raw_string_ostream OS(Result);
   L.print(OS);
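
For context, a minimal usage sketch of the new Logger API, modeled on the
LoggerFinalReward unittest above. It assumes an LLVM build with
LLVM_HAVE_TF_API; the wrapper function name is hypothetical. Note that
logFloatFinalReward backfills a reward of 0 for every record except the last,
so it is called exactly once, after all events have been logged:

// Hypothetical usage sketch, not part of the patch.
#include "llvm/Analysis/Utils/TFUtils.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

void logThreeEventsWithFinalReward(raw_ostream &OS) {
  // One FeatureList per feature; the optional second member of
  // LoggedFeatureSpec is an alternate logging name (None means "use the
  // tensor's own name").
  std::vector<LoggedFeatureSpec> Features;
  Features.push_back({TensorSpec::createSpec<float>("the_float", {1}), None});
  Features.push_back({TensorSpec::createSpec<int64_t>("the_int", {1}), None});
  auto Reward = TensorSpec::createSpec<float>("reward", {1});

  Logger L(Features, Reward, /*IncludeReward=*/true);
  for (int64_t I = 0; I < 3; ++I) {
    float F = static_cast<float>(I);
    L.logFloatValue(0, &F); // appends record I of feature "the_float"
    L.logInt64Value(1, &I); // appends record I of feature "the_int"
  }
  // Backfills reward 0 for records 0 and 1, then logs 3.14 for record 2.
  L.logFloatFinalReward(3.14);
  // Emits the underlying tensorflow::SequenceExample, binary by default or
  // textual under -tfutils-text-log.
  L.print(OS);
}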