[llvm-exegesis] Show sched class details in analysis.

Summary: And update docs. Reviewers: gchatelet Subscribers: tschuett, craig.topper, RKSimon, llvm-commits Differential Revision: https://reviews.llvm.org/D47254 llvm-svn: 333169
2025-01-31 12:41:49 +01:00 · 2018-05-24 10:47:05 +00:00 · 2018-05-24 10:47:05 +00:00 · 0ba5b5b01b
commit 0ba5b5b01b
parent 4f6111191f
4 changed files with 145 additions and 29 deletions
--- a/docs/CommandGuide/llvm-exegesis-analysis.png
+++ b/docs/CommandGuide/llvm-exegesis-analysis.png
--- a/docs/CommandGuide/llvm-exegesis.rst
+++ b/docs/CommandGuide/llvm-exegesis.rst
@ -113,20 +113,10 @@ following format:

 :program:`llvm-exegesis` will also analyze the clusters to point out
 inconsistencies in the scheduling information. The output is an html file. For
-example, `/tmp/inconsistencies.html` will contain messages like:
+example, `/tmp/inconsistencies.html` will contain messages like the following :

-.. code-block:: none
-
-  Sched Class EXTRACTPSrr_VEXTRACTPSrr contains instructions with distinct performance characteristics, falling into 2 clusters:
-  4,EXTRACTPSrr,,3.00
-  3,VEXTRACTPSrr,,2.01
-
-  Sched Class WriteCRC32 contains instructions with distinct performance characteristics, falling into 2 clusters:
-  4,CRC32r32r16,,3.01
-  4,CRC32r32r32,,3.00
-  11,CRC32r32r8,,4.01
-  4,CRC32r64r64,,3.01
-  4,CRC32r64r8,,3.00
+.. image:: llvm-exegesis-analysis.png
+  :align: center

 Note that the scheduling class names will be resolved only when
 :program:`llvm-exegesis` is compiled in debug mode, else only the class id will
--- a/tools/llvm-exegesis/lib/Analysis.cpp
+++ b/tools/llvm-exegesis/lib/Analysis.cpp
@ -167,8 +167,8 @@ Analysis::makePointsPerSchedClass() const {
  return PointsPerSchedClass;
 }

-void Analysis::printSchedClassHtml(std::vector<size_t> PointIds,
-                                   llvm::raw_ostream &OS) const {
+void Analysis::printSchedClassClustersHtml(std::vector<size_t> PointIds,
+                                           llvm::raw_ostream &OS) const {
  assert(!PointIds.empty());
  // Sort the points by cluster id so that we can display them grouped by
  // cluster.
@ -178,7 +178,7 @@ void Analysis::printSchedClassHtml(std::vector<size_t> PointIds,
                     Clustering_.getClusterIdForPoint(B);
            });
  const auto &Points = Clustering_.getPoints();
-  OS << "<table class=\"sched-class\">";
+  OS << "<table class=\"sched-class-clusters\">";
  OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>";
  for (const auto &Measurement : Points[PointIds[0]].Measurements) {
    OS << "<th>";
@ -214,6 +214,120 @@ void Analysis::printSchedClassHtml(std::vector<size_t> PointIds,
  OS << "</table>";
 }

+// Return the non-redundant list of WriteProcRes used by the given sched class.
+// The scheduling model for LLVM is such that each instruction has a certain
+// number of uops which consume resources which are described by WriteProcRes
+// entries. Each entry describe how many cycles are spent on a specific ProcRes
+// kind.
+// For example, an instruction might have 3 uOps, one dispatching on P0
+// (ProcResIdx=1) and two on P06 (ProcResIdx = 7).
+// Note that LLVM additionally denormalizes resource consumption to include
+// usage of super resources by subresources. So in practice if there exists a
+// P016 (ProcResIdx=10), then the cycles consumed by P0 are also consumed by
+// P06 (ProcResIdx = 7) and P016 (ProcResIdx = 10), and the resources consumed
+// by P06 are also consumed by P016. In the figure below, parenthesized cycles
+// denote implied usage of superresources by subresources:
+//            P0      P06    P016
+//     uOp1    1      (1)     (1)
+//     uOp2            1      (1)
+//     uOp3            1      (1)
+//     =============================
+//             1       3       3
+// Eventually we end up with three entries for the WriteProcRes of the
+// instruction:
+//    {ProcResIdx=1,  Cycles=1}  // P0
+//    {ProcResIdx=7,  Cycles=3}  // P06
+//    {ProcResIdx=10, Cycles=3}  // P016
+//
+// Note that in this case, P016 does not contribute any cycles, so it would
+// be removed by this function.
+// FIXME: Move this to MCSubtargetInfo and use it in llvm-mca.
+static llvm::SmallVector<llvm::MCWriteProcResEntry, 8>
+getNonRedundantWriteProcRes(const llvm::MCSchedClassDesc &SCDesc,
+                            const llvm::MCSubtargetInfo &STI) {
+  llvm::SmallVector<llvm::MCWriteProcResEntry, 8> Result;
+  const auto &SM = STI.getSchedModel();
+  const unsigned NumProcRes = SM.getNumProcResourceKinds();
+
+  // This assumes that the ProcResDescs are sorted in topological order, which
+  // is guaranteed by the tablegen backend.
+  llvm::SmallVector<float, 32> ProcResUnitUsage(NumProcRes);
+  for (const auto *WPR = STI.getWriteProcResBegin(&SCDesc),
+                  *const WPREnd = STI.getWriteProcResEnd(&SCDesc);
+       WPR != WPREnd; ++WPR) {
+    const llvm::MCProcResourceDesc *const ProcResDesc =
+        SM.getProcResource(WPR->ProcResourceIdx);
+    if (ProcResDesc->SubUnitsIdxBegin == nullptr) {
+      // This is a ProcResUnit.
+      Result.push_back({WPR->ProcResourceIdx, WPR->Cycles});
+      ProcResUnitUsage[WPR->ProcResourceIdx] += WPR->Cycles;
+    } else {
+      // This is a ProcResGroup. First see if it contributes any cycles or if
+      // it has cycles just from subunits.
+      float RemainingCycles = WPR->Cycles;
+      for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin;
+           SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits;
+           ++SubResIdx) {
+        RemainingCycles -= ProcResUnitUsage[*SubResIdx];
+      }
+      if (RemainingCycles < 0.01f) {
+        // The ProcResGroup contributes no cycles of its own.
+        continue;
+      }
+      // The ProcResGroup contributes `RemainingCycles` cycles of its own.
+      Result.push_back({WPR->ProcResourceIdx,
+                        static_cast<uint16_t>(std::round(RemainingCycles))});
+      // Spread the remaining cycles over all subunits.
+      for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin;
+           SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits;
+           ++SubResIdx) {
+        ProcResUnitUsage[*SubResIdx] += RemainingCycles / ProcResDesc->NumUnits;
+      }
+    }
+  }
+  return Result;
+}
+
+void Analysis::printSchedClassDescHtml(const llvm::MCSchedClassDesc &SCDesc,
+                                       llvm::raw_ostream &OS) const {
+  OS << "<table class=\"sched-class-desc\">";
+  OS << "<tr><th>Valid</th><th>Variant</th><th>uOps</th><th>Latency</"
+        "th><th>WriteProcRes</th></tr>";
+  if (SCDesc.isValid()) {
+    OS << "<tr><td>&#10004;</td>";
+    OS << "<td>" << (SCDesc.isVariant() ? "&#10004;" : "&#10005;") << "</td>";
+    OS << "<td>" << SCDesc.NumMicroOps << "</td>";
+    // Latencies.
+    OS << "<td><ul>";
+    for (int I = 0, E = SCDesc.NumWriteLatencyEntries; I < E; ++I) {
+      const auto *const Entry =
+          SubtargetInfo_->getWriteLatencyEntry(&SCDesc, I);
+      OS << "<li>" << Entry->Cycles;
+      if (SCDesc.NumWriteLatencyEntries > 1) {
+        // Dismabiguate if more than 1 latency.
+        OS << " (WriteResourceID " << Entry->WriteResourceID << ")";
+      }
+      OS << "</li>";
+    }
+    OS << "</ul></td>";
+    // WriteProcRes.
+    OS << "<td><ul>";
+    for (const auto &WPR :
+         getNonRedundantWriteProcRes(SCDesc, *SubtargetInfo_)) {
+      OS << "<li><span class=\"mono\">";
+      writeEscaped<kEscapeHtml>(OS, SubtargetInfo_->getSchedModel()
+                                        .getProcResource(WPR.ProcResourceIdx)
+                                        ->Name);
+      OS << "</spam>: " << WPR.Cycles << "</li>";
+    }
+    OS << "</ul></td>";
+    OS << "</tr>";
+  } else {
+    OS << "<tr><td>&#10005;</td><td></td><td></td></tr>";
+  }
+  OS << "</table>";
+}
+
 static constexpr const char kHtmlHead[] = R"(
 <head>
 <title>llvm-exegesis Analysis Results</title>
@ -234,23 +348,29 @@ span.config {
 div.inconsistency {
  margin-top: 50px;
 }
-table.sched-class {
+table {
  margin-left: 50px;
  border-collapse: collapse;
 }
-table.sched-class, table.sched-class tr,td,th {
+table, table tr,td,th {
  border: 1px solid #444;
 }
-table.sched-class td {
+table ul {
+  padding-left: 0px;
+  margin: 0px;
+  list-style-type: none;
+}
+table.sched-class-clusters td {
  padding-left: 10px;
  padding-right: 10px;
  padding-top: 10px;
  padding-bottom: 10px;
 }
-table.sched-class ul {
-  padding-left: 0px;
-  margin: 0px;
-  list-style-type: none;
+table.sched-class-desc td {
+  padding-left: 10px;
+  padding-right: 10px;
+  padding-top: 2px;
+  padding-bottom: 2px;
 }
 span.mono {
  font-family: monospace;
@ -284,12 +404,14 @@ llvm::Error Analysis::run<Analysis::PrintSchedClassInconsistencies>(
    if (ClustersForSchedClass.size() <= 1)
      continue; // Nothing weird.

-    OS << "<div class=\"inconsistency\"><p>Sched Class <span "
-          "class=\"sched-class-name\">";
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    const auto &SchedModel = SubtargetInfo_->getSchedModel();
    const llvm::MCSchedClassDesc *const SCDesc =
        SchedModel.getSchedClassDesc(SchedClassAndPoints.first);
+    if (!SCDesc)
+      continue;
+    OS << "<div class=\"inconsistency\"><p>Sched Class <span "
+          "class=\"sched-class-name\">";
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    writeEscaped<kEscapeHtml>(OS, SCDesc->Name);
 #else
    OS << SchedClassAndPoints.first;
@ -297,7 +419,9 @@ llvm::Error Analysis::run<Analysis::PrintSchedClassInconsistencies>(
    OS << "</span> contains instructions with distinct performance "
          "characteristics, falling into "
       << ClustersForSchedClass.size() << " clusters:</p>";
-    printSchedClassHtml(SchedClassAndPoints.second, OS);
+    printSchedClassClustersHtml(SchedClassAndPoints.second, OS);
+    OS << "<p>llvm data:</p>";
+    printSchedClassDescHtml(*SCDesc, OS);
    OS << "</div>";
  }

--- a/tools/llvm-exegesis/lib/Analysis.h
+++ b/tools/llvm-exegesis/lib/Analysis.h
@ -42,8 +42,10 @@ public:
 private:
  void printInstructionRowCsv(size_t PointId, llvm::raw_ostream &OS) const;

-  void printSchedClassHtml(std::vector<size_t> PointIds,
-                           llvm::raw_ostream &OS) const;
+  void printSchedClassClustersHtml(std::vector<size_t> PointIds,
+                                   llvm::raw_ostream &OS) const;
+  void printSchedClassDescHtml(const llvm::MCSchedClassDesc &SCDesc,
+                               llvm::raw_ostream &OS) const;

  // Builds a map of Sched Class -> indices of points that belong to the sched
  // class.