1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 20:23:11 +01:00

[llvm-exegesis] Show sched class details in analysis.

Summary: And update docs.

Reviewers: gchatelet

Subscribers: tschuett, craig.topper, RKSimon, llvm-commits

Differential Revision: https://reviews.llvm.org/D47254

llvm-svn: 333169
This commit is contained in:
Clement Courbet 2018-05-24 10:47:05 +00:00
parent 4f6111191f
commit 0ba5b5b01b
4 changed files with 145 additions and 29 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 34 KiB

View File

@ -113,20 +113,10 @@ following format:
:program:`llvm-exegesis` will also analyze the clusters to point out :program:`llvm-exegesis` will also analyze the clusters to point out
inconsistencies in the scheduling information. The output is an html file. For inconsistencies in the scheduling information. The output is an html file. For
example, `/tmp/inconsistencies.html` will contain messages like: example, `/tmp/inconsistencies.html` will contain messages like the following:
.. code-block:: none .. image:: llvm-exegesis-analysis.png
:align: center
Sched Class EXTRACTPSrr_VEXTRACTPSrr contains instructions with distinct performance characteristics, falling into 2 clusters:
4,EXTRACTPSrr,,3.00
3,VEXTRACTPSrr,,2.01
Sched Class WriteCRC32 contains instructions with distinct performance characteristics, falling into 2 clusters:
4,CRC32r32r16,,3.01
4,CRC32r32r32,,3.00
11,CRC32r32r8,,4.01
4,CRC32r64r64,,3.01
4,CRC32r64r8,,3.00
Note that the scheduling class names will be resolved only when Note that the scheduling class names will be resolved only when
:program:`llvm-exegesis` is compiled in debug mode, else only the class id will :program:`llvm-exegesis` is compiled in debug mode, else only the class id will

View File

@ -167,8 +167,8 @@ Analysis::makePointsPerSchedClass() const {
return PointsPerSchedClass; return PointsPerSchedClass;
} }
void Analysis::printSchedClassHtml(std::vector<size_t> PointIds, void Analysis::printSchedClassClustersHtml(std::vector<size_t> PointIds,
llvm::raw_ostream &OS) const { llvm::raw_ostream &OS) const {
assert(!PointIds.empty()); assert(!PointIds.empty());
// Sort the points by cluster id so that we can display them grouped by // Sort the points by cluster id so that we can display them grouped by
// cluster. // cluster.
@ -178,7 +178,7 @@ void Analysis::printSchedClassHtml(std::vector<size_t> PointIds,
Clustering_.getClusterIdForPoint(B); Clustering_.getClusterIdForPoint(B);
}); });
const auto &Points = Clustering_.getPoints(); const auto &Points = Clustering_.getPoints();
OS << "<table class=\"sched-class\">"; OS << "<table class=\"sched-class-clusters\">";
OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>"; OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>";
for (const auto &Measurement : Points[PointIds[0]].Measurements) { for (const auto &Measurement : Points[PointIds[0]].Measurements) {
OS << "<th>"; OS << "<th>";
@ -214,6 +214,120 @@ void Analysis::printSchedClassHtml(std::vector<size_t> PointIds,
OS << "</table>"; OS << "</table>";
} }
// Returns the non-redundant list of WriteProcRes used by the given sched
// class.
//
// In LLVM's scheduling model each instruction has a certain number of uops,
// and those uops consume resources described by WriteProcRes entries. Each
// entry describes how many cycles are spent on a specific ProcRes kind.
// For example, an instruction might have 3 uOps, one dispatching on P0
// (ProcResIdx=1) and two on P06 (ProcResIdx=7).
//
// LLVM additionally denormalizes resource consumption so that usage of a
// subresource is also counted on its super resources. So in practice, if
// there exists a P016 (ProcResIdx=10), then the cycles consumed by P0 are
// also consumed by P06 (ProcResIdx=7) and P016 (ProcResIdx=10), and the
// cycles consumed by P06 are also consumed by P016. In the figure below,
// parenthesized cycles denote implied usage of super resources by
// subresources:
//
//                 P0      P06    P016
//     uOp1        1      (1)     (1)
//     uOp2                1      (1)
//     uOp3                1      (1)
//     =============================
//                 1       3       3
//
// Eventually we end up with three entries for the WriteProcRes of the
// instruction:
//     {ProcResIdx=1,  Cycles=1}  // P0
//     {ProcResIdx=7,  Cycles=3}  // P06
//     {ProcResIdx=10, Cycles=3}  // P016
//
// In this case P016 does not contribute any cycles of its own, so this
// function removes it. A group that is kept is reported with only the cycles
// that its sub-units do not already account for.
// FIXME: Move this to MCSubtargetInfo and use it in llvm-mca.
static llvm::SmallVector<llvm::MCWriteProcResEntry, 8>
getNonRedundantWriteProcRes(const llvm::MCSchedClassDesc &SCDesc,
                            const llvm::MCSubtargetInfo &STI) {
  const auto &SM = STI.getSchedModel();
  // Cycles attributed so far to each resource, indexed by ProcResIdx.
  // Walking the entries in order assumes that the ProcResDescs are sorted in
  // topological order, which is guaranteed by the tablegen backend.
  llvm::SmallVector<float, 32> UnitUsage(SM.getNumProcResourceKinds());
  llvm::SmallVector<llvm::MCWriteProcResEntry, 8> NonRedundant;

  const auto *Entry = STI.getWriteProcResBegin(&SCDesc);
  const auto *const EntriesEnd = STI.getWriteProcResEnd(&SCDesc);
  for (; Entry != EntriesEnd; ++Entry) {
    const llvm::MCProcResourceDesc *const Desc =
        SM.getProcResource(Entry->ProcResourceIdx);
    const auto *const SubUnitsBegin = Desc->SubUnitsIdxBegin;

    if (SubUnitsBegin == nullptr) {
      // No sub-units: this is a ProcResUnit, always kept as is.
      NonRedundant.push_back({Entry->ProcResourceIdx, Entry->Cycles});
      UnitUsage[Entry->ProcResourceIdx] += Entry->Cycles;
      continue;
    }

    // This is a ProcResGroup. Subtract what its sub-units already consume to
    // find out whether the group contributes any cycles of its own.
    const auto *const SubUnitsEnd = SubUnitsBegin + Desc->NumUnits;
    float OwnCycles = Entry->Cycles;
    for (const auto *SubUnit = SubUnitsBegin; SubUnit != SubUnitsEnd;
         ++SubUnit) {
      OwnCycles -= UnitUsage[*SubUnit];
    }
    if (OwnCycles < 0.01f) {
      // All cycles of the group come from its sub-units: redundant entry.
      continue;
    }

    // The group contributes `OwnCycles` cycles of its own.
    NonRedundant.push_back({Entry->ProcResourceIdx,
                            static_cast<uint16_t>(std::round(OwnCycles))});
    // Spread those cycles evenly over all sub-units so that enclosing groups
    // seen later do not count them again.
    for (const auto *SubUnit = SubUnitsBegin; SubUnit != SubUnitsEnd;
         ++SubUnit) {
      UnitUsage[*SubUnit] += OwnCycles / Desc->NumUnits;
    }
  }
  return NonRedundant;
}
// Writes to `OS` an HTML table describing what the LLVM scheduling model says
// about `SCDesc`: whether the class is valid, whether it is a variant, its
// number of micro-ops, its write latencies, and its non-redundant
// WriteProcRes entries (resource name and cycles). An invalid class yields a
// single row holding only a cross mark.
void Analysis::printSchedClassDescHtml(const llvm::MCSchedClassDesc &SCDesc,
                                       llvm::raw_ostream &OS) const {
  OS << "<table class=\"sched-class-desc\">";
  OS << "<tr><th>Valid</th><th>Variant</th><th>uOps</th><th>Latency</"
        "th><th>WriteProcRes</th></tr>";
  if (SCDesc.isValid()) {
    // &#10004; is a check mark, &#10005; is a cross mark.
    OS << "<tr><td>&#10004;</td>";
    OS << "<td>" << (SCDesc.isVariant() ? "&#10004;" : "&#10005;") << "</td>";
    OS << "<td>" << SCDesc.NumMicroOps << "</td>";
    // Latencies.
    OS << "<td><ul>";
    for (int I = 0, E = SCDesc.NumWriteLatencyEntries; I < E; ++I) {
      const auto *const Entry =
          SubtargetInfo_->getWriteLatencyEntry(&SCDesc, I);
      OS << "<li>" << Entry->Cycles;
      if (SCDesc.NumWriteLatencyEntries > 1) {
        // Disambiguate if more than 1 latency.
        OS << " (WriteResourceID " << Entry->WriteResourceID << ")";
      }
      OS << "</li>";
    }
    OS << "</ul></td>";
    // WriteProcRes.
    OS << "<td><ul>";
    for (const auto &WPR :
         getNonRedundantWriteProcRes(SCDesc, *SubtargetInfo_)) {
      OS << "<li><span class=\"mono\">";
      writeEscaped<kEscapeHtml>(OS, SubtargetInfo_->getSchedModel()
                                        .getProcResource(WPR.ProcResourceIdx)
                                        ->Name);
      // Close the <span> opened above (the tag used to be misspelled as
      // </spam>, which left the element unclosed).
      OS << "</span>: " << WPR.Cycles << "</li>";
    }
    OS << "</ul></td>";
    OS << "</tr>";
  } else {
    // Emit one cell per header column so the row matches the table layout.
    OS << "<tr><td>&#10005;</td><td></td><td></td><td></td><td></td></tr>";
  }
  OS << "</table>";
}
static constexpr const char kHtmlHead[] = R"( static constexpr const char kHtmlHead[] = R"(
<head> <head>
<title>llvm-exegesis Analysis Results</title> <title>llvm-exegesis Analysis Results</title>
@ -234,23 +348,29 @@ span.config {
div.inconsistency { div.inconsistency {
margin-top: 50px; margin-top: 50px;
} }
table.sched-class { table {
margin-left: 50px; margin-left: 50px;
border-collapse: collapse; border-collapse: collapse;
} }
table.sched-class, table.sched-class tr,td,th { table, table tr,td,th {
border: 1px solid #444; border: 1px solid #444;
} }
table.sched-class td { table ul {
padding-left: 0px;
margin: 0px;
list-style-type: none;
}
table.sched-class-clusters td {
padding-left: 10px; padding-left: 10px;
padding-right: 10px; padding-right: 10px;
padding-top: 10px; padding-top: 10px;
padding-bottom: 10px; padding-bottom: 10px;
} }
table.sched-class ul { table.sched-class-desc td {
padding-left: 0px; padding-left: 10px;
margin: 0px; padding-right: 10px;
list-style-type: none; padding-top: 2px;
padding-bottom: 2px;
} }
span.mono { span.mono {
font-family: monospace; font-family: monospace;
@ -284,12 +404,14 @@ llvm::Error Analysis::run<Analysis::PrintSchedClassInconsistencies>(
if (ClustersForSchedClass.size() <= 1) if (ClustersForSchedClass.size() <= 1)
continue; // Nothing weird. continue; // Nothing weird.
OS << "<div class=\"inconsistency\"><p>Sched Class <span "
"class=\"sched-class-name\">";
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
const auto &SchedModel = SubtargetInfo_->getSchedModel(); const auto &SchedModel = SubtargetInfo_->getSchedModel();
const llvm::MCSchedClassDesc *const SCDesc = const llvm::MCSchedClassDesc *const SCDesc =
SchedModel.getSchedClassDesc(SchedClassAndPoints.first); SchedModel.getSchedClassDesc(SchedClassAndPoints.first);
if (!SCDesc)
continue;
OS << "<div class=\"inconsistency\"><p>Sched Class <span "
"class=\"sched-class-name\">";
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
writeEscaped<kEscapeHtml>(OS, SCDesc->Name); writeEscaped<kEscapeHtml>(OS, SCDesc->Name);
#else #else
OS << SchedClassAndPoints.first; OS << SchedClassAndPoints.first;
@ -297,7 +419,9 @@ llvm::Error Analysis::run<Analysis::PrintSchedClassInconsistencies>(
OS << "</span> contains instructions with distinct performance " OS << "</span> contains instructions with distinct performance "
"characteristics, falling into " "characteristics, falling into "
<< ClustersForSchedClass.size() << " clusters:</p>"; << ClustersForSchedClass.size() << " clusters:</p>";
printSchedClassHtml(SchedClassAndPoints.second, OS); printSchedClassClustersHtml(SchedClassAndPoints.second, OS);
OS << "<p>llvm data:</p>";
printSchedClassDescHtml(*SCDesc, OS);
OS << "</div>"; OS << "</div>";
} }

View File

@ -42,8 +42,10 @@ public:
private: private:
void printInstructionRowCsv(size_t PointId, llvm::raw_ostream &OS) const; void printInstructionRowCsv(size_t PointId, llvm::raw_ostream &OS) const;
void printSchedClassHtml(std::vector<size_t> PointIds, void printSchedClassClustersHtml(std::vector<size_t> PointIds,
llvm::raw_ostream &OS) const; llvm::raw_ostream &OS) const;
void printSchedClassDescHtml(const llvm::MCSchedClassDesc &SCDesc,
llvm::raw_ostream &OS) const;
// Builds a map of Sched Class -> indices of points that belong to the sched // Builds a map of Sched Class -> indices of points that belong to the sched
// class. // class.