1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 02:52:53 +02:00

[llvm-mca] Improved report generated by the SchedulerStatistics view.

Before this patch, the SchedulerStatistics only printed the maximum number of
buffer entries consumed in each scheduler's queue at a given point of the
simulation.

This patch restructures the reported table, and adds an extra field named
"Average number of used buffer entries" to it.
This patch also uses different colors to help identify bottlenecks caused by
high pressure on the schedulers' buffers.

llvm-svn: 340746
This commit is contained in:
Andrea Di Biagio 2018-08-27 14:52:52 +00:00
parent 7956029f94
commit f707cd4166
9 changed files with 234 additions and 112 deletions

View File

@ -458,7 +458,8 @@ counters for the dispatch logic, the reorder buffer, the retire control unit,
and the register file.
Below is an example of ``-all-stats`` output generated by :program:`llvm-mca`
for the dot-product example discussed in the previous sections.
for 300 iterations of the dot-product example discussed in the previous
sections.
.. code-block:: none
@ -484,11 +485,16 @@ for the dot-product example discussed in the previous sections.
1, 306 (50.2%)
2, 297 (48.7%)
Scheduler's queue usage:
JALU01, 0/20
JFPU01, 18/18
JLSAGU, 0/12
[1] Resource name.
[2] Average number of used buffer entries.
[3] Maximum number of used buffer entries.
[4] Total number of buffer entries.
[1] [2] [3] [4]
JALU01 0 0 20
JFPU01 17 18 18
JLSAGU 0 0 12
Retire Control Unit - number of cycles where we saw N instructions retired:
@ -528,8 +534,8 @@ representing the number of instructions issued on some number of cycles. In
this case, of the 610 simulated cycles, single instructions were issued 306
times (50.2%) and there were 7 cycles where no instructions were issued.
The *Scheduler's queue usage* table shows that the maximum number of buffer
entries (i.e., scheduler queue entries) used at runtime. Resource JFPU01
The *Scheduler's queue usage* table shows the average and maximum number of
buffer entries (i.e., scheduler queue entries) used at runtime. Resource JFPU01
reached its maximum (18 of 18 queue entries). Note that AMD Jaguar implements
three schedulers:

View File

@ -29,9 +29,15 @@ add %rsi, %rsi
# CHECK-NEXT: 2, 1 (10.0%)
# CHECK: Scheduler's queue usage:
# CHECK-NEXT: JALU01, 1/20
# CHECK-NEXT: JFPU01, 1/18
# CHECK-NEXT: JLSAGU, 1/12
# CHECK-NEXT: [1] Resource name.
# CHECK-NEXT: [2] Average number of used buffer entries.
# CHECK-NEXT: [3] Maximum number of used buffer entries.
# CHECK-NEXT: [4] Total number of buffer entries.
# CHECK: [1] [2] [3] [4]
# CHECK-NEXT: JALU01 0 1 20
# CHECK-NEXT: JFPU01 0 1 18
# CHECK-NEXT: JLSAGU 0 1 12
# CHECK: Resources:
# CHECK-NEXT: [0] - JALU0

View File

@ -44,9 +44,15 @@ add %eax, %eax
# FULLREPORT-NEXT: 1, 100 (97.1%)
# FULLREPORT: Scheduler's queue usage:
# FULLREPORT-NEXT: JALU01, 20/20
# FULLREPORT-NEXT: JFPU01, 0/18
# FULLREPORT-NEXT: JLSAGU, 0/12
# FULLREPORT-NEXT: [1] Resource name.
# FULLREPORT-NEXT: [2] Average number of used buffer entries.
# FULLREPORT-NEXT: [3] Maximum number of used buffer entries.
# FULLREPORT-NEXT: [4] Total number of buffer entries.
# FULLREPORT: [1] [2] [3] [4]
# FULLREPORT-NEXT: JALU01 15 20 20
# FULLREPORT-NEXT: JFPU01 0 0 18
# FULLREPORT-NEXT: JLSAGU 0 0 12
# FULLREPORT: Retire Control Unit - number of cycles where we saw N instructions retired:
# FULLREPORT-NEXT: [# retired], [# cycles]

View File

@ -45,9 +45,15 @@ add %eax, %eax
# FULL-NEXT: 1, 100 (97.1%)
# FULL: Scheduler's queue usage:
# FULL-NEXT: JALU01, 20/20
# FULL-NEXT: JFPU01, 0/18
# FULL-NEXT: JLSAGU, 0/12
# FULL-NEXT: [1] Resource name.
# FULL-NEXT: [2] Average number of used buffer entries.
# FULL-NEXT: [3] Maximum number of used buffer entries.
# FULL-NEXT: [4] Total number of buffer entries.
# FULL: [1] [2] [3] [4]
# FULL-NEXT: JALU01 15 20 20
# FULL-NEXT: JFPU01 0 0 18
# FULL-NEXT: JLSAGU 0 0 12
# FULL: Retire Control Unit - number of cycles where we saw N instructions retired:
# FULL-NEXT: [# retired], [# cycles]

View File

@ -46,9 +46,15 @@ add %eax, %eax
# FULLREPORT-NEXT: 1, 100 (97.1%)
# FULLREPORT: Scheduler's queue usage:
# FULLREPORT-NEXT: JALU01, 20/20
# FULLREPORT-NEXT: JFPU01, 0/18
# FULLREPORT-NEXT: JLSAGU, 0/12
# FULLREPORT-NEXT: [1] Resource name.
# FULLREPORT-NEXT: [2] Average number of used buffer entries.
# FULLREPORT-NEXT: [3] Maximum number of used buffer entries.
# FULLREPORT-NEXT: [4] Total number of buffer entries.
# FULLREPORT: [1] [2] [3] [4]
# FULLREPORT-NEXT: JALU01 15 20 20
# FULLREPORT-NEXT: JFPU01 0 0 18
# FULLREPORT-NEXT: JLSAGU 0 0 12
# FULLREPORT: Retire Control Unit - number of cycles where we saw N instructions retired:
# FULLREPORT-NEXT: [# retired], [# cycles]

View File

@ -45,9 +45,15 @@ add %eax, %eax
# ALL-NEXT: 1, 100 (97.1%)
# ALL: Scheduler's queue usage:
# ALL-NEXT: JALU01, 20/20
# ALL-NEXT: JFPU01, 0/18
# ALL-NEXT: JLSAGU, 0/12
# ALL-NEXT: [1] Resource name.
# ALL-NEXT: [2] Average number of used buffer entries.
# ALL-NEXT: [3] Maximum number of used buffer entries.
# ALL-NEXT: [4] Total number of buffer entries.
# ALL: [1] [2] [3] [4]
# ALL-NEXT: JALU01 15 20 20
# ALL-NEXT: JFPU01 0 0 18
# ALL-NEXT: JLSAGU 0 0 12
# ALL: Retire Control Unit - number of cycles where we saw N instructions retired:
# ALL-NEXT: [# retired], [# cycles]

View File

@ -17,36 +17,90 @@ xor %eax, %ebx
# ALL-NEXT: 0, 3 (75.0%)
# ALL-NEXT: 1, 1 (25.0%)
# BDW: Scheduler's queue usage:
# BDW-NEXT: BWPortAny, 1/60
# HSW: Scheduler's queue usage:
# HSW-NEXT: HWPortAny, 1/60
# KNL: Scheduler's queue usage:
# KNL-NEXT: HWPortAny, 1/60
# BTVER2: Scheduler's queue usage:
# BTVER2-NEXT: JALU01, 1/20
# BTVER2-NEXT: JFPU01, 0/18
# BTVER2-NEXT: JLSAGU, 0/12
# SLM: Scheduler's queue usage:
# SLM-NEXT: No scheduler resources used.
# IVB: Scheduler's queue usage:
# IVB-NEXT: SBPortAny, 1/54
# BDW: Scheduler's queue usage:
# BDW-NEXT: [1] Resource name.
# BDW-NEXT: [2] Average number of used buffer entries.
# BDW-NEXT: [3] Maximum number of used buffer entries.
# BDW-NEXT: [4] Total number of buffer entries.
# SNB: Scheduler's queue usage:
# SNB-NEXT: SBPortAny, 1/54
# BTVER2: Scheduler's queue usage:
# BTVER2-NEXT: [1] Resource name.
# BTVER2-NEXT: [2] Average number of used buffer entries.
# BTVER2-NEXT: [3] Maximum number of used buffer entries.
# BTVER2-NEXT: [4] Total number of buffer entries.
# HSW: Scheduler's queue usage:
# HSW-NEXT: [1] Resource name.
# HSW-NEXT: [2] Average number of used buffer entries.
# HSW-NEXT: [3] Maximum number of used buffer entries.
# HSW-NEXT: [4] Total number of buffer entries.
# IVB: Scheduler's queue usage:
# IVB-NEXT: [1] Resource name.
# IVB-NEXT: [2] Average number of used buffer entries.
# IVB-NEXT: [3] Maximum number of used buffer entries.
# IVB-NEXT: [4] Total number of buffer entries.
# KNL: Scheduler's queue usage:
# KNL-NEXT: [1] Resource name.
# KNL-NEXT: [2] Average number of used buffer entries.
# KNL-NEXT: [3] Maximum number of used buffer entries.
# KNL-NEXT: [4] Total number of buffer entries.
# SKX: Scheduler's queue usage:
# SKX-NEXT: SKLPortAny, 1/60
# SKX-NEXT: [1] Resource name.
# SKX-NEXT: [2] Average number of used buffer entries.
# SKX-NEXT: [3] Maximum number of used buffer entries.
# SKX-NEXT: [4] Total number of buffer entries.
# SKX-AVX512: Scheduler's queue usage:
# SKX-AVX512-NEXT: SKXPortAny, 1/60
# SKX-AVX512-NEXT: [1] Resource name.
# SKX-AVX512-NEXT: [2] Average number of used buffer entries.
# SKX-AVX512-NEXT: [3] Maximum number of used buffer entries.
# SKX-AVX512-NEXT: [4] Total number of buffer entries.
# SNB: Scheduler's queue usage:
# SNB-NEXT: [1] Resource name.
# SNB-NEXT: [2] Average number of used buffer entries.
# SNB-NEXT: [3] Maximum number of used buffer entries.
# SNB-NEXT: [4] Total number of buffer entries.
# ZNVER1: Scheduler's queue usage:
# ZNVER1-NEXT: ZnAGU, 0/28
# ZNVER1-NEXT: ZnALU, 1/56
# ZNVER1-NEXT: ZnFPU, 0/36
# ZNVER1-NEXT: [1] Resource name.
# ZNVER1-NEXT: [2] Average number of used buffer entries.
# ZNVER1-NEXT: [3] Maximum number of used buffer entries.
# ZNVER1-NEXT: [4] Total number of buffer entries.
# BDW: [1] [2] [3] [4]
# BDW-NEXT: BWPortAny 0 1 60
# HSW: [1] [2] [3] [4]
# HSW-NEXT: HWPortAny 0 1 60
# KNL: [1] [2] [3] [4]
# KNL-NEXT: HWPortAny 0 1 60
# BTVER2: [1] [2] [3] [4]
# BTVER2-NEXT: JALU01 0 1 20
# BTVER2-NEXT: JFPU01 0 0 18
# BTVER2-NEXT: JLSAGU 0 0 12
# IVB: [1] [2] [3] [4]
# IVB-NEXT: SBPortAny 0 1 54
# SNB: [1] [2] [3] [4]
# SNB-NEXT: SBPortAny 0 1 54
# SKX: [1] [2] [3] [4]
# SKX-NEXT: SKLPortAny 0 1 60
# SKX-AVX512: [1] [2] [3] [4]
# SKX-AVX512-NEXT: SKXPortAny 0 1 60
# ZNVER1: [1] [2] [3] [4]
# ZNVER1-NEXT: ZnAGU 0 0 28
# ZNVER1-NEXT: ZnALU 0 1 56
# ZNVER1-NEXT: ZnFPU 0 0 36

View File

@ -14,6 +14,7 @@
#include "Views/SchedulerStatistics.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormattedStream.h"
using namespace llvm;
@ -26,69 +27,101 @@ void SchedulerStatistics::onEvent(const HWInstructionEvent &Event) {
void SchedulerStatistics::onReservedBuffers(ArrayRef<unsigned> Buffers) {
for (const unsigned Buffer : Buffers) {
if (BufferedResources.find(Buffer) != BufferedResources.end()) {
BufferUsage &BU = BufferedResources[Buffer];
BufferUsage &BU = Usage[Buffer];
BU.SlotsInUse++;
BU.MaxUsedSlots = std::max(BU.MaxUsedSlots, BU.SlotsInUse);
continue;
}
BufferedResources.insert(
std::pair<unsigned, BufferUsage>(Buffer, {1U, 1U}));
}
}
void SchedulerStatistics::onReleasedBuffers(ArrayRef<unsigned> Buffers) {
for (const unsigned Buffer : Buffers) {
assert(BufferedResources.find(Buffer) != BufferedResources.end() &&
"Buffered resource not in map?");
BufferUsage &BU = BufferedResources[Buffer];
BU.SlotsInUse--;
}
for (const unsigned Buffer : Buffers)
Usage[Buffer].SlotsInUse--;
}
void SchedulerStatistics::printSchedulerStatistics(
llvm::raw_ostream &OS) const {
std::string Buffer;
raw_string_ostream TempStream(Buffer);
TempStream << "\n\nSchedulers - number of cycles where we saw N instructions "
"issued:\n";
TempStream << "[# issued], [# cycles]\n";
for (const std::pair<unsigned, unsigned> &Entry : IssuedPerCycle) {
TempStream << " " << Entry.first << ", " << Entry.second << " ("
<< format("%.1f", ((double)Entry.second / NumCycles) * 100)
<< "%)\n";
void SchedulerStatistics::updateHistograms() {
for (BufferUsage &BU : Usage)
BU.CumulativeNumUsedSlots += BU.SlotsInUse;
IssuedPerCycle[NumIssued]++;
NumIssued = 0;
}
TempStream.flush();
OS << Buffer;
void SchedulerStatistics::printSchedulerStats(raw_ostream &OS) const {
OS << "\n\nSchedulers - "
<< "number of cycles where we saw N instructions issued:\n";
OS << "[# issued], [# cycles]\n";
const auto It =
std::max_element(IssuedPerCycle.begin(), IssuedPerCycle.end());
unsigned Index = std::distance(IssuedPerCycle.begin(), It);
bool HasColors = OS.has_colors();
for (unsigned I = 0, E = IssuedPerCycle.size(); I < E; ++I) {
unsigned IPC = IssuedPerCycle[I];
if (!IPC)
continue;
if (I == Index && HasColors)
OS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
OS << " " << I << ", " << IPC << " ("
<< format("%.1f", ((double)IPC / NumCycles) * 100) << "%)\n";
if (HasColors)
OS.resetColor();
}
}
void SchedulerStatistics::printSchedulerUsage(raw_ostream &OS) const {
std::string Buffer;
raw_string_ostream TempStream(Buffer);
TempStream << "\n\nScheduler's queue usage:\n";
// Early exit if no buffered resources were consumed.
if (BufferedResources.empty()) {
TempStream << "No scheduler resources used.\n";
TempStream.flush();
OS << Buffer;
assert(NumCycles && "Unexpected number of cycles!");
OS << "\nScheduler's queue usage:\n";
if (all_of(Usage, [](const BufferUsage &BU) { return !BU.MaxUsedSlots; })) {
OS << "No scheduler resources used.\n";
return;
}
OS << "[1] Resource name.\n"
<< "[2] Average number of used buffer entries.\n"
<< "[3] Maximum number of used buffer entries.\n"
<< "[4] Total number of buffer entries.\n\n"
<< " [1] [2] [3] [4]\n";
formatted_raw_ostream FOS(OS);
bool HasColors = FOS.has_colors();
for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
const MCProcResourceDesc &ProcResource = *SM.getProcResource(I);
if (ProcResource.BufferSize <= 0)
continue;
const auto It = BufferedResources.find(I);
unsigned MaxUsedSlots =
It == BufferedResources.end() ? 0 : It->second.MaxUsedSlots;
TempStream << ProcResource.Name << ", " << MaxUsedSlots << '/'
<< ProcResource.BufferSize << '\n';
const BufferUsage &BU = Usage[I];
double AvgUsage = (double)BU.CumulativeNumUsedSlots / NumCycles;
double AlmostFullThreshold = (double)(ProcResource.BufferSize * 4) / 5;
unsigned NormalizedAvg = floor((AvgUsage * 10) + 0.5) / 10;
unsigned NormalizedThreshold = floor((AlmostFullThreshold * 10) + 0.5) / 10;
FOS << ProcResource.Name;
FOS.PadToColumn(17);
if (HasColors && NormalizedAvg >= NormalizedThreshold)
FOS.changeColor(raw_ostream::YELLOW, true, false);
FOS << NormalizedAvg;
if (HasColors)
FOS.resetColor();
FOS.PadToColumn(28);
if (HasColors &&
BU.MaxUsedSlots == static_cast<unsigned>(ProcResource.BufferSize))
FOS.changeColor(raw_ostream::RED, true, false);
FOS << BU.MaxUsedSlots;
if (HasColors)
FOS.resetColor();
FOS.PadToColumn(39);
FOS << ProcResource.BufferSize << '\n';
}
TempStream.flush();
OS << Buffer;
FOS.flush();
}
void SchedulerStatistics::printView(llvm::raw_ostream &OS) const {
printSchedulerStats(OS);
printSchedulerUsage(OS);
}
} // namespace mca

View File

@ -17,15 +17,21 @@
///
/// Schedulers - number of cycles where we saw N instructions issued:
/// [# issued], [# cycles]
/// 0, 7 (5.4%)
/// 1, 4 (3.1%)
/// 2, 8 (6.2%)
/// 0, 6 (2.9%)
/// 1, 106 (50.7%)
/// 2, 97 (46.4%)
///
/// Scheduler's queue usage:
/// JALU01, 0/20
/// JFPU01, 18/18
/// JLSAGU, 0/12
/// [1] Resource name.
/// [2] Average number of used buffer entries.
/// [3] Maximum number of used buffer entries.
/// [4] Total number of buffer entries.
///
/// [1] [2] [3] [4]
/// JALU01 0 0 20
/// JFPU01 15 18 18
/// JLSAGU 0 0 12
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVM_MCA_SCHEDULERSTATISTICS_H
@ -38,12 +44,8 @@
namespace mca {
class SchedulerStatistics : public View {
class SchedulerStatistics final : public View {
const llvm::MCSchedModel &SM;
using Histogram = std::map<unsigned, unsigned>;
Histogram IssuedPerCycle;
unsigned NumIssued;
unsigned NumCycles;
@ -51,21 +53,21 @@ class SchedulerStatistics : public View {
struct BufferUsage {
unsigned SlotsInUse;
unsigned MaxUsedSlots;
uint64_t CumulativeNumUsedSlots;
};
std::map<unsigned, BufferUsage> BufferedResources;
std::vector<unsigned> IssuedPerCycle;
std::vector<BufferUsage> Usage;
void updateHistograms() {
IssuedPerCycle[NumIssued]++;
NumIssued = 0;
}
void printSchedulerStatistics(llvm::raw_ostream &OS) const;
void updateHistograms();
void printSchedulerStats(llvm::raw_ostream &OS) const;
void printSchedulerUsage(llvm::raw_ostream &OS) const;
public:
SchedulerStatistics(const llvm::MCSubtargetInfo &STI)
: SM(STI.getSchedModel()), NumIssued(0), NumCycles(0) {}
: SM(STI.getSchedModel()), NumIssued(0), NumCycles(0),
IssuedPerCycle(STI.getSchedModel().NumProcResourceKinds, 0),
Usage(STI.getSchedModel().NumProcResourceKinds, {0, 0, 0}) {}
void onEvent(const HWInstructionEvent &Event) override;
@ -81,10 +83,7 @@ public:
// buffered resource in the Buffers set.
void onReleasedBuffers(llvm::ArrayRef<unsigned> Buffers) override;
void printView(llvm::raw_ostream &OS) const override {
printSchedulerStatistics(OS);
printSchedulerUsage(OS);
}
void printView(llvm::raw_ostream &OS) const override;
};
} // namespace mca