diff --git a/docs/CommandGuide/llvm-mca.rst b/docs/CommandGuide/llvm-mca.rst index 955adc0a049..5dcd97fb113 100644 --- a/docs/CommandGuide/llvm-mca.rst +++ b/docs/CommandGuide/llvm-mca.rst @@ -458,7 +458,8 @@ counters for the dispatch logic, the reorder buffer, the retire control unit, and the register file. Below is an example of ``-all-stats`` output generated by :program:`llvm-mca` -for the dot-product example discussed in the previous sections. +for 300 iterations of the dot-product example discussed in the previous +sections. .. code-block:: none @@ -484,11 +485,16 @@ for the dot-product example discussed in the previous sections. 1, 306 (50.2%) 2, 297 (48.7%) - Scheduler's queue usage: - JALU01, 0/20 - JFPU01, 18/18 - JLSAGU, 0/12 + [1] Resource name. + [2] Average number of used buffer entries. + [3] Maximum number of used buffer entries. + [4] Total number of buffer entries. + + [1] [2] [3] [4] + JALU01 0 0 20 + JFPU01 17 18 18 + JLSAGU 0 0 12 Retire Control Unit - number of cycles where we saw N instructions retired: @@ -528,8 +534,8 @@ representing the number of instructions issued on some number of cycles. In this case, of the 610 simulated cycles, single instructions were issued 306 times (50.2%) and there were 7 cycles where no instructions were issued. -The *Scheduler's queue usage* table shows that the maximum number of buffer -entries (i.e., scheduler queue entries) used at runtime. Resource JFPU01 +The *Scheduler's queue usage* table shows the average and maximum number of +buffer entries (i.e., scheduler queue entries) used at runtime. Resource JFPU01 reached its maximum (18 of 18 queue entries). 
Note that AMD Jaguar implements three schedulers: diff --git a/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s b/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s index d02ed8b191f..32b73f37f78 100644 --- a/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s +++ b/test/tools/llvm-mca/X86/BtVer2/scheduler-queue-usage.s @@ -29,9 +29,15 @@ add %rsi, %rsi # CHECK-NEXT: 2, 1 (10.0%) # CHECK: Scheduler's queue usage: -# CHECK-NEXT: JALU01, 1/20 -# CHECK-NEXT: JFPU01, 1/18 -# CHECK-NEXT: JLSAGU, 1/12 +# CHECK-NEXT: [1] Resource name. +# CHECK-NEXT: [2] Average number of used buffer entries. +# CHECK-NEXT: [3] Maximum number of used buffer entries. +# CHECK-NEXT: [4] Total number of buffer entries. + +# CHECK: [1] [2] [3] [4] +# CHECK-NEXT: JALU01 0 1 20 +# CHECK-NEXT: JFPU01 0 1 18 +# CHECK-NEXT: JLSAGU 0 1 12 # CHECK: Resources: # CHECK-NEXT: [0] - JALU0 diff --git a/test/tools/llvm-mca/X86/option-all-stats-1.s b/test/tools/llvm-mca/X86/option-all-stats-1.s index b6b0378cb72..072eec80bec 100644 --- a/test/tools/llvm-mca/X86/option-all-stats-1.s +++ b/test/tools/llvm-mca/X86/option-all-stats-1.s @@ -44,9 +44,15 @@ add %eax, %eax # FULLREPORT-NEXT: 1, 100 (97.1%) # FULLREPORT: Scheduler's queue usage: -# FULLREPORT-NEXT: JALU01, 20/20 -# FULLREPORT-NEXT: JFPU01, 0/18 -# FULLREPORT-NEXT: JLSAGU, 0/12 +# FULLREPORT-NEXT: [1] Resource name. +# FULLREPORT-NEXT: [2] Average number of used buffer entries. +# FULLREPORT-NEXT: [3] Maximum number of used buffer entries. +# FULLREPORT-NEXT: [4] Total number of buffer entries. 
+ +# FULLREPORT: [1] [2] [3] [4] +# FULLREPORT-NEXT: JALU01 15 20 20 +# FULLREPORT-NEXT: JFPU01 0 0 18 +# FULLREPORT-NEXT: JLSAGU 0 0 12 # FULLREPORT: Retire Control Unit - number of cycles where we saw N instructions retired: # FULLREPORT-NEXT: [# retired], [# cycles] diff --git a/test/tools/llvm-mca/X86/option-all-stats-2.s b/test/tools/llvm-mca/X86/option-all-stats-2.s index d40cdf0291c..d5cc3c884fc 100644 --- a/test/tools/llvm-mca/X86/option-all-stats-2.s +++ b/test/tools/llvm-mca/X86/option-all-stats-2.s @@ -45,9 +45,15 @@ add %eax, %eax # FULL-NEXT: 1, 100 (97.1%) # FULL: Scheduler's queue usage: -# FULL-NEXT: JALU01, 20/20 -# FULL-NEXT: JFPU01, 0/18 -# FULL-NEXT: JLSAGU, 0/12 +# FULL-NEXT: [1] Resource name. +# FULL-NEXT: [2] Average number of used buffer entries. +# FULL-NEXT: [3] Maximum number of used buffer entries. +# FULL-NEXT: [4] Total number of buffer entries. + +# FULL: [1] [2] [3] [4] +# FULL-NEXT: JALU01 15 20 20 +# FULL-NEXT: JFPU01 0 0 18 +# FULL-NEXT: JLSAGU 0 0 12 # FULL: Retire Control Unit - number of cycles where we saw N instructions retired: # FULL-NEXT: [# retired], [# cycles] diff --git a/test/tools/llvm-mca/X86/option-all-views-1.s b/test/tools/llvm-mca/X86/option-all-views-1.s index fb2ab14d34b..aa9561e0649 100644 --- a/test/tools/llvm-mca/X86/option-all-views-1.s +++ b/test/tools/llvm-mca/X86/option-all-views-1.s @@ -46,9 +46,15 @@ add %eax, %eax # FULLREPORT-NEXT: 1, 100 (97.1%) # FULLREPORT: Scheduler's queue usage: -# FULLREPORT-NEXT: JALU01, 20/20 -# FULLREPORT-NEXT: JFPU01, 0/18 -# FULLREPORT-NEXT: JLSAGU, 0/12 +# FULLREPORT-NEXT: [1] Resource name. +# FULLREPORT-NEXT: [2] Average number of used buffer entries. +# FULLREPORT-NEXT: [3] Maximum number of used buffer entries. +# FULLREPORT-NEXT: [4] Total number of buffer entries. 
+ +# FULLREPORT: [1] [2] [3] [4] +# FULLREPORT-NEXT: JALU01 15 20 20 +# FULLREPORT-NEXT: JFPU01 0 0 18 +# FULLREPORT-NEXT: JLSAGU 0 0 12 # FULLREPORT: Retire Control Unit - number of cycles where we saw N instructions retired: # FULLREPORT-NEXT: [# retired], [# cycles] diff --git a/test/tools/llvm-mca/X86/option-all-views-2.s b/test/tools/llvm-mca/X86/option-all-views-2.s index 6014061846b..076c30a8e17 100644 --- a/test/tools/llvm-mca/X86/option-all-views-2.s +++ b/test/tools/llvm-mca/X86/option-all-views-2.s @@ -45,9 +45,15 @@ add %eax, %eax # ALL-NEXT: 1, 100 (97.1%) # ALL: Scheduler's queue usage: -# ALL-NEXT: JALU01, 20/20 -# ALL-NEXT: JFPU01, 0/18 -# ALL-NEXT: JLSAGU, 0/12 +# ALL-NEXT: [1] Resource name. +# ALL-NEXT: [2] Average number of used buffer entries. +# ALL-NEXT: [3] Maximum number of used buffer entries. +# ALL-NEXT: [4] Total number of buffer entries. + +# ALL: [1] [2] [3] [4] +# ALL-NEXT: JALU01 15 20 20 +# ALL-NEXT: JFPU01 0 0 18 +# ALL-NEXT: JLSAGU 0 0 12 # ALL: Retire Control Unit - number of cycles where we saw N instructions retired: # ALL-NEXT: [# retired], [# cycles] diff --git a/test/tools/llvm-mca/X86/scheduler-queue-usage.s b/test/tools/llvm-mca/X86/scheduler-queue-usage.s index 8448960c67e..ea06ad0bc80 100644 --- a/test/tools/llvm-mca/X86/scheduler-queue-usage.s +++ b/test/tools/llvm-mca/X86/scheduler-queue-usage.s @@ -17,36 +17,90 @@ xor %eax, %ebx # ALL-NEXT: 0, 3 (75.0%) # ALL-NEXT: 1, 1 (25.0%) -# BDW: Scheduler's queue usage: -# BDW-NEXT: BWPortAny, 1/60 - -# HSW: Scheduler's queue usage: -# HSW-NEXT: HWPortAny, 1/60 - -# KNL: Scheduler's queue usage: -# KNL-NEXT: HWPortAny, 1/60 - -# BTVER2: Scheduler's queue usage: -# BTVER2-NEXT: JALU01, 1/20 -# BTVER2-NEXT: JFPU01, 0/18 -# BTVER2-NEXT: JLSAGU, 0/12 - # SLM: Scheduler's queue usage: # SLM-NEXT: No scheduler resources used. -# IVB: Scheduler's queue usage: -# IVB-NEXT: SBPortAny, 1/54 +# BDW: Scheduler's queue usage: +# BDW-NEXT: [1] Resource name. 
+# BDW-NEXT: [2] Average number of used buffer entries. +# BDW-NEXT: [3] Maximum number of used buffer entries. +# BDW-NEXT: [4] Total number of buffer entries. -# SNB: Scheduler's queue usage: -# SNB-NEXT: SBPortAny, 1/54 +# BTVER2: Scheduler's queue usage: +# BTVER2-NEXT: [1] Resource name. +# BTVER2-NEXT: [2] Average number of used buffer entries. +# BTVER2-NEXT: [3] Maximum number of used buffer entries. +# BTVER2-NEXT: [4] Total number of buffer entries. + +# HSW: Scheduler's queue usage: +# HSW-NEXT: [1] Resource name. +# HSW-NEXT: [2] Average number of used buffer entries. +# HSW-NEXT: [3] Maximum number of used buffer entries. +# HSW-NEXT: [4] Total number of buffer entries. + +# IVB: Scheduler's queue usage: +# IVB-NEXT: [1] Resource name. +# IVB-NEXT: [2] Average number of used buffer entries. +# IVB-NEXT: [3] Maximum number of used buffer entries. +# IVB-NEXT: [4] Total number of buffer entries. + +# KNL: Scheduler's queue usage: +# KNL-NEXT: [1] Resource name. +# KNL-NEXT: [2] Average number of used buffer entries. +# KNL-NEXT: [3] Maximum number of used buffer entries. +# KNL-NEXT: [4] Total number of buffer entries. # SKX: Scheduler's queue usage: -# SKX-NEXT: SKLPortAny, 1/60 +# SKX-NEXT: [1] Resource name. +# SKX-NEXT: [2] Average number of used buffer entries. +# SKX-NEXT: [3] Maximum number of used buffer entries. +# SKX-NEXT: [4] Total number of buffer entries. # SKX-AVX512: Scheduler's queue usage: -# SKX-AVX512-NEXT: SKXPortAny, 1/60 +# SKX-AVX512-NEXT: [1] Resource name. +# SKX-AVX512-NEXT: [2] Average number of used buffer entries. +# SKX-AVX512-NEXT: [3] Maximum number of used buffer entries. +# SKX-AVX512-NEXT: [4] Total number of buffer entries. + +# SNB: Scheduler's queue usage: +# SNB-NEXT: [1] Resource name. +# SNB-NEXT: [2] Average number of used buffer entries. +# SNB-NEXT: [3] Maximum number of used buffer entries. +# SNB-NEXT: [4] Total number of buffer entries. 
# ZNVER1: Scheduler's queue usage: -# ZNVER1-NEXT: ZnAGU, 0/28 -# ZNVER1-NEXT: ZnALU, 1/56 -# ZNVER1-NEXT: ZnFPU, 0/36 +# ZNVER1-NEXT: [1] Resource name. +# ZNVER1-NEXT: [2] Average number of used buffer entries. +# ZNVER1-NEXT: [3] Maximum number of used buffer entries. +# ZNVER1-NEXT: [4] Total number of buffer entries. + +# BDW: [1] [2] [3] [4] +# BDW-NEXT: BWPortAny 0 1 60 + +# HSW: [1] [2] [3] [4] +# HSW-NEXT: HWPortAny 0 1 60 + +# KNL: [1] [2] [3] [4] +# KNL-NEXT: HWPortAny 0 1 60 + +# BTVER2: [1] [2] [3] [4] +# BTVER2-NEXT: JALU01 0 1 20 +# BTVER2-NEXT: JFPU01 0 0 18 +# BTVER2-NEXT: JLSAGU 0 0 12 + +# IVB: [1] [2] [3] [4] +# IVB-NEXT: SBPortAny 0 1 54 + +# SNB: [1] [2] [3] [4] +# SNB-NEXT: SBPortAny 0 1 54 + +# SKX: [1] [2] [3] [4] +# SKX-NEXT: SKLPortAny 0 1 60 + +# SKX-AVX512: [1] [2] [3] [4] +# SKX-AVX512-NEXT: SKXPortAny 0 1 60 + +# ZNVER1: [1] [2] [3] [4] +# ZNVER1-NEXT: ZnAGU 0 0 28 +# ZNVER1-NEXT: ZnALU 0 1 56 +# ZNVER1-NEXT: ZnFPU 0 0 36 diff --git a/tools/llvm-mca/Views/SchedulerStatistics.cpp b/tools/llvm-mca/Views/SchedulerStatistics.cpp index f5e4c891c42..4c0051208de 100644 --- a/tools/llvm-mca/Views/SchedulerStatistics.cpp +++ b/tools/llvm-mca/Views/SchedulerStatistics.cpp @@ -14,6 +14,7 @@ #include "Views/SchedulerStatistics.h" #include "llvm/Support/Format.h" +#include "llvm/Support/FormattedStream.h" using namespace llvm; @@ -26,69 +27,101 @@ void SchedulerStatistics::onEvent(const HWInstructionEvent &Event) { void SchedulerStatistics::onReservedBuffers(ArrayRef Buffers) { for (const unsigned Buffer : Buffers) { - if (BufferedResources.find(Buffer) != BufferedResources.end()) { - BufferUsage &BU = BufferedResources[Buffer]; - BU.SlotsInUse++; - BU.MaxUsedSlots = std::max(BU.MaxUsedSlots, BU.SlotsInUse); - continue; - } - - BufferedResources.insert( - std::pair(Buffer, {1U, 1U})); + BufferUsage &BU = Usage[Buffer]; + BU.SlotsInUse++; + BU.MaxUsedSlots = std::max(BU.MaxUsedSlots, BU.SlotsInUse); } } void 
SchedulerStatistics::onReleasedBuffers(ArrayRef Buffers) { - for (const unsigned Buffer : Buffers) { - assert(BufferedResources.find(Buffer) != BufferedResources.end() && - "Buffered resource not in map?"); - BufferUsage &BU = BufferedResources[Buffer]; - BU.SlotsInUse--; - } + for (const unsigned Buffer : Buffers) + Usage[Buffer].SlotsInUse--; } -void SchedulerStatistics::printSchedulerStatistics( - llvm::raw_ostream &OS) const { - std::string Buffer; - raw_string_ostream TempStream(Buffer); - TempStream << "\n\nSchedulers - number of cycles where we saw N instructions " - "issued:\n"; - TempStream << "[# issued], [# cycles]\n"; - for (const std::pair &Entry : IssuedPerCycle) { - TempStream << " " << Entry.first << ", " << Entry.second << " (" - << format("%.1f", ((double)Entry.second / NumCycles) * 100) - << "%)\n"; - } +void SchedulerStatistics::updateHistograms() { + for (BufferUsage &BU : Usage) + BU.CumulativeNumUsedSlots += BU.SlotsInUse; + IssuedPerCycle[NumIssued]++; + NumIssued = 0; +} - TempStream.flush(); - OS << Buffer; +void SchedulerStatistics::printSchedulerStats(raw_ostream &OS) const { + OS << "\n\nSchedulers - " + << "number of cycles where we saw N instructions issued:\n"; + OS << "[# issued], [# cycles]\n"; + + const auto It = + std::max_element(IssuedPerCycle.begin(), IssuedPerCycle.end()); + unsigned Index = std::distance(IssuedPerCycle.begin(), It); + + bool HasColors = OS.has_colors(); + for (unsigned I = 0, E = IssuedPerCycle.size(); I < E; ++I) { + unsigned IPC = IssuedPerCycle[I]; + if (!IPC) + continue; + + if (I == Index && HasColors) + OS.changeColor(raw_ostream::SAVEDCOLOR, true, false); + + OS << " " << I << ", " << IPC << " (" + << format("%.1f", ((double)IPC / NumCycles) * 100) << "%)\n"; + if (HasColors) + OS.resetColor(); + } } void SchedulerStatistics::printSchedulerUsage(raw_ostream &OS) const { - std::string Buffer; - raw_string_ostream TempStream(Buffer); - TempStream << "\n\nScheduler's queue usage:\n"; - // Early exit if 
no buffered resources were consumed. - if (BufferedResources.empty()) { - TempStream << "No scheduler resources used.\n"; - TempStream.flush(); - OS << Buffer; + assert(NumCycles && "Unexpected number of cycles!"); + + OS << "\nScheduler's queue usage:\n"; + if (all_of(Usage, [](const BufferUsage &BU) { return !BU.MaxUsedSlots; })) { + OS << "No scheduler resources used.\n"; return; } + OS << "[1] Resource name.\n" + << "[2] Average number of used buffer entries.\n" + << "[3] Maximum number of used buffer entries.\n" + << "[4] Total number of buffer entries.\n\n" + << " [1] [2] [3] [4]\n"; + + formatted_raw_ostream FOS(OS); + bool HasColors = FOS.has_colors(); for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) { const MCProcResourceDesc &ProcResource = *SM.getProcResource(I); if (ProcResource.BufferSize <= 0) continue; - const auto It = BufferedResources.find(I); - unsigned MaxUsedSlots = - It == BufferedResources.end() ? 0 : It->second.MaxUsedSlots; - TempStream << ProcResource.Name << ", " << MaxUsedSlots << '/' - << ProcResource.BufferSize << '\n'; + const BufferUsage &BU = Usage[I]; + double AvgUsage = (double)BU.CumulativeNumUsedSlots / NumCycles; + double AlmostFullThreshold = (double)(ProcResource.BufferSize * 4) / 5; + unsigned NormalizedAvg = floor((AvgUsage * 10) + 0.5) / 10; + unsigned NormalizedThreshold = floor((AlmostFullThreshold * 10) + 0.5) / 10; + + FOS << ProcResource.Name; + FOS.PadToColumn(17); + if (HasColors && NormalizedAvg >= NormalizedThreshold) + FOS.changeColor(raw_ostream::YELLOW, true, false); + FOS << NormalizedAvg; + if (HasColors) + FOS.resetColor(); + FOS.PadToColumn(28); + if (HasColors && + BU.MaxUsedSlots == static_cast(ProcResource.BufferSize)) + FOS.changeColor(raw_ostream::RED, true, false); + FOS << BU.MaxUsedSlots; + if (HasColors) + FOS.resetColor(); + FOS.PadToColumn(39); + FOS << ProcResource.BufferSize << '\n'; } - TempStream.flush(); - OS << Buffer; + FOS.flush(); } + +void 
SchedulerStatistics::printView(llvm::raw_ostream &OS) const { + printSchedulerStats(OS); + printSchedulerUsage(OS); +} + } // namespace mca diff --git a/tools/llvm-mca/Views/SchedulerStatistics.h b/tools/llvm-mca/Views/SchedulerStatistics.h index 3857c0e55a8..a3f45c24af9 100644 --- a/tools/llvm-mca/Views/SchedulerStatistics.h +++ b/tools/llvm-mca/Views/SchedulerStatistics.h @@ -17,15 +17,21 @@ /// /// Schedulers - number of cycles where we saw N instructions issued: /// [# issued], [# cycles] -/// 0, 7 (5.4%) -/// 1, 4 (3.1%) -/// 2, 8 (6.2%) +/// 0, 6 (2.9%) +/// 1, 106 (50.7%) +/// 2, 97 (46.4%) /// /// Scheduler's queue usage: -/// JALU01, 0/20 -/// JFPU01, 18/18 -/// JLSAGU, 0/12 +/// [1] Resource name. +/// [2] Average number of used buffer entries. +/// [3] Maximum number of used buffer entries. +/// [4] Total number of buffer entries. /// +/// [1] [2] [3] [4] +/// JALU01 0 0 20 +/// JFPU01 15 18 18 +/// JLSAGU 0 0 12 +// //===----------------------------------------------------------------------===// #ifndef LLVM_TOOLS_LLVM_MCA_SCHEDULERSTATISTICS_H @@ -38,12 +44,8 @@ namespace mca { -class SchedulerStatistics : public View { +class SchedulerStatistics final : public View { const llvm::MCSchedModel &SM; - - using Histogram = std::map; - Histogram IssuedPerCycle; - unsigned NumIssued; unsigned NumCycles; @@ -51,21 +53,21 @@ class SchedulerStatistics : public View { struct BufferUsage { unsigned SlotsInUse; unsigned MaxUsedSlots; + uint64_t CumulativeNumUsedSlots; }; - std::map BufferedResources; + std::vector IssuedPerCycle; + std::vector Usage; - void updateHistograms() { - IssuedPerCycle[NumIssued]++; - NumIssued = 0; - } - - void printSchedulerStatistics(llvm::raw_ostream &OS) const; + void updateHistograms(); + void printSchedulerStats(llvm::raw_ostream &OS) const; void printSchedulerUsage(llvm::raw_ostream &OS) const; public: SchedulerStatistics(const llvm::MCSubtargetInfo &STI) - : SM(STI.getSchedModel()), NumIssued(0), NumCycles(0) {} + : 
SM(STI.getSchedModel()), NumIssued(0), NumCycles(0), + IssuedPerCycle(STI.getSchedModel().NumProcResourceKinds, 0), + Usage(STI.getSchedModel().NumProcResourceKinds, {0, 0, 0}) {} void onEvent(const HWInstructionEvent &Event) override; @@ -81,10 +83,7 @@ public: // buffered resource in the Buffers set. void onReleasedBuffers(llvm::ArrayRef Buffers) override; - void printView(llvm::raw_ostream &OS) const override { - printSchedulerStatistics(OS); - printSchedulerUsage(OS); - } + void printView(llvm::raw_ostream &OS) const override; }; } // namespace mca