From 5da66b880e98eb9f7b130204e414159892285109 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Milo=C5=A1=20Stojanovi=C4=87?= Date: Fri, 17 Jan 2020 14:28:54 +0100 Subject: [PATCH] [llvm-exegesis][NFC] Disassociate snippet generators from benchmark runners The addition of `inverse_throughput` mode highlighted the disjointedness of snippet generators and benchmark runners because it used the `UopsSnippetGenerator` with the `LatencyBenchmarkRunner`. To keep the code consistent tie the snippet generators to parallelization/serialization rather than their benchmark runners. Renaming `LatencySnippetGenerator` -> `SerialSnippetGenerator`. Renaming `UopsSnippetGenerator` -> `ParallelSnippetGenerator`. Differential Revision: https://reviews.llvm.org/D72928 --- tools/llvm-exegesis/lib/AArch64/Target.cpp | 1 - tools/llvm-exegesis/lib/Analysis.cpp | 8 +-- tools/llvm-exegesis/lib/CMakeLists.txt | 6 +- .../lib/LatencyBenchmarkRunner.cpp | 58 +++++++++++++++++++ .../{Latency.h => LatencyBenchmarkRunner.h} | 15 +---- tools/llvm-exegesis/lib/Mips/Target.cpp | 2 +- ...{Uops.cpp => ParallelSnippetGenerator.cpp} | 48 +++------------ .../{Uops.h => ParallelSnippetGenerator.h} | 28 +++------ tools/llvm-exegesis/lib/PowerPC/Target.cpp | 1 - ...Latency.cpp => SerialSnippetGenerator.cpp} | 58 +++---------------- .../lib/SerialSnippetGenerator.h | 37 ++++++++++++ tools/llvm-exegesis/lib/Target.cpp | 18 +++--- tools/llvm-exegesis/lib/Target.h | 4 +- .../llvm-exegesis/lib/UopsBenchmarkRunner.cpp | 46 +++++++++++++++ tools/llvm-exegesis/lib/UopsBenchmarkRunner.h | 38 ++++++++++++ tools/llvm-exegesis/lib/X86/Target.cpp | 28 ++++----- .../Mips/SnippetGeneratorTest.cpp | 23 ++++---- .../X86/SnippetGeneratorTest.cpp | 34 +++++------ .../X86/SnippetRepetitorTest.cpp | 5 +- unittests/tools/llvm-exegesis/X86/TestBase.h | 2 +- 20 files changed, 270 insertions(+), 190 deletions(-) create mode 100644 tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp rename tools/llvm-exegesis/lib/{Latency.h => 
LatencyBenchmarkRunner.h} (69%) rename tools/llvm-exegesis/lib/{Uops.cpp => ParallelSnippetGenerator.cpp} (82%) rename tools/llvm-exegesis/lib/{Uops.h => ParallelSnippetGenerator.h} (70%) rename tools/llvm-exegesis/lib/{Latency.cpp => SerialSnippetGenerator.cpp} (76%) create mode 100644 tools/llvm-exegesis/lib/SerialSnippetGenerator.h create mode 100644 tools/llvm-exegesis/lib/UopsBenchmarkRunner.cpp create mode 100644 tools/llvm-exegesis/lib/UopsBenchmarkRunner.h diff --git a/tools/llvm-exegesis/lib/AArch64/Target.cpp b/tools/llvm-exegesis/lib/AArch64/Target.cpp index f5cc88c94a9..c778b89032c 100644 --- a/tools/llvm-exegesis/lib/AArch64/Target.cpp +++ b/tools/llvm-exegesis/lib/AArch64/Target.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// #include "../Target.h" -#include "../Latency.h" #include "AArch64.h" #include "AArch64RegisterInfo.h" diff --git a/tools/llvm-exegesis/lib/Analysis.cpp b/tools/llvm-exegesis/lib/Analysis.cpp index 5bcccafa411..5e9023b8127 100644 --- a/tools/llvm-exegesis/lib/Analysis.cpp +++ b/tools/llvm-exegesis/lib/Analysis.cpp @@ -244,9 +244,9 @@ Analysis::makePointsPerSchedClass() const { return Entries; } -// Uops repeat the same opcode over again. Just show this opcode and show the -// whole snippet only on hover. -static void writeUopsSnippetHtml(raw_ostream &OS, +// Parallel benchmarks repeat the same opcode multiple times. Just show this +// opcode and show the whole snippet only on hover. 
+static void writeParallelSnippetHtml(raw_ostream &OS, const std::vector &Instructions, const MCInstrInfo &InstrInfo) { if (Instructions.empty()) @@ -282,7 +282,7 @@ void Analysis::printPointHtml(const InstructionBenchmark &Point, break; case InstructionBenchmark::Uops: case InstructionBenchmark::InverseThroughput: - writeUopsSnippetHtml(OS, Point.Key.Instructions, *InstrInfo_); + writeParallelSnippetHtml(OS, Point.Key.Instructions, *InstrInfo_); break; default: llvm_unreachable("invalid mode"); diff --git a/tools/llvm-exegesis/lib/CMakeLists.txt b/tools/llvm-exegesis/lib/CMakeLists.txt index e5b5d565911..aa11d54922e 100644 --- a/tools/llvm-exegesis/lib/CMakeLists.txt +++ b/tools/llvm-exegesis/lib/CMakeLists.txt @@ -27,18 +27,20 @@ add_library(LLVMExegesis BenchmarkRunner.cpp Clustering.cpp CodeTemplate.cpp - Latency.cpp + LatencyBenchmarkRunner.cpp LlvmState.cpp MCInstrDescView.cpp + ParallelSnippetGenerator.cpp PerfHelper.cpp RegisterAliasing.cpp RegisterValue.cpp SchedClassResolution.cpp + SerialSnippetGenerator.cpp SnippetFile.cpp SnippetGenerator.cpp SnippetRepetitor.cpp Target.cpp - Uops.cpp + UopsBenchmarkRunner.cpp ) llvm_update_compile_flags(LLVMExegesis) diff --git a/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp b/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp new file mode 100644 index 00000000000..995195d8435 --- /dev/null +++ b/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp @@ -0,0 +1,58 @@ +//===-- LatencyBenchmarkRunner.cpp ------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "LatencyBenchmarkRunner.h" + +#include "Target.h" +#include "BenchmarkRunner.h" + +namespace llvm { +namespace exegesis { + +static constexpr size_t kMaxAliasingInstructions = 10; + +LatencyBenchmarkRunner::LatencyBenchmarkRunner(const LLVMState &State, + InstructionBenchmark::ModeE Mode) + : BenchmarkRunner(State, Mode) { + assert((Mode == InstructionBenchmark::Latency || + Mode == InstructionBenchmark::InverseThroughput) && + "invalid mode"); +} + +LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default; + +Expected> LatencyBenchmarkRunner::runMeasurements( + const FunctionExecutor &Executor) const { + // Cycle measurements include some overhead from the kernel. Repeat the + // measure several times and take the minimum value. + constexpr const int NumMeasurements = 30; + int64_t MinValue = std::numeric_limits::max(); + const char *CounterName = State.getPfmCounters().CycleCounter; + for (size_t I = 0; I < NumMeasurements; ++I) { + auto ExpectedCounterValue = Executor.runAndMeasure(CounterName); + if (!ExpectedCounterValue) + return ExpectedCounterValue.takeError(); + if (*ExpectedCounterValue < MinValue) + MinValue = *ExpectedCounterValue; + } + std::vector Result; + switch (Mode) { + case InstructionBenchmark::Latency: + Result = {BenchmarkMeasure::Create("latency", MinValue)}; + break; + case InstructionBenchmark::InverseThroughput: + Result = {BenchmarkMeasure::Create("inverse_throughput", MinValue)}; + break; + default: + break; + } + return std::move(Result); +} + +} // namespace exegesis +} // namespace llvm diff --git a/tools/llvm-exegesis/lib/Latency.h b/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.h similarity index 69% rename from tools/llvm-exegesis/lib/Latency.h rename to tools/llvm-exegesis/lib/LatencyBenchmarkRunner.h index 83d754d25d1..d4bb93416c2 100644 --- 
a/tools/llvm-exegesis/lib/Latency.h +++ b/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.h @@ -1,4 +1,4 @@ -//===-- Latency.h -----------------------------------------------*- C++ -*-===// +//===-- LatencyBenchmarkRunner.h --------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -15,23 +15,10 @@ #define LLVM_TOOLS_LLVM_EXEGESIS_LATENCY_H #include "BenchmarkRunner.h" -#include "Error.h" -#include "MCInstrDescView.h" -#include "SnippetGenerator.h" namespace llvm { namespace exegesis { -class LatencySnippetGenerator : public SnippetGenerator { -public: - using SnippetGenerator::SnippetGenerator; - ~LatencySnippetGenerator() override; - - Expected> - generateCodeTemplates(const Instruction &Instr, - const BitVector &ForbiddenRegisters) const override; -}; - class LatencyBenchmarkRunner : public BenchmarkRunner { public: LatencyBenchmarkRunner(const LLVMState &State, diff --git a/tools/llvm-exegesis/lib/Mips/Target.cpp b/tools/llvm-exegesis/lib/Mips/Target.cpp index dc408f25280..ec698e678f9 100644 --- a/tools/llvm-exegesis/lib/Mips/Target.cpp +++ b/tools/llvm-exegesis/lib/Mips/Target.cpp @@ -5,8 +5,8 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +#include "../Error.h" #include "../Target.h" -#include "../Latency.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "Mips.h" #include "MipsRegisterInfo.h" diff --git a/tools/llvm-exegesis/lib/Uops.cpp b/tools/llvm-exegesis/lib/ParallelSnippetGenerator.cpp similarity index 82% rename from tools/llvm-exegesis/lib/Uops.cpp rename to tools/llvm-exegesis/lib/ParallelSnippetGenerator.cpp index 66319e37ce8..86e7fda63f3 100644 --- a/tools/llvm-exegesis/lib/Uops.cpp +++ b/tools/llvm-exegesis/lib/ParallelSnippetGenerator.cpp @@ -1,4 +1,4 @@ -//===-- Uops.cpp 
------------------------------------------------*- C++ -*-===// +//===-- ParallelSnippetGenerator.cpp ----------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,9 +6,8 @@ // //===----------------------------------------------------------------------===// -#include "Uops.h" +#include "ParallelSnippetGenerator.h" -#include "Assembler.h" #include "BenchmarkRunner.h" #include "MCInstrDescView.h" #include "Target.h" @@ -16,9 +15,9 @@ // FIXME: Load constants into registers (e.g. with fld1) to not break // instructions like x87. -// Ideally we would like the only limitation on executing uops to be the issue -// ports. Maximizing port pressure increases the likelihood that the load is -// distributed evenly across possible ports. +// Ideally we would like the only limitation on executing instructions to be the +// availability of the CPU resources (e.g. execution ports) needed to execute +// them, instead of the availability of their data dependencies. // To achieve that, one approach is to generate instructions that do not have // data dependencies between them. 
@@ -89,11 +88,9 @@ getVariablesWithTiedOperands(const Instruction &Instr) { return Result; } -UopsBenchmarkRunner::~UopsBenchmarkRunner() = default; +ParallelSnippetGenerator::~ParallelSnippetGenerator() = default; -UopsSnippetGenerator::~UopsSnippetGenerator() = default; - -void UopsSnippetGenerator::instantiateMemoryOperands( +void ParallelSnippetGenerator::instantiateMemoryOperands( const unsigned ScratchSpacePointerInReg, std::vector &Instructions) const { if (ScratchSpacePointerInReg == 0) @@ -157,7 +154,7 @@ static std::vector generateSnippetUsingStaticRenaming( } } -Expected> UopsSnippetGenerator::generateCodeTemplates( +Expected> ParallelSnippetGenerator::generateCodeTemplates( const Instruction &Instr, const BitVector &ForbiddenRegisters) const { CodeTemplate CT; CT.ScratchSpacePointerInReg = @@ -219,34 +216,7 @@ Expected> UopsSnippetGenerator::generateCodeTemplates( return getSingleton(std::move(CT)); } -Expected> -UopsBenchmarkRunner::runMeasurements(const FunctionExecutor &Executor) const { - std::vector Result; - const PfmCountersInfo &PCI = State.getPfmCounters(); - // Uops per port. - for (const auto *IssueCounter = PCI.IssueCounters, - *IssueCounterEnd = PCI.IssueCounters + PCI.NumIssueCounters; - IssueCounter != IssueCounterEnd; ++IssueCounter) { - if (!IssueCounter->Counter) - continue; - auto ExpectedCounterValue = Executor.runAndMeasure(IssueCounter->Counter); - if (!ExpectedCounterValue) - return ExpectedCounterValue.takeError(); - Result.push_back(BenchmarkMeasure::Create(IssueCounter->ProcResName, - *ExpectedCounterValue)); - } - // NumMicroOps. 
- if (const char *const UopsCounter = PCI.UopsCounter) { - auto ExpectedCounterValue = Executor.runAndMeasure(UopsCounter); - if (!ExpectedCounterValue) - return ExpectedCounterValue.takeError(); - Result.push_back( - BenchmarkMeasure::Create("NumMicroOps", *ExpectedCounterValue)); - } - return std::move(Result); -} - -constexpr const size_t UopsSnippetGenerator::kMinNumDifferentAddresses; +constexpr const size_t ParallelSnippetGenerator::kMinNumDifferentAddresses; } // namespace exegesis } // namespace llvm diff --git a/tools/llvm-exegesis/lib/Uops.h b/tools/llvm-exegesis/lib/ParallelSnippetGenerator.h similarity index 70% rename from tools/llvm-exegesis/lib/Uops.h rename to tools/llvm-exegesis/lib/ParallelSnippetGenerator.h index 48105703cb0..617bf51ba94 100644 --- a/tools/llvm-exegesis/lib/Uops.h +++ b/tools/llvm-exegesis/lib/ParallelSnippetGenerator.h @@ -1,4 +1,4 @@ -//===-- Uops.h --------------------------------------------------*- C++ -*-===// +//===-- ParallelSnippetGenerator.h ------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -7,23 +7,22 @@ //===----------------------------------------------------------------------===// /// /// \file -/// A BenchmarkRunner implementation to measure uop decomposition. +/// A SnippetGenerator implementation to create parallel instruction snippets. 
/// //===----------------------------------------------------------------------===// -#ifndef LLVM_TOOLS_LLVM_EXEGESIS_UOPS_H -#define LLVM_TOOLS_LLVM_EXEGESIS_UOPS_H +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_PARALLELSNIPPETGENERATOR_H +#define LLVM_TOOLS_LLVM_EXEGESIS_PARALLELSNIPPETGENERATOR_H -#include "BenchmarkRunner.h" #include "SnippetGenerator.h" namespace llvm { namespace exegesis { -class UopsSnippetGenerator : public SnippetGenerator { +class ParallelSnippetGenerator : public SnippetGenerator { public: using SnippetGenerator::SnippetGenerator; - ~UopsSnippetGenerator() override; + ~ParallelSnippetGenerator() override; Expected> generateCodeTemplates(const Instruction &Instr, @@ -60,20 +59,7 @@ private: std::vector &SnippetTemplate) const; }; -class UopsBenchmarkRunner : public BenchmarkRunner { -public: - UopsBenchmarkRunner(const LLVMState &State) - : BenchmarkRunner(State, InstructionBenchmark::Uops) {} - ~UopsBenchmarkRunner() override; - - static constexpr const size_t kMinNumDifferentAddresses = 6; - -private: - Expected> - runMeasurements(const FunctionExecutor &Executor) const override; -}; - } // namespace exegesis } // namespace llvm -#endif // LLVM_TOOLS_LLVM_EXEGESIS_UOPS_H +#endif // LLVM_TOOLS_LLVM_EXEGESIS_PARALLELSNIPPETGENERATOR_H diff --git a/tools/llvm-exegesis/lib/PowerPC/Target.cpp b/tools/llvm-exegesis/lib/PowerPC/Target.cpp index 567d62031f1..771b01a5790 100644 --- a/tools/llvm-exegesis/lib/PowerPC/Target.cpp +++ b/tools/llvm-exegesis/lib/PowerPC/Target.cpp @@ -7,7 +7,6 @@ // The PowerPC ExegesisTarget. 
//===----------------------------------------------------------------------===// #include "../Target.h" -#include "../Latency.h" #include "PPC.h" #include "PPCRegisterInfo.h" diff --git a/tools/llvm-exegesis/lib/Latency.cpp b/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp similarity index 76% rename from tools/llvm-exegesis/lib/Latency.cpp rename to tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp index 9c1c087bb5e..b0c881c4d62 100644 --- a/tools/llvm-exegesis/lib/Latency.cpp +++ b/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp @@ -1,4 +1,4 @@ -//===-- Latency.cpp ---------------------------------------------*- C++ -*-===// +//===-- SerialSnippetGenerator.cpp ------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,17 +6,13 @@ // //===----------------------------------------------------------------------===// -#include "Latency.h" +#include "SerialSnippetGenerator.h" -#include "Assembler.h" -#include "BenchmarkRunner.h" #include "MCInstrDescView.h" -#include "PerfHelper.h" -#include "Target.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCInstBuilder.h" -#include "llvm/Support/FormatVariadic.h" +#include "CodeTemplate.h" +#include +#include +#include namespace llvm { namespace exegesis { @@ -149,10 +145,10 @@ static void appendCodeTemplates(const LLVMState &State, } } -LatencySnippetGenerator::~LatencySnippetGenerator() = default; +SerialSnippetGenerator::~SerialSnippetGenerator() = default; Expected> -LatencySnippetGenerator::generateCodeTemplates( +SerialSnippetGenerator::generateCodeTemplates( const Instruction &Instr, const BitVector &ForbiddenRegisters) const { std::vector Results; const ExecutionMode EM = getExecutionModes(Instr, ForbiddenRegisters); @@ -169,43 +165,5 @@ LatencySnippetGenerator::generateCodeTemplates( return std::move(Results); } 
-LatencyBenchmarkRunner::LatencyBenchmarkRunner(const LLVMState &State, - InstructionBenchmark::ModeE Mode) - : BenchmarkRunner(State, Mode) { - assert((Mode == InstructionBenchmark::Latency || - Mode == InstructionBenchmark::InverseThroughput) && - "invalid mode"); -} - -LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default; - -Expected> LatencyBenchmarkRunner::runMeasurements( - const FunctionExecutor &Executor) const { - // Cycle measurements include some overhead from the kernel. Repeat the - // measure several times and take the minimum value. - constexpr const int NumMeasurements = 30; - int64_t MinValue = std::numeric_limits::max(); - const char *CounterName = State.getPfmCounters().CycleCounter; - for (size_t I = 0; I < NumMeasurements; ++I) { - auto ExpectedCounterValue = Executor.runAndMeasure(CounterName); - if (!ExpectedCounterValue) - return ExpectedCounterValue.takeError(); - if (*ExpectedCounterValue < MinValue) - MinValue = *ExpectedCounterValue; - } - std::vector Result; - switch (Mode) { - case InstructionBenchmark::Latency: - Result = {BenchmarkMeasure::Create("latency", MinValue)}; - break; - case InstructionBenchmark::InverseThroughput: - Result = {BenchmarkMeasure::Create("inverse_throughput", MinValue)}; - break; - default: - break; - } - return std::move(Result); -} - } // namespace exegesis } // namespace llvm diff --git a/tools/llvm-exegesis/lib/SerialSnippetGenerator.h b/tools/llvm-exegesis/lib/SerialSnippetGenerator.h new file mode 100644 index 00000000000..292b1bc947c --- /dev/null +++ b/tools/llvm-exegesis/lib/SerialSnippetGenerator.h @@ -0,0 +1,37 @@ +//===-- SerialSnippetGenerator.h --------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// A SnippetGenerator implementation to create serial instruction snippets. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_SERIALSNIPPETGENERATOR_H +#define LLVM_TOOLS_LLVM_EXEGESIS_SERIALSNIPPETGENERATOR_H + +#include "Error.h" +#include "MCInstrDescView.h" +#include "SnippetGenerator.h" + +namespace llvm { +namespace exegesis { + +class SerialSnippetGenerator : public SnippetGenerator { +public: + using SnippetGenerator::SnippetGenerator; + ~SerialSnippetGenerator() override; + + Expected> + generateCodeTemplates(const Instruction &Instr, + const BitVector &ForbiddenRegisters) const override; +}; + +} // namespace exegesis +} // namespace llvm + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_SERIALSNIPPETGENERATOR_H diff --git a/tools/llvm-exegesis/lib/Target.cpp b/tools/llvm-exegesis/lib/Target.cpp index 297419509c1..40021e2931c 100644 --- a/tools/llvm-exegesis/lib/Target.cpp +++ b/tools/llvm-exegesis/lib/Target.cpp @@ -7,8 +7,10 @@ //===----------------------------------------------------------------------===// #include "Target.h" -#include "Latency.h" -#include "Uops.h" +#include "LatencyBenchmarkRunner.h" +#include "ParallelSnippetGenerator.h" +#include "SerialSnippetGenerator.h" +#include "UopsBenchmarkRunner.h" namespace llvm { namespace exegesis { @@ -43,10 +45,10 @@ std::unique_ptr ExegesisTarget::createSnippetGenerator( case InstructionBenchmark::Unknown: return nullptr; case InstructionBenchmark::Latency: - return createLatencySnippetGenerator(State, Opts); + return createSerialSnippetGenerator(State, Opts); case InstructionBenchmark::Uops: case InstructionBenchmark::InverseThroughput: - return createUopsSnippetGenerator(State, Opts); + return createParallelSnippetGenerator(State, Opts); } return nullptr; } @@ -77,14 +79,14 @@ 
ExegesisTarget::createBenchmarkRunner(InstructionBenchmark::ModeE Mode, return nullptr; } -std::unique_ptr ExegesisTarget::createLatencySnippetGenerator( +std::unique_ptr ExegesisTarget::createSerialSnippetGenerator( const LLVMState &State, const SnippetGenerator::Options &Opts) const { - return std::make_unique(State, Opts); + return std::make_unique(State, Opts); } -std::unique_ptr ExegesisTarget::createUopsSnippetGenerator( +std::unique_ptr ExegesisTarget::createParallelSnippetGenerator( const LLVMState &State, const SnippetGenerator::Options &Opts) const { - return std::make_unique(State, Opts); + return std::make_unique(State, Opts); } std::unique_ptr ExegesisTarget::createLatencyBenchmarkRunner( diff --git a/tools/llvm-exegesis/lib/Target.h b/tools/llvm-exegesis/lib/Target.h index 277db67bfbb..62f0dab1e98 100644 --- a/tools/llvm-exegesis/lib/Target.h +++ b/tools/llvm-exegesis/lib/Target.h @@ -144,9 +144,9 @@ private: // Targets can implement their own snippet generators/benchmarks runners by // implementing these. - std::unique_ptr virtual createLatencySnippetGenerator( + std::unique_ptr virtual createSerialSnippetGenerator( const LLVMState &State, const SnippetGenerator::Options &Opts) const; - std::unique_ptr virtual createUopsSnippetGenerator( + std::unique_ptr virtual createParallelSnippetGenerator( const LLVMState &State, const SnippetGenerator::Options &Opts) const; std::unique_ptr virtual createLatencyBenchmarkRunner( const LLVMState &State, InstructionBenchmark::ModeE Mode) const; diff --git a/tools/llvm-exegesis/lib/UopsBenchmarkRunner.cpp b/tools/llvm-exegesis/lib/UopsBenchmarkRunner.cpp new file mode 100644 index 00000000000..b99b1c5e711 --- /dev/null +++ b/tools/llvm-exegesis/lib/UopsBenchmarkRunner.cpp @@ -0,0 +1,46 @@ +//===-- UopsBenchmarkRunner.cpp ---------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "UopsBenchmarkRunner.h" + +#include "Target.h" + +namespace llvm { +namespace exegesis { + +UopsBenchmarkRunner::~UopsBenchmarkRunner() = default; + +Expected> +UopsBenchmarkRunner::runMeasurements(const FunctionExecutor &Executor) const { + std::vector Result; + const PfmCountersInfo &PCI = State.getPfmCounters(); + // Uops per port. + for (const auto *IssueCounter = PCI.IssueCounters, + *IssueCounterEnd = PCI.IssueCounters + PCI.NumIssueCounters; + IssueCounter != IssueCounterEnd; ++IssueCounter) { + if (!IssueCounter->Counter) + continue; + auto ExpectedCounterValue = Executor.runAndMeasure(IssueCounter->Counter); + if (!ExpectedCounterValue) + return ExpectedCounterValue.takeError(); + Result.push_back(BenchmarkMeasure::Create(IssueCounter->ProcResName, + *ExpectedCounterValue)); + } + // NumMicroOps. + if (const char *const UopsCounter = PCI.UopsCounter) { + auto ExpectedCounterValue = Executor.runAndMeasure(UopsCounter); + if (!ExpectedCounterValue) + return ExpectedCounterValue.takeError(); + Result.push_back( + BenchmarkMeasure::Create("NumMicroOps", *ExpectedCounterValue)); + } + return std::move(Result); +} + +} // namespace exegesis +} // namespace llvm diff --git a/tools/llvm-exegesis/lib/UopsBenchmarkRunner.h b/tools/llvm-exegesis/lib/UopsBenchmarkRunner.h new file mode 100644 index 00000000000..cda74eb453d --- /dev/null +++ b/tools/llvm-exegesis/lib/UopsBenchmarkRunner.h @@ -0,0 +1,38 @@ +//===-- UopsBenchmarkRunner.h -----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// A BenchmarkRunner implementation to measure uop decomposition. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_UOPSBENCHMARKRUNNER_H +#define LLVM_TOOLS_LLVM_EXEGESIS_UOPSBENCHMARKRUNNER_H + +#include "BenchmarkRunner.h" + +namespace llvm { +namespace exegesis { + +class UopsBenchmarkRunner : public BenchmarkRunner { +public: + UopsBenchmarkRunner(const LLVMState &State) + : BenchmarkRunner(State, InstructionBenchmark::Uops) {} + ~UopsBenchmarkRunner() override; + + static constexpr const size_t kMinNumDifferentAddresses = 6; + +private: + Expected> + runMeasurements(const FunctionExecutor &Executor) const override; +}; + +} // namespace exegesis +} // namespace llvm + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_UOPSBENCHMARKRUNNER_H diff --git a/tools/llvm-exegesis/lib/X86/Target.cpp b/tools/llvm-exegesis/lib/X86/Target.cpp index c67630fdebf..d2da6c235ea 100644 --- a/tools/llvm-exegesis/lib/X86/Target.cpp +++ b/tools/llvm-exegesis/lib/X86/Target.cpp @@ -8,9 +8,9 @@ #include "../Target.h" #include "../Error.h" -#include "../Latency.h" +#include "../SerialSnippetGenerator.h" #include "../SnippetGenerator.h" -#include "../Uops.h" +#include "../ParallelSnippetGenerator.h" #include "MCTargetDesc/X86BaseInfo.h" #include "MCTargetDesc/X86MCTargetDesc.h" #include "X86.h" @@ -242,9 +242,9 @@ static Expected> generateLEATemplatesCommon( } namespace { -class X86LatencySnippetGenerator : public LatencySnippetGenerator { +class X86SerialSnippetGenerator : public SerialSnippetGenerator { public: - using LatencySnippetGenerator::LatencySnippetGenerator; + using SerialSnippetGenerator::SerialSnippetGenerator; Expected> generateCodeTemplates(const Instruction &Instr, @@ -253,7 +253,7 @@ public: } // namespace Expected> 
-X86LatencySnippetGenerator::generateCodeTemplates( +X86SerialSnippetGenerator::generateCodeTemplates( const Instruction &Instr, const BitVector &ForbiddenRegisters) const { if (auto E = IsInvalidOpcode(Instr)) return std::move(E); @@ -271,7 +271,7 @@ X86LatencySnippetGenerator::generateCodeTemplates( switch (getX86FPFlags(Instr)) { case X86II::NotFP: - return LatencySnippetGenerator::generateCodeTemplates(Instr, + return SerialSnippetGenerator::generateCodeTemplates(Instr, ForbiddenRegisters); case X86II::ZeroArgFP: case X86II::OneArgFP: @@ -292,9 +292,9 @@ X86LatencySnippetGenerator::generateCodeTemplates( } namespace { -class X86UopsSnippetGenerator : public UopsSnippetGenerator { +class X86ParallelSnippetGenerator : public ParallelSnippetGenerator { public: - using UopsSnippetGenerator::UopsSnippetGenerator; + using ParallelSnippetGenerator::ParallelSnippetGenerator; Expected> generateCodeTemplates(const Instruction &Instr, @@ -304,7 +304,7 @@ public: } // namespace Expected> -X86UopsSnippetGenerator::generateCodeTemplates( +X86ParallelSnippetGenerator::generateCodeTemplates( const Instruction &Instr, const BitVector &ForbiddenRegisters) const { if (auto E = IsInvalidOpcode(Instr)) return std::move(E); @@ -333,7 +333,7 @@ X86UopsSnippetGenerator::generateCodeTemplates( switch (getX86FPFlags(Instr)) { case X86II::NotFP: - return UopsSnippetGenerator::generateCodeTemplates(Instr, + return ParallelSnippetGenerator::generateCodeTemplates(Instr, ForbiddenRegisters); case X86II::ZeroArgFP: case X86II::OneArgFP: @@ -577,16 +577,16 @@ private: sizeof(kUnavailableRegisters[0])); } - std::unique_ptr createLatencySnippetGenerator( + std::unique_ptr createSerialSnippetGenerator( const LLVMState &State, const SnippetGenerator::Options &Opts) const override { - return std::make_unique(State, Opts); + return std::make_unique(State, Opts); } - std::unique_ptr createUopsSnippetGenerator( + std::unique_ptr createParallelSnippetGenerator( const LLVMState &State, const 
SnippetGenerator::Options &Opts) const override { - return std::make_unique(State, Opts); + return std::make_unique(State, Opts); } bool matchesArch(Triple::ArchType Arch) const override { diff --git a/unittests/tools/llvm-exegesis/Mips/SnippetGeneratorTest.cpp b/unittests/tools/llvm-exegesis/Mips/SnippetGeneratorTest.cpp index 2ccdee36603..9cb2e3b1ed1 100644 --- a/unittests/tools/llvm-exegesis/Mips/SnippetGeneratorTest.cpp +++ b/unittests/tools/llvm-exegesis/Mips/SnippetGeneratorTest.cpp @@ -7,13 +7,13 @@ //===----------------------------------------------------------------------===// #include "../Common/AssemblerUtils.h" -#include "Latency.h" #include "LlvmState.h" #include "MCInstrDescView.h" #include "MipsInstrInfo.h" +#include "ParallelSnippetGenerator.h" #include "RegisterAliasing.h" +#include "SerialSnippetGenerator.h" #include "TestBase.h" -#include "Uops.h" #include @@ -48,12 +48,12 @@ protected: SnippetGeneratorT Generator; }; -using LatencySnippetGeneratorTest = - SnippetGeneratorTest; +using SerialSnippetGeneratorTest = SnippetGeneratorTest; -using UopsSnippetGeneratorTest = SnippetGeneratorTest; +using ParallelSnippetGeneratorTest = + SnippetGeneratorTest; -TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughExplicitRegs) { +TEST_F(SerialSnippetGeneratorTest, ImplicitSelfDependencyThroughExplicitRegs) { // - ADD // - Op0 Explicit Def RegClass(GPR32) // - Op1 Explicit Use RegClass(GPR32) @@ -77,8 +77,8 @@ TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughExplicitRegs) { << "Op0 is either set to Op1 or to Op2"; } -TEST_F(LatencySnippetGeneratorTest, - ImplicitSelfDependencyThroughExplicitRegsForbidAll) { +TEST_F(SerialSnippetGeneratorTest, + ImplicitSelfDependencyThroughExplicitRegsForbidAll) { // - XOR // - Op0 Explicit Def RegClass(GPR32) // - Op1 Explicit Use RegClass(GPR32) @@ -96,7 +96,7 @@ TEST_F(LatencySnippetGeneratorTest, consumeError(std::move(Error)); } -TEST_F(UopsSnippetGeneratorTest, MemoryUse) { 
+TEST_F(ParallelSnippetGeneratorTest, MemoryUse) { // LB reads from memory. // - LB // - Op0 Explicit Def RegClass(GPR32) @@ -110,10 +110,11 @@ TEST_F(UopsSnippetGeneratorTest, MemoryUse) { const auto CodeTemplates = checkAndGetCodeTemplates(Opcode); ASSERT_THAT(CodeTemplates, SizeIs(1)); const auto &CT = CodeTemplates[0]; - EXPECT_THAT(CT.Info, HasSubstr("instruction is parallel, repeating a random one.")); + EXPECT_THAT(CT.Info, + HasSubstr("instruction is parallel, repeating a random one.")); EXPECT_THAT(CT.Execution, ExecutionMode::UNKNOWN); ASSERT_THAT(CT.Instructions, - SizeIs(UopsSnippetGenerator::kMinNumDifferentAddresses)); + SizeIs(ParallelSnippetGenerator::kMinNumDifferentAddresses)); const InstructionTemplate &IT = CT.Instructions[0]; EXPECT_THAT(IT.getOpcode(), Opcode); ASSERT_THAT(IT.getVariableValues(), SizeIs(3)); diff --git a/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp b/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp index 0f898d9d7bb..5ad49927f6b 100644 --- a/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp +++ b/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp @@ -7,12 +7,12 @@ //===----------------------------------------------------------------------===// #include "../Common/AssemblerUtils.h" -#include "Latency.h" #include "LlvmState.h" #include "MCInstrDescView.h" +#include "ParallelSnippetGenerator.h" #include "RegisterAliasing.h" +#include "SerialSnippetGenerator.h" #include "TestBase.h" -#include "Uops.h" #include "X86InstrInfo.h" #include @@ -59,12 +59,12 @@ protected: SnippetGeneratorT Generator; }; -using LatencySnippetGeneratorTest = - SnippetGeneratorTest; +using SerialSnippetGeneratorTest = SnippetGeneratorTest; -using UopsSnippetGeneratorTest = SnippetGeneratorTest; +using ParallelSnippetGeneratorTest = + SnippetGeneratorTest; -TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughImplicitReg) { +TEST_F(SerialSnippetGeneratorTest, ImplicitSelfDependencyThroughImplicitReg) { // 
- ADC16i16 // - Op0 Explicit Use Immediate // - Op1 Implicit Def Reg(AX) @@ -90,7 +90,7 @@ TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughImplicitReg) { EXPECT_THAT(IT.getVariableValues()[0], IsInvalid()) << "Immediate is not set"; } -TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughTiedRegs) { +TEST_F(SerialSnippetGeneratorTest, ImplicitSelfDependencyThroughTiedRegs) { // - ADD16ri // - Op0 Explicit Def RegClass(GR16) // - Op1 Explicit Use RegClass(GR16) TiedToOp0 @@ -114,7 +114,7 @@ TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughTiedRegs) { EXPECT_THAT(IT.getVariableValues()[1], IsInvalid()) << "Operand 2 is not set"; } -TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughExplicitRegs) { +TEST_F(SerialSnippetGeneratorTest, ImplicitSelfDependencyThroughExplicitRegs) { // - VXORPSrr // - Op0 Explicit Def RegClass(VR128) // - Op1 Explicit Use RegClass(VR128) @@ -138,7 +138,7 @@ TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughExplicitRegs) { << "Op0 is either set to Op1 or to Op2"; } -TEST_F(LatencySnippetGeneratorTest, +TEST_F(SerialSnippetGeneratorTest, ImplicitSelfDependencyThroughExplicitRegsForbidAll) { // - VXORPSrr // - Op0 Explicit Def RegClass(VR128) @@ -158,7 +158,7 @@ TEST_F(LatencySnippetGeneratorTest, consumeError(std::move(Error)); } -TEST_F(LatencySnippetGeneratorTest, DependencyThroughOtherOpcode) { +TEST_F(SerialSnippetGeneratorTest, DependencyThroughOtherOpcode) { // - CMP64rr // - Op0 Explicit Use RegClass(GR64) // - Op1 Explicit Use RegClass(GR64) @@ -182,7 +182,7 @@ TEST_F(LatencySnippetGeneratorTest, DependencyThroughOtherOpcode) { } } -TEST_F(LatencySnippetGeneratorTest, LAHF) { +TEST_F(SerialSnippetGeneratorTest, LAHF) { // - LAHF // - Op0 Implicit Def Reg(AH) // - Op1 Implicit Use Reg(EFLAGS) @@ -198,7 +198,7 @@ TEST_F(LatencySnippetGeneratorTest, LAHF) { } } -TEST_F(UopsSnippetGeneratorTest, ParallelInstruction) { +TEST_F(ParallelSnippetGeneratorTest, 
ParallelInstruction) { // - BNDCL32rr // - Op0 Explicit Use RegClass(BNDR) // - Op1 Explicit Use RegClass(GR32) @@ -218,7 +218,7 @@ TEST_F(UopsSnippetGeneratorTest, ParallelInstruction) { EXPECT_THAT(IT.getVariableValues()[1], IsInvalid()); } -TEST_F(UopsSnippetGeneratorTest, SerialInstruction) { +TEST_F(ParallelSnippetGeneratorTest, SerialInstruction) { // - CDQ // - Op0 Implicit Def Reg(EAX) // - Op1 Implicit Def Reg(EDX) @@ -237,7 +237,7 @@ TEST_F(UopsSnippetGeneratorTest, SerialInstruction) { ASSERT_THAT(IT.getVariableValues(), SizeIs(0)); } -TEST_F(UopsSnippetGeneratorTest, StaticRenaming) { +TEST_F(ParallelSnippetGeneratorTest, StaticRenaming) { // CMOV32rr has tied variables, we enumerate the possible values to execute // as many in parallel as possible. @@ -268,7 +268,7 @@ TEST_F(UopsSnippetGeneratorTest, StaticRenaming) { << "Each instruction writes to a different register"; } -TEST_F(UopsSnippetGeneratorTest, NoTiedVariables) { +TEST_F(ParallelSnippetGeneratorTest, NoTiedVariables) { // CMOV_GR32 has no tied variables, we make sure def and use are different // from each other. @@ -302,7 +302,7 @@ TEST_F(UopsSnippetGeneratorTest, NoTiedVariables) { EXPECT_THAT(IT.getVariableValues()[3], IsInvalid()); } -TEST_F(UopsSnippetGeneratorTest, MemoryUse) { +TEST_F(ParallelSnippetGeneratorTest, MemoryUse) { // Mov32rm reads from memory. 
// - MOV32rm // - Op0 Explicit Def RegClass(GR32) @@ -326,7 +326,7 @@ TEST_F(UopsSnippetGeneratorTest, MemoryUse) { EXPECT_THAT(CT.Info, HasSubstr("no tied variables")); EXPECT_THAT(CT.Execution, ExecutionMode::UNKNOWN); ASSERT_THAT(CT.Instructions, - SizeIs(UopsSnippetGenerator::kMinNumDifferentAddresses)); + SizeIs(ParallelSnippetGenerator::kMinNumDifferentAddresses)); const InstructionTemplate &IT = CT.Instructions[0]; EXPECT_THAT(IT.getOpcode(), Opcode); ASSERT_THAT(IT.getVariableValues(), SizeIs(6)); diff --git a/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp b/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp index 562fd058b61..298a54abffc 100644 --- a/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp +++ b/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp @@ -7,12 +7,10 @@ //===----------------------------------------------------------------------===// #include "../Common/AssemblerUtils.h" -#include "Latency.h" #include "LlvmState.h" #include "MCInstrDescView.h" #include "RegisterAliasing.h" #include "TestBase.h" -#include "Uops.h" #include "X86InstrInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -34,8 +32,7 @@ protected: void SetUp() { TM = State.createTargetMachine(); Context = std::make_unique<LLVMContext>(); - Mod = - std::make_unique<Module>("X86SnippetRepetitorTest", *Context); + Mod = std::make_unique<Module>("X86SnippetRepetitorTest", *Context); Mod->setDataLayout(TM->createDataLayout()); MMI = std::make_unique<MachineModuleInfo>(TM.get()); MF = &createVoidVoidPtrMachineFunction("TestFn", Mod.get(), MMI.get()); diff --git a/unittests/tools/llvm-exegesis/X86/TestBase.h b/unittests/tools/llvm-exegesis/X86/TestBase.h index c60cd76e53c..a7e542af83d 100644 --- a/unittests/tools/llvm-exegesis/X86/TestBase.h +++ b/unittests/tools/llvm-exegesis/X86/TestBase.h @@ -1,4 +1,4 @@ -//===-- TestBase.cpp --------------------------------------------*- C++ -*-===// +//===-- TestBase.h ----------------------------------------------*- C++ -*-===// // // Part of the LLVM
Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information.