1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 12:12:47 +01:00

[llvm-exegesis][NFC] Disassociate snippet generators from benchmark runners

The addition of `inverse_throughput` mode highlighted the disjointedness
of snippet generators and benchmark runners because it used the
`UopsSnippetGenerator` with the `LatencyBenchmarkRunner`.
To keep the code consistent, tie the snippet generators to
parallelization/serialization rather than their benchmark runners.

Renaming `LatencySnippetGenerator` -> `SerialSnippetGenerator`.
Renaming `UopsSnippetGenerator` -> `ParallelSnippetGenerator`.

Differential Revision: https://reviews.llvm.org/D72928
This commit is contained in:
Miloš Stojanović 2020-01-17 14:28:54 +01:00
parent dea61577e1
commit 5da66b880e
20 changed files with 270 additions and 190 deletions

View File

@ -6,7 +6,6 @@
// //
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#include "../Target.h" #include "../Target.h"
#include "../Latency.h"
#include "AArch64.h" #include "AArch64.h"
#include "AArch64RegisterInfo.h" #include "AArch64RegisterInfo.h"

View File

@ -244,9 +244,9 @@ Analysis::makePointsPerSchedClass() const {
return Entries; return Entries;
} }
// Uops repeat the same opcode over again. Just show this opcode and show the // Parallel benchmarks repeat the same opcode multiple times. Just show this
// whole snippet only on hover. // opcode and show the whole snippet only on hover.
static void writeUopsSnippetHtml(raw_ostream &OS, static void writeParallelSnippetHtml(raw_ostream &OS,
const std::vector<MCInst> &Instructions, const std::vector<MCInst> &Instructions,
const MCInstrInfo &InstrInfo) { const MCInstrInfo &InstrInfo) {
if (Instructions.empty()) if (Instructions.empty())
@ -282,7 +282,7 @@ void Analysis::printPointHtml(const InstructionBenchmark &Point,
break; break;
case InstructionBenchmark::Uops: case InstructionBenchmark::Uops:
case InstructionBenchmark::InverseThroughput: case InstructionBenchmark::InverseThroughput:
writeUopsSnippetHtml(OS, Point.Key.Instructions, *InstrInfo_); writeParallelSnippetHtml(OS, Point.Key.Instructions, *InstrInfo_);
break; break;
default: default:
llvm_unreachable("invalid mode"); llvm_unreachable("invalid mode");

View File

@ -27,18 +27,20 @@ add_library(LLVMExegesis
BenchmarkRunner.cpp BenchmarkRunner.cpp
Clustering.cpp Clustering.cpp
CodeTemplate.cpp CodeTemplate.cpp
Latency.cpp LatencyBenchmarkRunner.cpp
LlvmState.cpp LlvmState.cpp
MCInstrDescView.cpp MCInstrDescView.cpp
ParallelSnippetGenerator.cpp
PerfHelper.cpp PerfHelper.cpp
RegisterAliasing.cpp RegisterAliasing.cpp
RegisterValue.cpp RegisterValue.cpp
SchedClassResolution.cpp SchedClassResolution.cpp
SerialSnippetGenerator.cpp
SnippetFile.cpp SnippetFile.cpp
SnippetGenerator.cpp SnippetGenerator.cpp
SnippetRepetitor.cpp SnippetRepetitor.cpp
Target.cpp Target.cpp
Uops.cpp UopsBenchmarkRunner.cpp
) )
llvm_update_compile_flags(LLVMExegesis) llvm_update_compile_flags(LLVMExegesis)

View File

@ -0,0 +1,58 @@
//===-- LatencyBenchmarkRunner.cpp ------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "LatencyBenchmarkRunner.h"
#include "Target.h"
#include "BenchmarkRunner.h"
namespace llvm {
namespace exegesis {
// NOTE(review): kMaxAliasingInstructions is not referenced anywhere else in
// this file as shown; it may be a leftover from the code that was split out of
// Latency.cpp -- confirm whether it can be dropped here.
static constexpr size_t kMaxAliasingInstructions = 10;
// Builds a runner for the given benchmark mode. Only
// InstructionBenchmark::Latency and InstructionBenchmark::InverseThroughput
// are accepted; any other mode trips the assertion in assert-enabled builds.
LatencyBenchmarkRunner::LatencyBenchmarkRunner(const LLVMState &State,
                                               InstructionBenchmark::ModeE Mode)
    : BenchmarkRunner(State, Mode) {
  const bool IsSupportedMode =
      Mode == InstructionBenchmark::Latency ||
      Mode == InstructionBenchmark::InverseThroughput;
  (void)IsSupportedMode; // Only read by the assert below.
  assert(IsSupportedMode && "invalid mode");
}

// Out-of-line defaulted destructor.
LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default;
// Runs the snippet repeatedly against the cycle counter and reports the
// smallest observed value as a single measure, keyed "latency" or
// "inverse_throughput" depending on the runner's mode. Any error reported by
// the executor is returned immediately. For any other mode the returned
// vector is empty.
Expected<std::vector<BenchmarkMeasure>> LatencyBenchmarkRunner::runMeasurements(
    const FunctionExecutor &Executor) const {
  // Cycle measurements include some overhead from the kernel, so sample the
  // counter several times and keep only the minimum.
  constexpr const int NumMeasurements = 30;
  const char *CounterName = State.getPfmCounters().CycleCounter;
  int64_t MinValue = std::numeric_limits<int64_t>::max();
  for (int Remaining = NumMeasurements; Remaining > 0; --Remaining) {
    auto ExpectedCounterValue = Executor.runAndMeasure(CounterName);
    if (!ExpectedCounterValue)
      return ExpectedCounterValue.takeError();
    if (*ExpectedCounterValue < MinValue)
      MinValue = *ExpectedCounterValue;
  }

  std::vector<BenchmarkMeasure> Result;
  if (Mode == InstructionBenchmark::Latency)
    Result.push_back(BenchmarkMeasure::Create("latency", MinValue));
  else if (Mode == InstructionBenchmark::InverseThroughput)
    Result.push_back(BenchmarkMeasure::Create("inverse_throughput", MinValue));
  // Any other mode falls through with an empty result.
  return std::move(Result);
}
} // namespace exegesis
} // namespace llvm

View File

@ -1,4 +1,4 @@
//===-- Latency.h -----------------------------------------------*- C++ -*-===// //===-- LatencyBenchmarkRunner.h --------------------------------*- C++ -*-===//
// //
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information. // See https://llvm.org/LICENSE.txt for license information.
@ -15,23 +15,10 @@
#define LLVM_TOOLS_LLVM_EXEGESIS_LATENCY_H #define LLVM_TOOLS_LLVM_EXEGESIS_LATENCY_H
#include "BenchmarkRunner.h" #include "BenchmarkRunner.h"
#include "Error.h"
#include "MCInstrDescView.h"
#include "SnippetGenerator.h"
namespace llvm { namespace llvm {
namespace exegesis { namespace exegesis {
class LatencySnippetGenerator : public SnippetGenerator {
public:
using SnippetGenerator::SnippetGenerator;
~LatencySnippetGenerator() override;
Expected<std::vector<CodeTemplate>>
generateCodeTemplates(const Instruction &Instr,
const BitVector &ForbiddenRegisters) const override;
};
class LatencyBenchmarkRunner : public BenchmarkRunner { class LatencyBenchmarkRunner : public BenchmarkRunner {
public: public:
LatencyBenchmarkRunner(const LLVMState &State, LatencyBenchmarkRunner(const LLVMState &State,

View File

@ -5,8 +5,8 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// //
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#include "../Error.h"
#include "../Target.h" #include "../Target.h"
#include "../Latency.h"
#include "MCTargetDesc/MipsBaseInfo.h" #include "MCTargetDesc/MipsBaseInfo.h"
#include "Mips.h" #include "Mips.h"
#include "MipsRegisterInfo.h" #include "MipsRegisterInfo.h"

View File

@ -1,4 +1,4 @@
//===-- Uops.cpp ------------------------------------------------*- C++ -*-===// //===-- ParallelSnippetGenerator.cpp ----------------------------*- C++ -*-===//
// //
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information. // See https://llvm.org/LICENSE.txt for license information.
@ -6,9 +6,8 @@
// //
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#include "Uops.h" #include "ParallelSnippetGenerator.h"
#include "Assembler.h"
#include "BenchmarkRunner.h" #include "BenchmarkRunner.h"
#include "MCInstrDescView.h" #include "MCInstrDescView.h"
#include "Target.h" #include "Target.h"
@ -16,9 +15,9 @@
// FIXME: Load constants into registers (e.g. with fld1) to not break // FIXME: Load constants into registers (e.g. with fld1) to not break
// instructions like x87. // instructions like x87.
// Ideally we would like the only limitation on executing uops to be the issue // Ideally we would like the only limitation on executing instructions to be the
// ports. Maximizing port pressure increases the likelihood that the load is // availability of the CPU resources (e.g. execution ports) needed to execute
// distributed evenly across possible ports. // them, instead of the availability of their data dependencies.
// To achieve that, one approach is to generate instructions that do not have // To achieve that, one approach is to generate instructions that do not have
// data dependencies between them. // data dependencies between them.
@ -89,11 +88,9 @@ getVariablesWithTiedOperands(const Instruction &Instr) {
return Result; return Result;
} }
UopsBenchmarkRunner::~UopsBenchmarkRunner() = default; ParallelSnippetGenerator::~ParallelSnippetGenerator() = default;
UopsSnippetGenerator::~UopsSnippetGenerator() = default; void ParallelSnippetGenerator::instantiateMemoryOperands(
void UopsSnippetGenerator::instantiateMemoryOperands(
const unsigned ScratchSpacePointerInReg, const unsigned ScratchSpacePointerInReg,
std::vector<InstructionTemplate> &Instructions) const { std::vector<InstructionTemplate> &Instructions) const {
if (ScratchSpacePointerInReg == 0) if (ScratchSpacePointerInReg == 0)
@ -157,7 +154,7 @@ static std::vector<InstructionTemplate> generateSnippetUsingStaticRenaming(
} }
} }
Expected<std::vector<CodeTemplate>> UopsSnippetGenerator::generateCodeTemplates( Expected<std::vector<CodeTemplate>> ParallelSnippetGenerator::generateCodeTemplates(
const Instruction &Instr, const BitVector &ForbiddenRegisters) const { const Instruction &Instr, const BitVector &ForbiddenRegisters) const {
CodeTemplate CT; CodeTemplate CT;
CT.ScratchSpacePointerInReg = CT.ScratchSpacePointerInReg =
@ -219,34 +216,7 @@ Expected<std::vector<CodeTemplate>> UopsSnippetGenerator::generateCodeTemplates(
return getSingleton(std::move(CT)); return getSingleton(std::move(CT));
} }
Expected<std::vector<BenchmarkMeasure>> constexpr const size_t ParallelSnippetGenerator::kMinNumDifferentAddresses;
UopsBenchmarkRunner::runMeasurements(const FunctionExecutor &Executor) const {
std::vector<BenchmarkMeasure> Result;
const PfmCountersInfo &PCI = State.getPfmCounters();
// Uops per port.
for (const auto *IssueCounter = PCI.IssueCounters,
*IssueCounterEnd = PCI.IssueCounters + PCI.NumIssueCounters;
IssueCounter != IssueCounterEnd; ++IssueCounter) {
if (!IssueCounter->Counter)
continue;
auto ExpectedCounterValue = Executor.runAndMeasure(IssueCounter->Counter);
if (!ExpectedCounterValue)
return ExpectedCounterValue.takeError();
Result.push_back(BenchmarkMeasure::Create(IssueCounter->ProcResName,
*ExpectedCounterValue));
}
// NumMicroOps.
if (const char *const UopsCounter = PCI.UopsCounter) {
auto ExpectedCounterValue = Executor.runAndMeasure(UopsCounter);
if (!ExpectedCounterValue)
return ExpectedCounterValue.takeError();
Result.push_back(
BenchmarkMeasure::Create("NumMicroOps", *ExpectedCounterValue));
}
return std::move(Result);
}
constexpr const size_t UopsSnippetGenerator::kMinNumDifferentAddresses;
} // namespace exegesis } // namespace exegesis
} // namespace llvm } // namespace llvm

View File

@ -1,4 +1,4 @@
//===-- Uops.h --------------------------------------------------*- C++ -*-===// //===-- ParallelSnippetGenerator.h ------------------------------*- C++ -*-===//
// //
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information. // See https://llvm.org/LICENSE.txt for license information.
@ -7,23 +7,22 @@
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
/// ///
/// \file /// \file
/// A BenchmarkRunner implementation to measure uop decomposition. /// A SnippetGenerator implementation to create parallel instruction snippets.
/// ///
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVM_EXEGESIS_UOPS_H #ifndef LLVM_TOOLS_LLVM_EXEGESIS_PARALLELSNIPPETGENERATOR_H
#define LLVM_TOOLS_LLVM_EXEGESIS_UOPS_H #define LLVM_TOOLS_LLVM_EXEGESIS_PARALLELSNIPPETGENERATOR_H
#include "BenchmarkRunner.h"
#include "SnippetGenerator.h" #include "SnippetGenerator.h"
namespace llvm { namespace llvm {
namespace exegesis { namespace exegesis {
class UopsSnippetGenerator : public SnippetGenerator { class ParallelSnippetGenerator : public SnippetGenerator {
public: public:
using SnippetGenerator::SnippetGenerator; using SnippetGenerator::SnippetGenerator;
~UopsSnippetGenerator() override; ~ParallelSnippetGenerator() override;
Expected<std::vector<CodeTemplate>> Expected<std::vector<CodeTemplate>>
generateCodeTemplates(const Instruction &Instr, generateCodeTemplates(const Instruction &Instr,
@ -60,20 +59,7 @@ private:
std::vector<InstructionTemplate> &SnippetTemplate) const; std::vector<InstructionTemplate> &SnippetTemplate) const;
}; };
class UopsBenchmarkRunner : public BenchmarkRunner {
public:
UopsBenchmarkRunner(const LLVMState &State)
: BenchmarkRunner(State, InstructionBenchmark::Uops) {}
~UopsBenchmarkRunner() override;
static constexpr const size_t kMinNumDifferentAddresses = 6;
private:
Expected<std::vector<BenchmarkMeasure>>
runMeasurements(const FunctionExecutor &Executor) const override;
};
} // namespace exegesis } // namespace exegesis
} // namespace llvm } // namespace llvm
#endif // LLVM_TOOLS_LLVM_EXEGESIS_UOPS_H #endif // LLVM_TOOLS_LLVM_EXEGESIS_PARALLELSNIPPETGENERATOR_H

View File

@ -7,7 +7,6 @@
// The PowerPC ExegesisTarget. // The PowerPC ExegesisTarget.
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#include "../Target.h" #include "../Target.h"
#include "../Latency.h"
#include "PPC.h" #include "PPC.h"
#include "PPCRegisterInfo.h" #include "PPCRegisterInfo.h"

View File

@ -1,4 +1,4 @@
//===-- Latency.cpp ---------------------------------------------*- C++ -*-===// //===-- SerialSnippetGenerator.cpp ------------------------------*- C++ -*-===//
// //
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information. // See https://llvm.org/LICENSE.txt for license information.
@ -6,17 +6,13 @@
// //
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#include "Latency.h" #include "SerialSnippetGenerator.h"
#include "Assembler.h"
#include "BenchmarkRunner.h"
#include "MCInstrDescView.h" #include "MCInstrDescView.h"
#include "PerfHelper.h" #include "CodeTemplate.h"
#include "Target.h" #include <algorithm>
#include "llvm/ADT/STLExtras.h" #include <numeric>
#include "llvm/MC/MCInst.h" #include <vector>
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/Support/FormatVariadic.h"
namespace llvm { namespace llvm {
namespace exegesis { namespace exegesis {
@ -149,10 +145,10 @@ static void appendCodeTemplates(const LLVMState &State,
} }
} }
LatencySnippetGenerator::~LatencySnippetGenerator() = default; SerialSnippetGenerator::~SerialSnippetGenerator() = default;
Expected<std::vector<CodeTemplate>> Expected<std::vector<CodeTemplate>>
LatencySnippetGenerator::generateCodeTemplates( SerialSnippetGenerator::generateCodeTemplates(
const Instruction &Instr, const BitVector &ForbiddenRegisters) const { const Instruction &Instr, const BitVector &ForbiddenRegisters) const {
std::vector<CodeTemplate> Results; std::vector<CodeTemplate> Results;
const ExecutionMode EM = getExecutionModes(Instr, ForbiddenRegisters); const ExecutionMode EM = getExecutionModes(Instr, ForbiddenRegisters);
@ -169,43 +165,5 @@ LatencySnippetGenerator::generateCodeTemplates(
return std::move(Results); return std::move(Results);
} }
LatencyBenchmarkRunner::LatencyBenchmarkRunner(const LLVMState &State,
InstructionBenchmark::ModeE Mode)
: BenchmarkRunner(State, Mode) {
assert((Mode == InstructionBenchmark::Latency ||
Mode == InstructionBenchmark::InverseThroughput) &&
"invalid mode");
}
LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default;
Expected<std::vector<BenchmarkMeasure>> LatencyBenchmarkRunner::runMeasurements(
const FunctionExecutor &Executor) const {
// Cycle measurements include some overhead from the kernel. Repeat the
// measure several times and take the minimum value.
constexpr const int NumMeasurements = 30;
int64_t MinValue = std::numeric_limits<int64_t>::max();
const char *CounterName = State.getPfmCounters().CycleCounter;
for (size_t I = 0; I < NumMeasurements; ++I) {
auto ExpectedCounterValue = Executor.runAndMeasure(CounterName);
if (!ExpectedCounterValue)
return ExpectedCounterValue.takeError();
if (*ExpectedCounterValue < MinValue)
MinValue = *ExpectedCounterValue;
}
std::vector<BenchmarkMeasure> Result;
switch (Mode) {
case InstructionBenchmark::Latency:
Result = {BenchmarkMeasure::Create("latency", MinValue)};
break;
case InstructionBenchmark::InverseThroughput:
Result = {BenchmarkMeasure::Create("inverse_throughput", MinValue)};
break;
default:
break;
}
return std::move(Result);
}
} // namespace exegesis } // namespace exegesis
} // namespace llvm } // namespace llvm

View File

@ -0,0 +1,37 @@
//===-- SerialSnippetGenerator.h --------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// A SnippetGenerator implementation to create serial instruction snippets.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVM_EXEGESIS_SERIALSNIPPETGENERATOR_H
#define LLVM_TOOLS_LLVM_EXEGESIS_SERIALSNIPPETGENERATOR_H
#include "Error.h"
#include "MCInstrDescView.h"
#include "SnippetGenerator.h"
namespace llvm {
namespace exegesis {
// SnippetGenerator implementation that creates serial instruction snippets.
class SerialSnippetGenerator : public SnippetGenerator {
public:
// Inherit the SnippetGenerator constructors unchanged.
using SnippetGenerator::SnippetGenerator;
~SerialSnippetGenerator() override;
// Overrides SnippetGenerator::generateCodeTemplates for Instr. Returns either
// a vector of CodeTemplate or an error.
// NOTE(review): ForbiddenRegisters is presumably the set of registers the
// generated templates must not use -- confirm against SnippetGenerator.
Expected<std::vector<CodeTemplate>>
generateCodeTemplates(const Instruction &Instr,
const BitVector &ForbiddenRegisters) const override;
};
} // namespace exegesis
} // namespace llvm
#endif // LLVM_TOOLS_LLVM_EXEGESIS_SERIALSNIPPETGENERATOR_H

View File

@ -7,8 +7,10 @@
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#include "Target.h" #include "Target.h"
#include "Latency.h" #include "LatencyBenchmarkRunner.h"
#include "Uops.h" #include "ParallelSnippetGenerator.h"
#include "SerialSnippetGenerator.h"
#include "UopsBenchmarkRunner.h"
namespace llvm { namespace llvm {
namespace exegesis { namespace exegesis {
@ -43,10 +45,10 @@ std::unique_ptr<SnippetGenerator> ExegesisTarget::createSnippetGenerator(
case InstructionBenchmark::Unknown: case InstructionBenchmark::Unknown:
return nullptr; return nullptr;
case InstructionBenchmark::Latency: case InstructionBenchmark::Latency:
return createLatencySnippetGenerator(State, Opts); return createSerialSnippetGenerator(State, Opts);
case InstructionBenchmark::Uops: case InstructionBenchmark::Uops:
case InstructionBenchmark::InverseThroughput: case InstructionBenchmark::InverseThroughput:
return createUopsSnippetGenerator(State, Opts); return createParallelSnippetGenerator(State, Opts);
} }
return nullptr; return nullptr;
} }
@ -77,14 +79,14 @@ ExegesisTarget::createBenchmarkRunner(InstructionBenchmark::ModeE Mode,
return nullptr; return nullptr;
} }
std::unique_ptr<SnippetGenerator> ExegesisTarget::createLatencySnippetGenerator( std::unique_ptr<SnippetGenerator> ExegesisTarget::createSerialSnippetGenerator(
const LLVMState &State, const SnippetGenerator::Options &Opts) const { const LLVMState &State, const SnippetGenerator::Options &Opts) const {
return std::make_unique<LatencySnippetGenerator>(State, Opts); return std::make_unique<SerialSnippetGenerator>(State, Opts);
} }
std::unique_ptr<SnippetGenerator> ExegesisTarget::createUopsSnippetGenerator( std::unique_ptr<SnippetGenerator> ExegesisTarget::createParallelSnippetGenerator(
const LLVMState &State, const SnippetGenerator::Options &Opts) const { const LLVMState &State, const SnippetGenerator::Options &Opts) const {
return std::make_unique<UopsSnippetGenerator>(State, Opts); return std::make_unique<ParallelSnippetGenerator>(State, Opts);
} }
std::unique_ptr<BenchmarkRunner> ExegesisTarget::createLatencyBenchmarkRunner( std::unique_ptr<BenchmarkRunner> ExegesisTarget::createLatencyBenchmarkRunner(

View File

@ -144,9 +144,9 @@ private:
// Targets can implement their own snippet generators/benchmarks runners by // Targets can implement their own snippet generators/benchmarks runners by
// implementing these. // implementing these.
std::unique_ptr<SnippetGenerator> virtual createLatencySnippetGenerator( std::unique_ptr<SnippetGenerator> virtual createSerialSnippetGenerator(
const LLVMState &State, const SnippetGenerator::Options &Opts) const; const LLVMState &State, const SnippetGenerator::Options &Opts) const;
std::unique_ptr<SnippetGenerator> virtual createUopsSnippetGenerator( std::unique_ptr<SnippetGenerator> virtual createParallelSnippetGenerator(
const LLVMState &State, const SnippetGenerator::Options &Opts) const; const LLVMState &State, const SnippetGenerator::Options &Opts) const;
std::unique_ptr<BenchmarkRunner> virtual createLatencyBenchmarkRunner( std::unique_ptr<BenchmarkRunner> virtual createLatencyBenchmarkRunner(
const LLVMState &State, InstructionBenchmark::ModeE Mode) const; const LLVMState &State, InstructionBenchmark::ModeE Mode) const;

View File

@ -0,0 +1,46 @@
//===-- UopsBenchmarkRunner.cpp ---------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "UopsBenchmarkRunner.h"
#include "Target.h"
namespace llvm {
namespace exegesis {
// Out-of-line defaulted destructor.
UopsBenchmarkRunner::~UopsBenchmarkRunner() = default;

// Collects one measure per issue counter that has a counter name (keyed by
// the counter's ProcResName), followed by a "NumMicroOps" measure when a uops
// counter is configured. The first error reported by the executor aborts the
// run and is returned to the caller.
Expected<std::vector<BenchmarkMeasure>>
UopsBenchmarkRunner::runMeasurements(const FunctionExecutor &Executor) const {
  const PfmCountersInfo &PCI = State.getPfmCounters();
  std::vector<BenchmarkMeasure> Result;

  // Uops per port.
  const auto *const CountersEnd = PCI.IssueCounters + PCI.NumIssueCounters;
  for (const auto *Current = PCI.IssueCounters; Current != CountersEnd;
       ++Current) {
    // Entries without a counter name are skipped.
    if (Current->Counter) {
      auto PortValue = Executor.runAndMeasure(Current->Counter);
      if (!PortValue)
        return PortValue.takeError();
      Result.push_back(
          BenchmarkMeasure::Create(Current->ProcResName, *PortValue));
    }
  }

  // NumMicroOps.
  const char *const UopsCounter = PCI.UopsCounter;
  if (UopsCounter != nullptr) {
    auto UopsValue = Executor.runAndMeasure(UopsCounter);
    if (!UopsValue)
      return UopsValue.takeError();
    Result.push_back(BenchmarkMeasure::Create("NumMicroOps", *UopsValue));
  }
  return std::move(Result);
}
} // namespace exegesis
} // namespace llvm

View File

@ -0,0 +1,38 @@
//===-- UopsBenchmarkRunner.h -----------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// A BenchmarkRunner implementation to measure uop decomposition.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVM_EXEGESIS_UOPSBENCHMARKRUNNER_H
#define LLVM_TOOLS_LLVM_EXEGESIS_UOPSBENCHMARKRUNNER_H
#include "BenchmarkRunner.h"
namespace llvm {
namespace exegesis {
// BenchmarkRunner implementation to measure uop decomposition.
class UopsBenchmarkRunner : public BenchmarkRunner {
public:
// Always constructs the base runner in InstructionBenchmark::Uops mode.
UopsBenchmarkRunner(const LLVMState &State)
: BenchmarkRunner(State, InstructionBenchmark::Uops) {}
~UopsBenchmarkRunner() override;
// NOTE(review): no UopsBenchmarkRunner code visible here reads this constant;
// a constant with the same name is also defined for ParallelSnippetGenerator.
// Confirm whether this copy is still needed.
static constexpr const size_t kMinNumDifferentAddresses = 6;
private:
// Overrides BenchmarkRunner::runMeasurements. Returns either a vector of
// BenchmarkMeasure or an error.
Expected<std::vector<BenchmarkMeasure>>
runMeasurements(const FunctionExecutor &Executor) const override;
};
} // namespace exegesis
} // namespace llvm
#endif // LLVM_TOOLS_LLVM_EXEGESIS_UOPSBENCHMARKRUNNER_H

View File

@ -8,9 +8,9 @@
#include "../Target.h" #include "../Target.h"
#include "../Error.h" #include "../Error.h"
#include "../Latency.h" #include "../SerialSnippetGenerator.h"
#include "../SnippetGenerator.h" #include "../SnippetGenerator.h"
#include "../Uops.h" #include "../ParallelSnippetGenerator.h"
#include "MCTargetDesc/X86BaseInfo.h" #include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h" #include "MCTargetDesc/X86MCTargetDesc.h"
#include "X86.h" #include "X86.h"
@ -242,9 +242,9 @@ static Expected<std::vector<CodeTemplate>> generateLEATemplatesCommon(
} }
namespace { namespace {
class X86LatencySnippetGenerator : public LatencySnippetGenerator { class X86SerialSnippetGenerator : public SerialSnippetGenerator {
public: public:
using LatencySnippetGenerator::LatencySnippetGenerator; using SerialSnippetGenerator::SerialSnippetGenerator;
Expected<std::vector<CodeTemplate>> Expected<std::vector<CodeTemplate>>
generateCodeTemplates(const Instruction &Instr, generateCodeTemplates(const Instruction &Instr,
@ -253,7 +253,7 @@ public:
} // namespace } // namespace
Expected<std::vector<CodeTemplate>> Expected<std::vector<CodeTemplate>>
X86LatencySnippetGenerator::generateCodeTemplates( X86SerialSnippetGenerator::generateCodeTemplates(
const Instruction &Instr, const BitVector &ForbiddenRegisters) const { const Instruction &Instr, const BitVector &ForbiddenRegisters) const {
if (auto E = IsInvalidOpcode(Instr)) if (auto E = IsInvalidOpcode(Instr))
return std::move(E); return std::move(E);
@ -271,7 +271,7 @@ X86LatencySnippetGenerator::generateCodeTemplates(
switch (getX86FPFlags(Instr)) { switch (getX86FPFlags(Instr)) {
case X86II::NotFP: case X86II::NotFP:
return LatencySnippetGenerator::generateCodeTemplates(Instr, return SerialSnippetGenerator::generateCodeTemplates(Instr,
ForbiddenRegisters); ForbiddenRegisters);
case X86II::ZeroArgFP: case X86II::ZeroArgFP:
case X86II::OneArgFP: case X86II::OneArgFP:
@ -292,9 +292,9 @@ X86LatencySnippetGenerator::generateCodeTemplates(
} }
namespace { namespace {
class X86UopsSnippetGenerator : public UopsSnippetGenerator { class X86ParallelSnippetGenerator : public ParallelSnippetGenerator {
public: public:
using UopsSnippetGenerator::UopsSnippetGenerator; using ParallelSnippetGenerator::ParallelSnippetGenerator;
Expected<std::vector<CodeTemplate>> Expected<std::vector<CodeTemplate>>
generateCodeTemplates(const Instruction &Instr, generateCodeTemplates(const Instruction &Instr,
@ -304,7 +304,7 @@ public:
} // namespace } // namespace
Expected<std::vector<CodeTemplate>> Expected<std::vector<CodeTemplate>>
X86UopsSnippetGenerator::generateCodeTemplates( X86ParallelSnippetGenerator::generateCodeTemplates(
const Instruction &Instr, const BitVector &ForbiddenRegisters) const { const Instruction &Instr, const BitVector &ForbiddenRegisters) const {
if (auto E = IsInvalidOpcode(Instr)) if (auto E = IsInvalidOpcode(Instr))
return std::move(E); return std::move(E);
@ -333,7 +333,7 @@ X86UopsSnippetGenerator::generateCodeTemplates(
switch (getX86FPFlags(Instr)) { switch (getX86FPFlags(Instr)) {
case X86II::NotFP: case X86II::NotFP:
return UopsSnippetGenerator::generateCodeTemplates(Instr, return ParallelSnippetGenerator::generateCodeTemplates(Instr,
ForbiddenRegisters); ForbiddenRegisters);
case X86II::ZeroArgFP: case X86II::ZeroArgFP:
case X86II::OneArgFP: case X86II::OneArgFP:
@ -577,16 +577,16 @@ private:
sizeof(kUnavailableRegisters[0])); sizeof(kUnavailableRegisters[0]));
} }
std::unique_ptr<SnippetGenerator> createLatencySnippetGenerator( std::unique_ptr<SnippetGenerator> createSerialSnippetGenerator(
const LLVMState &State, const LLVMState &State,
const SnippetGenerator::Options &Opts) const override { const SnippetGenerator::Options &Opts) const override {
return std::make_unique<X86LatencySnippetGenerator>(State, Opts); return std::make_unique<X86SerialSnippetGenerator>(State, Opts);
} }
std::unique_ptr<SnippetGenerator> createUopsSnippetGenerator( std::unique_ptr<SnippetGenerator> createParallelSnippetGenerator(
const LLVMState &State, const LLVMState &State,
const SnippetGenerator::Options &Opts) const override { const SnippetGenerator::Options &Opts) const override {
return std::make_unique<X86UopsSnippetGenerator>(State, Opts); return std::make_unique<X86ParallelSnippetGenerator>(State, Opts);
} }
bool matchesArch(Triple::ArchType Arch) const override { bool matchesArch(Triple::ArchType Arch) const override {

View File

@ -7,13 +7,13 @@
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#include "../Common/AssemblerUtils.h" #include "../Common/AssemblerUtils.h"
#include "Latency.h"
#include "LlvmState.h" #include "LlvmState.h"
#include "MCInstrDescView.h" #include "MCInstrDescView.h"
#include "MipsInstrInfo.h" #include "MipsInstrInfo.h"
#include "ParallelSnippetGenerator.h"
#include "RegisterAliasing.h" #include "RegisterAliasing.h"
#include "SerialSnippetGenerator.h"
#include "TestBase.h" #include "TestBase.h"
#include "Uops.h"
#include <unordered_set> #include <unordered_set>
@ -48,12 +48,12 @@ protected:
SnippetGeneratorT Generator; SnippetGeneratorT Generator;
}; };
using LatencySnippetGeneratorTest = using SerialSnippetGeneratorTest = SnippetGeneratorTest<SerialSnippetGenerator>;
SnippetGeneratorTest<LatencySnippetGenerator>;
using UopsSnippetGeneratorTest = SnippetGeneratorTest<UopsSnippetGenerator>; using ParallelSnippetGeneratorTest =
SnippetGeneratorTest<ParallelSnippetGenerator>;
TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughExplicitRegs) { TEST_F(SerialSnippetGeneratorTest, ImplicitSelfDependencyThroughExplicitRegs) {
// - ADD // - ADD
// - Op0 Explicit Def RegClass(GPR32) // - Op0 Explicit Def RegClass(GPR32)
// - Op1 Explicit Use RegClass(GPR32) // - Op1 Explicit Use RegClass(GPR32)
@ -77,8 +77,8 @@ TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughExplicitRegs) {
<< "Op0 is either set to Op1 or to Op2"; << "Op0 is either set to Op1 or to Op2";
} }
TEST_F(LatencySnippetGeneratorTest, TEST_F(SerialSnippetGeneratorTest,
ImplicitSelfDependencyThroughExplicitRegsForbidAll) { ImplicitSelfDependencyThroughExplicitRegsForbidAll) {
// - XOR // - XOR
// - Op0 Explicit Def RegClass(GPR32) // - Op0 Explicit Def RegClass(GPR32)
// - Op1 Explicit Use RegClass(GPR32) // - Op1 Explicit Use RegClass(GPR32)
@ -96,7 +96,7 @@ TEST_F(LatencySnippetGeneratorTest,
consumeError(std::move(Error)); consumeError(std::move(Error));
} }
TEST_F(UopsSnippetGeneratorTest, MemoryUse) { TEST_F(ParallelSnippetGeneratorTest, MemoryUse) {
// LB reads from memory. // LB reads from memory.
// - LB // - LB
// - Op0 Explicit Def RegClass(GPR32) // - Op0 Explicit Def RegClass(GPR32)
@ -110,10 +110,11 @@ TEST_F(UopsSnippetGeneratorTest, MemoryUse) {
const auto CodeTemplates = checkAndGetCodeTemplates(Opcode); const auto CodeTemplates = checkAndGetCodeTemplates(Opcode);
ASSERT_THAT(CodeTemplates, SizeIs(1)); ASSERT_THAT(CodeTemplates, SizeIs(1));
const auto &CT = CodeTemplates[0]; const auto &CT = CodeTemplates[0];
EXPECT_THAT(CT.Info, HasSubstr("instruction is parallel, repeating a random one.")); EXPECT_THAT(CT.Info,
HasSubstr("instruction is parallel, repeating a random one."));
EXPECT_THAT(CT.Execution, ExecutionMode::UNKNOWN); EXPECT_THAT(CT.Execution, ExecutionMode::UNKNOWN);
ASSERT_THAT(CT.Instructions, ASSERT_THAT(CT.Instructions,
SizeIs(UopsSnippetGenerator::kMinNumDifferentAddresses)); SizeIs(ParallelSnippetGenerator::kMinNumDifferentAddresses));
const InstructionTemplate &IT = CT.Instructions[0]; const InstructionTemplate &IT = CT.Instructions[0];
EXPECT_THAT(IT.getOpcode(), Opcode); EXPECT_THAT(IT.getOpcode(), Opcode);
ASSERT_THAT(IT.getVariableValues(), SizeIs(3)); ASSERT_THAT(IT.getVariableValues(), SizeIs(3));

View File

@ -7,12 +7,12 @@
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#include "../Common/AssemblerUtils.h" #include "../Common/AssemblerUtils.h"
#include "Latency.h"
#include "LlvmState.h" #include "LlvmState.h"
#include "MCInstrDescView.h" #include "MCInstrDescView.h"
#include "ParallelSnippetGenerator.h"
#include "RegisterAliasing.h" #include "RegisterAliasing.h"
#include "SerialSnippetGenerator.h"
#include "TestBase.h" #include "TestBase.h"
#include "Uops.h"
#include "X86InstrInfo.h" #include "X86InstrInfo.h"
#include <unordered_set> #include <unordered_set>
@ -59,12 +59,12 @@ protected:
SnippetGeneratorT Generator; SnippetGeneratorT Generator;
}; };
using LatencySnippetGeneratorTest = using SerialSnippetGeneratorTest = SnippetGeneratorTest<SerialSnippetGenerator>;
SnippetGeneratorTest<LatencySnippetGenerator>;
using UopsSnippetGeneratorTest = SnippetGeneratorTest<UopsSnippetGenerator>; using ParallelSnippetGeneratorTest =
SnippetGeneratorTest<ParallelSnippetGenerator>;
TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughImplicitReg) { TEST_F(SerialSnippetGeneratorTest, ImplicitSelfDependencyThroughImplicitReg) {
// - ADC16i16 // - ADC16i16
// - Op0 Explicit Use Immediate // - Op0 Explicit Use Immediate
// - Op1 Implicit Def Reg(AX) // - Op1 Implicit Def Reg(AX)
@ -90,7 +90,7 @@ TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughImplicitReg) {
EXPECT_THAT(IT.getVariableValues()[0], IsInvalid()) << "Immediate is not set"; EXPECT_THAT(IT.getVariableValues()[0], IsInvalid()) << "Immediate is not set";
} }
TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughTiedRegs) { TEST_F(SerialSnippetGeneratorTest, ImplicitSelfDependencyThroughTiedRegs) {
// - ADD16ri // - ADD16ri
// - Op0 Explicit Def RegClass(GR16) // - Op0 Explicit Def RegClass(GR16)
// - Op1 Explicit Use RegClass(GR16) TiedToOp0 // - Op1 Explicit Use RegClass(GR16) TiedToOp0
@ -114,7 +114,7 @@ TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughTiedRegs) {
EXPECT_THAT(IT.getVariableValues()[1], IsInvalid()) << "Operand 2 is not set"; EXPECT_THAT(IT.getVariableValues()[1], IsInvalid()) << "Operand 2 is not set";
} }
TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughExplicitRegs) { TEST_F(SerialSnippetGeneratorTest, ImplicitSelfDependencyThroughExplicitRegs) {
// - VXORPSrr // - VXORPSrr
// - Op0 Explicit Def RegClass(VR128) // - Op0 Explicit Def RegClass(VR128)
// - Op1 Explicit Use RegClass(VR128) // - Op1 Explicit Use RegClass(VR128)
@ -138,7 +138,7 @@ TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughExplicitRegs) {
<< "Op0 is either set to Op1 or to Op2"; << "Op0 is either set to Op1 or to Op2";
} }
TEST_F(LatencySnippetGeneratorTest, TEST_F(SerialSnippetGeneratorTest,
ImplicitSelfDependencyThroughExplicitRegsForbidAll) { ImplicitSelfDependencyThroughExplicitRegsForbidAll) {
// - VXORPSrr // - VXORPSrr
// - Op0 Explicit Def RegClass(VR128) // - Op0 Explicit Def RegClass(VR128)
@ -158,7 +158,7 @@ TEST_F(LatencySnippetGeneratorTest,
consumeError(std::move(Error)); consumeError(std::move(Error));
} }
TEST_F(LatencySnippetGeneratorTest, DependencyThroughOtherOpcode) { TEST_F(SerialSnippetGeneratorTest, DependencyThroughOtherOpcode) {
// - CMP64rr // - CMP64rr
// - Op0 Explicit Use RegClass(GR64) // - Op0 Explicit Use RegClass(GR64)
// - Op1 Explicit Use RegClass(GR64) // - Op1 Explicit Use RegClass(GR64)
@ -182,7 +182,7 @@ TEST_F(LatencySnippetGeneratorTest, DependencyThroughOtherOpcode) {
} }
} }
TEST_F(LatencySnippetGeneratorTest, LAHF) { TEST_F(SerialSnippetGeneratorTest, LAHF) {
// - LAHF // - LAHF
// - Op0 Implicit Def Reg(AH) // - Op0 Implicit Def Reg(AH)
// - Op1 Implicit Use Reg(EFLAGS) // - Op1 Implicit Use Reg(EFLAGS)
@ -198,7 +198,7 @@ TEST_F(LatencySnippetGeneratorTest, LAHF) {
} }
} }
TEST_F(UopsSnippetGeneratorTest, ParallelInstruction) { TEST_F(ParallelSnippetGeneratorTest, ParallelInstruction) {
// - BNDCL32rr // - BNDCL32rr
// - Op0 Explicit Use RegClass(BNDR) // - Op0 Explicit Use RegClass(BNDR)
// - Op1 Explicit Use RegClass(GR32) // - Op1 Explicit Use RegClass(GR32)
@ -218,7 +218,7 @@ TEST_F(UopsSnippetGeneratorTest, ParallelInstruction) {
EXPECT_THAT(IT.getVariableValues()[1], IsInvalid()); EXPECT_THAT(IT.getVariableValues()[1], IsInvalid());
} }
TEST_F(UopsSnippetGeneratorTest, SerialInstruction) { TEST_F(ParallelSnippetGeneratorTest, SerialInstruction) {
// - CDQ // - CDQ
// - Op0 Implicit Def Reg(EAX) // - Op0 Implicit Def Reg(EAX)
// - Op1 Implicit Def Reg(EDX) // - Op1 Implicit Def Reg(EDX)
@ -237,7 +237,7 @@ TEST_F(UopsSnippetGeneratorTest, SerialInstruction) {
ASSERT_THAT(IT.getVariableValues(), SizeIs(0)); ASSERT_THAT(IT.getVariableValues(), SizeIs(0));
} }
TEST_F(UopsSnippetGeneratorTest, StaticRenaming) { TEST_F(ParallelSnippetGeneratorTest, StaticRenaming) {
// CMOV32rr has tied variables, we enumerate the possible values to execute // CMOV32rr has tied variables, we enumerate the possible values to execute
// as many in parallel as possible. // as many in parallel as possible.
@ -268,7 +268,7 @@ TEST_F(UopsSnippetGeneratorTest, StaticRenaming) {
<< "Each instruction writes to a different register"; << "Each instruction writes to a different register";
} }
TEST_F(UopsSnippetGeneratorTest, NoTiedVariables) { TEST_F(ParallelSnippetGeneratorTest, NoTiedVariables) {
// CMOV_GR32 has no tied variables, we make sure def and use are different // CMOV_GR32 has no tied variables, we make sure def and use are different
// from each other. // from each other.
@ -302,7 +302,7 @@ TEST_F(UopsSnippetGeneratorTest, NoTiedVariables) {
EXPECT_THAT(IT.getVariableValues()[3], IsInvalid()); EXPECT_THAT(IT.getVariableValues()[3], IsInvalid());
} }
TEST_F(UopsSnippetGeneratorTest, MemoryUse) { TEST_F(ParallelSnippetGeneratorTest, MemoryUse) {
// Mov32rm reads from memory. // Mov32rm reads from memory.
// - MOV32rm // - MOV32rm
// - Op0 Explicit Def RegClass(GR32) // - Op0 Explicit Def RegClass(GR32)
@ -326,7 +326,7 @@ TEST_F(UopsSnippetGeneratorTest, MemoryUse) {
EXPECT_THAT(CT.Info, HasSubstr("no tied variables")); EXPECT_THAT(CT.Info, HasSubstr("no tied variables"));
EXPECT_THAT(CT.Execution, ExecutionMode::UNKNOWN); EXPECT_THAT(CT.Execution, ExecutionMode::UNKNOWN);
ASSERT_THAT(CT.Instructions, ASSERT_THAT(CT.Instructions,
SizeIs(UopsSnippetGenerator::kMinNumDifferentAddresses)); SizeIs(ParallelSnippetGenerator::kMinNumDifferentAddresses));
const InstructionTemplate &IT = CT.Instructions[0]; const InstructionTemplate &IT = CT.Instructions[0];
EXPECT_THAT(IT.getOpcode(), Opcode); EXPECT_THAT(IT.getOpcode(), Opcode);
ASSERT_THAT(IT.getVariableValues(), SizeIs(6)); ASSERT_THAT(IT.getVariableValues(), SizeIs(6));

View File

@ -7,12 +7,10 @@
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#include "../Common/AssemblerUtils.h" #include "../Common/AssemblerUtils.h"
#include "Latency.h"
#include "LlvmState.h" #include "LlvmState.h"
#include "MCInstrDescView.h" #include "MCInstrDescView.h"
#include "RegisterAliasing.h" #include "RegisterAliasing.h"
#include "TestBase.h" #include "TestBase.h"
#include "Uops.h"
#include "X86InstrInfo.h" #include "X86InstrInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBasicBlock.h"
@ -34,8 +32,7 @@ protected:
void SetUp() { void SetUp() {
TM = State.createTargetMachine(); TM = State.createTargetMachine();
Context = std::make_unique<LLVMContext>(); Context = std::make_unique<LLVMContext>();
Mod = Mod = std::make_unique<Module>("X86SnippetRepetitorTest", *Context);
std::make_unique<Module>("X86SnippetRepetitorTest", *Context);
Mod->setDataLayout(TM->createDataLayout()); Mod->setDataLayout(TM->createDataLayout());
MMI = std::make_unique<MachineModuleInfo>(TM.get()); MMI = std::make_unique<MachineModuleInfo>(TM.get());
MF = &createVoidVoidPtrMachineFunction("TestFn", Mod.get(), MMI.get()); MF = &createVoidVoidPtrMachineFunction("TestFn", Mod.get(), MMI.get());

View File

@ -1,4 +1,4 @@
//===-- TestBase.cpp --------------------------------------------*- C++ -*-===// //===-- TestBase.h ----------------------------------------------*- C++ -*-===//
// //
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information. // See https://llvm.org/LICENSE.txt for license information.