mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 12:12:47 +01:00
[llvm-exegesis][NFC] Disassociate snippet generators from benchmark runners
The addition of `inverse_throughput` mode highlighted the disjointedness of snippet generators and benchmark runners because it used the `UopsSnippetGenerator` with the `LatencyBenchmarkRunner`. To keep the code consistent, tie the snippet generators to parallelization/serialization rather than to their benchmark runners. Renaming `LatencySnippetGenerator` -> `SerialSnippetGenerator`. Renaming `UopsSnippetGenerator` -> `ParallelSnippetGenerator`. Differential Revision: https://reviews.llvm.org/D72928
This commit is contained in:
parent
dea61577e1
commit
5da66b880e
@ -6,7 +6,6 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
#include "../Target.h"
|
||||
#include "../Latency.h"
|
||||
#include "AArch64.h"
|
||||
#include "AArch64RegisterInfo.h"
|
||||
|
||||
|
@ -244,9 +244,9 @@ Analysis::makePointsPerSchedClass() const {
|
||||
return Entries;
|
||||
}
|
||||
|
||||
// Uops repeat the same opcode over again. Just show this opcode and show the
|
||||
// whole snippet only on hover.
|
||||
static void writeUopsSnippetHtml(raw_ostream &OS,
|
||||
// Parallel benchmarks repeat the same opcode multiple times. Just show this
|
||||
// opcode and show the whole snippet only on hover.
|
||||
static void writeParallelSnippetHtml(raw_ostream &OS,
|
||||
const std::vector<MCInst> &Instructions,
|
||||
const MCInstrInfo &InstrInfo) {
|
||||
if (Instructions.empty())
|
||||
@ -282,7 +282,7 @@ void Analysis::printPointHtml(const InstructionBenchmark &Point,
|
||||
break;
|
||||
case InstructionBenchmark::Uops:
|
||||
case InstructionBenchmark::InverseThroughput:
|
||||
writeUopsSnippetHtml(OS, Point.Key.Instructions, *InstrInfo_);
|
||||
writeParallelSnippetHtml(OS, Point.Key.Instructions, *InstrInfo_);
|
||||
break;
|
||||
default:
|
||||
llvm_unreachable("invalid mode");
|
||||
|
@ -27,18 +27,20 @@ add_library(LLVMExegesis
|
||||
BenchmarkRunner.cpp
|
||||
Clustering.cpp
|
||||
CodeTemplate.cpp
|
||||
Latency.cpp
|
||||
LatencyBenchmarkRunner.cpp
|
||||
LlvmState.cpp
|
||||
MCInstrDescView.cpp
|
||||
ParallelSnippetGenerator.cpp
|
||||
PerfHelper.cpp
|
||||
RegisterAliasing.cpp
|
||||
RegisterValue.cpp
|
||||
SchedClassResolution.cpp
|
||||
SerialSnippetGenerator.cpp
|
||||
SnippetFile.cpp
|
||||
SnippetGenerator.cpp
|
||||
SnippetRepetitor.cpp
|
||||
Target.cpp
|
||||
Uops.cpp
|
||||
UopsBenchmarkRunner.cpp
|
||||
)
|
||||
|
||||
llvm_update_compile_flags(LLVMExegesis)
|
||||
|
58
tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp
Normal file
58
tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp
Normal file
@ -0,0 +1,58 @@
|
||||
//===-- LatencyBenchmarkRunner.cpp ------------------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "LatencyBenchmarkRunner.h"
|
||||
|
||||
#include "Target.h"
|
||||
#include "BenchmarkRunner.h"
|
||||
|
||||
namespace llvm {
|
||||
namespace exegesis {
|
||||
|
||||
// NOTE(review): this constant is not referenced anywhere else in
// LatencyBenchmarkRunner.cpp as shown in this commit; it looks like a leftover
// copied from Latency.cpp. Confirm and drop it if it is indeed unused.
static constexpr size_t kMaxAliasingInstructions = 10;
|
||||
|
||||
// Constructs a runner by forwarding `State` and `Mode` to the BenchmarkRunner
// base class. Asserts that `Mode` is either InstructionBenchmark::Latency or
// InstructionBenchmark::InverseThroughput.
LatencyBenchmarkRunner::LatencyBenchmarkRunner(const LLVMState &State,
|
||||
InstructionBenchmark::ModeE Mode)
|
||||
: BenchmarkRunner(State, Mode) {
|
||||
assert((Mode == InstructionBenchmark::Latency ||
|
||||
Mode == InstructionBenchmark::InverseThroughput) &&
|
||||
"invalid mode");
|
||||
}
|
||||
|
||||
// Out-of-line defaulted destructor.
LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default;
|
||||
|
||||
// Measures the cycle counter for the benchmarked function.
//
// Calls `Executor.runAndMeasure` NumMeasurements times on the cycle counter
// named by `State.getPfmCounters().CycleCounter` and keeps the smallest value
// observed. If any measurement fails, its error is returned immediately.
// On success the result holds a single measure keyed "latency" or
// "inverse_throughput" according to `Mode`; any other mode yields an empty
// vector.
Expected<std::vector<BenchmarkMeasure>> LatencyBenchmarkRunner::runMeasurements(
|
||||
const FunctionExecutor &Executor) const {
|
||||
// Cycle measurements include some overhead from the kernel. Repeat the
|
||||
// measure several times and take the minimum value.
|
||||
constexpr const int NumMeasurements = 30;
|
||||
// NOTE(review): std::numeric_limits lives in <limits>, which this file does
// not include directly - presumably it arrives transitively; confirm.
int64_t MinValue = std::numeric_limits<int64_t>::max();
|
||||
const char *CounterName = State.getPfmCounters().CycleCounter;
|
||||
// NOTE(review): `I` is size_t while NumMeasurements is int, so the loop
// condition compares values of different signedness.
for (size_t I = 0; I < NumMeasurements; ++I) {
|
||||
auto ExpectedCounterValue = Executor.runAndMeasure(CounterName);
|
||||
// Propagate the first failure instead of continuing to measure.
if (!ExpectedCounterValue)
|
||||
return ExpectedCounterValue.takeError();
|
||||
if (*ExpectedCounterValue < MinValue)
|
||||
MinValue = *ExpectedCounterValue;
|
||||
}
|
||||
std::vector<BenchmarkMeasure> Result;
|
||||
// The key of the reported measure depends on the mode this runner was
// constructed with.
switch (Mode) {
|
||||
case InstructionBenchmark::Latency:
|
||||
Result = {BenchmarkMeasure::Create("latency", MinValue)};
|
||||
break;
|
||||
case InstructionBenchmark::InverseThroughput:
|
||||
Result = {BenchmarkMeasure::Create("inverse_throughput", MinValue)};
|
||||
break;
|
||||
// Any other mode adds no measure; Result stays empty.
default:
|
||||
break;
|
||||
}
|
||||
return std::move(Result);
|
||||
}
|
||||
|
||||
} // namespace exegesis
|
||||
} // namespace llvm
|
@ -1,4 +1,4 @@
|
||||
//===-- Latency.h -----------------------------------------------*- C++ -*-===//
|
||||
//===-- LatencyBenchmarkRunner.h --------------------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
@ -15,23 +15,10 @@
|
||||
#define LLVM_TOOLS_LLVM_EXEGESIS_LATENCY_H
|
||||
|
||||
#include "BenchmarkRunner.h"
|
||||
#include "Error.h"
|
||||
#include "MCInstrDescView.h"
|
||||
#include "SnippetGenerator.h"
|
||||
|
||||
namespace llvm {
|
||||
namespace exegesis {
|
||||
|
||||
class LatencySnippetGenerator : public SnippetGenerator {
|
||||
public:
|
||||
using SnippetGenerator::SnippetGenerator;
|
||||
~LatencySnippetGenerator() override;
|
||||
|
||||
Expected<std::vector<CodeTemplate>>
|
||||
generateCodeTemplates(const Instruction &Instr,
|
||||
const BitVector &ForbiddenRegisters) const override;
|
||||
};
|
||||
|
||||
class LatencyBenchmarkRunner : public BenchmarkRunner {
|
||||
public:
|
||||
LatencyBenchmarkRunner(const LLVMState &State,
|
@ -5,8 +5,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
#include "../Error.h"
|
||||
#include "../Target.h"
|
||||
#include "../Latency.h"
|
||||
#include "MCTargetDesc/MipsBaseInfo.h"
|
||||
#include "Mips.h"
|
||||
#include "MipsRegisterInfo.h"
|
||||
|
@ -1,4 +1,4 @@
|
||||
//===-- Uops.cpp ------------------------------------------------*- C++ -*-===//
|
||||
//===-- ParallelSnippetGenerator.cpp ----------------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
@ -6,9 +6,8 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "Uops.h"
|
||||
#include "ParallelSnippetGenerator.h"
|
||||
|
||||
#include "Assembler.h"
|
||||
#include "BenchmarkRunner.h"
|
||||
#include "MCInstrDescView.h"
|
||||
#include "Target.h"
|
||||
@ -16,9 +15,9 @@
|
||||
// FIXME: Load constants into registers (e.g. with fld1) to not break
|
||||
// instructions like x87.
|
||||
|
||||
// Ideally we would like the only limitation on executing uops to be the issue
|
||||
// ports. Maximizing port pressure increases the likelihood that the load is
|
||||
// distributed evenly across possible ports.
|
||||
// Ideally we would like the only limitation on executing instructions to be the
|
||||
// availability of the CPU resources (e.g. execution ports) needed to execute
|
||||
// them, instead of the availability of their data dependencies.
|
||||
|
||||
// To achieve that, one approach is to generate instructions that do not have
|
||||
// data dependencies between them.
|
||||
@ -89,11 +88,9 @@ getVariablesWithTiedOperands(const Instruction &Instr) {
|
||||
return Result;
|
||||
}
|
||||
|
||||
UopsBenchmarkRunner::~UopsBenchmarkRunner() = default;
|
||||
ParallelSnippetGenerator::~ParallelSnippetGenerator() = default;
|
||||
|
||||
UopsSnippetGenerator::~UopsSnippetGenerator() = default;
|
||||
|
||||
void UopsSnippetGenerator::instantiateMemoryOperands(
|
||||
void ParallelSnippetGenerator::instantiateMemoryOperands(
|
||||
const unsigned ScratchSpacePointerInReg,
|
||||
std::vector<InstructionTemplate> &Instructions) const {
|
||||
if (ScratchSpacePointerInReg == 0)
|
||||
@ -157,7 +154,7 @@ static std::vector<InstructionTemplate> generateSnippetUsingStaticRenaming(
|
||||
}
|
||||
}
|
||||
|
||||
Expected<std::vector<CodeTemplate>> UopsSnippetGenerator::generateCodeTemplates(
|
||||
Expected<std::vector<CodeTemplate>> ParallelSnippetGenerator::generateCodeTemplates(
|
||||
const Instruction &Instr, const BitVector &ForbiddenRegisters) const {
|
||||
CodeTemplate CT;
|
||||
CT.ScratchSpacePointerInReg =
|
||||
@ -219,34 +216,7 @@ Expected<std::vector<CodeTemplate>> UopsSnippetGenerator::generateCodeTemplates(
|
||||
return getSingleton(std::move(CT));
|
||||
}
|
||||
|
||||
Expected<std::vector<BenchmarkMeasure>>
|
||||
UopsBenchmarkRunner::runMeasurements(const FunctionExecutor &Executor) const {
|
||||
std::vector<BenchmarkMeasure> Result;
|
||||
const PfmCountersInfo &PCI = State.getPfmCounters();
|
||||
// Uops per port.
|
||||
for (const auto *IssueCounter = PCI.IssueCounters,
|
||||
*IssueCounterEnd = PCI.IssueCounters + PCI.NumIssueCounters;
|
||||
IssueCounter != IssueCounterEnd; ++IssueCounter) {
|
||||
if (!IssueCounter->Counter)
|
||||
continue;
|
||||
auto ExpectedCounterValue = Executor.runAndMeasure(IssueCounter->Counter);
|
||||
if (!ExpectedCounterValue)
|
||||
return ExpectedCounterValue.takeError();
|
||||
Result.push_back(BenchmarkMeasure::Create(IssueCounter->ProcResName,
|
||||
*ExpectedCounterValue));
|
||||
}
|
||||
// NumMicroOps.
|
||||
if (const char *const UopsCounter = PCI.UopsCounter) {
|
||||
auto ExpectedCounterValue = Executor.runAndMeasure(UopsCounter);
|
||||
if (!ExpectedCounterValue)
|
||||
return ExpectedCounterValue.takeError();
|
||||
Result.push_back(
|
||||
BenchmarkMeasure::Create("NumMicroOps", *ExpectedCounterValue));
|
||||
}
|
||||
return std::move(Result);
|
||||
}
|
||||
|
||||
constexpr const size_t UopsSnippetGenerator::kMinNumDifferentAddresses;
|
||||
constexpr const size_t ParallelSnippetGenerator::kMinNumDifferentAddresses;
|
||||
|
||||
} // namespace exegesis
|
||||
} // namespace llvm
|
@ -1,4 +1,4 @@
|
||||
//===-- Uops.h --------------------------------------------------*- C++ -*-===//
|
||||
//===-- ParallelSnippetGenerator.h ------------------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
@ -7,23 +7,22 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
///
|
||||
/// \file
|
||||
/// A BenchmarkRunner implementation to measure uop decomposition.
|
||||
/// A SnippetGenerator implementation to create parallel instruction snippets.
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_TOOLS_LLVM_EXEGESIS_UOPS_H
|
||||
#define LLVM_TOOLS_LLVM_EXEGESIS_UOPS_H
|
||||
#ifndef LLVM_TOOLS_LLVM_EXEGESIS_PARALLELSNIPPETGENERATOR_H
|
||||
#define LLVM_TOOLS_LLVM_EXEGESIS_PARALLELSNIPPETGENERATOR_H
|
||||
|
||||
#include "BenchmarkRunner.h"
|
||||
#include "SnippetGenerator.h"
|
||||
|
||||
namespace llvm {
|
||||
namespace exegesis {
|
||||
|
||||
class UopsSnippetGenerator : public SnippetGenerator {
|
||||
class ParallelSnippetGenerator : public SnippetGenerator {
|
||||
public:
|
||||
using SnippetGenerator::SnippetGenerator;
|
||||
~UopsSnippetGenerator() override;
|
||||
~ParallelSnippetGenerator() override;
|
||||
|
||||
Expected<std::vector<CodeTemplate>>
|
||||
generateCodeTemplates(const Instruction &Instr,
|
||||
@ -60,20 +59,7 @@ private:
|
||||
std::vector<InstructionTemplate> &SnippetTemplate) const;
|
||||
};
|
||||
|
||||
class UopsBenchmarkRunner : public BenchmarkRunner {
|
||||
public:
|
||||
UopsBenchmarkRunner(const LLVMState &State)
|
||||
: BenchmarkRunner(State, InstructionBenchmark::Uops) {}
|
||||
~UopsBenchmarkRunner() override;
|
||||
|
||||
static constexpr const size_t kMinNumDifferentAddresses = 6;
|
||||
|
||||
private:
|
||||
Expected<std::vector<BenchmarkMeasure>>
|
||||
runMeasurements(const FunctionExecutor &Executor) const override;
|
||||
};
|
||||
|
||||
} // namespace exegesis
|
||||
} // namespace llvm
|
||||
|
||||
#endif // LLVM_TOOLS_LLVM_EXEGESIS_UOPS_H
|
||||
#endif // LLVM_TOOLS_LLVM_EXEGESIS_PARALLELSNIPPETGENERATOR_H
|
@ -7,7 +7,6 @@
|
||||
// The PowerPC ExegesisTarget.
|
||||
//===----------------------------------------------------------------------===//
|
||||
#include "../Target.h"
|
||||
#include "../Latency.h"
|
||||
#include "PPC.h"
|
||||
#include "PPCRegisterInfo.h"
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
//===-- Latency.cpp ---------------------------------------------*- C++ -*-===//
|
||||
//===-- SerialSnippetGenerator.cpp ------------------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
@ -6,17 +6,13 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "Latency.h"
|
||||
#include "SerialSnippetGenerator.h"
|
||||
|
||||
#include "Assembler.h"
|
||||
#include "BenchmarkRunner.h"
|
||||
#include "MCInstrDescView.h"
|
||||
#include "PerfHelper.h"
|
||||
#include "Target.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/MC/MCInst.h"
|
||||
#include "llvm/MC/MCInstBuilder.h"
|
||||
#include "llvm/Support/FormatVariadic.h"
|
||||
#include "CodeTemplate.h"
|
||||
#include <algorithm>
|
||||
#include <numeric>
|
||||
#include <vector>
|
||||
|
||||
namespace llvm {
|
||||
namespace exegesis {
|
||||
@ -149,10 +145,10 @@ static void appendCodeTemplates(const LLVMState &State,
|
||||
}
|
||||
}
|
||||
|
||||
LatencySnippetGenerator::~LatencySnippetGenerator() = default;
|
||||
SerialSnippetGenerator::~SerialSnippetGenerator() = default;
|
||||
|
||||
Expected<std::vector<CodeTemplate>>
|
||||
LatencySnippetGenerator::generateCodeTemplates(
|
||||
SerialSnippetGenerator::generateCodeTemplates(
|
||||
const Instruction &Instr, const BitVector &ForbiddenRegisters) const {
|
||||
std::vector<CodeTemplate> Results;
|
||||
const ExecutionMode EM = getExecutionModes(Instr, ForbiddenRegisters);
|
||||
@ -169,43 +165,5 @@ LatencySnippetGenerator::generateCodeTemplates(
|
||||
return std::move(Results);
|
||||
}
|
||||
|
||||
LatencyBenchmarkRunner::LatencyBenchmarkRunner(const LLVMState &State,
|
||||
InstructionBenchmark::ModeE Mode)
|
||||
: BenchmarkRunner(State, Mode) {
|
||||
assert((Mode == InstructionBenchmark::Latency ||
|
||||
Mode == InstructionBenchmark::InverseThroughput) &&
|
||||
"invalid mode");
|
||||
}
|
||||
|
||||
LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default;
|
||||
|
||||
Expected<std::vector<BenchmarkMeasure>> LatencyBenchmarkRunner::runMeasurements(
|
||||
const FunctionExecutor &Executor) const {
|
||||
// Cycle measurements include some overhead from the kernel. Repeat the
|
||||
// measure several times and take the minimum value.
|
||||
constexpr const int NumMeasurements = 30;
|
||||
int64_t MinValue = std::numeric_limits<int64_t>::max();
|
||||
const char *CounterName = State.getPfmCounters().CycleCounter;
|
||||
for (size_t I = 0; I < NumMeasurements; ++I) {
|
||||
auto ExpectedCounterValue = Executor.runAndMeasure(CounterName);
|
||||
if (!ExpectedCounterValue)
|
||||
return ExpectedCounterValue.takeError();
|
||||
if (*ExpectedCounterValue < MinValue)
|
||||
MinValue = *ExpectedCounterValue;
|
||||
}
|
||||
std::vector<BenchmarkMeasure> Result;
|
||||
switch (Mode) {
|
||||
case InstructionBenchmark::Latency:
|
||||
Result = {BenchmarkMeasure::Create("latency", MinValue)};
|
||||
break;
|
||||
case InstructionBenchmark::InverseThroughput:
|
||||
Result = {BenchmarkMeasure::Create("inverse_throughput", MinValue)};
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return std::move(Result);
|
||||
}
|
||||
|
||||
} // namespace exegesis
|
||||
} // namespace llvm
|
37
tools/llvm-exegesis/lib/SerialSnippetGenerator.h
Normal file
37
tools/llvm-exegesis/lib/SerialSnippetGenerator.h
Normal file
@ -0,0 +1,37 @@
|
||||
//===-- SerialSnippetGenerator.h --------------------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
///
|
||||
/// \file
|
||||
/// A SnippetGenerator implementation to create serial instruction snippets.
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_TOOLS_LLVM_EXEGESIS_SERIALSNIPPETGENERATOR_H
|
||||
#define LLVM_TOOLS_LLVM_EXEGESIS_SERIALSNIPPETGENERATOR_H
|
||||
|
||||
#include "Error.h"
|
||||
#include "MCInstrDescView.h"
|
||||
#include "SnippetGenerator.h"
|
||||
|
||||
namespace llvm {
|
||||
namespace exegesis {
|
||||
|
||||
/// SnippetGenerator subclass used to create serial instruction snippets.
/// It inherits the base-class constructors and overrides
/// generateCodeTemplates().
class SerialSnippetGenerator : public SnippetGenerator {
|
||||
public:
|
||||
// Reuse the constructors declared by SnippetGenerator.
using SnippetGenerator::SnippetGenerator;
|
||||
~SerialSnippetGenerator() override;
|
||||
|
||||
/// Produces the code templates for \p Instr, given the registers in
/// \p ForbiddenRegisters. Returns an error instead of a vector on failure.
Expected<std::vector<CodeTemplate>>
|
||||
generateCodeTemplates(const Instruction &Instr,
|
||||
const BitVector &ForbiddenRegisters) const override;
|
||||
};
|
||||
|
||||
} // namespace exegesis
|
||||
} // namespace llvm
|
||||
|
||||
#endif // LLVM_TOOLS_LLVM_EXEGESIS_SERIALSNIPPETGENERATOR_H
|
@ -7,8 +7,10 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
#include "Target.h"
|
||||
|
||||
#include "Latency.h"
|
||||
#include "Uops.h"
|
||||
#include "LatencyBenchmarkRunner.h"
|
||||
#include "ParallelSnippetGenerator.h"
|
||||
#include "SerialSnippetGenerator.h"
|
||||
#include "UopsBenchmarkRunner.h"
|
||||
|
||||
namespace llvm {
|
||||
namespace exegesis {
|
||||
@ -43,10 +45,10 @@ std::unique_ptr<SnippetGenerator> ExegesisTarget::createSnippetGenerator(
|
||||
case InstructionBenchmark::Unknown:
|
||||
return nullptr;
|
||||
case InstructionBenchmark::Latency:
|
||||
return createLatencySnippetGenerator(State, Opts);
|
||||
return createSerialSnippetGenerator(State, Opts);
|
||||
case InstructionBenchmark::Uops:
|
||||
case InstructionBenchmark::InverseThroughput:
|
||||
return createUopsSnippetGenerator(State, Opts);
|
||||
return createParallelSnippetGenerator(State, Opts);
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
@ -77,14 +79,14 @@ ExegesisTarget::createBenchmarkRunner(InstructionBenchmark::ModeE Mode,
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::unique_ptr<SnippetGenerator> ExegesisTarget::createLatencySnippetGenerator(
|
||||
std::unique_ptr<SnippetGenerator> ExegesisTarget::createSerialSnippetGenerator(
|
||||
const LLVMState &State, const SnippetGenerator::Options &Opts) const {
|
||||
return std::make_unique<LatencySnippetGenerator>(State, Opts);
|
||||
return std::make_unique<SerialSnippetGenerator>(State, Opts);
|
||||
}
|
||||
|
||||
std::unique_ptr<SnippetGenerator> ExegesisTarget::createUopsSnippetGenerator(
|
||||
std::unique_ptr<SnippetGenerator> ExegesisTarget::createParallelSnippetGenerator(
|
||||
const LLVMState &State, const SnippetGenerator::Options &Opts) const {
|
||||
return std::make_unique<UopsSnippetGenerator>(State, Opts);
|
||||
return std::make_unique<ParallelSnippetGenerator>(State, Opts);
|
||||
}
|
||||
|
||||
std::unique_ptr<BenchmarkRunner> ExegesisTarget::createLatencyBenchmarkRunner(
|
||||
|
@ -144,9 +144,9 @@ private:
|
||||
|
||||
// Targets can implement their own snippet generators/benchmarks runners by
|
||||
// implementing these.
|
||||
std::unique_ptr<SnippetGenerator> virtual createLatencySnippetGenerator(
|
||||
std::unique_ptr<SnippetGenerator> virtual createSerialSnippetGenerator(
|
||||
const LLVMState &State, const SnippetGenerator::Options &Opts) const;
|
||||
std::unique_ptr<SnippetGenerator> virtual createUopsSnippetGenerator(
|
||||
std::unique_ptr<SnippetGenerator> virtual createParallelSnippetGenerator(
|
||||
const LLVMState &State, const SnippetGenerator::Options &Opts) const;
|
||||
std::unique_ptr<BenchmarkRunner> virtual createLatencyBenchmarkRunner(
|
||||
const LLVMState &State, InstructionBenchmark::ModeE Mode) const;
|
||||
|
46
tools/llvm-exegesis/lib/UopsBenchmarkRunner.cpp
Normal file
46
tools/llvm-exegesis/lib/UopsBenchmarkRunner.cpp
Normal file
@ -0,0 +1,46 @@
|
||||
//===-- UopsBenchmarkRunner.cpp ---------------------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "UopsBenchmarkRunner.h"
|
||||
|
||||
#include "Target.h"
|
||||
|
||||
namespace llvm {
|
||||
namespace exegesis {
|
||||
|
||||
// Out-of-line defaulted destructor.
UopsBenchmarkRunner::~UopsBenchmarkRunner() = default;
|
||||
|
||||
// Collects one measure per configured issue counter plus, when available, a
// total micro-op count.
//
// Walks the `PCI.NumIssueCounters` entries of `PCI.IssueCounters`, skipping
// entries whose `Counter` is null, and records each measured value under the
// entry's `ProcResName`. If `PCI.UopsCounter` is non-null, its value is then
// appended under the key "NumMicroOps". The error of the first failing
// measurement is returned immediately.
Expected<std::vector<BenchmarkMeasure>>
|
||||
UopsBenchmarkRunner::runMeasurements(const FunctionExecutor &Executor) const {
|
||||
std::vector<BenchmarkMeasure> Result;
|
||||
const PfmCountersInfo &PCI = State.getPfmCounters();
|
||||
// Uops per port.
|
||||
for (const auto *IssueCounter = PCI.IssueCounters,
|
||||
*IssueCounterEnd = PCI.IssueCounters + PCI.NumIssueCounters;
|
||||
IssueCounter != IssueCounterEnd; ++IssueCounter) {
|
||||
// Entries without a counter name are skipped.
if (!IssueCounter->Counter)
|
||||
continue;
|
||||
auto ExpectedCounterValue = Executor.runAndMeasure(IssueCounter->Counter);
|
||||
if (!ExpectedCounterValue)
|
||||
return ExpectedCounterValue.takeError();
|
||||
Result.push_back(BenchmarkMeasure::Create(IssueCounter->ProcResName,
|
||||
*ExpectedCounterValue));
|
||||
}
|
||||
// NumMicroOps.
|
||||
// Only measured when a uops counter is configured (non-null).
if (const char *const UopsCounter = PCI.UopsCounter) {
|
||||
auto ExpectedCounterValue = Executor.runAndMeasure(UopsCounter);
|
||||
if (!ExpectedCounterValue)
|
||||
return ExpectedCounterValue.takeError();
|
||||
Result.push_back(
|
||||
BenchmarkMeasure::Create("NumMicroOps", *ExpectedCounterValue));
|
||||
}
|
||||
return std::move(Result);
|
||||
}
|
||||
|
||||
} // namespace exegesis
|
||||
} // namespace llvm
|
38
tools/llvm-exegesis/lib/UopsBenchmarkRunner.h
Normal file
38
tools/llvm-exegesis/lib/UopsBenchmarkRunner.h
Normal file
@ -0,0 +1,38 @@
|
||||
//===-- UopsBenchmarkRunner.h -----------------------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
///
|
||||
/// \file
|
||||
/// A BenchmarkRunner implementation to measure uop decomposition.
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_TOOLS_LLVM_EXEGESIS_UOPSBENCHMARKRUNNER_H
|
||||
#define LLVM_TOOLS_LLVM_EXEGESIS_UOPSBENCHMARKRUNNER_H
|
||||
|
||||
#include "BenchmarkRunner.h"
|
||||
|
||||
namespace llvm {
|
||||
namespace exegesis {
|
||||
|
||||
/// BenchmarkRunner implementation for InstructionBenchmark::Uops mode.
/// The measurement logic itself is in the runMeasurements() override.
class UopsBenchmarkRunner : public BenchmarkRunner {
|
||||
public:
|
||||
// Always constructs the base class in Uops mode.
// NOTE(review): single-argument constructor is not marked `explicit`, so an
// LLVMState converts implicitly - confirm this is intended.
UopsBenchmarkRunner(const LLVMState &State)
|
||||
: BenchmarkRunner(State, InstructionBenchmark::Uops) {}
|
||||
~UopsBenchmarkRunner() override;
|
||||
|
||||
// NOTE(review): the same commit defines and tests
// ParallelSnippetGenerator::kMinNumDifferentAddresses, and nothing in
// UopsBenchmarkRunner.cpp as shown reads this member - possibly a leftover
// from the old Uops.h; confirm before relying on it.
static constexpr const size_t kMinNumDifferentAddresses = 6;
|
||||
|
||||
private:
|
||||
// Performs the measurements for this runner through `Executor`.
Expected<std::vector<BenchmarkMeasure>>
|
||||
runMeasurements(const FunctionExecutor &Executor) const override;
|
||||
};
|
||||
|
||||
} // namespace exegesis
|
||||
} // namespace llvm
|
||||
|
||||
#endif // LLVM_TOOLS_LLVM_EXEGESIS_UOPSBENCHMARKRUNNER_H
|
@ -8,9 +8,9 @@
|
||||
#include "../Target.h"
|
||||
|
||||
#include "../Error.h"
|
||||
#include "../Latency.h"
|
||||
#include "../SerialSnippetGenerator.h"
|
||||
#include "../SnippetGenerator.h"
|
||||
#include "../Uops.h"
|
||||
#include "../ParallelSnippetGenerator.h"
|
||||
#include "MCTargetDesc/X86BaseInfo.h"
|
||||
#include "MCTargetDesc/X86MCTargetDesc.h"
|
||||
#include "X86.h"
|
||||
@ -242,9 +242,9 @@ static Expected<std::vector<CodeTemplate>> generateLEATemplatesCommon(
|
||||
}
|
||||
|
||||
namespace {
|
||||
class X86LatencySnippetGenerator : public LatencySnippetGenerator {
|
||||
class X86SerialSnippetGenerator : public SerialSnippetGenerator {
|
||||
public:
|
||||
using LatencySnippetGenerator::LatencySnippetGenerator;
|
||||
using SerialSnippetGenerator::SerialSnippetGenerator;
|
||||
|
||||
Expected<std::vector<CodeTemplate>>
|
||||
generateCodeTemplates(const Instruction &Instr,
|
||||
@ -253,7 +253,7 @@ public:
|
||||
} // namespace
|
||||
|
||||
Expected<std::vector<CodeTemplate>>
|
||||
X86LatencySnippetGenerator::generateCodeTemplates(
|
||||
X86SerialSnippetGenerator::generateCodeTemplates(
|
||||
const Instruction &Instr, const BitVector &ForbiddenRegisters) const {
|
||||
if (auto E = IsInvalidOpcode(Instr))
|
||||
return std::move(E);
|
||||
@ -271,7 +271,7 @@ X86LatencySnippetGenerator::generateCodeTemplates(
|
||||
|
||||
switch (getX86FPFlags(Instr)) {
|
||||
case X86II::NotFP:
|
||||
return LatencySnippetGenerator::generateCodeTemplates(Instr,
|
||||
return SerialSnippetGenerator::generateCodeTemplates(Instr,
|
||||
ForbiddenRegisters);
|
||||
case X86II::ZeroArgFP:
|
||||
case X86II::OneArgFP:
|
||||
@ -292,9 +292,9 @@ X86LatencySnippetGenerator::generateCodeTemplates(
|
||||
}
|
||||
|
||||
namespace {
|
||||
class X86UopsSnippetGenerator : public UopsSnippetGenerator {
|
||||
class X86ParallelSnippetGenerator : public ParallelSnippetGenerator {
|
||||
public:
|
||||
using UopsSnippetGenerator::UopsSnippetGenerator;
|
||||
using ParallelSnippetGenerator::ParallelSnippetGenerator;
|
||||
|
||||
Expected<std::vector<CodeTemplate>>
|
||||
generateCodeTemplates(const Instruction &Instr,
|
||||
@ -304,7 +304,7 @@ public:
|
||||
} // namespace
|
||||
|
||||
Expected<std::vector<CodeTemplate>>
|
||||
X86UopsSnippetGenerator::generateCodeTemplates(
|
||||
X86ParallelSnippetGenerator::generateCodeTemplates(
|
||||
const Instruction &Instr, const BitVector &ForbiddenRegisters) const {
|
||||
if (auto E = IsInvalidOpcode(Instr))
|
||||
return std::move(E);
|
||||
@ -333,7 +333,7 @@ X86UopsSnippetGenerator::generateCodeTemplates(
|
||||
|
||||
switch (getX86FPFlags(Instr)) {
|
||||
case X86II::NotFP:
|
||||
return UopsSnippetGenerator::generateCodeTemplates(Instr,
|
||||
return ParallelSnippetGenerator::generateCodeTemplates(Instr,
|
||||
ForbiddenRegisters);
|
||||
case X86II::ZeroArgFP:
|
||||
case X86II::OneArgFP:
|
||||
@ -577,16 +577,16 @@ private:
|
||||
sizeof(kUnavailableRegisters[0]));
|
||||
}
|
||||
|
||||
std::unique_ptr<SnippetGenerator> createLatencySnippetGenerator(
|
||||
std::unique_ptr<SnippetGenerator> createSerialSnippetGenerator(
|
||||
const LLVMState &State,
|
||||
const SnippetGenerator::Options &Opts) const override {
|
||||
return std::make_unique<X86LatencySnippetGenerator>(State, Opts);
|
||||
return std::make_unique<X86SerialSnippetGenerator>(State, Opts);
|
||||
}
|
||||
|
||||
std::unique_ptr<SnippetGenerator> createUopsSnippetGenerator(
|
||||
std::unique_ptr<SnippetGenerator> createParallelSnippetGenerator(
|
||||
const LLVMState &State,
|
||||
const SnippetGenerator::Options &Opts) const override {
|
||||
return std::make_unique<X86UopsSnippetGenerator>(State, Opts);
|
||||
return std::make_unique<X86ParallelSnippetGenerator>(State, Opts);
|
||||
}
|
||||
|
||||
bool matchesArch(Triple::ArchType Arch) const override {
|
||||
|
@ -7,13 +7,13 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "../Common/AssemblerUtils.h"
|
||||
#include "Latency.h"
|
||||
#include "LlvmState.h"
|
||||
#include "MCInstrDescView.h"
|
||||
#include "MipsInstrInfo.h"
|
||||
#include "ParallelSnippetGenerator.h"
|
||||
#include "RegisterAliasing.h"
|
||||
#include "SerialSnippetGenerator.h"
|
||||
#include "TestBase.h"
|
||||
#include "Uops.h"
|
||||
|
||||
#include <unordered_set>
|
||||
|
||||
@ -48,12 +48,12 @@ protected:
|
||||
SnippetGeneratorT Generator;
|
||||
};
|
||||
|
||||
using LatencySnippetGeneratorTest =
|
||||
SnippetGeneratorTest<LatencySnippetGenerator>;
|
||||
using SerialSnippetGeneratorTest = SnippetGeneratorTest<SerialSnippetGenerator>;
|
||||
|
||||
using UopsSnippetGeneratorTest = SnippetGeneratorTest<UopsSnippetGenerator>;
|
||||
using ParallelSnippetGeneratorTest =
|
||||
SnippetGeneratorTest<ParallelSnippetGenerator>;
|
||||
|
||||
TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughExplicitRegs) {
|
||||
TEST_F(SerialSnippetGeneratorTest, ImplicitSelfDependencyThroughExplicitRegs) {
|
||||
// - ADD
|
||||
// - Op0 Explicit Def RegClass(GPR32)
|
||||
// - Op1 Explicit Use RegClass(GPR32)
|
||||
@ -77,7 +77,7 @@ TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughExplicitRegs) {
|
||||
<< "Op0 is either set to Op1 or to Op2";
|
||||
}
|
||||
|
||||
TEST_F(LatencySnippetGeneratorTest,
|
||||
TEST_F(SerialSnippetGeneratorTest,
|
||||
ImplicitSelfDependencyThroughExplicitRegsForbidAll) {
|
||||
// - XOR
|
||||
// - Op0 Explicit Def RegClass(GPR32)
|
||||
@ -96,7 +96,7 @@ TEST_F(LatencySnippetGeneratorTest,
|
||||
consumeError(std::move(Error));
|
||||
}
|
||||
|
||||
TEST_F(UopsSnippetGeneratorTest, MemoryUse) {
|
||||
TEST_F(ParallelSnippetGeneratorTest, MemoryUse) {
|
||||
// LB reads from memory.
|
||||
// - LB
|
||||
// - Op0 Explicit Def RegClass(GPR32)
|
||||
@ -110,10 +110,11 @@ TEST_F(UopsSnippetGeneratorTest, MemoryUse) {
|
||||
const auto CodeTemplates = checkAndGetCodeTemplates(Opcode);
|
||||
ASSERT_THAT(CodeTemplates, SizeIs(1));
|
||||
const auto &CT = CodeTemplates[0];
|
||||
EXPECT_THAT(CT.Info, HasSubstr("instruction is parallel, repeating a random one."));
|
||||
EXPECT_THAT(CT.Info,
|
||||
HasSubstr("instruction is parallel, repeating a random one."));
|
||||
EXPECT_THAT(CT.Execution, ExecutionMode::UNKNOWN);
|
||||
ASSERT_THAT(CT.Instructions,
|
||||
SizeIs(UopsSnippetGenerator::kMinNumDifferentAddresses));
|
||||
SizeIs(ParallelSnippetGenerator::kMinNumDifferentAddresses));
|
||||
const InstructionTemplate &IT = CT.Instructions[0];
|
||||
EXPECT_THAT(IT.getOpcode(), Opcode);
|
||||
ASSERT_THAT(IT.getVariableValues(), SizeIs(3));
|
||||
|
@ -7,12 +7,12 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "../Common/AssemblerUtils.h"
|
||||
#include "Latency.h"
|
||||
#include "LlvmState.h"
|
||||
#include "MCInstrDescView.h"
|
||||
#include "ParallelSnippetGenerator.h"
|
||||
#include "RegisterAliasing.h"
|
||||
#include "SerialSnippetGenerator.h"
|
||||
#include "TestBase.h"
|
||||
#include "Uops.h"
|
||||
#include "X86InstrInfo.h"
|
||||
|
||||
#include <unordered_set>
|
||||
@ -59,12 +59,12 @@ protected:
|
||||
SnippetGeneratorT Generator;
|
||||
};
|
||||
|
||||
using LatencySnippetGeneratorTest =
|
||||
SnippetGeneratorTest<LatencySnippetGenerator>;
|
||||
using SerialSnippetGeneratorTest = SnippetGeneratorTest<SerialSnippetGenerator>;
|
||||
|
||||
using UopsSnippetGeneratorTest = SnippetGeneratorTest<UopsSnippetGenerator>;
|
||||
using ParallelSnippetGeneratorTest =
|
||||
SnippetGeneratorTest<ParallelSnippetGenerator>;
|
||||
|
||||
TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughImplicitReg) {
|
||||
TEST_F(SerialSnippetGeneratorTest, ImplicitSelfDependencyThroughImplicitReg) {
|
||||
// - ADC16i16
|
||||
// - Op0 Explicit Use Immediate
|
||||
// - Op1 Implicit Def Reg(AX)
|
||||
@ -90,7 +90,7 @@ TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughImplicitReg) {
|
||||
EXPECT_THAT(IT.getVariableValues()[0], IsInvalid()) << "Immediate is not set";
|
||||
}
|
||||
|
||||
TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughTiedRegs) {
|
||||
TEST_F(SerialSnippetGeneratorTest, ImplicitSelfDependencyThroughTiedRegs) {
|
||||
// - ADD16ri
|
||||
// - Op0 Explicit Def RegClass(GR16)
|
||||
// - Op1 Explicit Use RegClass(GR16) TiedToOp0
|
||||
@ -114,7 +114,7 @@ TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughTiedRegs) {
|
||||
EXPECT_THAT(IT.getVariableValues()[1], IsInvalid()) << "Operand 2 is not set";
|
||||
}
|
||||
|
||||
TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughExplicitRegs) {
|
||||
TEST_F(SerialSnippetGeneratorTest, ImplicitSelfDependencyThroughExplicitRegs) {
|
||||
// - VXORPSrr
|
||||
// - Op0 Explicit Def RegClass(VR128)
|
||||
// - Op1 Explicit Use RegClass(VR128)
|
||||
@ -138,7 +138,7 @@ TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughExplicitRegs) {
|
||||
<< "Op0 is either set to Op1 or to Op2";
|
||||
}
|
||||
|
||||
TEST_F(LatencySnippetGeneratorTest,
|
||||
TEST_F(SerialSnippetGeneratorTest,
|
||||
ImplicitSelfDependencyThroughExplicitRegsForbidAll) {
|
||||
// - VXORPSrr
|
||||
// - Op0 Explicit Def RegClass(VR128)
|
||||
@ -158,7 +158,7 @@ TEST_F(LatencySnippetGeneratorTest,
|
||||
consumeError(std::move(Error));
|
||||
}
|
||||
|
||||
TEST_F(LatencySnippetGeneratorTest, DependencyThroughOtherOpcode) {
|
||||
TEST_F(SerialSnippetGeneratorTest, DependencyThroughOtherOpcode) {
|
||||
// - CMP64rr
|
||||
// - Op0 Explicit Use RegClass(GR64)
|
||||
// - Op1 Explicit Use RegClass(GR64)
|
||||
@ -182,7 +182,7 @@ TEST_F(LatencySnippetGeneratorTest, DependencyThroughOtherOpcode) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(LatencySnippetGeneratorTest, LAHF) {
|
||||
TEST_F(SerialSnippetGeneratorTest, LAHF) {
|
||||
// - LAHF
|
||||
// - Op0 Implicit Def Reg(AH)
|
||||
// - Op1 Implicit Use Reg(EFLAGS)
|
||||
@ -198,7 +198,7 @@ TEST_F(LatencySnippetGeneratorTest, LAHF) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(UopsSnippetGeneratorTest, ParallelInstruction) {
|
||||
TEST_F(ParallelSnippetGeneratorTest, ParallelInstruction) {
|
||||
// - BNDCL32rr
|
||||
// - Op0 Explicit Use RegClass(BNDR)
|
||||
// - Op1 Explicit Use RegClass(GR32)
|
||||
@ -218,7 +218,7 @@ TEST_F(UopsSnippetGeneratorTest, ParallelInstruction) {
|
||||
EXPECT_THAT(IT.getVariableValues()[1], IsInvalid());
|
||||
}
|
||||
|
||||
TEST_F(UopsSnippetGeneratorTest, SerialInstruction) {
|
||||
TEST_F(ParallelSnippetGeneratorTest, SerialInstruction) {
|
||||
// - CDQ
|
||||
// - Op0 Implicit Def Reg(EAX)
|
||||
// - Op1 Implicit Def Reg(EDX)
|
||||
@ -237,7 +237,7 @@ TEST_F(UopsSnippetGeneratorTest, SerialInstruction) {
|
||||
ASSERT_THAT(IT.getVariableValues(), SizeIs(0));
|
||||
}
|
||||
|
||||
TEST_F(UopsSnippetGeneratorTest, StaticRenaming) {
|
||||
TEST_F(ParallelSnippetGeneratorTest, StaticRenaming) {
|
||||
// CMOV32rr has tied variables, we enumerate the possible values to execute
|
||||
// as many in parallel as possible.
|
||||
|
||||
@ -268,7 +268,7 @@ TEST_F(UopsSnippetGeneratorTest, StaticRenaming) {
|
||||
<< "Each instruction writes to a different register";
|
||||
}
|
||||
|
||||
TEST_F(UopsSnippetGeneratorTest, NoTiedVariables) {
|
||||
TEST_F(ParallelSnippetGeneratorTest, NoTiedVariables) {
|
||||
// CMOV_GR32 has no tied variables, we make sure def and use are different
|
||||
// from each other.
|
||||
|
||||
@ -302,7 +302,7 @@ TEST_F(UopsSnippetGeneratorTest, NoTiedVariables) {
|
||||
EXPECT_THAT(IT.getVariableValues()[3], IsInvalid());
|
||||
}
|
||||
|
||||
TEST_F(UopsSnippetGeneratorTest, MemoryUse) {
|
||||
TEST_F(ParallelSnippetGeneratorTest, MemoryUse) {
|
||||
// Mov32rm reads from memory.
|
||||
// - MOV32rm
|
||||
// - Op0 Explicit Def RegClass(GR32)
|
||||
@ -326,7 +326,7 @@ TEST_F(UopsSnippetGeneratorTest, MemoryUse) {
|
||||
EXPECT_THAT(CT.Info, HasSubstr("no tied variables"));
|
||||
EXPECT_THAT(CT.Execution, ExecutionMode::UNKNOWN);
|
||||
ASSERT_THAT(CT.Instructions,
|
||||
SizeIs(UopsSnippetGenerator::kMinNumDifferentAddresses));
|
||||
SizeIs(ParallelSnippetGenerator::kMinNumDifferentAddresses));
|
||||
const InstructionTemplate &IT = CT.Instructions[0];
|
||||
EXPECT_THAT(IT.getOpcode(), Opcode);
|
||||
ASSERT_THAT(IT.getVariableValues(), SizeIs(6));
|
||||
|
@ -7,12 +7,10 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "../Common/AssemblerUtils.h"
|
||||
#include "Latency.h"
|
||||
#include "LlvmState.h"
|
||||
#include "MCInstrDescView.h"
|
||||
#include "RegisterAliasing.h"
|
||||
#include "TestBase.h"
|
||||
#include "Uops.h"
|
||||
#include "X86InstrInfo.h"
|
||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||
|
||||
@ -34,8 +32,7 @@ protected:
|
||||
void SetUp() {
|
||||
TM = State.createTargetMachine();
|
||||
Context = std::make_unique<LLVMContext>();
|
||||
Mod =
|
||||
std::make_unique<Module>("X86SnippetRepetitorTest", *Context);
|
||||
Mod = std::make_unique<Module>("X86SnippetRepetitorTest", *Context);
|
||||
Mod->setDataLayout(TM->createDataLayout());
|
||||
MMI = std::make_unique<MachineModuleInfo>(TM.get());
|
||||
MF = &createVoidVoidPtrMachineFunction("TestFn", Mod.get(), MMI.get());
|
||||
|
@ -1,4 +1,4 @@
|
||||
//===-- TestBase.cpp --------------------------------------------*- C++ -*-===//
|
||||
//===-- TestBase.h ----------------------------------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
|
Loading…
Reference in New Issue
Block a user