mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
[llvm-exegesis] Let Counter return up to 16 entries
LBR contains (up to) 16 entries for the last x branches, and the X86LBRCounter (from D77422) should be able to return all of them. Currently, it just returns the latest entry, which could lead to misleading measurements. This patch also changes the LatencyBenchmarkRunner to accommodate multi-value readings. https://reviews.llvm.org/D81050
This commit is contained in:
parent
61be63afc4
commit
052f666a48
@ -74,7 +74,8 @@ struct InstructionBenchmark {
|
||||
std::string Error;
|
||||
std::string Info;
|
||||
std::vector<uint8_t> AssembledSnippet;
|
||||
|
||||
// How to aggregate measurements.
|
||||
enum ResultAggregationModeE { Min, Max, Mean, MinVariance };
|
||||
// Read functions.
|
||||
static Expected<InstructionBenchmark> readYaml(const LLVMState &State,
|
||||
StringRef Filename);
|
||||
|
@ -46,9 +46,29 @@ public:
|
||||
|
||||
private:
|
||||
// Single-value reading for `Counters`: forwards to runAndSample() and returns
// the first value of the sample.
//
// Returns the error from runAndSample() unchanged when sampling fails, and a
// Failure when sampling succeeds but yields no value at all.
Expected<int64_t> runAndMeasure(const char *Counters) const override {
  auto ValuesOrError = runAndSample(Counters);
  if (!ValuesOrError)
    return ValuesOrError.takeError();
  // Guard the element access below: indexing an empty sample would read past
  // the end of the vector. Counter::read() treats an empty reading the same
  // way ("Counter has no reading").
  if (ValuesOrError->empty())
    return make_error<Failure>("Counter has no reading");
  return ValuesOrError->front();
}
|
||||
|
||||
static void
|
||||
accumulateCounterValues(const llvm::SmallVector<int64_t, 4> &NewValues,
|
||||
llvm::SmallVector<int64_t, 4> *Result) {
|
||||
|
||||
const size_t NumValues = std::max(NewValues.size(), Result->size());
|
||||
if (NumValues > Result->size())
|
||||
Result->resize(NumValues, 0);
|
||||
for (size_t I = 0, End = NewValues.size(); I < End; ++I)
|
||||
(*Result)[I] += NewValues[I];
|
||||
}
|
||||
|
||||
Expected<llvm::SmallVector<int64_t, 4>>
|
||||
runAndSample(const char *Counters) const override {
|
||||
// We sum counts when there are several counters for a single ProcRes
|
||||
// (e.g. P23 on SandyBridge).
|
||||
int64_t CounterValue = 0;
|
||||
llvm::SmallVector<int64_t, 4> CounterValues;
|
||||
int Reserved = 0;
|
||||
SmallVector<StringRef, 2> CounterNames;
|
||||
StringRef(Counters).split(CounterNames, '+');
|
||||
char *const ScratchPtr = Scratch->ptr();
|
||||
@ -61,6 +81,17 @@ private:
|
||||
return CounterOrError.takeError();
|
||||
|
||||
pfm::Counter *Counter = CounterOrError.get().get();
|
||||
if (Reserved == 0) {
|
||||
Reserved = Counter->numValues();
|
||||
CounterValues.reserve(Reserved);
|
||||
} else if (Reserved != Counter->numValues())
|
||||
// It'd be wrong to accumulate vectors of different sizes.
|
||||
return make_error<Failure>(
|
||||
llvm::Twine("Inconsistent number of values for counter ")
|
||||
.concat(CounterName)
|
||||
.concat(std::to_string(Counter->numValues()))
|
||||
.concat(" vs expected of ")
|
||||
.concat(std::to_string(Reserved)));
|
||||
Scratch->clear();
|
||||
{
|
||||
CrashRecoveryContext CRC;
|
||||
@ -75,9 +106,13 @@ private:
|
||||
if (Crashed)
|
||||
return make_error<SnippetCrash>("snippet crashed while running");
|
||||
}
|
||||
CounterValue += Counter->read();
|
||||
auto ValueOrError = Counter->readOrError();
|
||||
if (!ValueOrError)
|
||||
return ValueOrError.takeError();
|
||||
|
||||
accumulateCounterValues(ValueOrError.get(), &CounterValues);
|
||||
}
|
||||
return CounterValue;
|
||||
return CounterValues;
|
||||
}
|
||||
|
||||
const LLVMState &State;
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include "LlvmState.h"
|
||||
#include "MCInstrDescView.h"
|
||||
#include "SnippetRepetitor.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/MC/MCInst.h"
|
||||
#include "llvm/Support/Error.h"
|
||||
#include <cstdlib>
|
||||
@ -65,7 +66,11 @@ public:
|
||||
class FunctionExecutor {
|
||||
public:
|
||||
virtual ~FunctionExecutor();
|
||||
// FIXME deprecate this.
|
||||
virtual Expected<int64_t> runAndMeasure(const char *Counters) const = 0;
|
||||
|
||||
virtual Expected<llvm::SmallVector<int64_t, 4>>
|
||||
runAndSample(const char *Counters) const = 0;
|
||||
};
|
||||
|
||||
protected:
|
||||
|
@ -8,49 +8,136 @@
|
||||
|
||||
#include "LatencyBenchmarkRunner.h"
|
||||
|
||||
#include "Target.h"
|
||||
#include "BenchmarkRunner.h"
|
||||
#include "Target.h"
|
||||
#include "llvm/ADT/Twine.h"
|
||||
#include "llvm/Support/Error.h"
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
|
||||
namespace llvm {
|
||||
namespace exegesis {
|
||||
|
||||
LatencyBenchmarkRunner::LatencyBenchmarkRunner(const LLVMState &State,
|
||||
InstructionBenchmark::ModeE Mode)
|
||||
LatencyBenchmarkRunner::LatencyBenchmarkRunner(
|
||||
const LLVMState &State, InstructionBenchmark::ModeE Mode,
|
||||
InstructionBenchmark::ResultAggregationModeE ResultAgg)
|
||||
: BenchmarkRunner(State, Mode) {
|
||||
assert((Mode == InstructionBenchmark::Latency ||
|
||||
Mode == InstructionBenchmark::InverseThroughput) &&
|
||||
"invalid mode");
|
||||
ResultAggMode = ResultAgg;
|
||||
}
|
||||
|
||||
LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default;
|
||||
|
||||
// Returns the population variance of Values, i.e. the mean of the squared
// deviations from the arithmetic mean (divisor is the element count).
// An empty input yields 0.0.
static double computeVariance(const llvm::SmallVector<int64_t, 4> &Values) {
  const size_t Count = Values.size();
  if (Count == 0)
    return 0.0;

  const double Mean =
      std::accumulate(Values.begin(), Values.end(), 0.0) / Count;

  // Sum the squared deviations in element order.
  const double SquaredDeviations = std::accumulate(
      Values.begin(), Values.end(), 0.0, [Mean](double Acc, int64_t Value) {
        const double Delta = Value - Mean;
        return Acc + Delta * Delta;
      });
  return SquaredDeviations / Count;
}
|
||||
|
||||
// Returns the smallest element of Values, or 0 when Values is empty.
static int64_t findMin(const llvm::SmallVector<int64_t, 4> &Values) {
  return Values.empty() ? 0 : *std::min_element(Values.begin(), Values.end());
}
|
||||
|
||||
// Returns the largest element of Values, or 0 when Values is empty.
static int64_t findMax(const llvm::SmallVector<int64_t, 4> &Values) {
  return Values.empty() ? 0 : *std::max_element(Values.begin(), Values.end());
}
|
||||
|
||||
// Returns the arithmetic mean of Values as an integer, or 0 when Values is
// empty. The mean is computed in double precision and then converted to
// int64_t, which drops the fractional part.
// NOTE(review): the truncation follows from the int64_t return type; confirm
// that losing the fraction is intended.
static int64_t findMean(const llvm::SmallVector<int64_t, 4> &Values) {
  if (Values.empty())
    return 0;
  const double Total = std::accumulate(Values.begin(), Values.end(), 0.0);
  const double Mean = Total / static_cast<double>(Values.size());
  return static_cast<int64_t>(Mean);
}
|
||||
|
||||
Expected<std::vector<BenchmarkMeasure>> LatencyBenchmarkRunner::runMeasurements(
|
||||
const FunctionExecutor &Executor) const {
|
||||
// Cycle measurements include some overhead from the kernel. Repeat the
|
||||
// measure several times and take the minimum value.
|
||||
// measure several times and return the aggregated value, as specified by
|
||||
// ResultAggMode.
|
||||
constexpr const int NumMeasurements = 30;
|
||||
int64_t MinValue = std::numeric_limits<int64_t>::max();
|
||||
llvm::SmallVector<int64_t, 4> AccumulatedValues;
|
||||
double MinVariance = std::numeric_limits<double>::infinity();
|
||||
const char *CounterName = State.getPfmCounters().CycleCounter;
|
||||
// Values count for each run.
|
||||
int ValuesCount = 0;
|
||||
for (size_t I = 0; I < NumMeasurements; ++I) {
|
||||
auto ExpectedCounterValue = Executor.runAndMeasure(CounterName);
|
||||
if (!ExpectedCounterValue)
|
||||
return ExpectedCounterValue.takeError();
|
||||
if (*ExpectedCounterValue < MinValue)
|
||||
MinValue = *ExpectedCounterValue;
|
||||
auto ExpectedCounterValues = Executor.runAndSample(CounterName);
|
||||
if (!ExpectedCounterValues)
|
||||
return ExpectedCounterValues.takeError();
|
||||
ValuesCount = ExpectedCounterValues.get().size();
|
||||
if (ValuesCount == 1)
|
||||
AccumulatedValues.push_back(ExpectedCounterValues.get()[0]);
|
||||
else {
|
||||
// We'll keep the reading with lowest variance (ie., most stable)
|
||||
double Variance = computeVariance(*ExpectedCounterValues);
|
||||
if (MinVariance > Variance) {
|
||||
AccumulatedValues = std::move(ExpectedCounterValues.get());
|
||||
MinVariance = Variance;
|
||||
}
|
||||
std::vector<BenchmarkMeasure> Result;
|
||||
}
|
||||
}
|
||||
|
||||
std::string ModeName;
|
||||
switch (Mode) {
|
||||
case InstructionBenchmark::Latency:
|
||||
Result = {BenchmarkMeasure::Create("latency", MinValue)};
|
||||
ModeName = "latency";
|
||||
break;
|
||||
case InstructionBenchmark::InverseThroughput:
|
||||
Result = {BenchmarkMeasure::Create("inverse_throughput", MinValue)};
|
||||
ModeName = "inverse_throughput";
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
switch (ResultAggMode) {
|
||||
case InstructionBenchmark::MinVariance: {
|
||||
if (ValuesCount == 1)
|
||||
llvm::errs() << "Each sample only has one value. result-aggregation-mode "
|
||||
"of min-variance is probably non-sensical\n";
|
||||
std::vector<BenchmarkMeasure> Result;
|
||||
Result.reserve(AccumulatedValues.size());
|
||||
for (const int64_t Value : AccumulatedValues)
|
||||
Result.push_back(BenchmarkMeasure::Create(ModeName, Value));
|
||||
return std::move(Result);
|
||||
}
|
||||
case InstructionBenchmark::Min: {
|
||||
std::vector<BenchmarkMeasure> Result;
|
||||
Result.push_back(
|
||||
BenchmarkMeasure::Create(ModeName, findMin(AccumulatedValues)));
|
||||
return std::move(Result);
|
||||
}
|
||||
case InstructionBenchmark::Max: {
|
||||
std::vector<BenchmarkMeasure> Result;
|
||||
Result.push_back(
|
||||
BenchmarkMeasure::Create(ModeName, findMax(AccumulatedValues)));
|
||||
return std::move(Result);
|
||||
}
|
||||
case InstructionBenchmark::Mean: {
|
||||
std::vector<BenchmarkMeasure> Result;
|
||||
Result.push_back(
|
||||
BenchmarkMeasure::Create(ModeName, findMean(AccumulatedValues)));
|
||||
return std::move(Result);
|
||||
}
|
||||
}
|
||||
return llvm::make_error<Failure>(llvm::Twine("Unexpected benchmark mode(")
|
||||
.concat(std::to_string(Mode))
|
||||
.concat(" and unexpected ResultAggMode ")
|
||||
.concat(std::to_string(ResultAggMode)));
|
||||
}
|
||||
|
||||
} // namespace exegesis
|
||||
} // namespace llvm
|
||||
|
@ -21,13 +21,16 @@ namespace exegesis {
|
||||
|
||||
class LatencyBenchmarkRunner : public BenchmarkRunner {
|
||||
public:
|
||||
LatencyBenchmarkRunner(const LLVMState &State,
|
||||
InstructionBenchmark::ModeE Mode);
|
||||
LatencyBenchmarkRunner(
|
||||
const LLVMState &State, InstructionBenchmark::ModeE Mode,
|
||||
InstructionBenchmark::ResultAggregationModeE ResultAggMode);
|
||||
~LatencyBenchmarkRunner() override;
|
||||
|
||||
private:
|
||||
Expected<std::vector<BenchmarkMeasure>>
|
||||
runMeasurements(const FunctionExecutor &Executor) const override;
|
||||
|
||||
InstructionBenchmark::ResultAggregationModeE ResultAggMode;
|
||||
};
|
||||
} // namespace exegesis
|
||||
} // namespace llvm
|
||||
|
@ -119,23 +119,27 @@ void Counter::stop() { ioctl(FileDescriptor, PERF_EVENT_IOC_DISABLE, 0); }
|
||||
|
||||
int64_t Counter::read() const {
|
||||
auto ValueOrError = readOrError();
|
||||
if (ValueOrError)
|
||||
return ValueOrError.get();
|
||||
|
||||
if (ValueOrError) {
|
||||
if (!ValueOrError.get().empty())
|
||||
return ValueOrError.get()[0];
|
||||
errs() << "Counter has no reading\n";
|
||||
} else
|
||||
errs() << ValueOrError.takeError() << "\n";
|
||||
return -1;
|
||||
}
|
||||
|
||||
llvm::Expected<int64_t> Counter::readOrError() const {
|
||||
llvm::Expected<llvm::SmallVector<int64_t, 4>> Counter::readOrError() const {
|
||||
int64_t Count = 0;
|
||||
ssize_t ReadSize = ::read(FileDescriptor, &Count, sizeof(Count));
|
||||
if (ReadSize != sizeof(Count))
|
||||
return llvm::make_error<llvm::StringError>("Failed to read event counter",
|
||||
llvm::errc::io_error);
|
||||
|
||||
return Count;
|
||||
llvm::SmallVector<int64_t, 4> Result;
|
||||
Result.push_back(Count);
|
||||
return Result;
|
||||
}
|
||||
|
||||
// Number of values this counter reports: always 1.
int Counter::numValues() const { return 1; }
|
||||
#else
|
||||
|
||||
// Constructor for the #else branch: only stores the event by moving it into
// the Event member.
Counter::Counter(PerfEvent &&Event) : Event(std::move(Event)) {}
|
||||
@ -148,11 +152,13 @@ void Counter::stop() {}
|
||||
|
||||
// #else-branch implementation: returns the fixed placeholder value 42 without
// reading anything.
int64_t Counter::read() const { return 42; }
|
||||
|
||||
llvm::Expected<int64_t> Counter::readOrError() const {
|
||||
llvm::Expected<llvm::SmallVector<int64_t, 4>> Counter::readOrError() const {
|
||||
return llvm::make_error<llvm::StringError>("Not implemented",
|
||||
llvm::errc::io_error);
|
||||
}
|
||||
|
||||
// #else-branch implementation: always reports 1 value.
int Counter::numValues() const { return 1; }
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace pfm
|
||||
|
@ -15,9 +15,11 @@
|
||||
#define LLVM_TOOLS_LLVM_EXEGESIS_PERFHELPER_H
|
||||
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/Config/config.h"
|
||||
#include "llvm/Support/Error.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
@ -85,7 +87,9 @@ public:
|
||||
int64_t read() const;
|
||||
|
||||
/// Returns the current value of the counter or error if it cannot be read.
|
||||
virtual llvm::Expected<int64_t> readOrError() const;
|
||||
virtual llvm::Expected<llvm::SmallVector<int64_t, 4>> readOrError() const;
|
||||
|
||||
virtual int numValues() const;
|
||||
|
||||
private:
|
||||
PerfEvent Event;
|
||||
|
@ -68,8 +68,9 @@ std::unique_ptr<SnippetGenerator> ExegesisTarget::createSnippetGenerator(
|
||||
}
|
||||
|
||||
Expected<std::unique_ptr<BenchmarkRunner>>
|
||||
ExegesisTarget::createBenchmarkRunner(InstructionBenchmark::ModeE Mode,
|
||||
const LLVMState &State) const {
|
||||
ExegesisTarget::createBenchmarkRunner(
|
||||
InstructionBenchmark::ModeE Mode, const LLVMState &State,
|
||||
InstructionBenchmark::ResultAggregationModeE ResultAggMode) const {
|
||||
PfmCountersInfo PfmCounters = State.getPfmCounters();
|
||||
switch (Mode) {
|
||||
case InstructionBenchmark::Unknown:
|
||||
@ -85,12 +86,12 @@ ExegesisTarget::createBenchmarkRunner(InstructionBenchmark::ModeE Mode,
|
||||
.concat(ModeName)
|
||||
.concat("' mode, sched model does not define a cycle counter."));
|
||||
}
|
||||
return createLatencyBenchmarkRunner(State, Mode);
|
||||
return createLatencyBenchmarkRunner(State, Mode, ResultAggMode);
|
||||
case InstructionBenchmark::Uops:
|
||||
if (!PfmCounters.UopsCounter && !PfmCounters.IssueCounters)
|
||||
return make_error<Failure>("can't run 'uops' mode, sched model does not "
|
||||
"define uops or issue counters.");
|
||||
return createUopsBenchmarkRunner(State);
|
||||
return createUopsBenchmarkRunner(State, ResultAggMode);
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
@ -106,12 +107,14 @@ std::unique_ptr<SnippetGenerator> ExegesisTarget::createParallelSnippetGenerator
|
||||
}
|
||||
|
||||
std::unique_ptr<BenchmarkRunner> ExegesisTarget::createLatencyBenchmarkRunner(
|
||||
const LLVMState &State, InstructionBenchmark::ModeE Mode) const {
|
||||
return std::make_unique<LatencyBenchmarkRunner>(State, Mode);
|
||||
const LLVMState &State, InstructionBenchmark::ModeE Mode,
|
||||
InstructionBenchmark::ResultAggregationModeE ResultAggMode) const {
|
||||
return std::make_unique<LatencyBenchmarkRunner>(State, Mode, ResultAggMode);
|
||||
}
|
||||
|
||||
std::unique_ptr<BenchmarkRunner>
|
||||
ExegesisTarget::createUopsBenchmarkRunner(const LLVMState &State) const {
|
||||
std::unique_ptr<BenchmarkRunner> ExegesisTarget::createUopsBenchmarkRunner(
|
||||
const LLVMState &State,
|
||||
InstructionBenchmark::ResultAggregationModeE /*unused*/) const {
|
||||
return std::make_unique<UopsBenchmarkRunner>(State);
|
||||
}
|
||||
|
||||
|
@ -148,9 +148,10 @@ public:
|
||||
const LLVMState &State,
|
||||
const SnippetGenerator::Options &Opts) const;
|
||||
// Creates a benchmark runner for the given mode.
|
||||
Expected<std::unique_ptr<BenchmarkRunner>>
|
||||
createBenchmarkRunner(InstructionBenchmark::ModeE Mode,
|
||||
const LLVMState &State) const;
|
||||
Expected<std::unique_ptr<BenchmarkRunner>> createBenchmarkRunner(
|
||||
InstructionBenchmark::ModeE Mode, const LLVMState &State,
|
||||
InstructionBenchmark::ResultAggregationModeE ResultAggMode =
|
||||
InstructionBenchmark::Min) const;
|
||||
|
||||
// Returns the ExegesisTarget for the given triple or nullptr if the target
|
||||
// does not exist.
|
||||
@ -176,9 +177,11 @@ private:
|
||||
std::unique_ptr<SnippetGenerator> virtual createParallelSnippetGenerator(
|
||||
const LLVMState &State, const SnippetGenerator::Options &Opts) const;
|
||||
std::unique_ptr<BenchmarkRunner> virtual createLatencyBenchmarkRunner(
|
||||
const LLVMState &State, InstructionBenchmark::ModeE Mode) const;
|
||||
const LLVMState &State, InstructionBenchmark::ModeE Mode,
|
||||
InstructionBenchmark::ResultAggregationModeE ResultAggMode) const;
|
||||
std::unique_ptr<BenchmarkRunner> virtual createUopsBenchmarkRunner(
|
||||
const LLVMState &State) const;
|
||||
const LLVMState &State,
|
||||
InstructionBenchmark::ResultAggregationModeE ResultAggMode) const;
|
||||
|
||||
const ExegesisTarget *Next = nullptr;
|
||||
const ArrayRef<CpuAndPfmCounters> CpuPfmCounters;
|
||||
|
@ -83,6 +83,21 @@ static cl::opt<exegesis::InstructionBenchmark::ModeE> BenchmarkMode(
|
||||
clEnumValN(exegesis::InstructionBenchmark::Unknown, "analysis",
|
||||
"Analysis")));
|
||||
|
||||
// Command-line option "result-aggregation-mode": selects how multi-value
// readings are aggregated. Accepted values are "min", "max", "mean" and
// "min-variance"; the default is Min.
// NOTE(review): this option is registered under cl::cat(Options), whereas the
// "repetition-mode" option that follows uses cl::cat(BenchmarkOptions) --
// confirm the category is the intended one.
static cl::opt<exegesis::InstructionBenchmark::ResultAggregationModeE>
    ResultAggMode(
        "result-aggregation-mode",
        cl::desc("How to aggregate multi-values result"), cl::cat(Options),
        cl::values(clEnumValN(exegesis::InstructionBenchmark::Min, "min",
                              "Keep min reading"),
                   clEnumValN(exegesis::InstructionBenchmark::Max, "max",
                              "Keep max reading"),
                   clEnumValN(exegesis::InstructionBenchmark::Mean, "mean",
                              "Compute mean of all readings"),
                   clEnumValN(exegesis::InstructionBenchmark::MinVariance,
                              "min-variance",
                              "Keep readings set with min-variance")),
        cl::init(exegesis::InstructionBenchmark::Min));
|
||||
|
||||
static cl::opt<exegesis::InstructionBenchmark::RepetitionModeE> RepetitionMode(
|
||||
"repetition-mode", cl::desc("how to repeat the instruction snippet"),
|
||||
cl::cat(BenchmarkOptions),
|
||||
@ -281,8 +296,9 @@ void benchmarkMain() {
|
||||
|
||||
const LLVMState State(CpuName);
|
||||
|
||||
const std::unique_ptr<BenchmarkRunner> Runner = ExitOnErr(
|
||||
State.getExegesisTarget().createBenchmarkRunner(BenchmarkMode, State));
|
||||
const std::unique_ptr<BenchmarkRunner> Runner =
|
||||
ExitOnErr(State.getExegesisTarget().createBenchmarkRunner(
|
||||
BenchmarkMode, State, ResultAggMode));
|
||||
if (!Runner) {
|
||||
ExitWithError("cannot create benchmark runner");
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user