1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00

[llvm-exegesis] Let Counter return up to 16 entries

The LBR contains up to 16 entries for the last x branches, and the X86LBRCounter (from D77422) should be able to return all of them.
    Currently, it just returns the latest entry, which could lead to misleading measurements.
    This patch also changes the LatencyBenchmarkRunner to accommodate multi-value readings.

         https://reviews.llvm.org/D81050
This commit is contained in:
Vy Nguyen 2020-06-25 11:15:16 -04:00
parent 61be63afc4
commit 052f666a48
10 changed files with 207 additions and 44 deletions

View File

@ -74,7 +74,8 @@ struct InstructionBenchmark {
std::string Error;
std::string Info;
std::vector<uint8_t> AssembledSnippet;
// How to aggregate measurements.
enum ResultAggregationModeE { Min, Max, Mean, MinVariance };
// Read functions.
static Expected<InstructionBenchmark> readYaml(const LLVMState &State,
StringRef Filename);

View File

@ -46,9 +46,29 @@ public:
private:
// Single-value wrapper around runAndSample(): propagates the error when
// sampling failed, otherwise reports the first sampled value only.
Expected<int64_t> runAndMeasure(const char *Counters) const override {
  auto SamplesOrError = runAndSample(Counters);
  if (!SamplesOrError)
    return SamplesOrError.takeError();
  // Any readings past index 0 are intentionally dropped here.
  return SamplesOrError.get()[0];
}
// Adds NewValues element-wise into *Result. If *Result is shorter than
// NewValues it is first extended with zeros; it is never shrunk.
static void
accumulateCounterValues(const llvm::SmallVector<int64_t, 4> &NewValues,
                        llvm::SmallVector<int64_t, 4> *Result) {
  if (Result->size() < NewValues.size())
    Result->resize(NewValues.size(), 0);
  size_t Slot = 0;
  for (const int64_t Value : NewValues)
    (*Result)[Slot++] += Value;
}
Expected<llvm::SmallVector<int64_t, 4>>
runAndSample(const char *Counters) const override {
// We sum counts when there are several counters for a single ProcRes
// (e.g. P23 on SandyBridge).
int64_t CounterValue = 0;
llvm::SmallVector<int64_t, 4> CounterValues;
int Reserved = 0;
SmallVector<StringRef, 2> CounterNames;
StringRef(Counters).split(CounterNames, '+');
char *const ScratchPtr = Scratch->ptr();
@ -61,6 +81,17 @@ private:
return CounterOrError.takeError();
pfm::Counter *Counter = CounterOrError.get().get();
if (Reserved == 0) {
Reserved = Counter->numValues();
CounterValues.reserve(Reserved);
} else if (Reserved != Counter->numValues())
// It'd be wrong to accumulate vectors of different sizes.
return make_error<Failure>(
llvm::Twine("Inconsistent number of values for counter ")
.concat(CounterName)
.concat(std::to_string(Counter->numValues()))
.concat(" vs expected of ")
.concat(std::to_string(Reserved)));
Scratch->clear();
{
CrashRecoveryContext CRC;
@ -75,9 +106,13 @@ private:
if (Crashed)
return make_error<SnippetCrash>("snippet crashed while running");
}
CounterValue += Counter->read();
auto ValueOrError = Counter->readOrError();
if (!ValueOrError)
return ValueOrError.takeError();
accumulateCounterValues(ValueOrError.get(), &CounterValues);
}
return CounterValue;
return CounterValues;
}
const LLVMState &State;

View File

@ -21,6 +21,7 @@
#include "LlvmState.h"
#include "MCInstrDescView.h"
#include "SnippetRepetitor.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Error.h"
#include <cstdlib>
@ -65,7 +66,11 @@ public:
// Abstract interface exposing two ways of obtaining counter readings for a
// counter specification string: a single value, or a small vector of values.
// Both entry points are pure virtual and report failure through Expected<>.
class FunctionExecutor {
public:
virtual ~FunctionExecutor();
// Returns one int64_t reading for `Counters`, or an error.
// FIXME: deprecate this (presumably in favour of runAndSample() - confirm).
virtual Expected<int64_t> runAndMeasure(const char *Counters) const = 0;
// Returns the readings for `Counters` as a vector of int64_t, or an error.
virtual Expected<llvm::SmallVector<int64_t, 4>>
runAndSample(const char *Counters) const = 0;
};
protected:

View File

@ -8,48 +8,135 @@
#include "LatencyBenchmarkRunner.h"
#include "Target.h"
#include "BenchmarkRunner.h"
#include "Target.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Error.h"
#include <algorithm>
#include <cmath>
namespace llvm {
namespace exegesis {
LatencyBenchmarkRunner::LatencyBenchmarkRunner(const LLVMState &State,
InstructionBenchmark::ModeE Mode)
LatencyBenchmarkRunner::LatencyBenchmarkRunner(
const LLVMState &State, InstructionBenchmark::ModeE Mode,
InstructionBenchmark::ResultAggregationModeE ResultAgg)
: BenchmarkRunner(State, Mode) {
assert((Mode == InstructionBenchmark::Latency ||
Mode == InstructionBenchmark::InverseThroughput) &&
"invalid mode");
ResultAggMode = ResultAgg;
}
LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default;
// Returns the population variance of Values (sum of squared deviations from
// the mean, divided by the number of values). An empty input yields 0.0.
static double computeVariance(const llvm::SmallVector<int64_t, 4> &Values) {
  if (Values.empty())
    return 0.0;
  const double Count = Values.size();
  double Total = 0.0;
  for (const int64_t Sample : Values)
    Total += Sample;
  const double Average = Total / Count;
  // Fold the squared deviations left-to-right, starting from zero.
  const double SquaredDeviations =
      std::accumulate(Values.begin(), Values.end(), 0.0,
                      [Average](double Acc, int64_t Sample) {
                        const double Deviation = Sample - Average;
                        return Acc + Deviation * Deviation;
                      });
  return SquaredDeviations / Count;
}
// Returns the smallest element of Values, or 0 when Values is empty.
static int64_t findMin(const llvm::SmallVector<int64_t, 4> &Values) {
  const auto Smallest = std::min_element(Values.begin(), Values.end());
  return Smallest == Values.end() ? 0 : *Smallest;
}
// Returns the largest element of Values, or 0 when Values is empty.
static int64_t findMax(const llvm::SmallVector<int64_t, 4> &Values) {
  const auto Largest = std::max_element(Values.begin(), Values.end());
  return Largest == Values.end() ? 0 : *Largest;
}
// Returns the arithmetic mean of Values, or 0 when Values is empty. The mean
// is computed in double precision and then converted to int64_t, so any
// fractional part is discarded.
static int64_t findMean(const llvm::SmallVector<int64_t, 4> &Values) {
  if (Values.empty())
    return 0;
  const double Total = std::accumulate(Values.begin(), Values.end(), 0.0);
  const double Count = static_cast<double>(Values.size());
  return static_cast<int64_t>(Total / Count);
}
Expected<std::vector<BenchmarkMeasure>> LatencyBenchmarkRunner::runMeasurements(
const FunctionExecutor &Executor) const {
// Cycle measurements include some overhead from the kernel. Repeat the
// measure several times and take the minimum value.
// measure several times and return the aggregated value, as specified by
// ResultAggMode.
constexpr const int NumMeasurements = 30;
int64_t MinValue = std::numeric_limits<int64_t>::max();
llvm::SmallVector<int64_t, 4> AccumulatedValues;
double MinVariance = std::numeric_limits<double>::infinity();
const char *CounterName = State.getPfmCounters().CycleCounter;
// Values count for each run.
int ValuesCount = 0;
for (size_t I = 0; I < NumMeasurements; ++I) {
auto ExpectedCounterValue = Executor.runAndMeasure(CounterName);
if (!ExpectedCounterValue)
return ExpectedCounterValue.takeError();
if (*ExpectedCounterValue < MinValue)
MinValue = *ExpectedCounterValue;
auto ExpectedCounterValues = Executor.runAndSample(CounterName);
if (!ExpectedCounterValues)
return ExpectedCounterValues.takeError();
ValuesCount = ExpectedCounterValues.get().size();
if (ValuesCount == 1)
AccumulatedValues.push_back(ExpectedCounterValues.get()[0]);
else {
// We'll keep the reading with lowest variance (ie., most stable)
double Variance = computeVariance(*ExpectedCounterValues);
if (MinVariance > Variance) {
AccumulatedValues = std::move(ExpectedCounterValues.get());
MinVariance = Variance;
}
}
}
std::vector<BenchmarkMeasure> Result;
std::string ModeName;
switch (Mode) {
case InstructionBenchmark::Latency:
Result = {BenchmarkMeasure::Create("latency", MinValue)};
ModeName = "latency";
break;
case InstructionBenchmark::InverseThroughput:
Result = {BenchmarkMeasure::Create("inverse_throughput", MinValue)};
ModeName = "inverse_throughput";
break;
default:
break;
}
return std::move(Result);
switch (ResultAggMode) {
case InstructionBenchmark::MinVariance: {
if (ValuesCount == 1)
llvm::errs() << "Each sample only has one value. result-aggregation-mode "
"of min-variance is probably non-sensical\n";
std::vector<BenchmarkMeasure> Result;
Result.reserve(AccumulatedValues.size());
for (const int64_t Value : AccumulatedValues)
Result.push_back(BenchmarkMeasure::Create(ModeName, Value));
return std::move(Result);
}
case InstructionBenchmark::Min: {
std::vector<BenchmarkMeasure> Result;
Result.push_back(
BenchmarkMeasure::Create(ModeName, findMin(AccumulatedValues)));
return std::move(Result);
}
case InstructionBenchmark::Max: {
std::vector<BenchmarkMeasure> Result;
Result.push_back(
BenchmarkMeasure::Create(ModeName, findMax(AccumulatedValues)));
return std::move(Result);
}
case InstructionBenchmark::Mean: {
std::vector<BenchmarkMeasure> Result;
Result.push_back(
BenchmarkMeasure::Create(ModeName, findMean(AccumulatedValues)));
return std::move(Result);
}
}
return llvm::make_error<Failure>(llvm::Twine("Unexpected benchmark mode(")
.concat(std::to_string(Mode))
.concat(" and unexpected ResultAggMode ")
.concat(std::to_string(ResultAggMode)));
}
} // namespace exegesis

View File

@ -21,13 +21,16 @@ namespace exegesis {
class LatencyBenchmarkRunner : public BenchmarkRunner {
public:
LatencyBenchmarkRunner(const LLVMState &State,
InstructionBenchmark::ModeE Mode);
LatencyBenchmarkRunner(
const LLVMState &State, InstructionBenchmark::ModeE Mode,
InstructionBenchmark::ResultAggregationModeE ResultAggMode);
~LatencyBenchmarkRunner() override;
private:
Expected<std::vector<BenchmarkMeasure>>
runMeasurements(const FunctionExecutor &Executor) const override;
InstructionBenchmark::ResultAggregationModeE ResultAggMode;
};
} // namespace exegesis
} // namespace llvm

View File

@ -119,23 +119,27 @@ void Counter::stop() { ioctl(FileDescriptor, PERF_EVENT_IOC_DISABLE, 0); }
int64_t Counter::read() const {
auto ValueOrError = readOrError();
if (ValueOrError)
return ValueOrError.get();
errs() << ValueOrError.takeError() << "\n";
if (ValueOrError) {
if (!ValueOrError.get().empty())
return ValueOrError.get()[0];
errs() << "Counter has no reading\n";
} else
errs() << ValueOrError.takeError() << "\n";
return -1;
}
llvm::Expected<int64_t> Counter::readOrError() const {
llvm::Expected<llvm::SmallVector<int64_t, 4>> Counter::readOrError() const {
int64_t Count = 0;
ssize_t ReadSize = ::read(FileDescriptor, &Count, sizeof(Count));
if (ReadSize != sizeof(Count))
return llvm::make_error<llvm::StringError>("Failed to read event counter",
llvm::errc::io_error);
return Count;
llvm::SmallVector<int64_t, 4> Result;
Result.push_back(Count);
return Result;
}
// Number of values per reading; this implementation always reports one.
int Counter::numValues() const {
  return 1;
}
#else
// Fallback (`#else` branch) constructor: only takes ownership of the event by
// moving it into the Event member; nothing else is set up.
Counter::Counter(PerfEvent &&Event)
    : Event(std::move(Event)) {
}
@ -148,11 +152,13 @@ void Counter::stop() {}
// Fallback (`#else` branch) stub: always returns the constant 42.
int64_t Counter::read() const {
  return 42;
}
llvm::Expected<int64_t> Counter::readOrError() const {
llvm::Expected<llvm::SmallVector<int64_t, 4>> Counter::readOrError() const {
return llvm::make_error<llvm::StringError>("Not implemented",
llvm::errc::io_error);
}
// Fallback (`#else` branch) stub: reports one value per reading.
int Counter::numValues() const {
  return 1;
}
#endif
} // namespace pfm

View File

@ -15,9 +15,11 @@
#define LLVM_TOOLS_LLVM_EXEGESIS_PERFHELPER_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/config.h"
#include "llvm/Support/Error.h"
#include <cstdint>
#include <functional>
#include <memory>
@ -85,7 +87,9 @@ public:
int64_t read() const;
/// Returns the current value of the counter or error if it cannot be read.
virtual llvm::Expected<int64_t> readOrError() const;
virtual llvm::Expected<llvm::SmallVector<int64_t, 4>> readOrError() const;
virtual int numValues() const;
private:
PerfEvent Event;

View File

@ -68,8 +68,9 @@ std::unique_ptr<SnippetGenerator> ExegesisTarget::createSnippetGenerator(
}
Expected<std::unique_ptr<BenchmarkRunner>>
ExegesisTarget::createBenchmarkRunner(InstructionBenchmark::ModeE Mode,
const LLVMState &State) const {
ExegesisTarget::createBenchmarkRunner(
InstructionBenchmark::ModeE Mode, const LLVMState &State,
InstructionBenchmark::ResultAggregationModeE ResultAggMode) const {
PfmCountersInfo PfmCounters = State.getPfmCounters();
switch (Mode) {
case InstructionBenchmark::Unknown:
@ -85,12 +86,12 @@ ExegesisTarget::createBenchmarkRunner(InstructionBenchmark::ModeE Mode,
.concat(ModeName)
.concat("' mode, sched model does not define a cycle counter."));
}
return createLatencyBenchmarkRunner(State, Mode);
return createLatencyBenchmarkRunner(State, Mode, ResultAggMode);
case InstructionBenchmark::Uops:
if (!PfmCounters.UopsCounter && !PfmCounters.IssueCounters)
return make_error<Failure>("can't run 'uops' mode, sched model does not "
"define uops or issue counters.");
return createUopsBenchmarkRunner(State);
return createUopsBenchmarkRunner(State, ResultAggMode);
}
return nullptr;
}
@ -106,12 +107,14 @@ std::unique_ptr<SnippetGenerator> ExegesisTarget::createParallelSnippetGenerator
}
std::unique_ptr<BenchmarkRunner> ExegesisTarget::createLatencyBenchmarkRunner(
const LLVMState &State, InstructionBenchmark::ModeE Mode) const {
return std::make_unique<LatencyBenchmarkRunner>(State, Mode);
const LLVMState &State, InstructionBenchmark::ModeE Mode,
InstructionBenchmark::ResultAggregationModeE ResultAggMode) const {
return std::make_unique<LatencyBenchmarkRunner>(State, Mode, ResultAggMode);
}
std::unique_ptr<BenchmarkRunner>
ExegesisTarget::createUopsBenchmarkRunner(const LLVMState &State) const {
std::unique_ptr<BenchmarkRunner> ExegesisTarget::createUopsBenchmarkRunner(
const LLVMState &State,
InstructionBenchmark::ResultAggregationModeE /*unused*/) const {
return std::make_unique<UopsBenchmarkRunner>(State);
}

View File

@ -148,9 +148,10 @@ public:
const LLVMState &State,
const SnippetGenerator::Options &Opts) const;
// Creates a benchmark runner for the given mode.
Expected<std::unique_ptr<BenchmarkRunner>>
createBenchmarkRunner(InstructionBenchmark::ModeE Mode,
const LLVMState &State) const;
Expected<std::unique_ptr<BenchmarkRunner>> createBenchmarkRunner(
InstructionBenchmark::ModeE Mode, const LLVMState &State,
InstructionBenchmark::ResultAggregationModeE ResultAggMode =
InstructionBenchmark::Min) const;
// Returns the ExegesisTarget for the given triple or nullptr if the target
// does not exist.
@ -176,9 +177,11 @@ private:
std::unique_ptr<SnippetGenerator> virtual createParallelSnippetGenerator(
const LLVMState &State, const SnippetGenerator::Options &Opts) const;
std::unique_ptr<BenchmarkRunner> virtual createLatencyBenchmarkRunner(
const LLVMState &State, InstructionBenchmark::ModeE Mode) const;
const LLVMState &State, InstructionBenchmark::ModeE Mode,
InstructionBenchmark::ResultAggregationModeE ResultAggMode) const;
std::unique_ptr<BenchmarkRunner> virtual createUopsBenchmarkRunner(
const LLVMState &State) const;
const LLVMState &State,
InstructionBenchmark::ResultAggregationModeE ResultAggMode) const;
const ExegesisTarget *Next = nullptr;
const ArrayRef<CpuAndPfmCounters> CpuPfmCounters;

View File

@ -83,6 +83,21 @@ static cl::opt<exegesis::InstructionBenchmark::ModeE> BenchmarkMode(
clEnumValN(exegesis::InstructionBenchmark::Unknown, "analysis",
"Analysis")));
// Command-line option "result-aggregation-mode": chooses the
// ResultAggregationModeE value used to aggregate multi-value results.
// Accepted spellings are "min", "max", "mean" and "min-variance"; when the
// option is not given it is initialised to Min (see cl::init below).
// NOTE(review): this option is registered under `Options`, while the
// RepetitionMode option that follows uses `BenchmarkOptions` - confirm the
// category is intended.
static cl::opt<exegesis::InstructionBenchmark::ResultAggregationModeE>
ResultAggMode(
"result-aggregation-mode",
cl::desc("How to aggregate multi-values result"), cl::cat(Options),
cl::values(clEnumValN(exegesis::InstructionBenchmark::Min, "min",
"Keep min reading"),
clEnumValN(exegesis::InstructionBenchmark::Max, "max",
"Keep max reading"),
clEnumValN(exegesis::InstructionBenchmark::Mean, "mean",
"Compute mean of all readings"),
clEnumValN(exegesis::InstructionBenchmark::MinVariance,
"min-variance",
"Keep readings set with min-variance")),
cl::init(exegesis::InstructionBenchmark::Min));
static cl::opt<exegesis::InstructionBenchmark::RepetitionModeE> RepetitionMode(
"repetition-mode", cl::desc("how to repeat the instruction snippet"),
cl::cat(BenchmarkOptions),
@ -281,8 +296,9 @@ void benchmarkMain() {
const LLVMState State(CpuName);
const std::unique_ptr<BenchmarkRunner> Runner = ExitOnErr(
State.getExegesisTarget().createBenchmarkRunner(BenchmarkMode, State));
const std::unique_ptr<BenchmarkRunner> Runner =
ExitOnErr(State.getExegesisTarget().createBenchmarkRunner(
BenchmarkMode, State, ResultAggMode));
if (!Runner) {
ExitWithError("cannot create benchmark runner");
}