mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-19 02:52:53 +02:00
Revert "[llvm-exegesis] Add option to check the hardware support for a given feature before benchmarking."
This reverts commit 4fcd1a8e6528ca42fe656f2745e15d2b7f5de495 because `llvm/test/tools/llvm-exegesis/X86/lbr/mov-add.s` failed on hosts without LBR support when the build has LIBPFM enabled. On such a host, `perf_event_open` fails with `EOPNOTSUPP` for the LBR config. That change's basic assumption, "If this is run on a non-supported hardware, it will produce all zeroes for latency.", does not hold, because the `perf_event_open` system call fails if the underlying hardware really does not support LBR.
This commit is contained in:
parent
1d58f0df56
commit
fe455e705f
@ -19,9 +19,9 @@ else:
|
|||||||
try:
|
try:
|
||||||
with open(os.devnull, 'w') as quiet:
|
with open(os.devnull, 'w') as quiet:
|
||||||
check_llvm_exegesis_uops_result = subprocess.call(
|
check_llvm_exegesis_uops_result = subprocess.call(
|
||||||
[llvm_exegesis_exe, '-mode', 'uops', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet)
|
[llvm_exegesis_exe, '-allowed-host-cpu', 'skylake', '-allowed-host-cpu', 'skylake-avx512', '-mode', 'uops', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet)
|
||||||
check_llvm_exegesis_latency_result = subprocess.call(
|
check_llvm_exegesis_latency_result = subprocess.call(
|
||||||
[llvm_exegesis_exe, '-mode', 'latency', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet)
|
[llvm_exegesis_exe, '-allowed-host-cpu', 'skylake', '-allowed-host-cpu', 'skylake-avx512', '-mode', 'latency', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet)
|
||||||
except OSError:
|
except OSError:
|
||||||
print('could not exec llvm-exegesis')
|
print('could not exec llvm-exegesis')
|
||||||
config.unsupported = True
|
config.unsupported = True
|
||||||
|
@ -142,11 +142,6 @@ public:
|
|||||||
return {&Instr};
|
return {&Instr};
|
||||||
}
|
}
|
||||||
|
|
||||||
// Checks hardware and software support for current benchmark mode.
|
|
||||||
// Returns an error if the target host does not have support to run the
|
|
||||||
// benchmark.
|
|
||||||
virtual Error checkFeatureSupport() const { return Error::success(); }
|
|
||||||
|
|
||||||
// Creates a snippet generator for the given mode.
|
// Creates a snippet generator for the given mode.
|
||||||
std::unique_ptr<SnippetGenerator>
|
std::unique_ptr<SnippetGenerator>
|
||||||
createSnippetGenerator(InstructionBenchmark::ModeE Mode,
|
createSnippetGenerator(InstructionBenchmark::ModeE Mode,
|
||||||
|
@ -674,23 +674,6 @@ private:
|
|||||||
return Arch == Triple::x86_64 || Arch == Triple::x86;
|
return Arch == Triple::x86_64 || Arch == Triple::x86;
|
||||||
}
|
}
|
||||||
|
|
||||||
Error checkFeatureSupport() const override {
|
|
||||||
// LBR is the only feature we conditionally support now.
|
|
||||||
// So if LBR is not requested, then we should be able to run the benchmarks.
|
|
||||||
if (LbrSamplingPeriod == 0)
|
|
||||||
return Error::success();
|
|
||||||
|
|
||||||
#if defined(__linux__) && defined(HAVE_LIBPFM) && \
|
|
||||||
defined(LIBPFM_HAS_FIELD_CYCLES)
|
|
||||||
// If the kernel supports it, the hardware still may not have it.
|
|
||||||
return X86LbrCounter::checkLbrSupport();
|
|
||||||
#else
|
|
||||||
return llvm::make_error<llvm::StringError>(
|
|
||||||
"LBR not supported on this kernel and/or platform",
|
|
||||||
llvm::errc::not_supported);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
static const unsigned kUnavailableRegisters[4];
|
static const unsigned kUnavailableRegisters[4];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -21,7 +21,6 @@
|
|||||||
#endif // HAVE_LIBPFM
|
#endif // HAVE_LIBPFM
|
||||||
|
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
#include <chrono>
|
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
@ -36,8 +35,6 @@
|
|||||||
namespace llvm {
|
namespace llvm {
|
||||||
namespace exegesis {
|
namespace exegesis {
|
||||||
|
|
||||||
// Number of entries in the LBR.
|
|
||||||
static constexpr int kLbrEntries = 16;
|
|
||||||
static constexpr size_t kBufferPages = 8;
|
static constexpr size_t kBufferPages = 8;
|
||||||
static const size_t kDataBufferSize = kBufferPages * getpagesize();
|
static const size_t kDataBufferSize = kBufferPages * getpagesize();
|
||||||
|
|
||||||
@ -73,6 +70,7 @@ static void copyDataBuffer(void *MMappedBuffer, char *Buf, uint64_t Tail,
|
|||||||
static llvm::Error parseDataBuffer(const char *DataBuf, size_t DataSize,
|
static llvm::Error parseDataBuffer(const char *DataBuf, size_t DataSize,
|
||||||
const void *From, const void *To,
|
const void *From, const void *To,
|
||||||
llvm::SmallVector<int64_t, 4> *CycleArray) {
|
llvm::SmallVector<int64_t, 4> *CycleArray) {
|
||||||
|
assert(From != nullptr && To != nullptr);
|
||||||
const char *DataPtr = DataBuf;
|
const char *DataPtr = DataBuf;
|
||||||
while (DataPtr < DataBuf + DataSize) {
|
while (DataPtr < DataBuf + DataSize) {
|
||||||
struct perf_event_header Header;
|
struct perf_event_header Header;
|
||||||
@ -151,47 +149,21 @@ void X86LbrCounter::start() {
|
|||||||
ioctl(FileDescriptor, PERF_EVENT_IOC_REFRESH, 1024 /* kMaxPollsPerFd */);
|
ioctl(FileDescriptor, PERF_EVENT_IOC_REFRESH, 1024 /* kMaxPollsPerFd */);
|
||||||
}
|
}
|
||||||
|
|
||||||
llvm::Error X86LbrCounter::checkLbrSupport() {
|
|
||||||
// Do a sample read and check if the results contain non-zero values.
|
|
||||||
|
|
||||||
X86LbrCounter counter(X86LbrPerfEvent(123));
|
|
||||||
counter.start();
|
|
||||||
|
|
||||||
// Prevent the compiler from unrolling the loop and get rid of all the
|
|
||||||
// branches. We need at least 16 iterations.
|
|
||||||
int Sum = 0;
|
|
||||||
int V = 1;
|
|
||||||
|
|
||||||
volatile int *P = &V;
|
|
||||||
auto TimeLimit =
|
|
||||||
std::chrono::high_resolution_clock::now() + std::chrono::microseconds(5);
|
|
||||||
|
|
||||||
for (int I = 0;
|
|
||||||
I < kLbrEntries || std::chrono::high_resolution_clock::now() < TimeLimit;
|
|
||||||
++I) {
|
|
||||||
Sum += *P;
|
|
||||||
}
|
|
||||||
|
|
||||||
counter.stop();
|
|
||||||
|
|
||||||
auto ResultOrError = counter.doReadCounter(nullptr, nullptr);
|
|
||||||
if (ResultOrError)
|
|
||||||
if (!ResultOrError.get().empty())
|
|
||||||
// If there is at least one non-zero entry, then LBR is supported.
|
|
||||||
for (const int64_t &Value : ResultOrError.get())
|
|
||||||
if (Value != 0)
|
|
||||||
return Error::success();
|
|
||||||
|
|
||||||
return llvm::make_error<llvm::StringError>(
|
|
||||||
"LBR format with cycles is not suppported on the host.",
|
|
||||||
llvm::errc::not_supported);
|
|
||||||
}
|
|
||||||
|
|
||||||
llvm::Expected<llvm::SmallVector<int64_t, 4>>
|
llvm::Expected<llvm::SmallVector<int64_t, 4>>
|
||||||
X86LbrCounter::readOrError(StringRef FunctionBytes) const {
|
X86LbrCounter::readOrError(StringRef FunctionBytes) const {
|
||||||
|
// The max number of time-outs/retries before we give up.
|
||||||
|
static constexpr int kMaxTimeouts = 160;
|
||||||
|
|
||||||
// Disable the event before reading
|
// Disable the event before reading
|
||||||
ioctl(FileDescriptor, PERF_EVENT_IOC_DISABLE, 0);
|
ioctl(FileDescriptor, PERF_EVENT_IOC_DISABLE, 0);
|
||||||
|
|
||||||
|
// Parses the LBR buffer and fills CycleArray with the sequence of cycle
|
||||||
|
// counts from the buffer.
|
||||||
|
llvm::SmallVector<int64_t, 4> CycleArray;
|
||||||
|
std::unique_ptr<char[]> DataBuf(new char[kDataBufferSize]);
|
||||||
|
int NumTimeouts = 0;
|
||||||
|
int PollResult = 0;
|
||||||
|
|
||||||
// Find the boundary of the function so that we could filter the LBRs
|
// Find the boundary of the function so that we could filter the LBRs
|
||||||
// to keep only the relevant records.
|
// to keep only the relevant records.
|
||||||
if (FunctionBytes.empty())
|
if (FunctionBytes.empty())
|
||||||
@ -200,21 +172,6 @@ X86LbrCounter::readOrError(StringRef FunctionBytes) const {
|
|||||||
const void *From = reinterpret_cast<const void *>(FunctionBytes.data());
|
const void *From = reinterpret_cast<const void *>(FunctionBytes.data());
|
||||||
const void *To = reinterpret_cast<const void *>(FunctionBytes.data() +
|
const void *To = reinterpret_cast<const void *>(FunctionBytes.data() +
|
||||||
FunctionBytes.size());
|
FunctionBytes.size());
|
||||||
return doReadCounter(From, To);
|
|
||||||
}
|
|
||||||
|
|
||||||
llvm::Expected<llvm::SmallVector<int64_t, 4>>
|
|
||||||
X86LbrCounter::doReadCounter(const void *From, const void *To) const {
|
|
||||||
// The max number of time-outs/retries before we give up.
|
|
||||||
static constexpr int kMaxTimeouts = 160;
|
|
||||||
|
|
||||||
// Parses the LBR buffer and fills CycleArray with the sequence of cycle
|
|
||||||
// counts from the buffer.
|
|
||||||
llvm::SmallVector<int64_t, 4> CycleArray;
|
|
||||||
auto DataBuf = std::make_unique<char[]>(kDataBufferSize);
|
|
||||||
int NumTimeouts = 0;
|
|
||||||
int PollResult = 0;
|
|
||||||
|
|
||||||
while (PollResult <= 0) {
|
while (PollResult <= 0) {
|
||||||
PollResult = pollLbrPerfEvent(FileDescriptor);
|
PollResult = pollLbrPerfEvent(FileDescriptor);
|
||||||
if (PollResult > 0)
|
if (PollResult > 0)
|
||||||
|
@ -33,8 +33,6 @@ public:
|
|||||||
|
|
||||||
class X86LbrCounter : public pfm::Counter {
|
class X86LbrCounter : public pfm::Counter {
|
||||||
public:
|
public:
|
||||||
static llvm::Error checkLbrSupport();
|
|
||||||
|
|
||||||
explicit X86LbrCounter(pfm::PerfEvent &&Event);
|
explicit X86LbrCounter(pfm::PerfEvent &&Event);
|
||||||
|
|
||||||
virtual ~X86LbrCounter();
|
virtual ~X86LbrCounter();
|
||||||
@ -45,9 +43,6 @@ public:
|
|||||||
readOrError(StringRef FunctionBytes) const override;
|
readOrError(StringRef FunctionBytes) const override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
llvm::Expected<llvm::SmallVector<int64_t, 4>>
|
|
||||||
doReadCounter(const void *From, const void *To) const;
|
|
||||||
|
|
||||||
void *MMappedBuffer = nullptr;
|
void *MMappedBuffer = nullptr;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -160,6 +160,12 @@ static cl::opt<std::string>
|
|||||||
cl::desc(""), cl::cat(AnalysisOptions),
|
cl::desc(""), cl::cat(AnalysisOptions),
|
||||||
cl::init(""));
|
cl::init(""));
|
||||||
|
|
||||||
|
static cl::list<std::string>
|
||||||
|
AllowedHostCpus("allowed-host-cpu",
|
||||||
|
cl::desc("If specified, only run the benchmark if the host "
|
||||||
|
"CPU matches the names"),
|
||||||
|
cl::cat(Options), cl::ZeroOrMore);
|
||||||
|
|
||||||
static cl::opt<bool> AnalysisDisplayUnstableOpcodes(
|
static cl::opt<bool> AnalysisDisplayUnstableOpcodes(
|
||||||
"analysis-display-unstable-clusters",
|
"analysis-display-unstable-clusters",
|
||||||
cl::desc("if there is more than one benchmark for an opcode, said "
|
cl::desc("if there is more than one benchmark for an opcode, said "
|
||||||
@ -296,9 +302,12 @@ void benchmarkMain() {
|
|||||||
|
|
||||||
const LLVMState State(CpuName);
|
const LLVMState State(CpuName);
|
||||||
|
|
||||||
// Preliminary check to ensure features needed for requested
|
llvm::StringRef ActualCpu = State.getTargetMachine().getTargetCPU();
|
||||||
// benchmark mode are present on target CPU and/or OS.
|
for (auto Begin = AllowedHostCpus.begin(); Begin != AllowedHostCpus.end();
|
||||||
ExitOnErr(State.getExegesisTarget().checkFeatureSupport());
|
++Begin) {
|
||||||
|
if (ActualCpu != *Begin)
|
||||||
|
ExitWithError(llvm::Twine("Unexpected host CPU ").concat(ActualCpu));
|
||||||
|
}
|
||||||
|
|
||||||
const std::unique_ptr<BenchmarkRunner> Runner =
|
const std::unique_ptr<BenchmarkRunner> Runner =
|
||||||
ExitOnErr(State.getExegesisTarget().createBenchmarkRunner(
|
ExitOnErr(State.getExegesisTarget().createBenchmarkRunner(
|
||||||
|
Loading…
Reference in New Issue
Block a user