[llvm-exegesis] Add options to SnippetGenerator.

Summary: This adds a `-max-configs-per-opcode` option to limit the number of configs per opcode. Reviewers: gchatelet Subscribers: tschuett, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D68642 llvm-svn: 374054
2024-11-22 02:33:06 +01:00 · 2019-10-08 14:30:24 +00:00 · 2019-10-08 14:30:24 +00:00 · e0fc2857f3
commit e0fc2857f3
parent 24a2861b8f
11 changed files with 86 additions and 28 deletions
--- a/docs/CommandGuide/llvm-exegesis.rst
+++ b/docs/CommandGuide/llvm-exegesis.rst
@ -195,11 +195,23 @@ OPTIONS
 to specify at least one of the `-analysis-clusters-output-file=` and
 `-analysis-inconsistencies-output-file=`.

-.. option:: -num-repetitions=<Number of repetition>
+.. option:: -num-repetitions=<Number of repetitions>

 Specify the number of repetitions of the asm snippet.
 Higher values lead to more accurate measurements but lengthen the benchmark.

+.. option:: -max-configs-per-opcode=<value>
+
+ Specify the maximum number of configurations that can be generated for each
+ opcode. By default this is `1`, meaning that we assume that a single
+ measurement is enough to characterize an opcode. This might not be true of all
+ instructions: for example, the performance characteristics of the LEA
+ instruction on X86 depend on the values of assigned registers and immediates.
+ Setting `-max-configs-per-opcode` to a value larger than `1` allows
+ `llvm-exegesis` to explore more configurations to discover if some register or
+ immediate assignments lead to different performance characteristics.
+
+
 .. option:: -benchmarks-file=</path/to/file>

 File to read (`analysis` mode) or write (`latency`/`uops`/`inverse_throughput`
--- a/test/tools/llvm-exegesis/X86/max-configs.test
+++ b/test/tools/llvm-exegesis/X86/max-configs.test
@ -0,0 +1,24 @@
+# RUN: llvm-exegesis -mode=latency -opcode-name=SBB8rr -max-configs-per-opcode=1 | FileCheck -check-prefixes=CHECK,CHECK1 %s
+# RUN: llvm-exegesis -mode=latency -opcode-name=SBB8rr -max-configs-per-opcode=2 | FileCheck -check-prefixes=CHECK,CHECK2 %s
+# Both runs must emit a first SBB8rr benchmark document.
+CHECK:      ---
+CHECK-NEXT: mode: latency
+CHECK-NEXT: key:
+CHECK-NEXT:   instructions:
+CHECK-NEXT:     SBB8rr
+CHECK-NEXT: config: ''
+CHECK-NEXT: register_initial_values:
+CHECK-DAG: - '[[REG1:[A-Z0-9]+]]=0x0'
+CHECK: ...
+# With a limit of 1, no further SBB8rr benchmark may follow the first document.
+CHECK1-NOT: SBB8rr
+# With a limit of 2, a second SBB8rr benchmark document must follow.
+CHECK2:      ---
+CHECK2-NEXT: mode: latency
+CHECK2-NEXT: key:
+CHECK2-NEXT:   instructions:
+CHECK2-NEXT:     SBB8rr
+CHECK2-NEXT: config: ''
+CHECK2-NEXT: register_initial_values:
+CHECK2-DAG: - '[[REG1:[A-Z0-9]+]]=0x0'
+CHECK2: ...
--- a/tools/llvm-exegesis/lib/Latency.h
+++ b/tools/llvm-exegesis/lib/Latency.h
@ -24,7 +24,7 @@ namespace exegesis {

 class LatencySnippetGenerator : public SnippetGenerator {
 public:
-  LatencySnippetGenerator(const LLVMState &State) : SnippetGenerator(State) {}
+  using SnippetGenerator::SnippetGenerator;
  ~LatencySnippetGenerator() override;

  llvm::Expected<std::vector<CodeTemplate>>
--- a/tools/llvm-exegesis/lib/SnippetGenerator.cpp
+++ b/tools/llvm-exegesis/lib/SnippetGenerator.cpp
@ -33,7 +33,8 @@ std::vector<CodeTemplate> getSingleton(CodeTemplate &&CT) {
 SnippetGeneratorFailure::SnippetGeneratorFailure(const llvm::Twine &S)
    : llvm::StringError(S, llvm::inconvertibleErrorCode()) {}

-SnippetGenerator::SnippetGenerator(const LLVMState &State) : State(State) {}
+SnippetGenerator::SnippetGenerator(const LLVMState &State, const Options &Opts)
+    : State(State), Opts(Opts) {}

 SnippetGenerator::~SnippetGenerator() = default;

@ -81,6 +82,9 @@ SnippetGenerator::generateConfigurations(
            computeRegisterInitialValues(CT.Instructions);
        BC.Key.Config = CT.Config;
        Output.push_back(std::move(BC));
+        if (Output.size() >= Opts.MaxConfigsPerOpcode)
+          return Output; // Early exit once we have reached the maximum
+                         // number of allowed configs.
      }
    }
    return Output;
--- a/tools/llvm-exegesis/lib/SnippetGenerator.h
+++ b/tools/llvm-exegesis/lib/SnippetGenerator.h
@ -51,7 +51,11 @@ public:
 // Common code for all benchmark modes.
 class SnippetGenerator {
 public:
-  explicit SnippetGenerator(const LLVMState &State);
+  struct Options {
+    unsigned MaxConfigsPerOpcode = 1; // Cap on generated configs per opcode.
+  };
+
+  explicit SnippetGenerator(const LLVMState &State, const Options &Opts);

  virtual ~SnippetGenerator();

@ -66,6 +70,7 @@ public:

 protected:
  const LLVMState &State;
+  const Options Opts;

 private:
  // API to be implemented by subclasses.
--- a/tools/llvm-exegesis/lib/Target.cpp
+++ b/tools/llvm-exegesis/lib/Target.cpp
@ -36,17 +36,17 @@ void ExegesisTarget::registerTarget(ExegesisTarget *Target) {
  FirstTarget = Target;
 }

-std::unique_ptr<SnippetGenerator>
-ExegesisTarget::createSnippetGenerator(InstructionBenchmark::ModeE Mode,
-                                       const LLVMState &State) const {
+std::unique_ptr<SnippetGenerator> ExegesisTarget::createSnippetGenerator(
+    InstructionBenchmark::ModeE Mode, const LLVMState &State,
+    const SnippetGenerator::Options &Opts) const {
  switch (Mode) {
  case InstructionBenchmark::Unknown:
    return nullptr;
  case InstructionBenchmark::Latency:
-    return createLatencySnippetGenerator(State);
+    return createLatencySnippetGenerator(State, Opts);
  case InstructionBenchmark::Uops:
  case InstructionBenchmark::InverseThroughput:
-    return createUopsSnippetGenerator(State);
+    return createUopsSnippetGenerator(State, Opts);
  }
  return nullptr;
 }
@ -66,14 +66,14 @@ ExegesisTarget::createBenchmarkRunner(InstructionBenchmark::ModeE Mode,
  return nullptr;
 }

-std::unique_ptr<SnippetGenerator>
-ExegesisTarget::createLatencySnippetGenerator(const LLVMState &State) const {
-  return std::make_unique<LatencySnippetGenerator>(State);
+std::unique_ptr<SnippetGenerator> ExegesisTarget::createLatencySnippetGenerator(
+    const LLVMState &State, const SnippetGenerator::Options &Opts) const {
+  return std::make_unique<LatencySnippetGenerator>(State, Opts);
 }

-std::unique_ptr<SnippetGenerator>
-ExegesisTarget::createUopsSnippetGenerator(const LLVMState &State) const {
-  return std::make_unique<UopsSnippetGenerator>(State);
+std::unique_ptr<SnippetGenerator> ExegesisTarget::createUopsSnippetGenerator(
+    const LLVMState &State, const SnippetGenerator::Options &Opts) const {
+  return std::make_unique<UopsSnippetGenerator>(State, Opts);
 }

 std::unique_ptr<BenchmarkRunner> ExegesisTarget::createLatencyBenchmarkRunner(
--- a/tools/llvm-exegesis/lib/Target.h
+++ b/tools/llvm-exegesis/lib/Target.h
@ -125,7 +125,8 @@ public:
  // Creates a snippet generator for the given mode.
  std::unique_ptr<SnippetGenerator>
  createSnippetGenerator(InstructionBenchmark::ModeE Mode,
-                         const LLVMState &State) const;
+                         const LLVMState &State,
+                         const SnippetGenerator::Options &Opts) const;
  // Creates a benchmark runner for the given mode.
  std::unique_ptr<BenchmarkRunner>
  createBenchmarkRunner(InstructionBenchmark::ModeE Mode,
@ -151,9 +152,9 @@ private:
  // Targets can implement their own snippet generators/benchmarks runners by
  // implementing these.
  std::unique_ptr<SnippetGenerator> virtual createLatencySnippetGenerator(
-      const LLVMState &State) const;
+      const LLVMState &State, const SnippetGenerator::Options &Opts) const;
  std::unique_ptr<SnippetGenerator> virtual createUopsSnippetGenerator(
-      const LLVMState &State) const;
+      const LLVMState &State, const SnippetGenerator::Options &Opts) const;
  std::unique_ptr<BenchmarkRunner> virtual createLatencyBenchmarkRunner(
      const LLVMState &State, InstructionBenchmark::ModeE Mode) const;
  std::unique_ptr<BenchmarkRunner> virtual createUopsBenchmarkRunner(
--- a/tools/llvm-exegesis/lib/Uops.h
+++ b/tools/llvm-exegesis/lib/Uops.h
@ -22,7 +22,7 @@ namespace exegesis {

 class UopsSnippetGenerator : public SnippetGenerator {
 public:
-  UopsSnippetGenerator(const LLVMState &State) : SnippetGenerator(State) {}
+  using SnippetGenerator::SnippetGenerator;
  ~UopsSnippetGenerator() override;

  llvm::Expected<std::vector<CodeTemplate>>
--- a/tools/llvm-exegesis/lib/X86/Target.cpp
+++ b/tools/llvm-exegesis/lib/X86/Target.cpp
@ -462,14 +462,16 @@ private:
                            sizeof(kUnavailableRegisters[0]));
  }

-  std::unique_ptr<SnippetGenerator>
-  createLatencySnippetGenerator(const LLVMState &State) const override {
-    return std::make_unique<X86LatencySnippetGenerator>(State);
+  std::unique_ptr<SnippetGenerator> createLatencySnippetGenerator(
+      const LLVMState &State,
+      const SnippetGenerator::Options &Opts) const override {
+    return std::make_unique<X86LatencySnippetGenerator>(State, Opts);
  }

-  std::unique_ptr<SnippetGenerator>
-  createUopsSnippetGenerator(const LLVMState &State) const override {
-    return std::make_unique<X86UopsSnippetGenerator>(State);
+  std::unique_ptr<SnippetGenerator> createUopsSnippetGenerator(
+      const LLVMState &State,
+      const SnippetGenerator::Options &Opts) const override {
+    return std::make_unique<X86UopsSnippetGenerator>(State, Opts);
  }

  bool matchesArch(llvm::Triple::ArchType Arch) const override {
--- a/tools/llvm-exegesis/llvm-exegesis.cpp
+++ b/tools/llvm-exegesis/llvm-exegesis.cpp
@ -95,6 +95,12 @@ static cl::opt<unsigned>
                   cl::desc("number of time to repeat the asm snippet"),
                   cl::cat(BenchmarkOptions), cl::init(10000));

+static cl::opt<unsigned> MaxConfigsPerOpcode(
+    "max-configs-per-opcode",
+    cl::desc(
+        "allow the snippet generator to generate at most that many configs"),
+    cl::cat(BenchmarkOptions), cl::init(1)); // Default: one config per opcode.
+
 static cl::opt<bool> IgnoreInvalidSchedClass(
    "ignore-invalid-sched-class",
    cl::desc("ignore instructions that do not define a sched class"),
@ -214,8 +220,11 @@ generateSnippets(const LLVMState &State, unsigned Opcode,
  if (InstrDesc.isCall() || InstrDesc.isReturn())
    return make_error<Failure>("Unsupported opcode: isCall/isReturn");

+  SnippetGenerator::Options Options;
+  Options.MaxConfigsPerOpcode = MaxConfigsPerOpcode;
  const std::unique_ptr<SnippetGenerator> Generator =
-      State.getExegesisTarget().createSnippetGenerator(BenchmarkMode, State);
+      State.getExegesisTarget().createSnippetGenerator(BenchmarkMode, State,
+                                                       Options);
  if (!Generator)
    llvm::report_fatal_error("cannot create snippet generator");
  return Generator->generateConfigurations(Instr, ForbiddenRegs);
--- a/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp
+++ b/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp
@ -45,7 +45,7 @@ protected:
 template <typename SnippetGeneratorT>
 class SnippetGeneratorTest : public X86SnippetGeneratorTest {
 protected:
-  SnippetGeneratorTest() : Generator(State) {}
+  SnippetGeneratorTest() : Generator(State, SnippetGenerator::Options()) {}

  std::vector<CodeTemplate> checkAndGetCodeTemplates(unsigned Opcode) {
    randomGenerator().seed(0); // Initialize seed.
@ -335,7 +335,8 @@ TEST_F(UopsSnippetGeneratorTest, MemoryUse) {

 class FakeSnippetGenerator : public SnippetGenerator {
 public:
-  FakeSnippetGenerator(const LLVMState &State) : SnippetGenerator(State) {}
+  FakeSnippetGenerator(const LLVMState &State, const Options &Opts)
+      : SnippetGenerator(State, Opts) {}

  Instruction createInstruction(unsigned Opcode) {
    return State.getIC().getInstr(Opcode);