llvm-mirror/tools/llvm-exegesis/llvm-exegesis.cpp

//===-- llvm-exegesis.cpp ---------------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Measures execution properties (latencies/uops) of an instruction.
///
//===----------------------------------------------------------------------===//

#include "lib/Analysis.h"
#include "lib/BenchmarkResult.h"
#include "lib/BenchmarkRunner.h"
#include "lib/Clustering.h"
#include "lib/Latency.h"
#include "lib/LlvmState.h"
#include "lib/PerfHelper.h"
#include "lib/Uops.h"
#include "lib/X86.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include <algorithm>
#include <random>
#include <string>
#include <unordered_map>

// -opcode-index: numeric selector for the instruction to benchmark. The
// initial value 0 is what benchmarkMain() treats as "not provided".
static llvm::cl::opt<unsigned>
    OpcodeIndex("opcode-index", llvm::cl::init(0),
                llvm::cl::desc("opcode to measure, by index"));

// -opcode-name: textual selector for the instruction to benchmark. An empty
// string is what benchmarkMain() treats as "not provided".
static llvm::cl::opt<std::string>
    OpcodeName("opcode-name", llvm::cl::init(""),
               llvm::cl::desc("opcode to measure, by name"));

// -benchmarks-file: the YAML file shared by both tool modes. benchmarkMain()
// writes its result to it and analysisMain() reads benchmark points from it.
// Defaults to "-".
static llvm::cl::opt<std::string> BenchmarkFile(
    "benchmarks-file",
    llvm::cl::desc("file to write benchmark results to (latency/uops modes) "
                   "or to read them from (analysis mode)"),
    llvm::cl::init("-"));

// What the tool does for this invocation: Latency and Uops run a benchmark,
// Analysis post-processes previously recorded benchmark points.
enum class BenchmarkModeE { Latency, Uops, Analysis };

// -benchmark-mode: selects one of the BenchmarkModeE values by name.
static llvm::cl::opt<BenchmarkModeE> BenchmarkMode(
    "benchmark-mode",
    llvm::cl::values(
        clEnumValN(BenchmarkModeE::Latency, "latency", "Instruction Latency"),
        clEnumValN(BenchmarkModeE::Uops, "uops", "Uop Decomposition"),
        clEnumValN(BenchmarkModeE::Analysis, "analysis", "Analysis")),
    llvm::cl::desc("the benchmark mode to run"));

// -num-repetitions: repetition count handed to the benchmark runner.
// benchmarkMain() substitutes 1 when this is 0.
static llvm::cl::opt<unsigned>
    NumRepetitions("num-repetitions",
                   llvm::cl::desc("number of times to repeat the asm snippet"),
                   llvm::cl::init(10000));

// -analysis-numpoints: forwarded to InstructionBenchmarkClustering::create()
// by analysisMain().
static llvm::cl::opt<unsigned> AnalysisNumPoints(
    "analysis-numpoints", llvm::cl::init(3),
    llvm::cl::desc("minimum number of points in an analysis cluster"));

// -analysis-epsilon: forwarded to InstructionBenchmarkClustering::create()
// by analysisMain().
static llvm::cl::opt<float>
    AnalysisEpsilon("analysis-epsilon", llvm::cl::init(0.1),
                    llvm::cl::desc("dbscan epsilon for analysis clustering"));

namespace exegesis {

/// Runs the latency or uops benchmark for the opcode selected on the command
/// line and writes the result as YAML to -benchmarks-file.
///
/// Every configuration problem (libpfm initialization failure, zero or both
/// opcode selectors given, unknown opcode name, out-of-range opcode index,
/// scheduling model without extra processor info) is reported through
/// llvm::report_fatal_error.
void benchmarkMain() {
  if (exegesis::pfm::pfmInitialize())
    llvm::report_fatal_error("cannot initialize libpfm");

  // An index of 0 and an empty name both mean "not provided"; exactly one of
  // the two selectors has to be set.
  if (OpcodeName.empty() == (OpcodeIndex == 0))
    llvm::report_fatal_error(
        "please provide one and only one of 'opcode-index' or 'opcode-name'");

  llvm::InitializeNativeTarget();
  llvm::InitializeNativeTargetAsmPrinter();

  // FIXME: Target-specific filter.
  X86Filter Filter;

  const LLVMState State;

  if (!State.getSubtargetInfo().getSchedModel().hasExtraProcessorInfo())
    llvm::report_fatal_error("sched model is missing extra processor info!");

  const unsigned NumOpcodes = State.getInstrInfo().getNumOpcodes();
  unsigned Opcode = OpcodeIndex;
  if (Opcode == 0) {
    // Resolve opcode name -> opcode.
    for (unsigned I = 0; I < NumOpcodes; ++I) {
      if (State.getInstrInfo().getName(I) == OpcodeName) {
        Opcode = I;
        break;
      }
    }
    if (Opcode == 0) {
      llvm::report_fatal_error(
          llvm::Twine("unknown opcode ").concat(OpcodeName));
    }
  } else if (Opcode >= NumOpcodes) {
    // The index comes straight from the command line; reject values that do
    // not name an opcode instead of handing them to the runner.
    llvm::report_fatal_error(llvm::Twine("opcode index ") +
                             llvm::Twine(Opcode) + " is out of range (" +
                             llvm::Twine(NumOpcodes) + " opcodes)");
  }

  std::unique_ptr<BenchmarkRunner> Runner;
  switch (BenchmarkMode) {
  case BenchmarkModeE::Latency:
    Runner = llvm::make_unique<LatencyBenchmarkRunner>();
    break;
  case BenchmarkModeE::Uops:
    Runner = llvm::make_unique<UopsBenchmarkRunner>();
    break;
  case BenchmarkModeE::Analysis:
    // main() dispatches analysis mode to analysisMain().
    llvm_unreachable("not a benchmark");
  }

  // A repetition count of 0 is bumped to 1.
  Runner->run(State, Opcode, NumRepetitions > 0 ? NumRepetitions : 1, Filter)
      .writeYamlOrDie(BenchmarkFile);
  exegesis::pfm::pfmTerminate();
}

/// Reads benchmark points from -benchmarks-file, clusters them using
/// -analysis-numpoints and -analysis-epsilon, and prints the clusters to
/// stdout.
///
/// An empty input, an unknown target triple or a missing subtarget info is
/// reported on stderr and the function returns. Clustering and printing
/// errors are reported through llvm::report_fatal_error.
void analysisMain() {
  // Read benchmarks.
  const std::vector<InstructionBenchmark> Points =
      InstructionBenchmark::readYamlsOrDie(BenchmarkFile);
  llvm::outs() << "Parsed " << Points.size() << " benchmark points\n";
  if (Points.empty()) {
    llvm::errs() << "no benchmarks to analyze\n";
    return;
  }
  // FIXME: Check that all points have the same triple/cpu.
  // FIXME: Merge points from several runs (latency and uops).

  // The first point's triple and CPU stand in for the whole data set.
  llvm::InitializeAllTargets();
  std::string Error;
  const auto *TheTarget =
      llvm::TargetRegistry::lookupTarget(Points[0].LLVMTriple, Error);
  if (!TheTarget) {
    llvm::errs() << "unknown target '" << Points[0].LLVMTriple << "'\n";
    return;
  }
  std::unique_ptr<llvm::MCSubtargetInfo> STI(TheTarget->createMCSubtargetInfo(
      Points[0].LLVMTriple, Points[0].CpuName, ""));
  // NOTE(review): createMCSubtargetInfo() can return null (presumably when
  // the target's MC layer is not registered - confirm whether
  // InitializeAllTargetMCs() is needed here). Never dereference a null STI.
  if (!STI) {
    llvm::errs() << "cannot create subtarget info for target '"
                 << Points[0].LLVMTriple << "', cpu '" << Points[0].CpuName
                 << "'\n";
    return;
  }

  // Clustering depends on user-provided data and parameters, so a failure is
  // reported rather than asserted away.
  auto ClusteringOrErr = InstructionBenchmarkClustering::create(
      Points, AnalysisNumPoints, AnalysisEpsilon);
  if (!ClusteringOrErr)
    llvm::report_fatal_error(ClusteringOrErr.takeError());

  if (auto Err = printAnalysisClusters(*ClusteringOrErr, *STI, llvm::outs()))
    llvm::report_fatal_error(std::move(Err));
}

} // namespace exegesis

/// Tool entry point: parses the command line, then hands control to the
/// analysis or the benchmark driver depending on -benchmark-mode.
int main(int Argc, char **Argv) {
  llvm::cl::ParseCommandLineOptions(Argc, Argv, "");

  const bool RunAnalysis = BenchmarkMode == BenchmarkModeE::Analysis;
  if (!RunAnalysis) {
    // Every mode other than analysis is a benchmark run.
    exegesis::benchmarkMain();
    return EXIT_SUCCESS;
  }

  exegesis::analysisMain();
  return EXIT_SUCCESS;
}