diff --git a/tools/llvm-exegesis/lib/Analysis.cpp b/tools/llvm-exegesis/lib/Analysis.cpp new file mode 100644 index 00000000000..48700218577 --- /dev/null +++ b/tools/llvm-exegesis/lib/Analysis.cpp @@ -0,0 +1,55 @@ + +#include "Analysis.h" +#include "llvm/Support/Format.h" + +namespace exegesis { + +namespace { + +// Prints a row representing an instruction, along with scheduling info and +// point coordinates (measurements). +void renderInstructionRow(const InstructionBenchmark &Point, + const size_t NameLen, llvm::raw_ostream &OS) { + OS << llvm::format("%*s", NameLen, Point.AsmTmpl.Name.c_str()); + for (const auto &Measurement : Point.Measurements) { + OS << llvm::format(" %*.2f", Measurement.Key.size(), Measurement.Value); + } + OS << "\n"; +} + +void analyzeCluster(const std::vector &Points, + const llvm::MCSubtargetInfo &STI, + const InstructionBenchmarkClustering::Cluster &Cluster, + llvm::raw_ostream &OS) { + // TODO: + // std::sort(Cluster.PointIndices.begin(), Cluster.PointIndices.end(), + // [](int PointIdA, int PointIdB) { return GetSchedClass(Points[PointIdA]) < + // GetSchedClass(Points[PointIdB]); }); + OS << "Cluster:\n"; + // Get max length of the name for alignment. + size_t NameLen = 0; + for (const auto &PointId : Cluster.PointIndices) { + NameLen = std::max(NameLen, Points[PointId].AsmTmpl.Name.size()); + } + + // Print all points. 
+ for (const auto &PointId : Cluster.PointIndices) { + renderInstructionRow(Points[PointId], NameLen, OS); + } +} + +} // namespace + +llvm::Error +printAnalysisClusters(const InstructionBenchmarkClustering &Clustering, + const llvm::MCSubtargetInfo &STI, llvm::raw_ostream &OS) { + + for (const auto &Cluster : Clustering.getValidClusters()) { + analyzeCluster(Clustering.getPoints(), STI, Cluster, OS); + OS << "\n\n\n"; + } + + return llvm::Error::success(); +} + +} // namespace exegesis diff --git a/tools/llvm-exegesis/lib/Analysis.h b/tools/llvm-exegesis/lib/Analysis.h new file mode 100644 index 00000000000..5082b1dffea --- /dev/null +++ b/tools/llvm-exegesis/lib/Analysis.h @@ -0,0 +1,41 @@ +//===-- Analysis.h ----------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Analysis output for benchmark results. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_ANALYSIS_H +#define LLVM_TOOLS_LLVM_EXEGESIS_ANALYSIS_H + +#include "BenchmarkResult.h" +#include "Clustering.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/raw_ostream.h" +#include + +namespace exegesis { + +// All the points in a scheduling class should be in the same cluster. +// Print any scheduling class for which this is not the case. +llvm::Error +printSchedClassInconsistencies(const InstructionBenchmarkClustering &Clustering, + const llvm::MCSubtargetInfo &STI, + llvm::raw_ostream &OS); + +// Prints all instructions for each cluster. 
+llvm::Error +printAnalysisClusters(const InstructionBenchmarkClustering &Clustering, + const llvm::MCSubtargetInfo &STI, llvm::raw_ostream &OS); + +} // namespace exegesis + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_ANALYSIS_H diff --git a/tools/llvm-exegesis/lib/CMakeLists.txt b/tools/llvm-exegesis/lib/CMakeLists.txt index 5ace962fe59..7c513762962 100644 --- a/tools/llvm-exegesis/lib/CMakeLists.txt +++ b/tools/llvm-exegesis/lib/CMakeLists.txt @@ -1,5 +1,6 @@ add_library(LLVMExegesis STATIC + Analysis.cpp BenchmarkResult.cpp BenchmarkRunner.cpp Clustering.cpp diff --git a/tools/llvm-exegesis/lib/Clustering.cpp b/tools/llvm-exegesis/lib/Clustering.cpp index c8646c7c399..b3f42a38ac8 100644 --- a/tools/llvm-exegesis/lib/Clustering.cpp +++ b/tools/llvm-exegesis/lib/Clustering.cpp @@ -19,7 +19,7 @@ namespace exegesis { // (B) - Number of points : ~thousands (points are measurements of an MCInst) // (C) - Number of clusters: ~tens. // (D) - The number of clusters is not known /a priory/. -// (E) - The amount of noise is relatively small. +// (E) - The amount of noise is relatively small. // The problem is rather small. In terms of algorithms, (D) disqualifies // k-means and makes algorithms such as DBSCAN[1] or OPTICS[2] more applicable. // @@ -57,17 +57,18 @@ std::vector rangeQuery(const std::vector &Points, } // namespace -InstructionBenchmarkClustering::InstructionBenchmarkClustering() - : NoiseCluster_(ClusterId::noise()), ErrorCluster_(ClusterId::error()) {} +InstructionBenchmarkClustering::InstructionBenchmarkClustering( + const std::vector &Points) + : Points_(Points), NoiseCluster_(ClusterId::noise()), + ErrorCluster_(ClusterId::error()) {} -llvm::Error InstructionBenchmarkClustering::validateAndSetup( - const std::vector &Points) { - ClusterIdForPoint_.resize(Points.size()); +llvm::Error InstructionBenchmarkClustering::validateAndSetup() { + ClusterIdForPoint_.resize(Points_.size()); // Mark erroneous measurements out. 
// All points must have the same number of dimensions, in the same order. const std::vector *LastMeasurement = nullptr; - for (size_t P = 0, NumPoints = Points.size(); P < NumPoints; ++P) { - const auto &Point = Points[P]; + for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) { + const auto &Point = Points_[P]; if (!Point.Error.empty()) { ClusterIdForPoint_[P] = ClusterId::error(); ErrorCluster_.PointIndices.push_back(P); @@ -96,13 +97,12 @@ llvm::Error InstructionBenchmarkClustering::validateAndSetup( return llvm::Error::success(); } -void InstructionBenchmarkClustering::dbScan( - const std::vector &Points, const size_t MinPts, - const double EpsilonSquared) { - for (size_t P = 0, NumPoints = Points.size(); P < NumPoints; ++P) { +void InstructionBenchmarkClustering::dbScan(const size_t MinPts, + const double EpsilonSquared) { + for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) { if (!ClusterIdForPoint_[P].isUndef()) continue; // Previously processed in inner loop. - const auto Neighbors = rangeQuery(Points, P, EpsilonSquared); + const auto Neighbors = rangeQuery(Points_, P, EpsilonSquared); if (Neighbors.size() + 1 < MinPts) { // Density check. // The region around P is not dense enough to create a new cluster, mark // as noise for now. @@ -136,7 +136,7 @@ void InstructionBenchmarkClustering::dbScan( ClusterIdForPoint_[Q] = CurrentCluster.Id; CurrentCluster.PointIndices.push_back(Q); // And extend to the neighbors of Q if the region is dense enough. - const auto Neighbors = rangeQuery(Points, Q, EpsilonSquared); + const auto Neighbors = rangeQuery(Points_, Q, EpsilonSquared); if (Neighbors.size() + 1 >= MinPts) { ToProcess.insert(Neighbors.begin(), Neighbors.end()); } @@ -144,7 +144,7 @@ void InstructionBenchmarkClustering::dbScan( } // Add noisy points to noise cluster. 
- for (size_t P = 0, NumPoints = Points.size(); P < NumPoints; ++P) { + for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) { if (ClusterIdForPoint_[P].isNoise()) { NoiseCluster_.PointIndices.push_back(P); } @@ -155,15 +155,15 @@ llvm::Expected InstructionBenchmarkClustering::create( const std::vector &Points, const size_t MinPts, const double Epsilon) { - InstructionBenchmarkClustering Clustering; - if (auto Error = Clustering.validateAndSetup(Points)) { - return std::move(Error); + InstructionBenchmarkClustering Clustering(Points); + if (auto Error = Clustering.validateAndSetup()) { + return Error; } if (Clustering.ErrorCluster_.PointIndices.size() == Points.size()) { return Clustering; // Nothing to cluster. } - Clustering.dbScan(Points, MinPts, Epsilon * Epsilon); + Clustering.dbScan(MinPts, Epsilon * Epsilon); return Clustering; } diff --git a/tools/llvm-exegesis/lib/Clustering.h b/tools/llvm-exegesis/lib/Clustering.h index aa4ef67133e..bc5a03af96f 100644 --- a/tools/llvm-exegesis/lib/Clustering.h +++ b/tools/llvm-exegesis/lib/Clustering.h @@ -72,6 +72,8 @@ public: return ClusterIdForPoint_[P]; } + const std::vector &getPoints() const { return Points_; } + const Cluster &getCluster(ClusterId Id) const { assert(!Id.isUndef() && "unlabeled cluster"); if (Id.isNoise()) { @@ -86,10 +88,11 @@ public: const std::vector &getValidClusters() const { return Clusters_; } private: - InstructionBenchmarkClustering(); - llvm::Error validateAndSetup(const std::vector &Points); - void dbScan(const std::vector &Points, size_t MinPts, + InstructionBenchmarkClustering(const std::vector &Points); + llvm::Error validateAndSetup(); + void dbScan(size_t MinPts, double EpsilonSquared); + const std::vector &Points_; int NumDimensions_ = 0; // ClusterForPoint_[P] is the cluster id for Points[P]. 
std::vector ClusterIdForPoint_; diff --git a/tools/llvm-exegesis/llvm-exegesis.cpp b/tools/llvm-exegesis/llvm-exegesis.cpp index 77683572da6..0c8a4173c96 100644 --- a/tools/llvm-exegesis/llvm-exegesis.cpp +++ b/tools/llvm-exegesis/llvm-exegesis.cpp @@ -12,8 +12,10 @@ /// //===----------------------------------------------------------------------===// +#include "lib/Analysis.h" #include "lib/BenchmarkResult.h" #include "lib/BenchmarkRunner.h" +#include "lib/Clustering.h" #include "lib/Latency.h" #include "lib/LlvmState.h" #include "lib/PerfHelper.h" @@ -23,8 +25,11 @@ #include "llvm/ADT/Twine.h" #include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Format.h" #include "llvm/Support/Path.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/TargetSelect.h" #include #include @@ -39,26 +44,41 @@ static llvm::cl::opt OpcodeName("opcode-name", llvm::cl::desc("opcode to measure, by name"), llvm::cl::init("")); -enum class BenchmarkModeE { Latency, Uops }; -static llvm::cl::opt - BenchmarkMode("benchmark-mode", llvm::cl::desc("the benchmark mode to run"), - llvm::cl::values(clEnumValN(BenchmarkModeE::Latency, - "latency", "Instruction Latency"), - clEnumValN(BenchmarkModeE::Uops, "uops", - "Uop Decomposition"))); +static llvm::cl::opt + BenchmarkFile("benchmarks-file", llvm::cl::desc(""), llvm::cl::init("-")); + +enum class BenchmarkModeE { Latency, Uops, Analysis }; +static llvm::cl::opt BenchmarkMode( + "benchmark-mode", llvm::cl::desc("the benchmark mode to run"), + llvm::cl::values( + clEnumValN(BenchmarkModeE::Latency, "latency", "Instruction Latency"), + clEnumValN(BenchmarkModeE::Uops, "uops", "Uop Decomposition"), + clEnumValN(BenchmarkModeE::Analysis, "analysis", "Analysis"))); static llvm::cl::opt NumRepetitions("num-repetitions", llvm::cl::desc("number of time to repeat the asm snippet"), llvm::cl::init(10000)); +static llvm::cl::opt 
AnalysisNumPoints( + "analysis-numpoints", + llvm::cl::desc("minimum number of points in an analysis cluster"), + llvm::cl::init(3)); + +static llvm::cl::opt + AnalysisEpsilon("analysis-epsilon", + llvm::cl::desc("dbscan epsilon for analysis clustering"), + llvm::cl::init(0.1)); + namespace exegesis { -void main() { - if (OpcodeName.empty() == (OpcodeIndex == 0)) { +void benchmarkMain() { + if (exegesis::pfm::pfmInitialize()) + llvm::report_fatal_error("cannot initialize libpfm"); + + if (OpcodeName.empty() == (OpcodeIndex == 0)) llvm::report_fatal_error( "please provide one and only one of 'opcode-index' or 'opcode-name'"); - } llvm::InitializeNativeTarget(); llvm::InitializeNativeTargetAsmPrinter(); @@ -94,10 +114,43 @@ void main() { case BenchmarkModeE::Uops: Runner = llvm::make_unique(); break; + case BenchmarkModeE::Analysis: + llvm_unreachable("not a benchmark"); } Runner->run(State, Opcode, NumRepetitions > 0 ? NumRepetitions : 1, Filter) - .writeYamlOrDie("-"); + .writeYamlOrDie(BenchmarkFile); + exegesis::pfm::pfmTerminate(); +} + +void analysisMain() { + // Read benchmarks. + const std::vector Points = + InstructionBenchmark::readYamlsOrDie(BenchmarkFile); + llvm::outs() << "Parsed " << Points.size() << " benchmark points\n"; + if (Points.empty()) { + llvm::errs() << "no benchmarks to analyze\n"; + return; + } + // TODO: Merge points from several runs (latency and uops). + + // FIXME: Check that all points have the same triple/cpu. 
+ llvm::InitializeAllTargets(); + std::string Error; + const auto *TheTarget = + llvm::TargetRegistry::lookupTarget(Points[0].LLVMTriple, Error); + if (!TheTarget) { + llvm::errs() << "unknown target '" << Points[0].LLVMTriple << "'\n"; + return; + } + std::unique_ptr STI(TheTarget->createMCSubtargetInfo( + Points[0].LLVMTriple, Points[0].CpuName, "")); + + const auto Clustering = llvm::cantFail(InstructionBenchmarkClustering::create( + Points, AnalysisNumPoints, AnalysisEpsilon)); + if (auto Err = printAnalysisClusters(Clustering, *STI, llvm::outs())) { + llvm::report_fatal_error(std::move(Err)); + } } } // namespace exegesis @@ -105,13 +158,10 @@ void main() { int main(int Argc, char **Argv) { llvm::cl::ParseCommandLineOptions(Argc, Argv, ""); - if (exegesis::pfm::pfmInitialize()) { - llvm::errs() << "cannot initialize libpfm\n"; - return EXIT_FAILURE; + if (BenchmarkMode == BenchmarkModeE::Analysis) { + exegesis::analysisMain(); + } else { + exegesis::benchmarkMain(); } - - exegesis::main(); - - exegesis::pfm::pfmTerminate(); return EXIT_SUCCESS; }