diff --git a/tools/llvm-exegesis/lib/Assembler.cpp b/tools/llvm-exegesis/lib/Assembler.cpp index 2e3712ce7dc..b0758d4f8e3 100644 --- a/tools/llvm-exegesis/lib/Assembler.cpp +++ b/tools/llvm-exegesis/lib/Assembler.cpp @@ -32,10 +32,19 @@ static constexpr const char FunctionID[] = "foo"; static std::vector generateSnippetSetupCode(const ExegesisTarget &ET, const llvm::MCSubtargetInfo *const MSI, + const unsigned ScratchReg, + llvm::ArrayRef ScratchRegisterCopies, llvm::ArrayRef RegisterInitialValues, bool &IsSnippetSetupComplete) { IsSnippetSetupComplete = true; std::vector Result; + // Copy registers. + for (const unsigned Reg : ScratchRegisterCopies) { + assert(ScratchReg > 0 && "scratch reg copies but no scratch reg"); + const auto CopyRegisterCode = ET.copyReg(*MSI, Reg, ScratchReg); + Result.insert(Result.end(), CopyRegisterCode.begin(), CopyRegisterCode.end()); + } + // Load values in registers. for (const RegisterValue &RV : RegisterInitialValues) { // Load a constant in the register. 
const auto SetRegisterCode = ET.setRegTo(*MSI, RV.Register, RV.Value); @@ -155,6 +164,7 @@ llvm::BitVector getFunctionReservedRegs(const llvm::TargetMachine &TM) { void assembleToStream(const ExegesisTarget &ET, std::unique_ptr TM, llvm::ArrayRef LiveIns, + llvm::ArrayRef ScratchRegisterCopies, llvm::ArrayRef RegisterInitialValues, llvm::ArrayRef Instructions, llvm::raw_pwrite_stream &AsmStream) { @@ -178,7 +188,7 @@ void assembleToStream(const ExegesisTarget &ET, bool IsSnippetSetupComplete; std::vector Code = - generateSnippetSetupCode(ET, TM->getMCSubtargetInfo(), + generateSnippetSetupCode(ET, TM->getMCSubtargetInfo(), ET.getScratchMemoryRegister(TM->getTargetTriple()), ScratchRegisterCopies, RegisterInitialValues, IsSnippetSetupComplete); Code.insert(Code.end(), Instructions.begin(), Instructions.end()); @@ -199,7 +209,7 @@ void assembleToStream(const ExegesisTarget &ET, llvm::MCContext &MCContext = MMI->getContext(); llvm::legacy::PassManager PM; - llvm::TargetLibraryInfoImpl TLII(llvm::Triple(Module->getTargetTriple())); + llvm::TargetLibraryInfoImpl TLII(Triple(Module->getTargetTriple())); PM.add(new llvm::TargetLibraryInfoWrapperPass(TLII)); llvm::TargetPassConfig *TPC = TM->createPassConfig(PM); diff --git a/tools/llvm-exegesis/lib/Assembler.h b/tools/llvm-exegesis/lib/Assembler.h index ee6bc86f378..2626fbbe9fb 100644 --- a/tools/llvm-exegesis/lib/Assembler.h +++ b/tools/llvm-exegesis/lib/Assembler.h @@ -48,6 +48,7 @@ llvm::BitVector getFunctionReservedRegs(const llvm::TargetMachine &TM); void assembleToStream(const ExegesisTarget &ET, std::unique_ptr TM, llvm::ArrayRef LiveIns, + llvm::ArrayRef ScratchRegisterCopies, llvm::ArrayRef RegisterInitialValues, llvm::ArrayRef Instructions, llvm::raw_pwrite_stream &AsmStream); diff --git a/tools/llvm-exegesis/lib/BenchmarkCode.h b/tools/llvm-exegesis/lib/BenchmarkCode.h index 38bea2519a6..dda1b29c126 100644 --- a/tools/llvm-exegesis/lib/BenchmarkCode.h +++ b/tools/llvm-exegesis/lib/BenchmarkCode.h @@ -27,6 
+27,10 @@ struct BenchmarkCode { // registers initial values. std::vector RegisterInitialValues; + // Before the code is executed some instructions are added to copy the + // scratch register into the specified registers. + std::vector ScratchRegisterCopies; + // We also need to provide the registers that are live on entry for the // assembler to generate proper prologue/epilogue. std::vector LiveIns; diff --git a/tools/llvm-exegesis/lib/BenchmarkResult.h b/tools/llvm-exegesis/lib/BenchmarkResult.h index 773a2e50abc..6df57d21316 100644 --- a/tools/llvm-exegesis/lib/BenchmarkResult.h +++ b/tools/llvm-exegesis/lib/BenchmarkResult.h @@ -58,7 +58,7 @@ struct BenchmarkMeasure { // The result of an instruction benchmark. struct InstructionBenchmark { InstructionBenchmarkKey Key; - enum ModeE { Unknown, Latency, Uops }; + enum ModeE { Unknown, Latency, Uops, ROBSize }; ModeE Mode; std::string CpuName; std::string LLVMTriple; diff --git a/tools/llvm-exegesis/lib/BenchmarkRunner.cpp b/tools/llvm-exegesis/lib/BenchmarkRunner.cpp index 437503f8486..398489e53f8 100644 --- a/tools/llvm-exegesis/lib/BenchmarkRunner.cpp +++ b/tools/llvm-exegesis/lib/BenchmarkRunner.cpp @@ -168,7 +168,7 @@ BenchmarkRunner::writeObjectFile(const BenchmarkCode &BC, return std::move(E); llvm::raw_fd_ostream OFS(ResultFD, true /*ShouldClose*/); assembleToStream(State.getExegesisTarget(), State.createTargetMachine(), - BC.LiveIns, BC.RegisterInitialValues, Code, OFS); + BC.LiveIns, BC.ScratchRegisterCopies, BC.RegisterInitialValues, Code, OFS); return ResultPath.str(); } diff --git a/tools/llvm-exegesis/lib/CMakeLists.txt b/tools/llvm-exegesis/lib/CMakeLists.txt index 8fdf8b997e0..3c1cf0b8e56 100644 --- a/tools/llvm-exegesis/lib/CMakeLists.txt +++ b/tools/llvm-exegesis/lib/CMakeLists.txt @@ -23,6 +23,7 @@ add_library(LLVMExegesis LlvmState.cpp MCInstrDescView.cpp PerfHelper.cpp + ROBSize.cpp RegisterAliasing.cpp SnippetGenerator.cpp RegisterValue.cpp diff --git a/tools/llvm-exegesis/lib/CodeTemplate.h 
b/tools/llvm-exegesis/lib/CodeTemplate.h index 4c55487f3d1..2738da67cf3 100644 --- a/tools/llvm-exegesis/lib/CodeTemplate.h +++ b/tools/llvm-exegesis/lib/CodeTemplate.h @@ -17,6 +17,7 @@ #define LLVM_TOOLS_LLVM_EXEGESIS_CODETEMPLATE_H #include "MCInstrDescView.h" +#include "RegisterValue.h" #include "llvm/ADT/BitmaskEnum.h" namespace llvm { @@ -120,6 +121,9 @@ struct CodeTemplate { std::string Info; // The list of the instructions for this template. std::vector Instructions; + // The list of registers in which to copy the scratch register as a setup + // step. + std::vector ScratchRegisterCopies; // If the template uses the provided scratch memory, the register in which // the pointer to this memory is passed in to the function. unsigned ScratchSpacePointerInReg = 0; diff --git a/tools/llvm-exegesis/lib/ROBSize.cpp b/tools/llvm-exegesis/lib/ROBSize.cpp new file mode 100644 index 00000000000..65d81bd0b71 --- /dev/null +++ b/tools/llvm-exegesis/lib/ROBSize.cpp @@ -0,0 +1,69 @@ +//===-- ROBSize.cpp ---------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "ROBSize.h" + +#include "Assembler.h" +#include "BenchmarkRunner.h" +#include "MCInstrDescView.h" +#include "Target.h" + +namespace llvm { +namespace exegesis { + +ROBSizeSnippetGenerator::~ROBSizeSnippetGenerator() = default; + +llvm::Expected> +ROBSizeSnippetGenerator::generateCodeTemplates(const Instruction &Instr) const { + CodeTemplate CT; + // const llvm::BitVector *ScratchSpaceAliasedRegs = nullptr; + const auto &ET = State.getExegesisTarget(); + const auto &TM = State.getTargetMachine(); + + CT.ScratchSpacePointerInReg = + ET.getScratchMemoryRegister(TM.getTargetTriple()); + if (CT.ScratchSpacePointerInReg == 0) + return llvm::make_error( + "Infeasible : target does not support memory instructions"); + // ScratchSpaceAliasedRegs = + // &State.getRATC().getRegister(CT.ScratchSpacePointerInReg).aliasedBits(); + + const unsigned ECX = 50u; // FIXME: pick any available register. + const unsigned EDX = 52u; // FIXME: pick any available register. 
+ CT.ScratchRegisterCopies.push_back(ECX); + CT.ScratchRegisterCopies.push_back(EDX); + + /* + const llvm::TargetInstrInfo *const TII = + State.getSubtargetInfo().getInstrInfo(); MCInst NopInst; + TII->getNoop(NopInst); + */ + Instruction ChaseRegInst(State.getInstrInfo(), State.getRATC(), ET.getChaseRegOpcode()); + //errs() << ChaseRegInst.Variables.size() << "\n"; + assert(ChaseRegInst.Variables.size() >= 2 && "'mov reg, [reg]'' should have at least two variables"); + InstructionTemplate IT1(ChaseRegInst); + IT1.getValueFor(ChaseRegInst.Variables[0]) = MCOperand::createReg(ECX); + ET.fillMemoryOperands(IT1, ECX, 0); + CT.Instructions.push_back(std::move(IT1)); + InstructionTemplate IT2(ChaseRegInst); + IT2.getValueFor(ChaseRegInst.Variables[0]) = MCOperand::createReg(EDX); + ET.fillMemoryOperands(IT2, EDX, 0); + CT.Instructions.push_back(std::move(IT2)); + + // const auto &ReservedRegisters = State.getRATC().reservedRegisters(); + // No tied variables, we pick random values for defs. + llvm::BitVector Defs(State.getRegInfo().getNumRegs()); + CT.Info = + "instruction has no tied variables picking Uses different from defs"; + // CT.Instructions.push_back(std::move(IT)); + return getSingleton(std::move(CT)); +} + +} // namespace exegesis +} // namespace llvm diff --git a/tools/llvm-exegesis/lib/ROBSize.h b/tools/llvm-exegesis/lib/ROBSize.h new file mode 100644 index 00000000000..e02d51b3570 --- /dev/null +++ b/tools/llvm-exegesis/lib/ROBSize.h @@ -0,0 +1,36 @@ +//===-- ROBSize.h -----------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// A SnippetGenerator implementation for the ROBSize benchmark mode. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_ROBSIZE_H +#define LLVM_TOOLS_LLVM_EXEGESIS_ROBSIZE_H + +#include "BenchmarkRunner.h" +#include "SnippetGenerator.h" + +namespace llvm { +namespace exegesis { + +class ROBSizeSnippetGenerator : public SnippetGenerator { +public: + ROBSizeSnippetGenerator(const LLVMState &State) : SnippetGenerator(State) {} + ~ROBSizeSnippetGenerator() override; + + llvm::Expected> + generateCodeTemplates(const Instruction &Instr) const override; +}; + +} // namespace exegesis +} // namespace llvm + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_ROBSIZE_H diff --git a/tools/llvm-exegesis/lib/RegisterValue.h b/tools/llvm-exegesis/lib/RegisterValue.h index 51ea30ac8eb..689e354e241 100644 --- a/tools/llvm-exegesis/lib/RegisterValue.h +++ b/tools/llvm-exegesis/lib/RegisterValue.h @@ -14,6 +14,9 @@ /// //===----------------------------------------------------------------------===// +#ifndef LLVM_TOOLS_LLVM_EXEGESIS_REGISTERVALUE_H +#define LLVM_TOOLS_LLVM_EXEGESIS_REGISTERVALUE_H + #include #include @@ -22,6 +25,7 @@ namespace exegesis { // A simple object storing the value for a particular register. 
struct RegisterValue { + static RegisterValue zero(unsigned Reg) { return {Reg, llvm::APInt()}; } unsigned Register; llvm::APInt Value; }; @@ -45,3 +49,5 @@ llvm::APInt bitcastFloatValue(const llvm::fltSemantics &FltSemantics, } // namespace exegesis } // namespace llvm + +#endif // LLVM_TOOLS_LLVM_EXEGESIS_REGISTERVALUE_H diff --git a/tools/llvm-exegesis/lib/SnippetGenerator.cpp b/tools/llvm-exegesis/lib/SnippetGenerator.cpp index eb6a8577b57..b8c56265fd3 100644 --- a/tools/llvm-exegesis/lib/SnippetGenerator.cpp +++ b/tools/llvm-exegesis/lib/SnippetGenerator.cpp @@ -56,8 +56,9 @@ SnippetGenerator::generateConfigurations(const Instruction &Instr) const { } if (CT.ScratchSpacePointerInReg) BC.LiveIns.push_back(CT.ScratchSpacePointerInReg); + BC.ScratchRegisterCopies = CT.ScratchRegisterCopies; BC.RegisterInitialValues = - computeRegisterInitialValues(CT.Instructions); + computeRegisterInitialValues(BC.ScratchRegisterCopies, CT.Instructions); Output.push_back(std::move(BC)); } } @@ -67,12 +68,15 @@ SnippetGenerator::generateConfigurations(const Instruction &Instr) const { } std::vector SnippetGenerator::computeRegisterInitialValues( + const std::vector &ScratchRegisterCopies, const std::vector &Instructions) const { // Collect all register uses and create an assignment for each of them. // Ignore memory operands which are handled separately. // Loop invariant: DefinedRegs[i] is true iif it has been set at least once // before the current instruction. 
llvm::BitVector DefinedRegs = State.getRATC().emptyRegisters(); + for (const auto& Reg : ScratchRegisterCopies) + DefinedRegs.set(Reg); std::vector RIV; for (const InstructionTemplate &IT : Instructions) { // Returns the register that this Operand sets or uses, or 0 if this is not @@ -91,7 +95,7 @@ std::vector SnippetGenerator::computeRegisterInitialValues( if (Op.isUse()) { const unsigned Reg = GetOpReg(Op); if (Reg > 0 && !DefinedRegs.test(Reg)) { - RIV.push_back(RegisterValue{Reg, llvm::APInt()}); + RIV.push_back(RegisterValue::zero(Reg)); DefinedRegs.set(Reg); } } diff --git a/tools/llvm-exegesis/lib/SnippetGenerator.h b/tools/llvm-exegesis/lib/SnippetGenerator.h index 967b273182b..0141d5fd4f9 100644 --- a/tools/llvm-exegesis/lib/SnippetGenerator.h +++ b/tools/llvm-exegesis/lib/SnippetGenerator.h @@ -62,6 +62,7 @@ public: // Given a snippet, computes which registers the setup code needs to define. std::vector computeRegisterInitialValues( + const std::vector &ScratchRegisterCopies, const std::vector &Snippet) const; protected: diff --git a/tools/llvm-exegesis/lib/Target.cpp b/tools/llvm-exegesis/lib/Target.cpp index 06557770418..085518c9a67 100644 --- a/tools/llvm-exegesis/lib/Target.cpp +++ b/tools/llvm-exegesis/lib/Target.cpp @@ -9,6 +9,7 @@ #include "Target.h" #include "Latency.h" +#include "ROBSize.h" #include "Uops.h" namespace llvm { @@ -37,6 +38,31 @@ void ExegesisTarget::registerTarget(ExegesisTarget *Target) { FirstTarget = Target; } +std::unique_ptr +ExegesisTarget::createLatencySnippetGenerator(const LLVMState &State) const { + return llvm::make_unique(State); +} + +std::unique_ptr +ExegesisTarget::createUopsSnippetGenerator(const LLVMState &State) const { + return llvm::make_unique(State); +} + +std::unique_ptr +static createROBSizeSnippetGenerator(const LLVMState &State) { + return llvm::make_unique(State); +} + +std::unique_ptr +ExegesisTarget::createLatencyBenchmarkRunner(const LLVMState &State) const { + return llvm::make_unique(State); +} + 
+std::unique_ptr +ExegesisTarget::createUopsBenchmarkRunner(const LLVMState &State) const { + return llvm::make_unique(State); +} + std::unique_ptr ExegesisTarget::createSnippetGenerator(InstructionBenchmark::ModeE Mode, const LLVMState &State) const { @@ -47,6 +73,8 @@ ExegesisTarget::createSnippetGenerator(InstructionBenchmark::ModeE Mode, return createLatencySnippetGenerator(State); case InstructionBenchmark::Uops: return createUopsSnippetGenerator(State); + case InstructionBenchmark::ROBSize: + return createROBSizeSnippetGenerator(State); } return nullptr; } @@ -58,6 +86,7 @@ ExegesisTarget::createBenchmarkRunner(InstructionBenchmark::ModeE Mode, case InstructionBenchmark::Unknown: return nullptr; case InstructionBenchmark::Latency: + case InstructionBenchmark::ROBSize: return createLatencyBenchmarkRunner(State); case InstructionBenchmark::Uops: return createUopsBenchmarkRunner(State); @@ -65,26 +94,6 @@ ExegesisTarget::createBenchmarkRunner(InstructionBenchmark::ModeE Mode, return nullptr; } -std::unique_ptr -ExegesisTarget::createLatencySnippetGenerator(const LLVMState &State) const { - return llvm::make_unique(State); -} - -std::unique_ptr -ExegesisTarget::createUopsSnippetGenerator(const LLVMState &State) const { - return llvm::make_unique(State); -} - -std::unique_ptr -ExegesisTarget::createLatencyBenchmarkRunner(const LLVMState &State) const { - return llvm::make_unique(State); -} - -std::unique_ptr -ExegesisTarget::createUopsBenchmarkRunner(const LLVMState &State) const { - return llvm::make_unique(State); -} - static_assert(std::is_pod::value, "We shouldn't have dynamic initialization here"); const PfmCountersInfo PfmCountersInfo::Default = {nullptr, nullptr, nullptr, 0u}; @@ -123,6 +132,11 @@ private: llvm_unreachable("Not yet implemented"); } + std::vector copyReg(const llvm::MCSubtargetInfo &STI, + unsigned ToReg, unsigned FromReg) const override { + llvm_unreachable("Not yet implemented"); + } + bool matchesArch(llvm::Triple::ArchType Arch) const 
override { llvm_unreachable("never called"); return false; diff --git a/tools/llvm-exegesis/lib/Target.h b/tools/llvm-exegesis/lib/Target.h index b0f0e996173..c4be621b291 100644 --- a/tools/llvm-exegesis/lib/Target.h +++ b/tools/llvm-exegesis/lib/Target.h @@ -76,6 +76,11 @@ public: setRegTo(const llvm::MCSubtargetInfo &STI, unsigned Reg, const llvm::APInt &Value) const = 0; + // Generates code to copy `FromReg` to `ToReg`. + // Precondition: Registers must be the same size. + virtual std::vector + copyReg(const llvm::MCSubtargetInfo &STI, unsigned ToReg, unsigned FromReg) const = 0; + // Returns the register pointing to scratch memory, or 0 if this target // does not support memory operands. The benchmark function uses the // default calling convention. @@ -83,10 +88,16 @@ public: return 0; } + // Returns the opcode to move the value at `[Reg]` into `Reg`, where `Reg` is + // from the same register class as getScratchMemoryRegister(). + virtual unsigned getChaseRegOpcode() const { + llvm_unreachable( + "getChaseRegOpcode() requires getScratchMemoryRegister() > 0"); + } + // Fills memory operands with references to the address at [Reg] + Offset. virtual void fillMemoryOperands(InstructionTemplate &IT, unsigned Reg, unsigned Offset) const { - llvm_unreachable( "fillMemoryOperands() requires getScratchMemoryRegister() > 0"); } diff --git a/tools/llvm-exegesis/lib/X86/Target.cpp b/tools/llvm-exegesis/lib/X86/Target.cpp index 618e4d77db4..282fd1db154 100644 --- a/tools/llvm-exegesis/lib/X86/Target.cpp +++ b/tools/llvm-exegesis/lib/X86/Target.cpp @@ -484,6 +484,22 @@ private: return {}; // Not yet implemented. } + // Copies `FromReg` into `ToReg`. Only GR64 destinations are handled so far; + // any other destination register produces no instructions. + std::vector copyReg(const llvm::MCSubtargetInfo &STI, + unsigned ToReg, + unsigned FromReg) const override { + if (llvm::X86::GR64RegClass.contains(ToReg)) { + assert(llvm::X86::GR64RegClass.contains(FromReg) && "registers must be the same size"); + return {llvm::MCInstBuilder(X86::MOV64rr).addReg(ToReg).addReg(FromReg)}; + } + return {}; // Not yet implemented. 
+ } + + unsigned getChaseRegOpcode() const override { + return X86::MOV64rm; + } + std::unique_ptr createLatencySnippetGenerator(const LLVMState &State) const override { return llvm::make_unique(State); diff --git a/tools/llvm-exegesis/llvm-exegesis.cpp b/tools/llvm-exegesis/llvm-exegesis.cpp index a28e68ec006..6d15fcfef4e 100644 --- a/tools/llvm-exegesis/llvm-exegesis.cpp +++ b/tools/llvm-exegesis/llvm-exegesis.cpp @@ -63,6 +63,8 @@ static cl::opt "latency", "Instruction Latency"), clEnumValN(exegesis::InstructionBenchmark::Uops, "uops", "Uop Decomposition"), + clEnumValN(exegesis::InstructionBenchmark::ROBSize, + "rob_size", "ROB Size"), // When not asking for a specific benchmark mode, // we'll analyse the results. clEnumValN(exegesis::InstructionBenchmark::Unknown, @@ -201,7 +203,6 @@ public: return; if (CommentText.consume_front("DEFREG")) { // LLVM-EXEGESIS-DEFREF - RegisterValue RegVal; llvm::SmallVector Parts; CommentText.split(Parts, ' ', /*unlimited splits*/ -1, /*do not keep empty strings*/ false); @@ -210,6 +211,7 @@ public: << "\n"; ++InvalidComments; } + RegisterValue RegVal; if (!(RegVal.Register = findRegisterByName(Parts[0].trim()))) { llvm::errs() << "unknown register in 'LLVM-EXEGESIS-DEFREG " << CommentText << "\n";