mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
[llvm-exegesis] Add uop computation for more X87 instruction classes.
Summary: This allows measuring comparisons (UCOM_FpIr32, UCOM_Fpr32, ...) and conditional moves (CMOVBE_Fp32, ...). Reviewers: gchatelet. Subscribers: tschuett, llvm-commits. Differential Revision: https://reviews.llvm.org/D48713 llvm-svn: 336352
This commit is contained in:
parent
96a0660150
commit
bd719986f6
@ -196,8 +196,8 @@ BenchmarkRunner::writeObjectFile(const BenchmarkConfiguration::Setup &Setup,
|
||||
return ResultPath.str();
|
||||
}
|
||||
|
||||
llvm::Expected<SnippetPrototype> BenchmarkRunner::generateSelfAliasingPrototype(
|
||||
const Instruction &Instr) const {
|
||||
llvm::Expected<SnippetPrototype>
|
||||
BenchmarkRunner::generateSelfAliasingPrototype(const Instruction &Instr) const {
|
||||
const AliasingConfigurations SelfAliasing(Instr, Instr);
|
||||
if (SelfAliasing.empty()) {
|
||||
return llvm::make_error<BenchmarkFailure>("empty self aliasing");
|
||||
@ -217,4 +217,13 @@ llvm::Expected<SnippetPrototype> BenchmarkRunner::generateSelfAliasingPrototype(
|
||||
return std::move(Prototype);
|
||||
}
|
||||
|
||||
// Builds a prototype whose snippet holds only `Instr`, leaving its operands
// without any assignment constraint. `Msg` is the caller's reason for taking
// this path; it becomes the first part of the prototype's explanation.
llvm::Expected<SnippetPrototype>
|
||||
BenchmarkRunner::generateUnconstrainedPrototype(const Instruction &Instr,
|
||||
llvm::StringRef Msg) const {
|
||||
SnippetPrototype Prototype;
|
||||
// Explanation reads "<Msg>, repeating an unconstrained assignment".
Prototype.Explanation =
|
||||
llvm::formatv("{0}, repeating an unconstrained assignment", Msg);
|
||||
// The snippet is made of this single instruction.
Prototype.Snippet.emplace_back(Instr);
|
||||
// The local is moved into the returned llvm::Expected<SnippetPrototype>.
return std::move(Prototype);
|
||||
}
|
||||
} // namespace exegesis
|
||||
|
@ -70,8 +70,13 @@ protected:
|
||||
const LLVMState &State;
|
||||
const RegisterAliasingTrackerCache RATC;
|
||||
|
||||
// Generates a single instruction prototype that has a self-dependency.
|
||||
llvm::Expected<SnippetPrototype>
|
||||
generateSelfAliasingPrototype(const Instruction &Instr) const;
|
||||
// Generates a single instruction prototype without assignment constraints.
|
||||
llvm::Expected<SnippetPrototype>
|
||||
generateUnconstrainedPrototype(const Instruction &Instr,
|
||||
llvm::StringRef Msg) const;
|
||||
|
||||
private:
|
||||
// API to be implemented by subclasses.
|
||||
|
@ -139,16 +139,10 @@ UopsBenchmarkRunner::generatePrototype(unsigned Opcode) const {
|
||||
const Instruction Instr(InstrDesc, RATC);
|
||||
const AliasingConfigurations SelfAliasing(Instr, Instr);
|
||||
if (SelfAliasing.empty()) {
|
||||
SnippetPrototype Prototype;
|
||||
Prototype.Explanation = "instruction is parallel, repeating a random one.";
|
||||
Prototype.Snippet.emplace_back(Instr);
|
||||
return std::move(Prototype);
|
||||
return generateUnconstrainedPrototype(Instr, "instruction is parallel");
|
||||
}
|
||||
if (SelfAliasing.hasImplicitAliasing()) {
|
||||
SnippetPrototype Prototype;
|
||||
Prototype.Explanation = "instruction is serial, repeating a random one.";
|
||||
Prototype.Snippet.emplace_back(Instr);
|
||||
return std::move(Prototype);
|
||||
return generateUnconstrainedPrototype(Instr, "instruction is serial");
|
||||
}
|
||||
const auto TiedVariables = getTiedVariables(Instr);
|
||||
if (!TiedVariables.empty()) {
|
||||
|
@ -44,9 +44,9 @@ template <typename Impl> class X86BenchmarkRunner : public Impl {
|
||||
case llvm::X86II::NotFP:
|
||||
break;
|
||||
case llvm::X86II::ZeroArgFP:
|
||||
return Impl::handleZeroArgFP(Instr);
|
||||
return llvm::make_error<BenchmarkFailure>("Unsupported x87 ZeroArgFP");
|
||||
case llvm::X86II::OneArgFP:
|
||||
return Impl::handleOneArgFP(Instr); // fstp ST(0)
|
||||
return llvm::make_error<BenchmarkFailure>("Unsupported x87 OneArgFP");
|
||||
case llvm::X86II::OneArgFPRW:
|
||||
case llvm::X86II::TwoArgFP: {
|
||||
// These are instructions like
|
||||
@ -61,7 +61,7 @@ template <typename Impl> class X86BenchmarkRunner : public Impl {
|
||||
case llvm::X86II::CondMovFP:
|
||||
return Impl::handleCondMovFP(Instr);
|
||||
case llvm::X86II::SpecialFP:
|
||||
return Impl::handleSpecialFP(Instr);
|
||||
return llvm::make_error<BenchmarkFailure>("Unsupported x87 SpecialFP");
|
||||
default:
|
||||
llvm_unreachable("Unknown FP Type!");
|
||||
}
|
||||
@ -76,14 +76,6 @@ protected:
|
||||
using Base = LatencyBenchmarkRunner;
|
||||
using Base::Base;
|
||||
llvm::Expected<SnippetPrototype>
|
||||
handleZeroArgFP(const Instruction &Instr) const {
|
||||
return llvm::make_error<BenchmarkFailure>("Unsupported x87 ZeroArgFP");
|
||||
}
|
||||
llvm::Expected<SnippetPrototype>
|
||||
handleOneArgFP(const Instruction &Instr) const {
|
||||
return llvm::make_error<BenchmarkFailure>("Unsupported x87 OneArgFP");
|
||||
}
|
||||
llvm::Expected<SnippetPrototype>
|
||||
handleCompareFP(const Instruction &Instr) const {
|
||||
return llvm::make_error<BenchmarkFailure>("Unsupported x87 CompareFP");
|
||||
}
|
||||
@ -91,35 +83,23 @@ protected:
|
||||
handleCondMovFP(const Instruction &Instr) const {
|
||||
return llvm::make_error<BenchmarkFailure>("Unsupported x87 CondMovFP");
|
||||
}
|
||||
llvm::Expected<SnippetPrototype>
|
||||
handleSpecialFP(const Instruction &Instr) const {
|
||||
return llvm::make_error<BenchmarkFailure>("Unsupported x87 SpecialFP");
|
||||
}
|
||||
};
|
||||
|
||||
class X86UopsImpl : public UopsBenchmarkRunner {
|
||||
protected:
|
||||
using Base = UopsBenchmarkRunner;
|
||||
using Base::Base;
|
||||
llvm::Expected<SnippetPrototype>
|
||||
handleZeroArgFP(const Instruction &Instr) const {
|
||||
return llvm::make_error<BenchmarkFailure>("Unsupported x87 ZeroArgFP");
|
||||
}
|
||||
llvm::Expected<SnippetPrototype>
|
||||
handleOneArgFP(const Instruction &Instr) const {
|
||||
return llvm::make_error<BenchmarkFailure>("Unsupported x87 OneArgFP");
|
||||
}
|
||||
// We can compute uops for any FP instruction that does not grow or shrink the
|
||||
// stack (either do not touch the stack or push as much as they pop).
|
||||
llvm::Expected<SnippetPrototype>
|
||||
handleCompareFP(const Instruction &Instr) const {
|
||||
return llvm::make_error<BenchmarkFailure>("Unsupported x87 CompareFP");
|
||||
return generateUnconstrainedPrototype(
|
||||
Instr, "instruction does not grow/shrink the FP stack");
|
||||
}
|
||||
llvm::Expected<SnippetPrototype>
|
||||
handleCondMovFP(const Instruction &Instr) const {
|
||||
return llvm::make_error<BenchmarkFailure>("Unsupported x87 CondMovFP");
|
||||
}
|
||||
llvm::Expected<SnippetPrototype>
|
||||
handleSpecialFP(const Instruction &Instr) const {
|
||||
return llvm::make_error<BenchmarkFailure>("Unsupported x87 SpecialFP");
|
||||
return generateUnconstrainedPrototype(
|
||||
Instr, "instruction does not grow/shrink the FP stack");
|
||||
}
|
||||
};
|
||||
|
||||
@ -163,6 +143,15 @@ class ExegesisX86Target : public ExegesisTarget {
|
||||
llvm::X86::RFP64RegClass.contains(Reg) ||
|
||||
llvm::X86::RFP80RegClass.contains(Reg))
|
||||
return setVectorRegToConstant(Reg, 8, llvm::X86::LD_Fp64m);
|
||||
if (Reg == llvm::X86::EFLAGS) {
|
||||
// Set all flags to 0 but the bits that are "reserved and set to 1".
|
||||
constexpr const uint32_t kImmValue = 0x00007002u;
|
||||
std::vector<llvm::MCInst> Result;
|
||||
Result.push_back(allocateStackSpace(8));
|
||||
Result.push_back(fillStackSpace(llvm::X86::MOV64mi32, 0, kImmValue));
|
||||
Result.push_back(llvm::MCInstBuilder(llvm::X86::POPF64)); // Also pops.
|
||||
return Result;
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
@ -193,41 +182,59 @@ private:
|
||||
// value that has set bits for all byte values and is a normal float/
|
||||
// double. 0x40404040 is ~32.5 when interpreted as a double and ~3.0f when
|
||||
// interpreted as a float.
|
||||
constexpr const uint64_t kImmValue = 0x40404040ull;
|
||||
constexpr const uint32_t kImmValue = 0x40404040u;
|
||||
std::vector<llvm::MCInst> Result;
|
||||
// Allocate scratch memory on the stack.
|
||||
Result.push_back(llvm::MCInstBuilder(llvm::X86::SUB64ri8)
|
||||
.addReg(llvm::X86::RSP)
|
||||
.addReg(llvm::X86::RSP)
|
||||
.addImm(RegSizeBytes));
|
||||
// Fill scratch memory.
|
||||
for (unsigned Disp = 0; Disp < RegSizeBytes; Disp += 4) {
|
||||
Result.push_back(llvm::MCInstBuilder(llvm::X86::MOV32mi)
|
||||
// Address = ESP
|
||||
.addReg(llvm::X86::RSP) // BaseReg
|
||||
.addImm(1) // ScaleAmt
|
||||
.addReg(0) // IndexReg
|
||||
.addImm(Disp) // Disp
|
||||
.addReg(0) // Segment
|
||||
// Immediate.
|
||||
.addImm(kImmValue));
|
||||
Result.push_back(allocateStackSpace(RegSizeBytes));
|
||||
constexpr const unsigned kMov32NumBytes = 4;
|
||||
for (unsigned Disp = 0; Disp < RegSizeBytes; Disp += kMov32NumBytes) {
|
||||
Result.push_back(fillStackSpace(llvm::X86::MOV32mi, Disp, kImmValue));
|
||||
}
|
||||
// Load Reg from scratch memory.
|
||||
Result.push_back(llvm::MCInstBuilder(RMOpcode)
|
||||
.addReg(Reg)
|
||||
// Address = ESP
|
||||
.addReg(llvm::X86::RSP) // BaseReg
|
||||
.addImm(1) // ScaleAmt
|
||||
.addReg(0) // IndexReg
|
||||
.addImm(0) // Disp
|
||||
.addReg(0)); // Segment
|
||||
// Release scratch memory.
|
||||
Result.push_back(llvm::MCInstBuilder(llvm::X86::ADD64ri8)
|
||||
.addReg(llvm::X86::RSP)
|
||||
.addReg(llvm::X86::RSP)
|
||||
.addImm(RegSizeBytes));
|
||||
Result.push_back(loadToReg(Reg, RMOpcode));
|
||||
Result.push_back(releaseStackSpace(RegSizeBytes));
|
||||
return Result;
|
||||
}
|
||||
|
||||
// Allocates scratch memory on the stack: returns a SUB64ri8 instruction whose
// two register operands are both RSP and whose immediate is `Bytes`.
// NOTE(review): the `ri8` opcode form presumably encodes an 8-bit immediate,
// so `Bytes` is expected to stay small - confirm against the opcode definition.
|
||||
static llvm::MCInst allocateStackSpace(unsigned Bytes) {
|
||||
return llvm::MCInstBuilder(llvm::X86::SUB64ri8)
|
||||
.addReg(llvm::X86::RSP)
|
||||
.addReg(llvm::X86::RSP)
|
||||
.addImm(Bytes);
|
||||
}
|
||||
|
||||
// Fills scratch memory at offset `OffsetBytes` with value `Imm`.
// Returns a `MovOpcode` instruction carrying a memory operand (base RSP,
// displacement `OffsetBytes`) followed by the immediate `Imm`. How many bytes
// get written depends on the opcode chosen by the caller.
|
||||
static llvm::MCInst fillStackSpace(unsigned MovOpcode, unsigned OffsetBytes,
|
||||
uint64_t Imm) {
|
||||
return llvm::MCInstBuilder(MovOpcode)
|
||||
// Address = RSP + OffsetBytes
|
||||
.addReg(llvm::X86::RSP) // BaseReg
|
||||
.addImm(1) // ScaleAmt
|
||||
.addReg(0) // IndexReg
|
||||
.addImm(OffsetBytes) // Disp
|
||||
.addReg(0) // Segment
|
||||
// Immediate.
|
||||
.addImm(Imm);
|
||||
}
|
||||
|
||||
// Loads scratch memory into register `Reg` using opcode `RMOpcode`.
// Returns an `RMOpcode` instruction whose first operand is `Reg` and whose
// memory operand has base RSP and displacement 0.
|
||||
static llvm::MCInst loadToReg(unsigned Reg, unsigned RMOpcode) {
|
||||
return llvm::MCInstBuilder(RMOpcode)
|
||||
.addReg(Reg)
|
||||
// Address = RSP
|
||||
.addReg(llvm::X86::RSP) // BaseReg
|
||||
.addImm(1) // ScaleAmt
|
||||
.addReg(0) // IndexReg
|
||||
.addImm(0) // Disp
|
||||
.addReg(0); // Segment
|
||||
}
|
||||
|
||||
// Releases scratch memory: returns an ADD64ri8 instruction whose two register
// operands are both RSP and whose immediate is `Bytes`. It mirrors the
// SUB64ri8 built by allocateStackSpace, so callers should pass the same size.
|
||||
static llvm::MCInst releaseStackSpace(unsigned Bytes) {
|
||||
return llvm::MCInstBuilder(llvm::X86::ADD64ri8)
|
||||
.addReg(llvm::X86::RSP)
|
||||
.addReg(llvm::X86::RSP)
|
||||
.addImm(Bytes);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
Loading…
Reference in New Issue
Block a user