1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00

[llvm-exegesis] Add uop computation for more X87 instruction classes.

Summary:
This allows measuring comparisons (UCOM_FpIr32, UCOM_Fpr32, ...) and
conditional moves (CMOVBE_Fp32, ...).

Reviewers: gchatelet

Subscribers: tschuett, llvm-commits

Differential Revision: https://reviews.llvm.org/D48713

llvm-svn: 336352
This commit is contained in:
Clement Courbet 2018-07-05 13:54:51 +00:00
parent 96a0660150
commit bd719986f6
4 changed files with 85 additions and 70 deletions

View File

@ -196,8 +196,8 @@ BenchmarkRunner::writeObjectFile(const BenchmarkConfiguration::Setup &Setup,
return ResultPath.str();
}
llvm::Expected<SnippetPrototype> BenchmarkRunner::generateSelfAliasingPrototype(
const Instruction &Instr) const {
llvm::Expected<SnippetPrototype>
BenchmarkRunner::generateSelfAliasingPrototype(const Instruction &Instr) const {
const AliasingConfigurations SelfAliasing(Instr, Instr);
if (SelfAliasing.empty()) {
return llvm::make_error<BenchmarkFailure>("empty self aliasing");
@ -217,4 +217,13 @@ llvm::Expected<SnippetPrototype> BenchmarkRunner::generateSelfAliasingPrototype(
return std::move(Prototype);
}
// Builds a prototype whose snippet is the single instruction `Instr`, with no
// constraints placed on how its operands are assigned. `Msg` is used as the
// leading part of the prototype's explanation.
llvm::Expected<SnippetPrototype>
BenchmarkRunner::generateUnconstrainedPrototype(const Instruction &Instr,
                                                llvm::StringRef Msg) const {
  SnippetPrototype Result;
  Result.Snippet.emplace_back(Instr);
  Result.Explanation =
      llvm::formatv("{0}, repeating an unconstrained assignment", Msg);
  // The explicit move drives the conversion into llvm::Expected.
  return std::move(Result);
}
} // namespace exegesis

View File

@ -70,8 +70,13 @@ protected:
const LLVMState &State;
const RegisterAliasingTrackerCache RATC;
// Generates a single instruction prototype that has a self-dependency.
llvm::Expected<SnippetPrototype>
generateSelfAliasingPrototype(const Instruction &Instr) const;
// Generates a single instruction prototype without assignment constraints.
llvm::Expected<SnippetPrototype>
generateUnconstrainedPrototype(const Instruction &Instr,
llvm::StringRef Msg) const;
private:
// API to be implemented by subclasses.

View File

@ -139,16 +139,10 @@ UopsBenchmarkRunner::generatePrototype(unsigned Opcode) const {
const Instruction Instr(InstrDesc, RATC);
const AliasingConfigurations SelfAliasing(Instr, Instr);
if (SelfAliasing.empty()) {
SnippetPrototype Prototype;
Prototype.Explanation = "instruction is parallel, repeating a random one.";
Prototype.Snippet.emplace_back(Instr);
return std::move(Prototype);
return generateUnconstrainedPrototype(Instr, "instruction is parallel");
}
if (SelfAliasing.hasImplicitAliasing()) {
SnippetPrototype Prototype;
Prototype.Explanation = "instruction is serial, repeating a random one.";
Prototype.Snippet.emplace_back(Instr);
return std::move(Prototype);
return generateUnconstrainedPrototype(Instr, "instruction is serial");
}
const auto TiedVariables = getTiedVariables(Instr);
if (!TiedVariables.empty()) {

View File

@ -44,9 +44,9 @@ template <typename Impl> class X86BenchmarkRunner : public Impl {
case llvm::X86II::NotFP:
break;
case llvm::X86II::ZeroArgFP:
return Impl::handleZeroArgFP(Instr);
return llvm::make_error<BenchmarkFailure>("Unsupported x87 ZeroArgFP");
case llvm::X86II::OneArgFP:
return Impl::handleOneArgFP(Instr); // fstp ST(0)
return llvm::make_error<BenchmarkFailure>("Unsupported x87 OneArgFP");
case llvm::X86II::OneArgFPRW:
case llvm::X86II::TwoArgFP: {
// These are instructions like
@ -61,7 +61,7 @@ template <typename Impl> class X86BenchmarkRunner : public Impl {
case llvm::X86II::CondMovFP:
return Impl::handleCondMovFP(Instr);
case llvm::X86II::SpecialFP:
return Impl::handleSpecialFP(Instr);
return llvm::make_error<BenchmarkFailure>("Unsupported x87 SpecialFP");
default:
llvm_unreachable("Unknown FP Type!");
}
@ -76,14 +76,6 @@ protected:
using Base = LatencyBenchmarkRunner;
using Base::Base;
llvm::Expected<SnippetPrototype>
handleZeroArgFP(const Instruction &Instr) const {
return llvm::make_error<BenchmarkFailure>("Unsupported x87 ZeroArgFP");
}
llvm::Expected<SnippetPrototype>
handleOneArgFP(const Instruction &Instr) const {
return llvm::make_error<BenchmarkFailure>("Unsupported x87 OneArgFP");
}
llvm::Expected<SnippetPrototype>
handleCompareFP(const Instruction &Instr) const {
return llvm::make_error<BenchmarkFailure>("Unsupported x87 CompareFP");
}
@ -91,35 +83,23 @@ protected:
handleCondMovFP(const Instruction &Instr) const {
return llvm::make_error<BenchmarkFailure>("Unsupported x87 CondMovFP");
}
llvm::Expected<SnippetPrototype>
handleSpecialFP(const Instruction &Instr) const {
return llvm::make_error<BenchmarkFailure>("Unsupported x87 SpecialFP");
}
};
class X86UopsImpl : public UopsBenchmarkRunner {
protected:
using Base = UopsBenchmarkRunner;
using Base::Base;
llvm::Expected<SnippetPrototype>
handleZeroArgFP(const Instruction &Instr) const {
return llvm::make_error<BenchmarkFailure>("Unsupported x87 ZeroArgFP");
}
llvm::Expected<SnippetPrototype>
handleOneArgFP(const Instruction &Instr) const {
return llvm::make_error<BenchmarkFailure>("Unsupported x87 OneArgFP");
}
// We can compute uops for any FP instruction that does not grow or shrink the
// stack (i.e. it either does not touch the stack or pushes as much as it pops).
llvm::Expected<SnippetPrototype>
handleCompareFP(const Instruction &Instr) const {
return llvm::make_error<BenchmarkFailure>("Unsupported x87 CompareFP");
return generateUnconstrainedPrototype(
Instr, "instruction does not grow/shrink the FP stack");
}
llvm::Expected<SnippetPrototype>
handleCondMovFP(const Instruction &Instr) const {
return llvm::make_error<BenchmarkFailure>("Unsupported x87 CondMovFP");
}
llvm::Expected<SnippetPrototype>
handleSpecialFP(const Instruction &Instr) const {
return llvm::make_error<BenchmarkFailure>("Unsupported x87 SpecialFP");
return generateUnconstrainedPrototype(
Instr, "instruction does not grow/shrink the FP stack");
}
};
@ -163,6 +143,15 @@ class ExegesisX86Target : public ExegesisTarget {
llvm::X86::RFP64RegClass.contains(Reg) ||
llvm::X86::RFP80RegClass.contains(Reg))
return setVectorRegToConstant(Reg, 8, llvm::X86::LD_Fp64m);
if (Reg == llvm::X86::EFLAGS) {
// Set all flags to 0 but the bits that are "reserved and set to 1".
constexpr const uint32_t kImmValue = 0x00007002u;
std::vector<llvm::MCInst> Result;
Result.push_back(allocateStackSpace(8));
Result.push_back(fillStackSpace(llvm::X86::MOV64mi32, 0, kImmValue));
Result.push_back(llvm::MCInstBuilder(llvm::X86::POPF64)); // Also pops.
return Result;
}
return {};
}
@ -193,41 +182,59 @@ private:
// value that has set bits for all byte values and is a normal float/
// double. 0x40404040 is ~32.5 when interpreted as a double and ~3.0f when
// interpreted as a float.
constexpr const uint64_t kImmValue = 0x40404040ull;
constexpr const uint32_t kImmValue = 0x40404040u;
std::vector<llvm::MCInst> Result;
// Allocate scratch memory on the stack.
Result.push_back(llvm::MCInstBuilder(llvm::X86::SUB64ri8)
.addReg(llvm::X86::RSP)
.addReg(llvm::X86::RSP)
.addImm(RegSizeBytes));
// Fill scratch memory.
for (unsigned Disp = 0; Disp < RegSizeBytes; Disp += 4) {
Result.push_back(llvm::MCInstBuilder(llvm::X86::MOV32mi)
// Address = ESP
.addReg(llvm::X86::RSP) // BaseReg
.addImm(1) // ScaleAmt
.addReg(0) // IndexReg
.addImm(Disp) // Disp
.addReg(0) // Segment
// Immediate.
.addImm(kImmValue));
Result.push_back(allocateStackSpace(RegSizeBytes));
constexpr const unsigned kMov32NumBytes = 4;
for (unsigned Disp = 0; Disp < RegSizeBytes; Disp += kMov32NumBytes) {
Result.push_back(fillStackSpace(llvm::X86::MOV32mi, Disp, kImmValue));
}
// Load Reg from scratch memory.
Result.push_back(llvm::MCInstBuilder(RMOpcode)
.addReg(Reg)
// Address = ESP
.addReg(llvm::X86::RSP) // BaseReg
.addImm(1) // ScaleAmt
.addReg(0) // IndexReg
.addImm(0) // Disp
.addReg(0)); // Segment
// Release scratch memory.
Result.push_back(llvm::MCInstBuilder(llvm::X86::ADD64ri8)
.addReg(llvm::X86::RSP)
.addReg(llvm::X86::RSP)
.addImm(RegSizeBytes));
Result.push_back(loadToReg(Reg, RMOpcode));
Result.push_back(releaseStackSpace(RegSizeBytes));
return Result;
}
// Returns the instruction that allocates `Bytes` bytes of scratch memory on
// the stack by subtracting `Bytes` from RSP.
static llvm::MCInst allocateStackSpace(unsigned Bytes) {
  llvm::MCInstBuilder Sub(llvm::X86::SUB64ri8);
  Sub.addReg(llvm::X86::RSP);
  Sub.addReg(llvm::X86::RSP);
  Sub.addImm(Bytes);
  return Sub;
}
// Returns the `MovOpcode` instruction that writes the immediate `Imm` into
// scratch memory at byte offset `OffsetBytes` from RSP.
static llvm::MCInst fillStackSpace(unsigned MovOpcode, unsigned OffsetBytes,
                                   uint64_t Imm) {
  llvm::MCInstBuilder Fill(MovOpcode);
  // Memory operand: address = RSP + OffsetBytes.
  Fill.addReg(llvm::X86::RSP); // BaseReg
  Fill.addImm(1);              // ScaleAmt
  Fill.addReg(0);              // IndexReg
  Fill.addImm(OffsetBytes);    // Disp
  Fill.addReg(0);              // Segment
  // Immediate value to write.
  Fill.addImm(Imm);
  return Fill;
}
// Returns the `RMOpcode` instruction that loads register `Reg` from the
// scratch memory located at RSP (displacement 0).
static llvm::MCInst loadToReg(unsigned Reg, unsigned RMOpcode) {
  llvm::MCInstBuilder Load(RMOpcode);
  Load.addReg(Reg);
  // Memory operand: address = RSP.
  Load.addReg(llvm::X86::RSP); // BaseReg
  Load.addImm(1);              // ScaleAmt
  Load.addReg(0);              // IndexReg
  Load.addImm(0);              // Disp
  Load.addReg(0);              // Segment
  return Load;
}
// Returns the instruction that releases `Bytes` bytes of scratch memory by
// adding `Bytes` back to RSP.
static llvm::MCInst releaseStackSpace(unsigned Bytes) {
  llvm::MCInstBuilder Add(llvm::X86::ADD64ri8);
  Add.addReg(llvm::X86::RSP);
  Add.addReg(llvm::X86::RSP);
  Add.addImm(Bytes);
  return Add;
}
};
} // namespace