1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-18 10:32:48 +02:00

[llvm-exegesis] Refactor how forbidden registers are computed.

Summary:
Right now latency generation can incorrectly select the scratch register
as a dependency-carrying register.
 - Move the logic for preventing register selection from Uops
   implementation to common SnippetGenerator class.
 - Aliasing detection now takes a set of forbidden registers just like
   random register assignment does.

Reviewers: gchatelet

Subscribers: tschuett, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D68084

llvm-svn: 373048
This commit is contained in:
Clement Courbet 2019-09-27 08:04:10 +00:00
parent e03a35d303
commit e063a909a2
10 changed files with 150 additions and 99 deletions

View File

@ -39,7 +39,8 @@ static constexpr size_t kMaxAliasingInstructions = 10;
static std::vector<Instruction>
computeAliasingInstructions(const LLVMState &State, const Instruction &Instr,
size_t MaxAliasingInstructions) {
size_t MaxAliasingInstructions,
const BitVector &ForbiddenRegisters) {
// Randomly iterate the set of instructions.
std::vector<unsigned> Opcodes;
Opcodes.resize(State.getInstrInfo().getNumOpcodes());
@ -53,15 +54,16 @@ computeAliasingInstructions(const LLVMState &State, const Instruction &Instr,
const Instruction &OtherInstr = State.getIC().getInstr(OtherOpcode);
if (OtherInstr.hasMemoryOperands())
continue;
if (Instr.hasAliasingRegistersThrough(OtherInstr))
AliasingInstructions.push_back(std::move(OtherInstr));
if (Instr.hasAliasingRegistersThrough(OtherInstr, ForbiddenRegisters))
AliasingInstructions.push_back(OtherInstr);
if (AliasingInstructions.size() >= MaxAliasingInstructions)
break;
}
return AliasingInstructions;
}
static ExecutionMode getExecutionModes(const Instruction &Instr) {
static ExecutionMode getExecutionModes(const Instruction &Instr,
const BitVector &ForbiddenRegisters) {
ExecutionMode EM = ExecutionMode::UNKNOWN;
if (Instr.hasAliasingImplicitRegisters())
EM |= ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS;
@ -70,7 +72,7 @@ static ExecutionMode getExecutionModes(const Instruction &Instr) {
if (Instr.hasMemoryOperands())
EM |= ExecutionMode::SERIAL_VIA_MEMORY_INSTR;
else {
if (Instr.hasAliasingRegisters())
if (Instr.hasAliasingRegisters(ForbiddenRegisters))
EM |= ExecutionMode::SERIAL_VIA_EXPLICIT_REGS;
if (Instr.hasOneUseOrOneDef())
EM |= ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR;
@ -80,6 +82,7 @@ static ExecutionMode getExecutionModes(const Instruction &Instr) {
static void appendCodeTemplates(const LLVMState &State,
const Instruction &Instr,
const BitVector &ForbiddenRegisters,
ExecutionMode ExecutionModeBit,
llvm::StringRef ExecutionClassDescription,
std::vector<CodeTemplate> &CodeTemplates) {
@ -122,8 +125,8 @@ static void appendCodeTemplates(const LLVMState &State,
}
case ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR: {
// Select back-to-back non-memory instruction.
for (const auto OtherInstr :
computeAliasingInstructions(State, Instr, kMaxAliasingInstructions)) {
for (const auto OtherInstr : computeAliasingInstructions(
State, Instr, kMaxAliasingInstructions, ForbiddenRegisters)) {
const AliasingConfigurations Forward(Instr, OtherInstr);
const AliasingConfigurations Back(OtherInstr, Instr);
InstructionTemplate ThisIT(Instr);
@ -149,13 +152,14 @@ static void appendCodeTemplates(const LLVMState &State,
LatencySnippetGenerator::~LatencySnippetGenerator() = default;
llvm::Expected<std::vector<CodeTemplate>>
LatencySnippetGenerator::generateCodeTemplates(const Instruction &Instr) const {
LatencySnippetGenerator::generateCodeTemplates(
const Instruction &Instr, const BitVector &ForbiddenRegisters) const {
std::vector<CodeTemplate> Results;
const ExecutionMode EM = getExecutionModes(Instr);
const ExecutionMode EM = getExecutionModes(Instr, ForbiddenRegisters);
for (const auto EC : kExecutionClasses) {
for (const auto ExecutionModeBit : getExecutionModeBits(EM & EC.Mask))
appendCodeTemplates(State, Instr, ExecutionModeBit, EC.Description,
Results);
appendCodeTemplates(State, Instr, ForbiddenRegisters, ExecutionModeBit,
EC.Description, Results);
if (!Results.empty())
break;
}

View File

@ -27,7 +27,8 @@ public:
~LatencySnippetGenerator() override;
llvm::Expected<std::vector<CodeTemplate>>
generateCodeTemplates(const Instruction &Instr) const override;
generateCodeTemplates(const Instruction &Instr,
const BitVector &ForbiddenRegisters) const override;
};
class LatencyBenchmarkRunner : public BenchmarkRunner {

View File

@ -183,10 +183,29 @@ bool Instruction::hasAliasingImplicitRegisters() const {
return ImplDefRegs.anyCommon(ImplUseRegs);
}
// Returns true if there are registers that are both in `A` and `B` but not in
// `Forbidden`.
static bool anyCommonExcludingForbidden(const BitVector &A, const BitVector &B,
const BitVector &Forbidden) {
assert(A.size() == B.size() && B.size() == Forbidden.size());
const auto Size = A.size();
for (int AIndex = A.find_first(); AIndex != -1;) {
const int BIndex = B.find_first_in(AIndex, Size);
if (BIndex == -1)
return false;
if (AIndex == BIndex && !Forbidden.test(AIndex))
return true;
AIndex = A.find_first_in(BIndex + 1, Size);
}
return false;
}
bool Instruction::hasAliasingRegistersThrough(
const Instruction &OtherInstr) const {
return AllDefRegs.anyCommon(OtherInstr.AllUseRegs) &&
OtherInstr.AllDefRegs.anyCommon(AllUseRegs);
const Instruction &OtherInstr, const BitVector &ForbiddenRegisters) const {
return anyCommonExcludingForbidden(AllDefRegs, OtherInstr.AllUseRegs,
ForbiddenRegisters) &&
anyCommonExcludingForbidden(OtherInstr.AllDefRegs, AllUseRegs,
ForbiddenRegisters);
}
bool Instruction::hasTiedRegisters() const {
@ -194,8 +213,10 @@ bool Instruction::hasTiedRegisters() const {
Variables, [](const Variable &Var) { return Var.hasTiedOperands(); });
}
bool Instruction::hasAliasingRegisters() const {
return AllDefRegs.anyCommon(AllUseRegs);
bool Instruction::hasAliasingRegisters(
const BitVector &ForbiddenRegisters) const {
return anyCommonExcludingForbidden(AllDefRegs, AllUseRegs,
ForbiddenRegisters);
}
bool Instruction::hasOneUseOrOneDef() const {
@ -203,6 +224,7 @@ bool Instruction::hasOneUseOrOneDef() const {
}
void Instruction::dump(const llvm::MCRegisterInfo &RegInfo,
const RegisterAliasingTrackerCache &RATC,
llvm::raw_ostream &Stream) const {
Stream << "- " << Name << "\n";
for (const auto &Op : Operands) {
@ -251,7 +273,7 @@ void Instruction::dump(const llvm::MCRegisterInfo &RegInfo,
Stream << "- hasAliasingImplicitRegisters (execution is always serial)\n";
if (hasTiedRegisters())
Stream << "- hasTiedRegisters (execution is always serial)\n";
if (hasAliasingRegisters())
if (hasAliasingRegisters(RATC.emptyRegisters()))
Stream << "- hasAliasingRegisters\n";
}

View File

@ -112,10 +112,11 @@ struct Instruction {
// Repeating this instruction may execute sequentially by picking aliasing
// Use and Def registers. It may also execute in parallel by picking non
// aliasing Use and Def registers.
bool hasAliasingRegisters() const;
bool hasAliasingRegisters(const BitVector &ForbiddenRegisters) const;
// Whether this instruction's registers alias with OtherInstr's registers.
bool hasAliasingRegistersThrough(const Instruction &OtherInstr) const;
bool hasAliasingRegistersThrough(const Instruction &OtherInstr,
const BitVector &ForbiddenRegisters) const;
// Returns whether this instruction has Memory Operands.
// Repeating this instruction executes sequentially with an instruction that
@ -129,6 +130,7 @@ struct Instruction {
// Convenient function to help with debugging.
void dump(const llvm::MCRegisterInfo &RegInfo,
const RegisterAliasingTrackerCache &RATC,
llvm::raw_ostream &Stream) const;
const llvm::MCInstrDesc *Description; // Never nullptr.

View File

@ -38,14 +38,33 @@ SnippetGenerator::~SnippetGenerator() = default;
llvm::Expected<std::vector<BenchmarkCode>>
SnippetGenerator::generateConfigurations(const Instruction &Instr) const {
if (auto E = generateCodeTemplates(Instr)) {
const auto &RATC = State.getRATC();
llvm::BitVector ForbiddenRegs = State.getRATC().reservedRegisters();
// If the instruction has memory registers, prevent the generator from
// using the scratch register and its aliasing registers.
if (Instr.hasMemoryOperands()) {
const auto &ET = State.getExegesisTarget();
unsigned ScratchSpacePointerInReg =
ET.getScratchMemoryRegister(State.getTargetMachine().getTargetTriple());
if (ScratchSpacePointerInReg == 0)
return llvm::make_error<BenchmarkFailure>(
"Infeasible : target does not support memory instructions");
const auto &ScratchRegAliases =
State.getRATC().getRegister(ScratchSpacePointerInReg).aliasedBits();
// If the instruction implicitly writes to ScratchSpacePointerInReg , abort.
// FIXME: We could make a copy of the scratch register.
for (const auto &Op : Instr.Operands) {
if (Op.isDef() && Op.isImplicitReg() &&
ScratchRegAliases.test(Op.getImplicitReg()))
return llvm::make_error<BenchmarkFailure>(
"Infeasible : memory instruction uses scratch memory register");
}
ForbiddenRegs |= ScratchRegAliases;
}
if (auto E = generateCodeTemplates(Instr, ForbiddenRegs)) {
std::vector<BenchmarkCode> Output;
for (CodeTemplate &CT : E.get()) {
const llvm::BitVector &ForbiddenRegs =
CT.ScratchSpacePointerInReg
? RATC.getRegister(CT.ScratchSpacePointerInReg).aliasedBits()
: RATC.emptyRegisters();
// TODO: Generate as many BenchmarkCode as needed.
{
BenchmarkCode BC;

View File

@ -69,7 +69,8 @@ protected:
private:
// API to be implemented by subclasses.
virtual llvm::Expected<std::vector<CodeTemplate>>
generateCodeTemplates(const Instruction &Instr) const = 0;
generateCodeTemplates(const Instruction &Instr,
const BitVector &ForbiddenRegisters) const = 0;
};
// A global Random Number Generator to randomize configurations.

View File

@ -126,7 +126,7 @@ void UopsSnippetGenerator::instantiateMemoryOperands(
static std::vector<InstructionTemplate> generateSnippetUsingStaticRenaming(
const LLVMState &State, const InstructionTemplate &IT,
const ArrayRef<const Variable *> TiedVariables,
const BitVector *ScratchSpaceAliasedRegs) {
const BitVector &ForbiddenRegisters) {
std::vector<InstructionTemplate> Instructions;
// Assign registers to variables in a round-robin manner. This is simple but
// ensures that the most register-constrained variable does not get starved.
@ -135,12 +135,8 @@ static std::vector<InstructionTemplate> generateSnippetUsingStaticRenaming(
assert(Var);
const Operand &Op = IT.Instr.getPrimaryOperand(*Var);
assert(Op.isReg());
BitVector PossibleRegs = State.getRATC().emptyRegisters();
if (ScratchSpaceAliasedRegs) {
PossibleRegs |= *ScratchSpaceAliasedRegs;
}
PossibleRegs.flip();
PossibleRegs &= Op.getRegisterAliasing().sourceBits();
BitVector PossibleRegs = Op.getRegisterAliasing().sourceBits();
remove(PossibleRegs, ForbiddenRegisters);
PossibleRegsForVar.push_back(std::move(PossibleRegs));
}
SmallVector<int, 2> Iterators(TiedVariables.size(), 0);
@ -168,28 +164,14 @@ static std::vector<InstructionTemplate> generateSnippetUsingStaticRenaming(
}
llvm::Expected<std::vector<CodeTemplate>>
UopsSnippetGenerator::generateCodeTemplates(const Instruction &Instr) const {
UopsSnippetGenerator::generateCodeTemplates(
const Instruction &Instr, const BitVector &ForbiddenRegisters) const {
CodeTemplate CT;
const llvm::BitVector *ScratchSpaceAliasedRegs = nullptr;
if (Instr.hasMemoryOperands()) {
const auto &ET = State.getExegesisTarget();
CT.ScratchSpacePointerInReg =
ET.getScratchMemoryRegister(State.getTargetMachine().getTargetTriple());
if (CT.ScratchSpacePointerInReg == 0)
return llvm::make_error<BenchmarkFailure>(
"Infeasible : target does not support memory instructions");
ScratchSpaceAliasedRegs =
&State.getRATC().getRegister(CT.ScratchSpacePointerInReg).aliasedBits();
// If the instruction implicitly writes to ScratchSpacePointerInReg , abort.
// FIXME: We could make a copy of the scratch register.
for (const auto &Op : Instr.Operands) {
if (Op.isDef() && Op.isImplicitReg() &&
ScratchSpaceAliasedRegs->test(Op.getImplicitReg()))
return llvm::make_error<BenchmarkFailure>(
"Infeasible : memory instruction uses scratch memory register");
}
}
CT.ScratchSpacePointerInReg =
Instr.hasMemoryOperands()
? State.getExegesisTarget().getScratchMemoryRegister(
State.getTargetMachine().getTargetTriple())
: 0;
const AliasingConfigurations SelfAliasing(Instr, Instr);
InstructionTemplate IT(Instr);
if (SelfAliasing.empty()) {
@ -208,20 +190,17 @@ UopsSnippetGenerator::generateCodeTemplates(const Instruction &Instr) const {
if (!TiedVariables.empty()) {
CT.Info = "instruction has tied variables, using static renaming.";
CT.Instructions = generateSnippetUsingStaticRenaming(
State, IT, TiedVariables, ScratchSpaceAliasedRegs);
State, IT, TiedVariables, ForbiddenRegisters);
instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions);
return getSingleton(std::move(CT));
}
const auto &ReservedRegisters = State.getRATC().reservedRegisters();
// No tied variables, we pick random values for defs.
llvm::BitVector Defs(State.getRegInfo().getNumRegs());
for (const auto &Op : Instr.Operands) {
if (Op.isReg() && Op.isExplicit() && Op.isDef() && !Op.isMemory()) {
auto PossibleRegisters = Op.getRegisterAliasing().sourceBits();
remove(PossibleRegisters, ReservedRegisters);
// Do not use the scratch memory address register.
if (ScratchSpaceAliasedRegs)
remove(PossibleRegisters, *ScratchSpaceAliasedRegs);
// Do not use forbidden registers.
remove(PossibleRegisters, ForbiddenRegisters);
assert(PossibleRegisters.any() && "No register left to choose from");
const auto RandomReg = randomBit(PossibleRegisters);
Defs.set(RandomReg);
@ -233,10 +212,7 @@ UopsSnippetGenerator::generateCodeTemplates(const Instruction &Instr) const {
for (const auto &Op : Instr.Operands) {
if (Op.isReg() && Op.isExplicit() && Op.isUse() && !Op.isMemory()) {
auto PossibleRegisters = Op.getRegisterAliasing().sourceBits();
remove(PossibleRegisters, ReservedRegisters);
// Do not use the scratch memory address register.
if (ScratchSpaceAliasedRegs)
remove(PossibleRegisters, *ScratchSpaceAliasedRegs);
remove(PossibleRegisters, ForbiddenRegisters);
remove(PossibleRegisters, DefAliases);
assert(PossibleRegisters.any() && "No register left to choose from");
const auto RandomReg = randomBit(PossibleRegisters);

View File

@ -26,7 +26,8 @@ public:
~UopsSnippetGenerator() override;
llvm::Expected<std::vector<CodeTemplate>>
generateCodeTemplates(const Instruction &Instr) const override;
generateCodeTemplates(const Instruction &Instr,
const BitVector &ForbiddenRegisters) const override;
static constexpr const size_t kMinNumDifferentAddresses = 6;

View File

@ -182,19 +182,21 @@ public:
using LatencySnippetGenerator::LatencySnippetGenerator;
llvm::Expected<std::vector<CodeTemplate>>
generateCodeTemplates(const Instruction &Instr) const override;
generateCodeTemplates(const Instruction &Instr,
const BitVector &ForbiddenRegisters) const override;
};
} // namespace
llvm::Expected<std::vector<CodeTemplate>>
X86LatencySnippetGenerator::generateCodeTemplates(
const Instruction &Instr) const {
const Instruction &Instr, const BitVector &ForbiddenRegisters) const {
if (auto E = IsInvalidOpcode(Instr))
return std::move(E);
switch (getX86FPFlags(Instr)) {
case llvm::X86II::NotFP:
return LatencySnippetGenerator::generateCodeTemplates(Instr);
return LatencySnippetGenerator::generateCodeTemplates(Instr,
ForbiddenRegisters);
case llvm::X86II::ZeroArgFP:
case llvm::X86II::OneArgFP:
case llvm::X86II::SpecialFP:
@ -219,19 +221,21 @@ public:
using UopsSnippetGenerator::UopsSnippetGenerator;
llvm::Expected<std::vector<CodeTemplate>>
generateCodeTemplates(const Instruction &Instr) const override;
generateCodeTemplates(const Instruction &Instr,
const BitVector &ForbiddenRegisters) const override;
};
} // namespace
llvm::Expected<std::vector<CodeTemplate>>
X86UopsSnippetGenerator::generateCodeTemplates(
const Instruction &Instr) const {
const Instruction &Instr, const BitVector &ForbiddenRegisters) const {
if (auto E = IsInvalidOpcode(Instr))
return std::move(E);
switch (getX86FPFlags(Instr)) {
case llvm::X86II::NotFP:
return UopsSnippetGenerator::generateCodeTemplates(Instr);
return UopsSnippetGenerator::generateCodeTemplates(Instr,
ForbiddenRegisters);
case llvm::X86II::ZeroArgFP:
case llvm::X86II::OneArgFP:
case llvm::X86II::SpecialFP:

View File

@ -61,7 +61,8 @@ protected:
std::vector<CodeTemplate> checkAndGetCodeTemplates(unsigned Opcode) {
randomGenerator().seed(0); // Initialize seed.
const Instruction &Instr = State.getIC().getInstr(Opcode);
auto CodeTemplateOrError = Generator.generateCodeTemplates(Instr);
auto CodeTemplateOrError = Generator.generateCodeTemplates(
Instr, State.getRATC().emptyRegisters());
EXPECT_FALSE(CodeTemplateOrError.takeError()); // Valid configuration.
return std::move(CodeTemplateOrError.get());
}
@ -148,6 +149,26 @@ TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughExplicitRegs) {
<< "Op0 is either set to Op1 or to Op2";
}
TEST_F(LatencySnippetGeneratorTest,
ImplicitSelfDependencyThroughExplicitRegsForbidAll) {
// - VXORPSrr
// - Op0 Explicit Def RegClass(VR128)
// - Op1 Explicit Use RegClass(VR128)
// - Op2 Explicit Use RegClass(VR128)
// - Var0 [Op0]
// - Var1 [Op1]
// - Var2 [Op2]
// - hasAliasingRegisters
const unsigned Opcode = llvm::X86::VXORPSrr;
randomGenerator().seed(0); // Initialize seed.
const Instruction &Instr = State.getIC().getInstr(Opcode);
auto AllRegisters = State.getRATC().emptyRegisters();
AllRegisters.flip();
auto Error = Generator.generateCodeTemplates(Instr, AllRegisters).takeError();
EXPECT_TRUE((bool)Error);
llvm::consumeError(std::move(Error));
}
TEST_F(LatencySnippetGeneratorTest, DependencyThroughOtherOpcode) {
// - CMP64rr
// - Op0 Explicit Use RegClass(GR64)
@ -323,7 +344,31 @@ TEST_F(UopsSnippetGeneratorTest, MemoryUse) {
EXPECT_EQ(IT.VariableValues[5].getReg(), 0u);
}
TEST_F(UopsSnippetGeneratorTest, MemoryUse_Movsb) {
class FakeSnippetGenerator : public SnippetGenerator {
public:
FakeSnippetGenerator(const LLVMState &State) : SnippetGenerator(State) {}
Instruction createInstruction(unsigned Opcode) {
return State.getIC().getInstr(Opcode);
}
private:
llvm::Expected<std::vector<CodeTemplate>>
generateCodeTemplates(const Instruction &, const BitVector &) const override {
return llvm::make_error<llvm::StringError>("not implemented",
llvm::inconvertibleErrorCode());
}
};
using FakeSnippetGeneratorTest = SnippetGeneratorTest<FakeSnippetGenerator>;
testing::Matcher<const RegisterValue &> IsRegisterValue(unsigned Reg,
llvm::APInt Value) {
return testing::AllOf(testing::Field(&RegisterValue::Register, Reg),
testing::Field(&RegisterValue::Value, Value));
}
TEST_F(FakeSnippetGeneratorTest, MemoryUse_Movsb) {
// MOVSB writes to scratch memory register.
// - MOVSB
// - Op0 Explicit Use Memory RegClass(GR8)
@ -342,35 +387,11 @@ TEST_F(UopsSnippetGeneratorTest, MemoryUse_Movsb) {
// - hasAliasingRegisters
const unsigned Opcode = llvm::X86::MOVSB;
const Instruction &Instr = State.getIC().getInstr(Opcode);
auto Error = Generator.generateCodeTemplates(Instr).takeError();
auto Error = Generator.generateConfigurations(Instr).takeError();
EXPECT_TRUE((bool)Error);
llvm::consumeError(std::move(Error));
}
class FakeSnippetGenerator : public SnippetGenerator {
public:
FakeSnippetGenerator(const LLVMState &State) : SnippetGenerator(State) {}
Instruction createInstruction(unsigned Opcode) {
return State.getIC().getInstr(Opcode);
}
private:
llvm::Expected<std::vector<CodeTemplate>>
generateCodeTemplates(const Instruction &Instr) const override {
return llvm::make_error<llvm::StringError>("not implemented",
llvm::inconvertibleErrorCode());
}
};
using FakeSnippetGeneratorTest = SnippetGeneratorTest<FakeSnippetGenerator>;
testing::Matcher<const RegisterValue &> IsRegisterValue(unsigned Reg,
llvm::APInt Value) {
return testing::AllOf(testing::Field(&RegisterValue::Register, Reg),
testing::Field(&RegisterValue::Value, Value));
}
TEST_F(FakeSnippetGeneratorTest, ComputeRegisterInitialValuesAdd16ri) {
// ADD16ri:
// explicit def 0 : reg RegClass=GR16