
AMDGPU/GlobalISel: Tolerate negated control flow intrinsic outputs

If the condition output is negated, swap the branch targets. This is
similar to what SelectionDAG does when SelectionDAGBuilder
decides to invert the condition and swap the branches.

This is leaving behind a dead constant def for some reason.
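For illustration, the shape now tolerated is the one exercised by the brcond_si_if_br_negated test below: the i1 output of the control flow intrinsic is inverted by a G_XOR with true/-1 before feeding G_BRCOND. A minimal sketch adapted from that test (register and block numbers arbitrary):

  %2:_(s1) = G_ICMP intpred(ne), %0, %1
  %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
  %5:_(s1) = G_CONSTANT i1 true
  %6:_(s1) = G_XOR %3, %5
  G_BRCOND %6, %bb.2
  G_BR %bb.3

Roughly, verifyCFIntrinsic erases the G_XOR and reports Negated, and the caller swaps CondBrTarget and UncondBrTarget before emitting SI_IF/SI_LOOP, so the emitted branch targets %bb.2 here instead of %bb.3. The G_CONSTANT that fed the erased G_XOR is the dead def mentioned above.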
Matt Arsenault 2020-08-25 19:53:32 -04:00
parent 0ce967f763
commit cacc0ebf96
3 changed files with 320 additions and 14 deletions

@@ -2625,23 +2625,42 @@ bool AMDGPULegalizerInfo::legalizeBuildVector(
return true;
}
// Check that this is a G_XOR x, -1
static bool isNot(const MachineRegisterInfo &MRI, const MachineInstr &MI) {
if (MI.getOpcode() != TargetOpcode::G_XOR)
return false;
auto ConstVal = getConstantVRegVal(MI.getOperand(2).getReg(), MRI);
return ConstVal && *ConstVal == -1;
}
// Return the use branch instruction, otherwise null if the usage is invalid.
static MachineInstr *verifyCFIntrinsic(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineInstr *&Br,
MachineBasicBlock *&UncondBrTarget) {
static MachineInstr *
verifyCFIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, MachineInstr *&Br,
MachineBasicBlock *&UncondBrTarget, bool &Negated) {
Register CondDef = MI.getOperand(0).getReg();
if (!MRI.hasOneNonDBGUse(CondDef))
return nullptr;
MachineBasicBlock *Parent = MI.getParent();
MachineInstr &UseMI = *MRI.use_instr_nodbg_begin(CondDef);
if (UseMI.getParent() != Parent ||
UseMI.getOpcode() != AMDGPU::G_BRCOND)
MachineInstr *UseMI = &*MRI.use_instr_nodbg_begin(CondDef);
if (isNot(MRI, *UseMI)) {
Register NegatedCond = UseMI->getOperand(0).getReg();
if (!MRI.hasOneNonDBGUse(NegatedCond))
return nullptr;
// We're deleting the def of this value, so we need to remove it.
UseMI->eraseFromParent();
UseMI = &*MRI.use_instr_nodbg_begin(NegatedCond);
Negated = true;
}
if (UseMI->getParent() != Parent || UseMI->getOpcode() != AMDGPU::G_BRCOND)
return nullptr;
// Make sure the cond br is followed by a G_BR, or is the last instruction.
MachineBasicBlock::iterator Next = std::next(UseMI.getIterator());
MachineBasicBlock::iterator Next = std::next(UseMI->getIterator());
if (Next == Parent->end()) {
MachineFunction::iterator NextMBB = std::next(Parent->getIterator());
if (NextMBB == Parent->getParent()->end()) // Illegal intrinsic use.
@@ -2654,7 +2673,7 @@ static MachineInstr *verifyCFIntrinsic(MachineInstr &MI,
UncondBrTarget = Br->getOperand(0).getMBB();
}
return &UseMI;
return UseMI;
}
bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B,
@@ -4467,7 +4486,9 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::amdgcn_else: {
MachineInstr *Br = nullptr;
MachineBasicBlock *UncondBrTarget = nullptr;
if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI, Br, UncondBrTarget)) {
bool Negated = false;
if (MachineInstr *BrCond =
verifyCFIntrinsic(MI, MRI, Br, UncondBrTarget, Negated)) {
const SIRegisterInfo *TRI
= static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
@@ -4475,6 +4496,10 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
Register Use = MI.getOperand(3).getReg();
MachineBasicBlock *CondBrTarget = BrCond->getOperand(1).getMBB();
if (Negated)
std::swap(CondBrTarget, UncondBrTarget);
B.setInsertPt(B.getMBB(), BrCond->getIterator());
if (IntrID == Intrinsic::amdgcn_if) {
B.buildInstr(AMDGPU::SI_IF)
@@ -4510,13 +4535,18 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::amdgcn_loop: {
MachineInstr *Br = nullptr;
MachineBasicBlock *UncondBrTarget = nullptr;
if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI, Br, UncondBrTarget)) {
bool Negated = false;
if (MachineInstr *BrCond =
verifyCFIntrinsic(MI, MRI, Br, UncondBrTarget, Negated)) {
const SIRegisterInfo *TRI
= static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
MachineBasicBlock *CondBrTarget = BrCond->getOperand(1).getMBB();
Register Reg = MI.getOperand(2).getReg();
if (Negated)
std::swap(CondBrTarget, UncondBrTarget);
B.setInsertPt(B.getMBB(), BrCond->getIterator());
B.buildInstr(AMDGPU::SI_LOOP)
.addUse(Reg)

@@ -5,7 +5,9 @@
# ERR: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_different_block)
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: si_if_not_brcond_user)
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: si_if_multi_user)
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: si_if_not_condition)
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_xor_0)
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_or_neg1)
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_negated_multi_use)
---
@@ -55,10 +57,63 @@ body: |
...
# Make sure we only match G_XOR (if), -1
---
name: si_if_not_condition
name: brcond_si_if_xor_0
body: |
bb.0:
successors: %bb.1
liveins: $vgpr0, $vgpr1
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(ne), %0, %1
%3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
%5:_(s1) = G_CONSTANT i1 false
%6:_(s1) = G_XOR %3, %5
G_BRCOND %6, %bb.2
G_BR %bb.3
bb.1:
S_NOP 0
bb.2:
S_NOP 1
bb.3:
S_NOP 2
...
# Make sure we only match G_XOR (if), -1
---
name: brcond_si_if_or_neg1
body: |
bb.0:
successors: %bb.1
liveins: $vgpr0, $vgpr1
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(ne), %0, %1
%3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
%5:_(s1) = G_CONSTANT i1 true
%6:_(s1) = G_OR %3, %5
G_BRCOND %6, %bb.2
G_BR %bb.3
bb.1:
S_NOP 0
bb.2:
S_NOP 1
bb.3:
S_NOP 2
...
---
name: brcond_si_if_negated_multi_use
body: |
bb.0:
successors: %bb.1
liveins: $vgpr0, $vgpr1
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
@@ -66,8 +121,16 @@ body: |
%3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
%5:_(s1) = G_CONSTANT i1 true
%6:_(s1) = G_XOR %3, %5
G_BRCOND %6, %bb.1
S_NOP 0, implicit %6
G_BRCOND %6, %bb.2
G_BR %bb.3
bb.1:
S_NOP 0
bb.2:
S_NOP 1
bb.3:
S_NOP 2
...

@@ -406,3 +406,216 @@ body: |
bb.2:
S_NOP 0
...
---
name: brcond_si_if_negated
body: |
; WAVE64-LABEL: name: brcond_si_if_negated
; WAVE64: bb.0:
; WAVE64: successors: %bb.1(0x80000000)
; WAVE64: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; WAVE64: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; WAVE64: [[ICMP:%[0-9]+]]:sreg_64_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
; WAVE64: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; WAVE64: [[SI_IF:%[0-9]+]]:sreg_64_xexec(s64) = SI_IF [[ICMP]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
; WAVE64: G_BR %bb.1
; WAVE64: bb.1:
; WAVE64: successors: %bb.2(0x80000000)
; WAVE64: S_NOP 0
; WAVE64: bb.2:
; WAVE64: S_NOP 1
; WAVE32-LABEL: name: brcond_si_if_negated
; WAVE32: bb.0:
; WAVE32: successors: %bb.1(0x80000000)
; WAVE32: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; WAVE32: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; WAVE32: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
; WAVE32: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; WAVE32: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s64) = SI_IF [[ICMP]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
; WAVE32: G_BR %bb.1
; WAVE32: bb.1:
; WAVE32: successors: %bb.2(0x80000000)
; WAVE32: S_NOP 0
; WAVE32: bb.2:
; WAVE32: S_NOP 1
bb.0:
successors: %bb.1
liveins: $vgpr0, $vgpr1
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(ne), %0, %1
%3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
%5:_(s1) = G_CONSTANT i1 true
%6:_(s1) = G_XOR %3, %5
G_BRCOND %6, %bb.2
bb.1:
S_NOP 0
bb.2:
S_NOP 1
...
---
name: brcond_si_if_br_negated
body: |
; WAVE64-LABEL: name: brcond_si_if_br_negated
; WAVE64: bb.0:
; WAVE64: successors: %bb.1(0x80000000)
; WAVE64: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; WAVE64: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; WAVE64: [[ICMP:%[0-9]+]]:sreg_64_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
; WAVE64: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; WAVE64: [[SI_IF:%[0-9]+]]:sreg_64_xexec(s64) = SI_IF [[ICMP]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
; WAVE64: G_BR %bb.3
; WAVE64: bb.1:
; WAVE64: successors: %bb.2(0x80000000)
; WAVE64: S_NOP 0
; WAVE64: bb.2:
; WAVE64: successors: %bb.3(0x80000000)
; WAVE64: S_NOP 1
; WAVE64: bb.3:
; WAVE64: S_NOP 2
; WAVE32-LABEL: name: brcond_si_if_br_negated
; WAVE32: bb.0:
; WAVE32: successors: %bb.1(0x80000000)
; WAVE32: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; WAVE32: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; WAVE32: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
; WAVE32: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; WAVE32: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s64) = SI_IF [[ICMP]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
; WAVE32: G_BR %bb.3
; WAVE32: bb.1:
; WAVE32: successors: %bb.2(0x80000000)
; WAVE32: S_NOP 0
; WAVE32: bb.2:
; WAVE32: successors: %bb.3(0x80000000)
; WAVE32: S_NOP 1
; WAVE32: bb.3:
; WAVE32: S_NOP 2
bb.0:
successors: %bb.1
liveins: $vgpr0, $vgpr1
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(ne), %0, %1
%3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
%5:_(s1) = G_CONSTANT i1 true
%6:_(s1) = G_XOR %3, %5
G_BRCOND %6, %bb.2
G_BR %bb.3
bb.1:
S_NOP 0
bb.2:
S_NOP 1
bb.3:
S_NOP 2
...
---
name: brcond_si_loop_brcond_negated
tracksRegLiveness: true
body: |
; WAVE64-LABEL: name: brcond_si_loop_brcond_negated
; WAVE64: bb.0:
; WAVE64: successors: %bb.1(0x80000000)
; WAVE64: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
; WAVE64: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; WAVE64: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; WAVE64: [[COPY2:%[0-9]+]]:sreg_64_xexec(s64) = COPY $sgpr0_sgpr1
; WAVE64: bb.1:
; WAVE64: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; WAVE64: S_NOP 0
; WAVE64: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; WAVE64: SI_LOOP [[COPY2]](s64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; WAVE64: G_BR %bb.2
; WAVE64: bb.2:
; WAVE64: S_NOP 0
; WAVE32-LABEL: name: brcond_si_loop_brcond_negated
; WAVE32: bb.0:
; WAVE32: successors: %bb.1(0x80000000)
; WAVE32: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
; WAVE32: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; WAVE32: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; WAVE32: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec(s64) = COPY $sgpr0_sgpr1
; WAVE32: bb.1:
; WAVE32: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; WAVE32: S_NOP 0
; WAVE32: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; WAVE32: SI_LOOP [[COPY2]](s64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
; WAVE32: G_BR %bb.2
; WAVE32: bb.2:
; WAVE32: S_NOP 0
bb.0:
liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s64) = COPY $sgpr0_sgpr1
bb.1:
successors: %bb.1, %bb.2
S_NOP 0
%3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
%4:_(s1) = G_CONSTANT i1 true
%5:_(s1) = G_XOR %3, %4
G_BRCOND %5, %bb.1
bb.2:
S_NOP 0
...
---
name: brcond_si_loop_brcond_br_negated
tracksRegLiveness: true
body: |
; WAVE64-LABEL: name: brcond_si_loop_brcond_br_negated
; WAVE64: bb.0:
; WAVE64: successors: %bb.1(0x80000000)
; WAVE64: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
; WAVE64: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; WAVE64: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; WAVE64: [[COPY2:%[0-9]+]]:sreg_64_xexec(s64) = COPY $sgpr0_sgpr1
; WAVE64: bb.1:
; WAVE64: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; WAVE64: S_NOP 0
; WAVE64: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; WAVE64: SI_LOOP [[COPY2]](s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
; WAVE64: G_BR %bb.1
; WAVE64: bb.2:
; WAVE64: S_NOP 0
; WAVE32-LABEL: name: brcond_si_loop_brcond_br_negated
; WAVE32: bb.0:
; WAVE32: successors: %bb.1(0x80000000)
; WAVE32: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
; WAVE32: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; WAVE32: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; WAVE32: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec(s64) = COPY $sgpr0_sgpr1
; WAVE32: bb.1:
; WAVE32: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; WAVE32: S_NOP 0
; WAVE32: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; WAVE32: SI_LOOP [[COPY2]](s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
; WAVE32: G_BR %bb.1
; WAVE32: bb.2:
; WAVE32: S_NOP 0
bb.0:
liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s64) = COPY $sgpr0_sgpr1
bb.1:
successors: %bb.1, %bb.2
S_NOP 0
%3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
%4:_(s1) = G_CONSTANT i1 true
%5:_(s1) = G_XOR %3, %4
G_BRCOND %5, %bb.2
G_BR %bb.1
bb.2:
S_NOP 0
...