mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 20:23:11 +01:00
[AMDGPU] Optimize S_CBRANCH_VCC[N]Z -> S_CBRANCH_EXEC[N]Z
Sometimes after basic block placement we end up with code like:

    sreg = s_mov_b64 -1
    vcc = s_and_b64 exec, sreg
    s_cbranch_vccz

This happens when a block that assigns -1 to a saved mask is joined with another block that consumes that saved mask with an s_and_b64 and a branch. Once both parts are in a single basic block, the sequence is essentially a single s_cbranch_execz instruction.

Differential Revision: https://reviews.llvm.org/D54164

llvm-svn: 346690
This commit is contained in:
parent
6999228790
commit
dc754bb13d
@ -66,6 +66,8 @@ private:
|
||||
|
||||
bool skipMaskBranch(MachineInstr &MI, MachineBasicBlock &MBB);
|
||||
|
||||
bool optimizeVccBranch(MachineInstr &MI) const;
|
||||
|
||||
public:
|
||||
static char ID;
|
||||
|
||||
@ -320,6 +322,96 @@ bool SIInsertSkips::skipMaskBranch(MachineInstr &MI,
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Try to turn a conditional branch on VCC into a branch on EXEC.
///
/// Match:
///   sreg = -1
///   vcc = S_AND_B64 exec, sreg
///   S_CBRANCH_VCC[N]Z
/// =>
///   S_CBRANCH_EXEC[N]Z
///
/// The S_AND_B64 may also take the -1 directly as an immediate, and its
/// source operands may appear in either order.
///
/// \param MI the S_CBRANCH_VCCZ or S_CBRANCH_VCCNZ instruction to optimize.
/// \returns true if any instruction in the block was modified or erased.
bool SIInsertSkips::optimizeVccBranch(MachineInstr &MI) const {
  bool Changed = false;
  MachineBasicBlock &MBB = *MI.getParent();
  const unsigned CondReg = AMDGPU::VCC;
  const unsigned ExecReg = AMDGPU::EXEC;
  const unsigned And = AMDGPU::S_AND_B64;

  // MachineOperand::getReg() is only valid on register operands, so always
  // test the operand kind before comparing against EXEC.
  auto IsExec = [ExecReg](const MachineOperand &MO) {
    return MO.isReg() && MO.getReg() == ExecReg;
  };

  // Walk backwards from the branch looking for the instruction that defines
  // VCC. The scan is bounded by Threshold so it stays cheap.
  MachineBasicBlock::reverse_iterator A = MI.getReverseIterator(),
                                      E = MBB.rend();
  bool ReadsCond = false;
  unsigned Threshold = 5;
  for (++A; A != E; ++A) {
    if (!--Threshold)
      return false;
    // EXEC must hold the same value at the branch as at the S_AND_B64.
    if (A->modifiesRegister(ExecReg, TRI))
      return false;
    if (A->modifiesRegister(CondReg, TRI)) {
      // Only a full definition of VCC by S_AND_B64 is a candidate.
      if (!A->definesRegister(CondReg, TRI) || A->getOpcode() != And)
        return false;
      break;
    }
    // Remember whether anything else between the S_AND_B64 and the branch
    // reads VCC; if so the S_AND_B64 has to stay.
    ReadsCond |= A->readsRegister(CondReg, TRI);
  }
  if (A == E)
    return false;

  // Canonicalize to "S_AND_B64 exec, <other>".
  MachineOperand &Op1 = A->getOperand(1);
  MachineOperand &Op2 = A->getOperand(2);
  if (!IsExec(Op1) && IsExec(Op2)) {
    // Only report a change if the commute actually happened.
    if (TII->commuteInstruction(*A))
      Changed = true;
  }
  if (!IsExec(Op1))
    return Changed;

  // The other source must be the immediate -1 or a register known to be -1.
  if (Op2.isImm()) {
    if (Op2.getImm() != -1)
      return Changed;
  } else if (!Op2.isReg()) {
    return Changed;
  }

  unsigned SReg = AMDGPU::NoRegister;
  if (Op2.isReg()) {
    SReg = Op2.getReg();

    // Keep scanning backwards for the instruction that defines sreg.
    auto M = std::next(A);
    bool ReadsSreg = false;
    for (; M != E; ++M) {
      if (M->definesRegister(SReg, TRI))
        break;
      // A partial write means sreg is not known to be all ones any more.
      if (M->modifiesRegister(SReg, TRI))
        return Changed;
      ReadsSreg |= M->readsRegister(SReg, TRI);
    }
    if (M == E || !M->isMoveImmediate() || !M->getOperand(1).isImm() ||
        M->getOperand(1).getImm() != -1)
      return Changed;

    // If sreg is only used by the S_AND_B64, fold the immediate into it and
    // drop the move. Never do this when sreg is EXEC itself: the move is then
    // the definition of EXEC, which the other source operand still reads.
    if (!ReadsSreg && Op2.isKill() && SReg != ExecReg) {
      A->getOperand(2).ChangeToImmediate(-1);
      M->eraseFromParent();
    }
  }

  // The S_AND_B64 is dead once the branch no longer reads VCC, provided that
  // nothing else reads VCC, its SCC result is unused and the branch was the
  // last user of VCC.
  if (!ReadsCond && A->registerDefIsDead(AMDGPU::SCC) &&
      MI.killsRegister(CondReg, TRI))
    A->eraseFromParent();

  bool IsVCCZ = MI.getOpcode() == AMDGPU::S_CBRANCH_VCCZ;
  if (SReg == ExecReg) {
    // EXEC was just set to -1, so VCC is known to be non-zero: a VCCZ branch
    // is never taken and a VCCNZ branch is always taken.
    if (IsVCCZ) {
      MI.eraseFromParent();
      return true;
    }
    MI.setDesc(TII->get(AMDGPU::S_BRANCH));
  } else {
    MI.setDesc(TII->get(IsVCCZ ? AMDGPU::S_CBRANCH_EXECZ
                               : AMDGPU::S_CBRANCH_EXECNZ));
  }

  // Replace the implicit VCC use with the implicit operands of the new opcode.
  MI.RemoveOperand(MI.findRegisterUseOperandIdx(CondReg, false /*Kill*/, TRI));
  MI.addImplicitDefUseOperands(*MBB.getParent());

  return true;
}
|
||||
|
||||
bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
TII = ST.getInstrInfo();
|
||||
@ -417,6 +509,11 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
|
||||
}
|
||||
break;
|
||||
|
||||
case AMDGPU::S_CBRANCH_VCCZ:
|
||||
case AMDGPU::S_CBRANCH_VCCNZ:
|
||||
MadeChange |= optimizeVccBranch(MI);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -453,7 +453,7 @@ endif:
|
||||
; GCN: v_nop_e64
|
||||
; GCN: v_nop_e64
|
||||
; GCN: ;;#ASMEND
|
||||
; GCN: s_cbranch_vccz [[RET]]
|
||||
; GCN: s_cbranch_execz [[RET]]
|
||||
|
||||
; GCN-NEXT: [[LONGBB:BB[0-9]+_[0-9]+]]: ; %loop
|
||||
; GCN-NEXT: ; in Loop: Header=[[LOOP_BODY]] Depth=1
|
||||
|
@ -36,7 +36,7 @@ loop:
|
||||
; SI: s_and_b64 vcc, exec, -1
|
||||
; SI: s_waitcnt lgkmcnt(0)
|
||||
; SI: buffer_store_dword [[REG]]
|
||||
; SI: s_cbranch_vccnz [[LOOP]]
|
||||
; SI: s_cbranch_execnz [[LOOP]]
|
||||
|
||||
; SI: [[RET]]: ; %UnifiedReturnBlock
|
||||
; SI: s_endpgm
|
||||
|
320
test/CodeGen/AMDGPU/insert-skip-from-vcc.mir
Normal file
320
test/CodeGen/AMDGPU/insert-skip-from-vcc.mir
Normal file
@ -0,0 +1,320 @@
|
||||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass si-insert-skips -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
---
|
||||
# GCN-LABEL: name: and_execz_mov_vccz
|
||||
# GCN-NOT: S_MOV_
|
||||
# GCN-NOT: S_AND_
|
||||
# GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
|
||||
name: and_execz_mov_vccz
|
||||
body: |
|
||||
bb.0:
|
||||
S_NOP 0
|
||||
|
||||
bb.1:
|
||||
S_NOP 0
|
||||
|
||||
bb.2:
|
||||
$sgpr0_sgpr1 = S_MOV_B64 -1
|
||||
$vcc = S_AND_B64 $exec, killed $sgpr0_sgpr1, implicit-def dead $scc
|
||||
S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: and_execz_imm_vccz
|
||||
# GCN-NOT: S_AND_
|
||||
# GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
|
||||
name: and_execz_imm_vccz
|
||||
body: |
|
||||
bb.0:
|
||||
S_NOP 0
|
||||
|
||||
bb.1:
|
||||
S_NOP 0
|
||||
|
||||
bb.2:
|
||||
$vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
|
||||
S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: and_execnz_imm_vccnz
|
||||
# GCN-NOT: S_AND_
|
||||
# GCN: S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||
name: and_execnz_imm_vccnz
|
||||
body: |
|
||||
bb.0:
|
||||
S_NOP 0
|
||||
|
||||
bb.1:
|
||||
S_NOP 0
|
||||
|
||||
bb.2:
|
||||
$vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
|
||||
S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: and_execz_imm_vccz_live_scc
|
||||
# GCN: $vcc = S_AND_B64 $exec, -1, implicit-def $scc
|
||||
# GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
|
||||
name: and_execz_imm_vccz_live_scc
|
||||
body: |
|
||||
bb.0:
|
||||
S_NOP 0
|
||||
|
||||
bb.1:
|
||||
S_NOP 0
|
||||
|
||||
bb.2:
|
||||
$vcc = S_AND_B64 $exec, -1, implicit-def $scc
|
||||
S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: and_execz_mov_vccz_live_scc
|
||||
# GCN-NOT: S_MOV_
|
||||
# GCN: $vcc = S_AND_B64 $exec, -1, implicit-def $scc
|
||||
# GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
|
||||
name: and_execz_mov_vccz_live_scc
|
||||
body: |
|
||||
bb.0:
|
||||
S_NOP 0
|
||||
|
||||
bb.1:
|
||||
S_NOP 0
|
||||
|
||||
bb.2:
|
||||
$sgpr0_sgpr1 = S_MOV_B64 -1
|
||||
$vcc = S_AND_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
|
||||
S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: and_execz_mov_vccz_live_sreg
|
||||
# GCN: $sgpr0_sgpr1 = S_MOV_B64 -1
|
||||
# GCN-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
|
||||
name: and_execz_mov_vccz_live_sreg
|
||||
body: |
|
||||
bb.0:
|
||||
S_NOP 0
|
||||
|
||||
bb.1:
|
||||
S_NOP 0
|
||||
|
||||
bb.2:
|
||||
$sgpr0_sgpr1 = S_MOV_B64 -1
|
||||
$vcc = S_AND_B64 $exec, $sgpr0_sgpr1, implicit-def dead $scc
|
||||
S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: and_execz_mov_vccz_live_sreg_commute
|
||||
# GCN: $sgpr0_sgpr1 = S_MOV_B64 -1
|
||||
# GCN-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
|
||||
name: and_execz_mov_vccz_live_sreg_commute
|
||||
body: |
|
||||
bb.0:
|
||||
S_NOP 0
|
||||
|
||||
bb.1:
|
||||
S_NOP 0
|
||||
|
||||
bb.2:
|
||||
$sgpr0_sgpr1 = S_MOV_B64 -1
|
||||
$vcc = S_AND_B64 $sgpr0_sgpr1, $exec, implicit-def dead $scc
|
||||
S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: and_execz_mov_vccz_live_scc_commute
|
||||
# GCN-NOT: S_MOV_
|
||||
# GCN: $vcc = S_AND_B64 $exec, -1, implicit-def $scc
|
||||
# GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
|
||||
name: and_execz_mov_vccz_live_scc_commute
|
||||
body: |
|
||||
bb.0:
|
||||
S_NOP 0
|
||||
|
||||
bb.1:
|
||||
S_NOP 0
|
||||
|
||||
bb.2:
|
||||
$sgpr0_sgpr1 = S_MOV_B64 -1
|
||||
$vcc = S_AND_B64 killed $sgpr0_sgpr1, $exec, implicit-def $scc
|
||||
S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: and_execz_mov_vccz_commute
|
||||
# GCN-NOT: S_MOV_
|
||||
# GCN-NOT: S_AND_
|
||||
# GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
|
||||
name: and_execz_mov_vccz_commute
|
||||
body: |
|
||||
bb.0:
|
||||
S_NOP 0
|
||||
|
||||
bb.1:
|
||||
S_NOP 0
|
||||
|
||||
bb.2:
|
||||
$sgpr0_sgpr1 = S_MOV_B64 -1
|
||||
$vcc = S_AND_B64 killed $sgpr0_sgpr1, $exec, implicit-def dead $scc
|
||||
S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: and_execz_mov_exec_vccz
|
||||
# GCN: $exec = S_MOV_B64 -1
|
||||
# GCN-NEXT: S_ENDPGM
|
||||
name: and_execz_mov_exec_vccz
|
||||
body: |
|
||||
bb.0:
|
||||
S_NOP 0
|
||||
|
||||
bb.1:
|
||||
S_NOP 0
|
||||
|
||||
bb.2:
|
||||
$exec = S_MOV_B64 -1
|
||||
$vcc = S_AND_B64 $exec, $exec, implicit-def dead $scc
|
||||
S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: and_execz_mov_exec_vccnz
|
||||
# GCN: $exec = S_MOV_B64 -1
|
||||
# GCN-NEXT: S_BRANCH %bb.1{{$}}
|
||||
name: and_execz_mov_exec_vccnz
|
||||
body: |
|
||||
bb.0:
|
||||
S_NOP 0
|
||||
|
||||
bb.1:
|
||||
S_NOP 0
|
||||
|
||||
bb.2:
|
||||
$exec = S_MOV_B64 -1
|
||||
$vcc = S_AND_B64 $exec, $exec, implicit-def dead $scc
|
||||
S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: and_execz_mov_vccz_reads_sreg_early
|
||||
# GCN: $sgpr0_sgpr1 = S_MOV_B64 -1
|
||||
# GCN-NEXT: $sgpr2 = S_MOV_B32 $sgpr1
|
||||
# GCN-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
|
||||
name: and_execz_mov_vccz_reads_sreg_early
|
||||
body: |
|
||||
bb.0:
|
||||
S_NOP 0
|
||||
|
||||
bb.1:
|
||||
S_NOP 0
|
||||
|
||||
bb.2:
|
||||
$sgpr0_sgpr1 = S_MOV_B64 -1
|
||||
$sgpr2 = S_MOV_B32 $sgpr1
|
||||
$vcc = S_AND_B64 $exec, killed $sgpr0_sgpr1, implicit-def dead $scc
|
||||
S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: and_execz_mov_vccz_reads_sreg_late
|
||||
# GCN: $sgpr0_sgpr1 = S_MOV_B64 -1
|
||||
# GCN-NEXT: $sgpr2 = S_MOV_B32 $sgpr1
|
||||
# GCN-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
|
||||
name: and_execz_mov_vccz_reads_sreg_late
|
||||
body: |
|
||||
bb.0:
|
||||
S_NOP 0
|
||||
|
||||
bb.1:
|
||||
S_NOP 0
|
||||
|
||||
bb.2:
|
||||
$sgpr0_sgpr1 = S_MOV_B64 -1
|
||||
$vcc = S_AND_B64 $exec, $sgpr0_sgpr1, implicit-def dead $scc
|
||||
$sgpr2 = S_MOV_B32 $sgpr1
|
||||
S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
|
||||
S_ENDPGM
|
||||
...
|
||||
# GCN-LABEL: name: and_execz_mov_vccz_reads_writes_sreg_early
|
||||
# GCN: $sgpr0_sgpr1 = S_MOV_B64 -1
|
||||
# GCN-NEXT: $sgpr1 = S_MOV_B32 $sgpr0
|
||||
# GCN-NEXT: $vcc = S_AND_B64 $exec, killed $sgpr0_sgpr1, implicit-def dead $scc
|
||||
# GCN-NEXT: S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
|
||||
name: and_execz_mov_vccz_reads_writes_sreg_early
|
||||
body: |
|
||||
bb.0:
|
||||
S_NOP 0
|
||||
|
||||
bb.1:
|
||||
S_NOP 0
|
||||
|
||||
bb.2:
|
||||
$sgpr0_sgpr1 = S_MOV_B64 -1
|
||||
$sgpr1 = S_MOV_B32 $sgpr0
|
||||
$vcc = S_AND_B64 $exec, killed $sgpr0_sgpr1, implicit-def dead $scc
|
||||
S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: and_execz_mov_vccz_reads_cond
|
||||
# GCN: $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
|
||||
# GCN-NEXT: $sgpr2 = S_MOV_B32 $vcc_lo
|
||||
# GCN-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
|
||||
name: and_execz_mov_vccz_reads_cond
|
||||
body: |
|
||||
bb.0:
|
||||
S_NOP 0
|
||||
|
||||
bb.1:
|
||||
S_NOP 0
|
||||
|
||||
bb.2:
|
||||
$sgpr0_sgpr1 = S_MOV_B64 -1
|
||||
$vcc = S_AND_B64 $exec, killed $sgpr0_sgpr1, implicit-def dead $scc
|
||||
$sgpr2 = S_MOV_B32 $vcc_lo
|
||||
S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: and_execz_mov_vccz_modifies_sreg
|
||||
# GCN: $sgpr0_sgpr1 = S_MOV_B64 -1
|
||||
# GCN-NEXT: $sgpr0 = S_MOV_B32 0
|
||||
# GCN-NEXT: $vcc = S_AND_B64 $exec, killed $sgpr0_sgpr1, implicit-def dead $scc
|
||||
# GCN-NEXT: S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
|
||||
name: and_execz_mov_vccz_modifies_sreg
|
||||
body: |
|
||||
bb.0:
|
||||
S_NOP 0
|
||||
|
||||
bb.1:
|
||||
S_NOP 0
|
||||
|
||||
bb.2:
|
||||
$sgpr0_sgpr1 = S_MOV_B64 -1
|
||||
$sgpr0 = S_MOV_B32 0
|
||||
$vcc = S_AND_B64 $exec, killed $sgpr0_sgpr1, implicit-def dead $scc
|
||||
S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: name: and_execz_imm_vccz_liveout_scc
|
||||
# GCN: $vcc = S_AND_B64 $exec, -1, implicit-def $scc
|
||||
# GCN-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
|
||||
# GCN-NEXT: S_ENDPGM implicit $scc
|
||||
name: and_execz_imm_vccz_liveout_scc
|
||||
body: |
|
||||
bb.0:
|
||||
S_NOP 0
|
||||
|
||||
bb.1:
|
||||
S_NOP 0
|
||||
|
||||
bb.2:
|
||||
$vcc = S_AND_B64 $exec, -1, implicit-def $scc
|
||||
S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
|
||||
S_ENDPGM implicit $scc
|
||||
...
|
Loading…
Reference in New Issue
Block a user