mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 05:01:59 +01:00
[AMDGPU] Add SI_EARLY_TERMINATE_SCC0 for early terminating shader
Add a pseudo instruction to allow early termination of a pixel shader anywhere, based on the value of SCC. The intention is to use this when a mask of live lanes is updated, e.g. the live lanes in the WQM pass. This facilitates early termination of shaders even when EXEC is incomplete, e.g. in non-uniform control flow. Reviewed By: foad. Differential Revision: https://reviews.llvm.org/D88777
This commit is contained in:
parent
abe71cac9b
commit
037097a4f3
@ -49,6 +49,7 @@ private:
|
||||
DebugLoc DL);
|
||||
|
||||
bool kill(MachineInstr &MI);
|
||||
void earlyTerm(MachineInstr &MI);
|
||||
|
||||
bool skipMaskBranch(MachineInstr &MI, MachineBasicBlock &MBB);
|
||||
|
||||
@ -145,10 +146,11 @@ bool SIInsertSkips::dominatesAllReachable(MachineBasicBlock &MBB) {
|
||||
return true;
|
||||
}
|
||||
|
||||
static void generatePsEndPgm(MachineBasicBlock &MBB,
|
||||
static void generateEndPgm(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator I, DebugLoc DL,
|
||||
const SIInstrInfo *TII) {
|
||||
// Generate "null export; s_endpgm".
|
||||
const SIInstrInfo *TII, bool IsPS) {
|
||||
// "null export"
|
||||
if (IsPS) {
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::EXP_DONE))
|
||||
.addImm(AMDGPU::Exp::ET_NULL)
|
||||
.addReg(AMDGPU::VGPR0, RegState::Undef)
|
||||
@ -158,6 +160,8 @@ static void generatePsEndPgm(MachineBasicBlock &MBB,
|
||||
.addImm(1) // vm
|
||||
.addImm(0) // compr
|
||||
.addImm(0); // en
|
||||
}
|
||||
// s_endpgm
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ENDPGM)).addImm(0);
|
||||
}
|
||||
|
||||
@ -169,7 +173,9 @@ void SIInsertSkips::ensureEarlyExitBlock(MachineBasicBlock &MBB,
|
||||
if (!EarlyExitBlock) {
|
||||
EarlyExitBlock = MF->CreateMachineBasicBlock();
|
||||
MF->insert(MF->end(), EarlyExitBlock);
|
||||
generatePsEndPgm(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII);
|
||||
generateEndPgm(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII,
|
||||
MF->getFunction().getCallingConv() ==
|
||||
CallingConv::AMDGPU_PS);
|
||||
EarlyExitClearsExec = false;
|
||||
}
|
||||
|
||||
@ -178,7 +184,6 @@ void SIInsertSkips::ensureEarlyExitBlock(MachineBasicBlock &MBB,
|
||||
unsigned Mov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
|
||||
Register Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
|
||||
auto ExitI = EarlyExitBlock->getFirstNonPHI();
|
||||
assert(ExitI->getOpcode() == AMDGPU::EXP_DONE);
|
||||
BuildMI(*EarlyExitBlock, ExitI, DL, TII->get(Mov), Exec).addImm(0);
|
||||
EarlyExitClearsExec = true;
|
||||
}
|
||||
@ -224,7 +229,7 @@ void SIInsertSkips::skipIfDead(MachineBasicBlock &MBB,
|
||||
I == MBB.end() && !llvm::is_contained(MBB.successors(), &*NextBBI);
|
||||
|
||||
if (NoSuccessor) {
|
||||
generatePsEndPgm(MBB, I, DL, TII);
|
||||
generateEndPgm(MBB, I, DL, TII, true);
|
||||
} else {
|
||||
ensureEarlyExitBlock(MBB, false);
|
||||
|
||||
@ -368,6 +373,23 @@ bool SIInsertSkips::kill(MachineInstr &MI) {
|
||||
}
|
||||
}
|
||||
|
||||
// Lower an early-terminate pseudo \p MI by inserting, immediately before it,
// an S_CBRANCH_SCC0 that targets the shared EarlyExitBlock. The pseudo itself
// is not removed here.
void SIInsertSkips::earlyTerm(MachineInstr &MI) {
|
||||
MachineBasicBlock &MBB = *MI.getParent();
|
||||
const DebugLoc DL = MI.getDebugLoc();
|
||||
|
||||
// Make sure EarlyExitBlock exists before branching to it.
// NOTE(review): the `true` argument presumably asks for the exit block to
// clear EXEC first (see EarlyExitClearsExec) -- confirm against the
// ensureEarlyExitBlock declaration.
ensureEarlyExitBlock(MBB, true);
|
||||
|
||||
// The conditional branch is placed in front of MI and reuses its debug location.
auto BranchMI = BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_CBRANCH_SCC0))
|
||||
.addMBB(EarlyExitBlock);
|
||||
auto Next = std::next(MI.getIterator());
|
||||
|
||||
// Only call splitBlock when MI is followed by a non-terminator instruction;
// if MI is last in the block or is followed by a terminator, the block is
// left unsplit.
// NOTE(review): splitBlock is defined outside this view; presumably it
// moves the instructions after the branch into a new block -- confirm.
if (Next != MBB.end() && !Next->isTerminator())
|
||||
splitBlock(MBB, *BranchMI, MDT);
|
||||
|
||||
// Record the new CFG edge on the block and in the dominator tree.
MBB.addSuccessor(EarlyExitBlock);
|
||||
MDT->getBase().insertEdge(&MBB, EarlyExitBlock);
|
||||
}
|
||||
|
||||
// Returns true if a branch over the block was inserted.
|
||||
bool SIInsertSkips::skipMaskBranch(MachineInstr &MI,
|
||||
MachineBasicBlock &SrcMBB) {
|
||||
@ -393,6 +415,7 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
|
||||
SkipThreshold = SkipThresholdFlag;
|
||||
|
||||
SmallVector<MachineInstr *, 4> KillInstrs;
|
||||
SmallVector<MachineInstr *, 4> EarlyTermInstrs;
|
||||
bool MadeChange = false;
|
||||
|
||||
for (MachineBasicBlock &MBB : MF) {
|
||||
@ -451,18 +474,29 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
|
||||
}
|
||||
break;
|
||||
|
||||
case AMDGPU::SI_EARLY_TERMINATE_SCC0:
|
||||
EarlyTermInstrs.push_back(&MI);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (MachineInstr *Instr : EarlyTermInstrs) {
|
||||
// Early termination in GS does nothing
|
||||
if (MF.getFunction().getCallingConv() != CallingConv::AMDGPU_GS)
|
||||
earlyTerm(*Instr);
|
||||
Instr->eraseFromParent();
|
||||
}
|
||||
for (MachineInstr *Kill : KillInstrs) {
|
||||
skipIfDead(*Kill->getParent(), std::next(Kill->getIterator()),
|
||||
Kill->getDebugLoc());
|
||||
Kill->eraseFromParent();
|
||||
}
|
||||
KillInstrs.clear();
|
||||
EarlyTermInstrs.clear();
|
||||
EarlyExitBlock = nullptr;
|
||||
|
||||
return MadeChange;
|
||||
|
@ -321,6 +321,14 @@ def SI_IF_BREAK : CFPseudoInstSI <
|
||||
let isReMaterializable = 1;
|
||||
}
|
||||
|
||||
// Branch to the early termination block of the shader if SCC is 0.
|
||||
// This uses SCC from a previous SALU operation, i.e. the update of
|
||||
// a mask of live lanes after a kill/demote operation.
|
||||
// Only valid in pixel shaders.
// NOTE(review): the si-insert-skips handling added with this pseudo skips the
// lowering for amdgpu_gs functions and the accompanying MIR test also covers
// an amdgpu_cs function -- confirm whether "pixel shaders only" still holds.
|
||||
def SI_EARLY_TERMINATE_SCC0 : SPseudoInstSI <(outs), (ins)> {
|
||||
// No explicit operands: EXEC and SCC are listed as implicit uses.
let Uses = [EXEC,SCC];
|
||||
}
|
||||
|
||||
let Uses = [EXEC] in {
|
||||
|
||||
multiclass PseudoInstKill <dag ins> {
|
||||
|
268
test/CodeGen/AMDGPU/early-term.mir
Normal file
268
test/CodeGen/AMDGPU/early-term.mir
Normal file
@ -0,0 +1,268 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=si-insert-skips -verify-machineinstrs %s -o - | FileCheck %s
|
||||
|
||||
--- |
|
||||
define amdgpu_ps void @early_term_scc0_end_block() {
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @early_term_scc0_next_terminator() {
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @early_term_scc0_in_block() {
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @early_term_scc0_with_kill() {
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_gs void @early_term_scc0_gs() {
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_cs void @early_term_scc0_cs() {
|
||||
ret void
|
||||
}
|
||||
...
|
||||
|
||||
---
|
||||
name: early_term_scc0_end_block
|
||||
tracksRegLiveness: true
|
||||
liveins:
|
||||
- { reg: '$sgpr0' }
|
||||
- { reg: '$sgpr1' }
|
||||
body: |
|
||||
; CHECK-LABEL: name: early_term_scc0_end_block
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x80000000), %bb.2(0x00000000)
|
||||
; CHECK: liveins: $sgpr0, $sgpr1
|
||||
; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
; CHECK: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
|
||||
; CHECK: S_CBRANCH_SCC0 %bb.2, implicit $scc
|
||||
; CHECK: bb.1:
|
||||
; CHECK: liveins: $vgpr0
|
||||
; CHECK: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
; CHECK: S_ENDPGM 0
|
||||
; CHECK: bb.2:
|
||||
; CHECK: $exec_lo = S_MOV_B32 0
|
||||
; CHECK: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
|
||||
; CHECK: S_ENDPGM 0
|
||||
bb.0:
|
||||
liveins: $sgpr0, $sgpr1
|
||||
successors: %bb.1
|
||||
|
||||
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
|
||||
SI_EARLY_TERMINATE_SCC0 implicit $scc, implicit $exec
|
||||
|
||||
bb.1:
|
||||
liveins: $vgpr0
|
||||
EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
name: early_term_scc0_next_terminator
|
||||
tracksRegLiveness: true
|
||||
liveins:
|
||||
- { reg: '$sgpr0' }
|
||||
- { reg: '$sgpr1' }
|
||||
body: |
|
||||
; CHECK-LABEL: name: early_term_scc0_next_terminator
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.2(0x80000000), %bb.3(0x00000000)
|
||||
; CHECK: liveins: $sgpr0, $sgpr1
|
||||
; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
; CHECK: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
|
||||
; CHECK: S_CBRANCH_SCC0 %bb.3, implicit $scc
|
||||
; CHECK: S_BRANCH %bb.2
|
||||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: $vgpr0 = V_MOV_B32_e32 1, implicit $exec
|
||||
; CHECK: bb.2:
|
||||
; CHECK: liveins: $vgpr0
|
||||
; CHECK: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
; CHECK: S_ENDPGM 0
|
||||
; CHECK: bb.3:
|
||||
; CHECK: $exec_lo = S_MOV_B32 0
|
||||
; CHECK: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
|
||||
; CHECK: S_ENDPGM 0
|
||||
bb.0:
|
||||
liveins: $sgpr0, $sgpr1
|
||||
successors: %bb.2
|
||||
|
||||
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
|
||||
SI_EARLY_TERMINATE_SCC0 implicit $scc, implicit $exec
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.1:
|
||||
successors: %bb.2
|
||||
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.2:
|
||||
liveins: $vgpr0
|
||||
EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
name: early_term_scc0_in_block
|
||||
tracksRegLiveness: true
|
||||
liveins:
|
||||
- { reg: '$sgpr0' }
|
||||
- { reg: '$sgpr1' }
|
||||
body: |
|
||||
; CHECK-LABEL: name: early_term_scc0_in_block
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: liveins: $sgpr0, $sgpr1
|
||||
; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
; CHECK: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
|
||||
; CHECK: S_CBRANCH_SCC0 %bb.2, implicit $scc
|
||||
; CHECK: bb.3:
|
||||
; CHECK: successors: %bb.1(0x80000000)
|
||||
; CHECK: liveins: $vgpr0, $scc
|
||||
; CHECK: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
|
||||
; CHECK: bb.1:
|
||||
; CHECK: liveins: $vgpr0, $vgpr1
|
||||
; CHECK: EXP 1, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec
|
||||
; CHECK: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
; CHECK: S_ENDPGM 0
|
||||
; CHECK: bb.2:
|
||||
; CHECK: $exec_lo = S_MOV_B32 0
|
||||
; CHECK: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
|
||||
; CHECK: S_ENDPGM 0
|
||||
bb.0:
|
||||
liveins: $sgpr0, $sgpr1
|
||||
successors: %bb.1
|
||||
|
||||
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
|
||||
SI_EARLY_TERMINATE_SCC0 implicit $scc, implicit $exec
|
||||
$vgpr1 = V_MOV_B32_e32 1, implicit $exec
|
||||
|
||||
bb.1:
|
||||
liveins: $vgpr0, $vgpr1
|
||||
EXP 1, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec
|
||||
EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
name: early_term_scc0_with_kill
|
||||
tracksRegLiveness: true
|
||||
liveins:
|
||||
- { reg: '$sgpr0' }
|
||||
- { reg: '$sgpr1' }
|
||||
- { reg: '$vgpr2' }
|
||||
body: |
|
||||
; CHECK-LABEL: name: early_term_scc0_with_kill
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x80000000), %bb.3(0x00000000)
|
||||
; CHECK: liveins: $sgpr0, $sgpr1, $vgpr2
|
||||
; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
; CHECK: V_CMPX_LE_F32_nosdst_e32 0, killed $vgpr2, implicit-def $exec, implicit $mode, implicit $exec
|
||||
; CHECK: S_CBRANCH_EXECZ %bb.3, implicit $exec
|
||||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.4(0x40000000), %bb.3(0x40000000)
|
||||
; CHECK: liveins: $sgpr0, $sgpr1, $vgpr0
|
||||
; CHECK: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
|
||||
; CHECK: S_CBRANCH_SCC0 %bb.3, implicit $scc
|
||||
; CHECK: bb.4:
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: liveins: $vgpr0, $scc
|
||||
; CHECK: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
|
||||
; CHECK: bb.2:
|
||||
; CHECK: liveins: $vgpr0, $vgpr1
|
||||
; CHECK: EXP 1, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec
|
||||
; CHECK: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
; CHECK: S_ENDPGM 0
|
||||
; CHECK: bb.3:
|
||||
; CHECK: $exec_lo = S_MOV_B32 0
|
||||
; CHECK: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
|
||||
; CHECK: S_ENDPGM 0
|
||||
bb.0:
|
||||
liveins: $sgpr0, $sgpr1, $vgpr2
|
||||
successors: %bb.1
|
||||
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
SI_KILL_F32_COND_IMM_TERMINATOR killed $vgpr2, 0, 3, implicit-def $exec, implicit-def $vcc, implicit-def $scc, implicit $exec
|
||||
|
||||
bb.1:
|
||||
liveins: $sgpr0, $sgpr1, $vgpr0
|
||||
successors: %bb.2
|
||||
dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
|
||||
SI_EARLY_TERMINATE_SCC0 implicit $scc, implicit $exec
|
||||
$vgpr1 = V_MOV_B32_e32 1, implicit $exec
|
||||
|
||||
bb.2:
|
||||
liveins: $vgpr0, $vgpr1
|
||||
EXP 1, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec
|
||||
EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
name: early_term_scc0_gs
|
||||
tracksRegLiveness: true
|
||||
liveins:
|
||||
- { reg: '$sgpr0' }
|
||||
- { reg: '$sgpr1' }
|
||||
body: |
|
||||
; CHECK-LABEL: name: early_term_scc0_gs
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x80000000)
|
||||
; CHECK: liveins: $sgpr0, $sgpr1
|
||||
; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
; CHECK: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
|
||||
; CHECK: bb.1:
|
||||
; CHECK: liveins: $vgpr0
|
||||
; CHECK: S_ENDPGM 0
|
||||
bb.0:
|
||||
liveins: $sgpr0, $sgpr1
|
||||
successors: %bb.1
|
||||
|
||||
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
|
||||
SI_EARLY_TERMINATE_SCC0 implicit $scc, implicit $exec
|
||||
|
||||
bb.1:
|
||||
liveins: $vgpr0
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
name: early_term_scc0_cs
|
||||
tracksRegLiveness: true
|
||||
liveins:
|
||||
- { reg: '$sgpr0' }
|
||||
- { reg: '$sgpr1' }
|
||||
body: |
|
||||
; CHECK-LABEL: name: early_term_scc0_cs
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x80000000), %bb.2(0x00000000)
|
||||
; CHECK: liveins: $sgpr0, $sgpr1
|
||||
; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
; CHECK: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
|
||||
; CHECK: S_CBRANCH_SCC0 %bb.2, implicit $scc
|
||||
; CHECK: bb.1:
|
||||
; CHECK: liveins: $vgpr0
|
||||
; CHECK: S_ENDPGM 0
|
||||
; CHECK: bb.2:
|
||||
; CHECK: $exec_lo = S_MOV_B32 0
|
||||
; CHECK: S_ENDPGM 0
|
||||
bb.0:
|
||||
liveins: $sgpr0, $sgpr1
|
||||
successors: %bb.1
|
||||
|
||||
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
|
||||
SI_EARLY_TERMINATE_SCC0 implicit $scc, implicit $exec
|
||||
|
||||
bb.1:
|
||||
liveins: $vgpr0
|
||||
S_ENDPGM 0
|
||||
...
|
Loading…
x
Reference in New Issue
Block a user