1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 05:01:59 +01:00

[AMDGPU] Add SI_EARLY_TERMINATE_SCC0 for early terminating shader

Add a pseudo instruction to allow early termination of a pixel shader
anywhere, based on the value of SCC.  The intention is to use this
when a mask of live lanes is updated, e.g. live lanes in the WQM pass.
This facilitates early termination of shaders even when EXEC is
incomplete, e.g. in non-uniform control flow.

Reviewed By: foad

Differential Revision: https://reviews.llvm.org/D88777
This commit is contained in:
Carl Ritson 2021-01-13 13:08:42 +09:00
parent abe71cac9b
commit 037097a4f3
3 changed files with 326 additions and 16 deletions

View File

@ -49,6 +49,7 @@ private:
DebugLoc DL);
bool kill(MachineInstr &MI);
void earlyTerm(MachineInstr &MI);
bool skipMaskBranch(MachineInstr &MI, MachineBasicBlock &MBB);
@ -145,10 +146,11 @@ bool SIInsertSkips::dominatesAllReachable(MachineBasicBlock &MBB) {
return true;
}
static void generatePsEndPgm(MachineBasicBlock &MBB,
static void generateEndPgm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
const SIInstrInfo *TII) {
// Generate "null export; s_endpgm".
const SIInstrInfo *TII, bool IsPS) {
// "null export"
if (IsPS) {
BuildMI(MBB, I, DL, TII->get(AMDGPU::EXP_DONE))
.addImm(AMDGPU::Exp::ET_NULL)
.addReg(AMDGPU::VGPR0, RegState::Undef)
@ -158,6 +160,8 @@ static void generatePsEndPgm(MachineBasicBlock &MBB,
.addImm(1) // vm
.addImm(0) // compr
.addImm(0); // en
}
// s_endpgm
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ENDPGM)).addImm(0);
}
@ -169,7 +173,9 @@ void SIInsertSkips::ensureEarlyExitBlock(MachineBasicBlock &MBB,
if (!EarlyExitBlock) {
EarlyExitBlock = MF->CreateMachineBasicBlock();
MF->insert(MF->end(), EarlyExitBlock);
generatePsEndPgm(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII);
generateEndPgm(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII,
MF->getFunction().getCallingConv() ==
CallingConv::AMDGPU_PS);
EarlyExitClearsExec = false;
}
@ -178,7 +184,6 @@ void SIInsertSkips::ensureEarlyExitBlock(MachineBasicBlock &MBB,
unsigned Mov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
Register Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
auto ExitI = EarlyExitBlock->getFirstNonPHI();
assert(ExitI->getOpcode() == AMDGPU::EXP_DONE);
BuildMI(*EarlyExitBlock, ExitI, DL, TII->get(Mov), Exec).addImm(0);
EarlyExitClearsExec = true;
}
@ -224,7 +229,7 @@ void SIInsertSkips::skipIfDead(MachineBasicBlock &MBB,
I == MBB.end() && !llvm::is_contained(MBB.successors(), &*NextBBI);
if (NoSuccessor) {
generatePsEndPgm(MBB, I, DL, TII);
generateEndPgm(MBB, I, DL, TII, true);
} else {
ensureEarlyExitBlock(MBB, false);
@ -368,6 +373,23 @@ bool SIInsertSkips::kill(MachineInstr &MI) {
}
}
// Expand an early-terminate pseudo: emit a conditional branch (taken when
// SCC is 0) from the position of MI to the shared early-exit block, and
// update the CFG and dominator tree to match. MI itself is left in place;
// erasing it is the caller's job.
void SIInsertSkips::earlyTerm(MachineInstr &MI) {
  MachineBasicBlock &Parent = *MI.getParent();
  const DebugLoc Loc = MI.getDebugLoc();

  // Make sure the early-exit block exists before branching to it.
  // NOTE(review): the `true` argument presumably requests that the exit
  // block clears EXEC -- confirm against ensureEarlyExitBlock.
  ensureEarlyExitBlock(Parent, true);

  // Insert "s_cbranch_scc0 <early exit>" immediately before MI.
  auto ExitBranch = BuildMI(Parent, MI, Loc, TII->get(AMDGPU::S_CBRANCH_SCC0))
                        .addMBB(EarlyExitBlock);

  // If MI is followed by an ordinary (non-terminator) instruction, the new
  // branch would sit in the middle of the block, so split the block at it.
  auto Following = std::next(MI.getIterator());
  const bool BranchIsMidBlock =
      Following != Parent.end() && !Following->isTerminator();
  if (BranchIsMidBlock)
    splitBlock(Parent, *ExitBranch, MDT);

  // Record the new edge in both the CFG and the dominator tree.
  Parent.addSuccessor(EarlyExitBlock);
  MDT->getBase().insertEdge(&Parent, EarlyExitBlock);
}
// Returns true if a branch over the block was inserted.
bool SIInsertSkips::skipMaskBranch(MachineInstr &MI,
MachineBasicBlock &SrcMBB) {
@ -393,6 +415,7 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
SkipThreshold = SkipThresholdFlag;
SmallVector<MachineInstr *, 4> KillInstrs;
SmallVector<MachineInstr *, 4> EarlyTermInstrs;
bool MadeChange = false;
for (MachineBasicBlock &MBB : MF) {
@ -451,18 +474,29 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
}
break;
case AMDGPU::SI_EARLY_TERMINATE_SCC0:
EarlyTermInstrs.push_back(&MI);
break;
default:
break;
}
}
}
for (MachineInstr *Instr : EarlyTermInstrs) {
// Early termination in GS does nothing
if (MF.getFunction().getCallingConv() != CallingConv::AMDGPU_GS)
earlyTerm(*Instr);
Instr->eraseFromParent();
}
for (MachineInstr *Kill : KillInstrs) {
skipIfDead(*Kill->getParent(), std::next(Kill->getIterator()),
Kill->getDebugLoc());
Kill->eraseFromParent();
}
KillInstrs.clear();
EarlyTermInstrs.clear();
EarlyExitBlock = nullptr;
return MadeChange;

View File

@ -321,6 +321,14 @@ def SI_IF_BREAK : CFPseudoInstSI <
let isReMaterializable = 1;
}
// Branch to the early termination block of the shader if SCC is 0.
// This uses SCC from a previous SALU operation, i.e. the update of
// a mask of live lanes after a kill/demote operation.
// Only valid in pixel shaders.
def SI_EARLY_TERMINATE_SCC0 : SPseudoInstSI <(outs), (ins)> {
let Uses = [EXEC,SCC];
}
let Uses = [EXEC] in {
multiclass PseudoInstKill <dag ins> {

View File

@ -0,0 +1,268 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=si-insert-skips -verify-machineinstrs %s -o - | FileCheck %s
--- |
define amdgpu_ps void @early_term_scc0_end_block() {
ret void
}
define amdgpu_ps void @early_term_scc0_next_terminator() {
ret void
}
define amdgpu_ps void @early_term_scc0_in_block() {
ret void
}
define amdgpu_ps void @early_term_scc0_with_kill() {
ret void
}
define amdgpu_gs void @early_term_scc0_gs() {
ret void
}
define amdgpu_cs void @early_term_scc0_cs() {
ret void
}
...
---
name: early_term_scc0_end_block
tracksRegLiveness: true
liveins:
- { reg: '$sgpr0' }
- { reg: '$sgpr1' }
body: |
; CHECK-LABEL: name: early_term_scc0_end_block
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x80000000), %bb.2(0x00000000)
; CHECK: liveins: $sgpr0, $sgpr1
; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; CHECK: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
; CHECK: S_CBRANCH_SCC0 %bb.2, implicit $scc
; CHECK: bb.1:
; CHECK: liveins: $vgpr0
; CHECK: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
; CHECK: S_ENDPGM 0
; CHECK: bb.2:
; CHECK: $exec_lo = S_MOV_B32 0
; CHECK: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
; CHECK: S_ENDPGM 0
bb.0:
liveins: $sgpr0, $sgpr1
successors: %bb.1
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
SI_EARLY_TERMINATE_SCC0 implicit $scc, implicit $exec
bb.1:
liveins: $vgpr0
EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
S_ENDPGM 0
...
---
name: early_term_scc0_next_terminator
tracksRegLiveness: true
liveins:
- { reg: '$sgpr0' }
- { reg: '$sgpr1' }
body: |
; CHECK-LABEL: name: early_term_scc0_next_terminator
; CHECK: bb.0:
; CHECK: successors: %bb.2(0x80000000), %bb.3(0x00000000)
; CHECK: liveins: $sgpr0, $sgpr1
; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; CHECK: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
; CHECK: S_CBRANCH_SCC0 %bb.3, implicit $scc
; CHECK: S_BRANCH %bb.2
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: $vgpr0 = V_MOV_B32_e32 1, implicit $exec
; CHECK: bb.2:
; CHECK: liveins: $vgpr0
; CHECK: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
; CHECK: S_ENDPGM 0
; CHECK: bb.3:
; CHECK: $exec_lo = S_MOV_B32 0
; CHECK: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
; CHECK: S_ENDPGM 0
bb.0:
liveins: $sgpr0, $sgpr1
successors: %bb.2
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
SI_EARLY_TERMINATE_SCC0 implicit $scc, implicit $exec
S_BRANCH %bb.2
bb.1:
successors: %bb.2
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
S_BRANCH %bb.2
bb.2:
liveins: $vgpr0
EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
S_ENDPGM 0
...
---
name: early_term_scc0_in_block
tracksRegLiveness: true
liveins:
- { reg: '$sgpr0' }
- { reg: '$sgpr1' }
body: |
; CHECK-LABEL: name: early_term_scc0_in_block
; CHECK: bb.0:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: liveins: $sgpr0, $sgpr1
; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; CHECK: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
; CHECK: S_CBRANCH_SCC0 %bb.2, implicit $scc
; CHECK: bb.3:
; CHECK: successors: %bb.1(0x80000000)
; CHECK: liveins: $vgpr0, $scc
; CHECK: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
; CHECK: bb.1:
; CHECK: liveins: $vgpr0, $vgpr1
; CHECK: EXP 1, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec
; CHECK: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
; CHECK: S_ENDPGM 0
; CHECK: bb.2:
; CHECK: $exec_lo = S_MOV_B32 0
; CHECK: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
; CHECK: S_ENDPGM 0
bb.0:
liveins: $sgpr0, $sgpr1
successors: %bb.1
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
SI_EARLY_TERMINATE_SCC0 implicit $scc, implicit $exec
$vgpr1 = V_MOV_B32_e32 1, implicit $exec
bb.1:
liveins: $vgpr0, $vgpr1
EXP 1, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec
EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
S_ENDPGM 0
...
---
name: early_term_scc0_with_kill
tracksRegLiveness: true
liveins:
- { reg: '$sgpr0' }
- { reg: '$sgpr1' }
- { reg: '$vgpr2' }
body: |
; CHECK-LABEL: name: early_term_scc0_with_kill
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x80000000), %bb.3(0x00000000)
; CHECK: liveins: $sgpr0, $sgpr1, $vgpr2
; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; CHECK: V_CMPX_LE_F32_nosdst_e32 0, killed $vgpr2, implicit-def $exec, implicit $mode, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.3, implicit $exec
; CHECK: bb.1:
; CHECK: successors: %bb.4(0x40000000), %bb.3(0x40000000)
; CHECK: liveins: $sgpr0, $sgpr1, $vgpr0
; CHECK: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
; CHECK: S_CBRANCH_SCC0 %bb.3, implicit $scc
; CHECK: bb.4:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $vgpr0, $scc
; CHECK: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
; CHECK: bb.2:
; CHECK: liveins: $vgpr0, $vgpr1
; CHECK: EXP 1, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec
; CHECK: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
; CHECK: S_ENDPGM 0
; CHECK: bb.3:
; CHECK: $exec_lo = S_MOV_B32 0
; CHECK: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
; CHECK: S_ENDPGM 0
bb.0:
liveins: $sgpr0, $sgpr1, $vgpr2
successors: %bb.1
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
SI_KILL_F32_COND_IMM_TERMINATOR killed $vgpr2, 0, 3, implicit-def $exec, implicit-def $vcc, implicit-def $scc, implicit $exec
bb.1:
liveins: $sgpr0, $sgpr1, $vgpr0
successors: %bb.2
dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
SI_EARLY_TERMINATE_SCC0 implicit $scc, implicit $exec
$vgpr1 = V_MOV_B32_e32 1, implicit $exec
bb.2:
liveins: $vgpr0, $vgpr1
EXP 1, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec
EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
S_ENDPGM 0
...
---
name: early_term_scc0_gs
tracksRegLiveness: true
liveins:
- { reg: '$sgpr0' }
- { reg: '$sgpr1' }
body: |
; CHECK-LABEL: name: early_term_scc0_gs
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x80000000)
; CHECK: liveins: $sgpr0, $sgpr1
; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; CHECK: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
; CHECK: bb.1:
; CHECK: liveins: $vgpr0
; CHECK: S_ENDPGM 0
bb.0:
liveins: $sgpr0, $sgpr1
successors: %bb.1
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
SI_EARLY_TERMINATE_SCC0 implicit $scc, implicit $exec
bb.1:
liveins: $vgpr0
S_ENDPGM 0
...
---
name: early_term_scc0_cs
tracksRegLiveness: true
liveins:
- { reg: '$sgpr0' }
- { reg: '$sgpr1' }
body: |
; CHECK-LABEL: name: early_term_scc0_cs
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x80000000), %bb.2(0x00000000)
; CHECK: liveins: $sgpr0, $sgpr1
; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; CHECK: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
; CHECK: S_CBRANCH_SCC0 %bb.2, implicit $scc
; CHECK: bb.1:
; CHECK: liveins: $vgpr0
; CHECK: S_ENDPGM 0
; CHECK: bb.2:
; CHECK: $exec_lo = S_MOV_B32 0
; CHECK: S_ENDPGM 0
bb.0:
liveins: $sgpr0, $sgpr1
successors: %bb.1
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
SI_EARLY_TERMINATE_SCC0 implicit $scc, implicit $exec
bb.1:
liveins: $vgpr0
S_ENDPGM 0
...