1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 04:02:41 +01:00

[AMDGPU] Allow frontends to disable null export for pixel shaders

Disable null export (for kills) when a frontend defines a pixel
shader as not exporting using amdgpu-color-export and
amdgpu-depth-export function attributes.
This allows the generation of export free pixel shaders.

Reviewed By: foad

Differential Revision: https://reviews.llvm.org/D105683
This commit is contained in:
Carl Ritson 2021-07-22 09:59:35 +09:00
parent 191a71d3e8
commit 41b211a722
4 changed files with 154 additions and 83 deletions

View File

@ -67,9 +67,19 @@ char &llvm::SILateBranchLoweringPassID = SILateBranchLowering::ID;
static void generateEndPgm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
const SIInstrInfo *TII, bool IsPS) {
// "null export"
if (IsPS) {
const SIInstrInfo *TII, MachineFunction &MF) {
const Function &F = MF.getFunction();
bool IsPS = F.getCallingConv() == CallingConv::AMDGPU_PS;
// Check if hardware has been configured to expect color or depth exports.
bool HasExports =
AMDGPU::getHasColorExport(F) || AMDGPU::getHasDepthExport(F);
// Prior to GFX10, hardware always expects at least one export for PS.
bool MustExport = !AMDGPU::isGFX10Plus(TII->getSubtarget());
if (IsPS && (HasExports || MustExport)) {
// Generate "null export" if hardware is expecting PS to export.
BuildMI(MBB, I, DL, TII->get(AMDGPU::EXP_DONE))
.addImm(AMDGPU::Exp::ET_NULL)
.addReg(AMDGPU::VGPR0, RegState::Undef)
@ -80,6 +90,7 @@ static void generateEndPgm(MachineBasicBlock &MBB,
.addImm(0) // compr
.addImm(0); // en
}
// s_endpgm
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ENDPGM)).addImm(0);
}
@ -168,8 +179,7 @@ bool SILateBranchLowering::runOnMachineFunction(MachineFunction &MF) {
BuildMI(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII->get(MovOpc),
ExecReg)
.addImm(0);
generateEndPgm(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII,
MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS);
generateEndPgm(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII, MF);
for (MachineInstr *Instr : EarlyTermInstrs) {
// Early termination in GS does nothing

View File

@ -1344,6 +1344,17 @@ unsigned getInitialPSInputAddr(const Function &F) {
return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}
/// Report whether \p F should be treated as having color exports, based on
/// its "amdgpu-color-export" integer attribute (any nonzero value counts).
bool getHasColorExport(const Function &F) {
  // Conservative fallback: the default handed to getIntegerAttribute is 1
  // for the AMDGPU_PS calling convention and 0 for everything else, so a
  // pixel shader is only reported as export-free when the attribute says so.
  const int Default = F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0;
  return getIntegerAttribute(F, "amdgpu-color-export", Default) != 0;
}
/// Report whether \p F should be treated as having depth exports, based on
/// its "amdgpu-depth-export" integer attribute (any nonzero value counts).
bool getHasDepthExport(const Function &F) {
  // The default handed to getIntegerAttribute is 0 regardless of calling
  // convention.
  const bool HasDepthExport =
      getIntegerAttribute(F, "amdgpu-depth-export", 0) != 0;
  return HasDepthExport;
}
bool isShader(CallingConv::ID cc) {
switch(cc) {
case CallingConv::AMDGPU_VS:

View File

@ -689,6 +689,10 @@ uint64_t encodeMsg(uint64_t MsgId,
unsigned getInitialPSInputAddr(const Function &F);
/// \returns true if the "amdgpu-color-export" integer attribute of \p F is
/// nonzero. The definition uses a default of 1 for AMDGPU_PS functions and 0
/// for all other calling conventions.
bool getHasColorExport(const Function &F);
/// \returns true if the "amdgpu-depth-export" integer attribute of \p F is
/// nonzero. The definition uses a default of 0.
bool getHasDepthExport(const Function &F);
LLVM_READNONE
bool isShader(CallingConv::ID CC);

View File

@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=si-late-branch-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=si-late-branch-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX9 %s
# RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass=si-late-branch-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX10 %s
--- |
define amdgpu_ps void @early_term_scc0_end_block() {
@ -21,6 +21,12 @@
define amdgpu_cs void @early_term_scc0_cs() {
ret void
}
define amdgpu_ps void @early_term_no_export() #0 {
ret void
}
attributes #0 = { "amdgpu-color-export"="0" "amdgpu-depth-export"="0" }
...
---
@ -30,21 +36,21 @@ liveins:
- { reg: '$sgpr0' }
- { reg: '$sgpr1' }
body: |
; GFX10-LABEL: name: early_term_scc0_end_block
; GFX10: bb.0:
; GFX10: successors: %bb.1(0x80000000), %bb.2(0x00000000)
; GFX10: liveins: $sgpr0, $sgpr1
; GFX10: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; GFX10: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
; GFX10: S_CBRANCH_SCC0 %bb.2, implicit $scc
; GFX10: bb.1:
; GFX10: liveins: $vgpr0
; GFX10: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
; GFX10: S_ENDPGM 0
; GFX10: bb.2:
; GFX10: $exec_lo = S_MOV_B32 0
; GFX10: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
; GFX10: S_ENDPGM 0
; GCN-LABEL: name: early_term_scc0_end_block
; GCN: bb.0:
; GCN: successors: %bb.1(0x80000000), %bb.2(0x00000000)
; GCN: liveins: $sgpr0, $sgpr1
; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; GCN: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
; GCN: bb.1:
; GCN: liveins: $vgpr0
; GCN: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
; GCN: S_ENDPGM 0
; GCN: bb.2:
; GCN: $exec = S_MOV_B64 0
; GCN: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
; GCN: S_ENDPGM 0
bb.0:
liveins: $sgpr0, $sgpr1
successors: %bb.1
@ -66,25 +72,25 @@ liveins:
- { reg: '$sgpr0' }
- { reg: '$sgpr1' }
body: |
; GFX10-LABEL: name: early_term_scc0_next_terminator
; GFX10: bb.0:
; GFX10: successors: %bb.2(0x80000000), %bb.3(0x00000000)
; GFX10: liveins: $sgpr0, $sgpr1
; GFX10: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; GFX10: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
; GFX10: S_CBRANCH_SCC0 %bb.3, implicit $scc
; GFX10: S_BRANCH %bb.2
; GFX10: bb.1:
; GFX10: successors: %bb.2(0x80000000)
; GFX10: $vgpr0 = V_MOV_B32_e32 1, implicit $exec
; GFX10: bb.2:
; GFX10: liveins: $vgpr0
; GFX10: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
; GFX10: S_ENDPGM 0
; GFX10: bb.3:
; GFX10: $exec_lo = S_MOV_B32 0
; GFX10: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
; GFX10: S_ENDPGM 0
; GCN-LABEL: name: early_term_scc0_next_terminator
; GCN: bb.0:
; GCN: successors: %bb.2(0x80000000), %bb.3(0x00000000)
; GCN: liveins: $sgpr0, $sgpr1
; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; GCN: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
; GCN: S_CBRANCH_SCC0 %bb.3, implicit $scc
; GCN: S_BRANCH %bb.2
; GCN: bb.1:
; GCN: successors: %bb.2(0x80000000)
; GCN: $vgpr0 = V_MOV_B32_e32 1, implicit $exec
; GCN: bb.2:
; GCN: liveins: $vgpr0
; GCN: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
; GCN: S_ENDPGM 0
; GCN: bb.3:
; GCN: $exec = S_MOV_B64 0
; GCN: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
; GCN: S_ENDPGM 0
bb.0:
liveins: $sgpr0, $sgpr1
successors: %bb.2
@ -112,26 +118,26 @@ liveins:
- { reg: '$sgpr0' }
- { reg: '$sgpr1' }
body: |
; GFX10-LABEL: name: early_term_scc0_in_block
; GFX10: bb.0:
; GFX10: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; GFX10: liveins: $sgpr0, $sgpr1
; GFX10: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; GFX10: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
; GFX10: S_CBRANCH_SCC0 %bb.2, implicit $scc
; GFX10: bb.3:
; GFX10: successors: %bb.1(0x80000000)
; GFX10: liveins: $vgpr0, $scc
; GFX10: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
; GFX10: bb.1:
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: EXP 1, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec
; GFX10: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
; GFX10: S_ENDPGM 0
; GFX10: bb.2:
; GFX10: $exec_lo = S_MOV_B32 0
; GFX10: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
; GFX10: S_ENDPGM 0
; GCN-LABEL: name: early_term_scc0_in_block
; GCN: bb.0:
; GCN: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; GCN: liveins: $sgpr0, $sgpr1
; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; GCN: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
; GCN: bb.3:
; GCN: successors: %bb.1(0x80000000)
; GCN: liveins: $vgpr0, $scc
; GCN: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
; GCN: bb.1:
; GCN: liveins: $vgpr0, $vgpr1
; GCN: EXP 1, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec
; GCN: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
; GCN: S_ENDPGM 0
; GCN: bb.2:
; GCN: $exec = S_MOV_B64 0
; GCN: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
; GCN: S_ENDPGM 0
bb.0:
liveins: $sgpr0, $sgpr1
successors: %bb.1
@ -155,15 +161,18 @@ liveins:
- { reg: '$sgpr0' }
- { reg: '$sgpr1' }
body: |
; GFX10-LABEL: name: early_term_scc0_gs
; GFX10: bb.0:
; GFX10: successors: %bb.1(0x80000000)
; GFX10: liveins: $sgpr0, $sgpr1
; GFX10: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; GFX10: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
; GFX10: bb.1:
; GFX10: liveins: $vgpr0
; GFX10: S_ENDPGM 0
; GCN-LABEL: name: early_term_scc0_gs
; GCN: bb.0:
; GCN: successors: %bb.1(0x80000000)
; GCN: liveins: $sgpr0, $sgpr1
; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; GCN: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
; GCN: bb.1:
; GCN: liveins: $vgpr0
; GCN: S_ENDPGM 0
; GCN: bb.2:
; GCN: $exec = S_MOV_B64 0
; GCN: S_ENDPGM 0
bb.0:
liveins: $sgpr0, $sgpr1
successors: %bb.1
@ -184,19 +193,19 @@ liveins:
- { reg: '$sgpr0' }
- { reg: '$sgpr1' }
body: |
; GFX10-LABEL: name: early_term_scc0_cs
; GFX10: bb.0:
; GFX10: successors: %bb.1(0x80000000), %bb.2(0x00000000)
; GFX10: liveins: $sgpr0, $sgpr1
; GFX10: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; GFX10: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
; GFX10: S_CBRANCH_SCC0 %bb.2, implicit $scc
; GFX10: bb.1:
; GFX10: liveins: $vgpr0
; GFX10: S_ENDPGM 0
; GFX10: bb.2:
; GFX10: $exec_lo = S_MOV_B32 0
; GFX10: S_ENDPGM 0
; GCN-LABEL: name: early_term_scc0_cs
; GCN: bb.0:
; GCN: successors: %bb.1(0x80000000), %bb.2(0x00000000)
; GCN: liveins: $sgpr0, $sgpr1
; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; GCN: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
; GCN: bb.1:
; GCN: liveins: $vgpr0
; GCN: S_ENDPGM 0
; GCN: bb.2:
; GCN: $exec = S_MOV_B64 0
; GCN: S_ENDPGM 0
bb.0:
liveins: $sgpr0, $sgpr1
successors: %bb.1
@ -209,3 +218,40 @@ body: |
liveins: $vgpr0
S_ENDPGM 0
...
---
name: early_term_no_export
tracksRegLiveness: true
liveins:
- { reg: '$sgpr0' }
- { reg: '$sgpr1' }
body: |
; GCN-LABEL: name: early_term_no_export
; GCN: bb.0:
; GCN: successors: %bb.1(0x80000000), %bb.2(0x00000000)
; GCN: liveins: $sgpr0, $sgpr1
; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; GCN: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
; GCN: bb.1:
; GCN: liveins: $vgpr0
; GCN: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
; GCN: S_ENDPGM 0
; GCN: bb.2:
; GCN: $exec = S_MOV_B64 0
; GFX9: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
; GFX10-NOT: EXP_DONE
; GCN: S_ENDPGM 0
bb.0:
liveins: $sgpr0, $sgpr1
successors: %bb.1
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
SI_EARLY_TERMINATE_SCC0 implicit $scc, implicit $exec
bb.1:
liveins: $vgpr0
EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
S_ENDPGM 0
...