mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 04:02:41 +01:00
[AMDGPU] Allow frontends to disable null export for pixel shaders
Disable null export (for kills) when a frontend defines a pixel shader as not exporting using amdgpu-color-export and amdgpu-depth-export function attributes. This allows the generation of export-free pixel shaders. Reviewed By: foad Differential Revision: https://reviews.llvm.org/D105683
This commit is contained in:
parent
191a71d3e8
commit
41b211a722
@ -67,9 +67,19 @@ char &llvm::SILateBranchLoweringPassID = SILateBranchLowering::ID;
|
||||
|
||||
static void generateEndPgm(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator I, DebugLoc DL,
|
||||
const SIInstrInfo *TII, bool IsPS) {
|
||||
// "null export"
|
||||
if (IsPS) {
|
||||
const SIInstrInfo *TII, MachineFunction &MF) {
|
||||
const Function &F = MF.getFunction();
|
||||
bool IsPS = F.getCallingConv() == CallingConv::AMDGPU_PS;
|
||||
|
||||
// Check if hardware has been configured to expect color or depth exports.
|
||||
bool HasExports =
|
||||
AMDGPU::getHasColorExport(F) || AMDGPU::getHasDepthExport(F);
|
||||
|
||||
// Prior to GFX10, hardware always expects at least one export for PS.
|
||||
bool MustExport = !AMDGPU::isGFX10Plus(TII->getSubtarget());
|
||||
|
||||
if (IsPS && (HasExports || MustExport)) {
|
||||
// Generate "null export" if hardware is expecting PS to export.
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::EXP_DONE))
|
||||
.addImm(AMDGPU::Exp::ET_NULL)
|
||||
.addReg(AMDGPU::VGPR0, RegState::Undef)
|
||||
@ -80,6 +90,7 @@ static void generateEndPgm(MachineBasicBlock &MBB,
|
||||
.addImm(0) // compr
|
||||
.addImm(0); // en
|
||||
}
|
||||
|
||||
// s_endpgm
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ENDPGM)).addImm(0);
|
||||
}
|
||||
@ -168,8 +179,7 @@ bool SILateBranchLowering::runOnMachineFunction(MachineFunction &MF) {
|
||||
BuildMI(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII->get(MovOpc),
|
||||
ExecReg)
|
||||
.addImm(0);
|
||||
generateEndPgm(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII,
|
||||
MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS);
|
||||
generateEndPgm(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII, MF);
|
||||
|
||||
for (MachineInstr *Instr : EarlyTermInstrs) {
|
||||
// Early termination in GS does nothing
|
||||
|
@ -1344,6 +1344,17 @@ unsigned getInitialPSInputAddr(const Function &F) {
|
||||
return getIntegerAttribute(F, "InitialPSInputAddr", 0);
|
||||
}
|
||||
|
||||
// Reports whether function F should be treated as having color exports.
// The result is true when the integer value obtained for the
// "amdgpu-color-export" attribute is non-zero. The fallback value handed to
// getIntegerAttribute is 1 when F uses the AMDGPU_PS calling convention and 0
// otherwise (presumably used when the attribute is absent -- confirm against
// getIntegerAttribute).
bool getHasColorExport(const Function &F) {
|
||||
// As a safe default always respond as if PS has color exports.
|
||||
return getIntegerAttribute(
|
||||
F, "amdgpu-color-export",
|
||||
F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0;
|
||||
}
|
||||
|
||||
// Reports whether function F should be treated as having a depth export.
// The result is true when the integer value obtained for the
// "amdgpu-depth-export" attribute is non-zero. The fallback value handed to
// getIntegerAttribute is 0 regardless of calling convention (presumably used
// when the attribute is absent -- confirm against getIntegerAttribute).
bool getHasDepthExport(const Function &F) {
|
||||
return getIntegerAttribute(F, "amdgpu-depth-export", 0) != 0;
|
||||
}
|
||||
|
||||
bool isShader(CallingConv::ID cc) {
|
||||
switch(cc) {
|
||||
case CallingConv::AMDGPU_VS:
|
||||
|
@ -689,6 +689,10 @@ uint64_t encodeMsg(uint64_t MsgId,
|
||||
|
||||
unsigned getInitialPSInputAddr(const Function &F);
|
||||
|
||||
bool getHasColorExport(const Function &F);
|
||||
|
||||
bool getHasDepthExport(const Function &F);
|
||||
|
||||
LLVM_READNONE
|
||||
bool isShader(CallingConv::ID CC);
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=si-late-branch-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=si-late-branch-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass=si-late-branch-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX10 %s
|
||||
|
||||
--- |
|
||||
define amdgpu_ps void @early_term_scc0_end_block() {
|
||||
@ -21,6 +21,12 @@
|
||||
define amdgpu_cs void @early_term_scc0_cs() {
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @early_term_no_export() #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-color-export"="0" "amdgpu-depth-export"="0" }
|
||||
...
|
||||
|
||||
---
|
||||
@ -30,21 +36,21 @@ liveins:
|
||||
- { reg: '$sgpr0' }
|
||||
- { reg: '$sgpr1' }
|
||||
body: |
|
||||
; GFX10-LABEL: name: early_term_scc0_end_block
|
||||
; GFX10: bb.0:
|
||||
; GFX10: successors: %bb.1(0x80000000), %bb.2(0x00000000)
|
||||
; GFX10: liveins: $sgpr0, $sgpr1
|
||||
; GFX10: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX10: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
|
||||
; GFX10: S_CBRANCH_SCC0 %bb.2, implicit $scc
|
||||
; GFX10: bb.1:
|
||||
; GFX10: liveins: $vgpr0
|
||||
; GFX10: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
; GFX10: S_ENDPGM 0
|
||||
; GFX10: bb.2:
|
||||
; GFX10: $exec_lo = S_MOV_B32 0
|
||||
; GFX10: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
|
||||
; GFX10: S_ENDPGM 0
|
||||
; GCN-LABEL: name: early_term_scc0_end_block
|
||||
; GCN: bb.0:
|
||||
; GCN: successors: %bb.1(0x80000000), %bb.2(0x00000000)
|
||||
; GCN: liveins: $sgpr0, $sgpr1
|
||||
; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GCN: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
|
||||
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
|
||||
; GCN: bb.1:
|
||||
; GCN: liveins: $vgpr0
|
||||
; GCN: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
; GCN: S_ENDPGM 0
|
||||
; GCN: bb.2:
|
||||
; GCN: $exec = S_MOV_B64 0
|
||||
; GCN: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
|
||||
; GCN: S_ENDPGM 0
|
||||
bb.0:
|
||||
liveins: $sgpr0, $sgpr1
|
||||
successors: %bb.1
|
||||
@ -66,25 +72,25 @@ liveins:
|
||||
- { reg: '$sgpr0' }
|
||||
- { reg: '$sgpr1' }
|
||||
body: |
|
||||
; GFX10-LABEL: name: early_term_scc0_next_terminator
|
||||
; GFX10: bb.0:
|
||||
; GFX10: successors: %bb.2(0x80000000), %bb.3(0x00000000)
|
||||
; GFX10: liveins: $sgpr0, $sgpr1
|
||||
; GFX10: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX10: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
|
||||
; GFX10: S_CBRANCH_SCC0 %bb.3, implicit $scc
|
||||
; GFX10: S_BRANCH %bb.2
|
||||
; GFX10: bb.1:
|
||||
; GFX10: successors: %bb.2(0x80000000)
|
||||
; GFX10: $vgpr0 = V_MOV_B32_e32 1, implicit $exec
|
||||
; GFX10: bb.2:
|
||||
; GFX10: liveins: $vgpr0
|
||||
; GFX10: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
; GFX10: S_ENDPGM 0
|
||||
; GFX10: bb.3:
|
||||
; GFX10: $exec_lo = S_MOV_B32 0
|
||||
; GFX10: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
|
||||
; GFX10: S_ENDPGM 0
|
||||
; GCN-LABEL: name: early_term_scc0_next_terminator
|
||||
; GCN: bb.0:
|
||||
; GCN: successors: %bb.2(0x80000000), %bb.3(0x00000000)
|
||||
; GCN: liveins: $sgpr0, $sgpr1
|
||||
; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GCN: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
|
||||
; GCN: S_CBRANCH_SCC0 %bb.3, implicit $scc
|
||||
; GCN: S_BRANCH %bb.2
|
||||
; GCN: bb.1:
|
||||
; GCN: successors: %bb.2(0x80000000)
|
||||
; GCN: $vgpr0 = V_MOV_B32_e32 1, implicit $exec
|
||||
; GCN: bb.2:
|
||||
; GCN: liveins: $vgpr0
|
||||
; GCN: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
; GCN: S_ENDPGM 0
|
||||
; GCN: bb.3:
|
||||
; GCN: $exec = S_MOV_B64 0
|
||||
; GCN: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
|
||||
; GCN: S_ENDPGM 0
|
||||
bb.0:
|
||||
liveins: $sgpr0, $sgpr1
|
||||
successors: %bb.2
|
||||
@ -112,26 +118,26 @@ liveins:
|
||||
- { reg: '$sgpr0' }
|
||||
- { reg: '$sgpr1' }
|
||||
body: |
|
||||
; GFX10-LABEL: name: early_term_scc0_in_block
|
||||
; GFX10: bb.0:
|
||||
; GFX10: successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
||||
; GFX10: liveins: $sgpr0, $sgpr1
|
||||
; GFX10: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX10: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
|
||||
; GFX10: S_CBRANCH_SCC0 %bb.2, implicit $scc
|
||||
; GFX10: bb.3:
|
||||
; GFX10: successors: %bb.1(0x80000000)
|
||||
; GFX10: liveins: $vgpr0, $scc
|
||||
; GFX10: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
|
||||
; GFX10: bb.1:
|
||||
; GFX10: liveins: $vgpr0, $vgpr1
|
||||
; GFX10: EXP 1, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec
|
||||
; GFX10: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
; GFX10: S_ENDPGM 0
|
||||
; GFX10: bb.2:
|
||||
; GFX10: $exec_lo = S_MOV_B32 0
|
||||
; GFX10: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
|
||||
; GFX10: S_ENDPGM 0
|
||||
; GCN-LABEL: name: early_term_scc0_in_block
|
||||
; GCN: bb.0:
|
||||
; GCN: successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
||||
; GCN: liveins: $sgpr0, $sgpr1
|
||||
; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GCN: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
|
||||
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
|
||||
; GCN: bb.3:
|
||||
; GCN: successors: %bb.1(0x80000000)
|
||||
; GCN: liveins: $vgpr0, $scc
|
||||
; GCN: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
|
||||
; GCN: bb.1:
|
||||
; GCN: liveins: $vgpr0, $vgpr1
|
||||
; GCN: EXP 1, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec
|
||||
; GCN: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
; GCN: S_ENDPGM 0
|
||||
; GCN: bb.2:
|
||||
; GCN: $exec = S_MOV_B64 0
|
||||
; GCN: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
|
||||
; GCN: S_ENDPGM 0
|
||||
bb.0:
|
||||
liveins: $sgpr0, $sgpr1
|
||||
successors: %bb.1
|
||||
@ -155,15 +161,18 @@ liveins:
|
||||
- { reg: '$sgpr0' }
|
||||
- { reg: '$sgpr1' }
|
||||
body: |
|
||||
; GFX10-LABEL: name: early_term_scc0_gs
|
||||
; GFX10: bb.0:
|
||||
; GFX10: successors: %bb.1(0x80000000)
|
||||
; GFX10: liveins: $sgpr0, $sgpr1
|
||||
; GFX10: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX10: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
|
||||
; GFX10: bb.1:
|
||||
; GFX10: liveins: $vgpr0
|
||||
; GFX10: S_ENDPGM 0
|
||||
; GCN-LABEL: name: early_term_scc0_gs
|
||||
; GCN: bb.0:
|
||||
; GCN: successors: %bb.1(0x80000000)
|
||||
; GCN: liveins: $sgpr0, $sgpr1
|
||||
; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GCN: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
|
||||
; GCN: bb.1:
|
||||
; GCN: liveins: $vgpr0
|
||||
; GCN: S_ENDPGM 0
|
||||
; GCN: bb.2:
|
||||
; GCN: $exec = S_MOV_B64 0
|
||||
; GCN: S_ENDPGM 0
|
||||
bb.0:
|
||||
liveins: $sgpr0, $sgpr1
|
||||
successors: %bb.1
|
||||
@ -184,19 +193,19 @@ liveins:
|
||||
- { reg: '$sgpr0' }
|
||||
- { reg: '$sgpr1' }
|
||||
body: |
|
||||
; GFX10-LABEL: name: early_term_scc0_cs
|
||||
; GFX10: bb.0:
|
||||
; GFX10: successors: %bb.1(0x80000000), %bb.2(0x00000000)
|
||||
; GFX10: liveins: $sgpr0, $sgpr1
|
||||
; GFX10: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX10: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
|
||||
; GFX10: S_CBRANCH_SCC0 %bb.2, implicit $scc
|
||||
; GFX10: bb.1:
|
||||
; GFX10: liveins: $vgpr0
|
||||
; GFX10: S_ENDPGM 0
|
||||
; GFX10: bb.2:
|
||||
; GFX10: $exec_lo = S_MOV_B32 0
|
||||
; GFX10: S_ENDPGM 0
|
||||
; GCN-LABEL: name: early_term_scc0_cs
|
||||
; GCN: bb.0:
|
||||
; GCN: successors: %bb.1(0x80000000), %bb.2(0x00000000)
|
||||
; GCN: liveins: $sgpr0, $sgpr1
|
||||
; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GCN: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
|
||||
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
|
||||
; GCN: bb.1:
|
||||
; GCN: liveins: $vgpr0
|
||||
; GCN: S_ENDPGM 0
|
||||
; GCN: bb.2:
|
||||
; GCN: $exec = S_MOV_B64 0
|
||||
; GCN: S_ENDPGM 0
|
||||
bb.0:
|
||||
liveins: $sgpr0, $sgpr1
|
||||
successors: %bb.1
|
||||
@ -209,3 +218,40 @@ body: |
|
||||
liveins: $vgpr0
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
name: early_term_no_export
|
||||
tracksRegLiveness: true
|
||||
liveins:
|
||||
- { reg: '$sgpr0' }
|
||||
- { reg: '$sgpr1' }
|
||||
body: |
|
||||
; GCN-LABEL: name: early_term_no_export
|
||||
; GCN: bb.0:
|
||||
; GCN: successors: %bb.1(0x80000000), %bb.2(0x00000000)
|
||||
; GCN: liveins: $sgpr0, $sgpr1
|
||||
; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GCN: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
|
||||
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
|
||||
; GCN: bb.1:
|
||||
; GCN: liveins: $vgpr0
|
||||
; GCN: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
; GCN: S_ENDPGM 0
|
||||
; GCN: bb.2:
|
||||
; GCN: $exec = S_MOV_B64 0
|
||||
; GFX9: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
|
||||
; GFX10-NOT: EXP_DONE
|
||||
; GCN: S_ENDPGM 0
|
||||
bb.0:
|
||||
liveins: $sgpr0, $sgpr1
|
||||
successors: %bb.1
|
||||
|
||||
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
|
||||
SI_EARLY_TERMINATE_SCC0 implicit $scc, implicit $exec
|
||||
|
||||
bb.1:
|
||||
liveins: $vgpr0
|
||||
EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
Loading…
Reference in New Issue
Block a user