mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
AMDGPU: Correct const_index_stride for wave 32 for PAL ABI
Retrying after the earlier revert, with a fix applied (removed the implicit-def flag from the operand). This now passes with expensive_checks enabled. Since there is a single scratch resource descriptor for all shaders, if there is both a wave32 and a wave64 shader (for instance, in VsFs pairs) then the const_index_stride will be incorrect for the wave32 shaders. Differential Revision: https://reviews.llvm.org/D101830 Change-Id: Ie3b8b2921237968caca91527dd0c97b1b0cc0360
This commit is contained in:
parent
e47cdd17d3
commit
754f532bdd
@ -525,6 +525,7 @@ void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
|
||||
// The pointer to the GIT is formed from the offset passed in and either
|
||||
// the amdgpu-git-ptr-high function attribute or the top part of the PC
|
||||
Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
|
||||
Register Rsrc03 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
|
||||
|
||||
buildGitPtr(MBB, I, DL, TII, Rsrc01);
|
||||
|
||||
@ -546,6 +547,20 @@ void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
|
||||
.addImm(0) // cpol
|
||||
.addReg(ScratchRsrcReg, RegState::ImplicitDefine)
|
||||
.addMemOperand(MMO);
|
||||
|
||||
// The driver will always set the SRD for wave 64 (bits 118:117 of
|
||||
// descriptor / bits 22:21 of sub-reg 3 (sub3) will be 0b11).
|
||||
// If the shader is actually wave32 we have to modify the const_index_stride
|
||||
// field in sub-reg 3 (sub3) of the descriptor (bits 22:21) to 0b10 (stride=32). The
|
||||
// reason the driver does this is that there can be cases where it presents
|
||||
// 2 shaders with different wave size (e.g. VsFs).
|
||||
// TODO: convert to using SCRATCH instructions or multiple SRD buffers
|
||||
if (ST.isWave32()) {
|
||||
const MCInstrDesc &SBitsetB32 = TII->get(AMDGPU::S_BITSET0_B32);
|
||||
BuildMI(MBB, I, DL, SBitsetB32, Rsrc03)
|
||||
.addImm(21)
|
||||
.addReg(Rsrc03);
|
||||
}
|
||||
} else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
|
||||
assert(!ST.isAmdHsaOrMesa(Fn));
|
||||
const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
|
||||
|
@ -3,7 +3,8 @@
|
||||
; RUN: opt -S -mtriple=amdgcn-amd-amdpal -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
; Check that it doesn't crash
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX9 %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10 %s
|
||||
|
||||
target datalayout = "A5"
|
||||
|
||||
@ -13,8 +14,8 @@ define amdgpu_cs void @test_simple_indirect_call() {
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_getpc_b64 s[36:37]
|
||||
; GFX9-NEXT: s_mov_b32 s36, s0
|
||||
; GFX9-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX9-NEXT: s_load_dwordx4 s[36:39], s[36:37], 0x10
|
||||
; GFX9-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX9-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: s_add_u32 s36, s36, s0
|
||||
@ -23,6 +24,23 @@ define amdgpu_cs void @test_simple_indirect_call() {
|
||||
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GFX9-NEXT: s_endpgm
|
||||
;
|
||||
; GFX10-LABEL: test_simple_indirect_call:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_getpc_b64 s[36:37]
|
||||
; GFX10-NEXT: s_mov_b32 s36, s0
|
||||
; GFX10-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX10-NEXT: s_load_dwordx4 s[36:39], s[36:37], 0x10
|
||||
; GFX10-NEXT: s_mov_b32 s32, 0
|
||||
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-NEXT: s_bitset0_b32 s39, 21
|
||||
; GFX10-NEXT: s_add_u32 s36, s36, s0
|
||||
; GFX10-NEXT: s_addc_u32 s37, s37, 0
|
||||
; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37]
|
||||
; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GFX10-NEXT: s_endpgm
|
||||
|
||||
|
||||
%pc = call i64 @llvm.amdgcn.s.getpc()
|
||||
%fun = inttoptr i64 %pc to void()*
|
||||
|
Loading…
Reference in New Issue
Block a user