1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-21 20:12:56 +02:00
llvm-mirror/test/CodeGen/AMDGPU/large-alloca-graphics.ll
Tom Stellard b9654f5566 AMDGPU/SI: Use the correct scratch wave offset register for shaders.
Summary:
The code previously always used s1 as it was using the user + system SGPR
information for compute kernels. This is incorrect for Mesa shaders though,

The register should be the next SGPR after all user and system SGPR's.
We use that Mesa adds arguments for all input and system SGPR's and
take the next available SGPR for the scratch wave offset register.

Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>

Reviewers: mareko, arsenm, nhaehnle, tstellarAMD

Subscribers: qcolombet, arsenm, llvm-commits

Differential Revision: http://reviews.llvm.org/D18941

Patch By: Bas Nieuwenhuizen

llvm-svn: 266336
2016-04-14 16:27:03 +00:00

47 lines
1.8 KiB
LLVM

; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=ALL %s
; RUN: llc -march=amdgcn -mcpu=carrizo < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=ALL %s
; ALL-LABEL: {{^}}large_alloca_pixel_shader:
; GCN: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
; GCN: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
; GCN: s_mov_b32 s10, -1
; CI: s_mov_b32 s11, 0x98f000
; VI: s_mov_b32 s11, 0x980000
; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s0 offen
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s0 offen
; ALL: ; ScratchSize: 32772
define amdgpu_ps void @large_alloca_pixel_shader(i32 %x, i32 %y) #0 {
%large = alloca [8192 x i32], align 4
%gep = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 8191
store volatile i32 %x, i32* %gep
%gep1 = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 %y
%val = load volatile i32, i32* %gep1
store volatile i32 %val, i32 addrspace(1)* undef
ret void
}
; ALL-LABEL: {{^}}large_alloca_pixel_shader_inreg:
; GCN: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
; GCN: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
; GCN: s_mov_b32 s10, -1
; CI: s_mov_b32 s11, 0x98f000
; VI: s_mov_b32 s11, 0x980000
; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s2 offen
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s2 offen
; ALL: ; ScratchSize: 32772
define amdgpu_ps void @large_alloca_pixel_shader_inreg(i32 inreg %x, i32 inreg %y) #0 {
%large = alloca [8192 x i32], align 4
%gep = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 8191
store volatile i32 %x, i32* %gep
%gep1 = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 %y
%val = load volatile i32, i32* %gep1
store volatile i32 %val, i32 addrspace(1)* undef
ret void
}
attributes #0 = { nounwind }