mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
10dc424440
This should be directly implied from the register class, and there's no need to special case live ins here. This was getting the wrong answer for the queue ptr argument in callable functions, since it's not an explicit IR argument and is always uniform. Fixes not using scalar loads for the aperture in addrspacecast lowering, and any other places that use implicit SGPR arguments.
331 lines
11 KiB
LLVM
331 lines
11 KiB
LLVM
; RUN: llc -amdgpu-fixed-function-abi -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s
|
|
; RUN: llc -amdgpu-fixed-function-abi -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
|
|
|
|
; GCN-LABEL: {{^}}use_dispatch_ptr:
|
|
; GCN: s_load_dword s{{[0-9]+}}, s[4:5]
|
|
; Callable (non-kernel) function that performs one volatile i32 load through
; the pointer returned by llvm.amdgcn.dispatch.ptr. The loaded value is unused;
; the load exists only so the pointer has a user.
define hidden void @use_dispatch_ptr() #1 {
  %dispatch.base = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %dispatch.word.ptr = bitcast i8 addrspace(4)* %dispatch.base to i32 addrspace(4)*
  %dispatch.word = load volatile i32, i32 addrspace(4)* %dispatch.word.ptr
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}use_queue_ptr:
|
|
; GCN: s_load_dword s{{[0-9]+}}, s[6:7]
|
|
; Callable function that performs one volatile i32 load through the pointer
; returned by llvm.amdgcn.queue.ptr. The loaded value is unused.
define hidden void @use_queue_ptr() #1 {
  %queue.base = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %queue.word.ptr = bitcast i8 addrspace(4)* %queue.base to i32 addrspace(4)*
  %queue.word = load volatile i32, i32 addrspace(4)* %queue.word.ptr
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}use_kernarg_segment_ptr:
|
|
; GCN: s_mov_b64 [[PTR:s\[[0-9]+:[0-9]+\]]], 0
|
|
; GCN: s_load_dword s{{[0-9]+}}, [[PTR]], 0x0
|
|
; Callable function that performs one volatile i32 load through the pointer
; returned by llvm.amdgcn.kernarg.segment.ptr. The loaded value is unused.
define hidden void @use_kernarg_segment_ptr() #1 {
  %kernarg.base = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
  %kernarg.word.ptr = bitcast i8 addrspace(4)* %kernarg.base to i32 addrspace(4)*
  %kernarg.word = load volatile i32, i32 addrspace(4)* %kernarg.word.ptr
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}use_implicitarg_ptr:
|
|
; GCN: s_load_dword s{{[0-9]+}}, s[8:9]
|
|
; Callable function that performs one volatile i32 load through the pointer
; returned by llvm.amdgcn.implicitarg.ptr. The loaded value is unused.
define hidden void @use_implicitarg_ptr() #1 {
  %implicit.base = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
  %implicit.word.ptr = bitcast i8 addrspace(4)* %implicit.base to i32 addrspace(4)*
  %implicit.word = load volatile i32, i32 addrspace(4)* %implicit.word.ptr
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}use_dispatch_id:
|
|
; GCN: ; use s[10:11]
|
|
; Callable function that reads the i64 result of llvm.amdgcn.dispatch.id and
; hands it to a side-effecting inline asm through an "s" constraint, so the
; register chosen for it appears in the "; use ..." asm text.
define hidden void @use_dispatch_id() #1 {
  %dispatch.id = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %dispatch.id)
  ret void
}
|
|
; GCN-LABEL: {{^}}use_workgroup_id_x:
|
|
; GCN: s_waitcnt
|
|
; GCN: ; use s12
|
|
; Callable function that reads llvm.amdgcn.workgroup.id.x and feeds it to a
; side-effecting inline asm through an "s" constraint.
define hidden void @use_workgroup_id_x() #1 {
  %wg.x = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %wg.x)
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}use_stack_workgroup_id_x:
|
|
; GCN: s_waitcnt
|
|
; GCN-NOT: s32
|
|
; GCN: buffer_store_dword v0, off, s[0:3], s32{{$}}
|
|
; GCN: ; use s12
|
|
; GCN: s_setpc_b64
|
|
; Same as the plain workgroup-id-x user, but first does a volatile store of 0
; to an addrspace(5) alloca so the function also has a stack object.
define hidden void @use_stack_workgroup_id_x() #1 {
  %slot = alloca i32, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %slot
  %wg.x = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %wg.x)
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}use_workgroup_id_y:
|
|
; GCN: s_waitcnt
|
|
; GCN: ; use s13
|
|
; Callable function that reads llvm.amdgcn.workgroup.id.y and feeds it to a
; side-effecting inline asm through an "s" constraint.
define hidden void @use_workgroup_id_y() #1 {
  %wg.y = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %wg.y)
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}use_workgroup_id_z:
|
|
; GCN: s_waitcnt
|
|
; GCN: ; use s14
|
|
; Callable function that reads llvm.amdgcn.workgroup.id.z and feeds it to a
; side-effecting inline asm through an "s" constraint.
define hidden void @use_workgroup_id_z() #1 {
  %wg.z = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %wg.z)
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}use_workgroup_id_xy:
|
|
; GCN: ; use s12
|
|
; GCN: ; use s13
|
|
; Reads the x and y workgroup IDs (both intrinsic calls first), then emits one
; side-effecting inline asm use per value, x before y.
define hidden void @use_workgroup_id_xy() #1 {
  %wg.x = call i32 @llvm.amdgcn.workgroup.id.x()
  %wg.y = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %wg.x)
  call void asm sideeffect "; use $0", "s"(i32 %wg.y)
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}use_workgroup_id_xyz:
|
|
; GCN: ; use s12
|
|
; GCN: ; use s13
|
|
; GCN: ; use s14
|
|
; Reads the x, y and z workgroup IDs (all intrinsic calls first), then emits
; one side-effecting inline asm use per value in x, y, z order.
define hidden void @use_workgroup_id_xyz() #1 {
  %wg.x = call i32 @llvm.amdgcn.workgroup.id.x()
  %wg.y = call i32 @llvm.amdgcn.workgroup.id.y()
  %wg.z = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %wg.x)
  call void asm sideeffect "; use $0", "s"(i32 %wg.y)
  call void asm sideeffect "; use $0", "s"(i32 %wg.z)
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}use_workgroup_id_xz:
|
|
; GCN: ; use s12
|
|
; GCN: ; use s14
|
|
; Reads the x and z workgroup IDs (skipping y), then emits one side-effecting
; inline asm use per value, x before z.
define hidden void @use_workgroup_id_xz() #1 {
  %wg.x = call i32 @llvm.amdgcn.workgroup.id.x()
  %wg.z = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %wg.x)
  call void asm sideeffect "; use $0", "s"(i32 %wg.z)
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}use_workgroup_id_yz:
|
|
; GCN: ; use s13
|
|
; GCN: ; use s14
|
|
; Reads the y and z workgroup IDs (skipping x), then emits one side-effecting
; inline asm use per value, y before z.
define hidden void @use_workgroup_id_yz() #1 {
  %wg.y = call i32 @llvm.amdgcn.workgroup.id.y()
  %wg.z = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %wg.y)
  call void asm sideeffect "; use $0", "s"(i32 %wg.z)
  ret void
}
|
|
|
|
; The argument is already in the right place.
|
|
; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_x:
|
|
; GCN-NOT: s12
|
|
; GCN-NOT: s13
|
|
; GCN-NOT: s14
|
|
; GCN: v_readlane_b32 s4, v40, 0
|
|
; Callable function whose only work is a call to @use_workgroup_id_x; it does
; not read the workgroup ID itself.
define hidden void @func_indirect_use_workgroup_id_x() #1 {
  call void @use_workgroup_id_x()
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_y:
|
|
; GCN-NOT: s4
|
|
; GCN: v_readlane_b32 s4, v40, 0
|
|
; Callable function whose only work is a call to @use_workgroup_id_y; it does
; not read the workgroup ID itself.
define hidden void @func_indirect_use_workgroup_id_y() #1 {
  call void @use_workgroup_id_y()
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_z:
|
|
; GCN-NOT: s4
|
|
; GCN: v_readlane_b32 s4, v40, 0
|
|
; Callable function whose only work is a call to @use_workgroup_id_z; it does
; not read the workgroup ID itself.
define hidden void @func_indirect_use_workgroup_id_z() #1 {
  call void @use_workgroup_id_z()
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}other_arg_use_workgroup_id_x:
|
|
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
|
|
; GCN: ; use s12
|
|
; Takes an explicit i32 argument in addition to using the x workgroup ID.
; The argument is written with a volatile store to an undef addrspace(1)
; pointer, and the workgroup ID goes to a side-effecting inline asm.
define hidden void @other_arg_use_workgroup_id_x(i32 %arg0) #1 {
  %wg.x = call i32 @llvm.amdgcn.workgroup.id.x()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void asm sideeffect "; use $0", "s"(i32 %wg.x)
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}other_arg_use_workgroup_id_y:
|
|
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
|
|
; GCN: ; use s13
|
|
; Takes an explicit i32 argument in addition to using the y workgroup ID.
; The argument is written with a volatile store to an undef addrspace(1)
; pointer, and the workgroup ID goes to a side-effecting inline asm.
define hidden void @other_arg_use_workgroup_id_y(i32 %arg0) #1 {
  %wg.y = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void asm sideeffect "; use $0", "s"(i32 %wg.y)
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}other_arg_use_workgroup_id_z:
|
|
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
|
|
; GCN: ; use s14
|
|
; Takes an explicit i32 argument in addition to using the z workgroup ID.
; The argument is written with a volatile store to an undef addrspace(1)
; pointer, and the workgroup ID goes to a side-effecting inline asm.
define hidden void @other_arg_use_workgroup_id_z(i32 %arg0) #1 {
  %wg.z = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void asm sideeffect "; use $0", "s"(i32 %wg.z)
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}use_every_sgpr_input:
|
|
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32{{$}}
|
|
; GCN: s_load_dword s{{[0-9]+}}, s[4:5]
|
|
; GCN: s_load_dword s{{[0-9]+}}, s[6:7]
|
|
; GCN: s_load_dword s{{[0-9]+}}, s[8:9]
|
|
; GCN: ; use s[10:11]
|
|
; GCN: ; use s12
|
|
; GCN: ; use s13
|
|
; GCN: ; use s14
|
|
; Callable function that touches a stack slot and then uses, in this order:
; the dispatch pointer, queue pointer and implicit-argument pointer (each via
; a volatile i32 load), the dispatch ID, and the x/y/z workgroup IDs (each via
; a side-effecting inline asm with an "s" constraint).
define hidden void @use_every_sgpr_input() #1 {
  ; Stack object, written with a volatile store.
  %slot = alloca i32, align 4, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %slot

  ; Pointer-valued inputs: load one dword through each.
  %disp.p = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %disp.p.i32 = bitcast i8 addrspace(4)* %disp.p to i32 addrspace(4)*
  %disp.word = load volatile i32, i32 addrspace(4)* %disp.p.i32

  %queue.p = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %queue.p.i32 = bitcast i8 addrspace(4)* %queue.p to i32 addrspace(4)*
  %queue.word = load volatile i32, i32 addrspace(4)* %queue.p.i32

  %implicit.p = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
  %implicit.p.i32 = bitcast i8 addrspace(4)* %implicit.p to i32 addrspace(4)*
  %implicit.word = load volatile i32, i32 addrspace(4)* %implicit.p.i32

  ; Scalar-valued inputs: each is consumed by its own inline asm.
  %disp.id = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %disp.id)

  %wg.x = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %wg.x)

  %wg.y = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %wg.y)

  %wg.z = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %wg.z)

  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}kern_indirect_use_every_sgpr_input:
|
|
; GCN: s_mov_b32 s12, s14
|
|
; GCN: s_mov_b32 s13, s15
|
|
; GCN: s_mov_b32 s14, s16
|
|
; GCN: s_mov_b32 s32, 0
|
|
; GCN: s_swappc_b64
|
|
|
|
; GCN: .amdhsa_user_sgpr_private_segment_buffer 1
|
|
; GCN: .amdhsa_user_sgpr_dispatch_ptr 1
|
|
; GCN: .amdhsa_user_sgpr_queue_ptr 1
|
|
; GCN: .amdhsa_user_sgpr_kernarg_segment_ptr 1
|
|
; GCN: .amdhsa_user_sgpr_dispatch_id 1
|
|
; GCN: .amdhsa_user_sgpr_flat_scratch_init 1
|
|
; GCN: .amdhsa_user_sgpr_private_segment_size 0
|
|
; GCN: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
|
|
; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
|
|
; GCN: .amdhsa_system_sgpr_workgroup_id_y 1
|
|
; GCN: .amdhsa_system_sgpr_workgroup_id_z 1
|
|
; GCN: .amdhsa_system_sgpr_workgroup_info 0
|
|
; GCN: .amdhsa_system_vgpr_workitem_id 2
|
|
; Kernel entry point (amdgpu_kernel calling convention) whose only work is a
; call to @use_every_sgpr_input; it uses none of the inputs directly.
define amdgpu_kernel void @kern_indirect_use_every_sgpr_input() #1 {
  call void @use_every_sgpr_input()
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}func_indirect_use_every_sgpr_input:
|
|
; GCN-NOT: s6
|
|
; GCN-NOT: s7
|
|
; GCN-NOT: s8
|
|
; GCN-NOT: s9
|
|
; GCN-NOT: s10
|
|
; GCN-NOT: s11
|
|
; GCN-NOT: s12
|
|
; GCN-NOT: s13
|
|
; GCN-NOT: s[6:7]
|
|
; GCN-NOT: s[8:9]
|
|
; GCN-NOT: s[10:11]
|
|
; GCN-NOT: s[12:13]
|
|
; GCN-NOT: s14
|
|
; GCN: s_or_saveexec_b64 s[16:17], -1
|
|
; Callable (non-kernel) function whose only work is a call to
; @use_every_sgpr_input; it uses none of the inputs directly.
define hidden void @func_indirect_use_every_sgpr_input() #1 {
  call void @use_every_sgpr_input()
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz:
|
|
; GCN-NOT: s12
|
|
; GCN-NOT: s13
|
|
; GCN-NOT: s14
|
|
; GCN: ; use s[10:11]
|
|
; GCN: ; use s12
|
|
; GCN: ; use s13
|
|
; GCN: ; use s14
|
|
|
|
; GCN: s_swappc_b64
|
|
; Callable function that uses every input itself and then calls a function
; that needs the workgroup IDs again. In order, it: writes a stack slot; loads
; one dword through the dispatch, queue and implicit-argument pointers; passes
; the dispatch ID and the x/y/z workgroup IDs to side-effecting inline asm;
; and finally calls @use_workgroup_id_xyz.
define hidden void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 {
  ; Stack object, written with a volatile store.
  %alloca = alloca i32, align 4, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca

  %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
  %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc

  %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
  %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc

  ; This value comes from llvm.amdgcn.implicitarg.ptr, not from the kernarg
  ; segment pointer intrinsic, so it is named after the intrinsic that
  ; produces it (matching @use_every_sgpr_input).
  %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
  %implicitarg.ptr.bc = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %val2 = load volatile i32, i32 addrspace(4)* %implicitarg.ptr.bc

  %val3 = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %val3)

  %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val4)

  %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val5)

  %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val6)

  ; The callee reads the x/y/z workgroup IDs again after the uses above.
  call void @use_workgroup_id_xyz()
  ret void
}
|
|
|
|
; Intrinsics returning an addrspace(4) pointer.
declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0

; Intrinsics returning a scalar ID.
declare i64 @llvm.amdgcn.dispatch.id() #0
declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.y() #0
declare i32 @llvm.amdgcn.workgroup.id.z() #0

; #0 is applied to the intrinsic declarations and to the pointer-intrinsic
; call sites; #1 is applied to every function defined in this test.
attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind noinline }
|