1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 10:42:39 +01:00

[AMDGPU] Update OpenCL to use 48 bytes of implicit arguments for AMDGPU

Add two additional implicit arguments for OpenCL for the AMDGPU target using the AMDHSA runtime to support device enqueue.

Differential Revision: https://reviews.llvm.org/D44697

llvm-svn: 328351
This commit is contained in:
Tony Tye 2018-03-23 18:58:47 +00:00
parent a07033cf3c
commit e96a23f40c
3 changed files with 15 additions and 11 deletions

View File

@@ -3801,10 +3801,14 @@ When the language is OpenCL the following differences occur:
 Position Byte Byte      Description
          Size Alignment
 ======== ==== ========= ===========================================
-0        8    8         OpenCL Global Offset X
-1        8    8         OpenCL Global Offset Y
-2        8    8         OpenCL Global Offset Z
-3        8    8         OpenCL printf buffer
+1        8    8         OpenCL Global Offset X
+2        8    8         OpenCL Global Offset Y
+3        8    8         OpenCL Global Offset Z
+4        8    8         OpenCL address of printf buffer
+5        8    8         OpenCL address of virtual queue used by
+                        enqueue_kernel.
+6        8    8         OpenCL address of AqlWrap struct used by
+                        enqueue_kernel.
 ======== ==== ========= ===========================================
 .. _amdgpu-hcc:

View File

@@ -18,7 +18,7 @@ define amdgpu_kernel void @kernel_implicitarg_ptr_empty() #0 {
 ; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr_empty:
 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
-; HSA: kernarg_segment_byte_size = 32
+; HSA: kernarg_segment_byte_size = 48
 ; MESA: kernarg_segment_byte_size = 16
 ; HSA: s_load_dword s0, s[4:5], 0x0
@@ -46,7 +46,7 @@ define amdgpu_kernel void @kernel_implicitarg_ptr([112 x i8]) #0 {
 ; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr:
 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
-; HSA: kernarg_segment_byte_size = 144
+; HSA: kernarg_segment_byte_size = 160
 ; MESA: kernarg_segment_byte_size = 464
 ; HSA: s_load_dword s0, s[4:5], 0x1c
@@ -106,7 +106,7 @@ define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty() #0 {
 ; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func_empty:
 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
-; HSA: kernarg_segment_byte_size = 32
+; HSA: kernarg_segment_byte_size = 48
 ; MESA: kernarg_segment_byte_size = 16
 ; GCN: s_mov_b64 s[6:7], s[4:5]
 ; GCN: s_swappc_b64
@@ -132,7 +132,7 @@ define amdgpu_kernel void @kernel_call_implicitarg_ptr_func([112 x i8]) #0 {
 ; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func:
 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
-; HSA: kernarg_segment_byte_size = 144
+; HSA: kernarg_segment_byte_size = 160
 ; MESA: kernarg_segment_byte_size = 464
 ; HSA: s_add_u32 s6, s4, 0x70
@@ -232,5 +232,5 @@ declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #2
 declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #2
 attributes #0 = { nounwind noinline }
-attributes #1 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="32" }
+attributes #1 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="48" }
 attributes #2 = { nounwind readnone speculatable }

View File

@@ -46,7 +46,7 @@ define amdgpu_kernel void @test_implicit_alignment(i32 addrspace(1)* %out, <2 x
 }
 ; ALL-LABEL: {{^}}opencl_test_implicit_alignment
-; HSA: kernarg_segment_byte_size = 48
+; HSA: kernarg_segment_byte_size = 64
 ; OS-MESA3D: kernarg_segment_byte_size = 28
 ; OS-UNKNOWN: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc
 ; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x4
@@ -79,4 +79,4 @@ declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
-attributes #2 = { nounwind "amdgpu-implicitarg-num-bytes"="32" }
+attributes #2 = { nounwind "amdgpu-implicitarg-num-bytes"="48" }