1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00
llvm-mirror/test/CodeGen/AMDGPU/kernarg-size.ll
Konstantin Zhuravlyov a76ecb87cf AMDGPU: Add target id and code object v4 support
  - Add target id support (https://clang.llvm.org/docs/ClangOffloadBundler.html#target-id)
  - Add code object v4 support (https://llvm.org/docs/AMDGPUUsage.html#elf-code-object)
    - Add kernarg_size to kernel descriptor
    - Change trap handler ABI to no longer move queue pointer into s[0:1]
  - Cleanup ELF definitions
    - Add V2, V3, V4 suffixes to make a clear distinction for code object version
    - Consolidate note names

Differential Revision: https://reviews.llvm.org/D95638
2021-03-24 11:54:05 -04:00

22 lines
889 B
LLVM

; Checks the kernel descriptor directives that llc prints for an amdgpu_kernel
; when targeting amdgcn-amd-amdhsa with -mcpu=gfx900. llc is invoked three
; times -- without an explicit code object version, with version 4, and with
; version 3 -- and every invocation is verified against the same check prefix.
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefix=HSA %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefix=HSA %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefix=HSA %s
; Trap intrinsics. Only @llvm.trap is called by the kernel in this file;
; @llvm.debugtrap is declared but never referenced by the visible code.
; NOTE(review): attribute groups #0 and #1 are referenced here, but no
; "attributes #N = { ... }" definition is visible in this file -- confirm that
; the omission is intentional.
declare void @llvm.trap() #0
declare void @llvm.debugtrap() #1
; Expected kernel descriptor for @trap. The first four directives after the
; opening .amdhsa_kernel line must appear on consecutive output lines; the
; closing .end_amdhsa_kernel may appear any distance later.
; The kernel has a single addrspace(1) pointer argument and the expected
; .amdhsa_kernarg_size is 8 -- presumably the size of that one pointer;
; TODO confirm against the target's data layout.
; HSA: .amdhsa_kernel trap
; HSA-NEXT: .amdhsa_group_segment_fixed_size 0
; HSA-NEXT: .amdhsa_private_segment_fixed_size 0
; HSA-NEXT: .amdhsa_kernarg_size 8
; HSA-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; HSA: .end_amdhsa_kernel
; Kernel under test: performs a volatile store of 1 through %arg0, then calls
; the trap intrinsic.
define amdgpu_kernel void @trap(i32 addrspace(1)* nocapture readonly %arg0) {
store volatile i32 1, i32 addrspace(1)* %arg0
call void @llvm.trap()
unreachable
; "unreachable" is a terminator, so the two instructions below implicitly start
; a second, unnamed basic block. Nothing branches to that block.
store volatile i32 2, i32 addrspace(1)* %arg0
ret void
}