; Mirror of https://github.com/RPCS3/llvm-mirror.git
; (synced 2024-11-26 04:32:44 +01:00), cc39b4a525
; Commit message: Accesses to global module LDS variable start from null, but
; the kernel also thinks its variables' start address is null. Fixed by not
; using a null as an address.
; Differential Revision: https://reviews.llvm.org/D102882
; File: 57 lines, 2.8 KiB, LLVM
; Exercises the amdgpu-lower-module-lds pass through two opt command lines:
; the -amdgpu-lower-module-lds flag and the -passes=amdgpu-lower-module-lds form.
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s | FileCheck %s
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck %s

; Padding to meet alignment, so references to @var1 are replaced with gep ptr, 0, 2
; There is no i64 member because addrspace(3) variables with initializers are
; ignored. Likewise no addrspace(4).
; CHECK: %llvm.amdgcn.module.lds.t = type { float, [4 x i8], i32 }

; Variables removed by the pass
; CHECK-NOT: @var0
; CHECK-NOT: @var1

; Two uninitialized addrspace(3) variables; both request 8-byte alignment.
@var0 = addrspace(3) global float undef, align 8
@var1 = addrspace(3) global i32 undef, align 8

; An addrspace(1) global whose initializer refers to @var1.
@ptr = addrspace(1) global i32 addrspace(3)* @var1, align 4

; A variable that is unchanged by the pass
; CHECK: @with_init = addrspace(3) global i64 0
@with_init = addrspace(3) global i64 0

; Instance of the new type, aligned to the max of the element alignments
; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 8

; Uses in @func are rewritten to address fields of @llvm.amdgcn.module.lds
; (the struct instance itself, not a null base pointer). The use of @with_init
; is expected to stay as written.
; CHECK-LABEL: @func()
; CHECK: %dec = atomicrmw fsub float addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 0), float 1.0
; CHECK: %val0 = load i32, i32 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 2), align 4
; CHECK: %val1 = add i32 %val0, 4
; CHECK: store i32 %val1, i32 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 2), align 4
; CHECK: %unused0 = atomicrmw add i64 addrspace(3)* @with_init, i64 1 monotonic
define void @func() {
  ; @var0 is expected to become field 0 of the module struct.
  %dec = atomicrmw fsub float addrspace(3)* @var0, float 1.0 monotonic
  ; @var1 is expected to become field 2 (field 1 is the [4 x i8] padding).
  %val0 = load i32, i32 addrspace(3)* @var1, align 4
  %val1 = add i32 %val0, 4
  store i32 %val1, i32 addrspace(3)* @var1, align 4
  ; @with_init has an initializer, so this use should be left untouched.
  %unused0 = atomicrmw add i64 addrspace(3)* @with_init, i64 1 monotonic
  ret void
}

; This kernel calls a function that uses LDS, so it needs the module LDS block.
; The expected output starts with an ExplicitUse marker on
; @llvm.amdgcn.module.lds, and the kernel's own use of @var0 is rewritten to
; field 0 of that struct.
; CHECK-LABEL: @kern_call()
; CHECK: call void @llvm.donothing() [ "ExplicitUse"(%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds) ]
; CHECK: call void @func()
; CHECK: %dec = atomicrmw fsub float addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 0), float 2.000000e+00 monotonic, align 4
define amdgpu_kernel void @kern_call() {
  ; Indirect LDS use: @func touches @var0 and @var1.
  call void @func()
  ; Direct LDS use from the kernel itself.
  %dec = atomicrmw fsub float addrspace(3)* @var0, float 2.0 monotonic
  ret void
}

; This kernel makes no calls and uses no LDS variables itself, yet the expected
; output below still contains the ExplicitUse marker for
; @llvm.amdgcn.module.lds.
; NOTE(review): the earlier wording said this kernel "does not need to alloc
; the LDS block as it makes no calls", which contradicts the directive below;
; confirm which behavior is intended.
; CHECK-LABEL: @kern_empty()
; CHECK: call void @llvm.donothing() [ "ExplicitUse"(%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds) ]
define spir_kernel void @kern_empty() {
  ret void
}