mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-24 05:23:45 +02:00
774adca4ab
This was assuming it could use all memory before, which is a bad decision because it restricts occupancy. By default, only try to use enough space that could reduce occupancy to 7, an arbitrarily chosen limit. Based on the existing LDS usage, try to round up to the limit in the current tier instead of further hurting occupancy. This isn't ideal, because it doesn't accurately know how much space is going to be used for alignment padding. llvm-svn: 269708
39 lines
1.8 KiB
LLVM
39 lines
1.8 KiB
LLVM
; RUN: llc -O0 -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mattr=+promote-alloca < %s | FileCheck -check-prefix=NOOPTS -check-prefix=ALL %s
|
|
; RUN: llc -O1 -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mattr=+promote-alloca < %s | FileCheck -check-prefix=OPTS -check-prefix=ALL %s
|
|
|
|
; With optimizations disabled (NOOPTS prefix, the -O0 run) the reported group
; segment size must be 0 and no ds_write may follow it. With optimizations
; enabled (OPTS prefix, the -O1 run) a ds_write is expected instead.
;
; The negative check needs the colon after -NOT; without it FileCheck does not
; recognise the line as a directive and silently verifies nothing.

; ALL-LABEL: {{^}}promote_alloca_i32_array_array:
; NOOPTS: workgroup_group_segment_byte_size = 0{{$}}
; NOOPTS-NOT: ds_write
; OPTS: ds_write
define void @promote_alloca_i32_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
entry:
  ; 2x2 array of i32 on the stack; only the first row is touched below.
  %alloca = alloca [2 x [2 x i32]]
  %gep0 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 0
  %gep1 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 1

  ; Initialise row 0 with the constants 0 and 1.
  store i32 0, i32* %gep0
  store i32 1, i32* %gep1

  ; Read row 0 back through the runtime index, so the access is not a
  ; compile-time constant.
  %gep2 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 %index
  %load = load i32, i32* %gep2

  ; Write the loaded value to %out so the load has an observable use.
  store i32 %load, i32 addrspace(1)* %out
  ret void
}
|
|
|
|
; This function uses attribute group #1, which carries optnone. Both runs
; share the ALL prefix, so at either optimization level the reported group
; segment size must be 0 and no ds_write may follow it.
;
; The negative check needs the colon after -NOT; without it FileCheck does not
; recognise the line as a directive and silently verifies nothing.

; ALL-LABEL: {{^}}optnone_promote_alloca_i32_array_array:
; ALL: workgroup_group_segment_byte_size = 0{{$}}
; ALL-NOT: ds_write
define void @optnone_promote_alloca_i32_array_array(i32 addrspace(1)* %out, i32 %index) #1 {
entry:
  ; 2x2 array of i32 on the stack; only the first row is touched below.
  %alloca = alloca [2 x [2 x i32]]
  %gep0 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 0
  %gep1 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 1

  ; Initialise row 0 with the constants 0 and 1.
  store i32 0, i32* %gep0
  store i32 1, i32* %gep1

  ; Read row 0 back through the runtime index, so the access is not a
  ; compile-time constant.
  %gep2 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 %index
  %load = load i32, i32* %gep2

  ; Write the loaded value to %out so the load has an observable use.
  store i32 %load, i32 addrspace(1)* %out
  ret void
}
|
|
|
|
; Attribute group #0: nounwind with "amdgpu-max-work-group-size" set to "64".
attributes #0 = { "amdgpu-max-work-group-size"="64" nounwind }

; Attribute group #1: the same attributes as #0 plus optnone and noinline.
attributes #1 = { "amdgpu-max-work-group-size"="64" noinline nounwind optnone }
|