mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
0c2a21406a
Regions are sometimes skipped which should be rescheduled without memory op clustering. RegionIdx is not incremented when iterating over regions that are flagged to be skipped, causing the index to be incorrect. Thanks to Vang Thao for discovering this bug! Reviewed By: rampitec Differential Revision: https://reviews.llvm.org/D85498
46 lines
1.8 KiB
LLVM
46 lines
1.8 KiB
LLVM
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
|
|
|
; Interleave loads and stores to fit into 9 VGPR limit.
|
|
; This requires to avoid load/store clustering.
|
|
|
|
; Reschedule the second scheduling region without clustering while
|
|
; the first region is skipped.
|
|
|
|
; GCN: global_load_dwordx4
|
|
; GCN: global_store_dwordx4
|
|
; GCN: global_load_dwordx4
|
|
; GCN: global_store_dwordx4
|
|
; GCN: global_load_dwordx4
|
|
; GCN: global_store_dwordx4
|
|
; GCN: NumVgprs: {{[0-9]$}}
|
|
; GCN: ScratchSize: 0{{$}}
|
|
|
|
define amdgpu_kernel void @load_store_max_9vgprs(<4 x i32> addrspace(1)* nocapture noalias readonly %arg, <4 x i32> addrspace(1)* nocapture noalias %arg1, i1 %cnd) #1 {
|
|
bb:
|
|
%id = call i32 @llvm.amdgcn.workitem.id.x()
|
|
%base = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg, i32 %id
|
|
br i1 %cnd, label %bb1, label %bb2
|
|
|
|
bb1:
|
|
%tmp = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %base, i32 1
|
|
%tmp2 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp, align 4
|
|
%tmp3 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %base, i32 3
|
|
%tmp4 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp3, align 4
|
|
%tmp5 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %base, i32 5
|
|
%tmp6 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp5, align 4
|
|
store <4 x i32> %tmp2, <4 x i32> addrspace(1)* %arg1, align 4
|
|
%tmp7 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg1, i64 3
|
|
store <4 x i32> %tmp4, <4 x i32> addrspace(1)* %tmp7, align 4
|
|
%tmp8 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg1, i64 5
|
|
store <4 x i32> %tmp6, <4 x i32> addrspace(1)* %tmp8, align 4
|
|
br label %bb2
|
|
|
|
bb2:
|
|
ret void
|
|
}
|
|
|
|
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
|
|
|
attributes #0 = { nounwind readnone }
|
|
attributes #1 = { "amdgpu-num-vgpr"="9" }
|