1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 02:52:53 +02:00

[AMDGPU] Fix not rescheduling without clustering

Regions that should be rescheduled without memory op clustering are sometimes
skipped. RegionIdx is not incremented when iterating over regions that are
flagged to be skipped, so the index is incorrect for every later region.

Thanks to Vang Thao for discovering this bug!

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D85498
This commit is contained in:
Vang Thao 2020-08-06 20:46:27 -07:00 committed by Austin Kerbow
parent 4240996330
commit 0c2a21406a
2 changed files with 13 additions and 2 deletions

View File

@ -567,8 +567,10 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
SavedMutations.swap(Mutations);
for (auto Region : Regions) {
if (Stage == UnclusteredReschedule && !RescheduleRegions[RegionIdx])
if (Stage == UnclusteredReschedule && !RescheduleRegions[RegionIdx]) {
++RegionIdx;
continue;
}
RegionBegin = Region.first;
RegionEnd = Region.second;

View File

@ -3,6 +3,9 @@
; Interleave loads and stores to fit into 9 VGPR limit.
; This requires avoiding load/store clustering.
; Reschedule the second scheduling region without clustering while
; the first region is skipped.
; GCN: global_load_dwordx4
; GCN: global_store_dwordx4
; GCN: global_load_dwordx4
@ -12,10 +15,13 @@
; GCN: NumVgprs: {{[0-9]$}}
; GCN: ScratchSize: 0{{$}}
define amdgpu_kernel void @load_store_max_9vgprs(<4 x i32> addrspace(1)* nocapture noalias readonly %arg, <4 x i32> addrspace(1)* nocapture noalias %arg1) #1 {
define amdgpu_kernel void @load_store_max_9vgprs(<4 x i32> addrspace(1)* nocapture noalias readonly %arg, <4 x i32> addrspace(1)* nocapture noalias %arg1, i1 %cnd) #1 {
bb:
%id = call i32 @llvm.amdgcn.workitem.id.x()
%base = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg, i32 %id
br i1 %cnd, label %bb1, label %bb2
bb1:
%tmp = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %base, i32 1
%tmp2 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp, align 4
%tmp3 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %base, i32 3
@ -27,6 +33,9 @@ bb:
store <4 x i32> %tmp4, <4 x i32> addrspace(1)* %tmp7, align 4
%tmp8 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg1, i64 5
store <4 x i32> %tmp6, <4 x i32> addrspace(1)* %tmp8, align 4
br label %bb2
bb2:
ret void
}