1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 11:13:28 +01:00

[Pipeliner] Fix incorrect loop carried dependence calculation

The isLoopCarriedDep function does not correctly compute loop
carried dependences when the array index offset is negative
or the stride is smaller than the access size.

Patch by Denis Antrushin.

Differential Revision: https://reviews.llvm.org/D60135

llvm-svn: 358233
This commit is contained in:
Brendon Cahoon 2019-04-11 21:57:51 +00:00
parent de31e20597
commit 088fd84a72
4 changed files with 181 additions and 5 deletions

View File

@ -3167,12 +3167,14 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
// This is the main test, which checks the offset values and the loop
// increment value to determine if the accesses may be loop carried.
if (OffsetS >= OffsetD)
return OffsetS + AccessSizeS > DeltaS;
else
return OffsetD + AccessSizeD > DeltaD;
if (AccessSizeS == MemoryLocation::UnknownSize ||
AccessSizeD == MemoryLocation::UnknownSize)
return true;
return true;
if (DeltaS != DeltaD || DeltaS < AccessSizeS || DeltaD < AccessSizeD)
return true;
return (OffsetS + (int64_t)AccessSizeS < OffsetD + (int64_t)AccessSizeD);
}
void SwingSchedulerDAG::postprocessDAG() {

View File

@ -0,0 +1,103 @@
# RUN: llc -mtriple=hexagon -run-pass pipeliner -debug-only=pipeliner %s -o /dev/null 2>&1 | FileCheck %s
# REQUIRES: asserts
# Test that the loop carried dependence check correctly identifies a recurrence.
# CHECK: Rec NodeSet
# CHECK: Rec NodeSet
# CHECK: Rec NodeSet
# CHECK: Rec NodeSet
# CHECK-NEXT: SU(4)
# CHECK-NEXT: SU(6)
--- |
%struct.A = type { i16, i16 }
define i32 @test(%struct.A* noalias nocapture %s, i16* noalias nocapture readonly %r, i32 %n) {
entry:
%cmp19 = icmp eq i32 %n, 2
br i1 %cmp19, label %for.end, label %for.body.preheader
for.body.preheader:
%0 = add i32 %n, -2
%cgep = getelementptr %struct.A, %struct.A* %s, i32 2, i32 1
%scevgep1 = bitcast i16* %cgep to %struct.A*
%cgep9 = getelementptr i16, i16* %r, i32 2
br label %for.body
for.body:
%lsr.iv7 = phi i16* [ %cgep9, %for.body.preheader ], [ %cgep12, %for.body ]
%lsr.iv2 = phi %struct.A* [ %scevgep1, %for.body.preheader ], [ %cgep11, %for.body ]
%lsr.iv = phi i32 [ %0, %for.body.preheader ], [ %lsr.iv.next, %for.body ]
%sum.020 = phi i32 [ %add7, %for.body ], [ 0, %for.body.preheader ]
%lsr.iv24 = bitcast %struct.A* %lsr.iv2 to i16*
%1 = load i16, i16* %lsr.iv7, align 2
%conv = sext i16 %1 to i32
%cgep10 = getelementptr i16, i16* %lsr.iv24, i32 -4
%2 = load i16, i16* %cgep10, align 2
%conv2 = sext i16 %2 to i32
%add = add i16 %1, 10
store i16 %add, i16* %lsr.iv24, align 2
%add6 = add i32 %sum.020, %conv
%add7 = add i32 %add6, %conv2
%lsr.iv.next = add i32 %lsr.iv, -1
%cmp = icmp eq i32 %lsr.iv.next, 0
%cgep11 = getelementptr %struct.A, %struct.A* %lsr.iv2, i32 1
%cgep12 = getelementptr i16, i16* %lsr.iv7, i32 1
br i1 %cmp, label %for.end, label %for.body
for.end:
%sum.0.lcssa = phi i32 [ 0, %entry ], [ %add7, %for.body ]
ret i32 %sum.0.lcssa
}
...
---
name: test
tracksRegLiveness: true
body: |
bb.0:
successors: %bb.3, %bb.1
liveins: $r0, $r1, $r2
%14:intregs = COPY $r2
%13:intregs = COPY $r1
%12:intregs = COPY $r0
%16:predregs = C2_cmpeqi %14, 2
%15:intregs = A2_tfrsi 0
J2_jumpt killed %16, %bb.3, implicit-def dead $pc
J2_jump %bb.1, implicit-def dead $pc
bb.1:
successors: %bb.2
%0:intregs = A2_addi %14, -2
%1:intregs = A2_addi %12, 10
%2:intregs = A2_addi %13, 4
%17:intregs = A2_tfrsi 0
%23:intregs = COPY %0
J2_loop0r %bb.2, %23, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
bb.2 (address-taken):
successors: %bb.3, %bb.2
%3:intregs = PHI %2, %bb.1, %10, %bb.2
%4:intregs = PHI %1, %bb.1, %9, %bb.2
%6:intregs = PHI %17, %bb.1, %7, %bb.2
%18:intregs, %10:intregs = L2_loadrh_pi %3, 2 :: (load 2 from %ir.lsr.iv7)
%19:intregs = L2_loadrh_io %4, -8 :: (load 2 from %ir.cgep10)
%20:intregs = A2_addi %18, 10
S2_storerh_io %4, 0, killed %20 :: (store 2 into %ir.lsr.iv24)
%7:intregs = M2_acci %19, %6, %18
%9:intregs = A2_addi %4, 4
ENDLOOP0 %bb.2, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
J2_jump %bb.3, implicit-def dead $pc
bb.3:
%11:intregs = PHI %15, %bb.0, %7, %bb.2
$r0 = COPY %11
PS_jmpret $r31, implicit-def dead $pc, implicit $r0
...

View File

@ -0,0 +1,70 @@
# RUN: llc -mtriple=hexagon -run-pass pipeliner -debug-only=pipeliner %s -o /dev/null 2>&1 | FileCheck %s
# REQUIRES: asserts
# Test that the loop carried dependence check correctly identifies a recurrence
# when the loop variable decreases and the array index offset is negative.
# CHECK: Rec NodeSet
# CHECK: Rec NodeSet
# CHECK: SU(3)
# CHECK: SU(4)
# CHECK: SU(5)
--- |
define void @test() {
b0:
br label %b3
b3:
%lsr.iv = phi [9 x i32]* [ %0, %b3 ], [ undef, %b0 ]
%v0 = phi i32 [ %v8, %b3 ], [ 7, %b0 ]
%v1 = phi i32 [ %v6, %b3 ], [ undef, %b0 ]
%v2 = phi i32 [ %v1, %b3 ], [ undef, %b0 ]
%lsr.iv1 = bitcast [9 x i32]* %lsr.iv to i32*
%cgep = getelementptr i32, i32* %lsr.iv1, i32 -2
%v6 = load i32, i32* %cgep, align 4
%v7 = tail call i32 @llvm.hexagon.A2.subsat(i32 %v2, i32 %v6)
store i32 %v7, i32* %lsr.iv1, align 4
%v8 = add i32 %v0, -1
%cgep3 = getelementptr [9 x i32], [9 x i32]* %lsr.iv, i32 0, i32 -1
%0 = bitcast i32* %cgep3 to [9 x i32]*
%v9 = icmp sgt i32 %v8, 1
br i1 %v9, label %b3, label %b4
b4:
unreachable
}
declare i32 @llvm.hexagon.A2.subsat(i32, i32) #0
declare void @llvm.stackprotector(i8*, i8**) #1
...
---
name: test
tracksRegLiveness: true
body: |
bb.0:
successors: %bb.1
%10:intregs = IMPLICIT_DEF
%11:intregs = IMPLICIT_DEF
J2_loop0i %bb.1, 6, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
bb.1 (address-taken):
successors: %bb.1, %bb.2
%0:intregs = PHI %11, %bb.0, %6, %bb.1
%2:intregs = PHI %10, %bb.0, %4, %bb.1
%3:intregs = PHI %10, %bb.0, %2, %bb.1
%4:intregs = L2_loadri_io %0, -8 :: (load 4 from %ir.cgep)
%12:intregs = A2_subsat %3, %4, implicit-def dead $usr_ovf
S2_storeri_io %0, 0, %12 :: (store 4 into %ir.lsr.iv1)
%6:intregs = A2_addi %0, -4
ENDLOOP0 %bb.1, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
J2_jump %bb.2, implicit-def dead $pc
bb.2:
...

View File

@ -1,4 +1,5 @@
; RUN: llc -march=hexagon -mno-pairing -mno-compound -hexagon-initial-cfg-cleanup=0 < %s | FileCheck %s
; XFAIL: *
; Test that we generate the correct phi names in the epilog when the pipeliner
; schedules a phi and its loop definition in different stages, e.g., a phi is