1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 11:13:28 +01:00
llvm-mirror/test/Transforms/LoopReroll/indvar_with_ext.ll
Eli Friedman 659a645d9a [LoopReroll] Rewrite induction variable rewriting.
This gets rid of a bunch of weird special cases; instead, just use SCEV
rewriting for everything.  In addition to being simpler, this fixes a
bug where we would use the wrong stride in certain edge cases.

The one bit I'm not quite sure about is the trip count handling,
specifically the FIXME about overflow.  In general, I think we need to
widen the exit condition, but that's probably not profitable if the new
type isn't legal, so we probably need a check somewhere.  That said, I
don't think I'm making the existing problem any worse.

As a followup to this, a bunch of IV-related code in root-finding could
be cleaned up; with SCEV-based rewriting, there isn't any reason to
assume a loop will have exactly one or two PHI nodes.

Differential Revision: https://reviews.llvm.org/D45191

llvm-svn: 335400
2018-06-22 22:58:55 +00:00

185 lines
7.7 KiB
LLVM

; Regression tests for the loop-reroll pass. The command below runs opt with
; -loop-reroll on this file and pipes the textual IR to FileCheck, which
; matches it against the directives embedded in each loop body.
; Each function below contains a loop whose body is written out twice per
; iteration (the induction variable advances by 2).
; RUN: opt -S -loop-reroll %s | FileCheck %s
target triple = "aarch64--linux-gnu"
; @test: float multiply-accumulate loop written out twice per iteration.
;   For k = iv and k = iv + 1:
;     arrayidx200[k] = arrayidx164[k] * t22 + arrayidx200[k]
;   where t22 is loaded once from %arrayidx172 in the entry block.
; The loop carries two induction variables of different widths:
;   - an i64 index starting at 0 and stepping by 2 (used for addressing);
;   - an i32 counter starting at (n srem 4) and stepping by 2, which is
;     sign-extended to i64 and compared (slt) against sext(n) for the exit.
; The directives in while.body expect a single i64 %indvar stepping by 1,
; one copy of the body, and an equality exit test against an unnamed value.
define void @test(i32 %n, float* %arrayidx200, float* %arrayidx164, float* %arrayidx172) {
entry:
%rem.i = srem i32 %n, 4
%t22 = load float, float* %arrayidx172, align 4
; The loop is skipped entirely when n == 0.
%cmp.9 = icmp eq i32 %n, 0
; Loop bound widened to i64 once, outside the loop.
%t7 = sext i32 %n to i64
br i1 %cmp.9, label %while.end, label %while.body.preheader
while.body.preheader:
br label %while.body
while.body:
; Expected shape of the loop body after the pass has run.
;CHECK-LABEL: while.body:
;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %while.body ], [ 0, %while.body.preheader ]
;CHECK-NEXT: %arrayidx62.i = getelementptr inbounds float, float* %arrayidx200, i64 %indvar
;CHECK-NEXT: %t1 = load float, float* %arrayidx62.i, align 4
;CHECK-NEXT: %arrayidx64.i = getelementptr inbounds float, float* %arrayidx164, i64 %indvar
;CHECK-NEXT: %t2 = load float, float* %arrayidx64.i, align 4
;CHECK-NEXT: %mul65.i = fmul fast float %t2, %t22
;CHECK-NEXT: %add66.i = fadd fast float %mul65.i, %t1
;CHECK-NEXT: store float %add66.i, float* %arrayidx62.i, align 4
;CHECK-NEXT: %indvar.next = add i64 %indvar, 1
;CHECK-NEXT: %exitcond = icmp eq i64 %indvar, %{{[0-9]+}}
;CHECK-NEXT: br i1 %exitcond, label %while.end.loopexit, label %while.body
; Input loop: i64 addressing index and i32 exit counter.
%indvars.iv.i423 = phi i64 [ %indvars.iv.next.i424, %while.body ], [ 0, %while.body.preheader ]
%i.22.i = phi i32 [ %add103.i, %while.body ], [ %rem.i, %while.body.preheader ]
; First copy of the body: element at index iv.
%arrayidx62.i = getelementptr inbounds float, float* %arrayidx200, i64 %indvars.iv.i423
%t1 = load float, float* %arrayidx62.i, align 4
%arrayidx64.i = getelementptr inbounds float, float* %arrayidx164, i64 %indvars.iv.i423
%t2 = load float, float* %arrayidx64.i, align 4
%mul65.i = fmul fast float %t2, %t22
%add66.i = fadd fast float %mul65.i, %t1
store float %add66.i, float* %arrayidx62.i, align 4
; Second copy of the body: element at index iv + 1.
%t3 = add nsw i64 %indvars.iv.i423, 1
%arrayidx71.i = getelementptr inbounds float, float* %arrayidx200, i64 %t3
%t4 = load float, float* %arrayidx71.i, align 4
%arrayidx74.i = getelementptr inbounds float, float* %arrayidx164, i64 %t3
%t5 = load float, float* %arrayidx74.i, align 4
%mul75.i = fmul fast float %t5, %t22
%add76.i = fadd fast float %mul75.i, %t4
store float %add76.i, float* %arrayidx71.i, align 4
; Exit test: advance the i32 counter by 2, sign-extend it, and continue
; while it is (signed) less than sext(n).
%add103.i = add nsw i32 %i.22.i, 2
%t6 = sext i32 %add103.i to i64
%cmp58.i = icmp slt i64 %t6, %t7
%indvars.iv.next.i424 = add i64 %indvars.iv.i423, 2
br i1 %cmp58.i, label %while.body, label %while.end.loopexit
while.end.loopexit:
br label %while.end
while.end:
ret void
}
; Function Attrs: noinline norecurse nounwind
; @test2: copies y[k] to x[k] for k = iv and k = (iv | 1), with an i64
; induction variable starting at 0 and stepping by 2 (so iv is always even
; and iv | 1 equals iv + 1). The loop is entered only when n > 0 (signed)
; and continues while iv + 2 < n (signed). Always returns 0.
; The directives in for.body expect a single i64 %indvar stepping by 1,
; one load/store pair, and an equality exit test against an unnamed value.
define i32 @test2(i64 %n, i32* nocapture %x, i32* nocapture readonly %y) {
entry:
%cmp18 = icmp sgt i64 %n, 0
br i1 %cmp18, label %for.body.preheader, label %for.end
for.body.preheader: ; preds = %entry
br label %for.body
for.body: ; preds = %for.body.preheader, %for.body
; Expected shape of the loop body after the pass has run.
;CHECK-LABEL: for.body:
;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %for.body.preheader ]
;CHECK-NEXT: %arrayidx = getelementptr inbounds i32, i32* %y, i64 %indvar
;CHECK-NEXT: [[T1:%[0-9]+]] = load i32, i32* %arrayidx, align 4
;CHECK-NEXT: %arrayidx3 = getelementptr inbounds i32, i32* %x, i64 %indvar
;CHECK-NEXT: store i32 [[T1]], i32* %arrayidx3, align 4
;CHECK-NEXT: %indvar.next = add i64 %indvar, 1
;CHECK-NEXT: %exitcond = icmp eq i64 %indvar, %{{[0-9]+}}
;CHECK-NEXT: br i1 %exitcond, label %for.end.loopexit, label %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
; First copy: x[iv] = y[iv].
%arrayidx = getelementptr inbounds i32, i32* %y, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4
%arrayidx3 = getelementptr inbounds i32, i32* %x, i64 %indvars.iv
store i32 %0, i32* %arrayidx3, align 4
; Second copy: x[iv | 1] = y[iv | 1]; the odd index is formed with 'or'
; rather than 'add'.
%1 = or i64 %indvars.iv, 1
%arrayidx5 = getelementptr inbounds i32, i32* %y, i64 %1
%2 = load i32, i32* %arrayidx5, align 4
%arrayidx8 = getelementptr inbounds i32, i32* %x, i64 %1
store i32 %2, i32* %arrayidx8, align 4
; Step by 2; signed comparison against n decides whether to loop again.
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
%cmp = icmp slt i64 %indvars.iv.next, %n
br i1 %cmp, label %for.body, label %for.end.loopexit
for.end.loopexit: ; preds = %for.body
br label %for.end
for.end: ; preds = %for.end.loopexit, %entry
ret i32 0
}
; Function Attrs: noinline norecurse nounwind
; @test3: two-element copy from y to x per iteration, driven by an i8
; induction variable (%i.022, starts at 0, steps by 2, no nsw/nuw flags on
; the add). A second i32 phi (%conv23) carries the sign-extended value of
; the i8 increment computed in the previous iteration. The loop is entered
; only when n > 0 (signed). Always returns 0.
; The directives in for.body match the loop tail exactly as it is written
; in the input (i8 add of 2, sext to i32, signed compare, branch), i.e. the
; step-2 increment is expected to remain in the output.
; NOTE(review): presumably rerolling is rejected here because the narrow
; i8 add may wrap -- confirm against the pass implementation.
define i32 @test3(i32 %n, i32* nocapture %x, i32* nocapture readonly %y) {
entry:
%cmp21 = icmp sgt i32 %n, 0
br i1 %cmp21, label %for.body.preheader, label %for.end
for.body.preheader: ; preds = %entry
br label %for.body
for.body: ; preds = %for.body.preheader, %for.body
; Expected loop tail; identical to the last four instructions of the input
; loop body below.
;CHECK-LABEL: for.body:
;CHECK: %add12 = add i8 %i.022, 2
;CHECK-NEXT: %conv = sext i8 %add12 to i32
;CHECK-NEXT: %cmp = icmp slt i32 %conv, %n
;CHECK-NEXT: br i1 %cmp, label %for.body, label %for.end.loopexit
%conv23 = phi i32 [ %conv, %for.body ], [ 0, %for.body.preheader ]
%i.022 = phi i8 [ %add12, %for.body ], [ 0, %for.body.preheader ]
; First copy: index is the i8 induction variable sign-extended to i64.
%idxprom = sext i8 %i.022 to i64
%arrayidx = getelementptr inbounds i32, i32* %y, i64 %idxprom
%0 = load i32, i32* %arrayidx, align 4
%arrayidx3 = getelementptr inbounds i32, i32* %x, i64 %idxprom
store i32 %0, i32* %arrayidx3, align 4
; Second copy: index is (%conv23 | 1) sign-extended from i32 to i64.
%add = or i32 %conv23, 1
%idxprom5 = sext i32 %add to i64
%arrayidx6 = getelementptr inbounds i32, i32* %y, i64 %idxprom5
%1 = load i32, i32* %arrayidx6, align 4
%arrayidx10 = getelementptr inbounds i32, i32* %x, i64 %idxprom5
store i32 %1, i32* %arrayidx10, align 4
; Advance the i8 counter by 2, widen it to i32, and loop while it is
; (signed) less than n.
%add12 = add i8 %i.022, 2
%conv = sext i8 %add12 to i32
%cmp = icmp slt i32 %conv, %n
br i1 %cmp, label %for.body, label %for.end.loopexit
for.end.loopexit: ; preds = %for.body
br label %for.end
for.end: ; preds = %for.end.loopexit, %entry
ret i32 0
}
; Function Attrs: noinline norecurse nounwind
; @test4: same two-element copy loop as an i64 step-2 induction variable
; from 0 (x[iv] = y[iv]; x[iv | 1] = y[iv | 1]), but the bound is treated
; as unsigned: the loop is skipped only when n == 0 and continues while
; iv + 2 < n using an unsigned comparison (ult). Always returns 0.
; The directives in for.body expect a single i64 %indvar stepping by 1,
; one load/store pair, and an equality exit test against an unnamed value.
define i32 @test4(i64 %n, i32* nocapture %x, i32* nocapture readonly %y) {
entry:
%cmp18 = icmp eq i64 %n, 0
br i1 %cmp18, label %for.end, label %for.body.preheader
for.body.preheader: ; preds = %entry
br label %for.body
for.body: ; preds = %for.body.preheader, %for.body
; Expected shape of the loop body after the pass has run.
;CHECK-LABEL: for.body:
;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %for.body.preheader ]
;CHECK-NEXT: %arrayidx = getelementptr inbounds i32, i32* %y, i64 %indvar
;CHECK-NEXT: [[T1:%[0-9]+]] = load i32, i32* %arrayidx, align 4
;CHECK-NEXT: %arrayidx3 = getelementptr inbounds i32, i32* %x, i64 %indvar
;CHECK-NEXT: store i32 [[T1]], i32* %arrayidx3, align 4
;CHECK-NEXT: %indvar.next = add i64 %indvar, 1
;CHECK-NEXT: %exitcond = icmp eq i64 %indvar, %{{[0-9]+}}
;CHECK-NEXT: br i1 %exitcond, label %for.end.loopexit, label %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
; First copy: x[iv] = y[iv].
%arrayidx = getelementptr inbounds i32, i32* %y, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4
%arrayidx3 = getelementptr inbounds i32, i32* %x, i64 %indvars.iv
store i32 %0, i32* %arrayidx3, align 4
; Second copy: x[iv | 1] = y[iv | 1]; the odd index is formed with 'or'
; rather than 'add'.
%1 = or i64 %indvars.iv, 1
%arrayidx5 = getelementptr inbounds i32, i32* %y, i64 %1
%2 = load i32, i32* %arrayidx5, align 4
%arrayidx8 = getelementptr inbounds i32, i32* %x, i64 %1
store i32 %2, i32* %arrayidx8, align 4
; Step by 2; unsigned comparison against n decides whether to loop again.
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
%cmp = icmp ult i64 %indvars.iv.next, %n
br i1 %cmp, label %for.body, label %for.end.loopexit
for.end.loopexit: ; preds = %for.body
br label %for.end
for.end: ; preds = %for.end.loopexit, %entry
ret i32 0
}