mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
318f4a3446
The SCEV code for constructing GEP expressions currently assumes that the addition of the base and all the offsets is nsw if the GEP is inbounds. While the addition of the offsets is indeed nsw, the addition to the base address is not, as the base address is interpreted as an unsigned value. Fix the GEP expression code to not assume nsw for the base+offset calculation. However, do assume nuw if we know that the offset is non-negative. With this, we use the same behavior as the construction of GEP addrecs does. (Modulo the fact that we disregard SCEV unification, as the pre-existing FIXME points out). Differential Revision: https://reviews.llvm.org/D90648
85 lines
3.7 KiB
LLVM
85 lines
3.7 KiB
LLVM
; RUN: opt < %s -S -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s
|
|
; RUN: opt < %s -S -disable-output "-passes=print<scalar-evolution>" 2>&1 | FileCheck %s
|
|
|
|
; ScalarEvolution should be able to fold away the sign-extensions
|
|
; on this loop with a primary induction variable incremented with
|
|
; a nsw add of 2 (this test is derived from the nsw-offset.ll test, but uses an
|
|
; assume instead of a preheader conditional branch to guard the loop).
|
|
|
|
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
|
|
|
|
define void @foo(i32 %no, double* nocapture %d, double* nocapture %q) nounwind {
|
|
entry:
|
|
%n = and i32 %no, 4294967294
|
|
%0 = icmp sgt i32 %n, 0 ; <i1> [#uses=1]
|
|
tail call void @llvm.assume(i1 %0)
|
|
br label %bb.nph
|
|
|
|
bb.nph: ; preds = %entry
|
|
br label %bb
|
|
|
|
bb: ; preds = %bb.nph, %bb1
|
|
%i.01 = phi i32 [ %16, %bb1 ], [ 0, %bb.nph ] ; <i32> [#uses=5]
|
|
|
|
; CHECK: %1 = sext i32 %i.01 to i64
|
|
; CHECK: --> {0,+,2}<nuw><nsw><%bb>
|
|
%1 = sext i32 %i.01 to i64 ; <i64> [#uses=1]
|
|
|
|
; CHECK: %2 = getelementptr inbounds double, double* %d, i64 %1
|
|
; CHECK: --> {%d,+,16}<nuw><%bb>
|
|
%2 = getelementptr inbounds double, double* %d, i64 %1 ; <double*> [#uses=1]
|
|
|
|
%3 = load double, double* %2, align 8 ; <double> [#uses=1]
|
|
%4 = sext i32 %i.01 to i64 ; <i64> [#uses=1]
|
|
%5 = getelementptr inbounds double, double* %q, i64 %4 ; <double*> [#uses=1]
|
|
%6 = load double, double* %5, align 8 ; <double> [#uses=1]
|
|
%7 = or i32 %i.01, 1 ; <i32> [#uses=1]
|
|
|
|
; CHECK: %8 = sext i32 %7 to i64
|
|
; CHECK: --> {1,+,2}<nuw><nsw><%bb>
|
|
%8 = sext i32 %7 to i64 ; <i64> [#uses=1]
|
|
|
|
; CHECK: %9 = getelementptr inbounds double, double* %q, i64 %8
|
|
; CHECK: {(8 + %q)<nuw>,+,16}<nuw><%bb>
|
|
%9 = getelementptr inbounds double, double* %q, i64 %8 ; <double*> [#uses=1]
|
|
|
|
; Artificially repeat the above three instructions, this time using
|
|
; add nsw instead of or.
|
|
%t7 = add nsw i32 %i.01, 1 ; <i32> [#uses=1]
|
|
|
|
; CHECK: %t8 = sext i32 %t7 to i64
|
|
; CHECK: --> {1,+,2}<nuw><nsw><%bb>
|
|
%t8 = sext i32 %t7 to i64 ; <i64> [#uses=1]
|
|
|
|
; CHECK: %t9 = getelementptr inbounds double, double* %q, i64 %t8
|
|
; CHECK: {(8 + %q)<nuw>,+,16}<nuw><%bb>
|
|
%t9 = getelementptr inbounds double, double* %q, i64 %t8 ; <double*> [#uses=1]
|
|
|
|
%10 = load double, double* %9, align 8 ; <double> [#uses=1]
|
|
%11 = fadd double %6, %10 ; <double> [#uses=1]
|
|
%12 = fadd double %11, 3.200000e+00 ; <double> [#uses=1]
|
|
%13 = fmul double %3, %12 ; <double> [#uses=1]
|
|
%14 = sext i32 %i.01 to i64 ; <i64> [#uses=1]
|
|
%15 = getelementptr inbounds double, double* %d, i64 %14 ; <double*> [#uses=1]
|
|
store double %13, double* %15, align 8
|
|
%16 = add nsw i32 %i.01, 2 ; <i32> [#uses=2]
|
|
br label %bb1
|
|
|
|
bb1: ; preds = %bb
|
|
%17 = icmp slt i32 %16, %n ; <i1> [#uses=1]
|
|
br i1 %17, label %bb, label %bb1.return_crit_edge
|
|
|
|
bb1.return_crit_edge: ; preds = %bb1
|
|
br label %return
|
|
|
|
return: ; preds = %bb1.return_crit_edge, %entry
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.assume(i1) nounwind
|
|
|
|
; Note: Without the preheader assume, there is an 'smax' in the
|
|
; backedge-taken count expression:
|
|
; CHECK: Loop %bb: backedge-taken count is ((-1 + (2 * (%no /u 2))<nuw>) /u 2)
|
|
; CHECK: Loop %bb: max backedge-taken count is 1073741822
|