mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-20 03:23:01 +02:00
Generalize the cast-of-addrec folding to handle SCEVs such as
(sext i8 {-128,+,1} to i64), which can be folded to the i64 addrec {-128,+,1}. In such cases the iteration crosses from negative to positive, but the fold is still safe as long as the trip count is within range.
llvm-svn: 70421
This commit is contained in:
parent
8b1b8d5891
commit
06aff30f01
@ -718,7 +718,7 @@ SCEVHandle ScalarEvolution::getZeroExtendExpr(const SCEVHandle &Op,
|
|||||||
SCEVHandle BECount = getBackedgeTakenCount(AR->getLoop());
|
SCEVHandle BECount = getBackedgeTakenCount(AR->getLoop());
|
||||||
if (!isa<SCEVCouldNotCompute>(BECount)) {
|
if (!isa<SCEVCouldNotCompute>(BECount)) {
|
||||||
// Manually compute the final value for AR, checking for
|
// Manually compute the final value for AR, checking for
|
||||||
// overflow at each step.
|
// overflow.
|
||||||
SCEVHandle Start = AR->getStart();
|
SCEVHandle Start = AR->getStart();
|
||||||
SCEVHandle Step = AR->getStepRecurrence(*this);
|
SCEVHandle Step = AR->getStepRecurrence(*this);
|
||||||
|
|
||||||
@ -730,41 +730,34 @@ SCEVHandle ScalarEvolution::getZeroExtendExpr(const SCEVHandle &Op,
|
|||||||
getTruncateOrZeroExtend(CastedBECount, BECount->getType())) {
|
getTruncateOrZeroExtend(CastedBECount, BECount->getType())) {
|
||||||
const Type *WideTy =
|
const Type *WideTy =
|
||||||
IntegerType::get(getTypeSizeInBits(Start->getType()) * 2);
|
IntegerType::get(getTypeSizeInBits(Start->getType()) * 2);
|
||||||
|
// Check whether Start+Step*BECount has no unsigned overflow.
|
||||||
SCEVHandle ZMul =
|
SCEVHandle ZMul =
|
||||||
getMulExpr(CastedBECount,
|
getMulExpr(CastedBECount,
|
||||||
getTruncateOrZeroExtend(Step, Start->getType()));
|
getTruncateOrZeroExtend(Step, Start->getType()));
|
||||||
// Check whether Start+Step*BECount has no unsigned overflow.
|
SCEVHandle Add = getAddExpr(Start, ZMul);
|
||||||
if (getZeroExtendExpr(ZMul, WideTy) ==
|
if (getZeroExtendExpr(Add, WideTy) ==
|
||||||
getMulExpr(getZeroExtendExpr(CastedBECount, WideTy),
|
getAddExpr(getZeroExtendExpr(Start, WideTy),
|
||||||
getZeroExtendExpr(Step, WideTy))) {
|
getMulExpr(getZeroExtendExpr(CastedBECount, WideTy),
|
||||||
SCEVHandle Add = getAddExpr(Start, ZMul);
|
getZeroExtendExpr(Step, WideTy))))
|
||||||
if (getZeroExtendExpr(Add, WideTy) ==
|
// Return the expression with the addrec on the outside.
|
||||||
getAddExpr(getZeroExtendExpr(Start, WideTy),
|
return getAddRecExpr(getZeroExtendExpr(Start, Ty),
|
||||||
getZeroExtendExpr(ZMul, WideTy)))
|
getZeroExtendExpr(Step, Ty),
|
||||||
// Return the expression with the addrec on the outside.
|
AR->getLoop());
|
||||||
return getAddRecExpr(getZeroExtendExpr(Start, Ty),
|
|
||||||
getZeroExtendExpr(Step, Ty),
|
|
||||||
AR->getLoop());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Similar to above, only this time treat the step value as signed.
|
// Similar to above, only this time treat the step value as signed.
|
||||||
// This covers loops that count down.
|
// This covers loops that count down.
|
||||||
SCEVHandle SMul =
|
SCEVHandle SMul =
|
||||||
getMulExpr(CastedBECount,
|
getMulExpr(CastedBECount,
|
||||||
getTruncateOrSignExtend(Step, Start->getType()));
|
getTruncateOrSignExtend(Step, Start->getType()));
|
||||||
// Check whether Start+Step*BECount has no unsigned overflow.
|
Add = getAddExpr(Start, SMul);
|
||||||
if (getSignExtendExpr(SMul, WideTy) ==
|
if (getZeroExtendExpr(Add, WideTy) ==
|
||||||
getMulExpr(getZeroExtendExpr(CastedBECount, WideTy),
|
getAddExpr(getZeroExtendExpr(Start, WideTy),
|
||||||
getSignExtendExpr(Step, WideTy))) {
|
getMulExpr(getZeroExtendExpr(CastedBECount, WideTy),
|
||||||
SCEVHandle Add = getAddExpr(Start, SMul);
|
getSignExtendExpr(Step, WideTy))))
|
||||||
if (getZeroExtendExpr(Add, WideTy) ==
|
// Return the expression with the addrec on the outside.
|
||||||
getAddExpr(getZeroExtendExpr(Start, WideTy),
|
return getAddRecExpr(getZeroExtendExpr(Start, Ty),
|
||||||
getSignExtendExpr(SMul, WideTy)))
|
getSignExtendExpr(Step, Ty),
|
||||||
// Return the expression with the addrec on the outside.
|
AR->getLoop());
|
||||||
return getAddRecExpr(getZeroExtendExpr(Start, Ty),
|
|
||||||
getSignExtendExpr(Step, Ty),
|
|
||||||
AR->getLoop());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -807,37 +800,31 @@ SCEVHandle ScalarEvolution::getSignExtendExpr(const SCEVHandle &Op,
|
|||||||
SCEVHandle BECount = getBackedgeTakenCount(AR->getLoop());
|
SCEVHandle BECount = getBackedgeTakenCount(AR->getLoop());
|
||||||
if (!isa<SCEVCouldNotCompute>(BECount)) {
|
if (!isa<SCEVCouldNotCompute>(BECount)) {
|
||||||
// Manually compute the final value for AR, checking for
|
// Manually compute the final value for AR, checking for
|
||||||
// overflow at each step.
|
// overflow.
|
||||||
SCEVHandle Start = AR->getStart();
|
SCEVHandle Start = AR->getStart();
|
||||||
SCEVHandle Step = AR->getStepRecurrence(*this);
|
SCEVHandle Step = AR->getStepRecurrence(*this);
|
||||||
|
|
||||||
// Check whether the backedge-taken count can be losslessly casted to
|
// Check whether the backedge-taken count can be losslessly casted to
|
||||||
// the addrec's type. The count needs to be the same whether sign
|
// the addrec's type. The count is always unsigned.
|
||||||
// extended or zero extended.
|
|
||||||
SCEVHandle CastedBECount =
|
SCEVHandle CastedBECount =
|
||||||
getTruncateOrZeroExtend(BECount, Start->getType());
|
getTruncateOrZeroExtend(BECount, Start->getType());
|
||||||
if (BECount ==
|
if (BECount ==
|
||||||
getTruncateOrZeroExtend(CastedBECount, BECount->getType()) &&
|
getTruncateOrZeroExtend(CastedBECount, BECount->getType())) {
|
||||||
BECount ==
|
|
||||||
getTruncateOrSignExtend(CastedBECount, BECount->getType())) {
|
|
||||||
const Type *WideTy =
|
const Type *WideTy =
|
||||||
IntegerType::get(getTypeSizeInBits(Start->getType()) * 2);
|
IntegerType::get(getTypeSizeInBits(Start->getType()) * 2);
|
||||||
|
// Check whether Start+Step*BECount has no signed overflow.
|
||||||
SCEVHandle SMul =
|
SCEVHandle SMul =
|
||||||
getMulExpr(CastedBECount,
|
getMulExpr(CastedBECount,
|
||||||
getTruncateOrSignExtend(Step, Start->getType()));
|
getTruncateOrSignExtend(Step, Start->getType()));
|
||||||
// Check whether Start+Step*BECount has no signed overflow.
|
SCEVHandle Add = getAddExpr(Start, SMul);
|
||||||
if (getSignExtendExpr(SMul, WideTy) ==
|
if (getSignExtendExpr(Add, WideTy) ==
|
||||||
getMulExpr(getSignExtendExpr(CastedBECount, WideTy),
|
getAddExpr(getSignExtendExpr(Start, WideTy),
|
||||||
getSignExtendExpr(Step, WideTy))) {
|
getMulExpr(getZeroExtendExpr(CastedBECount, WideTy),
|
||||||
SCEVHandle Add = getAddExpr(Start, SMul);
|
getSignExtendExpr(Step, WideTy))))
|
||||||
if (getSignExtendExpr(Add, WideTy) ==
|
// Return the expression with the addrec on the outside.
|
||||||
getAddExpr(getSignExtendExpr(Start, WideTy),
|
return getAddRecExpr(getSignExtendExpr(Start, Ty),
|
||||||
getSignExtendExpr(SMul, WideTy)))
|
getSignExtendExpr(Step, Ty),
|
||||||
// Return the expression with the addrec on the outside.
|
AR->getLoop());
|
||||||
return getAddRecExpr(getSignExtendExpr(Start, Ty),
|
|
||||||
getSignExtendExpr(Step, Ty),
|
|
||||||
AR->getLoop());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
31
test/Analysis/ScalarEvolution/sext-iv-0.ll
Normal file
31
test/Analysis/ScalarEvolution/sext-iv-0.ll
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
; RUN: llvm-as < %s | opt -disable-output -scalar-evolution -analyze \
|
||||||
|
; RUN: | grep { --> \{-128,+,1\}<bb1> Exits: 127} | count 5
|
||||||
|
|
||||||
|
; Convert (sext {-128,+,1}) to {sext(-128),+,sext(1)}, since the
|
||||||
|
; trip count is within range where this is safe.
|
||||||
|
|
||||||
|
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
|
||||||
|
target triple = "x86_64-unknown-linux-gnu"
|
||||||
|
|
||||||
|
define void @foo(double* nocapture %x) nounwind {
|
||||||
|
bb1.thread:
|
||||||
|
br label %bb1
|
||||||
|
|
||||||
|
bb1: ; preds = %bb1, %bb1.thread
|
||||||
|
%i.0.reg2mem.0 = phi i64 [ -128, %bb1.thread ], [ %8, %bb1 ] ; <i64> [#uses=3]
|
||||||
|
%0 = trunc i64 %i.0.reg2mem.0 to i8 ; <i8> [#uses=1]
|
||||||
|
%1 = trunc i64 %i.0.reg2mem.0 to i9 ; <i8> [#uses=1]
|
||||||
|
%2 = sext i9 %1 to i64 ; <i64> [#uses=1]
|
||||||
|
%3 = getelementptr double* %x, i64 %2 ; <double*> [#uses=1]
|
||||||
|
%4 = load double* %3, align 8 ; <double> [#uses=1]
|
||||||
|
%5 = mul double %4, 3.900000e+00 ; <double> [#uses=1]
|
||||||
|
%6 = sext i8 %0 to i64 ; <i64> [#uses=1]
|
||||||
|
%7 = getelementptr double* %x, i64 %6 ; <double*> [#uses=1]
|
||||||
|
store double %5, double* %7, align 8
|
||||||
|
%8 = add i64 %i.0.reg2mem.0, 1 ; <i64> [#uses=2]
|
||||||
|
%9 = icmp sgt i64 %8, 127 ; <i1> [#uses=1]
|
||||||
|
br i1 %9, label %return, label %bb1
|
||||||
|
|
||||||
|
return: ; preds = %bb1
|
||||||
|
ret void
|
||||||
|
}
|
100
test/Analysis/ScalarEvolution/sext-iv-1.ll
Normal file
100
test/Analysis/ScalarEvolution/sext-iv-1.ll
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
; RUN: llvm-as < %s | opt -disable-output -scalar-evolution -analyze \
|
||||||
|
; RUN: | grep { --> (sext i. \{.\*,+,.\*\}<bb1> to i64)} | count 5
|
||||||
|
|
||||||
|
; Don't convert (sext {...,+,...}) to {sext(...),+,sext(...)} in cases
|
||||||
|
; where the trip count is not within range.
|
||||||
|
|
||||||
|
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
|
||||||
|
target triple = "x86_64-unknown-linux-gnu"
|
||||||
|
|
||||||
|
define void @foo0(double* nocapture %x) nounwind {
|
||||||
|
bb1.thread:
|
||||||
|
br label %bb1
|
||||||
|
|
||||||
|
bb1: ; preds = %bb1, %bb1.thread
|
||||||
|
%i.0.reg2mem.0 = phi i64 [ -128, %bb1.thread ], [ %8, %bb1 ] ; <i64> [#uses=3]
|
||||||
|
%0 = trunc i64 %i.0.reg2mem.0 to i7 ; <i8> [#uses=1]
|
||||||
|
%1 = trunc i64 %i.0.reg2mem.0 to i9 ; <i8> [#uses=1]
|
||||||
|
%2 = sext i9 %1 to i64 ; <i64> [#uses=1]
|
||||||
|
%3 = getelementptr double* %x, i64 %2 ; <double*> [#uses=1]
|
||||||
|
%4 = load double* %3, align 8 ; <double> [#uses=1]
|
||||||
|
%5 = mul double %4, 3.900000e+00 ; <double> [#uses=1]
|
||||||
|
%6 = sext i7 %0 to i64 ; <i64> [#uses=1]
|
||||||
|
%7 = getelementptr double* %x, i64 %6 ; <double*> [#uses=1]
|
||||||
|
store double %5, double* %7, align 8
|
||||||
|
%8 = add i64 %i.0.reg2mem.0, 1 ; <i64> [#uses=2]
|
||||||
|
%9 = icmp sgt i64 %8, 127 ; <i1> [#uses=1]
|
||||||
|
br i1 %9, label %return, label %bb1
|
||||||
|
|
||||||
|
return: ; preds = %bb1
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @foo1(double* nocapture %x) nounwind {
|
||||||
|
bb1.thread:
|
||||||
|
br label %bb1
|
||||||
|
|
||||||
|
bb1: ; preds = %bb1, %bb1.thread
|
||||||
|
%i.0.reg2mem.0 = phi i64 [ -128, %bb1.thread ], [ %8, %bb1 ] ; <i64> [#uses=3]
|
||||||
|
%0 = trunc i64 %i.0.reg2mem.0 to i8 ; <i8> [#uses=1]
|
||||||
|
%1 = trunc i64 %i.0.reg2mem.0 to i9 ; <i8> [#uses=1]
|
||||||
|
%2 = sext i9 %1 to i64 ; <i64> [#uses=1]
|
||||||
|
%3 = getelementptr double* %x, i64 %2 ; <double*> [#uses=1]
|
||||||
|
%4 = load double* %3, align 8 ; <double> [#uses=1]
|
||||||
|
%5 = mul double %4, 3.900000e+00 ; <double> [#uses=1]
|
||||||
|
%6 = sext i8 %0 to i64 ; <i64> [#uses=1]
|
||||||
|
%7 = getelementptr double* %x, i64 %6 ; <double*> [#uses=1]
|
||||||
|
store double %5, double* %7, align 8
|
||||||
|
%8 = add i64 %i.0.reg2mem.0, 1 ; <i64> [#uses=2]
|
||||||
|
%9 = icmp sgt i64 %8, 128 ; <i1> [#uses=1]
|
||||||
|
br i1 %9, label %return, label %bb1
|
||||||
|
|
||||||
|
return: ; preds = %bb1
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @foo2(double* nocapture %x) nounwind {
|
||||||
|
bb1.thread:
|
||||||
|
br label %bb1
|
||||||
|
|
||||||
|
bb1: ; preds = %bb1, %bb1.thread
|
||||||
|
%i.0.reg2mem.0 = phi i64 [ -129, %bb1.thread ], [ %8, %bb1 ] ; <i64> [#uses=3]
|
||||||
|
%0 = trunc i64 %i.0.reg2mem.0 to i8 ; <i8> [#uses=1]
|
||||||
|
%1 = trunc i64 %i.0.reg2mem.0 to i9 ; <i8> [#uses=1]
|
||||||
|
%2 = sext i9 %1 to i64 ; <i64> [#uses=1]
|
||||||
|
%3 = getelementptr double* %x, i64 %2 ; <double*> [#uses=1]
|
||||||
|
%4 = load double* %3, align 8 ; <double> [#uses=1]
|
||||||
|
%5 = mul double %4, 3.900000e+00 ; <double> [#uses=1]
|
||||||
|
%6 = sext i8 %0 to i64 ; <i64> [#uses=1]
|
||||||
|
%7 = getelementptr double* %x, i64 %6 ; <double*> [#uses=1]
|
||||||
|
store double %5, double* %7, align 8
|
||||||
|
%8 = add i64 %i.0.reg2mem.0, 1 ; <i64> [#uses=2]
|
||||||
|
%9 = icmp sgt i64 %8, 127 ; <i1> [#uses=1]
|
||||||
|
br i1 %9, label %return, label %bb1
|
||||||
|
|
||||||
|
return: ; preds = %bb1
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @foo3(double* nocapture %x) nounwind {
|
||||||
|
bb1.thread:
|
||||||
|
br label %bb1
|
||||||
|
|
||||||
|
bb1: ; preds = %bb1, %bb1.thread
|
||||||
|
%i.0.reg2mem.0 = phi i64 [ -128, %bb1.thread ], [ %8, %bb1 ] ; <i64> [#uses=3]
|
||||||
|
%0 = trunc i64 %i.0.reg2mem.0 to i8 ; <i8> [#uses=1]
|
||||||
|
%1 = trunc i64 %i.0.reg2mem.0 to i9 ; <i8> [#uses=1]
|
||||||
|
%2 = sext i9 %1 to i64 ; <i64> [#uses=1]
|
||||||
|
%3 = getelementptr double* %x, i64 %2 ; <double*> [#uses=1]
|
||||||
|
%4 = load double* %3, align 8 ; <double> [#uses=1]
|
||||||
|
%5 = mul double %4, 3.900000e+00 ; <double> [#uses=1]
|
||||||
|
%6 = sext i8 %0 to i64 ; <i64> [#uses=1]
|
||||||
|
%7 = getelementptr double* %x, i64 %6 ; <double*> [#uses=1]
|
||||||
|
store double %5, double* %7, align 8
|
||||||
|
%8 = add i64 %i.0.reg2mem.0, -1 ; <i64> [#uses=2]
|
||||||
|
%9 = icmp sgt i64 %8, 127 ; <i1> [#uses=1]
|
||||||
|
br i1 %9, label %return, label %bb1
|
||||||
|
|
||||||
|
return: ; preds = %bb1
|
||||||
|
ret void
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user