
When checking whether the special handling for an addrec increment which
doesn't dominate the header is needed, don't check whether the increment
expression has computable loop evolution. While the operands of an addrec
are required to be loop-invariant, they're not required to dominate any
part of the loop. This fixes PR6914.

llvm-svn: 102389
Dan Gohman, 2010-04-26 21:46:36 +00:00
commit 40561dd0ba (parent 9c1b7fdc46)
3 changed files with 26 additions and 4 deletions
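For reference, a SCEV addrec {Start,+,Step}<L> describes a value that begins
at Start and is incremented by Step on each iteration of loop L.
SCEVExpander::expandAddRecExprLiterally, patched below, turns such
expressions back into IR; the question at issue is when the expander must
avoid referencing Step inside the loop because Step's definition does not
dominate the loop header.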


@@ -17,7 +17,7 @@ which is very inefficient when expanded into code.
 
 //===---------------------------------------------------------------------===//
 
-In test/CodeGen/X86/lsr-delayed-fold.ll,
+In formatValue in test/CodeGen/X86/lsr-delayed-fold.ll,
 ScalarEvolution is forming this expression:


@@ -995,8 +995,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
   // Strip off any non-loop-dominating component from the addrec step.
   const SCEV *Step = Normalized->getStepRecurrence(SE);
   const SCEV *PostLoopScale = 0;
-  if (!Step->hasComputableLoopEvolution(L) &&
-      !Step->dominates(L->getHeader(), SE.DT)) {
+  if (!Step->dominates(L->getHeader(), SE.DT)) {
     PostLoopScale = Step;
     Step = SE.getIntegerSCEV(1, Normalized->getType());
     Normalized =
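The retained fallback rests on a simple identity: {Start,+,Step} is Start
plus Step times the canonical recurrence {0,+,1}, so a step that is
loop-invariant but not available at the header can be factored out of the
loop and multiplied back in at the point of use (the PostLoopScale above).
A minimal sketch of that equivalence in plain C++, with hypothetical names
rather than real SCEV API:

#include <cassert>
#include <cstdint>

// Expand {Start,+,Step} directly: the increment references Step on every
// iteration, so Step would have to be available at the loop header.
static uint8_t expandDirect(uint8_t Start, uint8_t Step, unsigned Trips) {
  uint8_t V = Start;
  for (unsigned i = 0; i != Trips; ++i)
    V += Step;
  return V;
}

// Expand the canonical recurrence {0,+,1} instead, and multiply the
// non-dominating Step back in after the loop, at the point of use.
static uint8_t expandPostScaled(uint8_t Start, uint8_t Step, unsigned Trips) {
  uint8_t CanonicalIV = 0;
  for (unsigned i = 0; i != Trips; ++i)
    CanonicalIV += 1; // no reference to Step inside the loop
  return Start + uint8_t(CanonicalIV * Step); // post-loop scale
}

int main() {
  // i8 arithmetic wraps mod 256 in both expansions, so the results agree
  // even past 256 trips.
  for (unsigned Trips = 0; Trips != 600; ++Trips)
    assert(expandDirect(7, 13, Trips) == expandPostScaled(7, 13, Trips));
  return 0;
}

The fix makes dominance the sole trigger for this fallback: having a
computable loop evolution says nothing about where the step's instructions
live, so the old conjunct could wrongly skip the rewrite.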


@@ -1,8 +1,8 @@
 ; RUN: llc -march=x86-64 < %s > /dev/null
-; rdar://7886751
 
 ; ScalarEvolution misses an opportunity to fold ((trunc x) + (trunc -x) + y),
 ; but LSR should tolerate this.
+; rdar://7886751
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin11.0"
@@ -26,3 +26,26 @@ bb15: ; preds = %bb21, %bb12
 bb24: ; preds = %bb21, %bb11
   unreachable
 }
+
+; ScalarEvolution should be able to correctly expand the crazy addrec here.
+; PR6914
+
+define void @int323() nounwind {
+entry:
+  br label %for.cond
+
+for.cond: ; preds = %lbl_264, %for.inc, %entry
+  %g_263.tmp.1 = phi i8 [ undef, %entry ], [ %g_263.tmp.1, %for.cond ]
+  %p_95.addr.0 = phi i8 [ 0, %entry ], [ %add, %for.cond ]
+  %add = add i8 %p_95.addr.0, 1 ; <i8> [#uses=1]
+  br i1 undef, label %for.cond, label %lbl_264
+
+lbl_264: ; preds = %if.end, %lbl_264.preheader
+  %g_263.tmp.0 = phi i8 [ %g_263.tmp.1, %for.cond ] ; <i8> [#uses=1]
+  %tmp7 = load i16* undef ; <i16> [#uses=1]
+  %conv8 = trunc i16 %tmp7 to i8 ; <i8> [#uses=1]
+  %mul.i = mul i8 %p_95.addr.0, %p_95.addr.0 ; <i8> [#uses=1]
+  %mul.i18 = mul i8 %mul.i, %conv8 ; <i8> [#uses=1]
+  %tobool12 = icmp eq i8 %mul.i18, 0 ; <i1> [#uses=1]
+  unreachable
+}
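The new function is a reduced case from PR6914. %tmp7, %conv8, and %mul.i18
are computed in lbl_264, after the for.cond loop, while %p_95.addr.0 is that
loop's {0,+,1} induction variable. An addrec built from these values has
operands that are loop-invariant for for.cond but defined after it, so they
dominate no part of the loop; under the old check the expander could skip
the post-loop-scale fallback for exactly this shape, and the dominance-only
test above is what lets it expand the recurrence correctly.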