From 40561dd0baeac9da7c119a87792cc4adfabf6952 Mon Sep 17 00:00:00 2001
From: Dan Gohman
Date: Mon, 26 Apr 2010 21:46:36 +0000
Subject: [PATCH] When checking whether the special handling for an addrec
 increment which doesn't dominate the header is needed, don't check whether
 the increment expression has computable loop evolution. While the operands
 of an addrec are required to be loop-invariant, they're not required to
 dominate any part of the loop. This fixes PR6914.

llvm-svn: 102389
---
 lib/Analysis/README.txt                  |  2 +-
 lib/Analysis/ScalarEvolutionExpander.cpp |  3 +--
 test/CodeGen/X86/lsr-delayed-fold.ll     | 25 +++++++++++++++++++++++-
 3 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/lib/Analysis/README.txt b/lib/Analysis/README.txt
index 88ea9f11ad7..0e96e4c950c 100644
--- a/lib/Analysis/README.txt
+++ b/lib/Analysis/README.txt
@@ -17,7 +17,7 @@ which is very inefficient when expanded into code.
 
 //===---------------------------------------------------------------------===//
 
-In test/CodeGen/X86/lsr-delayed-fold.ll,
+In formatValue in test/CodeGen/X86/lsr-delayed-fold.ll,
 
 ScalarEvolution is forming this expression:
 
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index b90f0515a42..e9a634bdee2 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -995,8 +995,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
   // Strip off any non-loop-dominating component from the addrec step.
   const SCEV *Step = Normalized->getStepRecurrence(SE);
   const SCEV *PostLoopScale = 0;
-  if (!Step->hasComputableLoopEvolution(L) &&
-      !Step->dominates(L->getHeader(), SE.DT)) {
+  if (!Step->dominates(L->getHeader(), SE.DT)) {
     PostLoopScale = Step;
     Step = SE.getIntegerSCEV(1, Normalized->getType());
     Normalized =
diff --git a/test/CodeGen/X86/lsr-delayed-fold.ll b/test/CodeGen/X86/lsr-delayed-fold.ll
index f160c2d92bb..17d6a4c0915 100644
--- a/test/CodeGen/X86/lsr-delayed-fold.ll
+++ b/test/CodeGen/X86/lsr-delayed-fold.ll
@@ -1,8 +1,8 @@
 ; RUN: llc -march=x86-64 < %s > /dev/null
-; rdar://7886751
 
 ; ScalarEvolution misses an opportunity to fold ((trunc x) + (trunc -x) + y),
 ; but LSR should tolerate this.
+; rdar://7886751
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin11.0"
@@ -26,3 +26,26 @@ bb15:                                             ; preds = %bb21, %bb12
 bb24:                                             ; preds = %bb21, %bb11
   unreachable
 }
+
+; ScalarEvolution should be able to correctly expand the crazy addrec here.
+; PR6914
+
+define void @int323() nounwind {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond, %entry
+  %g_263.tmp.1 = phi i8 [ undef, %entry ], [ %g_263.tmp.1, %for.cond ]
+  %p_95.addr.0 = phi i8 [ 0, %entry ], [ %add, %for.cond ]
+  %add = add i8 %p_95.addr.0, 1                   ; <i8> [#uses=1]
+  br i1 undef, label %for.cond, label %lbl_264
+
+lbl_264:                                          ; preds = %for.cond
+  %g_263.tmp.0 = phi i8 [ %g_263.tmp.1, %for.cond ] ; <i8> [#uses=0]
+  %tmp7 = load i16* undef                         ; <i16> [#uses=1]
+  %conv8 = trunc i16 %tmp7 to i8                  ; <i8> [#uses=1]
+  %mul.i = mul i8 %p_95.addr.0, %p_95.addr.0      ; <i8> [#uses=1]
+  %mul.i18 = mul i8 %mul.i, %conv8                ; <i8> [#uses=1]
+  %tobool12 = icmp eq i8 %mul.i18, 0              ; <i1> [#uses=0]
+  unreachable
+}
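
For reference, a minimal .ll sketch of the situation the commit message
describes (hypothetical code, not part of the patch; @sketch, %step, and
%prod are made-up names): %step is loop-invariant with respect to %loop,
yet it is defined in the loop's exit block, so it dominates no block of the
loop. ScalarEvolution would nonetheless fold %prod into the addrec
{0,+,%step}<%loop>, whose step cannot be emitted inside the loop:

  define i8 @sketch(i16* %p) nounwind {
  entry:
    br label %loop

  loop:                                   ; preds = %loop, %entry
    %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ] ; SCEV: {0,+,1}<%loop>
    %iv.next = add i8 %iv, 1
    br i1 undef, label %loop, label %exit

  exit:                                   ; preds = %loop
    ; %step is loop-invariant (defined outside %loop) but does not
    ; dominate %loop's header.
    %w = load i16* %p
    %step = trunc i16 %w to i8
    ; SCEV would fold {0,+,1}<%loop> * %step into {0,+,%step}<%loop>.
    %prod = mul i8 %iv, %step
    ret i8 %prod
  }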
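
And a companion sketch, equally hypothetical, of how the PostLoopScale path
guarded by the fixed condition can still expand such an addrec: because the
step is loop-invariant, {0,+,%step} is treated as %step * {0,+,1}; the
unit-stride recurrence is materialized inside the loop, and the
multiplication by %step is deferred to the expansion point, where %step is
available:

  loop:                                   ; preds = %loop, %entry
    %canon = phi i8 [ 0, %entry ], [ %canon.next, %loop ] ; {0,+,1}<%loop>
    %canon.next = add i8 %canon, 1
    br i1 undef, label %loop, label %exit

  exit:                                   ; preds = %loop
    %w = load i16* %p
    %step = trunc i16 %w to i8
    %expanded = mul i8 %canon, %step      ; PostLoopScale applied after the loop
    ret i8 %expanded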