Generalize IVUsers to track arbitrary expressions rather than expressions

explicitly split into stride-and-offset pairs. Also, add the ability to track multiple post-increment loops on the same expression. This refines the concept of "normalizing" SCEV expressions used for to post-increment uses, and introduces a dedicated utility routine for normalizing and denormalizing expressions. This fixes the expansion of expressions which are post-increment users of more than one loop at a time. More broadly, this takes LSR another step closer to being able to reason about more than one loop at a time. llvm-svn: 100699
2024-11-23 19:23:23 +01:00 · 2010-04-07 22:27:08 +00:00 · 2010-04-07 22:27:08 +00:00 · b5210c934f
commit b5210c934f
parent b024d0a612
10 changed files with 743 additions and 279 deletions
--- a/include/llvm/Analysis/IVUsers.h
+++ b/include/llvm/Analysis/IVUsers.h
@ -16,6 +16,7 @@
 #define LLVM_ANALYSIS_IVUSERS_H

 #include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolutionNormalization.h"
 #include "llvm/Support/ValueHandle.h"

 namespace llvm {
@ -26,17 +27,18 @@ class Value;
 class IVUsers;
 class ScalarEvolution;
 class SCEV;
+class IVUsers;

 /// IVStrideUse - Keep track of one use of a strided induction variable.
 /// The Expr member keeps track of the expression, User is the actual user
 /// instruction of the operand, and 'OperandValToReplace' is the operand of
 /// the User that is the use.
 class IVStrideUse : public CallbackVH, public ilist_node<IVStrideUse> {
+  friend class IVUsers;
 public:
-  IVStrideUse(IVUsers *P, const SCEV *S, const SCEV *Off,
+  IVStrideUse(IVUsers *P, const SCEV *E,
              Instruction* U, Value *O)
-    : CallbackVH(U), Parent(P), Stride(S), Offset(Off),
-      OperandValToReplace(O), IsUseOfPostIncrementedValue(false) {
+    : CallbackVH(U), Parent(P), Expr(E), OperandValToReplace(O) {
  }

  /// getUser - Return the user instruction for this use.
@ -53,23 +55,15 @@ public:
  /// this IVStrideUse.
  IVUsers *getParent() const { return Parent; }

-  /// getStride - Return the expression for the stride for the use.
-  const SCEV *getStride() const { return Stride; }
+  /// getExpr - Return the expression for the use.
+  const SCEV *getExpr() const { return Expr; }

-  /// setStride - Assign a new stride to this use.
-  void setStride(const SCEV *Val) {
-    Stride = Val;
+  /// setExpr - Assign a new expression to this use.
+  void setExpr(const SCEV *Val) {
+    Expr = Val;
  }

-  /// getOffset - Return the offset to add to a theoretical induction
-  /// variable that starts at zero and counts up by the stride to compute
-  /// the value for the use. This always has the same type as the stride.
-  const SCEV *getOffset() const { return Offset; }
-
-  /// setOffset - Assign a new offset to this use.
-  void setOffset(const SCEV *Val) {
-    Offset = Val;
-  }
+  const SCEV *getStride(const Loop *L) const;

  /// getOperandValToReplace - Return the Value of the operand in the user
  /// instruction that this IVStrideUse is representing.
@ -83,37 +77,30 @@ public:
    OperandValToReplace = Op;
  }

-  /// isUseOfPostIncrementedValue - True if this should use the
-  /// post-incremented version of this IV, not the preincremented version.
-  /// This can only be set in special cases, such as the terminating setcc
-  /// instruction for a loop or uses dominated by the loop.
-  bool isUseOfPostIncrementedValue() const {
-    return IsUseOfPostIncrementedValue;
+  /// getPostIncLoops - Return the set of loops for which the expression has
+  /// been adjusted to use post-inc mode.
+  const PostIncLoopSet &getPostIncLoops() const {
+    return PostIncLoops;
  }

-  /// setIsUseOfPostIncrmentedValue - set the flag that indicates whether
-  /// this is a post-increment use.
-  void setIsUseOfPostIncrementedValue(bool Val) {
-    IsUseOfPostIncrementedValue = Val;
-  }
+  /// transformToPostInc - Transform the expression to post-inc form for the
+  /// given loop.
+  void transformToPostInc(const Loop *L);

 private:
  /// Parent - a pointer to the IVUsers that owns this IVStrideUse.
  IVUsers *Parent;

-  /// Stride - The stride for this use.
-  const SCEV *Stride;
-
-  /// Offset - The offset to add to the base induction expression.
-  const SCEV *Offset;
+  /// Expr - The expression for this use.
+  const SCEV *Expr;

  /// OperandValToReplace - The Value of the operand in the user instruction
  /// that this IVStrideUse is representing.
  WeakVH OperandValToReplace;

-  /// IsUseOfPostIncrementedValue - True if this should use the
-  /// post-incremented version of this IV, not the preincremented version.
-  bool IsUseOfPostIncrementedValue;
+  /// PostIncLoops - The set of loops for which Expr has been adjusted to
+  /// use post-inc mode. This corresponds with SCEVExpander's post-inc concept.
+  PostIncLoopSet PostIncLoops;

  /// Deleted - Implementation of CallbackVH virtual function to
  /// receive notification when the User is deleted.
@ -174,18 +161,13 @@ public:
  /// return true.  Otherwise, return false.
  bool AddUsersIfInteresting(Instruction *I);

-  IVStrideUse &AddUser(const SCEV *Stride, const SCEV *Offset,
+  IVStrideUse &AddUser(const SCEV *Expr,
                       Instruction *User, Value *Operand);

  /// getReplacementExpr - Return a SCEV expression which computes the
  /// value of the OperandValToReplace of the given IVStrideUse.
  const SCEV *getReplacementExpr(const IVStrideUse &U) const;

-  /// getCanonicalExpr - Return a SCEV expression which computes the
-  /// value of the SCEV of the given IVStrideUse, ignoring the 
-  /// isUseOfPostIncrementedValue flag.
-  const SCEV *getCanonicalExpr(const IVStrideUse &U) const;
-
  typedef ilist<IVStrideUse>::iterator iterator;
  typedef ilist<IVStrideUse>::const_iterator const_iterator;
  iterator begin() { return IVUses.begin(); }
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@ -15,6 +15,7 @@
 #define LLVM_ANALYSIS_SCALAREVOLUTION_EXPANDER_H

 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ScalarEvolutionNormalization.h"
 #include "llvm/Support/IRBuilder.h"
 #include "llvm/Support/TargetFolder.h"
 #include <set>
@ -32,12 +33,12 @@ namespace llvm {
      InsertedExpressions;
    std::set<Value*> InsertedValues;

-    /// PostIncLoop - When non-null, expanded addrecs referring to the given
-    /// loop expanded in post-inc mode. For example, expanding {1,+,1}<L> in
-    /// post-inc mode returns the add instruction that adds one to the phi
-    /// for {0,+,1}<L>, as opposed to a new phi starting at 1. This is only
-    /// supported in non-canonical mode.
-    const Loop *PostIncLoop;
+    /// PostIncLoops - Addrecs referring to any of the given loops are expanded
+    /// in post-inc mode. For example, expanding {1,+,1}<L> in post-inc mode
+    /// returns the add instruction that adds one to the phi for {0,+,1}<L>,
+    /// as opposed to a new phi starting at 1. This is only supported in
+    /// non-canonical mode.
+    PostIncLoopSet PostIncLoops;

    /// IVIncInsertPos - When this is non-null, addrecs expanded in the
    /// loop it indicates should be inserted with increments at
@ -62,7 +63,7 @@ namespace llvm {
  public:
    /// SCEVExpander - Construct a SCEVExpander in "canonical" mode.
    explicit SCEVExpander(ScalarEvolution &se)
-      : SE(se), PostIncLoop(0), IVIncInsertLoop(0), CanonicalMode(true),
+      : SE(se), IVIncInsertLoop(0), CanonicalMode(true),
        Builder(se.getContext(), TargetFolder(se.TD)) {}

    /// clear - Erase the contents of the InsertedExpressions map so that users
@ -89,14 +90,18 @@ namespace llvm {
      IVIncInsertPos = Pos;
    }

-    /// setPostInc - If L is non-null, enable post-inc expansion for addrecs
-    /// referring to the given loop. If L is null, disable post-inc expansion
-    /// completely. Post-inc expansion is only supported in non-canonical
+    /// setPostInc - Enable post-inc expansion for addrecs referring to the
+    /// given loops. Post-inc expansion is only supported in non-canonical
    /// mode.
-    void setPostInc(const Loop *L) {
+    void setPostInc(const PostIncLoopSet &L) {
      assert(!CanonicalMode &&
             "Post-inc expansion is not supported in CanonicalMode");
-      PostIncLoop = L;
+      PostIncLoops = L;
+    }
+
+    /// clearPostInc - Disable all post-inc expansion.
+    void clearPostInc() {
+      PostIncLoops.clear();
    }

    /// disableCanonicalMode - Disable the behavior of expanding expressions in
--- a/include/llvm/Analysis/ScalarEvolutionNormalization.h
+++ b/include/llvm/Analysis/ScalarEvolutionNormalization.h
@ -0,0 +1,78 @@
+//===- llvm/Analysis/ScalarEvolutionNormalization.h - See below -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines utilities for working with "normalized" ScalarEvolution
+// expressions.
+//
+// The following example illustrates post-increment uses and how normalized
+// expressions help.
+//
+//   for (i=0; i!=n; ++i) {
+//     ...
+//   }
+//   use(i);
+//
+// While the expression for most uses of i inside the loop is {0,+,1}<%L>, the
+// expression for the use of i outside the loop is {1,+,1}<%L>, since i is
+// incremented at the end of the loop body. This is inconveient, since it
+// suggests that we need two different induction variables, one that starts
+// at 0 and one that starts at 1. We'd prefer to be able to think of these as
+// the same induction variable, with uses inside the loop using the
+// "pre-incremented" value, and uses after the loop using the
+// "post-incremented" value.
+//
+// Expressions for post-incremented uses are represented as an expression
+// paired with a set of loops for which the expression is in "post-increment"
+// mode (there may be multiple loops).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_SCALAREVOLUTION_NORMALIZATION_H
+#define LLVM_ANALYSIS_SCALAREVOLUTION_NORMALIZATION_H
+
+#include "llvm/ADT/SmallPtrSet.h"
+
+namespace llvm {
+
+class Instruction;
+class DominatorTree;
+class Loop;
+class ScalarEvolution;
+class SCEV;
+class Value;
+
+/// TransformKind - Different types of transformations that
+/// TransformForPostIncUse can do.
+enum TransformKind {
+  /// Normalize - Normalize according to the given loops.
+  Normalize,
+  /// NormalizeAutodetect - Detect post-inc opportunities on new expressions,
+  /// update the given loop set, and normalize.
+  NormalizeAutodetect,
+  /// Denormalize - Perform the inverse transform on the expression with the
+  /// given loop set.
+  Denormalize
+};
+
+/// PostIncLoopSet - A set of loops.
+typedef SmallPtrSet<const Loop *, 2> PostIncLoopSet;
+
+/// TransformForPostIncUse - Transform the given expression according to the
+/// given transformation kind.
+const SCEV *TransformForPostIncUse(TransformKind Kind,
+                                   const SCEV *S,
+                                   Instruction *User,
+                                   Value *OperandValToReplace,
+                                   PostIncLoopSet &Loops,
+                                   ScalarEvolution &SE,
+                                   DominatorTree &DT);
+
+}
+
+#endif
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@ -62,120 +62,34 @@ static void CollectSubexprs(const SCEV *S,
  Ops.push_back(S);
 }

-/// getSCEVStartAndStride - Compute the start and stride of this expression,
-/// returning false if the expression is not a start/stride pair, or true if it
-/// is.  The stride must be a loop invariant expression, but the start may be
-/// a mix of loop invariant and loop variant expressions.  The start cannot,
-/// however, contain an AddRec from a different loop, unless that loop is an
-/// outer loop of the current loop.
-static bool getSCEVStartAndStride(const SCEV *&SH, Loop *L, Loop *UseLoop,
-                                  const SCEV *&Start, const SCEV *&Stride,
-                                  ScalarEvolution *SE, DominatorTree *DT) {
-  const SCEV *TheAddRec = Start;   // Initialize to zero.
-
-  // If the outer level is an AddExpr, the operands are all start values except
-  // for a nested AddRecExpr.
-  if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(SH)) {
-    for (unsigned i = 0, e = AE->getNumOperands(); i != e; ++i)
-      if (const SCEVAddRecExpr *AddRec =
-             dyn_cast<SCEVAddRecExpr>(AE->getOperand(i)))
-        TheAddRec = SE->getAddExpr(AddRec, TheAddRec);
-      else
-        Start = SE->getAddExpr(Start, AE->getOperand(i));
-  } else if (isa<SCEVAddRecExpr>(SH)) {
-    TheAddRec = SH;
-  } else {
-    return false;  // not analyzable.
-  }
-
-  // Break down TheAddRec into its component parts.
-  SmallVector<const SCEV *, 4> Subexprs;
-  CollectSubexprs(TheAddRec, Subexprs, *SE);
-
-  // Look for an addrec on the current loop among the parts.
-  const SCEV *AddRecStride = 0;
-  for (SmallVectorImpl<const SCEV *>::iterator I = Subexprs.begin(),
-       E = Subexprs.end(); I != E; ++I) {
-    const SCEV *S = *I;
-    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
-      if (AR->getLoop() == L) {
-        *I = AR->getStart();
-        AddRecStride = AR->getStepRecurrence(*SE);
-        break;
-      }
-  }
-  if (!AddRecStride)
-    return false;
-
-  // Add up everything else into a start value (which may not be
-  // loop-invariant).
-  const SCEV *AddRecStart = SE->getAddExpr(Subexprs);
-
-  // Use getSCEVAtScope to attempt to simplify other loops out of
-  // the picture.
-  AddRecStart = SE->getSCEVAtScope(AddRecStart, UseLoop);
-
-  Start = SE->getAddExpr(Start, AddRecStart);
-
-  // If stride is an instruction, make sure it properly dominates the header.
-  // Otherwise we could end up with a use before def situation.
-  if (!isa<SCEVConstant>(AddRecStride)) {
-    BasicBlock *Header = L->getHeader();
-    if (!AddRecStride->properlyDominates(Header, DT))
-      return false;
-
-    DEBUG(dbgs() << "[";
-          WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false);
-          dbgs() << "] Variable stride: " << *AddRecStride << "\n");
-  }
-
-  Stride = AddRecStride;
-  return true;
-}
-
-/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression
-/// and now we need to decide whether the user should use the preinc or post-inc
-/// value.  If this user should use the post-inc version of the IV, return true.
-///
-/// Choosing wrong here can break dominance properties (if we choose to use the
-/// post-inc value when we cannot) or it can end up adding extra live-ranges to
-/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we
-/// should use the post-inc value).
-static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV,
-                                       const Loop *L, DominatorTree *DT) {
-  // If the user is in the loop, use the preinc value.
-  if (L->contains(User)) return false;
-
-  BasicBlock *LatchBlock = L->getLoopLatch();
-  if (!LatchBlock)
-    return false;
-
-  // Ok, the user is outside of the loop.  If it is dominated by the latch
-  // block, use the post-inc value.
-  if (DT->dominates(LatchBlock, User->getParent()))
+/// isInteresting - Test whether the given expression is "interesting" when
+/// used by the given expression, within the context of analyzing the
+/// given loop.
+static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L) {
+  // Anything loop-invariant is interesting.
+  if (!isa<SCEVUnknown>(S) && S->isLoopInvariant(L))
    return true;

-  // There is one case we have to be careful of: PHI nodes.  These little guys
-  // can live in blocks that are not dominated by the latch block, but (since
-  // their uses occur in the predecessor block, not the block the PHI lives in)
-  // should still use the post-inc value.  Check for this case now.
-  PHINode *PN = dyn_cast<PHINode>(User);
-  if (!PN) return false;  // not a phi, not dominated by latch block.
+  // An addrec is interesting if it's affine or if it has an interesting start.
+  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+    // Keep things simple. Don't touch loop-variant strides.
+    if (AR->getLoop() == L && (AR->isAffine() || !L->contains(I)))
+        return true;
+    // Otherwise recurse to see if the start value is interesting.
+    return isInteresting(AR->getStart(), I, L);
+  }

-  // Look at all of the uses of IV by the PHI node.  If any use corresponds to
-  // a block that is not dominated by the latch block, give up and use the
-  // preincremented value.
-  unsigned NumUses = 0;
-  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
-    if (PN->getIncomingValue(i) == IV) {
-      ++NumUses;
-      if (!DT->dominates(LatchBlock, PN->getIncomingBlock(i)))
-        return false;
-    }
+  // An add is interesting if any of its operands is.
+  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+    for (SCEVAddExpr::op_iterator OI = Add->op_begin(), OE = Add->op_end();
+         OI != OE; ++OI)
+      if (isInteresting(*OI, I, L))
+        return true;
+    return false;
+  }

-  // Okay, all uses of IV by PN are in predecessor blocks that really are
-  // dominated by the latch block.  Use the post-incremented value.
-  return true;
+  // Nothing else is interesting here.
+  return false;
 }

 /// AddUsersIfInteresting - Inspect the specified instruction.  If it is a
@ -196,16 +110,9 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
  const SCEV *ISE = SE->getSCEV(I);
  if (isa<SCEVCouldNotCompute>(ISE)) return false;

-  // Get the start and stride for this expression.
-  Loop *UseLoop = LI->getLoopFor(I->getParent());
-  const SCEV *Start = SE->getIntegerSCEV(0, ISE->getType());
-  const SCEV *Stride = Start;
-
-  if (!getSCEVStartAndStride(ISE, L, UseLoop, Start, Stride, SE, DT))
-    return false;  // Non-reducible symbolic expression, bail out.
-
-  // Keep things simple. Don't touch loop-variant strides.
-  if (!Stride->isLoopInvariant(L) && L->contains(I))
+  // If we've come to an uninteresting expression, stop the traversal and
+  // call this a user.
+  if (!isInteresting(ISE, I, L))
    return false;

  SmallPtrSet<Instruction *, 4> UniqueUsers;
@ -241,27 +148,24 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
    }

    if (AddUserToIVUsers) {
-      // Okay, we found a user that we cannot reduce.  Analyze the instruction
-      // and decide what to do with it.  If we are a use inside of the loop, use
-      // the value before incrementation, otherwise use it after incrementation.
-      if (IVUseShouldUsePostIncValue(User, I, L, DT)) {
-        // The value used will be incremented by the stride more than we are
-        // expecting, so subtract this off.
-        const SCEV *NewStart = SE->getMinusSCEV(Start, Stride);
-        IVUses.push_back(new IVStrideUse(this, Stride, NewStart, User, I));
-        IVUses.back().setIsUseOfPostIncrementedValue(true);
-        DEBUG(dbgs() << "   USING POSTINC SCEV, START=" << *NewStart<< "\n");
-      } else {
-        IVUses.push_back(new IVStrideUse(this, Stride, Start, User, I));
-      }
+      // Okay, we found a user that we cannot reduce.
+      IVUses.push_back(new IVStrideUse(this, ISE, User, I));
+      IVStrideUse &NewUse = IVUses.back();
+      // Transform the expression into a normalized form.
+      NewUse.Expr =
+        TransformForPostIncUse(NormalizeAutodetect, NewUse.Expr,
+                               User, I,
+                               NewUse.PostIncLoops,
+                               *SE, *DT);
+      DEBUG(dbgs() << "   NORMALIZED TO: " << *NewUse.Expr << '\n');
    }
  }
  return true;
 }

-IVStrideUse &IVUsers::AddUser(const SCEV *Stride, const SCEV *Offset,
+IVStrideUse &IVUsers::AddUser(const SCEV *Expr,
                              Instruction *User, Value *Operand) {
-  IVUses.push_back(new IVStrideUse(this, Stride, Offset, User, Operand));
+  IVUses.push_back(new IVStrideUse(this, Expr, User, Operand));
  return IVUses.back();
 }

@ -295,30 +199,10 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
 /// getReplacementExpr - Return a SCEV expression which computes the
 /// value of the OperandValToReplace of the given IVStrideUse.
 const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &U) const {
-  // Start with zero.
-  const SCEV *RetVal = SE->getIntegerSCEV(0, U.getStride()->getType());
-  // Create the basic add recurrence.
-  RetVal = SE->getAddRecExpr(RetVal, U.getStride(), L);
-  // Add the offset in a separate step, because it may be loop-variant.
-  RetVal = SE->getAddExpr(RetVal, U.getOffset());
-  // For uses of post-incremented values, add an extra stride to compute
-  // the actual replacement value.
-  if (U.isUseOfPostIncrementedValue())
-    RetVal = SE->getAddExpr(RetVal, U.getStride());
-  return RetVal;
-}
-
-/// getCanonicalExpr - Return a SCEV expression which computes the
-/// value of the SCEV of the given IVStrideUse, ignoring the 
-/// isUseOfPostIncrementedValue flag.
-const SCEV *IVUsers::getCanonicalExpr(const IVStrideUse &U) const {
-  // Start with zero.
-  const SCEV *RetVal = SE->getIntegerSCEV(0, U.getStride()->getType());
-  // Create the basic add recurrence.
-  RetVal = SE->getAddRecExpr(RetVal, U.getStride(), L);
-  // Add the offset in a separate step, because it may be loop-variant.
-  RetVal = SE->getAddExpr(RetVal, U.getOffset());
-  return RetVal;
+  PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(U.PostIncLoops);
+  return TransformForPostIncUse(Denormalize, U.getExpr(),
+                                U.getUser(), U.getOperandValToReplace(),
+                                Loops, *SE, *DT);
 }

 void IVUsers::print(raw_ostream &OS, const Module *M) const {
@ -339,8 +223,13 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const {
    WriteAsOperand(OS, UI->getOperandValToReplace(), false);
    OS << " = "
       << *getReplacementExpr(*UI);
-    if (UI->isUseOfPostIncrementedValue())
-      OS << " (post-inc)";
+    for (PostIncLoopSet::const_iterator
+         I = UI->PostIncLoops.begin(),
+         E = UI->PostIncLoops.end(); I != E; ++I) {
+      OS << " (post-inc with loop ";
+      WriteAsOperand(OS, (*I)->getHeader(), false);
+      OS << ")";
+    }
    OS << " in  ";
    UI->getUser()->print(OS, &Annotator);
    OS << '\n';
@ -356,6 +245,39 @@ void IVUsers::releaseMemory() {
  IVUses.clear();
 }

+static const SCEVAddRecExpr *findAddRecForLoop(const SCEV *S, const Loop *L) {
+  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+    if (AR->getLoop() == L)
+      return AR;
+    return findAddRecForLoop(AR->getStart(), L);
+  }
+
+  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+    for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+         I != E; ++I)
+      if (const SCEVAddRecExpr *AR = findAddRecForLoop(*I, L))
+        return AR;
+    return 0;
+  }
+
+  return 0;
+}
+
+const SCEV *IVStrideUse::getStride(const Loop *L) const {
+  if (const SCEVAddRecExpr *AR = findAddRecForLoop(getExpr(), L))
+    return AR->getStepRecurrence(*Parent->SE);
+  return 0;
+}
+
+void IVStrideUse::transformToPostInc(const Loop *L) {
+  PostIncLoopSet Loops;
+  Loops.insert(L);
+  Expr = TransformForPostIncUse(Normalize, Expr,
+                                getUser(), getOperandValToReplace(),
+                                Loops, *Parent->SE, *Parent->DT);
+  PostIncLoops.insert(L);
+}
+
 void IVStrideUse::deleted() {
  // Remove this user from the list.
  Parent->IVUses.erase(this);
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@ -966,9 +966,12 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
  // Determine a normalized form of this expression, which is the expression
  // before any post-inc adjustment is made.
  const SCEVAddRecExpr *Normalized = S;
-  if (L == PostIncLoop) {
-    const SCEV *Step = S->getStepRecurrence(SE);
-    Normalized = cast<SCEVAddRecExpr>(SE.getMinusSCEV(S, Step));
+  if (PostIncLoops.count(L)) {
+    PostIncLoopSet Loops;
+    Loops.insert(L);
+    Normalized =
+      cast<SCEVAddRecExpr>(TransformForPostIncUse(Normalize, S, 0, 0,
+                                                  Loops, SE, *SE.DT));
  }

  // Strip off any non-loop-dominating component from the addrec start.
@ -1002,7 +1005,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {

  // Accommodate post-inc mode, if necessary.
  Value *Result;
-  if (L != PostIncLoop)
+  if (!PostIncLoops.count(L))
    Result = PN;
  else {
    // In PostInc mode, use the post-incremented value.
@ -1274,7 +1277,7 @@ Value *SCEVExpander::expand(const SCEV *S) {
      // If the SCEV is computable at this level, insert it into the header
      // after the PHIs (and after any other instructions that we've inserted
      // there) so that it is guaranteed to dominate any user inside the loop.
-      if (L && S->hasComputableLoopEvolution(L) && L != PostIncLoop)
+      if (L && S->hasComputableLoopEvolution(L) && !PostIncLoops.count(L))
        InsertPt = L->getHeader()->getFirstNonPHI();
      while (isInsertedInstruction(InsertPt) || isa<DbgInfoIntrinsic>(InsertPt))
        InsertPt = llvm::next(BasicBlock::iterator(InsertPt));
@ -1296,7 +1299,7 @@ Value *SCEVExpander::expand(const SCEV *S) {
  Value *V = visit(S);

  // Remember the expanded value for this SCEV at this location.
-  if (!PostIncLoop)
+  if (PostIncLoops.empty())
    InsertedExpressions[std::make_pair(S, InsertPt)] = V;

  restoreInsertPoint(SaveInsertBB, SaveInsertPt);
@ -1304,7 +1307,7 @@ Value *SCEVExpander::expand(const SCEV *S) {
 }

 void SCEVExpander::rememberInstruction(Value *I) {
-  if (!PostIncLoop)
+  if (PostIncLoops.empty())
    InsertedValues.insert(I);

  // If we just claimed an existing instruction and that instruction had
--- a/lib/Analysis/ScalarEvolutionNormalization.cpp
+++ b/lib/Analysis/ScalarEvolutionNormalization.cpp
@ -0,0 +1,150 @@
+//===- ScalarEvolutionNormalization.cpp - See below -------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements utilities for working with "normalized" expressions.
+// See the comments at the top of ScalarEvolutionNormalization.h for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ScalarEvolutionNormalization.h"
+using namespace llvm;
+
+/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression
+/// and now we need to decide whether the user should use the preinc or post-inc
+/// value.  If this user should use the post-inc version of the IV, return true.
+///
+/// Choosing wrong here can break dominance properties (if we choose to use the
+/// post-inc value when we cannot) or it can end up adding extra live-ranges to
+/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we
+/// should use the post-inc value).
+static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV,
+                                       const Loop *L, DominatorTree *DT) {
+  // If the user is in the loop, use the preinc value.
+  if (L->contains(User)) return false;
+
+  BasicBlock *LatchBlock = L->getLoopLatch();
+  if (!LatchBlock)
+    return false;
+
+  // Ok, the user is outside of the loop.  If it is dominated by the latch
+  // block, use the post-inc value.
+  if (DT->dominates(LatchBlock, User->getParent()))
+    return true;
+
+  // There is one case we have to be careful of: PHI nodes.  These little guys
+  // can live in blocks that are not dominated by the latch block, but (since
+  // their uses occur in the predecessor block, not the block the PHI lives in)
+  // should still use the post-inc value.  Check for this case now.
+  PHINode *PN = dyn_cast<PHINode>(User);
+  if (!PN) return false;  // not a phi, not dominated by latch block.
+
+  // Look at all of the uses of IV by the PHI node.  If any use corresponds to
+  // a block that is not dominated by the latch block, give up and use the
+  // preincremented value.
+  unsigned NumUses = 0;
+  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+    if (PN->getIncomingValue(i) == IV) {
+      ++NumUses;
+      if (!DT->dominates(LatchBlock, PN->getIncomingBlock(i)))
+        return false;
+    }
+
+  // Okay, all uses of IV by PN are in predecessor blocks that really are
+  // dominated by the latch block.  Use the post-incremented value.
+  return true;
+}
+
+const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
+                                         const SCEV *S,
+                                         Instruction *User,
+                                         Value *OperandValToReplace,
+                                         PostIncLoopSet &Loops,
+                                         ScalarEvolution &SE,
+                                         DominatorTree &DT) {
+  if (isa<SCEVConstant>(S) || isa<SCEVUnknown>(S))
+    return S;
+  if (const SCEVCastExpr *X = dyn_cast<SCEVCastExpr>(S)) {
+    const SCEV *O = X->getOperand();
+    const SCEV *N = TransformForPostIncUse(Kind, O, User, OperandValToReplace,
+                                           Loops, SE, DT);
+    if (O != N)
+      switch (S->getSCEVType()) {
+      case scZeroExtend: return SE.getZeroExtendExpr(N, S->getType());
+      case scSignExtend: return SE.getSignExtendExpr(N, S->getType());
+      case scTruncate: return SE.getTruncateExpr(N, S->getType());
+      default: llvm_unreachable("Unexpected SCEVCastExpr kind!");
+      }
+    return S;
+  }
+  if (const SCEVNAryExpr *X = dyn_cast<SCEVNAryExpr>(S)) {
+    SmallVector<const SCEV *, 8> Operands;
+    bool Changed = false;
+    for (SCEVNAryExpr::op_iterator I = X->op_begin(), E = X->op_end();
+         I != E; ++I) {
+      const SCEV *O = *I;
+      const SCEV *N = TransformForPostIncUse(Kind, O, User, OperandValToReplace,
+                                             Loops, SE, DT);
+      Changed |= N != O;
+      Operands.push_back(N);
+    }
+    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+      // An addrec. This is the interesting part.
+      const Loop *L = AR->getLoop();
+      const SCEV *Result = SE.getAddRecExpr(Operands, L);
+      switch (Kind) {
+      default: llvm_unreachable("Unexpected transform name!");
+      case NormalizeAutodetect:
+        if (Instruction *OI = dyn_cast<Instruction>(OperandValToReplace))
+          if (IVUseShouldUsePostIncValue(User, OI, L, &DT)) {
+            Result = SE.getMinusSCEV(Result, AR->getStepRecurrence(SE));
+            Loops.insert(L);
+          }
+        break;
+      case Normalize:
+        if (Loops.count(L))
+          Result = SE.getMinusSCEV(Result, AR->getStepRecurrence(SE));
+        break;
+      case Denormalize:
+        if (Loops.count(L)) {
+          const SCEV *TransformedStep =
+            TransformForPostIncUse(Kind, AR->getStepRecurrence(SE),
+                                   User, OperandValToReplace, Loops, SE, DT);
+          Result = SE.getAddExpr(Result, TransformedStep);
+        }
+        break;
+      }
+      return Result;
+    }
+    if (Changed)
+      switch (S->getSCEVType()) {
+      case scAddExpr: return SE.getAddExpr(Operands);
+      case scMulExpr: return SE.getMulExpr(Operands);
+      case scSMaxExpr: return SE.getSMaxExpr(Operands);
+      case scUMaxExpr: return SE.getUMaxExpr(Operands);
+      default: llvm_unreachable("Unexpected SCEVNAryExpr kind!");
+      }
+    return S;
+  }
+  if (const SCEVUDivExpr *X = dyn_cast<SCEVUDivExpr>(S)) {
+    const SCEV *LO = X->getLHS();
+    const SCEV *RO = X->getRHS();
+    const SCEV *LN = TransformForPostIncUse(Kind, LO, User, OperandValToReplace,
+                                            Loops, SE, DT);
+    const SCEV *RN = TransformForPostIncUse(Kind, RO, User, OperandValToReplace,
+                                            Loops, SE, DT);
+    if (LO != LN || RO != RN)
+      return SE.getUDivExpr(LN, RN);
+    return S;
+  }
+  llvm_unreachable("Unexpected SCEV kind!");
+  return 0;
+}
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@ -454,6 +454,46 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
  return Changed;
 }

+// FIXME: It is an extremely bad idea to indvar substitute anything more
+// complex than affine induction variables.  Doing so will put expensive
+// polynomial evaluations inside of the loop, and the str reduction pass
+// currently can only reduce affine polynomials.  For now just disable
+// indvar subst on anything more complex than an affine addrec, unless
+// it can be expanded to a trivial value.
+static bool isSafe(const SCEV *S, const Loop *L) {
+  // Loop-invariant values are safe.
+  if (S->isLoopInvariant(L)) return true;
+
+  // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how
+  // to transform them into efficient code.
+  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+    return AR->isAffine();
+
+  // An add is safe it all its operands are safe.
+  if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) {
+    for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(),
+         E = Commutative->op_end(); I != E; ++I)
+      if (!isSafe(*I, L)) return false;
+    return true;
+  }
+  
+  // A cast is safe if its operand is.
+  if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
+    return isSafe(C->getOperand(), L);
+
+  // A udiv is safe if its operands are.
+  if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S))
+    return isSafe(UD->getLHS(), L) &&
+           isSafe(UD->getRHS(), L);
+
+  // SCEVUnknown is always safe.
+  if (isa<SCEVUnknown>(S))
+    return true;
+
+  // Nothing else is safe.
+  return false;
+}
+
 void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<WeakVH, 16> DeadInsts;

@ -465,7 +505,6 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
  // the need for the code evaluation methods to insert induction variables
  // of different sizes.
  for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) {
-    const SCEV *Stride = UI->getStride();
    Value *Op = UI->getOperandValToReplace();
    const Type *UseTy = Op->getType();
    Instruction *User = UI->getUser();
@ -486,7 +525,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
    // currently can only reduce affine polynomials.  For now just disable
    // indvar subst on anything more complex than an affine addrec, unless
    // it can be expanded to a trivial value.
-    if (!AR->isLoopInvariant(L) && !Stride->isLoopInvariant(L))
+    if (!isSafe(AR, L))
      continue;

    // Determine the insertion point for this user. By default, insert
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@ -781,10 +781,10 @@ struct LSRFixup {
  /// will be replaced.
  Value *OperandValToReplace;

-  /// PostIncLoop - If this user is to use the post-incremented value of an
+  /// PostIncLoops - If this user is to use the post-incremented value of an
  /// induction variable, this variable is non-null and holds the loop
  /// associated with the induction variable.
-  const Loop *PostIncLoop;
+  PostIncLoopSet PostIncLoops;

  /// LUIdx - The index of the LSRUse describing the expression which
  /// this fixup needs, minus an offset (below).
@ -795,6 +795,8 @@ struct LSRFixup {
  /// offsets, for example in an unrolled loop.
  int64_t Offset;

+  bool isUseFullyOutsideLoop(const Loop *L) const;
+
  LSRFixup();

  void print(raw_ostream &OS) const;
@ -804,9 +806,24 @@ struct LSRFixup {
 }

 LSRFixup::LSRFixup()
-  : UserInst(0), OperandValToReplace(0), PostIncLoop(0),
+  : UserInst(0), OperandValToReplace(0),
    LUIdx(~size_t(0)), Offset(0) {}

+/// isUseFullyOutsideLoop - Test whether this fixup always uses its
+/// value outside of the given loop.
+bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
+  // PHI nodes use their value in their incoming blocks.
+  if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+      if (PN->getIncomingValue(i) == OperandValToReplace &&
+          L->contains(PN->getIncomingBlock(i)))
+        return false;
+    return true;
+  }
+
+  return !L->contains(UserInst);
+}
+
 void LSRFixup::print(raw_ostream &OS) const {
  OS << "UserInst=";
  // Store is common and interesting enough to be worth special-casing.
@ -821,9 +838,10 @@ void LSRFixup::print(raw_ostream &OS) const {
  OS << ", OperandValToReplace=";
  WriteAsOperand(OS, OperandValToReplace, /*PrintType=*/false);

-  if (PostIncLoop) {
+  for (PostIncLoopSet::const_iterator I = PostIncLoops.begin(),
+       E = PostIncLoops.end(); I != E; ++I) {
    OS << ", PostIncLoop=";
-    WriteAsOperand(OS, PostIncLoop->getHeader(), /*PrintType=*/false);
+    WriteAsOperand(OS, (*I)->getHeader(), /*PrintType=*/false);
  }

  if (LUIdx != ~size_t(0))
@ -1545,8 +1563,9 @@ LSRInstance::OptimizeLoopTermCond() {
            !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {
          // Conservatively assume there may be reuse if the quotient of their
          // strides could be a legal scale.
-          const SCEV *A = CondUse->getStride();
-          const SCEV *B = UI->getStride();
+          const SCEV *A = CondUse->getStride(L);
+          const SCEV *B = UI->getStride(L);
+          if (!A || !B) continue;
          if (SE.getTypeSizeInBits(A->getType()) !=
              SE.getTypeSizeInBits(B->getType())) {
            if (SE.getTypeSizeInBits(A->getType()) >
@ -1598,7 +1617,7 @@ LSRInstance::OptimizeLoopTermCond() {
        ExitingBlock->getInstList().insert(TermBr, Cond);

        // Clone the IVUse, as the old use still exists!
-        CondUse = &IU.AddUser(CondUse->getStride(), CondUse->getOffset(),
+        CondUse = &IU.AddUser(CondUse->getExpr(),
                              Cond, CondUse->getOperandValToReplace());
        TermBr->replaceUsesOfWith(OldCond, Cond);
      }
@ -1607,9 +1626,7 @@ LSRInstance::OptimizeLoopTermCond() {
    // If we get to here, we know that we can transform the setcc instruction to
    // use the post-incremented version of the IV, allowing us to coalesce the
    // live ranges for the IV correctly.
-    CondUse->setOffset(SE.getMinusSCEV(CondUse->getOffset(),
-                                       CondUse->getStride()));
-    CondUse->setIsUseOfPostIncrementedValue(true);
+    CondUse->transformToPostInc(L);
    Changed = true;

    PostIncs.insert(Cond);
@ -1717,19 +1734,24 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
  SmallSetVector<const SCEV *, 4> Strides;

  // Collect interesting types and strides.
+  SmallVector<const SCEV *, 4> Worklist;
  for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
-    const SCEV *Stride = UI->getStride();
+    const SCEV *Expr = UI->getExpr();

    // Collect interesting types.
-    Types.insert(SE.getEffectiveSCEVType(Stride->getType()));
+    Types.insert(SE.getEffectiveSCEVType(Expr->getType()));

-    // Add the stride for this loop.
-    Strides.insert(Stride);
-
-    // Add strides for other mentioned loops.
-    for (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(UI->getOffset());
-         AR; AR = dyn_cast<SCEVAddRecExpr>(AR->getStart()))
-      Strides.insert(AR->getStepRecurrence(SE));
+    // Add strides for mentioned loops.
+    Worklist.push_back(Expr);
+    do {
+      const SCEV *S = Worklist.pop_back_val();
+      if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+        Strides.insert(AR->getStepRecurrence(SE));
+        Worklist.push_back(AR->getStart());
+      } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+        Worklist.insert(Worklist.end(), Add->op_begin(), Add->op_end());
+      }
+    } while (!Worklist.empty());
  }

  // Compute interesting factors from the set of interesting strides.
@ -1776,8 +1798,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
    LSRFixup &LF = getNewFixup();
    LF.UserInst = UI->getUser();
    LF.OperandValToReplace = UI->getOperandValToReplace();
-    if (UI->isUseOfPostIncrementedValue())
-      LF.PostIncLoop = L;
+    LF.PostIncLoops = UI->getPostIncLoops();

    LSRUse::KindType Kind = LSRUse::Basic;
    const Type *AccessTy = 0;
@ -1786,7 +1807,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
      AccessTy = getAccessType(LF.UserInst);
    }

-    const SCEV *S = IU.getCanonicalExpr(*UI);
+    const SCEV *S = UI->getExpr();

    // Equality (== and !=) ICmps are special. We can rewrite (i == N) as
    // (N - i == 0), and this allows (N - i) to be the expression that we work
@ -1824,7 +1845,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
    LF.LUIdx = P.first;
    LF.Offset = P.second;
    LSRUse &LU = Uses[LF.LUIdx];
-    LU.AllFixupsOutsideLoop &= !L->contains(LF.UserInst);
+    LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);

    // If this is the first use of this LSRUse, give it a formula.
    if (LU.Formulae.empty()) {
@ -1936,7 +1957,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
        LF.LUIdx = P.first;
        LF.Offset = P.second;
        LSRUse &LU = Uses[LF.LUIdx];
-        LU.AllFixupsOutsideLoop &= L->contains(LF.UserInst);
+        LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
        InsertSupplementalFormula(U, LU, LF.LUIdx);
        CountRegisters(LU.Formulae.back(), Uses.size() - 1);
        break;
@ -2783,8 +2804,8 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
                           SmallVectorImpl<WeakVH> &DeadInsts) const {
  const LSRUse &LU = Uses[LF.LUIdx];

-  // Then, collect some instructions which we will remain dominated by when
-  // expanding the replacement. These must be dominated by any operands that
+  // Then, collect some instructions which must be dominated by the
+  // expanding replacement. These must be dominated by any operands that
  // will be required in the expansion.
  SmallVector<Instruction *, 4> Inputs;
  if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
@ -2793,8 +2814,8 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
    if (Instruction *I =
          dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
      Inputs.push_back(I);
-  if (LF.PostIncLoop) {
-    if (!L->contains(LF.UserInst))
+  if (LF.PostIncLoops.count(L)) {
+    if (LF.isUseFullyOutsideLoop(L))
      Inputs.push_back(L->getLoopLatch()->getTerminator());
    else
      Inputs.push_back(IVIncInsertPos);
@ -2831,7 +2852,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,

  // Inform the Rewriter if we have a post-increment use, so that it can
  // perform an advantageous expansion.
-  Rewriter.setPostInc(LF.PostIncLoop);
+  Rewriter.setPostInc(LF.PostIncLoops);

  // This is the type that the user actually needs.
  const Type *OpTy = LF.OperandValToReplace->getType();
@ -2855,24 +2876,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
    const SCEV *Reg = *I;
    assert(!Reg->isZero() && "Zero allocated in a base register!");

-    // If we're expanding for a post-inc user for the add-rec's loop, make the
-    // post-inc adjustment.
-    const SCEV *Start = Reg;
-    while (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Start)) {
-      if (AR->getLoop() == LF.PostIncLoop) {
-        Reg = SE.getAddExpr(Reg, AR->getStepRecurrence(SE));
-        // If the user is inside the loop, insert the code after the increment
-        // so that it is dominated by its operand. If the original insert point
-        // was already dominated by the increment, keep it, because there may
-        // be loop-variant operands that need to be respected also.
-        if (L->contains(LF.UserInst) && !DT.dominates(IVIncInsertPos, IP)) {
-          IP = IVIncInsertPos;
-          while (isa<DbgInfoIntrinsic>(IP)) ++IP;
-        }
-        break;
-      }
-      Start = AR->getStart();
-    }
+    // If we're expanding for a post-inc user, make the post-inc adjustment.
+    PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
+    Reg = TransformForPostIncUse(Denormalize, Reg,
+                                 LF.UserInst, LF.OperandValToReplace,
+                                 Loops, SE, DT);

    Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP)));
  }
@ -2889,11 +2897,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
  if (F.AM.Scale != 0) {
    const SCEV *ScaledS = F.ScaledReg;

-    // If we're expanding for a post-inc user for the add-rec's loop, make the
-    // post-inc adjustment.
-    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ScaledS))
-      if (AR->getLoop() == LF.PostIncLoop)
-        ScaledS = SE.getAddExpr(ScaledS, AR->getStepRecurrence(SE));
+    // If we're expanding for a post-inc user, make the post-inc adjustment.
+    PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
+    ScaledS = TransformForPostIncUse(Denormalize, ScaledS,
+                                     LF.UserInst, LF.OperandValToReplace,
+                                     Loops, SE, DT);

    if (LU.Kind == LSRUse::ICmpZero) {
      // An interesting way of "folding" with an icmp is to use a negated
@ -2954,7 +2962,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
  Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP);

  // We're done expanding now, so reset the rewriter.
-  Rewriter.setPostInc(0);
+  Rewriter.clearPostInc();

  // An ICmpZero Formula represents an ICmp which we're handling as a
  // comparison against zero. Now that we've expanded an expression for that
--- a/test/CodeGen/X86/multiple-loop-post-inc.ll
+++ b/test/CodeGen/X86/multiple-loop-post-inc.ll
@ -0,0 +1,277 @@
+; RUN: llc -asm-verbose=false -disable-branch-fold -disable-code-place -disable-tail-duplicate -march=x86-64 < %s | FileCheck %s
+; rdar://7236213
+
+; CodeGen shouldn't require any lea instructions inside the marked loop.
+; It should properly set up post-increment uses and do coalescing for
+; the induction variables.
+
+; CHECK: # Start
+; CHECK-NOT: lea
+; CHECK: # Stop
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+define void @foo(float* %I, i64 %IS, float* nocapture %Start, float* nocapture %Step, float* %O, i64 %OS, i64 %N) nounwind {
+entry:
+  %times4 = alloca float, align 4                 ; <float*> [#uses=3]
+  %timesN = alloca float, align 4                 ; <float*> [#uses=2]
+  %0 = load float* %Step, align 4                 ; <float> [#uses=8]
+  %1 = ptrtoint float* %I to i64                  ; <i64> [#uses=1]
+  %2 = ptrtoint float* %O to i64                  ; <i64> [#uses=1]
+  %tmp = xor i64 %2, %1                           ; <i64> [#uses=1]
+  %tmp16 = and i64 %tmp, 15                       ; <i64> [#uses=1]
+  %3 = icmp eq i64 %tmp16, 0                      ; <i1> [#uses=1]
+  %4 = trunc i64 %IS to i32                       ; <i32> [#uses=1]
+  %5 = xor i32 %4, 1                              ; <i32> [#uses=1]
+  %6 = trunc i64 %OS to i32                       ; <i32> [#uses=1]
+  %7 = xor i32 %6, 1                              ; <i32> [#uses=1]
+  %8 = or i32 %7, %5                              ; <i32> [#uses=1]
+  %9 = icmp eq i32 %8, 0                          ; <i1> [#uses=1]
+  br i1 %9, label %bb, label %return
+
+bb:                                               ; preds = %entry
+  %10 = load float* %Start, align 4               ; <float> [#uses=1]
+  br label %bb2
+
+bb1:                                              ; preds = %bb3
+  %11 = load float* %I_addr.0, align 4            ; <float> [#uses=1]
+  %12 = fmul float %11, %x.0                      ; <float> [#uses=1]
+  store float %12, float* %O_addr.0, align 4
+  %13 = fadd float %x.0, %0                       ; <float> [#uses=1]
+  %indvar.next53 = add i64 %14, 1                 ; <i64> [#uses=1]
+  br label %bb2
+
+bb2:                                              ; preds = %bb1, %bb
+  %14 = phi i64 [ %indvar.next53, %bb1 ], [ 0, %bb ] ; <i64> [#uses=21]
+  %x.0 = phi float [ %13, %bb1 ], [ %10, %bb ]    ; <float> [#uses=6]
+  %N_addr.0 = sub i64 %N, %14                     ; <i64> [#uses=4]
+  %O_addr.0 = getelementptr float* %O, i64 %14    ; <float*> [#uses=4]
+  %I_addr.0 = getelementptr float* %I, i64 %14    ; <float*> [#uses=3]
+  %15 = icmp slt i64 %N_addr.0, 1                 ; <i1> [#uses=1]
+  br i1 %15, label %bb4, label %bb3
+
+bb3:                                              ; preds = %bb2
+  %16 = ptrtoint float* %O_addr.0 to i64          ; <i64> [#uses=1]
+  %17 = and i64 %16, 15                           ; <i64> [#uses=1]
+  %18 = icmp eq i64 %17, 0                        ; <i1> [#uses=1]
+  br i1 %18, label %bb4, label %bb1
+
+bb4:                                              ; preds = %bb3, %bb2
+  %19 = fmul float %0, 4.000000e+00               ; <float> [#uses=1]
+  store float %19, float* %times4, align 4
+  %20 = fmul float %0, 1.600000e+01               ; <float> [#uses=1]
+  store float %20, float* %timesN, align 4
+  %21 = fmul float %0, 0.000000e+00               ; <float> [#uses=1]
+  %22 = fadd float %21, %x.0                      ; <float> [#uses=1]
+  %23 = fadd float %x.0, %0                       ; <float> [#uses=1]
+  %24 = fmul float %0, 2.000000e+00               ; <float> [#uses=1]
+  %25 = fadd float %24, %x.0                      ; <float> [#uses=1]
+  %26 = fmul float %0, 3.000000e+00               ; <float> [#uses=1]
+  %27 = fadd float %26, %x.0                      ; <float> [#uses=1]
+  %28 = insertelement <4 x float> undef, float %22, i32 0 ; <<4 x float>> [#uses=1]
+  %29 = insertelement <4 x float> %28, float %23, i32 1 ; <<4 x float>> [#uses=1]
+  %30 = insertelement <4 x float> %29, float %25, i32 2 ; <<4 x float>> [#uses=1]
+  %31 = insertelement <4 x float> %30, float %27, i32 3 ; <<4 x float>> [#uses=5]
+  %asmtmp.i = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %times4) nounwind ; <<4 x float>> [#uses=3]
+  %32 = fadd <4 x float> %31, %asmtmp.i           ; <<4 x float>> [#uses=3]
+  %33 = fadd <4 x float> %32, %asmtmp.i           ; <<4 x float>> [#uses=3]
+  %34 = fadd <4 x float> %33, %asmtmp.i           ; <<4 x float>> [#uses=2]
+  %asmtmp.i18 = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %timesN) nounwind ; <<4 x float>> [#uses=8]
+  %35 = icmp sgt i64 %N_addr.0, 15                ; <i1> [#uses=2]
+  br i1 %3, label %bb6.preheader, label %bb8
+
+bb6.preheader:                                    ; preds = %bb4
+  br i1 %35, label %bb.nph43, label %bb7
+
+bb.nph43:                                         ; preds = %bb6.preheader
+  %tmp108 = add i64 %14, 16                       ; <i64> [#uses=1]
+  %tmp111 = add i64 %14, 4                        ; <i64> [#uses=1]
+  %tmp115 = add i64 %14, 8                        ; <i64> [#uses=1]
+  %tmp119 = add i64 %14, 12                       ; <i64> [#uses=1]
+  %tmp134 = add i64 %N, -16                       ; <i64> [#uses=1]
+  %tmp135 = sub i64 %tmp134, %14                  ; <i64> [#uses=1]
+  call void asm sideeffect "# Start.", "~{dirflag},~{fpsr},~{flags}"() nounwind
+  br label %bb5
+
+bb5:                                              ; preds = %bb.nph43, %bb5
+  %indvar102 = phi i64 [ 0, %bb.nph43 ], [ %indvar.next103, %bb5 ] ; <i64> [#uses=3]
+  %vX3.041 = phi <4 x float> [ %34, %bb.nph43 ], [ %45, %bb5 ] ; <<4 x float>> [#uses=2]
+  %vX0.039 = phi <4 x float> [ %31, %bb.nph43 ], [ %41, %bb5 ] ; <<4 x float>> [#uses=2]
+  %vX2.037 = phi <4 x float> [ %33, %bb.nph43 ], [ %46, %bb5 ] ; <<4 x float>> [#uses=2]
+  %vX1.036 = phi <4 x float> [ %32, %bb.nph43 ], [ %47, %bb5 ] ; <<4 x float>> [#uses=2]
+  %tmp104 = shl i64 %indvar102, 4                 ; <i64> [#uses=5]
+  %tmp105 = add i64 %14, %tmp104                  ; <i64> [#uses=2]
+  %scevgep106 = getelementptr float* %I, i64 %tmp105 ; <float*> [#uses=1]
+  %scevgep106107 = bitcast float* %scevgep106 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %tmp109 = add i64 %tmp108, %tmp104              ; <i64> [#uses=2]
+  %tmp112 = add i64 %tmp111, %tmp104              ; <i64> [#uses=2]
+  %scevgep113 = getelementptr float* %I, i64 %tmp112 ; <float*> [#uses=1]
+  %scevgep113114 = bitcast float* %scevgep113 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %tmp116 = add i64 %tmp115, %tmp104              ; <i64> [#uses=2]
+  %scevgep117 = getelementptr float* %I, i64 %tmp116 ; <float*> [#uses=1]
+  %scevgep117118 = bitcast float* %scevgep117 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %tmp120 = add i64 %tmp119, %tmp104              ; <i64> [#uses=2]
+  %scevgep121 = getelementptr float* %I, i64 %tmp120 ; <float*> [#uses=1]
+  %scevgep121122 = bitcast float* %scevgep121 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %scevgep123 = getelementptr float* %O, i64 %tmp105 ; <float*> [#uses=1]
+  %scevgep123124 = bitcast float* %scevgep123 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %scevgep126 = getelementptr float* %O, i64 %tmp112 ; <float*> [#uses=1]
+  %scevgep126127 = bitcast float* %scevgep126 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %scevgep128 = getelementptr float* %O, i64 %tmp116 ; <float*> [#uses=1]
+  %scevgep128129 = bitcast float* %scevgep128 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %scevgep130 = getelementptr float* %O, i64 %tmp120 ; <float*> [#uses=1]
+  %scevgep130131 = bitcast float* %scevgep130 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %tmp132 = mul i64 %indvar102, -16               ; <i64> [#uses=1]
+  %tmp136 = add i64 %tmp135, %tmp132              ; <i64> [#uses=2]
+  %36 = load <4 x float>* %scevgep106107, align 16 ; <<4 x float>> [#uses=1]
+  %37 = load <4 x float>* %scevgep113114, align 16 ; <<4 x float>> [#uses=1]
+  %38 = load <4 x float>* %scevgep117118, align 16 ; <<4 x float>> [#uses=1]
+  %39 = load <4 x float>* %scevgep121122, align 16 ; <<4 x float>> [#uses=1]
+  %40 = fmul <4 x float> %36, %vX0.039            ; <<4 x float>> [#uses=1]
+  %41 = fadd <4 x float> %vX0.039, %asmtmp.i18    ; <<4 x float>> [#uses=2]
+  %42 = fmul <4 x float> %37, %vX1.036            ; <<4 x float>> [#uses=1]
+  %43 = fmul <4 x float> %38, %vX2.037            ; <<4 x float>> [#uses=1]
+  %44 = fmul <4 x float> %39, %vX3.041            ; <<4 x float>> [#uses=1]
+  store <4 x float> %40, <4 x float>* %scevgep123124, align 16
+  store <4 x float> %42, <4 x float>* %scevgep126127, align 16
+  store <4 x float> %43, <4 x float>* %scevgep128129, align 16
+  store <4 x float> %44, <4 x float>* %scevgep130131, align 16
+  %45 = fadd <4 x float> %vX3.041, %asmtmp.i18    ; <<4 x float>> [#uses=1]
+  %46 = fadd <4 x float> %vX2.037, %asmtmp.i18    ; <<4 x float>> [#uses=1]
+  %47 = fadd <4 x float> %vX1.036, %asmtmp.i18    ; <<4 x float>> [#uses=1]
+  %48 = icmp sgt i64 %tmp136, 15                  ; <i1> [#uses=1]
+  %indvar.next103 = add i64 %indvar102, 1         ; <i64> [#uses=1]
+  br i1 %48, label %bb5, label %bb6.bb7_crit_edge
+
+bb6.bb7_crit_edge:                                ; preds = %bb5
+  call void asm sideeffect "# Stop.", "~{dirflag},~{fpsr},~{flags}"() nounwind
+  %scevgep110 = getelementptr float* %I, i64 %tmp109 ; <float*> [#uses=1]
+  %scevgep125 = getelementptr float* %O, i64 %tmp109 ; <float*> [#uses=1]
+  br label %bb7
+
+bb7:                                              ; preds = %bb6.bb7_crit_edge, %bb6.preheader
+  %I_addr.1.lcssa = phi float* [ %scevgep110, %bb6.bb7_crit_edge ], [ %I_addr.0, %bb6.preheader ] ; <float*> [#uses=1]
+  %O_addr.1.lcssa = phi float* [ %scevgep125, %bb6.bb7_crit_edge ], [ %O_addr.0, %bb6.preheader ] ; <float*> [#uses=1]
+  %vX0.0.lcssa = phi <4 x float> [ %41, %bb6.bb7_crit_edge ], [ %31, %bb6.preheader ] ; <<4 x float>> [#uses=1]
+  %N_addr.1.lcssa = phi i64 [ %tmp136, %bb6.bb7_crit_edge ], [ %N_addr.0, %bb6.preheader ] ; <i64> [#uses=1]
+  %asmtmp.i17 = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %times4) nounwind ; <<4 x float>> [#uses=0]
+  br label %bb11
+
+bb8:                                              ; preds = %bb4
+  br i1 %35, label %bb.nph, label %bb11
+
+bb.nph:                                           ; preds = %bb8
+  %I_addr.0.sum = add i64 %14, -1                 ; <i64> [#uses=1]
+  %49 = getelementptr inbounds float* %I, i64 %I_addr.0.sum ; <float*> [#uses=1]
+  %50 = bitcast float* %49 to <4 x float>*        ; <<4 x float>*> [#uses=1]
+  %51 = load <4 x float>* %50, align 16           ; <<4 x float>> [#uses=1]
+  %tmp54 = add i64 %14, 16                        ; <i64> [#uses=1]
+  %tmp56 = add i64 %14, 3                         ; <i64> [#uses=1]
+  %tmp60 = add i64 %14, 7                         ; <i64> [#uses=1]
+  %tmp64 = add i64 %14, 11                        ; <i64> [#uses=1]
+  %tmp68 = add i64 %14, 15                        ; <i64> [#uses=1]
+  %tmp76 = add i64 %14, 4                         ; <i64> [#uses=1]
+  %tmp80 = add i64 %14, 8                         ; <i64> [#uses=1]
+  %tmp84 = add i64 %14, 12                        ; <i64> [#uses=1]
+  %tmp90 = add i64 %N, -16                        ; <i64> [#uses=1]
+  %tmp91 = sub i64 %tmp90, %14                    ; <i64> [#uses=1]
+  br label %bb9
+
+bb9:                                              ; preds = %bb.nph, %bb9
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %bb9 ] ; <i64> [#uses=3]
+  %vX3.125 = phi <4 x float> [ %34, %bb.nph ], [ %69, %bb9 ] ; <<4 x float>> [#uses=2]
+  %vX0.223 = phi <4 x float> [ %31, %bb.nph ], [ %65, %bb9 ] ; <<4 x float>> [#uses=2]
+  %vX2.121 = phi <4 x float> [ %33, %bb.nph ], [ %70, %bb9 ] ; <<4 x float>> [#uses=2]
+  %vX1.120 = phi <4 x float> [ %32, %bb.nph ], [ %71, %bb9 ] ; <<4 x float>> [#uses=2]
+  %vI0.019 = phi <4 x float> [ %51, %bb.nph ], [ %55, %bb9 ] ; <<4 x float>> [#uses=1]
+  %tmp51 = shl i64 %indvar, 4                     ; <i64> [#uses=9]
+  %tmp55 = add i64 %tmp54, %tmp51                 ; <i64> [#uses=2]
+  %tmp57 = add i64 %tmp56, %tmp51                 ; <i64> [#uses=1]
+  %scevgep58 = getelementptr float* %I, i64 %tmp57 ; <float*> [#uses=1]
+  %scevgep5859 = bitcast float* %scevgep58 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %tmp61 = add i64 %tmp60, %tmp51                 ; <i64> [#uses=1]
+  %scevgep62 = getelementptr float* %I, i64 %tmp61 ; <float*> [#uses=1]
+  %scevgep6263 = bitcast float* %scevgep62 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %tmp65 = add i64 %tmp64, %tmp51                 ; <i64> [#uses=1]
+  %scevgep66 = getelementptr float* %I, i64 %tmp65 ; <float*> [#uses=1]
+  %scevgep6667 = bitcast float* %scevgep66 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %tmp69 = add i64 %tmp68, %tmp51                 ; <i64> [#uses=1]
+  %scevgep70 = getelementptr float* %I, i64 %tmp69 ; <float*> [#uses=1]
+  %scevgep7071 = bitcast float* %scevgep70 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %tmp72 = add i64 %14, %tmp51                    ; <i64> [#uses=1]
+  %scevgep73 = getelementptr float* %O, i64 %tmp72 ; <float*> [#uses=1]
+  %scevgep7374 = bitcast float* %scevgep73 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %tmp77 = add i64 %tmp76, %tmp51                 ; <i64> [#uses=1]
+  %scevgep78 = getelementptr float* %O, i64 %tmp77 ; <float*> [#uses=1]
+  %scevgep7879 = bitcast float* %scevgep78 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %tmp81 = add i64 %tmp80, %tmp51                 ; <i64> [#uses=1]
+  %scevgep82 = getelementptr float* %O, i64 %tmp81 ; <float*> [#uses=1]
+  %scevgep8283 = bitcast float* %scevgep82 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %tmp85 = add i64 %tmp84, %tmp51                 ; <i64> [#uses=1]
+  %scevgep86 = getelementptr float* %O, i64 %tmp85 ; <float*> [#uses=1]
+  %scevgep8687 = bitcast float* %scevgep86 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %tmp88 = mul i64 %indvar, -16                   ; <i64> [#uses=1]
+  %tmp92 = add i64 %tmp91, %tmp88                 ; <i64> [#uses=2]
+  %52 = load <4 x float>* %scevgep5859, align 16  ; <<4 x float>> [#uses=2]
+  %53 = load <4 x float>* %scevgep6263, align 16  ; <<4 x float>> [#uses=2]
+  %54 = load <4 x float>* %scevgep6667, align 16  ; <<4 x float>> [#uses=2]
+  %55 = load <4 x float>* %scevgep7071, align 16  ; <<4 x float>> [#uses=2]
+  %56 = shufflevector <4 x float> %vI0.019, <4 x float> %52, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
+  %57 = shufflevector <4 x float> %56, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1]
+  %58 = shufflevector <4 x float> %52, <4 x float> %53, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
+  %59 = shufflevector <4 x float> %58, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1]
+  %60 = shufflevector <4 x float> %53, <4 x float> %54, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
+  %61 = shufflevector <4 x float> %60, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1]
+  %62 = shufflevector <4 x float> %54, <4 x float> %55, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
+  %63 = shufflevector <4 x float> %62, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1]
+  %64 = fmul <4 x float> %57, %vX0.223            ; <<4 x float>> [#uses=1]
+  %65 = fadd <4 x float> %vX0.223, %asmtmp.i18    ; <<4 x float>> [#uses=2]
+  %66 = fmul <4 x float> %59, %vX1.120            ; <<4 x float>> [#uses=1]
+  %67 = fmul <4 x float> %61, %vX2.121            ; <<4 x float>> [#uses=1]
+  %68 = fmul <4 x float> %63, %vX3.125            ; <<4 x float>> [#uses=1]
+  store <4 x float> %64, <4 x float>* %scevgep7374, align 16
+  store <4 x float> %66, <4 x float>* %scevgep7879, align 16
+  store <4 x float> %67, <4 x float>* %scevgep8283, align 16
+  store <4 x float> %68, <4 x float>* %scevgep8687, align 16
+  %69 = fadd <4 x float> %vX3.125, %asmtmp.i18    ; <<4 x float>> [#uses=1]
+  %70 = fadd <4 x float> %vX2.121, %asmtmp.i18    ; <<4 x float>> [#uses=1]
+  %71 = fadd <4 x float> %vX1.120, %asmtmp.i18    ; <<4 x float>> [#uses=1]
+  %72 = icmp sgt i64 %tmp92, 15                   ; <i1> [#uses=1]
+  %indvar.next = add i64 %indvar, 1               ; <i64> [#uses=1]
+  br i1 %72, label %bb9, label %bb10.bb11.loopexit_crit_edge
+
+bb10.bb11.loopexit_crit_edge:                     ; preds = %bb9
+  %scevgep = getelementptr float* %I, i64 %tmp55  ; <float*> [#uses=1]
+  %scevgep75 = getelementptr float* %O, i64 %tmp55 ; <float*> [#uses=1]
+  br label %bb11
+
+bb11:                                             ; preds = %bb8, %bb10.bb11.loopexit_crit_edge, %bb7
+  %N_addr.2 = phi i64 [ %N_addr.1.lcssa, %bb7 ], [ %tmp92, %bb10.bb11.loopexit_crit_edge ], [ %N_addr.0, %bb8 ] ; <i64> [#uses=2]
+  %vX0.1 = phi <4 x float> [ %vX0.0.lcssa, %bb7 ], [ %65, %bb10.bb11.loopexit_crit_edge ], [ %31, %bb8 ] ; <<4 x float>> [#uses=1]
+  %O_addr.2 = phi float* [ %O_addr.1.lcssa, %bb7 ], [ %scevgep75, %bb10.bb11.loopexit_crit_edge ], [ %O_addr.0, %bb8 ] ; <float*> [#uses=1]
+  %I_addr.2 = phi float* [ %I_addr.1.lcssa, %bb7 ], [ %scevgep, %bb10.bb11.loopexit_crit_edge ], [ %I_addr.0, %bb8 ] ; <float*> [#uses=1]
+  %73 = extractelement <4 x float> %vX0.1, i32 0  ; <float> [#uses=2]
+  %74 = icmp sgt i64 %N_addr.2, 0                 ; <i1> [#uses=1]
+  br i1 %74, label %bb12, label %bb14
+
+bb12:                                             ; preds = %bb11, %bb12
+  %indvar94 = phi i64 [ %indvar.next95, %bb12 ], [ 0, %bb11 ] ; <i64> [#uses=3]
+  %x.130 = phi float [ %77, %bb12 ], [ %73, %bb11 ] ; <float> [#uses=2]
+  %I_addr.433 = getelementptr float* %I_addr.2, i64 %indvar94 ; <float*> [#uses=1]
+  %O_addr.432 = getelementptr float* %O_addr.2, i64 %indvar94 ; <float*> [#uses=1]
+  %75 = load float* %I_addr.433, align 4          ; <float> [#uses=1]
+  %76 = fmul float %75, %x.130                    ; <float> [#uses=1]
+  store float %76, float* %O_addr.432, align 4
+  %77 = fadd float %x.130, %0                     ; <float> [#uses=2]
+  %indvar.next95 = add i64 %indvar94, 1           ; <i64> [#uses=2]
+  %exitcond = icmp eq i64 %indvar.next95, %N_addr.2 ; <i1> [#uses=1]
+  br i1 %exitcond, label %bb14, label %bb12
+
+bb14:                                             ; preds = %bb12, %bb11
+  %x.1.lcssa = phi float [ %73, %bb11 ], [ %77, %bb12 ] ; <float> [#uses=1]
+  store float %x.1.lcssa, float* %Start, align 4
+  ret void
+
+return:                                           ; preds = %entry
+  ret void
+}
--- a/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll
+++ b/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll
@ -1,4 +1,4 @@
-; RUN: opt < %s -analyze -iv-users | grep {\{1,+,3,+,2\}<%loop> (post-inc)}
+; RUN: opt < %s -analyze -iv-users | grep {\{1,+,3,+,2\}<%loop> (post-inc with loop %loop)}

 ; The value of %r is dependent on a polynomial iteration expression.