Revert r314886 "[X86] Improvement in CodeGen instruction selection for LEAs (re-applying post required revision changes.)"

It broke the Chromium / SQLite build; see PR34830. > Summary: > 1/ Operand folding during complex pattern matching for LEAs has been > extended, such that it promotes Scale to accommodate similar operand > appearing in the DAG. > e.g. > T1 = A + B > T2 = T1 + 10 > T3 = T2 + A > For above DAG rooted at T3, X86AddressMode will no look like > Base = B , Index = A , Scale = 2 , Disp = 10 > > 2/ During OptimizeLEAPass down the pipeline factorization is now performed over LEAs > so that if there is an opportunity then complex LEAs (having 3 operands) > could be factored out. > e.g. > leal 1(%rax,%rcx,1), %rdx > leal 1(%rax,%rcx,2), %rcx > will be factored as following > leal 1(%rax,%rcx,1), %rdx > leal (%rdx,%rcx) , %edx > > 3/ Aggressive operand folding for AM based selection for LEAs is sensitive to loops, > thus avoiding creation of any complex LEAs within a loop. > > Reviewers: lsaba, RKSimon, craig.topper, qcolombet, jmolloy > > Reviewed By: lsaba > > Subscribers: jmolloy, spatel, igorb, llvm-commits > > Differential Revision: https://reviews.llvm.org/D35014 llvm-svn: 314919
2024-10-19 19:12:56 +02:00 · 2017-10-04 17:54:06 +00:00 · 2017-10-04 17:54:06 +00:00 · 05b9db2476
commit 05b9db2476
parent dd2311d56c
20 changed files with 187 additions and 639 deletions
--- a/include/llvm/CodeGen/MachineInstr.h
+++ b/include/llvm/CodeGen/MachineInstr.h
@ -1289,13 +1289,12 @@ public:
  /// Add all implicit def and use operands to this instruction.
  void addImplicitDefUseOperands(MachineFunction &MF);

+private:
  /// If this instruction is embedded into a MachineFunction, return the
  /// MachineRegisterInfo object for the current function, otherwise
  /// return null.
  MachineRegisterInfo *getRegInfo();

-private:
-
  /// Unlink all of the register operands in this instruction from their
  /// respective use lists.  This requires that the operands already be on their
  /// use lists.
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@ -300,9 +300,6 @@ public:
  /// type legalization.
  bool NewNodesMustHaveLegalTypes = false;

-  /// Set to true for DAG of BasicBlock contained inside a loop.
-  bool IsDAGPartOfLoop = false;
-
 private:
  /// DAGUpdateListener is a friend so it can manipulate the listener stack.
  friend struct DAGUpdateListener;
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@ -26,7 +26,6 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/OptimizationDiagnosticInfo.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/CodeGen/FastISel.h"
@ -326,8 +325,6 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
  if (OptLevel != CodeGenOpt::None)
    AU.addRequired<AAResultsWrapperPass>();
  AU.addRequired<GCModuleInfo>();
-  if (OptLevel != CodeGenOpt::None)
-    AU.addRequired<LoopInfoWrapperPass>();
  AU.addRequired<StackProtector>();
  AU.addPreserved<StackProtector>();
  AU.addPreserved<GCModuleInfo>();
@ -1419,7 +1416,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {

  // Iterate over all basic blocks in the function.
  for (const BasicBlock *LLVMBB : RPOT) {
-    CurDAG->IsDAGPartOfLoop = false;
    if (OptLevel != CodeGenOpt::None) {
      bool AllPredsVisited = true;
      for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB);
@ -1597,13 +1593,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
                                   FunctionBasedInstrumentation);
    }

-    if (OptLevel != CodeGenOpt::None) {
-      auto &LIWP = getAnalysis<LoopInfoWrapperPass>();
-      LoopInfo &LI = LIWP.getLoopInfo();
-      if (LI.getLoopFor(LLVMBB))
-        CurDAG->IsDAGPartOfLoop = true;
-    }
-
    if (Begin != BI)
      ++NumDAGBlocks;
    else
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@ -88,11 +88,6 @@ namespace {
             IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr;
    }

-    bool hasComplexAddressingMode() const {
-      return Disp && IndexReg.getNode() != nullptr &&
-             Base_Reg.getNode() != nullptr;
-    }
-
    /// Return true if this addressing mode is already RIP-relative.
    bool isRIPRelative() const {
      if (BaseType != RegBase) return false;
@ -102,10 +97,6 @@ namespace {
      return false;
    }

-    bool isLegalScale() {
-      return (Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8);
-    }
-
    void setBaseReg(SDValue Reg) {
      BaseType = RegBase;
      Base_Reg = Reg;
@ -171,13 +162,10 @@ namespace {
    /// If true, selector should try to optimize for minimum code size.
    bool OptForMinSize;

-    /// If true, selector should try to aggresively fold operands into AM.
-    bool OptForAggressingFolding;
-
  public:
    explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
        : SelectionDAGISel(tm, OptLevel), OptForSize(false),
-          OptForMinSize(false), OptForAggressingFolding(false) {}
+          OptForMinSize(false) {}

    StringRef getPassName() const override {
      return "X86 DAG->DAG Instruction Selection";
@ -196,12 +184,6 @@ namespace {

    void PreprocessISelDAG() override;

-    void setAggressiveOperandFolding(bool val = false) {
-      OptForAggressingFolding = val;
-    }
-
-    bool getAggressiveOperandFolding() { return OptForAggressingFolding; }
-
 // Include the pieces autogenerated from the target description.
 #include "X86GenDAGISel.inc"

@ -215,7 +197,6 @@ namespace {
    bool matchAdd(SDValue N, X86ISelAddressMode &AM, unsigned Depth);
    bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                 unsigned Depth);
-    bool matchAddressLEA(SDValue N, X86ISelAddressMode &AM);
    bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);
    bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
                    SDValue &Scale, SDValue &Index, SDValue &Disp,
@ -464,20 +445,6 @@ namespace {

    bool isMaskZeroExtended(SDNode *N) const;
  };
-
-  class X86AggressiveOperandFolding {
-  public:
-    explicit X86AggressiveOperandFolding(X86DAGToDAGISel &ISel, bool val)
-        : Selector(&ISel) {
-      Selector->setAggressiveOperandFolding(val);
-    }
-    ~X86AggressiveOperandFolding() {
-      Selector->setAggressiveOperandFolding(false);
-    }
-
-  private:
-    X86DAGToDAGISel *Selector;
-  };
 }


@ -1224,7 +1191,7 @@ static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
  AM.IndexReg = NewSRL;
  return false;
 }
-   
+
 bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                              unsigned Depth) {
  SDLoc dl(N);
@ -1232,11 +1199,8 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
      dbgs() << "MatchAddress: ";
      AM.dump();
    });
-
-  // Limit recursion. For aggressive operand folding recurse 
-  // till depth 8 which is the maximum legal scale value.
-  unsigned MaxDepth = getAggressiveOperandFolding() ? 8 : 5;
-  if (Depth > MaxDepth)
+  // Limit recursion.
+  if (Depth > 5)
    return matchAddressBase(N, AM);

  // If this is already a %rip relative address, we can only merge immediates
@ -1527,20 +1491,6 @@ bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) {
      return false;
    }

-    if (OptLevel != CodeGenOpt::None && getAggressiveOperandFolding() &&
-        AM.BaseType == X86ISelAddressMode::RegBase) {
-      if (AM.Base_Reg == N) {
-        SDValue Base_Reg = AM.Base_Reg;
-        AM.Base_Reg = AM.IndexReg;
-        AM.IndexReg = Base_Reg;
-        AM.Scale++;
-        return false;
-      } else if (AM.IndexReg == N) {
-        AM.Scale++;
-        return false;
-      }
-    }
-
    // Otherwise, we cannot select it.
    return true;
  }
@ -1771,7 +1721,7 @@ bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base,
                                         SDValue &Disp, SDValue &Segment) {
  // Save the debug loc before calling selectLEAAddr, in case it invalidates N.
  SDLoc DL(N);
-   
+
  if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment))
    return false;

@ -1806,29 +1756,6 @@ bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base,
  return true;
 }

-bool X86DAGToDAGISel::matchAddressLEA(SDValue N, X86ISelAddressMode &AM) {
-  // Avoid enabling aggressive operand folding when node N is a part of loop.
-  X86AggressiveOperandFolding Enable(*this, !CurDAG->IsDAGPartOfLoop);
-
-  bool matchRes = matchAddress(N, AM);
-
-  // Check for legality of scale when recursion unwinds back to the top.
-  if (!matchRes) {
-    if (!AM.isLegalScale())
-      return true;
-
-    // Avoid creating costly complex LEAs having scale less than 2
-    // within loop.
-    if(CurDAG->IsDAGPartOfLoop && Subtarget->slow3OpsLEA() &&
-        AM.Scale <= 2 && AM.hasComplexAddressingMode() &&
-         (!AM.hasSymbolicDisplacement() && N.getOpcode() < ISD::BUILTIN_OP_END))
-     return true;
-  }
-
-  return matchRes;
-}
-
-
 /// Calls SelectAddr and determines if the maximal addressing
 /// mode it matches can be cost effectively emitted as an LEA instruction.
 bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
@ -1846,7 +1773,7 @@ bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
  SDValue Copy = AM.Segment;
  SDValue T = CurDAG->getRegister(0, MVT::i32);
  AM.Segment = T;
-  if (matchAddressLEA(N, AM))
+  if (matchAddress(N, AM))
    return false;
  assert (T == AM.Segment);
  AM.Segment = Copy;
--- a/lib/Target/X86/X86OptimizeLEAs.cpp
+++ b/lib/Target/X86/X86OptimizeLEAs.cpp
@ -22,7 +22,6 @@
 #include "X86Subtarget.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LiveVariables.h"
-#include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineOperand.h"
@ -45,7 +44,6 @@ static cl::opt<bool>
                     cl::init(false));

 STATISTIC(NumSubstLEAs, "Number of LEA instruction substitutions");
-STATISTIC(NumFactoredLEAs, "Number of LEAs factorized");
 STATISTIC(NumRedundantLEAs, "Number of redundant LEA instructions removed");

 /// \brief Returns true if two machine operands are identical and they are not
@ -53,10 +51,6 @@ STATISTIC(NumRedundantLEAs, "Number of redundant LEA instructions removed");
 static inline bool isIdenticalOp(const MachineOperand &MO1,
                                 const MachineOperand &MO2);

-/// \brief Returns true if two machine instructions have identical operands.
-static bool isIdenticalMI(MachineRegisterInfo *MRI, const MachineOperand &MO1,
-                          const MachineOperand &MO2);
-
 /// \brief Returns true if two address displacement operands are of the same
 /// type and use the same symbol/index/address regardless of the offset.
 static bool isSimilarDispOp(const MachineOperand &MO1,
@ -65,44 +59,21 @@ static bool isSimilarDispOp(const MachineOperand &MO1,
 /// \brief Returns true if the instruction is LEA.
 static inline bool isLEA(const MachineInstr &MI);

-/// \brief Returns true if Definition of Operand is a copylike instruction.
-static bool isDefCopyLike(MachineRegisterInfo *MRI, const MachineOperand &Opr);
-
 namespace {
 /// A key based on instruction's memory operands.
 class MemOpKey {
 public:
  MemOpKey(const MachineOperand *Base, const MachineOperand *Scale,
           const MachineOperand *Index, const MachineOperand *Segment,
-           const MachineOperand *Disp, bool DispCheck = false)
-      : Disp(Disp), DeepCheck(DispCheck) {
+           const MachineOperand *Disp)
+      : Disp(Disp) {
    Operands[0] = Base;
    Operands[1] = Scale;
    Operands[2] = Index;
    Operands[3] = Segment;
  }

-  /// Checks operands of MemOpKey are identical, if Base or Index
-  /// operand definitions are of kind SUBREG_TO_REG then compare
-  /// operands of defining MI.
-  bool performDeepCheck(const MemOpKey &Other) const {
-    MachineInstr *MI = const_cast<MachineInstr *>(Operands[0]->getParent());
-    MachineRegisterInfo *MRI = MI->getRegInfo();
-
-    for (int i = 0; i < 4; i++) {
-      bool copyLike = isDefCopyLike(MRI, *Operands[i]);
-      if (copyLike && !isIdenticalMI(MRI, *Operands[i], *Other.Operands[i]))
-        return false;
-      else if (!copyLike && !isIdenticalOp(*Operands[i], *Other.Operands[i]))
-        return false;
-    }
-    return isIdenticalOp(*Disp, *Other.Disp);
-  }
-
  bool operator==(const MemOpKey &Other) const {
-    if (DeepCheck)
-      return performDeepCheck(Other);
-
    // Addresses' bases, scales, indices and segments must be identical.
    for (int i = 0; i < 4; ++i)
      if (!isIdenticalOp(*Operands[i], *Other.Operands[i]))
@ -120,12 +91,6 @@ public:

  // Address' displacement operand.
  const MachineOperand *Disp;
-
-  // If true checks Address' base, index, segment and
-  // displacement are identical, in additions if base/index
-  // are defined by copylike instruction then futher
-  // compare the operands of the defining instruction.
-  bool DeepCheck;
 };
 } // end anonymous namespace

@ -149,34 +114,12 @@ template <> struct DenseMapInfo<MemOpKey> {
  static unsigned getHashValue(const MemOpKey &Val) {
    // Checking any field of MemOpKey is enough to determine if the key is
    // empty or tombstone.
-    hash_code Hash(0);
    assert(Val.Disp != PtrInfo::getEmptyKey() && "Cannot hash the empty key");
    assert(Val.Disp != PtrInfo::getTombstoneKey() &&
           "Cannot hash the tombstone key");

-    auto getMIHash = [](MachineInstr *MI) -> hash_code {
-      hash_code h(0);
-      for (unsigned i = 1, e = MI->getNumOperands(); i < e; i++)
-        h = hash_combine(h, MI->getOperand(i));
-      return h;
-    };
-
-    const MachineOperand &Base = *Val.Operands[0];
-    const MachineOperand &Index = *Val.Operands[2];
-    MachineInstr *MI = const_cast<MachineInstr *>(Base.getParent());
-    MachineRegisterInfo *MRI = MI->getRegInfo();
-
-    if (isDefCopyLike(MRI, Base))
-      Hash = getMIHash(MRI->getVRegDef(Base.getReg()));
-    else
-      Hash = hash_combine(Hash, Base);
-
-    if (isDefCopyLike(MRI, Index))
-      Hash = getMIHash(MRI->getVRegDef(Index.getReg()));
-    else
-      Hash = hash_combine(Hash, Index);
-
-    Hash = hash_combine(Hash, *Val.Operands[1], *Val.Operands[3]);
+    hash_code Hash = hash_combine(*Val.Operands[0], *Val.Operands[1],
+                                  *Val.Operands[2], *Val.Operands[3]);

    // If the address displacement is an immediate, it should not affect the
    // hash so that memory operands which differ only be immediate displacement
@ -235,16 +178,6 @@ static inline MemOpKey getMemOpKey(const MachineInstr &MI, unsigned N) {
                  &MI.getOperand(N + X86::AddrDisp));
 }

-static inline MemOpKey getMemOpCSEKey(const MachineInstr &MI, unsigned N) {
-  static MachineOperand DummyScale = MachineOperand::CreateImm(1);
-  assert((isLEA(MI) || MI.mayLoadOrStore()) &&
-         "The instruction must be a LEA, a load or a store");
-  return MemOpKey(&MI.getOperand(N + X86::AddrBaseReg), &DummyScale,
-                  &MI.getOperand(N + X86::AddrIndexReg),
-                  &MI.getOperand(N + X86::AddrSegmentReg),
-                  &MI.getOperand(N + X86::AddrDisp), true);
-}
-
 static inline bool isIdenticalOp(const MachineOperand &MO1,
                                 const MachineOperand &MO2) {
  return MO1.isIdenticalTo(MO2) &&
@ -252,27 +185,6 @@ static inline bool isIdenticalOp(const MachineOperand &MO1,
          !TargetRegisterInfo::isPhysicalRegister(MO1.getReg()));
 }

-static bool isIdenticalMI(MachineRegisterInfo *MRI, const MachineOperand &MO1,
-                          const MachineOperand &MO2) {
-  MachineInstr *MI1 = nullptr;
-  MachineInstr *MI2 = nullptr; 
-  if (!MO1.isReg() || !MO2.isReg())
-    return false;
-
-  MI1 = MRI->getVRegDef(MO1.getReg());
-  MI2 = MRI->getVRegDef(MO2.getReg());
-  if (!MI1 || !MI2)
-    return false;
-  if (MI1->getOpcode() != MI2->getOpcode())
-    return false;
-  if (MI1->getNumOperands() != MI2->getNumOperands())
-    return false;
-  for (unsigned i = 1, e = MI1->getNumOperands(); i < e; ++i)
-    if (!isIdenticalOp(MI1->getOperand(i), MI2->getOperand(i)))
-      return false;
-  return true;
-}
-
 #ifndef NDEBUG
 static bool isValidDispOp(const MachineOperand &MO) {
  return MO.isImm() || MO.isCPI() || MO.isJTI() || MO.isSymbol() ||
@ -304,140 +216,7 @@ static inline bool isLEA(const MachineInstr &MI) {
         Opcode == X86::LEA64r || Opcode == X86::LEA64_32r;
 }

-static bool isDefCopyLike(MachineRegisterInfo *MRI, const MachineOperand &Opr) {
-  if (!Opr.isReg() || TargetRegisterInfo::isPhysicalRegister(Opr.getReg()))
-    return false;
-  MachineInstr *MI = MRI->getVRegDef(Opr.getReg());
-  return MI && MI->isCopyLike();
-}
-
 namespace {
-
-/// This class captures the functions and attributes
-/// needed to factorize LEA within and across basic
-/// blocks.LEA instruction with same BASE,OFFSET and
-/// INDEX are the candidates for factorization.
-class FactorizeLEAOpt {
-public:
-  using LEAListT = std::list<MachineInstr *>;
-  using LEAMapT = DenseMap<MemOpKey, LEAListT>;
-  using ValueT = DenseMap<MemOpKey, unsigned>;
-  using ScopeEntryT = std::pair<MachineBasicBlock *, ValueT>;
-  using ScopeStackT = std::vector<ScopeEntryT>;
-
-  FactorizeLEAOpt() = default;
-  FactorizeLEAOpt(const FactorizeLEAOpt &) = delete;
-  FactorizeLEAOpt &operator=(const FactorizeLEAOpt &) = delete;
-
-  void performCleanup() {
-    for (auto LEA : removedLEAs)
-      LEA->eraseFromParent();
-    LEAs.clear();
-    Stack.clear();
-    removedLEAs.clear();
-  }
-
-  LEAMapT &getLEAMap() { return LEAs; }
-  ScopeEntryT *getTopScope() { return &Stack.back(); }
-
-  void addForLazyRemoval(MachineInstr *Instr) { removedLEAs.insert(Instr); }
-
-  bool checkIfScheduledForRemoval(MachineInstr *Instr) {
-    return removedLEAs.find(Instr) != removedLEAs.end();
-  }
-
-  /// Push the ScopeEntry for the BasicBlock over Stack.
-  /// Also traverses over list of instruction and update
-  /// LEAs Map and ScopeEntry for each LEA instruction
-  /// found using insertLEA().
-  void pushScope(MachineBasicBlock *MBB);
-
-  /// Stores the size of MachineInstr list corrosponding
-  /// to key K from LEAs MAP into the ScopeEntry of
-  /// the basic block, then insert the LEA at the beginning
-  /// of the list.
-  void insertLEA(MachineInstr *MI);
-
-  /// Pops out ScopeEntry of top most BasicBlock from the stack
-  /// and remove the LEA instructions contained in the scope
-  /// from the LEAs Map.
-  void popScope();
-
-  /// If LEA contains Physical Registers then its not a candidate
-  /// for factorizations since physical registers may violate SSA
-  /// semantics of MI.
-  bool containsPhyReg(MachineInstr *MI, unsigned RecLevel);
-
-private:
-  ScopeStackT Stack;
-  LEAMapT LEAs;
-  std::set<MachineInstr *> removedLEAs;
-};
-
-void FactorizeLEAOpt::pushScope(MachineBasicBlock *MBB) {
-  ValueT EmptyMap;
-  ScopeEntryT SE = std::make_pair(MBB, EmptyMap);
-  Stack.push_back(SE);
-  for (auto &MI : *MBB) {
-    if (isLEA(MI))
-      insertLEA(&MI);
-  }
-}
-
-void FactorizeLEAOpt::popScope() {
-  ScopeEntryT &SE = Stack.back();
-  for (auto MapEntry : SE.second) {
-    LEAMapT::iterator Itr = LEAs.find(MapEntry.first);
-    assert((Itr != LEAs.end()) &&
-           "LEAs map must have a node corresponding to ScopeEntry's Key.");
-
-    while (((*Itr).second.size() > MapEntry.second))
-      (*Itr).second.pop_front();
-    // If list goes empty remove entry from LEAs Map.
-    if ((*Itr).second.empty())
-      LEAs.erase(Itr);
-  }
-  Stack.pop_back();
-}
-
-bool FactorizeLEAOpt::containsPhyReg(MachineInstr *MI, unsigned RecLevel) {
-  if (!MI || !RecLevel)
-    return false;
-
-  MachineRegisterInfo *MRI = MI->getRegInfo();
-  for (auto Operand : MI->operands()) {
-    if (!Operand.isReg())
-      continue;
-    if (TargetRegisterInfo::isPhysicalRegister(Operand.getReg()))
-      return true;
-    MachineInstr *OperDefMI = MRI->getVRegDef(Operand.getReg());
-    if (OperDefMI && (MI != OperDefMI) && OperDefMI->isCopyLike() &&
-        containsPhyReg(OperDefMI, RecLevel - 1))
-      return true;
-  }
-  return false;
-}
-
-void FactorizeLEAOpt::insertLEA(MachineInstr *MI) {
-  unsigned lsize;
-  if (containsPhyReg(MI, 2))
-    return;
-
-  MemOpKey Key = getMemOpCSEKey(*MI, 1);
-  ScopeEntryT *TopScope = getTopScope();
-
-  LEAMapT::iterator Itr = LEAs.find(Key);
-  if (Itr == LEAs.end()) {
-    lsize = 0;
-    LEAs[Key].push_front(MI);
-  } else {
-    lsize = (*Itr).second.size();
-    (*Itr).second.push_front(MI);
-  }
-  if (TopScope->second.find(Key) == TopScope->second.end())
-    TopScope->second[Key] = lsize;
-}
-
 class OptimizeLEAPass : public MachineFunctionPass {
 public:
  OptimizeLEAPass() : MachineFunctionPass(ID) {}
@ -449,12 +228,6 @@ public:
  /// been calculated by LEA. Also, remove redundant LEAs.
  bool runOnMachineFunction(MachineFunction &MF) override;

-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.setPreservesCFG();
-    MachineFunctionPass::getAnalysisUsage(AU);
-    AU.addRequired<MachineDominatorTree>();
-  }
-
 private:
  typedef DenseMap<MemOpKey, SmallVector<MachineInstr *, 16>> MemOpMap;

@ -500,24 +273,8 @@ private:
  /// \brief Removes LEAs which calculate similar addresses.
  bool removeRedundantLEAs(MemOpMap &LEAs);

-  /// \brief Visit over basic blocks, collect LEAs in a scoped
-  ///  hash map (FactorizeLEAOpt::LEAs) and try to factor them out.
-  bool FactorizeLEAsAllBasicBlocks(MachineFunction &MF);
-
-  bool FactorizeLEAsBasicBlock(MachineDomTreeNode *DN);
-
-  /// \brief Factor out LEAs which share Base,Index,Offset and Segment.
-  bool processBasicBlock(const MachineBasicBlock &MBB);
-
-  /// \brief Try to replace LEA with a lower strength instruction
-  /// to improves latency and throughput.
-  bool strengthReduceLEAs(MemOpMap &LEAs, const MachineBasicBlock &MBB);
-
  DenseMap<const MachineInstr *, unsigned> InstrPos;

-  FactorizeLEAOpt FactorOpt;
-
-  MachineDominatorTree *DT;
  MachineRegisterInfo *MRI;
  const X86InstrInfo *TII;
  const X86RegisterInfo *TRI;
@ -890,168 +647,6 @@ bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) {
  return Changed;
 }

-static inline int getADDrrFromLEA(int LEAOpcode) {
-  switch (LEAOpcode) {
-  default:
-    llvm_unreachable("Unexpected LEA instruction");
-  case X86::LEA16r:
-    return X86::ADD16rr;
-  case X86::LEA32r:
-    return X86::ADD32rr;
-  case X86::LEA64_32r:
-  case X86::LEA64r:
-    return X86::ADD64rr;
-  }
-}
-
-bool OptimizeLEAPass::strengthReduceLEAs(MemOpMap &LEAs,
-                                         const MachineBasicBlock &BB) {
-  bool Changed = false;
-
-  // Loop over all entries in the table.
-  for (auto &E : LEAs) {
-    auto &List = E.second;
-
-    // Loop over all LEA pairs.
-    for (auto I1 = List.begin(); I1 != List.end(); I1++) {
-      MachineInstrBuilder NewMI;
-      MachineInstr &First = **I1;
-      MachineOperand &Res = First.getOperand(0);
-      MachineOperand &Base = First.getOperand(1);
-      MachineOperand &Scale = First.getOperand(2);
-      MachineOperand &Index = First.getOperand(3);
-      MachineOperand &Offset = First.getOperand(4);
-
-      const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(First.getOpcode()));
-      const DebugLoc DL = First.getDebugLoc();
-
-      if (!Base.isReg() || !Index.isReg())
-        continue;
-      if (TargetRegisterInfo::isPhysicalRegister(Res.getReg()) ||
-          TargetRegisterInfo::isPhysicalRegister(Base.getReg()) ||
-          TargetRegisterInfo::isPhysicalRegister(Index.getReg()))
-        continue;
-
-      MachineBasicBlock &MBB = *(const_cast<MachineBasicBlock *>(&BB));
-      if (Scale.isImm() && Scale.getImm() == 1) {
-        // R = B + I
-        if (Offset.isImm() && !Offset.getImm()) {
-          NewMI = BuildMI(MBB, &First, DL, ADDrr)
-                      .addDef(Res.getReg())
-                      .addUse(Base.getReg())
-                      .addUse(Index.getReg());
-          Changed = NewMI.getInstr() != nullptr;
-          First.eraseFromParent();
-        }
-      }
-    }
-  }
-  return Changed;
-}
-
-bool OptimizeLEAPass::processBasicBlock(const MachineBasicBlock &MBB) {
-  bool cseDone = false;
-
-  // Legal scale value (1,2,4 & 8) vector.
-  int LegalScale[9] = {0, 1, 1, 0, 1, 0, 0, 0, 1};
-
-  auto CompareFn = [](const MachineInstr *Arg1,
-                      const MachineInstr *Arg2) -> bool {
-    if (Arg1->getOperand(2).getImm() < Arg2->getOperand(2).getImm())
-      return false;
-    return true;
-  };
-
-  // Loop over all entries in the table.
-  for (auto &E : FactorOpt.getLEAMap()) {
-    auto &List = E.second;
-    if (List.size() > 1)
-      List.sort(CompareFn);
-    
-    // Loop over all LEA pairs.
-    for (auto Iter1 = List.begin(); Iter1 != List.end(); Iter1++) {
-      for (auto Iter2 = std::next(Iter1); Iter2 != List.end(); Iter2++) {
-        MachineInstr &LI1 = **Iter1;
-        MachineInstr &LI2 = **Iter2;
-
-        if (!DT->dominates(&LI2, &LI1))
-          continue;
-
-        int Scale1 = LI1.getOperand(2).getImm();
-        int Scale2 = LI2.getOperand(2).getImm();
-        assert(LI2.getOperand(0).isReg() && "Result is a VirtualReg");
-        DebugLoc DL = LI1.getDebugLoc();
-
-        if (FactorOpt.checkIfScheduledForRemoval(&LI1))
-          continue;
-
-        int Factor = Scale1 - Scale2;
-        if (Factor > 0 && LegalScale[Factor]) {
-          DEBUG(dbgs() << "CSE LEAs: Candidate to replace: "; LI1.dump(););
-          MachineInstrBuilder NewMI =
-              BuildMI(*(const_cast<MachineBasicBlock *>(&MBB)), &LI1, DL,
-                      TII->get(LI1.getOpcode()))
-                  .addDef(LI1.getOperand(0).getReg()) // Dst   = Dst of LI1.
-                  .addUse(LI2.getOperand(0).getReg()) // Base  = Dst of LI2.
-                  .addImm(Factor) // Scale = Diff b/w scales.
-                  .addUse(LI1.getOperand(3).getReg()) // Index = Index of LI1.
-                  .addImm(0)                          // Disp  = 0
-                  .addUse(
-                      LI1.getOperand(5).getReg()); // Segment = Segmant of LI1.
-
-          cseDone = NewMI.getInstr() != nullptr;
-
-          LI1.getOperand(0).setIsDef(false);
-
-          /// Lazy removal shall ensure that replaced LEA remains
-          /// till we finish processing all the basic block. This shall
-          /// provide opportunity for further factorization based on
-          /// the replaced LEA which will be legal since it has same
-          /// destination as newly formed LEA.
-          FactorOpt.addForLazyRemoval(&LI1);
-
-          NumFactoredLEAs++;
-          DEBUG(dbgs() << "CSE LEAs: Replaced by: "; NewMI->dump(););
-        }
-      }
-    }
-  }
-  return cseDone;
-}
-
-bool OptimizeLEAPass::FactorizeLEAsBasicBlock(MachineDomTreeNode *DN) {
-  bool Changed = false;
-  using StackT = SmallVector<MachineDomTreeNode* , 16>;
-  using ProcessedMapT = DenseMap<MachineDomTreeNode* , bool>;
-
-  StackT WorkList;
-  ProcessedMapT ProcessesMap;
-
-  WorkList.push_back(DN);
-  while(!WorkList.empty()) {
-    MachineDomTreeNode * MDN = WorkList.back();
-    if (ProcessesMap.find(MDN) == ProcessesMap.end()) {
-       ProcessesMap[MDN] = true;
-       FactorOpt.pushScope(MDN->getBlock());
-       Changed |= processBasicBlock(*MDN->getBlock());
-       for (auto Child : MDN->getChildren())
-          WorkList.push_back(Child);
-    }
-    MachineDomTreeNode *TDM = WorkList.back();
-    if (MDN->getLevel() == TDM->getLevel()) {
-       FactorOpt.popScope();
-       WorkList.pop_back();
-    }
-  }
-  return Changed;
-}
-
-bool OptimizeLEAPass::FactorizeLEAsAllBasicBlocks(MachineFunction &MF) {
-  bool Changed = FactorizeLEAsBasicBlock(DT->getRootNode());
-  FactorOpt.performCleanup();
-  return Changed;
-}
-
 bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
  bool Changed = false;

@ -1061,10 +656,6 @@ bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
  MRI = &MF.getRegInfo();
  TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
  TRI = MF.getSubtarget<X86Subtarget>().getRegisterInfo();
-  DT = &getAnalysis<MachineDominatorTree>();
-
-  // Attempt factorizing LEAs.
-  Changed |= FactorizeLEAsAllBasicBlocks(MF);

  // Process all basic blocks.
  for (auto &MBB : MF) {
@ -1081,9 +672,6 @@ bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
    // Remove redundant LEA instructions.
    Changed |= removeRedundantLEAs(LEAs);

-    // Strength reduce LEA instructions.
-    Changed |= strengthReduceLEAs(LEAs, MBB);
-
    // Remove redundant address calculations. Do it only for -Os/-Oz since only
    // a code size gain is expected from this part of the pass.
    if (MF.getFunction()->optForSize())
--- a/test/CodeGen/X86/GlobalISel/callingconv.ll
+++ b/test/CodeGen/X86/GlobalISel/callingconv.ll
@ -405,7 +405,7 @@ define void @test_variadic_call_2(i8** %addr_ptr, double* %val_ptr) {
 ; X32-NEXT:    movl 4(%ecx), %ecx
 ; X32-NEXT:    movl %eax, (%esp)
 ; X32-NEXT:    movl $4, %eax
-; X32-NEXT:    addl %esp, %eax
+; X32-NEXT:    leal (%esp,%eax), %eax
 ; X32-NEXT:    movl %edx, 4(%esp)
 ; X32-NEXT:    movl %ecx, 4(%eax)
 ; X32-NEXT:    calll variadic_callee
--- a/test/CodeGen/X86/GlobalISel/gep.ll
+++ b/test/CodeGen/X86/GlobalISel/gep.ll
@ -5,10 +5,10 @@
 define i32* @test_gep_i8(i32 *%arr, i8 %ind) {
 ; X64_GISEL-LABEL: test_gep_i8:
 ; X64_GISEL:       # BB#0:
-; X64_GISEL-NEXT:    movq $4, %rcx
-; X64_GISEL-NEXT:    movsbq %sil, %rax
-; X64_GISEL-NEXT:    imulq %rcx, %rax
-; X64_GISEL-NEXT:    addq %rdi, %rax
+; X64_GISEL-NEXT:    movq $4, %rax
+; X64_GISEL-NEXT:    movsbq %sil, %rcx
+; X64_GISEL-NEXT:    imulq %rax, %rcx
+; X64_GISEL-NEXT:    leaq (%rdi,%rcx), %rax
 ; X64_GISEL-NEXT:    retq
 ;
 ; X64-LABEL: test_gep_i8:
@ -25,7 +25,7 @@ define i32* @test_gep_i8_const(i32 *%arr) {
 ; X64_GISEL-LABEL: test_gep_i8_const:
 ; X64_GISEL:       # BB#0:
 ; X64_GISEL-NEXT:    movq $80, %rax
-; X64_GISEL-NEXT:    addq %rdi, %rax
+; X64_GISEL-NEXT:    leaq (%rdi,%rax), %rax
 ; X64_GISEL-NEXT:    retq
 ;
 ; X64-LABEL: test_gep_i8_const:
@ -39,10 +39,10 @@ define i32* @test_gep_i8_const(i32 *%arr) {
 define i32* @test_gep_i16(i32 *%arr, i16 %ind) {
 ; X64_GISEL-LABEL: test_gep_i16:
 ; X64_GISEL:       # BB#0:
-; X64_GISEL-NEXT:    movq $4, %rcx
-; X64_GISEL-NEXT:    movswq %si, %rax
-; X64_GISEL-NEXT:    imulq %rcx, %rax
-; X64_GISEL-NEXT:    addq %rdi, %rax
+; X64_GISEL-NEXT:    movq $4, %rax
+; X64_GISEL-NEXT:    movswq %si, %rcx
+; X64_GISEL-NEXT:    imulq %rax, %rcx
+; X64_GISEL-NEXT:    leaq (%rdi,%rcx), %rax
 ; X64_GISEL-NEXT:    retq
 ;
 ; X64-LABEL: test_gep_i16:
@ -59,7 +59,7 @@ define i32* @test_gep_i16_const(i32 *%arr) {
 ; X64_GISEL-LABEL: test_gep_i16_const:
 ; X64_GISEL:       # BB#0:
 ; X64_GISEL-NEXT:    movq $80, %rax
-; X64_GISEL-NEXT:    addq %rdi, %rax
+; X64_GISEL-NEXT:    leaq (%rdi,%rax), %rax
 ; X64_GISEL-NEXT:    retq
 ;
 ; X64-LABEL: test_gep_i16_const:
@ -73,10 +73,10 @@ define i32* @test_gep_i16_const(i32 *%arr) {
 define i32* @test_gep_i32(i32 *%arr, i32 %ind) {
 ; X64_GISEL-LABEL: test_gep_i32:
 ; X64_GISEL:       # BB#0:
-; X64_GISEL-NEXT:    movq $4, %rcx
-; X64_GISEL-NEXT:    movslq %esi, %rax
-; X64_GISEL-NEXT:    imulq %rcx, %rax
-; X64_GISEL-NEXT:    addq %rdi, %rax
+; X64_GISEL-NEXT:    movq $4, %rax
+; X64_GISEL-NEXT:    movslq %esi, %rcx
+; X64_GISEL-NEXT:    imulq %rax, %rcx
+; X64_GISEL-NEXT:    leaq (%rdi,%rcx), %rax
 ; X64_GISEL-NEXT:    retq
 ;
 ; X64-LABEL: test_gep_i32:
@ -92,7 +92,7 @@ define i32* @test_gep_i32_const(i32 *%arr) {
 ; X64_GISEL-LABEL: test_gep_i32_const:
 ; X64_GISEL:       # BB#0:
 ; X64_GISEL-NEXT:    movq $20, %rax
-; X64_GISEL-NEXT:    addq %rdi, %rax
+; X64_GISEL-NEXT:    leaq (%rdi,%rax), %rax
 ; X64_GISEL-NEXT:    retq
 ;
 ; X64-LABEL: test_gep_i32_const:
@ -108,7 +108,7 @@ define i32* @test_gep_i64(i32 *%arr, i64 %ind) {
 ; X64_GISEL:       # BB#0:
 ; X64_GISEL-NEXT:    movq $4, %rax
 ; X64_GISEL-NEXT:    imulq %rsi, %rax
-; X64_GISEL-NEXT:    addq %rdi, %rax
+; X64_GISEL-NEXT:    leaq (%rdi,%rax), %rax
 ; X64_GISEL-NEXT:    retq
 ;
 ; X64-LABEL: test_gep_i64:
@ -123,7 +123,7 @@ define i32* @test_gep_i64_const(i32 *%arr) {
 ; X64_GISEL-LABEL: test_gep_i64_const:
 ; X64_GISEL:       # BB#0:
 ; X64_GISEL-NEXT:    movq $20, %rax
-; X64_GISEL-NEXT:    addq %rdi, %rax
+; X64_GISEL-NEXT:    leaq (%rdi,%rax), %rax
 ; X64_GISEL-NEXT:    retq
 ;
 ; X64-LABEL: test_gep_i64_const:
--- a/test/CodeGen/X86/GlobalISel/memop-scalar.ll
+++ b/test/CodeGen/X86/GlobalISel/memop-scalar.ll
@ -181,7 +181,7 @@ define i32 @test_gep_folding_largeGepIndex(i32* %arr, i32 %val) {
 ; ALL-LABEL: test_gep_folding_largeGepIndex:
 ; ALL:       # BB#0:
 ; ALL-NEXT:    movabsq $228719476720, %rax # imm = 0x3540BE3FF0
-; ALL-NEXT:    addq %rdi, %rax
+; ALL-NEXT:    leaq (%rdi,%rax), %rax
 ; ALL-NEXT:    movl %esi, (%rax)
 ; ALL-NEXT:    movl (%rax), %eax
 ; ALL-NEXT:    retq
--- a/test/CodeGen/X86/lea-opt-cse1.ll
+++ b/test/CodeGen/X86/lea-opt-cse1.ll
@ -9,21 +9,29 @@ define void @test_func(%struct.SA* nocapture %ctx, i32 %n) local_unnamed_addr {
 ; X64:       # BB#0: # %entry
 ; X64-NEXT:    movl (%rdi), %eax
 ; X64-NEXT:    movl 16(%rdi), %ecx
+; X64-NEXT:    leal (%rax,%rcx), %edx
 ; X64-NEXT:    leal 1(%rax,%rcx), %eax
 ; X64-NEXT:    movl %eax, 12(%rdi)
-; X64-NEXT:    addq %rcx, %eax
+; X64-NEXT:    leal 1(%rcx,%rdx), %eax
 ; X64-NEXT:    movl %eax, 16(%rdi)
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: test_func:
 ; X86:       # BB#0: # %entry
+; X86-NEXT:    pushl %esi
+; X86-NEXT:  .Lcfi0:
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:  .Lcfi1:
+; X86-NEXT:    .cfi_offset %esi, -8
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl (%eax), %ecx
 ; X86-NEXT:    movl 16(%eax), %edx
-; X86-NEXT:    leal 1(%ecx,%edx), %ecx
-; X86-NEXT:    movl %ecx, 12(%eax)
+; X86-NEXT:    leal 1(%ecx,%edx), %esi
 ; X86-NEXT:    addl %edx, %ecx
+; X86-NEXT:    movl %esi, 12(%eax)
+; X86-NEXT:    leal 1(%edx,%ecx), %ecx
 ; X86-NEXT:    movl %ecx, 16(%eax)
+; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 entry:
   %h0 = getelementptr inbounds %struct.SA, %struct.SA* %ctx, i64 0, i32 0
--- a/test/CodeGen/X86/lea-opt-cse2.ll
+++ b/test/CodeGen/X86/lea-opt-cse2.ll
@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown  -mattr=+slow-3ops-lea | FileCheck %s -check-prefix=X64
-; RUN: llc < %s -mtriple=i686-unknown  -mattr=+slow-3ops-lea  | FileCheck %s -check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown   | FileCheck %s -check-prefix=X86

 %struct.SA = type { i32 , i32 , i32 , i32 , i32};

@ -10,41 +10,47 @@ define void @foo(%struct.SA* nocapture %ctx, i32 %n) local_unnamed_addr #0 {
 ; X64-NEXT:    .p2align 4, 0x90
 ; X64-NEXT:  .LBB0_1: # %loop
 ; X64-NEXT:    # =>This Inner Loop Header: Depth=1
-; X64-NEXT:    movl 16(%rdi), %eax
-; X64-NEXT:    movl (%rdi), %ecx
-; X64-NEXT:    addl %eax, %ecx
-; X64-NEXT:    incl %ecx
-; X64-NEXT:    movl %ecx, 12(%rdi)
+; X64-NEXT:    movl (%rdi), %eax
+; X64-NEXT:    movl 16(%rdi), %ecx
+; X64-NEXT:    leal 1(%rax,%rcx), %edx
+; X64-NEXT:    movl %edx, 12(%rdi)
 ; X64-NEXT:    decl %esi
 ; X64-NEXT:    jne .LBB0_1
 ; X64-NEXT:  # BB#2: # %exit
-; X64-NEXT:    addl %eax, %ecx
-; X64-NEXT:    movl %ecx, 16(%rdi)
+; X64-NEXT:    addl %ecx, %eax
+; X64-NEXT:    leal 1(%rcx,%rax), %eax
+; X64-NEXT:    movl %eax, 16(%rdi)
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: foo:
 ; X86:       # BB#0: # %entry
-; X86-NEXT:    pushl %esi
+; X86-NEXT:    pushl %edi
 ; X86-NEXT:  .Lcfi0:
 ; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    pushl %esi
 ; X86-NEXT:  .Lcfi1:
-; X86-NEXT:    .cfi_offset %esi, -8
+; X86-NEXT:    .cfi_def_cfa_offset 12
+; X86-NEXT:  .Lcfi2:
+; X86-NEXT:    .cfi_offset %esi, -12
+; X86-NEXT:  .Lcfi3:
+; X86-NEXT:    .cfi_offset %edi, -8
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    .p2align 4, 0x90
 ; X86-NEXT:  .LBB0_1: # %loop
 ; X86-NEXT:    # =>This Inner Loop Header: Depth=1
-; X86-NEXT:    movl 16(%eax), %edx
-; X86-NEXT:    movl (%eax), %esi
-; X86-NEXT:    addl %edx, %esi
-; X86-NEXT:    incl %esi
-; X86-NEXT:    movl %esi, 12(%eax)
+; X86-NEXT:    movl (%eax), %edx
+; X86-NEXT:    movl 16(%eax), %esi
+; X86-NEXT:    leal 1(%edx,%esi), %edi
+; X86-NEXT:    movl %edi, 12(%eax)
 ; X86-NEXT:    decl %ecx
 ; X86-NEXT:    jne .LBB0_1
 ; X86-NEXT:  # BB#2: # %exit
-; X86-NEXT:    addl %edx, %esi
-; X86-NEXT:    movl %esi, 16(%eax)
+; X86-NEXT:    addl %esi, %edx
+; X86-NEXT:    leal 1(%esi,%edx), %ecx
+; X86-NEXT:    movl %ecx, 16(%eax)
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
 ; X86-NEXT:    retl
 entry:
   br label %loop
--- a/test/CodeGen/X86/lea-opt-cse3.ll
+++ b/test/CodeGen/X86/lea-opt-cse3.ll
@ -8,7 +8,7 @@ define i32 @foo(i32 %a, i32 %b) local_unnamed_addr #0 {
 ; X64-NEXT:    # kill: %ESI<def> %ESI<kill> %RSI<def>
 ; X64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; X64-NEXT:    leal 4(%rdi,%rsi,2), %ecx
-; X64-NEXT:    leal (%ecx,%rsi,2), %eax
+; X64-NEXT:    leal 4(%rdi,%rsi,4), %eax
 ; X64-NEXT:    imull %ecx, %eax
 ; X64-NEXT:    retq
 ;
@ -16,9 +16,9 @@ define i32 @foo(i32 %a, i32 %b) local_unnamed_addr #0 {
 ; X86:       # BB#0: # %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    leal 4(%ecx,%eax,2), %ecx
-; X86-NEXT:    leal (%ecx,%eax,2), %eax
-; X86-NEXT:    imull %ecx, %eax
+; X86-NEXT:    leal 4(%ecx,%eax,2), %edx
+; X86-NEXT:    leal 4(%ecx,%eax,4), %eax
+; X86-NEXT:    imull %edx, %eax
 ; X86-NEXT:    retl
 entry:
  %mul = shl i32 %b, 1
@ -36,7 +36,7 @@ define i32 @foo1(i32 %a, i32 %b) local_unnamed_addr #0 {
 ; X64-NEXT:    # kill: %ESI<def> %ESI<kill> %RSI<def>
 ; X64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; X64-NEXT:    leal 4(%rdi,%rsi,4), %ecx
-; X64-NEXT:    leal (%ecx,%rsi,4), %eax
+; X64-NEXT:    leal 4(%rdi,%rsi,8), %eax
 ; X64-NEXT:    imull %ecx, %eax
 ; X64-NEXT:    retq
 ;
@ -44,9 +44,9 @@ define i32 @foo1(i32 %a, i32 %b) local_unnamed_addr #0 {
 ; X86:       # BB#0: # %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    leal 4(%ecx,%eax,4), %ecx
-; X86-NEXT:    leal (%ecx,%eax,4), %eax
-; X86-NEXT:    imull %ecx, %eax
+; X86-NEXT:    leal 4(%ecx,%eax,4), %edx
+; X86-NEXT:    leal 4(%ecx,%eax,8), %eax
+; X86-NEXT:    imull %edx, %eax
 ; X86-NEXT:    retl
 entry:
  %mul = shl i32 %b, 2
@ -68,23 +68,31 @@ define i32 @foo1_mult_basic_blocks(i32 %a, i32 %b) local_unnamed_addr #0 {
 ; X64-NEXT:    cmpl $10, %ecx
 ; X64-NEXT:    je .LBB2_2
 ; X64-NEXT:  # BB#1: # %mid
-; X64-NEXT:    leal (%ecx,%rsi,4), %eax
-; X64-NEXT:    imull %ecx, %eax
+; X64-NEXT:    leal 4(%rdi,%rsi,8), %eax
+; X64-NEXT:    imull %eax, %ecx
+; X64-NEXT:    movl %ecx, %eax
 ; X64-NEXT:  .LBB2_2: # %exit
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: foo1_mult_basic_blocks:
 ; X86:       # BB#0: # %entry
+; X86-NEXT:    pushl %esi
+; X86-NEXT:  .Lcfi0:
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:  .Lcfi1:
+; X86-NEXT:    .cfi_offset %esi, -8
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    leal 4(%eax,%edx,4), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    leal 4(%esi,%edx,4), %ecx
 ; X86-NEXT:    xorl %eax, %eax
 ; X86-NEXT:    cmpl $10, %ecx
 ; X86-NEXT:    je .LBB2_2
 ; X86-NEXT:  # BB#1: # %mid
-; X86-NEXT:    leal (%ecx,%edx,4), %eax
-; X86-NEXT:    imull %ecx, %eax
+; X86-NEXT:    leal 4(%esi,%edx,8), %eax
+; X86-NEXT:    imull %eax, %ecx
+; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:  .LBB2_2: # %exit
+; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 entry:
  %mul = shl i32 %b, 2
@ -123,9 +131,9 @@ define i32 @foo1_mult_basic_blocks_illegal_scale(i32 %a, i32 %b) local_unnamed_a
 ; X86-LABEL: foo1_mult_basic_blocks_illegal_scale:
 ; X86:       # BB#0: # %entry
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:  .Lcfi0:
+; X86-NEXT:  .Lcfi2:
 ; X86-NEXT:    .cfi_def_cfa_offset 8
-; X86-NEXT:  .Lcfi1:
+; X86-NEXT:  .Lcfi3:
 ; X86-NEXT:    .cfi_offset %esi, -8
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
--- a/test/CodeGen/X86/lea-opt-cse4.ll
+++ b/test/CodeGen/X86/lea-opt-cse4.ll
@ -1,31 +1,43 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+slow-3ops-lea | FileCheck %s -check-prefix=X64
-; RUN: llc < %s -mtriple=i686-unknown -mattr=+slow-3ops-lea | FileCheck %s -check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown   | FileCheck %s -check-prefix=X86

 %struct.SA = type { i32 , i32 , i32 , i32 , i32};

 define void @foo(%struct.SA* nocapture %ctx, i32 %n) local_unnamed_addr #0 {
 ; X64-LABEL: foo:
 ; X64:       # BB#0: # %entry
-; X64-NEXT:    movl (%rdi), %eax
-; X64-NEXT:    movl 16(%rdi), %ecx
-; X64-NEXT:    leal (%rax,%rcx,4), %eax
-; X64-NEXT:    addl $1, %eax
-; X64-NEXT:    movl %eax, 12(%rdi)
-; X64-NEXT:    addl %ecx, %eax
+; X64-NEXT:    movl 16(%rdi), %eax
+; X64-NEXT:    movl (%rdi), %ecx
+; X64-NEXT:    addl %eax, %ecx
+; X64-NEXT:    addl %eax, %ecx
+; X64-NEXT:    addl %eax, %ecx
+; X64-NEXT:    leal (%rcx,%rax), %edx
+; X64-NEXT:    leal 1(%rax,%rcx), %ecx
+; X64-NEXT:    movl %ecx, 12(%rdi)
+; X64-NEXT:    leal 1(%rax,%rdx), %eax
 ; X64-NEXT:    movl %eax, 16(%rdi)
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: foo:
 ; X86:       # BB#0: # %entry
+; X86-NEXT:    pushl %esi
+; X86-NEXT:  .Lcfi0:
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:  .Lcfi1:
+; X86-NEXT:    .cfi_offset %esi, -8
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl (%eax), %ecx
-; X86-NEXT:    movl 16(%eax), %edx
-; X86-NEXT:    leal (%ecx,%edx,4), %ecx
-; X86-NEXT:    addl $1, %ecx
-; X86-NEXT:    movl %ecx, 12(%eax)
-; X86-NEXT:    addl %edx, %ecx
+; X86-NEXT:    movl 16(%eax), %ecx
+; X86-NEXT:    movl (%eax), %edx
+; X86-NEXT:    addl %ecx, %edx
+; X86-NEXT:    addl %ecx, %edx
+; X86-NEXT:    addl %ecx, %edx
+; X86-NEXT:    leal 1(%ecx,%edx), %esi
+; X86-NEXT:    addl %ecx, %edx
+; X86-NEXT:    movl %esi, 12(%eax)
+; X86-NEXT:    leal 1(%ecx,%edx), %ecx
 ; X86-NEXT:    movl %ecx, 16(%eax)
+; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 entry:
   %h0 = getelementptr inbounds %struct.SA, %struct.SA* %ctx, i64 0, i32 0
@ -52,15 +64,15 @@ define void @foo_loop(%struct.SA* nocapture %ctx, i32 %n) local_unnamed_addr #0
 ; X64-NEXT:    .p2align 4, 0x90
 ; X64-NEXT:  .LBB1_1: # %loop
 ; X64-NEXT:    # =>This Inner Loop Header: Depth=1
-; X64-NEXT:    movl 16(%rdi), %eax
 ; X64-NEXT:    movl (%rdi), %ecx
-; X64-NEXT:    addl %eax, %ecx
-; X64-NEXT:    incl %ecx
-; X64-NEXT:    movl %ecx, 12(%rdi)
+; X64-NEXT:    movl 16(%rdi), %eax
+; X64-NEXT:    leal 1(%rcx,%rax), %edx
+; X64-NEXT:    movl %edx, 12(%rdi)
 ; X64-NEXT:    decl %esi
 ; X64-NEXT:    jne .LBB1_1
 ; X64-NEXT:  # BB#2: # %exit
 ; X64-NEXT:    addl %eax, %ecx
+; X64-NEXT:    leal 1(%rax,%rcx), %ecx
 ; X64-NEXT:    addl %eax, %ecx
 ; X64-NEXT:    addl %eax, %ecx
 ; X64-NEXT:    addl %eax, %ecx
@ -72,25 +84,30 @@ define void @foo_loop(%struct.SA* nocapture %ctx, i32 %n) local_unnamed_addr #0
 ;
 ; X86-LABEL: foo_loop:
 ; X86:       # BB#0: # %entry
-; X86-NEXT:    pushl %esi
-; X86-NEXT:  .Lcfi0:
+; X86-NEXT:    pushl %edi
+; X86-NEXT:  .Lcfi2:
 ; X86-NEXT:    .cfi_def_cfa_offset 8
-; X86-NEXT:  .Lcfi1:
-; X86-NEXT:    .cfi_offset %esi, -8
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:  .Lcfi3:
+; X86-NEXT:    .cfi_def_cfa_offset 12
+; X86-NEXT:  .Lcfi4:
+; X86-NEXT:    .cfi_offset %esi, -12
+; X86-NEXT:  .Lcfi5:
+; X86-NEXT:    .cfi_offset %edi, -8
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    .p2align 4, 0x90
 ; X86-NEXT:  .LBB1_1: # %loop
 ; X86-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-NEXT:    movl (%eax), %esi
 ; X86-NEXT:    movl 16(%eax), %ecx
-; X86-NEXT:    movl (%eax), %edx
-; X86-NEXT:    addl %ecx, %edx
-; X86-NEXT:    incl %edx
-; X86-NEXT:    movl %edx, 12(%eax)
-; X86-NEXT:    decl %esi
+; X86-NEXT:    leal 1(%esi,%ecx), %edi
+; X86-NEXT:    movl %edi, 12(%eax)
+; X86-NEXT:    decl %edx
 ; X86-NEXT:    jne .LBB1_1
 ; X86-NEXT:  # BB#2: # %exit
-; X86-NEXT:    addl %ecx, %edx
+; X86-NEXT:    addl %ecx, %esi
+; X86-NEXT:    leal 1(%ecx,%esi), %edx
 ; X86-NEXT:    addl %ecx, %edx
 ; X86-NEXT:    addl %ecx, %edx
 ; X86-NEXT:    addl %ecx, %edx
@ -99,6 +116,7 @@ define void @foo_loop(%struct.SA* nocapture %ctx, i32 %n) local_unnamed_addr #0
 ; X86-NEXT:    addl %ecx, %edx
 ; X86-NEXT:    movl %edx, 16(%eax)
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
 ; X86-NEXT:    retl
 entry:
   br label %loop
--- a/test/CodeGen/X86/mul-constant-i16.ll
+++ b/test/CodeGen/X86/mul-constant-i16.ll
@ -558,10 +558,11 @@ define i16 @test_mul_by_28(i16 %x) {
 define i16 @test_mul_by_29(i16 %x) {
 ; X86-LABEL: test_mul_by_29:
 ; X86:       # BB#0:
-; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    leal (%eax,%eax,8), %ecx
-; X86-NEXT:    leal (%ecx,%ecx,2), %ecx
-; X86-NEXT:    leal (%ecx,%eax,2), %eax
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    leal (%ecx,%ecx,8), %eax
+; X86-NEXT:    leal (%eax,%eax,2), %eax
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    addl %ecx, %eax
 ; X86-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; X86-NEXT:    retl
 ;
@ -570,7 +571,8 @@ define i16 @test_mul_by_29(i16 %x) {
 ; X64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; X64-NEXT:    leal (%rdi,%rdi,8), %eax
 ; X64-NEXT:    leal (%rax,%rax,2), %eax
-; X64-NEXT:    leal (%rax,%rdi,2), %eax
+; X64-NEXT:    addl %edi, %eax
+; X64-NEXT:    addl %edi, %eax
 ; X64-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; X64-NEXT:    retq
  %mul = mul nsw i16 %x, 29
--- a/test/CodeGen/X86/mul-constant-i32.ll
+++ b/test/CodeGen/X86/mul-constant-i32.ll
@ -1457,10 +1457,11 @@ define i32 @test_mul_by_28(i32 %x) {
 define i32 @test_mul_by_29(i32 %x) {
 ; X86-LABEL: test_mul_by_29:
 ; X86:       # BB#0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    leal (%eax,%eax,8), %ecx
-; X86-NEXT:    leal (%ecx,%ecx,2), %ecx
-; X86-NEXT:    leal (%ecx,%eax,2), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    leal (%ecx,%ecx,8), %eax
+; X86-NEXT:    leal (%eax,%eax,2), %eax
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    addl %ecx, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-HSW-LABEL: test_mul_by_29:
@ -1468,7 +1469,8 @@ define i32 @test_mul_by_29(i32 %x) {
 ; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; X64-HSW-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
 ; X64-HSW-NEXT:    leal (%rax,%rax,2), %eax # sched: [1:0.50]
-; X64-HSW-NEXT:    leal (%rax,%rdi,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    addl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT:    addl %edi, %eax # sched: [1:0.25]
 ; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_29:
@ -1476,7 +1478,8 @@ define i32 @test_mul_by_29(i32 %x) {
 ; X64-JAG-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
 ; X64-JAG-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
 ; X64-JAG-NEXT:    leal (%rax,%rax,2), %eax # sched: [1:0.50]
-; X64-JAG-NEXT:    leal (%rax,%rdi,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    addl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    addl %edi, %eax # sched: [1:0.50]
 ; X64-JAG-NEXT:    retq # sched: [4:1.00]
 ;
 ; X86-NOOPT-LABEL: test_mul_by_29:
--- a/test/CodeGen/X86/mul-constant-i64.ll
+++ b/test/CodeGen/X86/mul-constant-i64.ll
@ -1523,7 +1523,8 @@ define i64 @test_mul_by_29(i64 %x) {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    leal (%eax,%eax,8), %ecx
 ; X86-NEXT:    leal (%ecx,%ecx,2), %ecx
-; X86-NEXT:    leal (%ecx,%eax,2), %ecx
+; X86-NEXT:    addl %eax, %ecx
+; X86-NEXT:    addl %eax, %ecx
 ; X86-NEXT:    movl $29, %eax
 ; X86-NEXT:    mull {{[0-9]+}}(%esp)
 ; X86-NEXT:    addl %ecx, %edx
@ -1533,14 +1534,16 @@ define i64 @test_mul_by_29(i64 %x) {
 ; X64-HSW:       # BB#0:
 ; X64-HSW-NEXT:    leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
 ; X64-HSW-NEXT:    leaq (%rax,%rax,2), %rax # sched: [1:0.50]
-; X64-HSW-NEXT:    leaq (%rax,%rdi,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    addq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT:    addq %rdi, %rax # sched: [1:0.25]
 ; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_29:
 ; X64-JAG:       # BB#0:
 ; X64-JAG-NEXT:    leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
 ; X64-JAG-NEXT:    leaq (%rax,%rax,2), %rax # sched: [1:0.50]
-; X64-JAG-NEXT:    leaq (%rax,%rdi,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    addq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    addq %rdi, %rax # sched: [1:0.50]
 ; X64-JAG-NEXT:    retq # sched: [4:1.00]
 ;
 ; X86-NOOPT-LABEL: test_mul_by_29:
--- a/test/CodeGen/X86/mul-constant-result.ll
+++ b/test/CodeGen/X86/mul-constant-result.ll
@ -166,7 +166,8 @@ define i32 @mult(i32, i32) local_unnamed_addr #0 {
 ; X86-NEXT:  .LBB0_35:
 ; X86-NEXT:    leal (%eax,%eax,8), %ecx
 ; X86-NEXT:    leal (%ecx,%ecx,2), %ecx
-; X86-NEXT:    leal (%ecx,%eax,2), %eax
+; X86-NEXT:    addl %eax, %ecx
+; X86-NEXT:    addl %ecx, %eax
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 ; X86-NEXT:  .LBB0_36:
@ -324,15 +325,14 @@ define i32 @mult(i32, i32) local_unnamed_addr #0 {
 ; X64-HSW-NEXT:  .LBB0_31:
 ; X64-HSW-NEXT:    leal (%rax,%rax,8), %ecx
 ; X64-HSW-NEXT:    leal (%rcx,%rcx,2), %ecx
-; X64-HSW-NEXT:  .LBB0_17:
-; X64-HSW-NEXT:    addl %eax, %ecx
-; X64-HSW-NEXT:    movl %ecx, %eax
-; X64-HSW-NEXT:    # kill: %EAX<def> %EAX<kill> %RAX<kill>
-; X64-HSW-NEXT:    retq
+; X64-HSW-NEXT:    jmp .LBB0_17
 ; X64-HSW-NEXT:  .LBB0_32:
 ; X64-HSW-NEXT:    leal (%rax,%rax,8), %ecx
 ; X64-HSW-NEXT:    leal (%rcx,%rcx,2), %ecx
-; X64-HSW-NEXT:    leal (%rcx,%rax,2), %eax
+; X64-HSW-NEXT:    addl %eax, %ecx
+; X64-HSW-NEXT:  .LBB0_17:
+; X64-HSW-NEXT:    addl %eax, %ecx
+; X64-HSW-NEXT:    movl %ecx, %eax
 ; X64-HSW-NEXT:    # kill: %EAX<def> %EAX<kill> %RAX<kill>
 ; X64-HSW-NEXT:    retq
 ; X64-HSW-NEXT:  .LBB0_33:
--- a/test/CodeGen/X86/pr34629.ll
+++ b/test/CodeGen/X86/pr34629.ll
@ -13,8 +13,8 @@ define void @c() local_unnamed_addr #0 {
 ; CHECK:       # BB#0: # %entry
 ; CHECK-NEXT:    movq {{.*}}(%rip), %rax
 ; CHECK-NEXT:    leaq (%rax,%rax,4), %rcx
-; CHECK-NEXT:    leaq (%rcx,%rax,4), %rax
 ; CHECK-NEXT:    negq %rcx
+; CHECK-NEXT:    leaq (%rax,%rax,8), %rax
 ; CHECK-NEXT:    leaq (%rax,%rax,4), %rax
 ; CHECK-NEXT:    testq %rax, %rcx
 ; CHECK-NEXT:    je .LBB0_2
--- a/test/CodeGen/X86/pr34634.ll
+++ b/test/CodeGen/X86/pr34634.ll
@ -14,11 +14,11 @@ define void @fn1() local_unnamed_addr #0 {
 ; CHECK-NEXT:    movslq {{.*}}(%rip), %rax
 ; CHECK-NEXT:    leaq (%rax,%rax,4), %rcx
 ; CHECK-NEXT:    leaq (,%rax,4), %rdx
-; CHECK-NEXT:    movl a(%rdx,%rcx,8), %edx
-; CHECK-NEXT:    leaq (%rcx,%rax,4), %rcx
-; CHECK-NEXT:    leaq (%rcx,%rcx,2), %rcx
-; CHECK-NEXT:    addq %rax, %rcx
-; CHECK-NEXT:    movl %edx, b(%rcx,%rax,4)
+; CHECK-NEXT:    movl a(%rdx,%rcx,8), %ecx
+; CHECK-NEXT:    leaq (%rax,%rax,8), %rdx
+; CHECK-NEXT:    leaq (%rdx,%rdx,2), %rdx
+; CHECK-NEXT:    addq %rax, %rdx
+; CHECK-NEXT:    movl %ecx, b(%rdx,%rax,4)
 ; CHECK-NEXT:    retq
 entry:
  %0 = load i32, i32* @c, align 4, !tbaa !2
@ -37,11 +37,11 @@ define i32 @main() local_unnamed_addr #0 {
 ; CHECK-NEXT:    movslq {{.*}}(%rip), %rax
 ; CHECK-NEXT:    leaq (%rax,%rax,4), %rcx
 ; CHECK-NEXT:    leaq (,%rax,4), %rdx
-; CHECK-NEXT:    movl a(%rdx,%rcx,8), %edx
-; CHECK-NEXT:    leaq (%rcx,%rax,4), %rcx
-; CHECK-NEXT:    leaq (%rcx,%rcx,2), %rcx
-; CHECK-NEXT:    addq %rax, %rcx
-; CHECK-NEXT:    movl %edx, b(%rcx,%rax,4)
+; CHECK-NEXT:    movl a(%rdx,%rcx,8), %ecx
+; CHECK-NEXT:    leaq (%rax,%rax,8), %rdx
+; CHECK-NEXT:    leaq (%rdx,%rdx,2), %rdx
+; CHECK-NEXT:    addq %rax, %rdx
+; CHECK-NEXT:    movl %ecx, b(%rdx,%rax,4)
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    retq
 entry:
--- a/test/CodeGen/X86/umul-with-overflow.ll
+++ b/test/CodeGen/X86/umul-with-overflow.ll
@ -40,10 +40,10 @@ define i32 @test2(i32 %a, i32 %b) nounwind readnone {
 ; X64-NEXT:    leal (%rdi,%rdi), %eax
 ; X64-NEXT:    retq
 entry:
-        %tmp0 = add i32 %b, %a
-        %tmp1 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %tmp0, i32 2)
-        %tmp2 = extractvalue { i32, i1 } %tmp1, 0
-        ret i32 %tmp2
+	%tmp0 = add i32 %b, %a
+	%tmp1 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %tmp0, i32 2)
+	%tmp2 = extractvalue { i32, i1 } %tmp1, 0
+	ret i32 %tmp2
 }

 define i32 @test3(i32 %a, i32 %b) nounwind readnone {
@ -64,8 +64,8 @@ define i32 @test3(i32 %a, i32 %b) nounwind readnone {
 ; X64-NEXT:    mull %ecx
 ; X64-NEXT:    retq
 entry:
-        %tmp0 = add i32 %b, %a
-        %tmp1 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %tmp0, i32 4)
-        %tmp2 = extractvalue { i32, i1 } %tmp1, 0
-        ret i32 %tmp2
+	%tmp0 = add i32 %b, %a
+	%tmp1 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %tmp0, i32 4)
+	%tmp2 = extractvalue { i32, i1 } %tmp1, 0
+	ret i32 %tmp2
 }
--- a/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
@ -13,14 +13,14 @@
 ; X64-NEXT: .p2align
 ; X64: %loop
 ; no complex address modes
-; X64-NOT: [1-9]+(%{{[^)]+}},%{{[^)]+}},
+; X64-NOT: (%{{[^)]+}},%{{[^)]+}},
 ;
 ; X32: @simple
 ; no expensive address computation in the preheader
 ; X32-NOT: imul
 ; X32: %loop
 ; no complex address modes
-; X32-NOT: [1-9]+(%{{[^)]+}},%{{[^)]+}},
+; X32-NOT: (%{{[^)]+}},%{{[^)]+}},
 define i32 @simple(i32* %a, i32* %b, i32 %x) nounwind {
 entry:
  br label %loop
@ -103,7 +103,7 @@ exit:
 ; X32-NOT: mov{{.*}}(%esp){{$}}
 ; X32: %for.body{{$}}
 ; no complex address modes
-; X32-NOT: [1-9]+(%{{[^)]+}},%{{[^)]+}},
+; X32-NOT: (%{{[^)]+}},%{{[^)]+}},
 ; no reloads
 ; X32-NOT: (%esp)
 define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture %res, i32 %x, i32 %y, i32 %z) nounwind {