diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h
index 6c899ca7ee0..c741ce622cd 100644
--- a/include/llvm/CodeGen/MachineInstr.h
+++ b/include/llvm/CodeGen/MachineInstr.h
@@ -1320,12 +1320,13 @@ public:
   /// Add all implicit def and use operands to this instruction.
   void addImplicitDefUseOperands(MachineFunction &MF);
 
-private:
   /// If this instruction is embedded into a MachineFunction, return the
   /// MachineRegisterInfo object for the current function, otherwise
   /// return null.
   MachineRegisterInfo *getRegInfo();
 
+private:
+
   /// Unlink all of the register operands in this instruction from their
   /// respective use lists.  This requires that the operands already be on their
   /// use lists.
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index 82f507e64b9..090d0e08a78 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -300,6 +300,9 @@ public:
   /// type legalization.
   bool NewNodesMustHaveLegalTypes = false;
 
+  /// Set to true for DAG of BasicBlock contained inside a loop.
+  bool IsDAGPartOfLoop = false;
+
 private:
   /// DAGUpdateListener is a friend so it can manipulate the listener stack.
   friend struct DAGUpdateListener;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index cb37137d547..28e89b1be5c 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -27,6 +27,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/CodeGen/FastISel.h"
@@ -325,6 +326,8 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
   if (OptLevel != CodeGenOpt::None)
     AU.addRequired<AAResultsWrapperPass>();
   AU.addRequired<GCModuleInfo>();
+  if (OptLevel != CodeGenOpt::None)
+    AU.addRequired<LoopInfoWrapperPass>();
   AU.addRequired<StackProtector>();
   AU.addPreserved<StackProtector>();
   AU.addPreserved<GCModuleInfo>();
@@ -1415,6 +1418,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
 
   // Iterate over all basic blocks in the function.
   for (const BasicBlock *LLVMBB : RPOT) {
+    CurDAG->IsDAGPartOfLoop = false;
     if (OptLevel != CodeGenOpt::None) {
       bool AllPredsVisited = true;
       for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB);
@@ -1592,6 +1596,13 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
                                    FunctionBasedInstrumentation);
     }
 
+    if (OptLevel != CodeGenOpt::None) {
+      auto &LIWP = getAnalysis<LoopInfoWrapperPass>();
+      LoopInfo &LI = LIWP.getLoopInfo();
+      if (LI.getLoopFor(LLVMBB))
+        CurDAG->IsDAGPartOfLoop = true;
+    }
+
     if (Begin != BI)
       ++NumDAGBlocks;
     else
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 93a3b9281d9..b74eaf0daf5 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -88,6 +88,11 @@ namespace {
              IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr;
     }
 
+    bool hasComplexAddressingMode() const {
+      return Disp && IndexReg.getNode() != nullptr &&
+             Base_Reg.getNode() != nullptr;
+    }
+
     /// Return true if this addressing mode is already RIP-relative.
     bool isRIPRelative() const {
       if (BaseType != RegBase) return false;
@@ -97,6 +102,10 @@ namespace {
       return false;
     }
 
+    bool isLegalScale() const {
+      return (Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8);
+    }
+
     void setBaseReg(SDValue Reg) {
       BaseType = RegBase;
       Base_Reg = Reg;
@@ -162,10 +171,13 @@ namespace {
     /// If true, selector should try to optimize for minimum code size.
     bool OptForMinSize;
 
+    /// If true, selector should try to aggresively fold operands into AM.
+    bool OptForAggressingFolding;
+
   public:
     explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
         : SelectionDAGISel(tm, OptLevel), OptForSize(false),
-          OptForMinSize(false) {}
+          OptForMinSize(false), OptForAggressingFolding(false) {}
 
     StringRef getPassName() const override {
       return "X86 DAG->DAG Instruction Selection";
@@ -184,6 +196,12 @@ namespace {
 
     void PreprocessISelDAG() override;
 
+    void setAggressiveOperandFolding(bool val) {
+      OptForAggressingFolding = val;
+    }
+
+    bool getAggressiveOperandFolding() { return OptForAggressingFolding; }
+
 // Include the pieces autogenerated from the target description.
 #include "X86GenDAGISel.inc"
 
@@ -198,6 +216,7 @@ namespace {
     bool matchAdd(SDValue N, X86ISelAddressMode &AM, unsigned Depth);
     bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                  unsigned Depth);
+    bool matchAddressLEA(SDValue N, X86ISelAddressMode &AM);
     bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);
     bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
                     SDValue &Scale, SDValue &Index, SDValue &Disp,
@@ -447,6 +466,20 @@ namespace {
 
     bool isMaskZeroExtended(SDNode *N) const;
   };
+
+  class X86AggressiveOperandFolding {
+  public:
+    explicit X86AggressiveOperandFolding(X86DAGToDAGISel &ISel, bool val)
+        : Selector(&ISel) {
+      Selector->setAggressiveOperandFolding(val);
+    }
+    ~X86AggressiveOperandFolding() {
+      Selector->setAggressiveOperandFolding(false);
+    }
+
+  private:
+    X86DAGToDAGISel *Selector;
+  };
 }
 
 
@@ -1194,7 +1227,7 @@ static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
   AM.IndexReg = NewSRL;
   return false;
 }
-
+   
 bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                               unsigned Depth) {
   SDLoc dl(N);
@@ -1202,8 +1235,14 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
       dbgs() << "MatchAddress: ";
       AM.dump();
     });
-  // Limit recursion.
-  if (Depth > 5)
+ 
+  // Limit recursion. For aggressive operand folding recurse 
+  // till depth 8 which is the maximum legal scale value.
+  auto getMaxOperandFoldingDepth = [&] () -> unsigned int {
+      return getAggressiveOperandFolding() ? 8 : 5;
+  };
+
+  if (Depth > getMaxOperandFoldingDepth())
     return matchAddressBase(N, AM);
 
   // If this is already a %rip relative address, we can only merge immediates
@@ -1494,6 +1533,20 @@ bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) {
       return false;
     }
 
+    if (OptLevel != CodeGenOpt::None && getAggressiveOperandFolding() &&
+        AM.BaseType == X86ISelAddressMode::RegBase) {
+      if (AM.Base_Reg == N) {
+        SDValue Base_Reg = AM.Base_Reg;
+        AM.Base_Reg = AM.IndexReg;
+        AM.IndexReg = Base_Reg;
+        AM.Scale++;
+        return false;
+      } else if (AM.IndexReg == N) {
+        AM.Scale++;
+        return false;
+      }
+    }
+
     // Otherwise, we cannot select it.
     return true;
   }
@@ -1729,7 +1782,7 @@ bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base,
                                          SDValue &Disp, SDValue &Segment) {
   // Save the debug loc before calling selectLEAAddr, in case it invalidates N.
   SDLoc DL(N);
-
+   
   if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment))
     return false;
 
@@ -1764,6 +1817,29 @@ bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base,
   return true;
 }
 
+bool X86DAGToDAGISel::matchAddressLEA(SDValue N, X86ISelAddressMode &AM) {
+  // Avoid enabling aggressive operand folding when node N is a part of loop.
+  X86AggressiveOperandFolding Enable(*this, !CurDAG->IsDAGPartOfLoop);
+
+  bool matchRes = matchAddress(N, AM);
+
+  // Check for legality of scale when recursion unwinds back to the top.
+  if (!matchRes) {
+    if (!AM.isLegalScale())
+      return true;
+
+    // Avoid creating costly complex LEAs having scale less than 2
+    // within loop.
+    if(CurDAG->IsDAGPartOfLoop && Subtarget->slow3OpsLEA() &&
+        AM.Scale <= 2 && AM.hasComplexAddressingMode() &&
+         (!AM.hasSymbolicDisplacement() && N.getOpcode() < ISD::BUILTIN_OP_END))
+     return true;
+  }
+
+  return matchRes;
+}
+
+
 /// Calls SelectAddr and determines if the maximal addressing
 /// mode it matches can be cost effectively emitted as an LEA instruction.
 bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
@@ -1781,7 +1857,7 @@ bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
   SDValue Copy = AM.Segment;
   SDValue T = CurDAG->getRegister(0, MVT::i32);
   AM.Segment = T;
-  if (matchAddress(N, AM))
+  if (matchAddressLEA(N, AM))
     return false;
   assert (T == AM.Segment);
   AM.Segment = Copy;
diff --git a/lib/Target/X86/X86OptimizeLEAs.cpp b/lib/Target/X86/X86OptimizeLEAs.cpp
index cc136866c47..7783c116400 100644
--- a/lib/Target/X86/X86OptimizeLEAs.cpp
+++ b/lib/Target/X86/X86OptimizeLEAs.cpp
@@ -27,6 +27,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -58,6 +59,7 @@ static cl::opt<bool>
                      cl::init(false));
 
 STATISTIC(NumSubstLEAs, "Number of LEA instruction substitutions");
+STATISTIC(NumFactoredLEAs, "Number of LEAs factorized");
 STATISTIC(NumRedundantLEAs, "Number of redundant LEA instructions removed");
 
 /// \brief Returns true if two machine operands are identical and they are not
@@ -65,6 +67,10 @@ STATISTIC(NumRedundantLEAs, "Number of redundant LEA instructions removed");
 static inline bool isIdenticalOp(const MachineOperand &MO1,
                                  const MachineOperand &MO2);
 
+/// \brief Returns true if two machine instructions have identical operands.
+static bool isIdenticalMI(MachineRegisterInfo *MRI, const MachineOperand &MO1,
+                          const MachineOperand &MO2);
+
 /// \brief Returns true if two address displacement operands are of the same
 /// type and use the same symbol/index/address regardless of the offset.
 static bool isSimilarDispOp(const MachineOperand &MO1,
@@ -73,6 +79,9 @@ static bool isSimilarDispOp(const MachineOperand &MO1,
 /// \brief Returns true if the instruction is LEA.
 static inline bool isLEA(const MachineInstr &MI);
 
+/// \brief Returns true if Definition of Operand is a copylike instruction.
+static bool isDefCopyLike(MachineRegisterInfo *MRI, const MachineOperand &Opr);
+
 namespace {
 
 /// A key based on instruction's memory operands.
@@ -80,15 +89,35 @@ class MemOpKey {
 public:
   MemOpKey(const MachineOperand *Base, const MachineOperand *Scale,
            const MachineOperand *Index, const MachineOperand *Segment,
-           const MachineOperand *Disp)
-      : Disp(Disp) {
+           const MachineOperand *Disp, bool DispCheck = false)
+      : Disp(Disp), DeepCheck(DispCheck) {
     Operands[0] = Base;
     Operands[1] = Scale;
     Operands[2] = Index;
     Operands[3] = Segment;
   }
 
+  /// Checks operands of MemOpKey are identical, if Base or Index
+  /// operand definitions are of kind SUBREG_TO_REG then compare
+  /// operands of defining MI.
+  bool performDeepCheck(const MemOpKey &Other) const {
+    MachineInstr *MI = const_cast<MachineInstr *>(Operands[0]->getParent());
+    MachineRegisterInfo *MRI = MI->getRegInfo();
+
+    for (int i = 0; i < 4; i++) {
+      bool CopyLike = isDefCopyLike(MRI, *Operands[i]);
+      if (CopyLike && !isIdenticalMI(MRI, *Operands[i], *Other.Operands[i]))
+        return false;
+      else if (!CopyLike && !isIdenticalOp(*Operands[i], *Other.Operands[i]))
+        return false;
+    }
+    return isIdenticalOp(*Disp, *Other.Disp);
+  }
+
   bool operator==(const MemOpKey &Other) const {
+    if (DeepCheck)
+      return performDeepCheck(Other);
+
     // Addresses' bases, scales, indices and segments must be identical.
     for (int i = 0; i < 4; ++i)
       if (!isIdenticalOp(*Operands[i], *Other.Operands[i]))
@@ -106,6 +135,12 @@ public:
 
   // Address' displacement operand.
   const MachineOperand *Disp;
+
+  // If true checks Address' base, index, segment and
+  // displacement are identical, in additions if base/index
+  // are defined by copylike instruction then futher
+  // compare the operands of the defining instruction.
+  bool DeepCheck;
 };
 
 } // end anonymous namespace
@@ -131,12 +166,34 @@ template <> struct DenseMapInfo<MemOpKey> {
   static unsigned getHashValue(const MemOpKey &Val) {
     // Checking any field of MemOpKey is enough to determine if the key is
     // empty or tombstone.
+    hash_code Hash(0);
     assert(Val.Disp != PtrInfo::getEmptyKey() && "Cannot hash the empty key");
     assert(Val.Disp != PtrInfo::getTombstoneKey() &&
            "Cannot hash the tombstone key");
 
-    hash_code Hash = hash_combine(*Val.Operands[0], *Val.Operands[1],
-                                  *Val.Operands[2], *Val.Operands[3]);
+    auto getMIHash = [](MachineInstr *MI) -> hash_code {
+      hash_code h(0);
+      for (unsigned i = 1, e = MI->getNumOperands(); i < e; i++)
+        h = hash_combine(h, MI->getOperand(i));
+      return h;
+    };
+
+    const MachineOperand &Base = *Val.Operands[0];
+    const MachineOperand &Index = *Val.Operands[2];
+    MachineInstr *MI = const_cast<MachineInstr *>(Base.getParent());
+    MachineRegisterInfo *MRI = MI->getRegInfo();
+
+    if (isDefCopyLike(MRI, Base))
+      Hash = getMIHash(MRI->getVRegDef(Base.getReg()));
+    else
+      Hash = hash_combine(Hash, Base);
+
+    if (isDefCopyLike(MRI, Index))
+      Hash = getMIHash(MRI->getVRegDef(Index.getReg()));
+    else
+      Hash = hash_combine(Hash, Index);
+
+    Hash = hash_combine(Hash, *Val.Operands[1], *Val.Operands[3]);
 
     // If the address displacement is an immediate, it should not affect the
     // hash so that memory operands which differ only be immediate displacement
@@ -196,6 +253,16 @@ static inline MemOpKey getMemOpKey(const MachineInstr &MI, unsigned N) {
                   &MI.getOperand(N + X86::AddrDisp));
 }
 
+static inline MemOpKey getMemOpCSEKey(const MachineInstr &MI, unsigned N) {
+  static MachineOperand DummyScale = MachineOperand::CreateImm(1);
+  assert((isLEA(MI) || MI.mayLoadOrStore()) &&
+         "The instruction must be a LEA, a load or a store");
+  return MemOpKey(&MI.getOperand(N + X86::AddrBaseReg), &DummyScale,
+                  &MI.getOperand(N + X86::AddrIndexReg),
+                  &MI.getOperand(N + X86::AddrSegmentReg),
+                  &MI.getOperand(N + X86::AddrDisp), true);
+}
+
 static inline bool isIdenticalOp(const MachineOperand &MO1,
                                  const MachineOperand &MO2) {
   return MO1.isIdenticalTo(MO2) &&
@@ -203,6 +270,27 @@ static inline bool isIdenticalOp(const MachineOperand &MO1,
           !TargetRegisterInfo::isPhysicalRegister(MO1.getReg()));
 }
 
+static bool isIdenticalMI(MachineRegisterInfo *MRI, const MachineOperand &MO1,
+                          const MachineOperand &MO2) {
+  MachineInstr *MI1 = nullptr;
+  MachineInstr *MI2 = nullptr;
+  if (!MO1.isReg() || !MO2.isReg())
+    return false;
+
+  MI1 = MRI->getVRegDef(MO1.getReg());
+  MI2 = MRI->getVRegDef(MO2.getReg());
+  if (!MI1 || !MI2)
+    return false;
+  if (MI1->getOpcode() != MI2->getOpcode())
+    return false;
+  if (MI1->getNumOperands() != MI2->getNumOperands())
+    return false;
+  for (unsigned i = 1, e = MI1->getNumOperands(); i < e; ++i)
+    if (!isIdenticalOp(MI1->getOperand(i), MI2->getOperand(i)))
+      return false;
+  return true;
+}
+
 #ifndef NDEBUG
 static bool isValidDispOp(const MachineOperand &MO) {
   return MO.isImm() || MO.isCPI() || MO.isJTI() || MO.isSymbol() ||
@@ -234,8 +322,150 @@ static inline bool isLEA(const MachineInstr &MI) {
          Opcode == X86::LEA64r || Opcode == X86::LEA64_32r;
 }
 
+static bool isDefCopyLike(MachineRegisterInfo *MRI, const MachineOperand &Opr) {
+  bool isInstrErased = !(Opr.isReg() && Opr.getParent()->getParent());
+  if (!Opr.isReg() || isInstrErased ||
+      TargetRegisterInfo::isPhysicalRegister(Opr.getReg()))
+    return false;
+  MachineInstr *MI = MRI->getVRegDef(Opr.getReg());
+  return MI && MI->isCopyLike();
+}
+
 namespace {
 
+/// This class captures the functions and attributes
+/// needed to factorize LEA within and across basic
+/// blocks.LEA instruction with same BASE,OFFSET and
+/// INDEX are the candidates for factorization.
+class FactorizeLEAOpt {
+public:
+  using LEAListT = std::list<MachineInstr *>;
+  using LEAMapT = DenseMap<MemOpKey, LEAListT>;
+  using ValueT = DenseMap<MemOpKey, unsigned>;
+  using ScopeEntryT = std::pair<MachineBasicBlock *, ValueT>;
+  using ScopeStackT = std::vector<ScopeEntryT>;
+
+  FactorizeLEAOpt() = default;
+  FactorizeLEAOpt(const FactorizeLEAOpt &) = delete;
+  FactorizeLEAOpt &operator=(const FactorizeLEAOpt &) = delete;
+
+  void performCleanup() {
+    for (auto LEA : removedLEAs)
+      LEA->eraseFromParent();
+    LEAs.clear();
+    Stack.clear();
+    removedLEAs.clear();
+  }
+
+  LEAMapT &getLEAMap() { return LEAs; }
+  ScopeEntryT *getTopScope() { return &Stack.back(); }
+
+  void addForLazyRemoval(MachineInstr *Instr) { removedLEAs.insert(Instr); }
+
+  bool checkIfScheduledForRemoval(MachineInstr *Instr) {
+    return removedLEAs.find(Instr) != removedLEAs.end();
+  }
+
+  /// Push the ScopeEntry for the BasicBlock over Stack.
+  /// Also traverses over list of instruction and update
+  /// LEAs Map and ScopeEntry for each LEA instruction
+  /// found using insertLEA().
+  void collectDataForBasicBlock(MachineBasicBlock *MBB);
+
+  /// Stores the size of MachineInstr list corrosponding
+  /// to key K from LEAs MAP into the ScopeEntry of
+  /// the basic block, then insert the LEA at the beginning
+  /// of the list.
+  void insertLEA(MachineInstr *MI);
+
+  /// Pops out ScopeEntry of top most BasicBlock from the stack
+  /// and remove the LEA instructions contained in the scope
+  /// from the LEAs Map.
+  void removeDataForBasicBlock();
+
+  /// If LEA contains Physical Registers then its not a candidate
+  /// for factorizations since physical registers may violate SSA
+  /// semantics of MI.
+  bool containsPhyReg(MachineInstr *MI, unsigned RecLevel);
+
+private:
+  ScopeStackT Stack;
+  LEAMapT LEAs;
+  std::set<MachineInstr *> removedLEAs;
+};
+
+void FactorizeLEAOpt::collectDataForBasicBlock(MachineBasicBlock *MBB) {
+  ValueT EmptyMap;
+  ScopeEntryT SE = std::make_pair(MBB, EmptyMap);
+  Stack.push_back(SE);
+  for (auto &MI : *MBB) {
+    if (isLEA(MI))
+      insertLEA(&MI);
+  }
+}
+
+void FactorizeLEAOpt::removeDataForBasicBlock() {
+  ScopeEntryT &SE = Stack.back();
+  for (auto MapEntry : SE.second) {
+    LEAMapT::iterator Itr = LEAs.find(MapEntry.first);
+    assert((Itr != LEAs.end()) &&
+           "LEAs map must have a node corresponding to ScopeEntry's Key.");
+
+    while (((*Itr).second.size() > MapEntry.second))
+      (*Itr).second.pop_front();
+    // If list goes empty remove entry from LEAs Map.
+    if ((*Itr).second.empty())
+      LEAs.erase(Itr);
+  }
+  Stack.pop_back();
+}
+
+bool FactorizeLEAOpt::containsPhyReg(MachineInstr *MI, unsigned RecLevel) {
+  if (!MI || !RecLevel)
+    return false;
+
+  MachineRegisterInfo *MRI = MI->getRegInfo();
+  for (auto Operand : MI->operands()) {
+    if (!Operand.isReg())
+      continue;
+    if (TargetRegisterInfo::isPhysicalRegister(Operand.getReg()))
+      return true;
+    MachineInstr *OperDefMI = MRI->getVRegDef(Operand.getReg());
+    if (OperDefMI && (MI != OperDefMI) && OperDefMI->isCopyLike() &&
+        containsPhyReg(OperDefMI, RecLevel - 1))
+      return true;
+  }
+  return false;
+}
+
+void FactorizeLEAOpt::insertLEA(MachineInstr *MI) {
+  unsigned lsize;
+  if (containsPhyReg(MI, 2))
+    return;
+
+  // Factorization is beneficial only for complex LEAs.
+  MachineOperand &Base = MI->getOperand(1);
+  MachineOperand &Index = MI->getOperand(3);
+  MachineOperand &Offset = MI->getOperand(4);
+  if ((Offset.isImm() && !Offset.getImm()) ||
+      (!Base.isReg() || !Base.getReg()) || (!Index.isReg() || !Index.getReg()))
+    return;
+
+  MemOpKey Key = getMemOpCSEKey(*MI, 1);
+  ScopeEntryT *TopScope = getTopScope();
+
+  LEAMapT::iterator Itr = LEAs.find(Key);
+  if (Itr == LEAs.end()) {
+    lsize = 0;
+    LEAs[Key].push_front(MI);
+  } else {
+    lsize = (*Itr).second.size();
+    (*Itr).second.push_front(MI);
+  }
+  if (TopScope->second.find(Key) == TopScope->second.end())
+    TopScope->second[Key] = lsize;
+}
+
 class OptimizeLEAPass : public MachineFunctionPass {
 public:
   OptimizeLEAPass() : MachineFunctionPass(ID) {}
@@ -247,6 +477,12 @@ public:
   /// been calculated by LEA. Also, remove redundant LEAs.
   bool runOnMachineFunction(MachineFunction &MF) override;
 
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+    AU.addRequired<MachineDominatorTree>();
+  }
+
 private:
   using MemOpMap = DenseMap<MemOpKey, SmallVector<MachineInstr *, 16>>;
 
@@ -292,8 +528,24 @@ private:
   /// \brief Removes LEAs which calculate similar addresses.
   bool removeRedundantLEAs(MemOpMap &LEAs);
 
+  /// \brief Visit over basic blocks, collect LEAs in a scoped
+  ///  hash map (FactorizeLEAOpt::LEAs) and try to factor them out.
+  bool FactorizeLEAsAllBasicBlocks(MachineFunction &MF);
+
+  bool FactorizeLEAsBasicBlock(MachineDomTreeNode *DN);
+
+  /// \brief Factor out LEAs which share Base,Index,Offset and Segment.
+  bool processBasicBlock(const MachineBasicBlock &MBB);
+
+  /// \brief Try to replace LEA with a lower strength instruction
+  /// to improves latency and throughput.
+  bool strengthReduceLEAs(MemOpMap &LEAs, const MachineBasicBlock &MBB);
+
   DenseMap<const MachineInstr *, unsigned> InstrPos;
 
+  FactorizeLEAOpt FactorOpt;
+
+  MachineDominatorTree *DT;
   MachineRegisterInfo *MRI;
   const X86InstrInfo *TII;
   const X86RegisterInfo *TRI;
@@ -489,7 +741,9 @@ void OptimizeLEAPass::findLEAs(const MachineBasicBlock &MBB, MemOpMap &LEAs) {
 bool OptimizeLEAPass::removeRedundantAddrCalc(MemOpMap &LEAs) {
   bool Changed = false;
 
-  assert(!LEAs.empty());
+  if (LEAs.empty())
+    return Changed;
+
   MachineBasicBlock *MBB = (*LEAs.begin()->second.begin())->getParent();
 
   // Process all instructions in basic block.
@@ -659,6 +913,10 @@ bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) {
         // Erase removed LEA from the list.
         I2 = List.erase(I2);
 
+        // If List becomes empty remove it from LEAs map.
+        if (List.empty())
+          LEAs.erase(E.first);
+
         Changed = true;
       }
       ++I1;
@@ -668,6 +926,217 @@ bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) {
   return Changed;
 }
 
+static inline int getADDrrFromLEA(int LEAOpcode) {
+  switch (LEAOpcode) {
+  default:
+    llvm_unreachable("Unexpected LEA instruction");
+  case X86::LEA16r:
+    return X86::ADD16rr;
+  case X86::LEA32r:
+    return X86::ADD32rr;
+  case X86::LEA64_32r:
+  case X86::LEA64r:
+    return X86::ADD64rr;
+  }
+}
+
+bool OptimizeLEAPass::strengthReduceLEAs(MemOpMap &LEAs,
+                                         const MachineBasicBlock &BB) {
+  bool Changed = false;
+
+  // Loop over all entries in the table.
+  for (auto &E : LEAs) {
+    auto &List = E.second;
+
+    // Loop over all LEA pairs.
+    auto I1 = List.begin();
+    while (I1 != List.end()) {
+      MachineInstrBuilder NewMI;
+      MachineInstr &First = **I1;
+      MachineOperand &Res = First.getOperand(0);
+      MachineOperand &Base = First.getOperand(1);
+      MachineOperand &Scale = First.getOperand(2);
+      MachineOperand &Index = First.getOperand(3);
+      MachineOperand &Offset = First.getOperand(4);
+
+      const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(First.getOpcode()));
+      const DebugLoc DL = First.getDebugLoc();
+
+      if (!Base.isReg() || !Index.isReg() || !Index.getReg()) {
+        I1++;
+        continue;
+      }
+
+      if (TargetRegisterInfo::isPhysicalRegister(Res.getReg()) ||
+          TargetRegisterInfo::isPhysicalRegister(Base.getReg()) ||
+          TargetRegisterInfo::isPhysicalRegister(Index.getReg())) {
+        I1++;
+        continue;
+      }
+
+      // Check for register class compatibility between Result and
+      // Index operands.
+      const TargetRegisterClass *ResRC = MRI->getRegClass(Res.getReg());
+      const TargetRegisterClass *IndexRC = MRI->getRegClass(Index.getReg());
+      if (TRI->getRegSizeInBits(*ResRC) != TRI->getRegSizeInBits(*IndexRC)) {
+        I1++;
+        continue;
+      }
+
+      MachineBasicBlock &MBB = *(const_cast<MachineBasicBlock *>(&BB));
+      // R = B + I
+      if (Scale.isImm() && Scale.getImm() == 1 && Offset.isImm() &&
+          !Offset.getImm()) {
+        NewMI = BuildMI(MBB, &First, DL, ADDrr)
+                    .addDef(Res.getReg())
+                    .addUse(Base.getReg())
+                    .addUse(Index.getReg());
+        Changed = NewMI.getInstr() != nullptr;
+        First.eraseFromParent();
+        I1 = List.erase(I1);
+
+        // If List becomes empty remove it from LEAs map.
+        if (List.empty())
+          LEAs.erase(E.first);
+      } else
+        I1++;
+    }
+  }
+  return Changed;
+}
+
+bool OptimizeLEAPass::processBasicBlock(const MachineBasicBlock &MBB) {
+  bool cseDone = false;
+
+  // Legal scale value (1,2,4 & 8) vector.
+  auto LegalScale = [](int scale) {
+    return scale == 1 || scale == 2 || scale == 4 || scale == 8;
+  };
+
+  auto CompareFn = [](const MachineInstr *Arg1,
+                      const MachineInstr *Arg2) -> bool {
+    return Arg1->getOperand(2).getImm() >= Arg2->getOperand(2).getImm();
+  };
+
+  // Loop over all entries in the table.
+  for (auto &E : FactorOpt.getLEAMap()) {
+    auto &List = E.second;
+    if (List.size() > 1)
+      List.sort(CompareFn);
+
+    // Loop over all LEA pairs.
+    for (auto Iter1 = List.begin(); Iter1 != List.end(); Iter1++) {
+      for (auto Iter2 = std::next(Iter1); Iter2 != List.end(); Iter2++) {
+        MachineInstr &LI1 = **Iter1;
+        MachineInstr &LI2 = **Iter2;
+
+        if (!DT->dominates(&LI2, &LI1))
+          continue;
+
+        int Scale1 = LI1.getOperand(2).getImm();
+        int Scale2 = LI2.getOperand(2).getImm();
+        assert(LI2.getOperand(0).isReg() && "Result is a VirtualReg");
+        DebugLoc DL = LI1.getDebugLoc();
+
+        // Continue if instruction has already been factorized.
+        if (FactorOpt.checkIfScheduledForRemoval(&LI1))
+          continue;
+
+        int Factor = Scale1 - Scale2;
+        if (Factor > 0 && LegalScale(Factor)) {
+          MachineOperand NewBase = LI2.getOperand(0);
+          MachineOperand NewIndex = LI1.getOperand(3);
+
+          const TargetRegisterClass *LI2ResRC =
+              MRI->getRegClass(LI2.getOperand(0).getReg());
+          const TargetRegisterClass *LI1BaseRC =
+              MRI->getRegClass(LI1.getOperand(1).getReg());
+
+          if (TRI->getRegSizeInBits(*LI1BaseRC) >
+              TRI->getRegSizeInBits(*LI2ResRC)) {
+            MachineInstr *LI1IndexDef =
+                MRI->getVRegDef(LI1.getOperand(3).getReg());
+            if (LI1IndexDef->getOpcode() != TargetOpcode::SUBREG_TO_REG)
+              continue;
+            MachineOperand &SubReg = LI1IndexDef->getOperand(2);
+            const TargetRegisterClass *SubRegRC =
+                MRI->getRegClass(SubReg.getReg());
+            if (TRI->getRegSizeInBits(*SubRegRC) !=
+                TRI->getRegSizeInBits(*LI2ResRC))
+              continue;
+            NewIndex = SubReg;
+          }
+
+          DEBUG(dbgs() << "CSE LEAs: Candidate to replace: "; LI1.dump(););
+          MachineInstrBuilder NewMI =
+              BuildMI(*(const_cast<MachineBasicBlock *>(&MBB)), &LI1, DL,
+                      TII->get(LI1.getOpcode()))
+                  .addDef(LI1.getOperand(0).getReg()) // Dst   = Dst of LI1.
+                  .addUse(NewBase.getReg())           // Base  = Dst to LI2.
+                  .addImm(Factor)            // Scale = Diff b/w scales.
+                  .addUse(NewIndex.getReg()) // Index = Index of LI1.
+                  .addImm(0)                 // Disp  = 0
+                  .addUse(
+                      LI1.getOperand(5).getReg()); // Segment = Segmant of LI1.
+
+          cseDone = NewMI.getInstr() != nullptr;
+
+          /// To preserve the SSA nature we need to remove Def flag
+          /// from old result.
+          LI1.getOperand(0).setIsDef(false);
+
+          /// Lazy removal shall ensure that replaced LEA remains
+          /// till we finish processing all the basic block. This shall
+          /// provide opportunity for further factorization based on
+          /// the replaced LEA which will be legal since it has same
+          /// destination as newly formed LEA.
+          FactorOpt.addForLazyRemoval(&LI1);
+
+          NumFactoredLEAs++;
+          DEBUG(dbgs() << "CSE LEAs: Replaced by: "; NewMI->dump(););
+        }
+      }
+    }
+  }
+  return cseDone;
+}
+
+bool OptimizeLEAPass::FactorizeLEAsBasicBlock(MachineDomTreeNode *DN) {
+  bool Changed = false;
+  using StackT = SmallVector<MachineDomTreeNode *, 16>;
+  using VisitedNodeMapT = SmallSet<MachineDomTreeNode *, 16>;
+
+  StackT WorkList;
+  VisitedNodeMapT DomNodeMap;
+
+  WorkList.push_back(DN);
+  while (!WorkList.empty()) {
+    MachineDomTreeNode *MDN = WorkList.back();
+    FactorOpt.collectDataForBasicBlock(MDN->getBlock());
+    Changed |= processBasicBlock(*MDN->getBlock());
+
+    if (DomNodeMap.find(MDN) == DomNodeMap.end()) {
+      DomNodeMap.insert(MDN);
+      for (auto Child : MDN->getChildren())
+        WorkList.push_back(Child);
+    }
+
+    MachineDomTreeNode *TDM = WorkList.back();
+    if (MDN->getLevel() == TDM->getLevel()) {
+      FactorOpt.removeDataForBasicBlock();
+      DomNodeMap.erase(MDN);
+      WorkList.pop_back();
+    }
+  }
+  return Changed;
+}
+
+bool OptimizeLEAPass::FactorizeLEAsAllBasicBlocks(MachineFunction &MF) {
+  bool Changed = FactorizeLEAsBasicBlock(DT->getRootNode());
+  FactorOpt.performCleanup();
+  return Changed;
+}
+
 bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
   bool Changed = false;
 
@@ -677,6 +1146,10 @@ bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
   MRI = &MF.getRegInfo();
   TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
   TRI = MF.getSubtarget<X86Subtarget>().getRegisterInfo();
+  DT = &getAnalysis<MachineDominatorTree>();
+
+  // Attempt factorizing LEAs.
+  Changed |= FactorizeLEAsAllBasicBlocks(MF);
 
   // Process all basic blocks.
   for (auto &MBB : MF) {
@@ -693,6 +1166,9 @@ bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
     // Remove redundant LEA instructions.
     Changed |= removeRedundantLEAs(LEAs);
 
+    // Strength reduce LEA instructions.
+    Changed |= strengthReduceLEAs(LEAs, MBB);
+
     // Remove redundant address calculations. Do it only for -Os/-Oz since only
     // a code size gain is expected from this part of the pass.
     if (MF.getFunction()->optForSize())
diff --git a/test/CodeGen/X86/GlobalISel/callingconv.ll b/test/CodeGen/X86/GlobalISel/callingconv.ll
index 4100a7217ac..a55ff862e2b 100644
--- a/test/CodeGen/X86/GlobalISel/callingconv.ll
+++ b/test/CodeGen/X86/GlobalISel/callingconv.ll
@@ -388,7 +388,7 @@ define void @test_variadic_call_2(i8** %addr_ptr, double* %val_ptr) {
 ; X32-NEXT:    movl 4(%ecx), %ecx
 ; X32-NEXT:    movl %eax, (%esp)
 ; X32-NEXT:    movl $4, %eax
-; X32-NEXT:    leal (%esp,%eax), %eax
+; X32-NEXT:    addl %esp, %eax
 ; X32-NEXT:    movl %edx, 4(%esp)
 ; X32-NEXT:    movl %ecx, 4(%eax)
 ; X32-NEXT:    calll variadic_callee
diff --git a/test/CodeGen/X86/GlobalISel/gep.ll b/test/CodeGen/X86/GlobalISel/gep.ll
index ee66accc77d..95ad8d4eb30 100644
--- a/test/CodeGen/X86/GlobalISel/gep.ll
+++ b/test/CodeGen/X86/GlobalISel/gep.ll
@@ -5,10 +5,10 @@
 define i32* @test_gep_i8(i32 *%arr, i8 %ind) {
 ; X64_GISEL-LABEL: test_gep_i8:
 ; X64_GISEL:       # BB#0:
-; X64_GISEL-NEXT:    movq $4, %rax
-; X64_GISEL-NEXT:    movsbq %sil, %rcx
-; X64_GISEL-NEXT:    imulq %rax, %rcx
-; X64_GISEL-NEXT:    leaq (%rdi,%rcx), %rax
+; X64_GISEL-NEXT:    movq $4, %rcx
+; X64_GISEL-NEXT:    movsbq %sil, %rax
+; X64_GISEL-NEXT:    imulq %rcx, %rax
+; X64_GISEL-NEXT:    addq %rdi, %rax
 ; X64_GISEL-NEXT:    retq
 ;
 ; X64-LABEL: test_gep_i8:
@@ -25,7 +25,7 @@ define i32* @test_gep_i8_const(i32 *%arr) {
 ; X64_GISEL-LABEL: test_gep_i8_const:
 ; X64_GISEL:       # BB#0:
 ; X64_GISEL-NEXT:    movq $80, %rax
-; X64_GISEL-NEXT:    leaq (%rdi,%rax), %rax
+; X64_GISEL-NEXT:    addq %rdi, %rax
 ; X64_GISEL-NEXT:    retq
 ;
 ; X64-LABEL: test_gep_i8_const:
@@ -39,10 +39,10 @@ define i32* @test_gep_i8_const(i32 *%arr) {
 define i32* @test_gep_i16(i32 *%arr, i16 %ind) {
 ; X64_GISEL-LABEL: test_gep_i16:
 ; X64_GISEL:       # BB#0:
-; X64_GISEL-NEXT:    movq $4, %rax
-; X64_GISEL-NEXT:    movswq %si, %rcx
-; X64_GISEL-NEXT:    imulq %rax, %rcx
-; X64_GISEL-NEXT:    leaq (%rdi,%rcx), %rax
+; X64_GISEL-NEXT:    movq $4, %rcx
+; X64_GISEL-NEXT:    movswq %si, %rax
+; X64_GISEL-NEXT:    imulq %rcx, %rax
+; X64_GISEL-NEXT:    addq %rdi, %rax
 ; X64_GISEL-NEXT:    retq
 ;
 ; X64-LABEL: test_gep_i16:
@@ -59,7 +59,7 @@ define i32* @test_gep_i16_const(i32 *%arr) {
 ; X64_GISEL-LABEL: test_gep_i16_const:
 ; X64_GISEL:       # BB#0:
 ; X64_GISEL-NEXT:    movq $80, %rax
-; X64_GISEL-NEXT:    leaq (%rdi,%rax), %rax
+; X64_GISEL-NEXT:    addq %rdi, %rax
 ; X64_GISEL-NEXT:    retq
 ;
 ; X64-LABEL: test_gep_i16_const:
@@ -73,10 +73,10 @@ define i32* @test_gep_i16_const(i32 *%arr) {
 define i32* @test_gep_i32(i32 *%arr, i32 %ind) {
 ; X64_GISEL-LABEL: test_gep_i32:
 ; X64_GISEL:       # BB#0:
-; X64_GISEL-NEXT:    movq $4, %rax
-; X64_GISEL-NEXT:    movslq %esi, %rcx
-; X64_GISEL-NEXT:    imulq %rax, %rcx
-; X64_GISEL-NEXT:    leaq (%rdi,%rcx), %rax
+; X64_GISEL-NEXT:    movq $4, %rcx
+; X64_GISEL-NEXT:    movslq %esi, %rax
+; X64_GISEL-NEXT:    imulq %rcx, %rax
+; X64_GISEL-NEXT:    addq %rdi, %rax
 ; X64_GISEL-NEXT:    retq
 ;
 ; X64-LABEL: test_gep_i32:
@@ -92,7 +92,7 @@ define i32* @test_gep_i32_const(i32 *%arr) {
 ; X64_GISEL-LABEL: test_gep_i32_const:
 ; X64_GISEL:       # BB#0:
 ; X64_GISEL-NEXT:    movq $20, %rax
-; X64_GISEL-NEXT:    leaq (%rdi,%rax), %rax
+; X64_GISEL-NEXT:    addq %rdi, %rax
 ; X64_GISEL-NEXT:    retq
 ;
 ; X64-LABEL: test_gep_i32_const:
@@ -108,7 +108,7 @@ define i32* @test_gep_i64(i32 *%arr, i64 %ind) {
 ; X64_GISEL:       # BB#0:
 ; X64_GISEL-NEXT:    movq $4, %rax
 ; X64_GISEL-NEXT:    imulq %rsi, %rax
-; X64_GISEL-NEXT:    leaq (%rdi,%rax), %rax
+; X64_GISEL-NEXT:    addq %rdi, %rax
 ; X64_GISEL-NEXT:    retq
 ;
 ; X64-LABEL: test_gep_i64:
@@ -123,7 +123,7 @@ define i32* @test_gep_i64_const(i32 *%arr) {
 ; X64_GISEL-LABEL: test_gep_i64_const:
 ; X64_GISEL:       # BB#0:
 ; X64_GISEL-NEXT:    movq $20, %rax
-; X64_GISEL-NEXT:    leaq (%rdi,%rax), %rax
+; X64_GISEL-NEXT:    addq %rdi, %rax
 ; X64_GISEL-NEXT:    retq
 ;
 ; X64-LABEL: test_gep_i64_const:
diff --git a/test/CodeGen/X86/GlobalISel/memop-scalar.ll b/test/CodeGen/X86/GlobalISel/memop-scalar.ll
index 2097a3b0bfc..c73a43177e3 100644
--- a/test/CodeGen/X86/GlobalISel/memop-scalar.ll
+++ b/test/CodeGen/X86/GlobalISel/memop-scalar.ll
@@ -181,7 +181,7 @@ define i32 @test_gep_folding_largeGepIndex(i32* %arr, i32 %val) {
 ; ALL-LABEL: test_gep_folding_largeGepIndex:
 ; ALL:       # BB#0:
 ; ALL-NEXT:    movabsq $228719476720, %rax # imm = 0x3540BE3FF0
-; ALL-NEXT:    leaq (%rdi,%rax), %rax
+; ALL-NEXT:    addq %rdi, %rax
 ; ALL-NEXT:    movl %esi, (%rax)
 ; ALL-NEXT:    movl (%rax), %eax
 ; ALL-NEXT:    retq
diff --git a/test/CodeGen/X86/lea-opt-cse1.ll b/test/CodeGen/X86/lea-opt-cse1.ll
index 05b47690e81..512740ce075 100644
--- a/test/CodeGen/X86/lea-opt-cse1.ll
+++ b/test/CodeGen/X86/lea-opt-cse1.ll
@@ -9,27 +9,21 @@ define void @test_func(%struct.SA* nocapture %ctx, i32 %n) local_unnamed_addr {
 ; X64:       # BB#0: # %entry
 ; X64-NEXT:    movl (%rdi), %eax
 ; X64-NEXT:    movl 16(%rdi), %ecx
-; X64-NEXT:    leal (%rax,%rcx), %edx
 ; X64-NEXT:    leal 1(%rax,%rcx), %eax
 ; X64-NEXT:    movl %eax, 12(%rdi)
-; X64-NEXT:    leal 1(%rcx,%rdx), %eax
+; X64-NEXT:    addq %ecx, %eax
 ; X64-NEXT:    movl %eax, 16(%rdi)
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: test_func:
 ; X86:       # BB#0: # %entry
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    .cfi_def_cfa_offset 8
-; X86-NEXT:    .cfi_offset %esi, -8
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl (%eax), %ecx
 ; X86-NEXT:    movl 16(%eax), %edx
-; X86-NEXT:    leal 1(%ecx,%edx), %esi
+; X86-NEXT:    leal 1(%ecx,%edx), %ecx
+; X86-NEXT:    movl %ecx, 12(%eax)
 ; X86-NEXT:    addl %edx, %ecx
-; X86-NEXT:    movl %esi, 12(%eax)
-; X86-NEXT:    leal 1(%edx,%ecx), %ecx
 ; X86-NEXT:    movl %ecx, 16(%eax)
-; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
  entry:
    %h0 = getelementptr inbounds %struct.SA, %struct.SA* %ctx, i64 0, i32 0
diff --git a/test/CodeGen/X86/lea-opt-cse2.ll b/test/CodeGen/X86/lea-opt-cse2.ll
index 865dd49a6e1..4226e3d25c8 100644
--- a/test/CodeGen/X86/lea-opt-cse2.ll
+++ b/test/CodeGen/X86/lea-opt-cse2.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s -check-prefix=X64
-; RUN: llc < %s -mtriple=i686-unknown   | FileCheck %s -check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown  -mattr=+slow-3ops-lea | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown  -mattr=+slow-3ops-lea  | FileCheck %s -check-prefix=X86
 
 %struct.SA = type { i32 , i32 , i32 , i32 , i32};
 
@@ -10,43 +10,39 @@ define void @foo(%struct.SA* nocapture %ctx, i32 %n) local_unnamed_addr #0 {
 ; X64-NEXT:    .p2align 4, 0x90
 ; X64-NEXT:  .LBB0_1: # %loop
 ; X64-NEXT:    # =>This Inner Loop Header: Depth=1
-; X64-NEXT:    movl (%rdi), %eax
-; X64-NEXT:    movl 16(%rdi), %ecx
-; X64-NEXT:    leal 1(%rax,%rcx), %edx
-; X64-NEXT:    movl %edx, 12(%rdi)
+; X64-NEXT:    movl 16(%rdi), %eax
+; X64-NEXT:    movl (%rdi), %ecx
+; X64-NEXT:    addl %eax, %ecx
+; X64-NEXT:    incl %ecx
+; X64-NEXT:    movl %ecx, 12(%rdi)
 ; X64-NEXT:    decl %esi
 ; X64-NEXT:    jne .LBB0_1
 ; X64-NEXT:  # BB#2: # %exit
-; X64-NEXT:    addl %ecx, %eax
-; X64-NEXT:    leal 1(%rcx,%rax), %eax
-; X64-NEXT:    movl %eax, 16(%rdi)
+; X64-NEXT:    addl %eax, %ecx
+; X64-NEXT:    movl %ecx, 16(%rdi)
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: foo:
 ; X86:       # BB#0: # %entry
-; X86-NEXT:    pushl %edi
-; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    .cfi_def_cfa_offset 12
-; X86-NEXT:    .cfi_offset %esi, -12
-; X86-NEXT:    .cfi_offset %edi, -8
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %esi, -8
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    .p2align 4, 0x90
 ; X86-NEXT:  .LBB0_1: # %loop
 ; X86-NEXT:    # =>This Inner Loop Header: Depth=1
-; X86-NEXT:    movl (%eax), %edx
-; X86-NEXT:    movl 16(%eax), %esi
-; X86-NEXT:    leal 1(%edx,%esi), %edi
-; X86-NEXT:    movl %edi, 12(%eax)
+; X86-NEXT:    movl 16(%eax), %edx
+; X86-NEXT:    movl (%eax), %esi
+; X86-NEXT:    addl %edx, %esi
+; X86-NEXT:    incl %esi
+; X86-NEXT:    movl %esi, 12(%eax)
 ; X86-NEXT:    decl %ecx
 ; X86-NEXT:    jne .LBB0_1
 ; X86-NEXT:  # BB#2: # %exit
-; X86-NEXT:    addl %esi, %edx
-; X86-NEXT:    leal 1(%esi,%edx), %ecx
-; X86-NEXT:    movl %ecx, 16(%eax)
+; X86-NEXT:    addl %edx, %esi
+; X86-NEXT:    movl %esi, 16(%eax)
 ; X86-NEXT:    popl %esi
-; X86-NEXT:    popl %edi
 ; X86-NEXT:    retl
  entry:
    br label %loop
diff --git a/test/CodeGen/X86/lea-opt-cse3.ll b/test/CodeGen/X86/lea-opt-cse3.ll
index 48ab3130bf0..57c20cceed1 100644
--- a/test/CodeGen/X86/lea-opt-cse3.ll
+++ b/test/CodeGen/X86/lea-opt-cse3.ll
@@ -8,7 +8,7 @@ define i32 @foo(i32 %a, i32 %b) local_unnamed_addr #0 {
 ; X64-NEXT:    # kill: %esi<def> %esi<kill> %rsi<def>
 ; X64-NEXT:    # kill: %edi<def> %edi<kill> %rdi<def>
 ; X64-NEXT:    leal 4(%rdi,%rsi,2), %ecx
-; X64-NEXT:    leal 4(%rdi,%rsi,4), %eax
+; X64-NEXT:    leal (%ecx,%esi,2), %eax
 ; X64-NEXT:    imull %ecx, %eax
 ; X64-NEXT:    retq
 ;
@@ -16,9 +16,9 @@ define i32 @foo(i32 %a, i32 %b) local_unnamed_addr #0 {
 ; X86:       # BB#0: # %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    leal 4(%ecx,%eax,2), %edx
-; X86-NEXT:    leal 4(%ecx,%eax,4), %eax
-; X86-NEXT:    imull %edx, %eax
+; X86-NEXT:    leal 4(%ecx,%eax,2), %ecx
+; X86-NEXT:    leal (%ecx,%eax,2), %eax
+; X86-NEXT:    imull %ecx, %eax
 ; X86-NEXT:    retl
 entry:
   %mul = shl i32 %b, 1
@@ -36,7 +36,7 @@ define i32 @foo1(i32 %a, i32 %b) local_unnamed_addr #0 {
 ; X64-NEXT:    # kill: %esi<def> %esi<kill> %rsi<def>
 ; X64-NEXT:    # kill: %edi<def> %edi<kill> %rdi<def>
 ; X64-NEXT:    leal 4(%rdi,%rsi,4), %ecx
-; X64-NEXT:    leal 4(%rdi,%rsi,8), %eax
+; X64-NEXT:    leal (%ecx,%esi,4), %eax
 ; X64-NEXT:    imull %ecx, %eax
 ; X64-NEXT:    retq
 ;
@@ -44,9 +44,9 @@ define i32 @foo1(i32 %a, i32 %b) local_unnamed_addr #0 {
 ; X86:       # BB#0: # %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    leal 4(%ecx,%eax,4), %edx
-; X86-NEXT:    leal 4(%ecx,%eax,8), %eax
-; X86-NEXT:    imull %edx, %eax
+; X86-NEXT:    leal 4(%ecx,%eax,4), %ecx
+; X86-NEXT:    leal (%ecx,%eax,4), %eax
+; X86-NEXT:    imull %ecx, %eax
 ; X86-NEXT:    retl
 entry:
   %mul = shl i32 %b, 2
@@ -68,29 +68,23 @@ define i32 @foo1_mult_basic_blocks(i32 %a, i32 %b) local_unnamed_addr #0 {
 ; X64-NEXT:    cmpl $10, %ecx
 ; X64-NEXT:    je .LBB2_2
 ; X64-NEXT:  # BB#1: # %mid
-; X64-NEXT:    leal 4(%rdi,%rsi,8), %eax
-; X64-NEXT:    imull %eax, %ecx
-; X64-NEXT:    movl %ecx, %eax
+; X64-NEXT:    leal (%ecx,%esi,4), %eax
+; X64-NEXT:    imull %ecx, %eax
 ; X64-NEXT:  .LBB2_2: # %exit
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: foo1_mult_basic_blocks:
 ; X86:       # BB#0: # %entry
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    .cfi_def_cfa_offset 8
-; X86-NEXT:    .cfi_offset %esi, -8
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    leal 4(%esi,%edx,4), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    leal 4(%eax,%edx,4), %ecx
 ; X86-NEXT:    xorl %eax, %eax
 ; X86-NEXT:    cmpl $10, %ecx
 ; X86-NEXT:    je .LBB2_2
 ; X86-NEXT:  # BB#1: # %mid
-; X86-NEXT:    leal 4(%esi,%edx,8), %eax
-; X86-NEXT:    imull %eax, %ecx
-; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    leal (%ecx,%edx,4), %eax
+; X86-NEXT:    imull %ecx, %eax
 ; X86-NEXT:  .LBB2_2: # %exit
-; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 entry:
   %mul = shl i32 %b, 2
diff --git a/test/CodeGen/X86/lea-opt-cse4.ll b/test/CodeGen/X86/lea-opt-cse4.ll
index 31f31a73d44..6f1fe282f92 100644
--- a/test/CodeGen/X86/lea-opt-cse4.ll
+++ b/test/CodeGen/X86/lea-opt-cse4.ll
@@ -1,41 +1,31 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s -check-prefix=X64
-; RUN: llc < %s -mtriple=i686-unknown   | FileCheck %s -check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+slow-3ops-lea | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+slow-3ops-lea | FileCheck %s -check-prefix=X86
 
 %struct.SA = type { i32 , i32 , i32 , i32 , i32};
 
 define void @foo(%struct.SA* nocapture %ctx, i32 %n) local_unnamed_addr #0 {
 ; X64-LABEL: foo:
 ; X64:       # BB#0: # %entry
-; X64-NEXT:    movl 16(%rdi), %eax
-; X64-NEXT:    movl (%rdi), %ecx
-; X64-NEXT:    addl %eax, %ecx
-; X64-NEXT:    addl %eax, %ecx
-; X64-NEXT:    addl %eax, %ecx
-; X64-NEXT:    leal (%rcx,%rax), %edx
-; X64-NEXT:    leal 1(%rax,%rcx), %ecx
-; X64-NEXT:    movl %ecx, 12(%rdi)
-; X64-NEXT:    leal 1(%rax,%rdx), %eax
+; X64-NEXT:    movl (%rdi), %eax
+; X64-NEXT:    movl 16(%rdi), %ecx
+; X64-NEXT:    leal (%rax,%rcx,4), %eax
+; X64-NEXT:    addl $1, %eax
+; X64-NEXT:    movl %eax, 12(%rdi)
+; X64-NEXT:    addl %ecx, %eax
 ; X64-NEXT:    movl %eax, 16(%rdi)
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: foo:
 ; X86:       # BB#0: # %entry
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    .cfi_def_cfa_offset 8
-; X86-NEXT:    .cfi_offset %esi, -8
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl 16(%eax), %ecx
-; X86-NEXT:    movl (%eax), %edx
-; X86-NEXT:    addl %ecx, %edx
-; X86-NEXT:    addl %ecx, %edx
-; X86-NEXT:    addl %ecx, %edx
-; X86-NEXT:    leal 1(%ecx,%edx), %esi
-; X86-NEXT:    addl %ecx, %edx
-; X86-NEXT:    movl %esi, 12(%eax)
-; X86-NEXT:    leal 1(%ecx,%edx), %ecx
+; X86-NEXT:    movl (%eax), %ecx
+; X86-NEXT:    movl 16(%eax), %edx
+; X86-NEXT:    leal (%ecx,%edx,4), %ecx
+; X86-NEXT:    addl $1, %ecx
+; X86-NEXT:    movl %ecx, 12(%eax)
+; X86-NEXT:    addl %edx, %ecx
 ; X86-NEXT:    movl %ecx, 16(%eax)
-; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
  entry:
    %h0 = getelementptr inbounds %struct.SA, %struct.SA* %ctx, i64 0, i32 0
@@ -62,15 +52,15 @@ define void @foo_loop(%struct.SA* nocapture %ctx, i32 %n) local_unnamed_addr #0
 ; X64-NEXT:    .p2align 4, 0x90
 ; X64-NEXT:  .LBB1_1: # %loop
 ; X64-NEXT:    # =>This Inner Loop Header: Depth=1
-; X64-NEXT:    movl (%rdi), %ecx
 ; X64-NEXT:    movl 16(%rdi), %eax
-; X64-NEXT:    leal 1(%rcx,%rax), %edx
-; X64-NEXT:    movl %edx, 12(%rdi)
+; X64-NEXT:    movl (%rdi), %ecx
+; X64-NEXT:    addl %eax, %ecx
+; X64-NEXT:    incl %ecx
+; X64-NEXT:    movl %ecx, 12(%rdi)
 ; X64-NEXT:    decl %esi
 ; X64-NEXT:    jne .LBB1_1
 ; X64-NEXT:  # BB#2: # %exit
 ; X64-NEXT:    addl %eax, %ecx
-; X64-NEXT:    leal 1(%rax,%rcx), %ecx
 ; X64-NEXT:    addl %eax, %ecx
 ; X64-NEXT:    addl %eax, %ecx
 ; X64-NEXT:    addl %eax, %ecx
@@ -82,26 +72,23 @@ define void @foo_loop(%struct.SA* nocapture %ctx, i32 %n) local_unnamed_addr #0
 ;
 ; X86-LABEL: foo_loop:
 ; X86:       # BB#0: # %entry
-; X86-NEXT:    pushl %edi
-; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    .cfi_def_cfa_offset 12
-; X86-NEXT:    .cfi_offset %esi, -12
-; X86-NEXT:    .cfi_offset %edi, -8
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %esi, -8
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    .p2align 4, 0x90
 ; X86-NEXT:  .LBB1_1: # %loop
 ; X86-NEXT:    # =>This Inner Loop Header: Depth=1
-; X86-NEXT:    movl (%eax), %esi
 ; X86-NEXT:    movl 16(%eax), %ecx
-; X86-NEXT:    leal 1(%esi,%ecx), %edi
-; X86-NEXT:    movl %edi, 12(%eax)
-; X86-NEXT:    decl %edx
+; X86-NEXT:    movl (%eax), %edx
+; X86-NEXT:    addl %ecx, %edx
+; X86-NEXT:    incl %edx
+; X86-NEXT:    movl %edx, 12(%eax)
+; X86-NEXT:    decl %esi
 ; X86-NEXT:    jne .LBB1_1
 ; X86-NEXT:  # BB#2: # %exit
-; X86-NEXT:    addl %ecx, %esi
-; X86-NEXT:    leal 1(%ecx,%esi), %edx
+; X86-NEXT:    addl %ecx, %edx
 ; X86-NEXT:    addl %ecx, %edx
 ; X86-NEXT:    addl %ecx, %edx
 ; X86-NEXT:    addl %ecx, %edx
@@ -110,7 +97,6 @@ define void @foo_loop(%struct.SA* nocapture %ctx, i32 %n) local_unnamed_addr #0
 ; X86-NEXT:    addl %ecx, %edx
 ; X86-NEXT:    movl %edx, 16(%eax)
 ; X86-NEXT:    popl %esi
-; X86-NEXT:    popl %edi
 ; X86-NEXT:    retl
  entry:
    br label %loop
diff --git a/test/CodeGen/X86/mul-constant-i16.ll b/test/CodeGen/X86/mul-constant-i16.ll
index c3b822ac214..72731b3ba67 100644
--- a/test/CodeGen/X86/mul-constant-i16.ll
+++ b/test/CodeGen/X86/mul-constant-i16.ll
@@ -558,11 +558,10 @@ define i16 @test_mul_by_28(i16 %x) {
 define i16 @test_mul_by_29(i16 %x) {
 ; X86-LABEL: test_mul_by_29:
 ; X86:       # BB#0:
-; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    leal (%ecx,%ecx,8), %eax
-; X86-NEXT:    leal (%eax,%eax,2), %eax
-; X86-NEXT:    addl %ecx, %eax
-; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    leal (%eax,%eax,8), %ecx
+; X86-NEXT:    leal (%ecx,%ecx,2), %ecx
+; X86-NEXT:    leal (%ecx,%eax,2), %eax
 ; X86-NEXT:    # kill: %ax<def> %ax<kill> %eax<kill>
 ; X86-NEXT:    retl
 ;
@@ -571,8 +570,7 @@ define i16 @test_mul_by_29(i16 %x) {
 ; X64-NEXT:    # kill: %edi<def> %edi<kill> %rdi<def>
 ; X64-NEXT:    leal (%rdi,%rdi,8), %eax
 ; X64-NEXT:    leal (%rax,%rax,2), %eax
-; X64-NEXT:    addl %edi, %eax
-; X64-NEXT:    addl %edi, %eax
+; X64-NEXT:    leal (%rax,%rdi,2), %eax
 ; X64-NEXT:    # kill: %ax<def> %ax<kill> %eax<kill>
 ; X64-NEXT:    retq
   %mul = mul nsw i16 %x, 29
diff --git a/test/CodeGen/X86/mul-constant-i32.ll b/test/CodeGen/X86/mul-constant-i32.ll
index 228dd5e5f37..c8ac6a70927 100644
--- a/test/CodeGen/X86/mul-constant-i32.ll
+++ b/test/CodeGen/X86/mul-constant-i32.ll
@@ -1457,11 +1457,10 @@ define i32 @test_mul_by_28(i32 %x) {
 define i32 @test_mul_by_29(i32 %x) {
 ; X86-LABEL: test_mul_by_29:
 ; X86:       # BB#0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    leal (%ecx,%ecx,8), %eax
-; X86-NEXT:    leal (%eax,%eax,2), %eax
-; X86-NEXT:    addl %ecx, %eax
-; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    leal (%eax,%eax,8), %ecx
+; X86-NEXT:    leal (%ecx,%ecx,2), %ecx
+; X86-NEXT:    leal (%ecx,%eax,2), %eax
 ; X86-NEXT:    retl
 ;
 ; X64-HSW-LABEL: test_mul_by_29:
@@ -1469,8 +1468,7 @@ define i32 @test_mul_by_29(i32 %x) {
 ; X64-HSW-NEXT:    # kill: %edi<def> %edi<kill> %rdi<def>
 ; X64-HSW-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
 ; X64-HSW-NEXT:    leal (%rax,%rax,2), %eax # sched: [1:0.50]
-; X64-HSW-NEXT:    addl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT:    addl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT:    leal (%rax,%rdi,2), %eax # sched: [1:0.50]
 ; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_29:
@@ -1478,8 +1476,7 @@ define i32 @test_mul_by_29(i32 %x) {
 ; X64-JAG-NEXT:    # kill: %edi<def> %edi<kill> %rdi<def>
 ; X64-JAG-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
 ; X64-JAG-NEXT:    leal (%rax,%rax,2), %eax # sched: [1:0.50]
-; X64-JAG-NEXT:    addl %edi, %eax # sched: [1:0.50]
-; X64-JAG-NEXT:    addl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    leal (%rax,%rdi,2), %eax # sched: [1:0.50]
 ; X64-JAG-NEXT:    retq # sched: [4:1.00]
 ;
 ; X86-NOOPT-LABEL: test_mul_by_29:
diff --git a/test/CodeGen/X86/mul-constant-i64.ll b/test/CodeGen/X86/mul-constant-i64.ll
index 98568a6fc8e..8e171f973e0 100644
--- a/test/CodeGen/X86/mul-constant-i64.ll
+++ b/test/CodeGen/X86/mul-constant-i64.ll
@@ -1523,8 +1523,7 @@ define i64 @test_mul_by_29(i64 %x) {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    leal (%eax,%eax,8), %ecx
 ; X86-NEXT:    leal (%ecx,%ecx,2), %ecx
-; X86-NEXT:    addl %eax, %ecx
-; X86-NEXT:    addl %eax, %ecx
+; X86-NEXT:    leal (%ecx,%eax,2), %ecx
 ; X86-NEXT:    movl $29, %eax
 ; X86-NEXT:    mull {{[0-9]+}}(%esp)
 ; X86-NEXT:    addl %ecx, %edx
@@ -1534,16 +1533,14 @@ define i64 @test_mul_by_29(i64 %x) {
 ; X64-HSW:       # BB#0:
 ; X64-HSW-NEXT:    leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
 ; X64-HSW-NEXT:    leaq (%rax,%rax,2), %rax # sched: [1:0.50]
-; X64-HSW-NEXT:    addq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT:    addq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT:    leaq (%rax,%rdi,2), %rax # sched: [1:0.50]
 ; X64-HSW-NEXT:    retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_29:
 ; X64-JAG:       # BB#0:
 ; X64-JAG-NEXT:    leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
 ; X64-JAG-NEXT:    leaq (%rax,%rax,2), %rax # sched: [1:0.50]
-; X64-JAG-NEXT:    addq %rdi, %rax # sched: [1:0.50]
-; X64-JAG-NEXT:    addq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    leaq (%rax,%rdi,2), %rax # sched: [1:0.50]
 ; X64-JAG-NEXT:    retq # sched: [4:1.00]
 ;
 ; X86-NOOPT-LABEL: test_mul_by_29:
diff --git a/test/CodeGen/X86/mul-constant-result.ll b/test/CodeGen/X86/mul-constant-result.ll
index 6e74c1d4e9e..f5846ab37ba 100644
--- a/test/CodeGen/X86/mul-constant-result.ll
+++ b/test/CodeGen/X86/mul-constant-result.ll
@@ -164,8 +164,7 @@ define i32 @mult(i32, i32) local_unnamed_addr #0 {
 ; X86-NEXT:  .LBB0_35:
 ; X86-NEXT:    leal (%eax,%eax,8), %ecx
 ; X86-NEXT:    leal (%ecx,%ecx,2), %ecx
-; X86-NEXT:    addl %eax, %ecx
-; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    leal (%ecx,%eax,2), %eax
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 ; X86-NEXT:  .LBB0_36:
@@ -323,16 +322,17 @@ define i32 @mult(i32, i32) local_unnamed_addr #0 {
 ; X64-HSW-NEXT:  .LBB0_31:
 ; X64-HSW-NEXT:    leal (%rax,%rax,8), %ecx
 ; X64-HSW-NEXT:    leal (%rcx,%rcx,2), %ecx
-; X64-HSW-NEXT:    jmp .LBB0_17
-; X64-HSW-NEXT:  .LBB0_32:
-; X64-HSW-NEXT:    leal (%rax,%rax,8), %ecx
-; X64-HSW-NEXT:    leal (%rcx,%rcx,2), %ecx
-; X64-HSW-NEXT:    addl %eax, %ecx
 ; X64-HSW-NEXT:  .LBB0_17:
 ; X64-HSW-NEXT:    addl %eax, %ecx
 ; X64-HSW-NEXT:    movl %ecx, %eax
 ; X64-HSW-NEXT:    # kill: %eax<def> %eax<kill> %rax<kill>
 ; X64-HSW-NEXT:    retq
+; X64-HSW-NEXT:  .LBB0_32:
+; X64-HSW-NEXT:    leal (%rax,%rax,8), %ecx
+; X64-HSW-NEXT:    leal (%rcx,%rcx,2), %ecx
+; X64-HSW-NEXT:    leal (%rcx,%rax,2), %eax
+; X64-HSW-NEXT:    # kill: %eax<def> %eax<kill> %rax<kill>
+; X64-HSW-NEXT:    retq
 ; X64-HSW-NEXT:  .LBB0_33:
 ; X64-HSW-NEXT:    movl %eax, %ecx
 ; X64-HSW-NEXT:    shll $5, %ecx
diff --git a/test/CodeGen/X86/umul-with-overflow.ll b/test/CodeGen/X86/umul-with-overflow.ll
index 2e877a0b6e0..70b7daa6c16 100644
--- a/test/CodeGen/X86/umul-with-overflow.ll
+++ b/test/CodeGen/X86/umul-with-overflow.ll
@@ -40,10 +40,10 @@ define i32 @test2(i32 %a, i32 %b) nounwind readnone {
 ; X64-NEXT:    leal (%rdi,%rdi), %eax
 ; X64-NEXT:    retq
 entry:
-	%tmp0 = add i32 %b, %a
-	%tmp1 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %tmp0, i32 2)
-	%tmp2 = extractvalue { i32, i1 } %tmp1, 0
-	ret i32 %tmp2
+        %tmp0 = add i32 %b, %a
+        %tmp1 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %tmp0, i32 2)
+        %tmp2 = extractvalue { i32, i1 } %tmp1, 0
+        ret i32 %tmp2
 }
 
 define i32 @test3(i32 %a, i32 %b) nounwind readnone {
@@ -64,8 +64,8 @@ define i32 @test3(i32 %a, i32 %b) nounwind readnone {
 ; X64-NEXT:    mull %ecx
 ; X64-NEXT:    retq
 entry:
-	%tmp0 = add i32 %b, %a
-	%tmp1 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %tmp0, i32 4)
-	%tmp2 = extractvalue { i32, i1 } %tmp1, 0
-	ret i32 %tmp2
+        %tmp0 = add i32 %b, %a
+        %tmp1 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %tmp0, i32 4)
+        %tmp2 = extractvalue { i32, i1 } %tmp1, 0
+        ret i32 %tmp2
 }
diff --git a/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll b/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
index 7c01432914f..3f6d234c929 100644
--- a/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
@@ -13,14 +13,14 @@
 ; X64-NEXT: .p2align
 ; X64: %loop
 ; no complex address modes
-; X64-NOT: (%{{[^)]+}},%{{[^)]+}},
+; X64-NOT: [1-9]+(%{{[^)]+}},%{{[^)]+}},
 ;
 ; X32: @simple
 ; no expensive address computation in the preheader
 ; X32-NOT: imul
 ; X32: %loop
 ; no complex address modes
-; X32-NOT: (%{{[^)]+}},%{{[^)]+}},
+; X32-NOT: [1-9]+(%{{[^)]+}},%{{[^)]+}},
 define i32 @simple(i32* %a, i32* %b, i32 %x) nounwind {
 entry:
   br label %loop
@@ -103,7 +103,7 @@ exit:
 ; X32-NOT: mov{{.*}}(%esp){{$}}
 ; X32: %for.body{{$}}
 ; no complex address modes
-; X32-NOT: (%{{[^)]+}},%{{[^)]+}},
+; X32-NOT: [1-9]+(%{{[^)]+}},%{{[^)]+}},
 ; no reloads
 ; X32-NOT: (%esp)
 define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture %res, i32 %x, i32 %y, i32 %z) nounwind {