From 730509657a8b426c1f68131a3265d84489abd4c4 Mon Sep 17 00:00:00 2001
From: Nikita Popov
Date: Thu, 20 Feb 2020 22:42:42 +0100
Subject: [PATCH] [InstCombine] DCE instructions earlier

When InstCombine initially populates the worklist, it already performs
constant folding and DCE. However, as the instructions are initially
visited in program order, this DCE can pick up only the last instruction
of a dead chain; the rest would only get picked up in the main
InstCombine run.

To avoid this, we instead perform the DCE in a separate pass over the
collected instructions in reverse order, which allows us to pick up full
dead instruction chains. We already need to do this reverse iteration
anyway to populate the worklist, so this shouldn't add extra cost.

This by itself only fixes a small part of the problem though: the same
basic issue also applies during the main InstCombine loop. We generally
want DCE to occur as early as possible, because it allows one-use folds
to happen. Address this by also performing DCE while adding deferred
instructions to the main worklist.

This drops the number of tests that perform more than 2 InstCombine
iterations from ~80 to ~40. There are some spurious test changes due to
operand order / icmp toggling.

Differential Revision: https://reviews.llvm.org/D75008
---
 .../InstCombine/InstCombineWorklist.h         | 29 +++++--------
 .../InstCombine/InstCombineInternal.h         |  2 +-
 .../InstCombine/InstructionCombining.cpp      | 43 +++++++++++++------
 .../InstCombine/2010-11-01-lshr-mask.ll       |  2 +-
 .../InstCombine/demorgan-sink-not-into-xor.ll |  6 +--
 test/Transforms/InstCombine/logical-select.ll |  8 ++--
 test/Transforms/InstCombine/pr44245.ll        |  2 +-
 .../InstCombine/select-imm-canon.ll           |  2 +-
 .../sub-ashr-and-to-icmp-select.ll            | 20 ++++-----
 .../InstCombine/sub-ashr-or-to-icmp-select.ll | 20 ++++-----
 test/Transforms/InstCombine/vec_sext.ll       |  8 ++--
 .../SimplifyCFG/merge-cond-stores.ll          |  4 +-
 12 files changed, 78 insertions(+), 68 deletions(-)

diff --git a/include/llvm/Transforms/InstCombine/InstCombineWorklist.h b/include/llvm/Transforms/InstCombine/InstCombineWorklist.h
index f019bb5ecf3..25aabe199d0 100644
--- a/include/llvm/Transforms/InstCombine/InstCombineWorklist.h
+++ b/include/llvm/Transforms/InstCombine/InstCombineWorklist.h
@@ -38,7 +38,7 @@ public:
   InstCombineWorklist(InstCombineWorklist &&) = default;
   InstCombineWorklist &operator=(InstCombineWorklist &&) = default;
 
-  bool isEmpty() const { return Worklist.empty(); }
+  bool isEmpty() const { return Worklist.empty() && Deferred.empty(); }
 
   /// Add instruction to the worklist.
   /// Instructions will be visited in the order they are added.
@@ -72,26 +72,15 @@ public:
     push(I);
   }
 
-  void addDeferredInstructions() {
-    for (Instruction *I : reverse(Deferred))
-      push(I);
-    Deferred.clear();
+  Instruction *popDeferred() {
+    if (Deferred.empty())
+      return nullptr;
+    return Deferred.pop_back_val();
   }
 
-  /// AddInitialGroup - Add the specified batch of stuff in reverse order.
-  /// which should only be done when the worklist is empty and when the group
-  /// has no duplicates.
-  void addInitialGroup(ArrayRef<Instruction *> List) {
-    assert(Worklist.empty() && "Worklist must be empty to add initial group");
-    Worklist.reserve(List.size()+16);
-    WorklistMap.reserve(List.size());
-    LLVM_DEBUG(dbgs() << "IC: ADDING: " << List.size()
-                      << " instrs to worklist\n");
-    unsigned Idx = 0;
-    for (Instruction *I : reverse(List)) {
-      WorklistMap.insert(std::make_pair(I, Idx++));
-      Worklist.push_back(I);
-    }
+  void reserve(size_t Size) {
+    Worklist.reserve(Size + 16);
+    WorklistMap.reserve(Size);
   }
 
   /// Remove I from the worklist if it exists.
@@ -107,6 +96,8 @@ public:
   }
 
   Instruction *removeOne() {
+    if (Worklist.empty())
+      return nullptr;
     Instruction *I = Worklist.pop_back_val();
     WorklistMap.erase(I);
     return I;
diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h
index f80da439546..38ba742bf5f 100644
--- a/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -724,7 +724,7 @@ public:
     if (I.getNumOperands() < 8) {
       for (Use &Operand : I.operands())
         if (auto *Inst = dyn_cast<Instruction>(Operand))
-          Worklist.push(Inst);
+          Worklist.add(Inst);
     }
     Worklist.remove(&I);
     I.eraseFromParent();
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index e25dfdef474..58d49d87221 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -3406,6 +3406,22 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
 bool InstCombiner::run() {
   while (!Worklist.isEmpty()) {
+    // Walk deferred instructions in reverse order, and push them to the
+    // worklist, which means they'll end up popped from the worklist in-order.
+    while (Instruction *I = Worklist.popDeferred()) {
+      // Check to see if we can DCE the instruction. We do this already here to
+      // reduce the number of uses and thus allow other folds to trigger.
+      // Note that eraseInstFromFunction() may push additional instructions on
+      // the deferred worklist, so this will DCE whole instruction chains.
+      if (isInstructionTriviallyDead(I, &TLI)) {
+        eraseInstFromFunction(*I);
+        ++NumDeadInst;
+        continue;
+      }
+
+      Worklist.push(I);
+    }
+
     Instruction *I = Worklist.removeOne();
 
     if (I == nullptr) continue;  // skip null values.
@@ -3552,7 +3568,6 @@ bool InstCombiner::run() {
       }
       MadeIRChange = true;
     }
-    Worklist.addDeferredInstructions();
   }
 
   Worklist.zap();
@@ -3588,16 +3603,6 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
     for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
       Instruction *Inst = &*BBI++;
 
-      // DCE instruction if trivially dead.
-      if (isInstructionTriviallyDead(Inst, TLI)) {
-        ++NumDeadInst;
-        LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
-        salvageDebugInfoOrMarkUndef(*Inst);
-        Inst->eraseFromParent();
-        MadeIRChange = true;
-        continue;
-      }
-
       // ConstantProp instruction if trivially constant.
       if (!Inst->use_empty() &&
           (Inst->getNumOperands() == 0 || isa<Constant>(Inst->getOperand(0))))
@@ -3665,7 +3670,21 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
   // of the function down. This jives well with the way that it adds all uses
   // of instructions to the worklist after doing a transformation, thus avoiding
   // some N^2 behavior in pathological cases.
-  ICWorklist.addInitialGroup(InstrsForInstCombineWorklist);
+  ICWorklist.reserve(InstrsForInstCombineWorklist.size());
+  for (Instruction *Inst : reverse(InstrsForInstCombineWorklist)) {
+    // DCE instruction if trivially dead. As we iterate in reverse program
+    // order here, we will clean up whole chains of dead instructions.
+    if (isInstructionTriviallyDead(Inst, TLI)) {
+      ++NumDeadInst;
+      LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
+      salvageDebugInfoOrMarkUndef(*Inst);
+      Inst->eraseFromParent();
+      MadeIRChange = true;
+      continue;
+    }
+
+    ICWorklist.push(Inst);
+  }
 
   return MadeIRChange;
 }
diff --git a/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll b/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll
index ed32bb1aa9d..169bf7035a8 100644
--- a/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll
+++ b/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll
@@ -1,4 +1,4 @@
-; RUN: opt -instcombine -instcombine-infinite-loop-threshold=3 -S < %s | FileCheck %s
+; RUN: opt -instcombine -instcombine-infinite-loop-threshold=2 -S < %s | FileCheck %s
 ;
 
 define i32 @main(i32 %argc) {
diff --git a/test/Transforms/InstCombine/demorgan-sink-not-into-xor.ll b/test/Transforms/InstCombine/demorgan-sink-not-into-xor.ll
index c378033eef7..8d03a55b5b5 100644
--- a/test/Transforms/InstCombine/demorgan-sink-not-into-xor.ll
+++ b/test/Transforms/InstCombine/demorgan-sink-not-into-xor.ll
@@ -24,7 +24,7 @@ define i1 @positive_easyinvert(i16 %x, i8 %y) {
 ; CHECK-LABEL: @positive_easyinvert(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i16 [[X:%.*]], 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i8 [[Y:%.*]], -1
-; CHECK-NEXT:    [[TMP4:%.*]] = xor i1 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i1 [[TMP2]], [[TMP1]]
 ; CHECK-NEXT:    ret i1 [[TMP4]]
 ;
   %tmp1 = icmp slt i16 %x, 0
@@ -38,7 +38,7 @@ define i1 @positive_easyinvert0(i8 %y) {
 ; CHECK-LABEL: @positive_easyinvert0(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @gen1()
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i8 [[Y:%.*]], -1
-; CHECK-NEXT:    [[TMP4:%.*]] = xor i1 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i1 [[TMP2]], [[TMP1]]
 ; CHECK-NEXT:    ret i1 [[TMP4]]
 ;
   %tmp1 = call i1 @gen1()
@@ -52,7 +52,7 @@ define i1 @positive_easyinvert1(i8 %y) {
 ; CHECK-LABEL: @positive_easyinvert1(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @gen1()
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i8 [[Y:%.*]], -1
-; CHECK-NEXT:    [[TMP4:%.*]] = xor i1 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i1 [[TMP2]], [[TMP1]]
 ; CHECK-NEXT:    ret i1 [[TMP4]]
 ;
   %tmp1 = call i1 @gen1()
diff --git a/test/Transforms/InstCombine/logical-select.ll b/test/Transforms/InstCombine/logical-select.ll
index ac14fceb5c3..e14e2bf964d 100644
--- a/test/Transforms/InstCombine/logical-select.ll
+++ b/test/Transforms/InstCombine/logical-select.ll
@@ -535,8 +535,8 @@ define <4 x i32> @vec_sel_xor_multi_use(<4 x i32> %a, <4 x i32> %b, <4 x i1> %c)
 
 define i32 @allSignBits(i32 %cond, i32 %tval, i32 %fval) {
 ; CHECK-LABEL: @allSignBits(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[COND:%.*]], 0
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[TVAL:%.*]], i32 [[FVAL:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[COND:%.*]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[FVAL:%.*]], i32 [[TVAL:%.*]]
 ; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
   %bitmask = ashr i32 %cond, 31
@@ -549,8 +549,8 @@ define i32 @allSignBits(i32 %cond, i32 %tval, i32 %fval) {
 
 define <4 x i8> @allSignBits_vec(<4 x i8> %cond, <4 x i8> %tval, <4 x i8> %fval) {
 ; CHECK-LABEL: @allSignBits_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i8> [[COND:%.*]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> [[TVAL:%.*]], <4 x i8> [[FVAL:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i8> [[COND:%.*]], <i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> [[FVAL:%.*]], <4 x i8> [[TVAL:%.*]]
 ; CHECK-NEXT:    ret <4 x i8> [[TMP2]]
 ;
   %bitmask = ashr <4 x i8> %cond, <i8 7, i8 7, i8 7, i8 7>
diff --git a/test/Transforms/InstCombine/pr44245.ll b/test/Transforms/InstCombine/pr44245.ll
index 15e4df21dc5..f7eb806ba4c 100644
--- a/test/Transforms/InstCombine/pr44245.ll
+++ b/test/Transforms/InstCombine/pr44245.ll
@@ -159,9 +159,9 @@ define void @test_2(i1 %c) local_unnamed_addr {
 ; CHECK:       cond.true133:
 ; CHECK-NEXT:    br label [[COND_END144:%.*]]
 ; CHECK:       cond.false138:
+; CHECK-NEXT:    store %type_2* undef, %type_2** null, align 536870912
 ; CHECK-NEXT:    br label [[COND_END144]]
 ; CHECK:       cond.end144:
-; CHECK-NEXT:    store %type_3* undef, %type_3** null, align 536870912
 ; CHECK-NEXT:    br label [[WHILE_COND]]
 ;
 entry:
diff --git a/test/Transforms/InstCombine/select-imm-canon.ll b/test/Transforms/InstCombine/select-imm-canon.ll
index 4c59be66862..272d4a47ce6 100644
--- a/test/Transforms/InstCombine/select-imm-canon.ll
+++ b/test/Transforms/InstCombine/select-imm-canon.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -instcombine -instcombine-infinite-loop-threshold=3 -S | FileCheck %s
+; RUN: opt < %s -instcombine -instcombine-infinite-loop-threshold=2 -S | FileCheck %s
 
 define i8 @single(i32 %A) {
 ; CHECK-LABEL: @single(
diff --git a/test/Transforms/InstCombine/sub-ashr-and-to-icmp-select.ll b/test/Transforms/InstCombine/sub-ashr-and-to-icmp-select.ll
index 6cbb25288fb..66609f14184 100644
--- a/test/Transforms/InstCombine/sub-ashr-and-to-icmp-select.ll
+++ b/test/Transforms/InstCombine/sub-ashr-and-to-icmp-select.ll
@@ -12,7 +12,7 @@
 
 define i8 @sub_ashr_and_i8(i8 %x, i8 %y) {
 ; CHECK-LABEL: @sub_ashr_and_i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i8 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 0
 ; CHECK-NEXT:    ret i8 [[AND]]
 ;
@@ -24,7 +24,7 @@ define i16 @sub_ashr_and_i16(i16 %x, i16 %y) {
 ; CHECK-LABEL: @sub_ashr_and_i16(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i16 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i16 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select i1 [[TMP1]], i16 [[X]], i16 0
 ; CHECK-NEXT:    ret i16 [[AND]]
 ;
@@ -37,7 +37,7 @@ define i32 @sub_ashr_and_i32(i32 %x, i32 %y) {
 ; CHECK-LABEL: @sub_ashr_and_i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 0
 ; CHECK-NEXT:    ret i32 [[AND]]
 ;
@@ -49,7 +49,7 @@ define i64 @sub_ashr_and_i64(i64 %x, i64 %y) {
 ; CHECK-LABEL: @sub_ashr_and_i64(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i64 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i64 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select i1 [[TMP1]], i64 [[X]], i64 0
 ; CHECK-NEXT:    ret i64 [[AND]]
 ;
@@ -63,7 +63,7 @@ define i32 @sub_ashr_and_i32_nuw_nsw(i32 %x, i32 %y) {
 ; CHECK-LABEL: @sub_ashr_and_i32_nuw_nsw(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 0
 ; CHECK-NEXT:    ret i32 [[AND]]
 ;
@@ -77,7 +77,7 @@ define i32 @sub_ashr_and_i32_commute(i32 %x, i32 %y) {
 ; CHECK-LABEL: @sub_ashr_and_i32_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 0
 ; CHECK-NEXT:    ret i32 [[AND]]
 ;
@@ -91,7 +91,7 @@ define <4 x i32> @sub_ashr_and_i32_vec(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @sub_ashr_and_i32_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[X]], <4 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <4 x i32> [[AND]]
 ;
@@ -103,7 +103,7 @@ define <4 x i32> @sub_ashr_and_i32_vec_nuw_nsw(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @sub_ashr_and_i32_vec_nuw_nsw(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[X]], <4 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <4 x i32> [[AND]]
 ;
@@ -115,7 +115,7 @@ define <4 x i32> @sub_ashr_and_i32_vec_commute(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @sub_ashr_and_i32_vec_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[X]], <4 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <4 x i32> [[AND]]
 ;
@@ -144,7 +144,7 @@ define i32 @sub_ashr_and_i32_extra_use_sub(i32 %x, i32 %y, i32* %p) {
 
 define i32 @sub_ashr_and_i32_extra_use_and(i32 %x, i32 %y, i32* %p) {
 ; CHECK-LABEL: @sub_ashr_and_i32_extra_use_and(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 0
 ; CHECK-NEXT:    store i32 [[AND]], i32* [[P:%.*]], align 4
 ; CHECK-NEXT:    ret i32 [[AND]]
diff --git a/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll b/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll
index 6289e8f64b6..937bff62dc7 100644
--- a/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll
+++ b/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll
@@ -26,7 +26,7 @@ define i32 @clamp255_i32(i32 %x) {
 
 define i8 @sub_ashr_or_i8(i8 %x, i8 %y) {
 ; CHECK-LABEL: @sub_ashr_or_i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i8 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select i1 [[TMP1]], i8 -1, i8 [[X]]
 ; CHECK-NEXT:    ret i8 [[OR]]
 ;
@@ -38,7 +38,7 @@ define i16 @sub_ashr_or_i16(i16 %x, i16 %y) {
 ; CHECK-LABEL: @sub_ashr_or_i16(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i16 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i16 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select i1 [[TMP1]], i16 -1, i16 [[X]]
 ; CHECK-NEXT:    ret i16 [[OR]]
 ;
@@ -50,7 +50,7 @@ define i32 @sub_ashr_or_i32(i32 %x, i32 %y) {
 ; CHECK-LABEL: @sub_ashr_or_i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[X]]
 ; CHECK-NEXT:    ret i32 [[OR]]
 ;
@@ -62,7 +62,7 @@ define i32 @sub_ashr_or_i32(i32 %x, i32 %y) {
 
 define i64 @sub_ashr_or_i64(i64 %x, i64 %y) {
 ; CHECK-LABEL: @sub_ashr_or_i64(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i64 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i64 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select i1 [[TMP1]], i64 -1, i64 [[X]]
 ; CHECK-NEXT:    ret i64 [[OR]]
 ;
@@ -76,7 +76,7 @@ define i64 @sub_ashr_or_i64(i64 %x, i64 %y) {
 
 define i32 @sub_ashr_or_i32_nuw_nsw(i32 %x, i32 %y) {
 ; CHECK-LABEL: @sub_ashr_or_i32_nuw_nsw(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[X]]
 ; CHECK-NEXT:    ret i32 [[OR]]
 ;
@@ -90,7 +90,7 @@ define i32 @sub_ashr_or_i32_nuw_nsw(i32 %x, i32 %y) {
 
 define i32 @sub_ashr_or_i32_commute(i32 %x, i32 %y) {
 ; CHECK-LABEL: @sub_ashr_or_i32_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[X]]
 ; CHECK-NEXT:    ret i32 [[OR]]
 ;
@@ -104,7 +104,7 @@ define i32 @sub_ashr_or_i32_commute(i32 %x, i32 %y) {
 
 define <4 x i32> @sub_ashr_or_i32_vec(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @sub_ashr_or_i32_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> [[X]]
 ; CHECK-NEXT:    ret <4 x i32> [[OR]]
 ;
@@ -116,7 +116,7 @@ define <4 x i32> @sub_ashr_or_i32_vec(<4 x i32> %x, <4 x i32> %y) {
 
 define <4 x i32> @sub_ashr_or_i32_vec_nuw_nsw(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @sub_ashr_or_i32_vec_nuw_nsw(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> [[X]]
 ; CHECK-NEXT:    ret <4 x i32> [[OR]]
 ;
@@ -128,7 +128,7 @@ define <4 x i32> @sub_ashr_or_i32_vec_nuw_nsw(<4 x i32> %x, <4 x i32> %y) {
 
 define <4 x i32> @sub_ashr_or_i32_vec_commute(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @sub_ashr_or_i32_vec_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> [[X]]
 ; CHECK-NEXT:    ret <4 x i32> [[OR]]
 ;
@@ -157,7 +157,7 @@ define i32 @sub_ashr_or_i32_extra_use_sub(i32 %x, i32 %y, i32* %p) {
 
 define i32 @sub_ashr_or_i32_extra_use_or(i32 %x, i32 %y, i32* %p) {
 ; CHECK-LABEL: @sub_ashr_or_i32_extra_use_or(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[X]]
 ; CHECK-NEXT:    store i32 [[OR]], i32* [[P:%.*]], align 4
 ; CHECK-NEXT:    ret i32 [[OR]]
diff --git a/test/Transforms/InstCombine/vec_sext.ll b/test/Transforms/InstCombine/vec_sext.ll
index e0e263b7b0b..39bd4087416 100644
--- a/test/Transforms/InstCombine/vec_sext.ll
+++ b/test/Transforms/InstCombine/vec_sext.ll
@@ -4,8 +4,8 @@
 define <4 x i32> @vec_select(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: @vec_select(
 ; CHECK-NEXT:    [[SUB:%.*]] = sub nsw <4 x i32> zeroinitializer, [[A:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[B:%.*]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[SUB]], <4 x i32> [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[B:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A]], <4 x i32> [[SUB]]
 ; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 ;
   %cmp = icmp slt <4 x i32> %b, zeroinitializer
@@ -23,8 +23,8 @@ define <4 x i32> @vec_select(<4 x i32> %a, <4 x i32> %b) {
 
 define <4 x i32> @vec_select_alternate_sign_bit_test(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: @vec_select_alternate_sign_bit_test(
 ; CHECK-NEXT:    [[SUB:%.*]] = sub nsw <4 x i32> zeroinitializer, [[A:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[B:%.*]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A]], <4 x i32> [[SUB]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[B:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[SUB]], <4 x i32> [[A]]
 ; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 ;
   %cmp = icmp sgt <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
diff --git a/test/Transforms/SimplifyCFG/merge-cond-stores.ll b/test/Transforms/SimplifyCFG/merge-cond-stores.ll
index 5ad0e139bc2..e9a3e608ea2 100644
--- a/test/Transforms/SimplifyCFG/merge-cond-stores.ll
+++ b/test/Transforms/SimplifyCFG/merge-cond-stores.ll
@@ -77,9 +77,9 @@ end:
 define void @test_recursive(i32* %p, i32 %a, i32 %b, i32 %c, i32 %d) {
 ; CHECK-LABEL: @test_recursive(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = or i32 [[B:%.*]], [[A:%.*]]
 ; CHECK-NEXT:    [[X4:%.*]] = icmp eq i32 [[D:%.*]], 0
-; CHECK-NEXT:    [[TMP0:%.*]] = or i32 [[C:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[TMP0]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[TMP0]], [[C:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = xor i1 [[X4]], true
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]]
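
For readers who want the core idea outside of the InstCombine plumbing, here is a minimal standalone sketch. This is plain C++, not LLVM code: the Inst type, its fields, and the helper functions are invented for illustration. It shows why a single reverse-program-order walk with DCE removes an entire dead chain, whereas a forward walk in program order only catches the last link.

    #include <cstdio>
    #include <string>
    #include <vector>

    // Toy instruction: tracks its operands and how many users it still has.
    struct Inst {
      std::string Name;
      std::vector<Inst *> Ops; // values this instruction reads
      int Uses = 0;            // number of remaining users
      bool HasSideEffects = false;
    };

    static bool isTriviallyDead(const Inst &I) {
      return !I.HasSideEffects && I.Uses == 0;
    }

    static void erase(Inst &I) {
      // Dropping I releases its operands; they may now be dead as well.
      for (Inst *Op : I.Ops)
        --Op->Uses;
    }

    int main() {
      // Dead chain in program order: %a, %b = f(%a), %c = g(%b), %c unused.
      Inst A{"a"}, B{"b"}, C{"c"};
      B.Ops = {&A}; A.Uses = 1;
      C.Ops = {&B}; B.Uses = 1;
      std::vector<Inst *> Program = {&A, &B, &C};

      // A forward walk sees %a and %b while they still have users, so it only
      // deletes %c. Walking in reverse program order deletes %c, then %b, then
      // %a in one pass, because each erase frees up the previous link.
      for (auto It = Program.rbegin(); It != Program.rend(); ++It) {
        if (isTriviallyDead(**It)) {
          std::printf("DCE: %%%s\n", (*It)->Name.c_str());
          erase(**It);
        }
      }
      return 0;
    }

The deferred-worklist loop added in run() relies on the same effect: per the patch's own comment, eraseInstFromFunction() may push the erased instruction's operands back onto the deferred list, so whole dead chains are cleaned up before any folds see them.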