[InstCombine] DCE instructions earlier

When InstCombine initially populates the worklist, it already performs constant folding and DCE. However, as the instructions are initially visited in program order, this DCE can pick up only the last instruction of a dead chain; the rest would only get picked up in the main InstCombine run. To avoid this, we instead perform the DCE in a separate pass over the collected instructions in reverse order, which will allow us to pick up full dead instruction chains. We already need to do this reverse iteration anyway to populate the worklist, so this shouldn't add any extra cost. This by itself only fixes a small part of the problem though: the same basic issue also applies during the main InstCombine loop. We generally want DCE to occur as early as possible, because it will allow one-use folds to happen. Address this by also performing DCE while adding deferred instructions to the main worklist. This drops the number of tests that perform more than 2 InstCombine iterations from ~80 to ~40. There are some spurious test changes due to operand order / icmp toggling. Differential Revision: https://reviews.llvm.org/D75008
2024-11-22 18:54:02 +01:00 · 2020-02-20 22:42:42 +01:00 · 2020-02-20 22:42:42 +01:00 · 730509657a
commit 730509657a
parent 9f544f2f1b
12 changed files with 78 additions and 68 deletions
--- a/include/llvm/Transforms/InstCombine/InstCombineWorklist.h
+++ b/include/llvm/Transforms/InstCombine/InstCombineWorklist.h
@ -38,7 +38,7 @@ public:
  InstCombineWorklist(InstCombineWorklist &&) = default;
  InstCombineWorklist &operator=(InstCombineWorklist &&) = default;

-  bool isEmpty() const { return Worklist.empty(); }
+  bool isEmpty() const { return Worklist.empty() && Deferred.empty(); }

  /// Add instruction to the worklist.
  /// Instructions will be visited in the order they are added.
@ -72,26 +72,15 @@ public:
      push(I);
  }

-  void addDeferredInstructions() {
-    for (Instruction *I : reverse(Deferred))
-      push(I);
-    Deferred.clear();
+  Instruction *popDeferred() {
+    if (Deferred.empty())
+      return nullptr;
+    return Deferred.pop_back_val();
  }

-  /// AddInitialGroup - Add the specified batch of stuff in reverse order.
-  /// which should only be done when the worklist is empty and when the group
-  /// has no duplicates.
-  void addInitialGroup(ArrayRef<Instruction *> List) {
-    assert(Worklist.empty() && "Worklist must be empty to add initial group");
-    Worklist.reserve(List.size()+16);
-    WorklistMap.reserve(List.size());
-    LLVM_DEBUG(dbgs() << "IC: ADDING: " << List.size()
-                      << " instrs to worklist\n");
-    unsigned Idx = 0;
-    for (Instruction *I : reverse(List)) {
-      WorklistMap.insert(std::make_pair(I, Idx++));
-      Worklist.push_back(I);
-    }
+  void reserve(size_t Size) {
+    Worklist.reserve(Size + 16);
+    WorklistMap.reserve(Size);
  }

  /// Remove I from the worklist if it exists.
@ -107,6 +96,8 @@ public:
  }

  Instruction *removeOne() {
+    if (Worklist.empty())
+      return nullptr;
    Instruction *I = Worklist.pop_back_val();
    WorklistMap.erase(I);
    return I;
--- a/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/lib/Transforms/InstCombine/InstCombineInternal.h
@ -724,7 +724,7 @@ public:
    if (I.getNumOperands() < 8) {
      for (Use &Operand : I.operands())
        if (auto *Inst = dyn_cast<Instruction>(Operand))
-          Worklist.push(Inst);
+          Worklist.add(Inst);
    }
    Worklist.remove(&I);
    I.eraseFromParent();
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@ -3406,6 +3406,22 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {

 bool InstCombiner::run() {
  while (!Worklist.isEmpty()) {
+    // Walk deferred instructions in reverse order, and push them to the
+    // worklist, which means they'll end up popped from the worklist in-order.
+    while (Instruction *I = Worklist.popDeferred()) {
+      // Check to see if we can DCE the instruction. We do this already here to
+      // reduce the number of uses and thus allow other folds to trigger.
+      // Note that eraseInstFromFunction() may push additional instructions on
+      // the deferred worklist, so this will DCE whole instruction chains.
+      if (isInstructionTriviallyDead(I, &TLI)) {
+        eraseInstFromFunction(*I);
+        ++NumDeadInst;
+        continue;
+      }
+
+      Worklist.push(I);
+    }
+
    Instruction *I = Worklist.removeOne();
    if (I == nullptr) continue;  // skip null values.

@ -3552,7 +3568,6 @@ bool InstCombiner::run() {
      }
      MadeIRChange = true;
    }
-    Worklist.addDeferredInstructions();
  }

  Worklist.zap();
@ -3588,16 +3603,6 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
    for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
      Instruction *Inst = &*BBI++;

-      // DCE instruction if trivially dead.
-      if (isInstructionTriviallyDead(Inst, TLI)) {
-        ++NumDeadInst;
-        LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
-        salvageDebugInfoOrMarkUndef(*Inst);
-        Inst->eraseFromParent();
-        MadeIRChange = true;
-        continue;
-      }
-
      // ConstantProp instruction if trivially constant.
      if (!Inst->use_empty() &&
          (Inst->getNumOperands() == 0 || isa<Constant>(Inst->getOperand(0))))
@ -3665,7 +3670,21 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
  // of the function down.  This jives well with the way that it adds all uses
  // of instructions to the worklist after doing a transformation, thus avoiding
  // some N^2 behavior in pathological cases.
-  ICWorklist.addInitialGroup(InstrsForInstCombineWorklist);
+  ICWorklist.reserve(InstrsForInstCombineWorklist.size());
+  for (Instruction *Inst : reverse(InstrsForInstCombineWorklist)) {
+    // DCE instruction if trivially dead. As we iterate in reverse program
+    // order here, we will clean up whole chains of dead instructions.
+    if (isInstructionTriviallyDead(Inst, TLI)) {
+      ++NumDeadInst;
+      LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
+      salvageDebugInfoOrMarkUndef(*Inst);
+      Inst->eraseFromParent();
+      MadeIRChange = true;
+      continue;
+    }
+
+    ICWorklist.push(Inst);
+  }

  return MadeIRChange;
 }
--- a/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll
+++ b/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll
@ -1,4 +1,4 @@
-; RUN: opt -instcombine -instcombine-infinite-loop-threshold=3 -S < %s | FileCheck %s
+; RUN: opt -instcombine -instcombine-infinite-loop-threshold=2 -S < %s | FileCheck %s

 ; <rdar://problem/8606771>
 define i32 @main(i32 %argc) {
--- a/test/Transforms/InstCombine/demorgan-sink-not-into-xor.ll
+++ b/test/Transforms/InstCombine/demorgan-sink-not-into-xor.ll
@ -24,7 +24,7 @@ define i1 @positive_easyinvert(i16 %x, i8 %y) {
 ; CHECK-LABEL: @positive_easyinvert(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i16 [[X:%.*]], 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i8 [[Y:%.*]], -1
-; CHECK-NEXT:    [[TMP4:%.*]] = xor i1 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i1 [[TMP2]], [[TMP1]]
 ; CHECK-NEXT:    ret i1 [[TMP4]]
 ;
  %tmp1 = icmp slt i16 %x, 0
@ -38,7 +38,7 @@ define i1 @positive_easyinvert0(i8 %y) {
 ; CHECK-LABEL: @positive_easyinvert0(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @gen1()
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i8 [[Y:%.*]], -1
-; CHECK-NEXT:    [[TMP4:%.*]] = xor i1 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i1 [[TMP2]], [[TMP1]]
 ; CHECK-NEXT:    ret i1 [[TMP4]]
 ;
  %tmp1 = call i1 @gen1()
@ -52,7 +52,7 @@ define i1 @positive_easyinvert1(i8 %y) {
 ; CHECK-LABEL: @positive_easyinvert1(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @gen1()
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i8 [[Y:%.*]], -1
-; CHECK-NEXT:    [[TMP4:%.*]] = xor i1 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i1 [[TMP2]], [[TMP1]]
 ; CHECK-NEXT:    ret i1 [[TMP4]]
 ;
  %tmp1 = call i1 @gen1()
--- a/test/Transforms/InstCombine/logical-select.ll
+++ b/test/Transforms/InstCombine/logical-select.ll
@ -535,8 +535,8 @@ define <4 x i32> @vec_sel_xor_multi_use(<4 x i32> %a, <4 x i32> %b, <4 x i1> %c)

 define i32 @allSignBits(i32 %cond, i32 %tval, i32 %fval) {
 ; CHECK-LABEL: @allSignBits(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[COND:%.*]], 0
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[TVAL:%.*]], i32 [[FVAL:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[COND:%.*]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[FVAL:%.*]], i32 [[TVAL:%.*]]
 ; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
  %bitmask = ashr i32 %cond, 31
@ -549,8 +549,8 @@ define i32 @allSignBits(i32 %cond, i32 %tval, i32 %fval) {

 define <4 x i8> @allSignBits_vec(<4 x i8> %cond, <4 x i8> %tval, <4 x i8> %fval) {
 ; CHECK-LABEL: @allSignBits_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i8> [[COND:%.*]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> [[TVAL:%.*]], <4 x i8> [[FVAL:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i8> [[COND:%.*]], <i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> [[FVAL:%.*]], <4 x i8> [[TVAL:%.*]]
 ; CHECK-NEXT:    ret <4 x i8> [[TMP2]]
 ;
  %bitmask = ashr <4 x i8> %cond, <i8 7, i8 7, i8 7, i8 7>
--- a/test/Transforms/InstCombine/pr44245.ll
+++ b/test/Transforms/InstCombine/pr44245.ll
@ -159,9 +159,9 @@ define void @test_2(i1 %c) local_unnamed_addr {
 ; CHECK:       cond.true133:
 ; CHECK-NEXT:    br label [[COND_END144:%.*]]
 ; CHECK:       cond.false138:
+; CHECK-NEXT:    store %type_2* undef, %type_2** null, align 536870912
 ; CHECK-NEXT:    br label [[COND_END144]]
 ; CHECK:       cond.end144:
-; CHECK-NEXT:    store %type_3* undef, %type_3** null, align 536870912
 ; CHECK-NEXT:    br label [[WHILE_COND]]
 ;
 entry:
--- a/test/Transforms/InstCombine/select-imm-canon.ll
+++ b/test/Transforms/InstCombine/select-imm-canon.ll
@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -instcombine -instcombine-infinite-loop-threshold=3 -S | FileCheck %s
+; RUN: opt < %s -instcombine -instcombine-infinite-loop-threshold=2 -S | FileCheck %s

 define i8 @single(i32 %A) {
 ; CHECK-LABEL: @single(
--- a/test/Transforms/InstCombine/sub-ashr-and-to-icmp-select.ll
+++ b/test/Transforms/InstCombine/sub-ashr-and-to-icmp-select.ll
@ -12,7 +12,7 @@

 define i8 @sub_ashr_and_i8(i8 %x, i8 %y) {
 ; CHECK-LABEL: @sub_ashr_and_i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i8 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 0
 ; CHECK-NEXT:    ret i8 [[AND]]
 ;
@ -24,7 +24,7 @@ define i8 @sub_ashr_and_i8(i8 %x, i8 %y) {

 define i16 @sub_ashr_and_i16(i16 %x, i16 %y) {
 ; CHECK-LABEL: @sub_ashr_and_i16(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i16 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i16 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select i1 [[TMP1]], i16 [[X]], i16 0
 ; CHECK-NEXT:    ret i16 [[AND]]
 ;
@ -37,7 +37,7 @@ define i16 @sub_ashr_and_i16(i16 %x, i16 %y) {

 define i32 @sub_ashr_and_i32(i32 %x, i32 %y) {
 ; CHECK-LABEL: @sub_ashr_and_i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 0
 ; CHECK-NEXT:    ret i32 [[AND]]
 ;
@ -49,7 +49,7 @@ define i32 @sub_ashr_and_i32(i32 %x, i32 %y) {

 define i64 @sub_ashr_and_i64(i64 %x, i64 %y) {
 ; CHECK-LABEL: @sub_ashr_and_i64(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i64 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i64 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select i1 [[TMP1]], i64 [[X]], i64 0
 ; CHECK-NEXT:    ret i64 [[AND]]
 ;
@ -63,7 +63,7 @@ define i64 @sub_ashr_and_i64(i64 %x, i64 %y) {

 define i32 @sub_ashr_and_i32_nuw_nsw(i32 %x, i32 %y) {
 ; CHECK-LABEL: @sub_ashr_and_i32_nuw_nsw(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 0
 ; CHECK-NEXT:    ret i32 [[AND]]
 ;
@ -77,7 +77,7 @@ define i32 @sub_ashr_and_i32_nuw_nsw(i32 %x, i32 %y) {

 define i32 @sub_ashr_and_i32_commute(i32 %x, i32 %y) {
 ; CHECK-LABEL: @sub_ashr_and_i32_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 0
 ; CHECK-NEXT:    ret i32 [[AND]]
 ;
@ -91,7 +91,7 @@ define i32 @sub_ashr_and_i32_commute(i32 %x, i32 %y) {

 define <4 x i32> @sub_ashr_and_i32_vec(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @sub_ashr_and_i32_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[X]], <4 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <4 x i32> [[AND]]
 ;
@ -103,7 +103,7 @@ define <4 x i32> @sub_ashr_and_i32_vec(<4 x i32> %x, <4 x i32> %y) {

 define <4 x i32> @sub_ashr_and_i32_vec_nuw_nsw(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @sub_ashr_and_i32_vec_nuw_nsw(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[X]], <4 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <4 x i32> [[AND]]
 ;
@ -115,7 +115,7 @@ define <4 x i32> @sub_ashr_and_i32_vec_nuw_nsw(<4 x i32> %x, <4 x i32> %y) {

 define <4 x i32> @sub_ashr_and_i32_vec_commute(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @sub_ashr_and_i32_vec_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[X]], <4 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <4 x i32> [[AND]]
 ;
@ -144,7 +144,7 @@ define i32 @sub_ashr_and_i32_extra_use_sub(i32 %x, i32 %y, i32* %p) {

 define i32 @sub_ashr_and_i32_extra_use_and(i32 %x, i32 %y, i32* %p) {
 ; CHECK-LABEL: @sub_ashr_and_i32_extra_use_and(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 0
 ; CHECK-NEXT:    store i32 [[AND]], i32* [[P:%.*]], align 4
 ; CHECK-NEXT:    ret i32 [[AND]]
--- a/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll
+++ b/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll
@ -26,7 +26,7 @@ define i32 @clamp255_i32(i32 %x) {

 define i8 @sub_ashr_or_i8(i8 %x, i8 %y) {
 ; CHECK-LABEL: @sub_ashr_or_i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i8 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select i1 [[TMP1]], i8 -1, i8 [[X]]
 ; CHECK-NEXT:    ret i8 [[OR]]
 ;
@ -38,7 +38,7 @@ define i8 @sub_ashr_or_i8(i8 %x, i8 %y) {

 define i16 @sub_ashr_or_i16(i16 %x, i16 %y) {
 ; CHECK-LABEL: @sub_ashr_or_i16(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i16 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i16 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select i1 [[TMP1]], i16 -1, i16 [[X]]
 ; CHECK-NEXT:    ret i16 [[OR]]
 ;
@ -50,7 +50,7 @@ define i16 @sub_ashr_or_i16(i16 %x, i16 %y) {

 define i32 @sub_ashr_or_i32(i32 %x, i32 %y) {
 ; CHECK-LABEL: @sub_ashr_or_i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[X]]
 ; CHECK-NEXT:    ret i32 [[OR]]
 ;
@ -62,7 +62,7 @@ define i32 @sub_ashr_or_i32(i32 %x, i32 %y) {

 define i64 @sub_ashr_or_i64(i64 %x, i64 %y) {
 ; CHECK-LABEL: @sub_ashr_or_i64(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i64 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i64 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select i1 [[TMP1]], i64 -1, i64 [[X]]
 ; CHECK-NEXT:    ret i64 [[OR]]
 ;
@ -76,7 +76,7 @@ define i64 @sub_ashr_or_i64(i64 %x, i64 %y) {

 define i32 @sub_ashr_or_i32_nuw_nsw(i32 %x, i32 %y) {
 ; CHECK-LABEL: @sub_ashr_or_i32_nuw_nsw(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[X]]
 ; CHECK-NEXT:    ret i32 [[OR]]
 ;
@ -90,7 +90,7 @@ define i32 @sub_ashr_or_i32_nuw_nsw(i32 %x, i32 %y) {

 define i32 @sub_ashr_or_i32_commute(i32 %x, i32 %y) {
 ; CHECK-LABEL: @sub_ashr_or_i32_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[X]]
 ; CHECK-NEXT:    ret i32 [[OR]]
 ;
@ -104,7 +104,7 @@ define i32 @sub_ashr_or_i32_commute(i32 %x, i32 %y) {

 define <4 x i32> @sub_ashr_or_i32_vec(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @sub_ashr_or_i32_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> [[X]]
 ; CHECK-NEXT:    ret <4 x i32> [[OR]]
 ;
@ -116,7 +116,7 @@ define <4 x i32> @sub_ashr_or_i32_vec(<4 x i32> %x, <4 x i32> %y) {

 define <4 x i32> @sub_ashr_or_i32_vec_nuw_nsw(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @sub_ashr_or_i32_vec_nuw_nsw(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> [[X]]
 ; CHECK-NEXT:    ret <4 x i32> [[OR]]
 ;
@ -128,7 +128,7 @@ define <4 x i32> @sub_ashr_or_i32_vec_nuw_nsw(<4 x i32> %x, <4 x i32> %y) {

 define <4 x i32> @sub_ashr_or_i32_vec_commute(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @sub_ashr_or_i32_vec_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> [[X]]
 ; CHECK-NEXT:    ret <4 x i32> [[OR]]
 ;
@ -157,7 +157,7 @@ define i32 @sub_ashr_or_i32_extra_use_sub(i32 %x, i32 %y, i32* %p) {

 define i32 @sub_ashr_or_i32_extra_use_or(i32 %x, i32 %y, i32* %p) {
 ; CHECK-LABEL: @sub_ashr_or_i32_extra_use_or(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[X]]
 ; CHECK-NEXT:    store i32 [[OR]], i32* [[P:%.*]], align 4
 ; CHECK-NEXT:    ret i32 [[OR]]
--- a/test/Transforms/InstCombine/vec_sext.ll
+++ b/test/Transforms/InstCombine/vec_sext.ll
@ -4,8 +4,8 @@
 define <4 x i32> @vec_select(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: @vec_select(
 ; CHECK-NEXT:    [[SUB:%.*]] = sub nsw <4 x i32> zeroinitializer, [[A:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[B:%.*]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[SUB]], <4 x i32> [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[B:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A]], <4 x i32> [[SUB]]
 ; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 ;
  %cmp = icmp slt <4 x i32> %b, zeroinitializer
@ -23,8 +23,8 @@ define <4 x i32> @vec_select(<4 x i32> %a, <4 x i32> %b) {
 define <4 x i32> @vec_select_alternate_sign_bit_test(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: @vec_select_alternate_sign_bit_test(
 ; CHECK-NEXT:    [[SUB:%.*]] = sub nsw <4 x i32> zeroinitializer, [[A:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[B:%.*]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A]], <4 x i32> [[SUB]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[B:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[SUB]], <4 x i32> [[A]]
 ; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 ;
  %cmp = icmp sgt <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
--- a/test/Transforms/SimplifyCFG/merge-cond-stores.ll
+++ b/test/Transforms/SimplifyCFG/merge-cond-stores.ll
@ -77,9 +77,9 @@ end:
 define void @test_recursive(i32* %p, i32 %a, i32 %b, i32 %c, i32 %d) {
 ; CHECK-LABEL: @test_recursive(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = or i32 [[B:%.*]], [[A:%.*]]
 ; CHECK-NEXT:    [[X4:%.*]] = icmp eq i32 [[D:%.*]], 0
-; CHECK-NEXT:    [[TMP0:%.*]] = or i32 [[C:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[TMP0]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[TMP0]], [[C:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = xor i1 [[X4]], true
 ; CHECK-NEXT:    [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]]