
[InstCombine] Make folding (X >s -1) ? C1 : C2 --> ((X >>s 31) & (C2 - C1)) + C1 support splat vectors

This also uses decomposeBitTestICmp to decode the compare.

Differential Revision: https://reviews.llvm.org/D36781

llvm-svn: 311044
Craig Topper 2017-08-16 21:52:07 +00:00
parent 374b3a728d
commit 6a4ddfbe1b
2 changed files with 73 additions and 17 deletions
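As a sanity check on the identity itself, the transform can be mirrored in plain C++. The sketch below is illustrative only, not InstCombine code: the subtract and add are done in unsigned arithmetic to match APInt's two's-complement wrapping, and x >> 31 is assumed to be an arithmetic shift on signed values, which is what the IR's ashr guarantees (and what mainstream C++ compilers produce).

#include <cassert>
#include <cstdint>
#include <initializer_list>

// (X >s -1) ? C1 : C2  -->  ((X >>s 31) & (C2 - C1)) + C1
// x >> 31 is all-ones when x is negative and zero otherwise, so the
// AND keeps either (C2 - C1) or 0, and the ADD rebases the result on C1.
int32_t foldedSelect(int32_t x, int32_t c1, int32_t c2) {
  uint32_t diff = uint32_t(c2) - uint32_t(c1); // wraps like APInt
  uint32_t mask = uint32_t(x >> 31);           // 0xFFFFFFFF or 0
  return int32_t((mask & diff) + uint32_t(c1));
}

int main() {
  // Same constants as the test35 functions below: select between 60 and 100.
  for (int32_t x : {INT32_MIN, -2, -1, 0, 1, INT32_MAX})
    assert(foldedSelect(x, 60, 100) == (x > -1 ? 60 : 100));
}

The splat-vector support in this patch applies exactly this logic per lane; when C2 is all-ones the and/add pair collapses into a single or, which is what @test37vec below exercises.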

lib/Transforms/InstCombine/InstCombineSelect.cpp

@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 #include "InstCombineInternal.h"
+#include "llvm/Analysis/CmpInstAnalysis.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -694,27 +695,31 @@ Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI,
   // FIXME: Type and constness constraints could be lifted, but we have to
   // watch code size carefully. We should consider xor instead of
   // sub/add when we decide to do that.
-  if (IntegerType *Ty = dyn_cast<IntegerType>(CmpLHS->getType())) {
-    if (TrueVal->getType() == Ty) {
-      if (ConstantInt *Cmp = dyn_cast<ConstantInt>(CmpRHS)) {
-        ConstantInt *C1 = nullptr, *C2 = nullptr;
-        if (Pred == ICmpInst::ICMP_SGT && Cmp->isMinusOne()) {
-          C1 = dyn_cast<ConstantInt>(TrueVal);
-          C2 = dyn_cast<ConstantInt>(FalseVal);
-        } else if (Pred == ICmpInst::ICMP_SLT && Cmp->isZero()) {
-          C1 = dyn_cast<ConstantInt>(FalseVal);
-          C2 = dyn_cast<ConstantInt>(TrueVal);
-        }
-        if (C1 && C2) {
+  if (CmpLHS->getType()->isIntOrIntVectorTy() &&
+      CmpLHS->getType() == TrueVal->getType()) {
+    const APInt *C1, *C2;
+    if (match(TrueVal, m_APInt(C1)) && match(FalseVal, m_APInt(C2))) {
+      ICmpInst::Predicate Pred = ICI->getPredicate();
+      Value *X;
+      APInt Mask;
+      if (decomposeBitTestICmp(CmpLHS, CmpRHS, Pred, X, Mask)) {
+        if (Mask.isSignMask()) {
+          assert(X == CmpLHS && "Expected to use the compare input directly");
+          assert(ICmpInst::isEquality(Pred) && "Expected equality predicate");
+          if (Pred == ICmpInst::ICMP_NE)
+            std::swap(C1, C2);
           // This shift results in either -1 or 0.
-          Value *AShr = Builder.CreateAShr(CmpLHS, Ty->getBitWidth() - 1);
+          Value *AShr = Builder.CreateAShr(X, Mask.getBitWidth() - 1);
           // Check if we can express the operation with a single or.
-          if (C2->isMinusOne())
-            return replaceInstUsesWith(SI, Builder.CreateOr(AShr, C1));
+          if (C2->isAllOnesValue())
+            return replaceInstUsesWith(SI, Builder.CreateOr(AShr, *C1));
-          Value *And = Builder.CreateAnd(AShr, C2->getValue() - C1->getValue());
-          return replaceInstUsesWith(SI, Builder.CreateAdd(And, C1));
+          Value *And = Builder.CreateAnd(AShr, *C2 - *C1);
+          return replaceInstUsesWith(SI, Builder.CreateAdd(And,
+              ConstantInt::get(And->getType(), *C1)));
         }
       }
     }
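
The key change above is that the sign test is no longer matched literally: decomposeBitTestICmp first canonicalizes the compare, so X >s -1 and X <s 0 both come back as an equality test of X against a mask, and the fold only has to check that the mask is the sign mask. Below is a rough, hypothetical model of just the two cases this fold relies on, for intuition only; the real helper is declared in llvm/Analysis/CmpInstAnalysis.h and covers more predicates and masks.

#include <cstdint>

// Hypothetical, simplified model of decomposeBitTestICmp for i32 and the
// sign-bit patterns used by this fold; not the real LLVM API.
enum class Predicate { EQ, NE, SGT, SLT };

struct BitTest {
  Predicate Pred; // always EQ or NE after decomposition
  uint32_t Mask;  // the bit(s) under test; here, the sign bit
};

// Returns true if `P X, RHS` is really a bit test on X.
bool decomposeSignTest(Predicate P, int32_t RHS, BitTest &Out) {
  if (P == Predicate::SGT && RHS == -1) {
    Out = {Predicate::EQ, 0x80000000u}; // X >s -1  <=>  (X & SignMask) == 0
    return true;
  }
  if (P == Predicate::SLT && RHS == 0) {
    Out = {Predicate::NE, 0x80000000u}; // X <s 0   <=>  (X & SignMask) != 0
    return true;
  }
  return false;
}

After that normalization the EQ form is the original (X >s -1) ? C1 : C2 shape and the NE form is the same select with its arms exchanged, which is exactly what the std::swap(C1, C2) in the diff handles.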

test/Transforms/InstCombine/select.ll

@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 ; PR1822
@@ -649,6 +650,33 @@ define i32 @test35(i32 %x) {
 ; CHECK: ret
 }
+
+define <2 x i32> @test35vec(<2 x i32> %x) {
+; CHECK-LABEL: @test35vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], <i32 31, i32 31>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], <i32 40, i32 40>
+; CHECK-NEXT:    [[TMP3:%.*]] = add nuw nsw <2 x i32> [[TMP2]], <i32 60, i32 60>
+; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
+;
+  %cmp = icmp sge <2 x i32> %x, <i32 0, i32 0>
+  %cond = select <2 x i1> %cmp, <2 x i32> <i32 60, i32 60>, <2 x i32> <i32 100, i32 100>
+  ret <2 x i32> %cond
+}
+
+; Make sure we can still perform this optimization with a truncate present
+define i32 @test35_with_trunc(i64 %x) {
+; CHECK-LABEL: @test35_with_trunc(
+; CHECK-NEXT:    [[X1:%.*]] = trunc i64 [[X:%.*]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr i32 [[X1]], 31
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 40
+; CHECK-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 60
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %x1 = trunc i64 %x to i32
+  %cmp = icmp sge i32 %x1, 0
+  %cond = select i1 %cmp, i32 60, i32 100
+  ret i32 %cond
+}
 
 define i32 @test36(i32 %x) {
   %cmp = icmp slt i32 %x, 0
   %cond = select i1 %cmp, i32 60, i32 100
@@ -660,6 +688,18 @@ define i32 @test36(i32 %x) {
 ; CHECK: ret
 }
+
+define <2 x i32> @test36vec(<2 x i32> %x) {
+; CHECK-LABEL: @test36vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], <i32 31, i32 31>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], <i32 -40, i32 -40>
+; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <2 x i32> [[TMP2]], <i32 100, i32 100>
+; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
+;
+  %cmp = icmp slt <2 x i32> %x, <i32 0, i32 0>
+  %cond = select <2 x i1> %cmp, <2 x i32> <i32 60, i32 60>, <2 x i32> <i32 100, i32 100>
+  ret <2 x i32> %cond
+}
 
 define i32 @test37(i32 %x) {
   %cmp = icmp sgt i32 %x, -1
   %cond = select i1 %cmp, i32 1, i32 -1
@@ -670,6 +710,17 @@ define i32 @test37(i32 %x) {
 ; CHECK: ret
 }
+
+define <2 x i32> @test37vec(<2 x i32> %x) {
+; CHECK-LABEL: @test37vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], <i32 31, i32 31>
+; CHECK-NEXT:    [[TMP2:%.*]] = or <2 x i32> [[TMP1]], <i32 1, i32 1>
+; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
+;
+  %cmp = icmp sgt <2 x i32> %x, <i32 -1, i32 -1>
+  %cond = select <2 x i1> %cmp, <2 x i32> <i32 1, i32 1>, <2 x i32> <i32 -1, i32 -1>
+  ret <2 x i32> %cond
+}
 
 define i1 @test38(i1 %cond) {
   %zero = alloca i32
   %one = alloca i32