[InstCombine] allow more narrowing of casted select

D47163 created a rule that we should not change the casted type of a select when we have matching types in its compare condition. That was intended to help vector codegen, but it also could create situations where we miss subsequent folds as shown in PR44545: https://bugs.llvm.org/show_bug.cgi?id=44545 By using shouldChangeType(), we can continue to get the vector folds (because we always return false for vector types). But we also solve the motivating bug because it's ok to narrow the scalar select in that example. Our canonicalization rules around select are a mess, but AFAICT, this will not induce any infinite looping from the reverse transform (but we'll need to watch for that possibility if committed). Side note: there's a similar use of shouldChangeType() for phi ops just below this diff, and the source and destination types appear to be reversed. Differential Revision: https://reviews.llvm.org/D72733
2025-01-31 12:41:49 +01:00 · 2020-01-27 16:33:13 -05:00 · 2020-01-27 16:33:13 -05:00 · 665298049e
commit 665298049e
parent 1cf1e98f38
4 changed files with 15 additions and 13 deletions
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@ -276,16 +276,20 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
  }

  if (auto *Sel = dyn_cast<SelectInst>(Src)) {
-    // We are casting a select. Try to fold the cast into the select, but only
-    // if the select does not have a compare instruction with matching operand
-    // types. Creating a select with operands that are different sizes than its
+    // We are casting a select. Try to fold the cast into the select if the
+    // select does not have a compare instruction with matching operand types
+    // or the select is likely better done in a narrow type.
+    // Creating a select with operands that are different sizes than its
    // condition may inhibit other folds and lead to worse codegen.
    auto *Cmp = dyn_cast<CmpInst>(Sel->getCondition());
-    if (!Cmp || Cmp->getOperand(0)->getType() != Sel->getType())
+    if (!Cmp || Cmp->getOperand(0)->getType() != Sel->getType() ||
+        (CI.getOpcode() == Instruction::Trunc &&
+         shouldChangeType(CI.getSrcTy(), CI.getType()))) {
      if (Instruction *NV = FoldOpIntoSelect(CI, Sel)) {
        replaceAllDbgUsesWith(*Sel, *NV, CI, DT);
        return NV;
      }
+    }
  }

  // If we are casting a PHI, then fold the cast into the PHI.
--- a/test/Transforms/InstCombine/cast-select.ll
+++ b/test/Transforms/InstCombine/cast-select.ll
@ -56,8 +56,8 @@ define <2 x i32> @sext_vec(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) {
 define i16 @trunc(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @trunc(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i32 42, i32 [[Z:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = trunc i32 [[SEL]] to i16
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[Z:%.*]] to i16
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP]], i16 42, i16 [[TMP1]]
 ; CHECK-NEXT:    ret i16 [[R]]
 ;
  %cmp = icmp ult i32 %x, %y
--- a/test/Transforms/InstCombine/select-imm-canon.ll
+++ b/test/Transforms/InstCombine/select-imm-canon.ll
@ -39,8 +39,8 @@ define i8 @thisdoesnotloop(i32 %A, i32 %B) {
 ; CHECK-LABEL: @thisdoesnotloop(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[L1:%.*]] = icmp slt i32 [[A:%.*]], -128
-; CHECK-NEXT:    [[L2:%.*]] = select i1 [[L1]], i32 -128, i32 [[B:%.*]]
-; CHECK-NEXT:    [[CONV7:%.*]] = trunc i32 [[L2]] to i8
+; CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[B:%.*]] to i8
+; CHECK-NEXT:    [[CONV7:%.*]] = select i1 [[L1]], i8 -128, i8 [[TMP0]]
 ; CHECK-NEXT:    ret i8 [[CONV7]]
 ;
 entry:
--- a/test/Transforms/InstCombine/trunc.ll
+++ b/test/Transforms/InstCombine/trunc.ll
@ -626,15 +626,13 @@ define <2 x i8> @narrow_sub_vec_constant(<2 x i32> %x) {
  ret <2 x i8> %tr
 }

-; FIXME: If the select is narrowed based on the target's datalayout, we allow more optimizations.
+; If the select is narrowed based on the target's datalayout, we allow more optimizations.

 define i16 @PR44545(i32 %t0, i32 %data) {
 ; CHECK-LABEL: @PR44545(
-; CHECK-NEXT:    [[T1:%.*]] = add nuw nsw i32 [[T0:%.*]], 1
 ; CHECK-NEXT:    [[ISZERO:%.*]] = icmp eq i32 [[DATA:%.*]], 0
-; CHECK-NEXT:    [[FFS:%.*]] = select i1 [[ISZERO]], i32 0, i32 [[T1]]
-; CHECK-NEXT:    [[CAST:%.*]] = trunc i32 [[FFS]] to i16
-; CHECK-NEXT:    [[SUB:%.*]] = add nsw i16 [[CAST]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[T0:%.*]] to i16
+; CHECK-NEXT:    [[SUB:%.*]] = select i1 [[ISZERO]], i16 -1, i16 [[TMP1]]
 ; CHECK-NEXT:    ret i16 [[SUB]]
 ;
  %t1 = add nuw nsw i32 %t0, 1