[InstCombine] form uaddsat from add+umin (PR14613)

This is the last step towards solving the examples shown in: https://bugs.llvm.org/show_bug.cgi?id=14613 With this change, x86 should end up with psubus instructions when those are available. All known codegen issues with expanding the saturating intrinsics were resolved with: D59006 / rL356855 We also have some early evidence in D58872 that using the intrinsics will lead to better perf. If some target regresses from this, custom lowering of the intrinsics (as in the above for x86) may be needed. llvm-svn: 357012
2024-10-19 02:52:53 +02:00 · 2019-03-26 17:50:08 +00:00 · 2019-03-26 17:50:08 +00:00 · 83681af5d9
commit 83681af5d9
parent e762562b34
3 changed files with 94 additions and 22 deletions
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@ -1064,6 +1064,28 @@ static Instruction *canonicalizeLowbitMask(BinaryOperator &I,
  return BinaryOperator::CreateNot(NotMask, I.getName());
 }

+static Instruction *foldToUnsignedSaturatedAdd(BinaryOperator &I) {
+  assert(I.getOpcode() == Instruction::Add && "Expecting add instruction");
+  Type *Ty = I.getType();
+  auto getUAddSat = [&]() {
+    return Intrinsic::getDeclaration(I.getModule(), Intrinsic::uadd_sat, Ty);
+  };
+
+  // add (umin X, ~Y), Y --> uaddsat X, Y
+  Value *X, *Y;
+  if (match(&I, m_c_Add(m_c_UMin(m_Value(X), m_Not(m_Value(Y))),
+                        m_Deferred(Y))))
+    return CallInst::Create(getUAddSat(), { X, Y });
+
+  // add (umin X, ~C), C --> uaddsat X, C
+  const APInt *C, *NotC;
+  if (match(&I, m_Add(m_UMin(m_Value(X), m_APInt(NotC)), m_APInt(C))) &&
+      *C == ~*NotC)
+    return CallInst::Create(getUAddSat(), { X, ConstantInt::get(Ty, *C) });
+
+  return nullptr;
+}
+
 Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
  if (Value *V = SimplifyAddInst(I.getOperand(0), I.getOperand(1),
                                 I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
@ -1266,6 +1288,9 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
  if (Instruction *V = canonicalizeLowbitMask(I, Builder))
    return V;

+  if (Instruction *SatAdd = foldToUnsignedSaturatedAdd(I))
+    return SatAdd;
+
  return Changed ? &I : nullptr;
 }

--- a/test/Transforms/InstCombine/minmax-fold.ll
+++ b/test/Transforms/InstCombine/minmax-fold.ll
@ -1134,9 +1134,7 @@ define <2 x i33> @add_umax_vec(<2 x i33> %x) {

 define i8 @PR14613_umin(i8 %x) {
 ; CHECK-LABEL: @PR14613_umin(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i8 [[X:%.*]], -16
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 -16
-; CHECK-NEXT:    [[U7:%.*]] = add i8 [[TMP2]], 15
+; CHECK-NEXT:    [[U7:%.*]] = call i8 @llvm.uadd.sat.i8(i8 [[X:%.*]], i8 15)
 ; CHECK-NEXT:    ret i8 [[U7]]
 ;
  %u4 = zext i8 %x to i32
--- a/test/Transforms/InstCombine/saturating-add-sub.ll
+++ b/test/Transforms/InstCombine/saturating-add-sub.ll
@ -1254,10 +1254,7 @@ declare <2 x i8> @get_v2i8()
 define i32 @unsigned_sat_variable_using_min_add(i32 %x) {
 ; CHECK-LABEL: @unsigned_sat_variable_using_min_add(
 ; CHECK-NEXT:    [[Y:%.*]] = call i32 @get_i32()
-; CHECK-NEXT:    [[NOTY:%.*]] = xor i32 [[Y]], -1
-; CHECK-NEXT:    [[C:%.*]] = icmp ugt i32 [[NOTY]], [[X:%.*]]
-; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], i32 [[X]], i32 [[NOTY]]
-; CHECK-NEXT:    [[R:%.*]] = add i32 [[S]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 [[Y]])
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
  %y = call i32 @get_i32() ; thwart complexity-based canonicalization
@ -1271,10 +1268,7 @@ define i32 @unsigned_sat_variable_using_min_add(i32 %x) {
 define i32 @unsigned_sat_variable_using_min_commute_add(i32 %x) {
 ; CHECK-LABEL: @unsigned_sat_variable_using_min_commute_add(
 ; CHECK-NEXT:    [[Y:%.*]] = call i32 @get_i32()
-; CHECK-NEXT:    [[NOTY:%.*]] = xor i32 [[Y]], -1
-; CHECK-NEXT:    [[C:%.*]] = icmp ugt i32 [[NOTY]], [[X:%.*]]
-; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], i32 [[X]], i32 [[NOTY]]
-; CHECK-NEXT:    [[R:%.*]] = add i32 [[Y]], [[S]]
+; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 [[Y]])
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
  %y = call i32 @get_i32() ; thwart complexity-based canonicalization
@ -1288,10 +1282,7 @@ define i32 @unsigned_sat_variable_using_min_commute_add(i32 %x) {
 define <2 x i8> @unsigned_sat_variable_using_min_commute_select(<2 x i8> %x) {
 ; CHECK-LABEL: @unsigned_sat_variable_using_min_commute_select(
 ; CHECK-NEXT:    [[Y:%.*]] = call <2 x i8> @get_v2i8()
-; CHECK-NEXT:    [[NOTY:%.*]] = xor <2 x i8> [[Y]], <i8 -1, i8 -1>
-; CHECK-NEXT:    [[C:%.*]] = icmp ult <2 x i8> [[NOTY]], [[X:%.*]]
-; CHECK-NEXT:    [[S:%.*]] = select <2 x i1> [[C]], <2 x i8> [[NOTY]], <2 x i8> [[X]]
-; CHECK-NEXT:    [[R:%.*]] = add <2 x i8> [[S]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[Y]])
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
  %y = call <2 x i8> @get_v2i8() ; thwart complexity-based canonicalization
@ -1305,10 +1296,7 @@ define <2 x i8> @unsigned_sat_variable_using_min_commute_select(<2 x i8> %x) {
 define <2 x i8> @unsigned_sat_variable_using_min_commute_add_select(<2 x i8> %x) {
 ; CHECK-LABEL: @unsigned_sat_variable_using_min_commute_add_select(
 ; CHECK-NEXT:    [[Y:%.*]] = call <2 x i8> @get_v2i8()
-; CHECK-NEXT:    [[NOTY:%.*]] = xor <2 x i8> [[Y]], <i8 -1, i8 -1>
-; CHECK-NEXT:    [[C:%.*]] = icmp ult <2 x i8> [[NOTY]], [[X:%.*]]
-; CHECK-NEXT:    [[S:%.*]] = select <2 x i1> [[C]], <2 x i8> [[NOTY]], <2 x i8> [[X]]
-; CHECK-NEXT:    [[R:%.*]] = add <2 x i8> [[Y]], [[S]]
+; CHECK-NEXT:    [[R:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[Y]])
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
  %y = call <2 x i8> @get_v2i8() ; thwart complexity-based canonicalization
@ -1319,13 +1307,49 @@ define <2 x i8> @unsigned_sat_variable_using_min_commute_add_select(<2 x i8> %x)
  ret <2 x i8> %r
 }

+; Negative test
+
+define i32 @unsigned_sat_variable_using_wrong_min(i32 %x) {
+; CHECK-LABEL: @unsigned_sat_variable_using_wrong_min(
+; CHECK-NEXT:    [[Y:%.*]] = call i32 @get_i32()
+; CHECK-NEXT:    [[NOTY:%.*]] = xor i32 [[Y]], -1
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt i32 [[NOTY]], [[X:%.*]]
+; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], i32 [[X]], i32 [[NOTY]]
+; CHECK-NEXT:    [[R:%.*]] = add i32 [[Y]], [[S]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %y = call i32 @get_i32() ; thwart complexity-based canonicalization
+  %noty = xor i32 %y, -1
+  %c = icmp slt i32 %x, %noty
+  %s = select i1 %c, i32 %x, i32 %noty
+  %r = add i32 %y, %s
+  ret i32 %r
+}
+
+; Negative test
+
+define i32 @unsigned_sat_variable_using_wrong_value(i32 %x, i32 %z) {
+; CHECK-LABEL: @unsigned_sat_variable_using_wrong_value(
+; CHECK-NEXT:    [[Y:%.*]] = call i32 @get_i32()
+; CHECK-NEXT:    [[NOTY:%.*]] = xor i32 [[Y]], -1
+; CHECK-NEXT:    [[C:%.*]] = icmp ugt i32 [[NOTY]], [[X:%.*]]
+; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], i32 [[X]], i32 [[NOTY]]
+; CHECK-NEXT:    [[R:%.*]] = add i32 [[S]], [[Z:%.*]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %y = call i32 @get_i32() ; thwart complexity-based canonicalization
+  %noty = xor i32 %y, -1
+  %c = icmp ult i32 %x, %noty
+  %s = select i1 %c, i32 %x, i32 %noty
+  %r = add i32 %z, %s
+  ret i32 %r
+}
+
 ; If we have a constant operand, there's no commutativity variation.

 define i32 @unsigned_sat_constant_using_min(i32 %x) {
 ; CHECK-LABEL: @unsigned_sat_constant_using_min(
-; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[X:%.*]], 42
-; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], i32 [[X]], i32 42
-; CHECK-NEXT:    [[R:%.*]] = add nsw i32 [[S]], -43
+; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 -43)
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
  %c = icmp ult i32 %x, 42
@ -1334,3 +1358,28 @@ define i32 @unsigned_sat_constant_using_min(i32 %x) {
  ret i32 %r
 }

+define <2 x i32> @unsigned_sat_constant_using_min_splat(<2 x i32> %x) {
+; CHECK-LABEL: @unsigned_sat_constant_using_min_splat(
+; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[X:%.*]], <2 x i32> <i32 -15, i32 -15>)
+; CHECK-NEXT:    ret <2 x i32> [[R]]
+;
+  %c = icmp ult <2 x i32> %x, <i32 14, i32 14>
+  %s = select <2 x i1> %c, <2 x i32> %x, <2 x i32> <i32 14, i32 14>
+  %r = add <2 x i32> %s, <i32 -15, i32 -15>
+  ret <2 x i32> %r
+}
+
+; Negative test
+
+define i32 @unsigned_sat_constant_using_min_wrong_constant(i32 %x) {
+; CHECK-LABEL: @unsigned_sat_constant_using_min_wrong_constant(
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[X:%.*]], 42
+; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], i32 [[X]], i32 42
+; CHECK-NEXT:    [[R:%.*]] = add nsw i32 [[S]], -42
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %c = icmp ult i32 %x, 42
+  %s = select i1 %c, i32 %x, i32 42
+  %r = add i32 %s, -42
+  ret i32 %r
+}