[InstCombine] reduce even more unsigned saturated add with 'not' op

We want to use the sum in the icmp to allow matching with
m_UAddWithOverflow and eliminate the 'not'. This is discussed in D51929
and is another step towards solving PR14613:
https://bugs.llvm.org/show_bug.cgi?id=14613

  Name: uaddsat, -1 fval
  %notx = xor i32 %x, -1
  %a = add i32 %x, %y
  %c = icmp ugt i32 %notx, %y
  %r = select i1 %c, i32 %a, i32 -1
  =>
  %a = add i32 %x, %y
  %c2 = icmp ugt i32 %y, %a
  %r = select i1 %c2, i32 -1, i32 %a

  Name: uaddsat, -1 fval + ult
  %notx = xor i32 %x, -1
  %a = add i32 %x, %y
  %c = icmp ult i32 %y, %notx
  %r = select i1 %c, i32 %a, i32 -1
  =>
  %a = add i32 %x, %y
  %c2 = icmp ugt i32 %y, %a
  %r = select i1 %c2, i32 -1, i32 %a

https://rise4fun.com/Alive/nTp

llvm-svn: 354276
2024-10-20 03:23:01 +02:00 · 2019-02-18 15:21:39 +00:00 · 2019-02-18 15:21:39 +00:00 · 732e40de19
commit 732e40de19
parent c8cc085915
2 changed files with 42 additions and 35 deletions
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@ -680,25 +680,46 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
  if (!Cmp->hasOneUse())
    return nullptr;

-  // Canonicalize to 'ULT' to simplify matching below.
  Value *Cmp0 = Cmp->getOperand(0);
  Value *Cmp1 = Cmp->getOperand(1);
+
+  // Match unsigned saturated add with constant.
+  Value *X;
+  const APInt *C, *CmpC;
+  if (match(TVal, m_Add(m_Value(X), m_APInt(C))) && X == Cmp0 &&
+      match(Cmp1, m_APInt(CmpC)) && *CmpC == ~*C) {
+    // Commute compare predicate and select operands. The backend is expecting
+    // this form (-1 is true value). If this changes, the backend must be
+    // updated too:
+    // (X u< ~C) ? (X + C) : -1 --> (X u> ~C) ? -1 : (X + C)
+    Value *NewCmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, X, Cmp1);
+    return Builder.CreateSelect(NewCmp, FVal, TVal);
+  }
+
+  // Match unsigned saturated add of 2 variables with an unnecessary 'not'.
+  // There are 8 commuted variants.
+  // Canonicalize -1 (saturated result) to true value of the select.
+  if (match(FVal, m_AllOnes())) {
+    std::swap(TVal, FVal);
+    std::swap(Cmp0, Cmp1);
+  }
+  if (!match(TVal, m_AllOnes()))
+    return nullptr;
+
+  // Canonicalize predicate to 'ULT'.
  ICmpInst::Predicate Pred = Cmp->getPredicate();
  if (Pred == ICmpInst::ICMP_UGT) {
    Pred = ICmpInst::ICMP_ULT;
    std::swap(Cmp0, Cmp1);
  }
-
  if (Pred != ICmpInst::ICMP_ULT)
    return nullptr;

-  // Match unsigned saturated add of 2 variables with an unnecessary 'not'.
-  // TODO: There are more variations of this pattern.
-  Value *X, *Y;
-  if (match(TVal, m_AllOnes()) && match(Cmp0, m_Not(m_Value(X))) &&
+  Value *Y;
+  if (match(Cmp0, m_Not(m_Value(X))) &&
      match(FVal, m_c_Add(m_Specific(X), m_Value(Y))) && Y == Cmp1) {
    // Change the comparison to use the sum (false value of the select). That is
-    // the canonical pattern match form for uadd.with.overflow and eliminates a
+    // a canonical pattern match form for uadd.with.overflow and eliminates a
    // use of the 'not' op:
    // (~X u< Y) ? -1 : (X + Y) --> ((X + Y) u< Y) ? -1 : (X + Y)
    // (~X u< Y) ? -1 : (Y + X) --> ((Y + X) u< Y) ? -1 : (Y + X)
@ -706,16 +727,6 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
    return Builder.CreateSelect(NewCmp, TVal, FVal);
  }

-  // Match unsigned saturated add with constant.
-  const APInt *C, *CmpC;
-  if (match(TVal, m_Add(m_Value(X), m_APInt(C))) && X == Cmp0 &&
-      match(FVal, m_AllOnes()) && match(Cmp1, m_APInt(CmpC)) && *CmpC == ~*C) {
-    // Commute compare predicate and select operands:
-    // (X u< ~C) ? (X + C) : -1 --> (X u> ~C) ? -1 : (X + C)
-    Value *NewCmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, X, Cmp1);
-    return Builder.CreateSelect(NewCmp, FVal, TVal);
-  }
-
  return nullptr;
 }

--- a/test/Transforms/InstCombine/saturating-add-sub.ll
+++ b/test/Transforms/InstCombine/saturating-add-sub.ll
@ -706,11 +706,10 @@ define <2 x i32> @uadd_sat_ugt_commute_add(<2 x i32> %xp, <2 x i32> %yp) {
 define i32 @uadd_sat_commute_select(i32 %x, i32 %yp) {
 ; CHECK-LABEL: @uadd_sat_commute_select(
 ; CHECK-NEXT:    [[Y:%.*]] = sdiv i32 [[YP:%.*]], 2442
-; CHECK-NEXT:    [[NOTX:%.*]] = xor i32 [[X:%.*]], -1
-; CHECK-NEXT:    [[A:%.*]] = add i32 [[Y]], [[X]]
-; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[Y]], [[NOTX]]
-; CHECK-NEXT:    [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 -1
-; CHECK-NEXT:    ret i32 [[R]]
+; CHECK-NEXT:    [[A:%.*]] = add i32 [[Y]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
+; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
  %y = sdiv i32 %yp, 2442 ; thwart complexity-based-canonicalization
  %notx = xor i32 %x, -1
@ -724,11 +723,10 @@ define i32 @uadd_sat_commute_select_commute_add(i32 %xp, i32 %yp) {
 ; CHECK-LABEL: @uadd_sat_commute_select_commute_add(
 ; CHECK-NEXT:    [[X:%.*]] = urem i32 42, [[XP:%.*]]
 ; CHECK-NEXT:    [[Y:%.*]] = sdiv i32 [[YP:%.*]], 2442
-; CHECK-NEXT:    [[NOTX:%.*]] = xor i32 [[X]], -1
 ; CHECK-NEXT:    [[A:%.*]] = add nsw i32 [[X]], [[Y]]
-; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[Y]], [[NOTX]]
-; CHECK-NEXT:    [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 -1
-; CHECK-NEXT:    ret i32 [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
+; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
  %x = urem i32 42, %xp ; thwart complexity-based-canonicalization
  %y = sdiv i32 %yp, 2442 ; thwart complexity-based-canonicalization
@ -741,11 +739,10 @@ define i32 @uadd_sat_commute_select_commute_add(i32 %xp, i32 %yp) {

 define <2 x i32> @uadd_sat_commute_select_ugt(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @uadd_sat_commute_select_ugt(
-; CHECK-NEXT:    [[NOTX:%.*]] = xor <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
-; CHECK-NEXT:    [[A:%.*]] = add <2 x i32> [[Y:%.*]], [[X]]
-; CHECK-NEXT:    [[C:%.*]] = icmp ugt <2 x i32> [[NOTX]], [[Y]]
-; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[C]], <2 x i32> [[A]], <2 x i32> <i32 -1, i32 -1>
-; CHECK-NEXT:    ret <2 x i32> [[R]]
+; CHECK-NEXT:    [[A:%.*]] = add <2 x i32> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i32> [[A]], [[Y]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> <i32 -1, i32 -1>, <2 x i32> [[A]]
+; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
 ;
  %notx = xor <2 x i32> %x, <i32 -1, i32 -1>
  %a = add <2 x i32> %y, %x
@ -757,11 +754,10 @@ define <2 x i32> @uadd_sat_commute_select_ugt(<2 x i32> %x, <2 x i32> %y) {
 define i32 @uadd_sat_commute_select_ugt_commute_add(i32 %xp, i32 %y) {
 ; CHECK-LABEL: @uadd_sat_commute_select_ugt_commute_add(
 ; CHECK-NEXT:    [[X:%.*]] = srem i32 42, [[XP:%.*]]
-; CHECK-NEXT:    [[NOTX:%.*]] = xor i32 [[X]], -1
 ; CHECK-NEXT:    [[A:%.*]] = add i32 [[X]], [[Y:%.*]]
-; CHECK-NEXT:    [[C:%.*]] = icmp ugt i32 [[NOTX]], [[Y]]
-; CHECK-NEXT:    [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 -1
-; CHECK-NEXT:    ret i32 [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
+; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
  %x = srem i32 42, %xp   ; thwart complexity-based-canonicalization
  %notx = xor i32 %x, -1