
Recommit "[PatternMatch] Match XOR variant of unsigned-add overflow check."

This version fixes a buildbot failure caused by picking the wrong insert
point for XORs. We cannot pick the XOR binary operator as insert point,
as it is not guaranteed that both input operands for the overflow
intrinsic are defined before it.

This reverts the revert commit
c7fc0e5da6c3c36eb5f3a874a6cdeaedb26856e0.
Florian Hahn 2020-02-23 16:32:55 +00:00
parent 9b0c7c26a4
commit 5f8cf84ae0
7 changed files with 99 additions and 69 deletions
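For context, the insertion-point rule described in the commit message amounts to the following minimal C++ sketch (an illustration only; the helper name pickInsertPoint is made up and is not part of this commit): when the matched binary operator is the XOR of an (a ^ -1) u< b check, the other operand of the overflow intrinsic may be defined later in the block (for example by a load), so the compare, not the XOR, must serve as the insertion point.

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// Sketch: choose where the overflow intrinsic may be created. BO is the
// matched binary operator and Cmp the overflow compare; both are assumed to
// live in the same basic block.
static Instruction *pickInsertPoint(BinaryOperator *BO, CmpInst *Cmp) {
  for (Instruction &I : *Cmp->getParent()) {
    // An XOR of the form (a ^ -1) is never a safe insertion point, because
    // the second operand of the overflow intrinsic may be defined after it.
    if ((BO->getOpcode() != Instruction::Xor && &I == BO) || &I == Cmp)
      return &I;
  }
  return Cmp;
}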

View File

@@ -1674,7 +1674,8 @@ m_UnordFMin(const LHS &L, const RHS &R) {
 }
 //===----------------------------------------------------------------------===//
-// Matchers for overflow check patterns: e.g. (a + b) u< a
+// Matchers for overflow check patterns: e.g. (a + b) u< a, (a ^ -1) <u b
+// Note that S might be matched to other instructions than AddInst.
 //
 template <typename LHS_t, typename RHS_t, typename Sum_t>
@@ -1705,6 +1706,19 @@ struct UAddWithOverflow_match {
       if (AddExpr.match(ICmpRHS) && (ICmpLHS == AddLHS || ICmpLHS == AddRHS))
         return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS);
+    Value *Op1;
+    auto XorExpr = m_OneUse(m_Xor(m_Value(Op1), m_AllOnes()));
+    // (a ^ -1) <u b
+    if (Pred == ICmpInst::ICMP_ULT) {
+      if (XorExpr.match(ICmpLHS))
+        return L.match(Op1) && R.match(ICmpRHS) && S.match(ICmpLHS);
+    }
+    // b > u (a ^ -1)
+    if (Pred == ICmpInst::ICMP_UGT) {
+      if (XorExpr.match(ICmpRHS))
+        return L.match(Op1) && R.match(ICmpLHS) && S.match(ICmpRHS);
+    }
     // Match special-case for increment-by-1.
     if (Pred == ICmpInst::ICMP_EQ) {
       // (a + 1) == 0
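As a usage sketch (not part of this commit; the helper name is made up for illustration), client code such as CodeGenPrepare reaches this matcher through m_UAddWithOverflow, and after this change the bound sum operand S may be the XOR instead of an add:

#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Returns true if Cmp is an unsigned-add overflow check of A + B, written
// either as (a + b) u< a or as (a ^ -1) u< b. MathOp is bound to the matched
// binary operator, which may now be the XOR rather than an add.
static bool isUAddOverflowCheck(CmpInst *Cmp, Value *&A, Value *&B,
                                BinaryOperator *&MathOp) {
  return match(Cmp,
               m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(MathOp)));
}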

View File

@@ -399,7 +399,8 @@ class TypePromotionTransaction;
     bool simplifyOffsetableRelocate(Instruction &I);
     bool tryToSinkFreeOperands(Instruction *I);
-    bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, CmpInst *Cmp,
+    bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0,
+                                     Value *Arg1, CmpInst *Cmp,
                                      Intrinsic::ID IID);
     bool optimizeCmp(CmpInst *Cmp, bool &ModifiedDT);
     bool combineToUSubWithOverflow(CmpInst *Cmp, bool &ModifiedDT);
@@ -1185,6 +1186,7 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
 }
 bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
+                                                 Value *Arg0, Value *Arg1,
                                                  CmpInst *Cmp,
                                                  Intrinsic::ID IID) {
   if (BO->getParent() != Cmp->getParent()) {
@@ -1202,8 +1204,6 @@ bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
   }
   // We allow matching the canonical IR (add X, C) back to (usubo X, -C).
-  Value *Arg0 = BO->getOperand(0);
-  Value *Arg1 = BO->getOperand(1);
   if (BO->getOpcode() == Instruction::Add &&
       IID == Intrinsic::usub_with_overflow) {
     assert(isa<Constant>(Arg1) && "Unexpected input for usubo");
@@ -1213,7 +1213,9 @@ bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
   // Insert at the first instruction of the pair.
   Instruction *InsertPt = nullptr;
   for (Instruction &Iter : *Cmp->getParent()) {
-    if (&Iter == BO || &Iter == Cmp) {
+    // If BO is an XOR, it is not guaranteed that it comes after both inputs to
+    // the overflow intrinsic are defined.
+    if ((BO->getOpcode() != Instruction::Xor && &Iter == BO) || &Iter == Cmp) {
       InsertPt = &Iter;
       break;
     }
@@ -1222,12 +1224,16 @@ bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
   IRBuilder<> Builder(InsertPt);
   Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
-  Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
+  if (BO->getOpcode() != Instruction::Xor) {
+    Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
+    BO->replaceAllUsesWith(Math);
+  } else
+    assert(BO->hasOneUse() &&
+           "Patterns with XOr should use the BO only in the compare");
   Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
-  BO->replaceAllUsesWith(Math);
   Cmp->replaceAllUsesWith(OV);
-  BO->eraseFromParent();
   Cmp->eraseFromParent();
+  BO->eraseFromParent();
   return true;
 }
@@ -1267,9 +1273,13 @@ bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
                                                bool &ModifiedDT) {
   Value *A, *B;
   BinaryOperator *Add;
-  if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add))))
+  if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) {
     if (!matchUAddWithOverflowConstantEdgeCases(Cmp, Add))
       return false;
+    // Set A and B in case we match matchUAddWithOverflowConstantEdgeCases.
+    A = Add->getOperand(0);
+    B = Add->getOperand(1);
+  }
   if (!TLI->shouldFormOverflowOp(ISD::UADDO,
                                  TLI->getValueType(*DL, Add->getType()),
@@ -1282,7 +1292,8 @@ bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
   if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse())
     return false;
-  if (!replaceMathCmpWithIntrinsic(Add, Cmp, Intrinsic::uadd_with_overflow))
+  if (!replaceMathCmpWithIntrinsic(Add, A, B, Cmp,
+                                   Intrinsic::uadd_with_overflow))
     return false;
   // Reset callers - do not crash by iterating over a dead instruction.
@@ -1344,7 +1355,8 @@ bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
                                  Sub->hasNUsesOrMore(2)))
     return false;
-  if (!replaceMathCmpWithIntrinsic(Sub, Cmp, Intrinsic::usub_with_overflow))
+  if (!replaceMathCmpWithIntrinsic(Sub, Sub->getOperand(0), Sub->getOperand(1),
+                                   Cmp, Intrinsic::usub_with_overflow))
     return false;
   // Reset callers - do not crash by iterating over a dead instruction.

View File

@@ -5568,8 +5568,11 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
       isa<IntegerType>(A->getType())) {
     Value *Result;
     Constant *Overflow;
-    if (OptimizeOverflowCheck(Instruction::Add, /*Signed*/false, A, B,
-                              *AddI, Result, Overflow)) {
+    // m_UAddWithOverflow can match patterns that do not include an explicit
+    // "add" instruction, so check the opcode of the matched op.
+    if (AddI->getOpcode() == Instruction::Add &&
+        OptimizeOverflowCheck(Instruction::Add, /*Signed*/ false, A, B, *AddI,
+                              Result, Overflow)) {
       replaceInstUsesWith(*AddI, Result);
       return replaceInstUsesWith(I, Overflow);
     }

View File

@@ -201,11 +201,11 @@ define i8 @unsigned_sat_variable_i8_using_cmp_sum(i8 %x, i8 %y) {
 define i8 @unsigned_sat_variable_i8_using_cmp_notval(i8 %x, i8 %y) {
 ; CHECK-LABEL: unsigned_sat_variable_i8_using_cmp_notval:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: mvn w9, w1
-; CHECK-NEXT: add w10, w0, w1
-; CHECK-NEXT: cmp w8, w9, uxtb
-; CHECK-NEXT: csinv w0, w10, wzr, ls
+; CHECK-NEXT: and w8, w1, #0xff
+; CHECK-NEXT: add w8, w8, w0, uxtb
+; CHECK-NEXT: add w9, w0, w1
+; CHECK-NEXT: tst w8, #0x100
+; CHECK-NEXT: csinv w0, w9, wzr, eq
 ; CHECK-NEXT: ret
 %noty = xor i8 %y, -1
 %a = add i8 %x, %y
@@ -247,11 +247,11 @@ define i16 @unsigned_sat_variable_i16_using_cmp_sum(i16 %x, i16 %y) {
 define i16 @unsigned_sat_variable_i16_using_cmp_notval(i16 %x, i16 %y) {
 ; CHECK-LABEL: unsigned_sat_variable_i16_using_cmp_notval:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: mvn w9, w1
-; CHECK-NEXT: add w10, w0, w1
-; CHECK-NEXT: cmp w8, w9, uxth
-; CHECK-NEXT: csinv w0, w10, wzr, ls
+; CHECK-NEXT: and w8, w1, #0xffff
+; CHECK-NEXT: add w8, w8, w0, uxth
+; CHECK-NEXT: add w9, w0, w1
+; CHECK-NEXT: tst w8, #0x10000
+; CHECK-NEXT: csinv w0, w9, wzr, eq
 ; CHECK-NEXT: ret
 %noty = xor i16 %y, -1
 %a = add i16 %x, %y
@@ -290,10 +290,9 @@ define i32 @unsigned_sat_variable_i32_using_cmp_sum(i32 %x, i32 %y) {
 define i32 @unsigned_sat_variable_i32_using_cmp_notval(i32 %x, i32 %y) {
 ; CHECK-LABEL: unsigned_sat_variable_i32_using_cmp_notval:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w1
-; CHECK-NEXT: add w9, w0, w1
-; CHECK-NEXT: cmp w0, w8
-; CHECK-NEXT: csinv w0, w9, wzr, ls
+; CHECK-NEXT: cmn w1, w0
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: csinv w0, w8, wzr, lo
 ; CHECK-NEXT: ret
 %noty = xor i32 %y, -1
 %a = add i32 %x, %y
@@ -332,10 +331,9 @@ define i64 @unsigned_sat_variable_i64_using_cmp_sum(i64 %x, i64 %y) {
 define i64 @unsigned_sat_variable_i64_using_cmp_notval(i64 %x, i64 %y) {
 ; CHECK-LABEL: unsigned_sat_variable_i64_using_cmp_notval:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn x8, x1
-; CHECK-NEXT: add x9, x0, x1
-; CHECK-NEXT: cmp x0, x8
-; CHECK-NEXT: csinv x0, x9, xzr, ls
+; CHECK-NEXT: cmn x1, x0
+; CHECK-NEXT: add x8, x0, x1
+; CHECK-NEXT: csinv x0, x8, xzr, lo
 ; CHECK-NEXT: ret
 %noty = xor i64 %y, -1
 %a = add i64 %x, %y

View File

@@ -211,14 +211,10 @@ define i8 @unsigned_sat_variable_i8_using_cmp_sum(i8 %x, i8 %y) {
 define i8 @unsigned_sat_variable_i8_using_cmp_notval(i8 %x, i8 %y) {
 ; ANY-LABEL: unsigned_sat_variable_i8_using_cmp_notval:
 ; ANY: # %bb.0:
-; ANY-NEXT: # kill: def $esi killed $esi def $rsi
-; ANY-NEXT: # kill: def $edi killed $edi def $rdi
-; ANY-NEXT: leal (%rdi,%rsi), %eax
-; ANY-NEXT: notb %sil
-; ANY-NEXT: cmpb %sil, %dil
-; ANY-NEXT: movzbl %al, %ecx
+; ANY-NEXT: addb %dil, %sil
+; ANY-NEXT: movzbl %sil, %ecx
 ; ANY-NEXT: movl $255, %eax
-; ANY-NEXT: cmovbel %ecx, %eax
+; ANY-NEXT: cmovael %ecx, %eax
 ; ANY-NEXT: # kill: def $al killed $al killed $eax
 ; ANY-NEXT: retq
 %noty = xor i8 %y, -1
@@ -263,13 +259,9 @@ define i16 @unsigned_sat_variable_i16_using_cmp_sum(i16 %x, i16 %y) {
 define i16 @unsigned_sat_variable_i16_using_cmp_notval(i16 %x, i16 %y) {
 ; ANY-LABEL: unsigned_sat_variable_i16_using_cmp_notval:
 ; ANY: # %bb.0:
-; ANY-NEXT: # kill: def $esi killed $esi def $rsi
-; ANY-NEXT: # kill: def $edi killed $edi def $rdi
-; ANY-NEXT: leal (%rdi,%rsi), %ecx
-; ANY-NEXT: notl %esi
-; ANY-NEXT: cmpw %si, %di
+; ANY-NEXT: addw %di, %si
 ; ANY-NEXT: movl $65535, %eax # imm = 0xFFFF
-; ANY-NEXT: cmovbel %ecx, %eax
+; ANY-NEXT: cmovael %esi, %eax
 ; ANY-NEXT: # kill: def $ax killed $ax killed $eax
 ; ANY-NEXT: retq
 %noty = xor i16 %y, -1
@@ -312,13 +304,9 @@ define i32 @unsigned_sat_variable_i32_using_cmp_sum(i32 %x, i32 %y) {
 define i32 @unsigned_sat_variable_i32_using_cmp_notval(i32 %x, i32 %y) {
 ; ANY-LABEL: unsigned_sat_variable_i32_using_cmp_notval:
 ; ANY: # %bb.0:
-; ANY-NEXT: # kill: def $esi killed $esi def $rsi
-; ANY-NEXT: # kill: def $edi killed $edi def $rdi
-; ANY-NEXT: leal (%rdi,%rsi), %ecx
-; ANY-NEXT: notl %esi
-; ANY-NEXT: cmpl %esi, %edi
+; ANY-NEXT: addl %esi, %edi
 ; ANY-NEXT: movl $-1, %eax
-; ANY-NEXT: cmovbel %ecx, %eax
+; ANY-NEXT: cmovael %edi, %eax
 ; ANY-NEXT: retq
 %noty = xor i32 %y, -1
 %a = add i32 %x, %y
@@ -359,11 +347,9 @@ define i64 @unsigned_sat_variable_i64_using_cmp_sum(i64 %x, i64 %y) {
 define i64 @unsigned_sat_variable_i64_using_cmp_notval(i64 %x, i64 %y) {
 ; ANY-LABEL: unsigned_sat_variable_i64_using_cmp_notval:
 ; ANY: # %bb.0:
-; ANY-NEXT: leaq (%rdi,%rsi), %rcx
-; ANY-NEXT: notq %rsi
-; ANY-NEXT: cmpq %rsi, %rdi
+; ANY-NEXT: addq %rsi, %rdi
 ; ANY-NEXT: movq $-1, %rax
-; ANY-NEXT: cmovbeq %rcx, %rax
+; ANY-NEXT: cmovaeq %rdi, %rax
 ; ANY-NEXT: retq
 %noty = xor i64 %y, -1
 %a = add i64 %x, %y

View File

@@ -102,9 +102,9 @@ define i64 @uaddo3_math_overflow_used(i64 %a, i64 %b, i64* %res) nounwind ssp {
 ; pattern as well.
 define i64 @uaddo6_xor(i64 %a, i64 %b) {
 ; CHECK-LABEL: @uaddo6_xor(
-; CHECK-NEXT: [[X:%.*]] = xor i64 [[A:%.*]], -1
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[X]], [[B:%.*]]
-; CHECK-NEXT: [[Q:%.*]] = select i1 [[CMP]], i64 [[B]], i64 42
+; CHECK-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
 ; CHECK-NEXT: ret i64 [[Q]]
 ;
 %x = xor i64 %a, -1
@@ -115,13 +115,13 @@ define i64 @uaddo6_xor(i64 %a, i64 %b) {
 define i64 @uaddo6_xor_commuted(i64 %a, i64 %b) {
 ; CHECK-LABEL: @uaddo6_xor_commuted(
-; CHECK-NEXT: [[X:%.*]] = xor i64 -1, [[A:%.*]]
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[X]], [[B:%.*]]
-; CHECK-NEXT: [[Q:%.*]] = select i1 [[CMP]], i64 [[B]], i64 42
+; CHECK-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
 ; CHECK-NEXT: ret i64 [[Q]]
 ;
-%x = xor i64 -1, %a
-%cmp = icmp ult i64 %x, %b
+%x = xor i64 %a, -1
+%cmp = icmp ugt i64 %b, %x
 %Q = select i1 %cmp, i64 %b, i64 42
 ret i64 %Q
 }

View File

@@ -153,9 +153,9 @@ exit:
 ; pattern as well.
 define i64 @uaddo6_xor(i64 %a, i64 %b) {
 ; CHECK-LABEL: @uaddo6_xor(
-; CHECK-NEXT: [[X:%.*]] = xor i64 [[A:%.*]], -1
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[X]], [[B:%.*]]
-; CHECK-NEXT: [[Q:%.*]] = select i1 [[CMP]], i64 [[B]], i64 42
+; CHECK-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
 ; CHECK-NEXT: ret i64 [[Q]]
 ;
 %x = xor i64 %a, -1
@@ -166,12 +166,12 @@ define i64 @uaddo6_xor(i64 %a, i64 %b) {
 define i64 @uaddo6_xor_commuted(i64 %a, i64 %b) {
 ; CHECK-LABEL: @uaddo6_xor_commuted(
-; CHECK-NEXT: [[X:%.*]] = xor i64 -1, [[A:%.*]]
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[X]], [[B:%.*]]
-; CHECK-NEXT: [[Q:%.*]] = select i1 [[CMP]], i64 [[B]], i64 42
+; CHECK-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
 ; CHECK-NEXT: ret i64 [[Q]]
 ;
-%x = xor i64 -1, %a
+%x = xor i64 %a, -1
 %cmp = icmp ult i64 %x, %b
 %Q = select i1 %cmp, i64 %b, i64 42
 ret i64 %Q
@@ -194,6 +194,23 @@ define i64 @uaddo6_xor_multi_use(i64 %a, i64 %b) {
 ret i64 %Q
 }
+; Make sure we do not use the XOR binary operator as insert point, as it may
+; come before the second operand of the overflow intrinsic.
+define i1 @uaddo6_xor_op_after_XOR(i32 %a, i32* %b.ptr) {
+; CHECK-LABEL: @uaddo6_xor_op_after_XOR(
+; CHECK-NEXT: [[B:%.*]] = load i32, i32* [[B_PTR:%.*]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[A:%.*]], i32 [[B]])
+; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i32, i1 } [[TMP1]], 1
+; CHECK-NEXT: [[OV:%.*]] = xor i1 [[OV1]], true
+; CHECK-NEXT: ret i1 [[OV]]
+;
+%x = xor i32 %a, -1
+%b = load i32, i32* %b.ptr, align 8
+%cmp14 = icmp ugt i32 %b, %x
+%ov = xor i1 %cmp14, true
+ret i1 %ov
+}
 ; When adding 1, the general pattern for add-overflow may be different due to icmp canonicalization.
 ; PR31754: https://bugs.llvm.org/show_bug.cgi?id=31754