[InstCombine] add/move folds for [not]-xor

We already handled all of the commuted variants for plain xor, although
they were scattered around and sometimes folded less efficiently using
distributive laws. We had no folds for not-xor.
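
For example, the new or-to-not-xor fold turns IR like this (an
illustrative sketch with made-up value names; the or_to_nxor tests
below exercise the real patterns):

  %and   = and i32 %a, %b
  %or    = or i32 %a, %b
  %notor = xor i32 %or, -1        ; ~(a | b)
  %r     = or i32 %and, %notor    ; (a & b) | ~(a | b)

into:

  %xor = xor i32 %a, %b
  %r   = xor i32 %xor, -1         ; ~(a ^ b)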

Handling all of these patterns consistently is part of trying to 
reinstate:
https://reviews.llvm.org/rL300977

llvm-svn: 301144
Sanjay Patel 2017-04-23 22:00:02 +00:00
parent 1831498677
commit e588bc6051
2 changed files with 101 additions and 76 deletions

lib/Transforms/InstCombine/InstCombineAndOrXor.cpp

@@ -1234,6 +1234,56 @@ static Instruction *foldBoolSextMaskToSelect(BinaryOperator &I) {
return nullptr;
}
static Instruction *foldAndToXor(BinaryOperator &I,
InstCombiner::BuilderTy &Builder) {
assert(I.getOpcode() == Instruction::And);
Value *Op0 = I.getOperand(0);
Value *Op1 = I.getOperand(1);
Value *A, *B;
// Operand complexity canonicalization guarantees that the 'or' is Op0.
// (A | B) & ~(A & B) --> A ^ B
// (A | B) & ~(B & A) --> A ^ B
if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
match(Op1, m_Not(m_c_And(m_Specific(A), m_Specific(B)))))
return BinaryOperator::CreateXor(A, B);
// (A | ~B) & (~A | B) --> ~(A ^ B)
// (A | ~B) & (B | ~A) --> ~(A ^ B)
// (~B | A) & (~A | B) --> ~(A ^ B)
// (~B | A) & (B | ~A) --> ~(A ^ B)
if (match(Op0, m_c_Or(m_Value(A), m_Not(m_Value(B)))) &&
match(Op1, m_c_Or(m_Not(m_Specific(A)), m_Specific(B))))
return BinaryOperator::CreateNot(Builder.CreateXor(A, B));
return nullptr;
}
static Instruction *foldOrToXor(BinaryOperator &I,
InstCombiner::BuilderTy &Builder) {
assert(I.getOpcode() == Instruction::Or);
Value *Op0 = I.getOperand(0);
Value *Op1 = I.getOperand(1);
Value *A, *B;
// Operand complexity canonicalization guarantees that the 'and' is Op0.
// (A & B) | ~(A | B) --> ~(A ^ B)
// (A & B) | ~(B | A) --> ~(A ^ B)
if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))))
return BinaryOperator::CreateNot(Builder.CreateXor(A, B));
// (A & ~B) | (~A & B) --> A ^ B
// (A & ~B) | (B & ~A) --> A ^ B
// (~B & A) | (~A & B) --> A ^ B
// (~B & A) | (B & ~A) --> A ^ B
if (match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) &&
match(Op1, m_c_And(m_Not(m_Specific(A)), m_Specific(B))))
return BinaryOperator::CreateXor(A, B);
return nullptr;
}
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
// here. We should standardize that construct where it is needed or choose some
// other way to ensure that commutated variants of patterns are not missed.
@@ -1247,15 +1297,19 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
if (Value *V = SimplifyAndInst(Op0, Op1, DL, &TLI, &DT, &AC))
return replaceInstUsesWith(I, V);
// (A|B)&(A|C) -> A|(B&C) etc
if (Value *V = SimplifyUsingDistributiveLaws(I))
return replaceInstUsesWith(I, V);
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(I))
return &I;
// Do this before using distributive laws to catch simple and/or/not patterns.
if (Instruction *Xor = foldAndToXor(I, *Builder))
return Xor;
// (A|B)&(A|C) -> A|(B&C) etc
if (Value *V = SimplifyUsingDistributiveLaws(I))
return replaceInstUsesWith(I, V);
if (Value *V = SimplifyBSwap(I))
return replaceInstUsesWith(I, V);
@@ -1366,19 +1420,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
return DeMorgan;
{
Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
// (A|B) & ~(A&B) -> A^B
if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) &&
((A == C && B == D) || (A == D && B == C)))
return BinaryOperator::CreateXor(A, B);
// ~(A&B) & (A|B) -> A^B
if (match(Op1, m_Or(m_Value(A), m_Value(B))) &&
match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) &&
((A == C && B == D) || (A == D && B == C)))
return BinaryOperator::CreateXor(A, B);
Value *A = nullptr, *B = nullptr, *C = nullptr;
// A&(A^B) => A & ~B
{
Value *tmpOp0 = Op0;
@@ -2037,15 +2079,19 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (Value *V = SimplifyOrInst(Op0, Op1, DL, &TLI, &DT, &AC))
return replaceInstUsesWith(I, V);
// (A&B)|(A&C) -> A&(B|C) etc
if (Value *V = SimplifyUsingDistributiveLaws(I))
return replaceInstUsesWith(I, V);
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(I))
return &I;
// Do this before using distributive laws to catch simple and/or/not patterns.
if (Instruction *Xor = foldOrToXor(I, *Builder))
return Xor;
// (A&B)|(A&C) -> A&(B|C) etc
if (Value *V = SimplifyUsingDistributiveLaws(I))
return replaceInstUsesWith(I, V);
if (Value *V = SimplifyBSwap(I))
return replaceInstUsesWith(I, V);
@@ -2182,23 +2228,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
return replaceInstUsesWith(I, V);
}
// ((A&~B)|(~A&B)) -> A^B
if ((match(C, m_Not(m_Specific(D))) &&
match(B, m_Not(m_Specific(A)))))
return BinaryOperator::CreateXor(A, D);
// ((~B&A)|(~A&B)) -> A^B
if ((match(A, m_Not(m_Specific(D))) &&
match(B, m_Not(m_Specific(C)))))
return BinaryOperator::CreateXor(C, D);
// ((A&~B)|(B&~A)) -> A^B
if ((match(C, m_Not(m_Specific(B))) &&
match(D, m_Not(m_Specific(A)))))
return BinaryOperator::CreateXor(A, B);
// ((~B&A)|(B&~A)) -> A^B
if ((match(A, m_Not(m_Specific(B))) &&
match(D, m_Not(m_Specific(C)))))
return BinaryOperator::CreateXor(C, B);
// ((A|B)&1)|(B&-2) -> (A&1) | B
if (match(A, m_Or(m_Value(V1), m_Specific(B))) ||
match(A, m_Or(m_Specific(B), m_Value(V1)))) {

test/Transforms/InstCombine/and-or-not.ll

@@ -34,7 +34,7 @@ define i32 @and_to_xor2(i32 %a, i32 %b) {
define i32 @and_to_xor3(i32 %a, i32 %b) {
; CHECK-LABEL: @and_to_xor3(
; CHECK-NEXT: [[AND2:%.*]] = xor i32 %b, %a
; CHECK-NEXT: [[AND2:%.*]] = xor i32 %a, %b
; CHECK-NEXT: ret i32 [[AND2]]
;
%or = or i32 %a, %b
@@ -48,7 +48,7 @@ define i32 @and_to_xor3(i32 %a, i32 %b) {
define i32 @and_to_xor4(i32 %a, i32 %b) {
; CHECK-LABEL: @and_to_xor4(
; CHECK-NEXT: [[AND2:%.*]] = xor i32 %a, %b
; CHECK-NEXT: [[AND2:%.*]] = xor i32 %b, %a
; CHECK-NEXT: ret i32 [[AND2]]
;
%or = or i32 %b, %a
@@ -73,15 +73,14 @@ define <4 x i32> @and_to_xor1_vec(<4 x i32> %a, <4 x i32> %b) {
; In the next 4 tests, cast instructions are used to thwart operand complexity
; canonicalizations, so we can test all of the commuted patterns.
; (a | ~b) & (~a | b) --> ~(a ^ b)
define i32 @and_to_nxor1(float %fa, float %fb) {
; CHECK-LABEL: @and_to_nxor1(
; CHECK-NEXT: [[A:%.*]] = fptosi float %fa to i32
; CHECK-NEXT: [[B:%.*]] = fptosi float %fb to i32
; CHECK-NEXT: [[NOTA:%.*]] = xor i32 [[A]], -1
; CHECK-NEXT: [[NOTB:%.*]] = xor i32 [[B]], -1
; CHECK-NEXT: [[OR1:%.*]] = or i32 [[A]], [[NOTB]]
; CHECK-NEXT: [[OR2:%.*]] = or i32 [[NOTA]], [[B]]
; CHECK-NEXT: [[AND:%.*]] = and i32 [[OR1]], [[OR2]]
; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[A]], [[B]]
; CHECK-NEXT: [[AND:%.*]] = xor i32 [[TMP1]], -1
; CHECK-NEXT: ret i32 [[AND]]
;
%a = fptosi float %fa to i32
@@ -94,15 +93,14 @@ define i32 @and_to_nxor1(float %fa, float %fb) {
ret i32 %and
}
; (a | ~b) & (b | ~a) --> ~(a ^ b)
define i32 @and_to_nxor2(float %fa, float %fb) {
; CHECK-LABEL: @and_to_nxor2(
; CHECK-NEXT: [[A:%.*]] = fptosi float %fa to i32
; CHECK-NEXT: [[B:%.*]] = fptosi float %fb to i32
; CHECK-NEXT: [[NOTA:%.*]] = xor i32 [[A]], -1
; CHECK-NEXT: [[NOTB:%.*]] = xor i32 [[B]], -1
; CHECK-NEXT: [[OR1:%.*]] = or i32 [[A]], [[NOTB]]
; CHECK-NEXT: [[OR2:%.*]] = or i32 [[B]], [[NOTA]]
; CHECK-NEXT: [[AND:%.*]] = and i32 [[OR1]], [[OR2]]
; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[A]], [[B]]
; CHECK-NEXT: [[AND:%.*]] = xor i32 [[TMP1]], -1
; CHECK-NEXT: ret i32 [[AND]]
;
%a = fptosi float %fa to i32
@@ -115,15 +113,14 @@ define i32 @and_to_nxor2(float %fa, float %fb) {
ret i32 %and
}
; (~a | b) & (a | ~b) --> ~(a ^ b)
define i32 @and_to_nxor3(float %fa, float %fb) {
; CHECK-LABEL: @and_to_nxor3(
; CHECK-NEXT: [[A:%.*]] = fptosi float %fa to i32
; CHECK-NEXT: [[B:%.*]] = fptosi float %fb to i32
; CHECK-NEXT: [[NOTA:%.*]] = xor i32 [[A]], -1
; CHECK-NEXT: [[NOTB:%.*]] = xor i32 [[B]], -1
; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOTA]], [[B]]
; CHECK-NEXT: [[OR2:%.*]] = or i32 [[A]], [[NOTB]]
; CHECK-NEXT: [[AND:%.*]] = and i32 [[OR1]], [[OR2]]
; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[A]]
; CHECK-NEXT: [[AND:%.*]] = xor i32 [[TMP1]], -1
; CHECK-NEXT: ret i32 [[AND]]
;
%a = fptosi float %fa to i32
@@ -136,15 +133,14 @@ define i32 @and_to_nxor3(float %fa, float %fb) {
ret i32 %and
}
; (~a | b) & (~b | a) --> ~(a ^ b)
define i32 @and_to_nxor4(float %fa, float %fb) {
; CHECK-LABEL: @and_to_nxor4(
; CHECK-NEXT: [[A:%.*]] = fptosi float %fa to i32
; CHECK-NEXT: [[B:%.*]] = fptosi float %fb to i32
; CHECK-NEXT: [[NOTA:%.*]] = xor i32 [[A]], -1
; CHECK-NEXT: [[NOTB:%.*]] = xor i32 [[B]], -1
; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOTA]], [[B]]
; CHECK-NEXT: [[OR2:%.*]] = or i32 [[NOTB]], [[A]]
; CHECK-NEXT: [[AND:%.*]] = and i32 [[OR1]], [[OR2]]
; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[A]]
; CHECK-NEXT: [[AND:%.*]] = xor i32 [[TMP1]], -1
; CHECK-NEXT: ret i32 [[AND]]
;
%a = fptosi float %fa to i32
@@ -233,12 +229,12 @@ define i32 @or_to_xor4(float %fa, float %fb) {
ret i32 %or
}
; (a & b) | ~(a | b) --> ~(a ^ b)
define i32 @or_to_nxor1(i32 %a, i32 %b) {
; CHECK-LABEL: @or_to_nxor1(
; CHECK-NEXT: [[AND:%.*]] = and i32 %a, %b
; CHECK-NEXT: [[OR:%.*]] = or i32 %a, %b
; CHECK-NEXT: [[NOTOR:%.*]] = xor i32 [[OR]], -1
; CHECK-NEXT: [[OR2:%.*]] = or i32 [[AND]], [[NOTOR]]
; CHECK-NEXT: [[TMP1:%.*]] = xor i32 %a, %b
; CHECK-NEXT: [[OR2:%.*]] = xor i32 [[TMP1]], -1
; CHECK-NEXT: ret i32 [[OR2]]
;
%and = and i32 %a, %b
@@ -248,12 +244,12 @@ define i32 @or_to_nxor1(i32 %a, i32 %b) {
ret i32 %or2
}
; (a & b) | ~(b | a) --> ~(a ^ b)
define i32 @or_to_nxor2(i32 %a, i32 %b) {
; CHECK-LABEL: @or_to_nxor2(
; CHECK-NEXT: [[AND:%.*]] = and i32 %a, %b
; CHECK-NEXT: [[OR:%.*]] = or i32 %b, %a
; CHECK-NEXT: [[NOTOR:%.*]] = xor i32 [[OR]], -1
; CHECK-NEXT: [[OR2:%.*]] = or i32 [[AND]], [[NOTOR]]
; CHECK-NEXT: [[TMP1:%.*]] = xor i32 %a, %b
; CHECK-NEXT: [[OR2:%.*]] = xor i32 [[TMP1]], -1
; CHECK-NEXT: ret i32 [[OR2]]
;
%and = and i32 %a, %b
@@ -263,12 +259,12 @@ define i32 @or_to_nxor2(i32 %a, i32 %b) {
ret i32 %or2
}
; ~(a | b) | (a & b) --> ~(a ^ b)
define i32 @or_to_nxor3(i32 %a, i32 %b) {
; CHECK-LABEL: @or_to_nxor3(
; CHECK-NEXT: [[AND:%.*]] = and i32 %a, %b
; CHECK-NEXT: [[OR:%.*]] = or i32 %a, %b
; CHECK-NEXT: [[NOTOR:%.*]] = xor i32 [[OR]], -1
; CHECK-NEXT: [[OR2:%.*]] = or i32 [[AND]], [[NOTOR]]
; CHECK-NEXT: [[TMP1:%.*]] = xor i32 %a, %b
; CHECK-NEXT: [[OR2:%.*]] = xor i32 [[TMP1]], -1
; CHECK-NEXT: ret i32 [[OR2]]
;
%and = and i32 %a, %b
@@ -278,12 +274,12 @@ define i32 @or_to_nxor3(i32 %a, i32 %b) {
ret i32 %or2
}
; ~(a | b) | (b & a) --> ~(a ^ b)
define i32 @or_to_nxor4(i32 %a, i32 %b) {
; CHECK-LABEL: @or_to_nxor4(
; CHECK-NEXT: [[AND:%.*]] = and i32 %b, %a
; CHECK-NEXT: [[OR:%.*]] = or i32 %a, %b
; CHECK-NEXT: [[NOTOR:%.*]] = xor i32 [[OR]], -1
; CHECK-NEXT: [[OR2:%.*]] = or i32 [[AND]], [[NOTOR]]
; CHECK-NEXT: [[TMP1:%.*]] = xor i32 %b, %a
; CHECK-NEXT: [[OR2:%.*]] = xor i32 [[TMP1]], -1
; CHECK-NEXT: ret i32 [[OR2]]
;
%and = and i32 %b, %a