From e588bc6051718712025f44a814b761e733e4e4b6 Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Sun, 23 Apr 2017 22:00:02 +0000
Subject: [PATCH] [InstCombine] add/move folds for [not]-xor

We handled all of the commuted variants for plain xor already, although
they were scattered around and sometimes folded less efficiently using
distributive laws. We had no folds for not-xor. Handling all of these
patterns consistently is part of trying to reinstate:
https://reviews.llvm.org/rL300977

llvm-svn: 301144
---
 .../InstCombine/InstCombineAndOrXor.cpp    | 105 +++++++++++-------
 test/Transforms/InstCombine/and-or-not.ll  |  72 ++++++------
 2 files changed, 101 insertions(+), 76 deletions(-)
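
A note that is not part of the commit itself: the [not]-xor rewrites this
patch adds are plain Boolean identities, so they can be sanity-checked
exhaustively outside of LLVM. Below is a minimal standalone C++ sketch
(nothing in it is from the patch; it simply brute-forces the four fold
families over all pairs of 8-bit values):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    int main() {
      for (unsigned ia = 0; ia < 256; ++ia) {
        for (unsigned ib = 0; ib < 256; ++ib) {
          uint8_t a = ia, b = ib;
          // foldAndToXor: (A | B) & ~(A & B) --> A ^ B
          assert(uint8_t((a | b) & ~(a & b)) == uint8_t(a ^ b));
          // foldAndToXor: (A | ~B) & (~A | B) --> ~(A ^ B)
          assert(uint8_t((a | ~b) & (~a | b)) == uint8_t(~(a ^ b)));
          // foldOrToXor: (A & B) | ~(A | B) --> ~(A ^ B)
          assert(uint8_t((a & b) | ~(a | b)) == uint8_t(~(a ^ b)));
          // foldOrToXor: (A & ~B) | (~A & B) --> A ^ B
          assert(uint8_t((a & ~b) | (~a & b)) == uint8_t(a ^ b));
        }
      }
      puts("all [not]-xor identities hold");
      return 0;
    }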
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 17a6fb9a402..dccc3115463 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1234,6 +1234,56 @@ static Instruction *foldBoolSextMaskToSelect(BinaryOperator &I) {
   return nullptr;
 }
 
+static Instruction *foldAndToXor(BinaryOperator &I,
+                                 InstCombiner::BuilderTy &Builder) {
+  assert(I.getOpcode() == Instruction::And);
+  Value *Op0 = I.getOperand(0);
+  Value *Op1 = I.getOperand(1);
+  Value *A, *B;
+
+  // Operand complexity canonicalization guarantees that the 'or' is Op0.
+  // (A | B) & ~(A & B) --> A ^ B
+  // (A | B) & ~(B & A) --> A ^ B
+  if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
+      match(Op1, m_Not(m_c_And(m_Specific(A), m_Specific(B)))))
+    return BinaryOperator::CreateXor(A, B);
+
+  // (A | ~B) & (~A | B) --> ~(A ^ B)
+  // (A | ~B) & (B | ~A) --> ~(A ^ B)
+  // (~B | A) & (~A | B) --> ~(A ^ B)
+  // (~B | A) & (B | ~A) --> ~(A ^ B)
+  if (match(Op0, m_c_Or(m_Value(A), m_Not(m_Value(B)))) &&
+      match(Op1, m_c_Or(m_Not(m_Specific(A)), m_Value(B))))
+    return BinaryOperator::CreateNot(Builder.CreateXor(A, B));
+
+  return nullptr;
+}
+
+static Instruction *foldOrToXor(BinaryOperator &I,
+                                InstCombiner::BuilderTy &Builder) {
+  assert(I.getOpcode() == Instruction::Or);
+  Value *Op0 = I.getOperand(0);
+  Value *Op1 = I.getOperand(1);
+  Value *A, *B;
+
+  // Operand complexity canonicalization guarantees that the 'and' is Op0.
+  // (A & B) | ~(A | B) --> ~(A ^ B)
+  // (A & B) | ~(B | A) --> ~(A ^ B)
+  if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
+      match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))))
+    return BinaryOperator::CreateNot(Builder.CreateXor(A, B));
+
+  // (A & ~B) | (~A & B) --> A ^ B
+  // (A & ~B) | (B & ~A) --> A ^ B
+  // (~B & A) | (~A & B) --> A ^ B
+  // (~B & A) | (B & ~A) --> A ^ B
+  if (match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) &&
+      match(Op1, m_c_And(m_Not(m_Specific(A)), m_Specific(B))))
+    return BinaryOperator::CreateXor(A, B);
+
+  return nullptr;
+}
+
 // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
 // here. We should standardize that construct where it is needed or choose some
 // other way to ensure that commutated variants of patterns are not missed.
@@ -1247,15 +1297,19 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
   if (Value *V = SimplifyAndInst(Op0, Op1, DL, &TLI, &DT, &AC))
     return replaceInstUsesWith(I, V);
 
-  // (A|B)&(A|C) -> A|(B&C) etc
-  if (Value *V = SimplifyUsingDistributiveLaws(I))
-    return replaceInstUsesWith(I, V);
-
   // See if we can simplify any instructions used by the instruction whose sole
   // purpose is to compute bits we don't care about.
   if (SimplifyDemandedInstructionBits(I))
     return &I;
 
+  // Do this before using distributive laws to catch simple and/or/not patterns.
+  if (Instruction *Xor = foldAndToXor(I, *Builder))
+    return Xor;
+
+  // (A|B)&(A|C) -> A|(B&C) etc
+  if (Value *V = SimplifyUsingDistributiveLaws(I))
+    return replaceInstUsesWith(I, V);
+
   if (Value *V = SimplifyBSwap(I))
     return replaceInstUsesWith(I, V);
 
@@ -1366,19 +1420,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
     return DeMorgan;
 
   {
-    Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
-    // (A|B) & ~(A&B) -> A^B
-    if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
-        match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) &&
-        ((A == C && B == D) || (A == D && B == C)))
-      return BinaryOperator::CreateXor(A, B);
-
-    // ~(A&B) & (A|B) -> A^B
-    if (match(Op1, m_Or(m_Value(A), m_Value(B))) &&
-        match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) &&
-        ((A == C && B == D) || (A == D && B == C)))
-      return BinaryOperator::CreateXor(A, B);
-
+    Value *A = nullptr, *B = nullptr, *C = nullptr;
     // A&(A^B) => A & ~B
     {
       Value *tmpOp0 = Op0;
@@ -2037,15 +2079,19 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
   if (Value *V = SimplifyOrInst(Op0, Op1, DL, &TLI, &DT, &AC))
     return replaceInstUsesWith(I, V);
 
-  // (A&B)|(A&C) -> A&(B|C) etc
-  if (Value *V = SimplifyUsingDistributiveLaws(I))
-    return replaceInstUsesWith(I, V);
-
   // See if we can simplify any instructions used by the instruction whose sole
   // purpose is to compute bits we don't care about.
   if (SimplifyDemandedInstructionBits(I))
     return &I;
 
+  // Do this before using distributive laws to catch simple and/or/not patterns.
+  if (Instruction *Xor = foldOrToXor(I, *Builder))
+    return Xor;
+
+  // (A&B)|(A&C) -> A&(B|C) etc
+  if (Value *V = SimplifyUsingDistributiveLaws(I))
+    return replaceInstUsesWith(I, V);
+
   if (Value *V = SimplifyBSwap(I))
     return replaceInstUsesWith(I, V);
 
@@ -2182,23 +2228,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
       return replaceInstUsesWith(I, V);
   }
 
-  // ((A&~B)|(~A&B)) -> A^B
-  if ((match(C, m_Not(m_Specific(D))) &&
-       match(B, m_Not(m_Specific(A)))))
-    return BinaryOperator::CreateXor(A, D);
-  // ((~B&A)|(~A&B)) -> A^B
-  if ((match(A, m_Not(m_Specific(D))) &&
-       match(B, m_Not(m_Specific(C)))))
-    return BinaryOperator::CreateXor(C, D);
-  // ((A&~B)|(B&~A)) -> A^B
-  if ((match(C, m_Not(m_Specific(B))) &&
-       match(D, m_Not(m_Specific(A)))))
-    return BinaryOperator::CreateXor(A, B);
-  // ((~B&A)|(B&~A)) -> A^B
-  if ((match(A, m_Not(m_Specific(B))) &&
-       match(D, m_Not(m_Specific(C)))))
-    return BinaryOperator::CreateXor(C, B);
-
   // ((A|B)&1)|(B&-2) -> (A&1) | B
   if (match(A, m_Or(m_Value(V1), m_Specific(B))) ||
       match(A, m_Or(m_Specific(B), m_Value(V1)))) {
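
Another note that is not from the patch: the test diff below encodes these
same patterns at the IR level. For intuition, here is hypothetical C++
source (names made up to mirror the test names) whose straightforward
lowering produces the shapes the new folds target; with this patch,
InstCombine should reduce each body to a single xor, plus a xor-with--1
"not" in the nxor case:

    // Matches foldAndToXor's first pattern: (A | B) & ~(A & B) --> A ^ B.
    unsigned and_to_xor(unsigned a, unsigned b) { return (a | b) & ~(a & b); }

    // Matches foldOrToXor's first pattern: (A & B) | ~(A | B) --> ~(A ^ B).
    unsigned or_to_nxor(unsigned a, unsigned b) { return (a & b) | ~(a | b); }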
diff --git a/test/Transforms/InstCombine/and-or-not.ll b/test/Transforms/InstCombine/and-or-not.ll
index 10bd5df59f4..a2e8a10735f 100644
--- a/test/Transforms/InstCombine/and-or-not.ll
+++ b/test/Transforms/InstCombine/and-or-not.ll
@@ -34,7 +34,7 @@ define i32 @and_to_xor2(i32 %a, i32 %b) {
 
 define i32 @and_to_xor3(i32 %a, i32 %b) {
 ; CHECK-LABEL: @and_to_xor3(
-; CHECK-NEXT:    [[AND2:%.*]] = xor i32 %b, %a
+; CHECK-NEXT:    [[AND2:%.*]] = xor i32 %a, %b
 ; CHECK-NEXT:    ret i32 [[AND2]]
 ;
   %or = or i32 %a, %b
@@ -48,7 +48,7 @@ define i32 @and_to_xor3(i32 %a, i32 %b) {
 
 define i32 @and_to_xor4(i32 %a, i32 %b) {
 ; CHECK-LABEL: @and_to_xor4(
-; CHECK-NEXT:    [[AND2:%.*]] = xor i32 %a, %b
+; CHECK-NEXT:    [[AND2:%.*]] = xor i32 %b, %a
 ; CHECK-NEXT:    ret i32 [[AND2]]
 ;
   %or = or i32 %b, %a
@@ -73,15 +73,14 @@ define <4 x i32> @and_to_xor1_vec(<4 x i32> %a, <4 x i32> %b) {
 ; In the next 4 tests, cast instructions are used to thwart operand complexity
 ; canonicalizations, so we can test all of the commuted patterns.
 
+; (a | ~b) & (~a | b) --> ~(a ^ b)
+
 define i32 @and_to_nxor1(float %fa, float %fb) {
 ; CHECK-LABEL: @and_to_nxor1(
 ; CHECK-NEXT:    [[A:%.*]] = fptosi float %fa to i32
 ; CHECK-NEXT:    [[B:%.*]] = fptosi float %fb to i32
-; CHECK-NEXT:    [[NOTA:%.*]] = xor i32 [[A]], -1
-; CHECK-NEXT:    [[NOTB:%.*]] = xor i32 [[B]], -1
-; CHECK-NEXT:    [[OR1:%.*]] = or i32 [[A]], [[NOTB]]
-; CHECK-NEXT:    [[OR2:%.*]] = or i32 [[NOTA]], [[B]]
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[OR1]], [[OR2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[A]], [[B]]
+; CHECK-NEXT:    [[AND:%.*]] = xor i32 [[TMP1]], -1
 ; CHECK-NEXT:    ret i32 [[AND]]
 ;
   %a = fptosi float %fa to i32
@@ -94,15 +93,14 @@ define i32 @and_to_nxor1(float %fa, float %fb) {
   ret i32 %and
 }
 
+; (a | ~b) & (b | ~a) --> ~(a ^ b)
+
 define i32 @and_to_nxor2(float %fa, float %fb) {
 ; CHECK-LABEL: @and_to_nxor2(
 ; CHECK-NEXT:    [[A:%.*]] = fptosi float %fa to i32
 ; CHECK-NEXT:    [[B:%.*]] = fptosi float %fb to i32
-; CHECK-NEXT:    [[NOTA:%.*]] = xor i32 [[A]], -1
-; CHECK-NEXT:    [[NOTB:%.*]] = xor i32 [[B]], -1
-; CHECK-NEXT:    [[OR1:%.*]] = or i32 [[A]], [[NOTB]]
-; CHECK-NEXT:    [[OR2:%.*]] = or i32 [[B]], [[NOTA]]
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[OR1]], [[OR2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[A]], [[B]]
+; CHECK-NEXT:    [[AND:%.*]] = xor i32 [[TMP1]], -1
 ; CHECK-NEXT:    ret i32 [[AND]]
 ;
   %a = fptosi float %fa to i32
@@ -115,15 +113,14 @@ define i32 @and_to_nxor2(float %fa, float %fb) {
   ret i32 %and
 }
 
+; (~a | b) & (a | ~b) --> ~(a ^ b)
+
 define i32 @and_to_nxor3(float %fa, float %fb) {
 ; CHECK-LABEL: @and_to_nxor3(
 ; CHECK-NEXT:    [[A:%.*]] = fptosi float %fa to i32
 ; CHECK-NEXT:    [[B:%.*]] = fptosi float %fb to i32
-; CHECK-NEXT:    [[NOTA:%.*]] = xor i32 [[A]], -1
-; CHECK-NEXT:    [[NOTB:%.*]] = xor i32 [[B]], -1
-; CHECK-NEXT:    [[OR1:%.*]] = or i32 [[NOTA]], [[B]]
-; CHECK-NEXT:    [[OR2:%.*]] = or i32 [[A]], [[NOTB]]
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[OR1]], [[OR2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[B]], [[A]]
+; CHECK-NEXT:    [[AND:%.*]] = xor i32 [[TMP1]], -1
 ; CHECK-NEXT:    ret i32 [[AND]]
 ;
   %a = fptosi float %fa to i32
@@ -136,15 +133,14 @@ define i32 @and_to_nxor3(float %fa, float %fb) {
   ret i32 %and
 }
 
+; (~a | b) & (~b | a) --> ~(a ^ b)
+
 define i32 @and_to_nxor4(float %fa, float %fb) {
 ; CHECK-LABEL: @and_to_nxor4(
 ; CHECK-NEXT:    [[A:%.*]] = fptosi float %fa to i32
 ; CHECK-NEXT:    [[B:%.*]] = fptosi float %fb to i32
-; CHECK-NEXT:    [[NOTA:%.*]] = xor i32 [[A]], -1
-; CHECK-NEXT:    [[NOTB:%.*]] = xor i32 [[B]], -1
-; CHECK-NEXT:    [[OR1:%.*]] = or i32 [[NOTA]], [[B]]
-; CHECK-NEXT:    [[OR2:%.*]] = or i32 [[NOTB]], [[A]]
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[OR1]], [[OR2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[B]], [[A]]
+; CHECK-NEXT:    [[AND:%.*]] = xor i32 [[TMP1]], -1
 ; CHECK-NEXT:    ret i32 [[AND]]
 ;
   %a = fptosi float %fa to i32
@@ -233,12 +229,12 @@ define i32 @or_to_xor4(float %fa, float %fb) {
   ret i32 %or
 }
 
+; (a & b) | ~(a | b) --> ~(a ^ b)
+
 define i32 @or_to_nxor1(i32 %a, i32 %b) {
 ; CHECK-LABEL: @or_to_nxor1(
-; CHECK-NEXT:    [[AND:%.*]] = and i32 %a, %b
-; CHECK-NEXT:    [[OR:%.*]] = or i32 %a, %b
-; CHECK-NEXT:    [[NOTOR:%.*]] = xor i32 [[OR]], -1
-; CHECK-NEXT:    [[OR2:%.*]] = or i32 [[AND]], [[NOTOR]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 %a, %b
+; CHECK-NEXT:    [[OR2:%.*]] = xor i32 [[TMP1]], -1
 ; CHECK-NEXT:    ret i32 [[OR2]]
 ;
   %and = and i32 %a, %b
@@ -248,12 +244,12 @@ define i32 @or_to_nxor1(i32 %a, i32 %b) {
   ret i32 %or2
 }
 
+; (a & b) | ~(b | a) --> ~(a ^ b)
+
 define i32 @or_to_nxor2(i32 %a, i32 %b) {
 ; CHECK-LABEL: @or_to_nxor2(
-; CHECK-NEXT:    [[AND:%.*]] = and i32 %a, %b
-; CHECK-NEXT:    [[OR:%.*]] = or i32 %b, %a
-; CHECK-NEXT:    [[NOTOR:%.*]] = xor i32 [[OR]], -1
-; CHECK-NEXT:    [[OR2:%.*]] = or i32 [[AND]], [[NOTOR]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 %a, %b
+; CHECK-NEXT:    [[OR2:%.*]] = xor i32 [[TMP1]], -1
 ; CHECK-NEXT:    ret i32 [[OR2]]
 ;
   %and = and i32 %a, %b
@@ -263,12 +259,12 @@ define i32 @or_to_nxor2(i32 %a, i32 %b) {
   ret i32 %or2
 }
 
+; ~(a | b) | (a & b) --> ~(a ^ b)
+
 define i32 @or_to_nxor3(i32 %a, i32 %b) {
 ; CHECK-LABEL: @or_to_nxor3(
-; CHECK-NEXT:    [[AND:%.*]] = and i32 %a, %b
-; CHECK-NEXT:    [[OR:%.*]] = or i32 %a, %b
-; CHECK-NEXT:    [[NOTOR:%.*]] = xor i32 [[OR]], -1
-; CHECK-NEXT:    [[OR2:%.*]] = or i32 [[AND]], [[NOTOR]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 %a, %b
+; CHECK-NEXT:    [[OR2:%.*]] = xor i32 [[TMP1]], -1
 ; CHECK-NEXT:    ret i32 [[OR2]]
 ;
   %and = and i32 %a, %b
@@ -278,12 +274,12 @@ define i32 @or_to_nxor3(i32 %a, i32 %b) {
   ret i32 %or2
 }
 
+; ~(a | b) | (b & a) --> ~(a ^ b)
+
 define i32 @or_to_nxor4(i32 %a, i32 %b) {
 ; CHECK-LABEL: @or_to_nxor4(
-; CHECK-NEXT:    [[AND:%.*]] = and i32 %b, %a
-; CHECK-NEXT:    [[OR:%.*]] = or i32 %a, %b
-; CHECK-NEXT:    [[NOTOR:%.*]] = xor i32 [[OR]], -1
-; CHECK-NEXT:    [[OR2:%.*]] = or i32 [[AND]], [[NOTOR]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 %b, %a
+; CHECK-NEXT:    [[OR2:%.*]] = xor i32 [[TMP1]], -1
 ; CHECK-NEXT:    ret i32 [[OR2]]
 ;
   %and = and i32 %b, %a
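
One last note, also not from the patch: calling foldAndToXor/foldOrToXor
ahead of SimplifyUsingDistributiveLaws is deliberate; per the commit
message, the distributive path sometimes folded these patterns less
efficiently. A sketch of the concern, with hypothetical names, as plain
C++ expressions:

    // Starting shape:               (a | b) & ~(a & b)
    // foldAndToXor (this patch):    a ^ b   -- a single instruction.
    // Distributing (a | b) & c first instead expands toward
    //   (a & ~(a & b)) | (b & ~(a & b))
    // which no longer matches the simple or/and/not shape, so the one-xor
    // form could be missed or only recovered through a longer chain.
    unsigned and_form(unsigned a, unsigned b) { return (a | b) & ~(a & b); }
    unsigned xor_form(unsigned a, unsigned b) { return a ^ b; }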