diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 0d41e51f48d..16863e615a1 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2115,6 +2115,18 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1)) return N0.getOperand(0); + // fold ((A-B)+(C-A)) -> (C-B) + if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB && + N0.getOperand(0) == N1.getOperand(1)) + return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0), + N0.getOperand(1)); + + // fold ((A-B)+(B-C)) -> (A-C) + if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB && + N0.getOperand(1) == N1.getOperand(0)) + return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), + N1.getOperand(1)); + // fold (A+(B-(A+C))) to (B-C) if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && N0 == N1.getOperand(1).getOperand(0)) diff --git a/test/CodeGen/X86/combine-add.ll b/test/CodeGen/X86/combine-add.ll index 2b240a27e9a..8eb4a5d76b6 100644 --- a/test/CodeGen/X86/combine-add.ll +++ b/test/CodeGen/X86/combine-add.ll @@ -99,20 +99,17 @@ define <4 x i32> @combine_vec_add_sub1(<4 x i32> %a, <4 x i32> %b) { ret <4 x i32> %2 } -; FIXME: fold ((A-B)+(C-A)) -> (C-B) +; fold ((A-B)+(C-A)) -> (C-B) define <4 x i32> @combine_vec_add_sub_sub0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; SSE-LABEL: combine_vec_add_sub_sub0: ; SSE: # %bb.0: -; SSE-NEXT: psubd %xmm0, %xmm2 +; SSE-NEXT: movdqa %xmm2, %xmm0 ; SSE-NEXT: psubd %xmm1, %xmm0 -; SSE-NEXT: paddd %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: combine_vec_add_sub_sub0: ; AVX: # %bb.0: -; AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm1 -; AVX-NEXT: vpsubd %xmm0, %xmm2, %xmm0 -; AVX-NEXT: vpaddd %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vpsubd %xmm1, %xmm2, %xmm0 ; AVX-NEXT: retq %1 = sub <4 x i32> %a, %b %2 = sub <4 x i32> %c, %a @@ -120,20 +117,16 @@ define <4 x i32> @combine_vec_add_sub_sub0(<4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %3 } -; FIXME: fold ((A-B)+(B-C)) -> (A-C) +; fold ((A-B)+(B-C)) -> (A-C) define <4 x i32> @combine_vec_add_sub_sub1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; SSE-LABEL: combine_vec_add_sub_sub1: ; SSE: # %bb.0: -; SSE-NEXT: psubd %xmm1, %xmm0 -; SSE-NEXT: psubd %xmm2, %xmm1 -; SSE-NEXT: paddd %xmm1, %xmm0 +; SSE-NEXT: psubd %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: combine_vec_add_sub_sub1: ; AVX: # %bb.0: -; AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpsubd %xmm2, %xmm1, %xmm1 -; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpsubd %xmm2, %xmm0, %xmm0 ; AVX-NEXT: retq %1 = sub <4 x i32> %a, %b %2 = sub <4 x i32> %b, %c diff --git a/test/CodeGen/X86/combine-sbb.ll b/test/CodeGen/X86/combine-sbb.ll index f72e4e5199a..6ef26f0db0d 100644 --- a/test/CodeGen/X86/combine-sbb.ll +++ b/test/CodeGen/X86/combine-sbb.ll @@ -153,26 +153,16 @@ define i8 @PR24545(i32, i32, i32* nocapture readonly) { define i32 @PR40483_sub1(i32*, i32) nounwind { ; X86-LABEL: PR40483_sub1: ; X86: # %bb.0: -; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl (%ecx), %edx -; X86-NEXT: movl %edx, %esi -; X86-NEXT: subl %eax, %esi -; X86-NEXT: movl %esi, (%ecx) -; X86-NEXT: subl %edx, %eax -; X86-NEXT: addl %esi, %eax -; X86-NEXT: popl %esi +; X86-NEXT: subl %eax, (%ecx) +; X86-NEXT: xorl %eax, %eax ; X86-NEXT: retl ; ; X64-LABEL: PR40483_sub1: ; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %ecx -; X64-NEXT: movl %ecx, %eax -; X64-NEXT: subl %esi, %eax -; X64-NEXT: movl %eax, (%rdi) -; X64-NEXT: subl %ecx, %esi -; X64-NEXT: addl %esi, %eax +; X64-NEXT: subl %esi, (%rdi) +; X64-NEXT: xorl %eax, %eax ; X64-NEXT: retq %3 = load i32, i32* %0, align 4 %4 = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 %3, i32 %1)