[DAGCombine] Add ADD(SUB,SUB) combines

Noticed while investigating PR40483. This fixes the basic test case from the bug, but not the more general case. DAGCombine is still pretty weak at dealing with ADD/SUB combines compared to what InstCombine can manage through SimplifyAssociativeOrCommutative/SimplifyUsingDistributiveLaws. llvm-svn: 353044
2024-10-19 11:02:59 +02:00 · 2019-02-04 13:44:49 +00:00 · 2019-02-04 13:44:49 +00:00 · d28b271aff
commit d28b271aff
parent 2820258583
3 changed files with 22 additions and 27 deletions
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2115,6 +2115,18 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
    return N0.getOperand(0);

+  // fold ((A-B)+(C-A)) -> (C-B)
+  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
+      N0.getOperand(0) == N1.getOperand(1))
+    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
+                       N0.getOperand(1));
+
+  // fold ((A-B)+(B-C)) -> (A-C)
+  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
+      N0.getOperand(1) == N1.getOperand(0))
+    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
+                       N1.getOperand(1));
+
  // fold (A+(B-(A+C))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(0))
--- a/test/CodeGen/X86/combine-add.ll
+++ b/test/CodeGen/X86/combine-add.ll
@@ -99,20 +99,17 @@ define <4 x i32> @combine_vec_add_sub1(<4 x i32> %a, <4 x i32> %b) {
  ret <4 x i32> %2
 }

-; FIXME: fold ((A-B)+(C-A)) -> (C-B)
+; fold ((A-B)+(C-A)) -> (C-B)
 define <4 x i32> @combine_vec_add_sub_sub0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; SSE-LABEL: combine_vec_add_sub_sub0:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    psubd %xmm0, %xmm2
+; SSE-NEXT:    movdqa %xmm2, %xmm0
 ; SSE-NEXT:    psubd %xmm1, %xmm0
-; SSE-NEXT:    paddd %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_add_sub_sub0:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm1
-; AVX-NEXT:    vpsubd %xmm0, %xmm2, %xmm0
-; AVX-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    vpsubd %xmm1, %xmm2, %xmm0
 ; AVX-NEXT:    retq
  %1 = sub <4 x i32> %a, %b
  %2 = sub <4 x i32> %c, %a
@@ -120,20 +117,16 @@ define <4 x i32> @combine_vec_add_sub_sub0(<4 x i32> %a, <4 x i32> %b, <4 x i32>
  ret <4 x i32> %3
 }

-; FIXME: fold ((A-B)+(B-C)) -> (A-C)
+; fold ((A-B)+(B-C)) -> (A-C)
 define <4 x i32> @combine_vec_add_sub_sub1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; SSE-LABEL: combine_vec_add_sub_sub1:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    psubd %xmm1, %xmm0
-; SSE-NEXT:    psubd %xmm2, %xmm1
-; SSE-NEXT:    paddd %xmm1, %xmm0
+; SSE-NEXT:    psubd %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_add_sub_sub1:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpsubd %xmm2, %xmm1, %xmm1
-; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpsubd %xmm2, %xmm0, %xmm0
 ; AVX-NEXT:    retq
  %1 = sub <4 x i32> %a, %b
  %2 = sub <4 x i32> %b, %c
--- a/test/CodeGen/X86/combine-sbb.ll
+++ b/test/CodeGen/X86/combine-sbb.ll
@@ -153,26 +153,16 @@ define i8 @PR24545(i32, i32, i32* nocapture readonly) {
 define i32 @PR40483_sub1(i32*, i32) nounwind {
 ; X86-LABEL: PR40483_sub1:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl (%ecx), %edx
-; X86-NEXT:    movl %edx, %esi
-; X86-NEXT:    subl %eax, %esi
-; X86-NEXT:    movl %esi, (%ecx)
-; X86-NEXT:    subl %edx, %eax
-; X86-NEXT:    addl %esi, %eax
-; X86-NEXT:    popl %esi
+; X86-NEXT:    subl %eax, (%ecx)
+; X86-NEXT:    xorl %eax, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: PR40483_sub1:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl (%rdi), %ecx
-; X64-NEXT:    movl %ecx, %eax
-; X64-NEXT:    subl %esi, %eax
-; X64-NEXT:    movl %eax, (%rdi)
-; X64-NEXT:    subl %ecx, %esi
-; X64-NEXT:    addl %esi, %eax
+; X64-NEXT:    subl %esi, (%rdi)
+; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    retq
  %3 = load i32, i32* %0, align 4
  %4 = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 %3, i32 %1)