[DAGCombine][X86][AArch64][MIPS][LANAI] (C - x) - y -> C - (x + y) fold (PR41952)

Summary: This *might* be the last fold for `sink-addsub-of-const.ll`, but I'm not sure yet. As far as I can tell, there are no regressions here (ignoring x86-32); all changes are either good or neutral. Almost surprisingly to me, this fixes the motivating test `@reg32_lshr_by_sub_from_negated` (in `shift-amount-mod.ll`) from [[ https://bugs.llvm.org/show_bug.cgi?id=41952 | PR41952 ]]. Alive proof: https://rise4fun.com/Alive/vMd3 Reviewers: RKSimon, t.p.northover, craig.topper, spatel, efriedma Reviewed By: RKSimon Subscribers: sdardis, javed.absar, arichardson, kristof.beyls, jrtc27, atanasyan, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D62774 llvm-svn: 362488
2024-10-18 18:42:46 +02:00 · 2019-06-04 11:06:21 +00:00 · 2019-06-04 11:06:21 +00:00 · 4de4faa425
commit 4de4faa425
parent f0d869544f
8 changed files with 69 additions and 63 deletions
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@ -3033,6 +3033,12 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
    return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
  }
+  // (C - x) - y  ->  C - (x + y)
+  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+      isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
+    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
+    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
+  }

  // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
  // rather than 'sub 0/1' (the sext should get folded).
--- a/test/CodeGen/AArch64/shift-amount-mod.ll
+++ b/test/CodeGen/AArch64/shift-amount-mod.ll
@ -318,9 +318,8 @@ define void @modify64_ashr_by_negated(i64* %valptr, i64 %shamt) nounwind {
 define i32 @reg32_lshr_by_sub_from_negated(i32 %val, i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: reg32_lshr_by_sub_from_negated:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #32
-; CHECK-NEXT:    sub w8, w8, w1
-; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    add w8, w1, w2
+; CHECK-NEXT:    neg w8, w8
 ; CHECK-NEXT:    lsr w0, w0, w8
 ; CHECK-NEXT:    ret
  %nega = sub i32 32, %a
@ -331,9 +330,8 @@ define i32 @reg32_lshr_by_sub_from_negated(i32 %val, i32 %a, i32 %b) nounwind {
 define i64 @reg64_lshr_by_sub_from_negated(i64 %val, i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: reg64_lshr_by_sub_from_negated:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #64
-; CHECK-NEXT:    sub x8, x8, x1
-; CHECK-NEXT:    sub x8, x8, x2
+; CHECK-NEXT:    add x8, x1, x2
+; CHECK-NEXT:    neg x8, x8
 ; CHECK-NEXT:    lsr x0, x0, x8
 ; CHECK-NEXT:    ret
  %nega = sub i64 64, %a
@ -482,8 +480,8 @@ define i64 @reg64_lshr_by_negated_unfolded(i64 %val, i64 %shamt) nounwind {
 define i32 @reg32_lshr_by_negated_unfolded_sub_b(i32 %val, i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: reg32_lshr_by_negated_unfolded_sub_b:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    neg w8, w1
-; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    add w8, w1, w2
+; CHECK-NEXT:    neg w8, w8
 ; CHECK-NEXT:    lsr w0, w0, w8
 ; CHECK-NEXT:    ret
  %nega = sub i32 0, %a
@ -495,8 +493,8 @@ define i32 @reg32_lshr_by_negated_unfolded_sub_b(i32 %val, i32 %a, i32 %b) nounw
 define i64 @reg64_lshr_by_negated_unfolded_sub_b(i64 %val, i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: reg64_lshr_by_negated_unfolded_sub_b:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    neg x8, x1
-; CHECK-NEXT:    sub x8, x8, x2
+; CHECK-NEXT:    add x8, x1, x2
+; CHECK-NEXT:    neg x8, x8
 ; CHECK-NEXT:    lsr x0, x0, x8
 ; CHECK-NEXT:    ret
  %nega = sub i64 0, %a
--- a/test/CodeGen/AArch64/sink-addsub-of-const.ll
+++ b/test/CodeGen/AArch64/sink-addsub-of-const.ll
@ -129,9 +129,9 @@ define i32 @sink_sub_of_const_to_sub2(i32 %a, i32 %b) {
 define i32 @sink_sub_from_const_to_sub(i32 %a, i32 %b) {
 ; CHECK-LABEL: sink_sub_from_const_to_sub:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #32
-; CHECK-NEXT:    sub w8, w8, w0
-; CHECK-NEXT:    sub w0, w8, w1
+; CHECK-NEXT:    add w8, w0, w1
+; CHECK-NEXT:    mov w9, #32
+; CHECK-NEXT:    sub w0, w9, w8
 ; CHECK-NEXT:    ret
  %t0 = sub i32 32, %a
  %r = sub i32 %t0, %b
@ -300,8 +300,8 @@ define <4 x i32> @vec_sink_sub_from_const_to_sub(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI22_0
 ; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI22_0]
+; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    sub v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
  %t0 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %a
  %r = sub <4 x i32> %t0, %b
--- a/test/CodeGen/Lanai/constant_multiply.ll
+++ b/test/CodeGen/Lanai/constant_multiply.ll
@ -150,9 +150,9 @@ define i32 @fm9(i32 inreg %a) #0 {
 ; CHECK-NEXT:    st %fp, [--%sp]
 ; CHECK-NEXT:    add %sp, 0x8, %fp
 ; CHECK-NEXT:    sub %sp, 0x8, %sp
-; CHECK-NEXT:    sub %r0, %r6, %r3
-; CHECK-NEXT:    sh %r6, 0x3, %r9
-; CHECK-NEXT:    sub %r3, %r9, %rv
+; CHECK-NEXT:    sh %r6, 0x3, %r3
+; CHECK-NEXT:    add %r6, %r3, %r3
+; CHECK-NEXT:    sub %r0, %r3, %rv
 ; CHECK-NEXT:    ld -4[%fp], %pc ! return
 ; CHECK-NEXT:    add %fp, 0x0, %sp
 ; CHECK-NEXT:    ld -8[%fp], %fp
@ -166,10 +166,10 @@ define i32 @fm10(i32 inreg %a) #0 {
 ; CHECK-NEXT:    st %fp, [--%sp]
 ; CHECK-NEXT:    add %sp, 0x8, %fp
 ; CHECK-NEXT:    sub %sp, 0x8, %sp
-; CHECK-NEXT:    sh %r6, 0x1, %r3
-; CHECK-NEXT:    sub %r0, %r3, %r3
-; CHECK-NEXT:    sh %r6, 0x3, %r9
-; CHECK-NEXT:    sub %r3, %r9, %rv
+; CHECK-NEXT:    sh %r6, 0x3, %r3
+; CHECK-NEXT:    sh %r6, 0x1, %r9
+; CHECK-NEXT:    add %r9, %r3, %r3
+; CHECK-NEXT:    sub %r0, %r3, %rv
 ; CHECK-NEXT:    ld -4[%fp], %pc ! return
 ; CHECK-NEXT:    add %fp, 0x0, %sp
 ; CHECK-NEXT:    ld -8[%fp], %fp
--- a/test/CodeGen/Mips/const-mult.ll
+++ b/test/CodeGen/Mips/const-mult.ll
@ -179,8 +179,8 @@ define i128 @mul170141183460469231731687303715884105723_128(i128 signext %a) {
 ; MIPS32-NEXT:    subu $1, $1, $3
 ; MIPS32-NEXT:    subu $5, $1, $12
 ; MIPS32-NEXT:    subu $4, $9, $10
-; MIPS32-NEXT:    negu $1, $8
-; MIPS32-NEXT:    subu $3, $1, $11
+; MIPS32-NEXT:    addu $1, $8, $11
+; MIPS32-NEXT:    negu $3, $1
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    negu $2, $2
 ;
--- a/test/CodeGen/Mips/madd-msub.ll
+++ b/test/CodeGen/Mips/madd-msub.ll
@ -342,13 +342,13 @@ define i64 @msub2(i32 zeroext %a, i32 zeroext %b, i32 zeroext %c) nounwind readn
 ;
 ; 32R6-LABEL: msub2:
 ; 32R6:       # %bb.0: # %entry
-; 32R6-NEXT:    mul $1, $5, $4
-; 32R6-NEXT:    sltu $2, $6, $1
-; 32R6-NEXT:    muhu $3, $5, $4
-; 32R6-NEXT:    negu $3, $3
-; 32R6-NEXT:    subu $2, $3, $2
+; 32R6-NEXT:    muhu $1, $5, $4
+; 32R6-NEXT:    mul $3, $5, $4
+; 32R6-NEXT:    sltu $2, $6, $3
+; 32R6-NEXT:    addu $1, $1, $2
+; 32R6-NEXT:    negu $2, $1
 ; 32R6-NEXT:    jr $ra
-; 32R6-NEXT:    subu $3, $6, $1
+; 32R6-NEXT:    subu $3, $6, $3
 ;
 ; DSP-LABEL: msub2:
 ; DSP:       # %bb.0: # %entry
@ -377,12 +377,12 @@ define i64 @msub2(i32 zeroext %a, i32 zeroext %b, i32 zeroext %c) nounwind readn
 ; 16:       # %bb.0: # %entry
 ; 16-NEXT:    multu $5, $4
 ; 16-NEXT:    mflo $2
-; 16-NEXT:    mfhi $4
-; 16-NEXT:    subu $3, $6, $2
+; 16-NEXT:    mfhi $3
 ; 16-NEXT:    sltu $6, $2
-; 16-NEXT:    move $2, $24
-; 16-NEXT:    neg $4, $4
-; 16-NEXT:    subu $2, $4, $2
+; 16-NEXT:    move $4, $24
+; 16-NEXT:    addu $4, $3, $4
+; 16-NEXT:    subu $3, $6, $2
+; 16-NEXT:    neg $2, $4
 ; 16-NEXT:    jrc $ra
 entry:
  %conv = zext i32 %c to i64
--- a/test/CodeGen/X86/shift-amount-mod.ll
+++ b/test/CodeGen/X86/shift-amount-mod.ll
@ -735,19 +735,20 @@ define i32 @reg32_lshr_by_sub_from_negated(i32 %val, i32 %a, i32 %b) nounwind {
 ; X32-LABEL: reg32_lshr_by_sub_from_negated:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl $32, %ecx
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    addl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    negb %cl
 ; X32-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X32-NEXT:    shrl %cl, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: reg32_lshr_by_sub_from_negated:
 ; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $edx killed $edx def $rdx
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    movl $32, %ecx
-; X64-NEXT:    subl %esi, %ecx
-; X64-NEXT:    subl %edx, %ecx
+; X64-NEXT:    leal (%rsi,%rdx), %ecx
+; X64-NEXT:    negb %cl
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrl %cl, %eax
 ; X64-NEXT:    retq
@ -762,9 +763,10 @@ define i64 @reg64_lshr_by_sub_from_negated(i64 %val, i64 %a, i64 %b) nounwind {
 ; X32-NEXT:    pushl %esi
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    movl $64, %ecx
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    addl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    movb $64, %cl
+; X32-NEXT:    subb %dl, %cl
 ; X32-NEXT:    movl %esi, %edx
 ; X32-NEXT:    shrl %cl, %edx
 ; X32-NEXT:    shrdl %cl, %esi, %eax
@ -780,9 +782,8 @@ define i64 @reg64_lshr_by_sub_from_negated(i64 %val, i64 %a, i64 %b) nounwind {
 ; X64-LABEL: reg64_lshr_by_sub_from_negated:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    movl $64, %ecx
-; X64-NEXT:    subl %esi, %ecx
-; X64-NEXT:    subl %edx, %ecx
+; X64-NEXT:    leal (%rdx,%rsi), %ecx
+; X64-NEXT:    negb %cl
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrq %cl, %rax
 ; X64-NEXT:    retq
@ -1108,19 +1109,20 @@ define i32 @reg32_lshr_by_negated_unfolded_sub_b(i32 %val, i32 %a, i32 %b) nounw
 ; X32-LABEL: reg32_lshr_by_negated_unfolded_sub_b:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    xorl %ecx, %ecx
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    addl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    negb %cl
 ; X32-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X32-NEXT:    shrl %cl, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: reg32_lshr_by_negated_unfolded_sub_b:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    # kill: def $edx killed $edx def $rdx
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    negl %ecx
-; X64-NEXT:    subl %edx, %ecx
+; X64-NEXT:    leal (%rsi,%rdx), %ecx
+; X64-NEXT:    negb %cl
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrl %cl, %eax
 ; X64-NEXT:    retq
@ -1136,10 +1138,10 @@ define i64 @reg64_lshr_by_negated_unfolded_sub_b(i64 %val, i64 %a, i64 %b) nounw
 ; X32-NEXT:    pushl %esi
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    xorl %ecx, %ecx
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    addb $64, %cl
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    addl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    movb $64, %cl
+; X32-NEXT:    subb %dl, %cl
 ; X32-NEXT:    movl %esi, %edx
 ; X32-NEXT:    shrl %cl, %edx
 ; X32-NEXT:    shrdl %cl, %esi, %eax
@ -1155,9 +1157,8 @@ define i64 @reg64_lshr_by_negated_unfolded_sub_b(i64 %val, i64 %a, i64 %b) nounw
 ; X64-LABEL: reg64_lshr_by_negated_unfolded_sub_b:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    movl $64, %ecx
-; X64-NEXT:    subl %esi, %ecx
-; X64-NEXT:    subl %edx, %ecx
+; X64-NEXT:    leal (%rdx,%rsi), %ecx
+; X64-NEXT:    negb %cl
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrq %cl, %rax
 ; X64-NEXT:    retq
--- a/test/CodeGen/X86/sink-addsub-of-const.ll
+++ b/test/CodeGen/X86/sink-addsub-of-const.ll
@ -214,16 +214,17 @@ define i32 @sink_sub_of_const_to_sub2(i32 %a, i32 %b) {
 define i32 @sink_sub_from_const_to_sub(i32 %a, i32 %b) {
 ; X32-LABEL: sink_sub_from_const_to_sub:
 ; X32:       # %bb.0:
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    addl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    movl $32, %eax
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    subl %ecx, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: sink_sub_from_const_to_sub:
 ; X64:       # %bb.0:
+; X64-NEXT:    addl %esi, %edi
 ; X64-NEXT:    movl $32, %eax
 ; X64-NEXT:    subl %edi, %eax
-; X64-NEXT:    subl %esi, %eax
 ; X64-NEXT:    retq
  %t0 = sub i32 32, %a
  %r = sub i32 %t0, %b
@ -448,8 +449,8 @@ define <4 x i32> @vec_sink_sub_from_const_to_sub(<4 x i32> %a, <4 x i32> %b) {
 ; ALL-LABEL: vec_sink_sub_from_const_to_sub:
 ; ALL:       # %bb.0:
 ; ALL-NEXT:    movdqa {{.*#+}} xmm2 = <42,24,u,46>
+; ALL-NEXT:    paddd %xmm1, %xmm0
 ; ALL-NEXT:    psubd %xmm0, %xmm2
-; ALL-NEXT:    psubd %xmm1, %xmm2
 ; ALL-NEXT:    movdqa %xmm2, %xmm0
 ; ALL-NEXT:    ret{{[l|q]}}
  %t0 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %a