mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-19 11:02:59 +02:00
[DAGCombine] Add ADD(SUB,SUB) combines
Noticed while investigating PR40483. This fixes the basic test case from the bug, but not the more general case. We're pretty weak at dealing with ADD/SUB combines compared to the SimplifyAssociativeOrCommutative/SimplifyUsingDistributiveLaws abilities that InstCombine can manage. (llvm-svn: 353044)
This commit is contained in:
parent
2820258583
commit
d28b271aff
@ -2115,6 +2115,18 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
|
||||
if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
|
||||
return N0.getOperand(0);
|
||||
|
||||
// fold ((A-B)+(C-A)) -> (C-B)
|
||||
if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
|
||||
N0.getOperand(0) == N1.getOperand(1))
|
||||
return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
|
||||
N0.getOperand(1));
|
||||
|
||||
// fold ((A-B)+(B-C)) -> (A-C)
|
||||
if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
|
||||
N0.getOperand(1) == N1.getOperand(0))
|
||||
return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
|
||||
N1.getOperand(1));
|
||||
|
||||
// fold (A+(B-(A+C))) to (B-C)
|
||||
if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
|
||||
N0 == N1.getOperand(1).getOperand(0))
|
||||
|
@ -99,20 +99,17 @@ define <4 x i32> @combine_vec_add_sub1(<4 x i32> %a, <4 x i32> %b) {
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
|
||||
; FIXME: fold ((A-B)+(C-A)) -> (C-B)
|
||||
; fold ((A-B)+(C-A)) -> (C-B)
|
||||
define <4 x i32> @combine_vec_add_sub_sub0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
|
||||
; SSE-LABEL: combine_vec_add_sub_sub0:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: psubd %xmm0, %xmm2
|
||||
; SSE-NEXT: movdqa %xmm2, %xmm0
|
||||
; SSE-NEXT: psubd %xmm1, %xmm0
|
||||
; SSE-NEXT: paddd %xmm2, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: combine_vec_add_sub_sub0:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm1
|
||||
; AVX-NEXT: vpsubd %xmm0, %xmm2, %xmm0
|
||||
; AVX-NEXT: vpaddd %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: vpsubd %xmm1, %xmm2, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
%1 = sub <4 x i32> %a, %b
|
||||
%2 = sub <4 x i32> %c, %a
|
||||
@ -120,20 +117,16 @@ define <4 x i32> @combine_vec_add_sub_sub0(<4 x i32> %a, <4 x i32> %b, <4 x i32>
|
||||
ret <4 x i32> %3
|
||||
}
|
||||
|
||||
; FIXME: fold ((A-B)+(B-C)) -> (A-C)
|
||||
; fold ((A-B)+(B-C)) -> (A-C)
|
||||
define <4 x i32> @combine_vec_add_sub_sub1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
|
||||
; SSE-LABEL: combine_vec_add_sub_sub1:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: psubd %xmm1, %xmm0
|
||||
; SSE-NEXT: psubd %xmm2, %xmm1
|
||||
; SSE-NEXT: paddd %xmm1, %xmm0
|
||||
; SSE-NEXT: psubd %xmm2, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: combine_vec_add_sub_sub1:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpsubd %xmm2, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpsubd %xmm2, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
%1 = sub <4 x i32> %a, %b
|
||||
%2 = sub <4 x i32> %b, %c
|
||||
|
@ -153,26 +153,16 @@ define i8 @PR24545(i32, i32, i32* nocapture readonly) {
|
||||
define i32 @PR40483_sub1(i32*, i32) nounwind {
|
||||
; X86-LABEL: PR40483_sub1:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl (%ecx), %edx
|
||||
; X86-NEXT: movl %edx, %esi
|
||||
; X86-NEXT: subl %eax, %esi
|
||||
; X86-NEXT: movl %esi, (%ecx)
|
||||
; X86-NEXT: subl %edx, %eax
|
||||
; X86-NEXT: addl %esi, %eax
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: subl %eax, (%ecx)
|
||||
; X86-NEXT: xorl %eax, %eax
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: PR40483_sub1:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movl (%rdi), %ecx
|
||||
; X64-NEXT: movl %ecx, %eax
|
||||
; X64-NEXT: subl %esi, %eax
|
||||
; X64-NEXT: movl %eax, (%rdi)
|
||||
; X64-NEXT: subl %ecx, %esi
|
||||
; X64-NEXT: addl %esi, %eax
|
||||
; X64-NEXT: subl %esi, (%rdi)
|
||||
; X64-NEXT: xorl %eax, %eax
|
||||
; X64-NEXT: retq
|
||||
%3 = load i32, i32* %0, align 4
|
||||
%4 = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 %3, i32 %1)
|
||||
|
Loading…
Reference in New Issue
Block a user