1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-21 03:53:04 +02:00

[X86][SSE] Remove PMULDQ/PMULUDQ by zero

Exposed by D50328

Differential Revision: https://reviews.llvm.org/D50328

llvm-svn: 339337
This commit is contained in:
Simon Pilgrim 2018-08-09 12:37:36 +00:00
parent 4f0fc73b9c
commit 36ad2350af
4 changed files with 52 additions and 70 deletions

View File

@@ -39689,6 +39689,15 @@ static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG,
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
// Canonicalize constant to RHS.
if (DAG.isConstantIntBuildVectorOrConstantInt(LHS) &&
!DAG.isConstantIntBuildVectorOrConstantInt(RHS))
return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), RHS, LHS);
// Multiply by zero.
if (ISD::isBuildVectorAllZeros(RHS.getNode()))
return RHS;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());

View File

@@ -213,32 +213,29 @@ define void @test1(x86_mmx* %A, x86_mmx* %B) {
; X32-NEXT: movq %xmm0, (%eax)
; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1,1,3]
; X32-NEXT: pxor %xmm2, %xmm2
; X32-NEXT: pmuludq %xmm1, %xmm2
; X32-NEXT: movdqa %xmm1, %xmm3
; X32-NEXT: psrlq $32, %xmm3
; X32-NEXT: pmuludq %xmm0, %xmm3
; X32-NEXT: paddq %xmm2, %xmm3
; X32-NEXT: psllq $32, %xmm3
; X32-NEXT: pmuludq %xmm1, %xmm0
; X32-NEXT: paddq %xmm3, %xmm0
; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
; X32-NEXT: movq %xmm1, (%eax)
; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; X32-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1,1,3]
; X32-NEXT: andps %xmm0, %xmm1
; X32-NEXT: movdqa %xmm1, %xmm2
; X32-NEXT: pmuludq %xmm0, %xmm2
; X32-NEXT: psrlq $32, %xmm1
; X32-NEXT: pmuludq %xmm0, %xmm1
; X32-NEXT: psllq $32, %xmm1
; X32-NEXT: paddq %xmm2, %xmm1
; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; X32-NEXT: movq %xmm0, (%eax)
; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1,1,3]
; X32-NEXT: orps %xmm1, %xmm0
; X32-NEXT: andps %xmm1, %xmm0
; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
; X32-NEXT: movq %xmm1, (%eax)
; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; X32-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1,1,3]
; X32-NEXT: xorps %xmm0, %xmm1
; X32-NEXT: orps %xmm0, %xmm1
; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; X32-NEXT: movq %xmm0, (%eax)
; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1,1,3]
; X32-NEXT: xorps %xmm1, %xmm0
; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X32-NEXT: movq %xmm0, (%eax)
; X32-NEXT: emms
; X32-NEXT: retl
;
@@ -253,15 +250,12 @@ define void @test1(x86_mmx* %A, x86_mmx* %B) {
; X64-NEXT: movq %xmm0, (%rdi)
; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; X64-NEXT: pxor %xmm2, %xmm2
; X64-NEXT: pmuludq %xmm1, %xmm2
; X64-NEXT: movdqa %xmm1, %xmm3
; X64-NEXT: psrlq $32, %xmm3
; X64-NEXT: pmuludq %xmm0, %xmm3
; X64-NEXT: paddq %xmm2, %xmm3
; X64-NEXT: psllq $32, %xmm3
; X64-NEXT: movdqa %xmm1, %xmm2
; X64-NEXT: pmuludq %xmm0, %xmm2
; X64-NEXT: psrlq $32, %xmm1
; X64-NEXT: pmuludq %xmm0, %xmm1
; X64-NEXT: paddq %xmm3, %xmm1
; X64-NEXT: psllq $32, %xmm1
; X64-NEXT: paddq %xmm2, %xmm1
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; X64-NEXT: movq %xmm0, (%rdi)
; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero

View File

@@ -1235,13 +1235,10 @@ define void @mul_2xi16_sext_zext(i8* nocapture readonly %a, i8* nocapture readon
; X86-SSE-NEXT: pxor %xmm2, %xmm2
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,1,3]
; X86-SSE-NEXT: movdqa %xmm1, %xmm3
; X86-SSE-NEXT: pmuludq %xmm0, %xmm3
; X86-SSE-NEXT: pmuludq %xmm2, %xmm1
; X86-SSE-NEXT: pmuludq %xmm0, %xmm1
; X86-SSE-NEXT: pmuludq %xmm0, %xmm2
; X86-SSE-NEXT: paddq %xmm1, %xmm2
; X86-SSE-NEXT: psllq $32, %xmm2
; X86-SSE-NEXT: paddq %xmm3, %xmm2
; X86-SSE-NEXT: paddq %xmm1, %xmm2
; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; X86-SSE-NEXT: movq %xmm0, (%esi,%ecx,4)
; X86-SSE-NEXT: popl %esi
@@ -1279,13 +1276,10 @@ define void @mul_2xi16_sext_zext(i8* nocapture readonly %a, i8* nocapture readon
; X64-SSE-NEXT: pxor %xmm2, %xmm2
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,1,3]
; X64-SSE-NEXT: movdqa %xmm1, %xmm3
; X64-SSE-NEXT: pmuludq %xmm0, %xmm3
; X64-SSE-NEXT: pmuludq %xmm2, %xmm1
; X64-SSE-NEXT: pmuludq %xmm0, %xmm1
; X64-SSE-NEXT: pmuludq %xmm0, %xmm2
; X64-SSE-NEXT: paddq %xmm1, %xmm2
; X64-SSE-NEXT: psllq $32, %xmm2
; X64-SSE-NEXT: paddq %xmm3, %xmm2
; X64-SSE-NEXT: paddq %xmm1, %xmm2
; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; X64-SSE-NEXT: movq %xmm0, (%rax,%rdx,4)
; X64-SSE-NEXT: retq

View File

@@ -5510,28 +5510,20 @@ define <16 x i8> @trunc_or_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
define <4 x i32> @mul_add_const_v4i64_v4i32(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; SSE-LABEL: mul_add_const_v4i64_v4i32:
; SSE: # %bb.0:
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,1,3,3]
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,1,1,3]
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,1,3]
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,1,3,3]
; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm1[0,1,1,3]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3]
; SSE-NEXT: pxor %xmm4, %xmm4
; SSE-NEXT: pxor %xmm5, %xmm5
; SSE-NEXT: pmuludq %xmm1, %xmm5
; SSE-NEXT: movdqa %xmm2, %xmm6
; SSE-NEXT: pmuludq %xmm4, %xmm6
; SSE-NEXT: paddq %xmm5, %xmm6
; SSE-NEXT: psllq $32, %xmm6
; SSE-NEXT: pmuludq %xmm1, %xmm2
; SSE-NEXT: paddq %xmm6, %xmm2
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: pmuludq %xmm4, %xmm1
; SSE-NEXT: pmuludq %xmm3, %xmm4
; SSE-NEXT: paddq %xmm1, %xmm4
; SSE-NEXT: psllq $32, %xmm4
; SSE-NEXT: pmuludq %xmm3, %xmm0
; SSE-NEXT: paddq %xmm4, %xmm0
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
; SSE-NEXT: pmuludq %xmm1, %xmm3
; SSE-NEXT: pxor %xmm0, %xmm0
; SSE-NEXT: pmuludq %xmm0, %xmm1
; SSE-NEXT: psllq $32, %xmm1
; SSE-NEXT: paddq %xmm3, %xmm1
; SSE-NEXT: pmuludq %xmm4, %xmm2
; SSE-NEXT: pmuludq %xmm4, %xmm0
; SSE-NEXT: psllq $32, %xmm0
; SSE-NEXT: paddq %xmm2, %xmm0
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT: paddd {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
@@ -5607,24 +5599,17 @@ define <4 x i32> @mul_add_multiuse_v4i64_v4i32(<4 x i32> %a0, <4 x i32> %a1) nou
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,1,3,3]
; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm1[0,1,1,3]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3]
; SSE-NEXT: pxor %xmm5, %xmm5
; SSE-NEXT: pxor %xmm6, %xmm6
; SSE-NEXT: pmuludq %xmm1, %xmm6
; SSE-NEXT: movdqa %xmm3, %xmm7
; SSE-NEXT: pmuludq %xmm5, %xmm7
; SSE-NEXT: paddq %xmm6, %xmm7
; SSE-NEXT: psllq $32, %xmm7
; SSE-NEXT: pmuludq %xmm1, %xmm3
; SSE-NEXT: paddq %xmm7, %xmm3
; SSE-NEXT: movdqa %xmm2, %xmm1
; SSE-NEXT: pxor %xmm5, %xmm5
; SSE-NEXT: pmuludq %xmm5, %xmm1
; SSE-NEXT: pmuludq %xmm4, %xmm5
; SSE-NEXT: paddq %xmm1, %xmm5
; SSE-NEXT: psllq $32, %xmm5
; SSE-NEXT: psllq $32, %xmm1
; SSE-NEXT: paddq %xmm3, %xmm1
; SSE-NEXT: pmuludq %xmm4, %xmm2
; SSE-NEXT: paddq %xmm5, %xmm2
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE-NEXT: paddd %xmm2, %xmm0
; SSE-NEXT: pmuludq %xmm4, %xmm5
; SSE-NEXT: psllq $32, %xmm5
; SSE-NEXT: paddq %xmm2, %xmm5
; SSE-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,2],xmm1[0,2]
; SSE-NEXT: paddd %xmm5, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: mul_add_multiuse_v4i64_v4i32: