1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00

[DAGCombiner] after simplifying demanded elements of vector operand of extract, revisit the extract; 2nd try

This is a retry of rL349051 (reverted at rL349056). I changed the check for dead-ness from
number of uses to an opcode test for DELETED_NODE based on existing similar code.

Differential Revision: https://reviews.llvm.org/D55655

llvm-svn: 349058
This commit is contained in:
Sanjay Patel 2018-12-13 17:05:01 +00:00
parent 240f871115
commit ab6a3a3093
9 changed files with 14 additions and 31 deletions

View File

@ -15698,8 +15698,13 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
if (CstElt->getAPIntValue().ult(VT.getVectorNumElements()))
DemandedElts.setBit(CstElt->getZExtValue());
}
if (SimplifyDemandedVectorElts(InVec, DemandedElts, true))
if (SimplifyDemandedVectorElts(InVec, DemandedElts, true)) {
// We simplified the vector operand of this extract element. If this
// extract is not dead, visit it again so it is folded properly.
if (N->getOpcode() != ISD::DELETED_NODE)
AddToWorklist(N);
return SDValue(N, 0);
}
}
bool BCNumEltsChanged = false;

View File

@ -93,8 +93,7 @@ define i64 @t4(<2 x double>* %a) {
;
; X64-SSSE3-LABEL: t4:
; X64-SSSE3: # %bb.0:
; X64-SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64-SSSE3-NEXT: movq %xmm0, %rax
; X64-SSSE3-NEXT: movq (%rdi), %rax
; X64-SSSE3-NEXT: retq
;
; X64-AVX-LABEL: t4:

View File

@ -284,13 +284,12 @@ define float @extract_lane_insertps_5123(<4 x float> %a0, <4 x float> *%p1) {
define float @extract_lane_insertps_6123(<4 x float> %a0, <4 x float> *%p1) {
; SSE-LABEL: extract_lane_insertps_6123:
; SSE: # %bb.0:
; SSE-NEXT: movaps (%rdi), %xmm0
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: retq
;
; AVX-LABEL: extract_lane_insertps_6123:
; AVX: # %bb.0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: retq
%a1 = load <4 x float>, <4 x float> *%p1
%res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 128)

View File

@ -556,7 +556,7 @@ define <8 x float> @load_v8f32_v8i32(<8 x i32> %trigger, <8 x float>* %addr, <8
; SSE2-NEXT: movss {{.*#+}} xmm5 = mem[0],zero,zero,zero
; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm5[0],xmm2[1,2,3]
; SSE2-NEXT: LBB6_2: ## %else
; SSE2-NEXT: packssdw %xmm0, %xmm4
; SSE2-NEXT: psrlq $16, %xmm4
; SSE2-NEXT: movd %xmm4, %eax
; SSE2-NEXT: shrl $16, %eax
; SSE2-NEXT: testb $1, %al

View File

@ -145,7 +145,7 @@ define void @store_v8i32_v8i32(<8 x i32> %trigger, <8 x i32>* %addr, <8 x i32> %
; SSE2-NEXT: ## %bb.1: ## %cond.store
; SSE2-NEXT: movd %xmm2, (%rdi)
; SSE2-NEXT: LBB2_2: ## %else
; SSE2-NEXT: packssdw %xmm0, %xmm4
; SSE2-NEXT: psrlq $16, %xmm4
; SSE2-NEXT: movd %xmm4, %eax
; SSE2-NEXT: shrl $16, %eax
; SSE2-NEXT: testb $1, %al

View File

@ -1175,7 +1175,6 @@ define void @mul_2xi16_sext_zext(i8* nocapture readonly %a, i8* nocapture readon
; X86-SSE-NEXT: pmuludq %xmm0, %xmm1
; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X86-SSE-NEXT: pmuludq %xmm2, %xmm0
; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-SSE-NEXT: movq %xmm1, (%esi,%ecx,4)
; X86-SSE-NEXT: popl %esi
@ -1210,7 +1209,6 @@ define void @mul_2xi16_sext_zext(i8* nocapture readonly %a, i8* nocapture readon
; X64-SSE-NEXT: pmuludq %xmm0, %xmm1
; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-SSE-NEXT: pmuludq %xmm2, %xmm0
; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X64-SSE-NEXT: movq %xmm1, (%rax,%rdx,4)
; X64-SSE-NEXT: retq
@ -1965,7 +1963,6 @@ define void @mul_2xi16_varconst3(i8* nocapture readonly %a, i64 %index) {
; X86-SSE-NEXT: pmuludq %xmm1, %xmm0
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X86-SSE-NEXT: pmuludq %xmm2, %xmm1
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
; X86-SSE-NEXT: retl
@ -1992,7 +1989,6 @@ define void @mul_2xi16_varconst3(i8* nocapture readonly %a, i64 %index) {
; X64-SSE-NEXT: pmuludq %xmm1, %xmm0
; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X64-SSE-NEXT: pmuludq %xmm2, %xmm1
; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-SSE-NEXT: movq %xmm0, (%rax,%rsi,4)
; X64-SSE-NEXT: retq
@ -2037,7 +2033,6 @@ define void @mul_2xi16_varconst4(i8* nocapture readonly %a, i64 %index) {
; X86-SSE-NEXT: pmuludq %xmm1, %xmm0
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X86-SSE-NEXT: pmuludq %xmm2, %xmm1
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
; X86-SSE-NEXT: retl
@ -2064,7 +2059,6 @@ define void @mul_2xi16_varconst4(i8* nocapture readonly %a, i64 %index) {
; X64-SSE-NEXT: pmuludq %xmm1, %xmm0
; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X64-SSE-NEXT: pmuludq %xmm2, %xmm1
; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-SSE-NEXT: movq %xmm0, (%rax,%rsi,4)
; X64-SSE-NEXT: retq

View File

@ -25,17 +25,13 @@ define void @t1(float* %R, <4 x float>* %P1) nounwind {
define float @t2(<4 x float>* %P1) nounwind {
; X32-LABEL: t2:
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT: movss %xmm0, (%esp)
; X32-NEXT: flds (%esp)
; X32-NEXT: popl %eax
; X32-NEXT: flds 8(%eax)
; X32-NEXT: retl
;
; X64-LABEL: t2:
; X64: # %bb.0:
; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: retq
%X = load <4 x float>, <4 x float>* %P1
%tmp = extractelement <4 x float> %X, i32 2

View File

@ -1863,8 +1863,6 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper

View File

@ -808,7 +808,6 @@ define i32 @test_v2i32(<2 x i32> %a0) {
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX512DQ-NEXT: vpmullq %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vmovd %xmm0, %eax
; AVX512DQ-NEXT: vzeroupper
@ -817,7 +816,6 @@ define i32 @test_v2i32(<2 x i32> %a0) {
; AVX512DQVL-LABEL: test_v2i32:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512DQVL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX512DQVL-NEXT: vpmullq %xmm1, %xmm0, %xmm0
; AVX512DQVL-NEXT: vmovd %xmm0, %eax
; AVX512DQVL-NEXT: retq
@ -1178,7 +1176,6 @@ define i16 @test_v2i16(<2 x i16> %a0) {
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX512DQ-NEXT: vpmullq %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vmovd %xmm0, %eax
; AVX512DQ-NEXT: # kill: def $ax killed $ax killed $eax
@ -1188,7 +1185,6 @@ define i16 @test_v2i16(<2 x i16> %a0) {
; AVX512DQVL-LABEL: test_v2i16:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512DQVL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX512DQVL-NEXT: vpmullq %xmm1, %xmm0, %xmm0
; AVX512DQVL-NEXT: vmovd %xmm0, %eax
; AVX512DQVL-NEXT: # kill: def $ax killed $ax killed $eax
@ -1210,7 +1206,7 @@ define i16 @test_v4i16(<4 x i16> %a0) {
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE2-NEXT: pmuludq %xmm0, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: pextrw $0, %xmm1, %eax
; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-NEXT: retq
;
@ -1658,7 +1654,6 @@ define i8 @test_v2i8(<2 x i8> %a0) {
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX512DQ-NEXT: vpmullq %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpextrb $0, %xmm0, %eax
; AVX512DQ-NEXT: # kill: def $al killed $al killed $eax
@ -1668,7 +1663,6 @@ define i8 @test_v2i8(<2 x i8> %a0) {
; AVX512DQVL-LABEL: test_v2i8:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512DQVL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX512DQVL-NEXT: vpmullq %xmm1, %xmm0, %xmm0
; AVX512DQVL-NEXT: vpextrb $0, %xmm0, %eax
; AVX512DQVL-NEXT: # kill: def $al killed $al killed $eax
@ -1899,8 +1893,6 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper