mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 04:02:41 +01:00
[X86] When lowering insert_vector_elt/extract_vector_elt of vXi1 with a non-constant index just use either a 128-bit type or the vXi8 type with the correct number of elements.
Despite what the comment said, there isn't better codegen for 512-bit vectors. The 128/256/512-bit implementation just stores to memory and loads an element. There's no advantage to doing that with a larger size. In fact, in many cases it causes a stack realignment and generates worse code. llvm-svn: 321369
This commit is contained in:
parent
15d0e14230
commit
acd88472c6
@ -14578,11 +14578,10 @@ static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG,
|
||||
unsigned NumElts = VecVT.getVectorNumElements();
|
||||
// Extending v8i1/v16i1 to 512-bit get better performance on KNL
|
||||
// than extending to 128/256bit.
|
||||
unsigned VecSize = (NumElts <= 4 ? 128 : 512);
|
||||
MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(VecSize / NumElts), NumElts);
|
||||
SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVT, Vec);
|
||||
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
|
||||
ExtVT.getVectorElementType(), Ext, Idx);
|
||||
MVT ExtEltVT = (NumElts <= 8) ? MVT::getIntegerVT(128 / NumElts) : MVT::i8;
|
||||
MVT ExtVecVT = MVT::getVectorVT(ExtEltVT, NumElts);
|
||||
SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVecVT, Vec);
|
||||
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ExtEltVT, Ext, Idx);
|
||||
return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
|
||||
}
|
||||
|
||||
@ -14777,9 +14776,8 @@ static SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG,
|
||||
// Non constant index. Extend source and destination,
|
||||
// insert element and then truncate the result.
|
||||
unsigned NumElts = VecVT.getVectorNumElements();
|
||||
unsigned VecSize = (NumElts <= 4 ? 128 : 512);
|
||||
MVT ExtVecVT = MVT::getVectorVT(MVT::getIntegerVT(VecSize/NumElts), NumElts);
|
||||
MVT ExtEltVT = ExtVecVT.getVectorElementType();
|
||||
MVT ExtEltVT = (NumElts <= 8) ? MVT::getIntegerVT(128 / NumElts) : MVT::i8;
|
||||
MVT ExtVecVT = MVT::getVectorVT(ExtEltVT, NumElts);
|
||||
SDValue ExtOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtVecVT,
|
||||
DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVecVT, Vec),
|
||||
DAG.getNode(ISD::SIGN_EXTEND, dl, ExtEltVT, Elt), Idx);
|
||||
|
@ -1616,45 +1616,28 @@ define zeroext i8 @test_extractelement_varible_v4i1(<4 x i32> %a, <4 x i32> %b,
|
||||
define zeroext i8 @test_extractelement_varible_v8i1(<8 x i32> %a, <8 x i32> %b, i32 %index) {
|
||||
; KNL-LABEL: test_extractelement_varible_v8i1:
|
||||
; KNL: ## %bb.0:
|
||||
; KNL-NEXT: pushq %rbp
|
||||
; KNL-NEXT: .cfi_def_cfa_offset 16
|
||||
; KNL-NEXT: .cfi_offset %rbp, -16
|
||||
; KNL-NEXT: movq %rsp, %rbp
|
||||
; KNL-NEXT: .cfi_def_cfa_register %rbp
|
||||
; KNL-NEXT: andq $-64, %rsp
|
||||
; KNL-NEXT: subq $128, %rsp
|
||||
; KNL-NEXT: ## kill: def %edi killed %edi def %rdi
|
||||
; KNL-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1
|
||||
; KNL-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0
|
||||
; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k1
|
||||
; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
||||
; KNL-NEXT: vmovdqa64 %zmm0, (%rsp)
|
||||
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
||||
; KNL-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; KNL-NEXT: andl $7, %edi
|
||||
; KNL-NEXT: movzbl (%rsp,%rdi,8), %eax
|
||||
; KNL-NEXT: movzbl -24(%rsp,%rdi,2), %eax
|
||||
; KNL-NEXT: andl $1, %eax
|
||||
; KNL-NEXT: movq %rbp, %rsp
|
||||
; KNL-NEXT: popq %rbp
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: test_extractelement_varible_v8i1:
|
||||
; SKX: ## %bb.0:
|
||||
; SKX-NEXT: pushq %rbp
|
||||
; SKX-NEXT: .cfi_def_cfa_offset 16
|
||||
; SKX-NEXT: .cfi_offset %rbp, -16
|
||||
; SKX-NEXT: movq %rsp, %rbp
|
||||
; SKX-NEXT: .cfi_def_cfa_register %rbp
|
||||
; SKX-NEXT: andq $-64, %rsp
|
||||
; SKX-NEXT: subq $128, %rsp
|
||||
; SKX-NEXT: ## kill: def %edi killed %edi def %rdi
|
||||
; SKX-NEXT: vpcmpnleud %ymm1, %ymm0, %k0
|
||||
; SKX-NEXT: vpmovm2q %k0, %zmm0
|
||||
; SKX-NEXT: vmovdqa64 %zmm0, (%rsp)
|
||||
; SKX-NEXT: vpmovm2w %k0, %xmm0
|
||||
; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; SKX-NEXT: andl $7, %edi
|
||||
; SKX-NEXT: movzbl (%rsp,%rdi,8), %eax
|
||||
; SKX-NEXT: movzbl -24(%rsp,%rdi,2), %eax
|
||||
; SKX-NEXT: andl $1, %eax
|
||||
; SKX-NEXT: movq %rbp, %rsp
|
||||
; SKX-NEXT: popq %rbp
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: retq
|
||||
%t1 = icmp ugt <8 x i32> %a, %b
|
||||
@ -1666,43 +1649,28 @@ define zeroext i8 @test_extractelement_varible_v8i1(<8 x i32> %a, <8 x i32> %b,
|
||||
define zeroext i8 @test_extractelement_varible_v16i1(<16 x i32> %a, <16 x i32> %b, i32 %index) {
|
||||
; KNL-LABEL: test_extractelement_varible_v16i1:
|
||||
; KNL: ## %bb.0:
|
||||
; KNL-NEXT: pushq %rbp
|
||||
; KNL-NEXT: .cfi_def_cfa_offset 16
|
||||
; KNL-NEXT: .cfi_offset %rbp, -16
|
||||
; KNL-NEXT: movq %rsp, %rbp
|
||||
; KNL-NEXT: .cfi_def_cfa_register %rbp
|
||||
; KNL-NEXT: andq $-64, %rsp
|
||||
; KNL-NEXT: subq $128, %rsp
|
||||
; KNL-NEXT: ## kill: def %edi killed %edi def %rdi
|
||||
; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k1
|
||||
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
||||
; KNL-NEXT: vmovdqa32 %zmm0, (%rsp)
|
||||
; KNL-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; KNL-NEXT: andl $15, %edi
|
||||
; KNL-NEXT: movzbl (%rsp,%rdi,4), %eax
|
||||
; KNL-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
|
||||
; KNL-NEXT: movzbl (%rdi,%rax), %eax
|
||||
; KNL-NEXT: andl $1, %eax
|
||||
; KNL-NEXT: movq %rbp, %rsp
|
||||
; KNL-NEXT: popq %rbp
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: test_extractelement_varible_v16i1:
|
||||
; SKX: ## %bb.0:
|
||||
; SKX-NEXT: pushq %rbp
|
||||
; SKX-NEXT: .cfi_def_cfa_offset 16
|
||||
; SKX-NEXT: .cfi_offset %rbp, -16
|
||||
; SKX-NEXT: movq %rsp, %rbp
|
||||
; SKX-NEXT: .cfi_def_cfa_register %rbp
|
||||
; SKX-NEXT: andq $-64, %rsp
|
||||
; SKX-NEXT: subq $128, %rsp
|
||||
; SKX-NEXT: ## kill: def %edi killed %edi def %rdi
|
||||
; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
|
||||
; SKX-NEXT: vpmovm2d %k0, %zmm0
|
||||
; SKX-NEXT: vmovdqa32 %zmm0, (%rsp)
|
||||
; SKX-NEXT: vpmovm2b %k0, %xmm0
|
||||
; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; SKX-NEXT: andl $15, %edi
|
||||
; SKX-NEXT: movzbl (%rsp,%rdi,4), %eax
|
||||
; SKX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
|
||||
; SKX-NEXT: movzbl (%rdi,%rax), %eax
|
||||
; SKX-NEXT: andl $1, %eax
|
||||
; SKX-NEXT: movq %rbp, %rsp
|
||||
; SKX-NEXT: popq %rbp
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: retq
|
||||
%t1 = icmp ugt <16 x i32> %a, %b
|
||||
@ -1743,14 +1711,15 @@ define zeroext i8 @test_extractelement_varible_v32i1(<32 x i8> %a, <32 x i8> %b,
|
||||
; SKX-NEXT: .cfi_offset %rbp, -16
|
||||
; SKX-NEXT: movq %rsp, %rbp
|
||||
; SKX-NEXT: .cfi_def_cfa_register %rbp
|
||||
; SKX-NEXT: andq $-64, %rsp
|
||||
; SKX-NEXT: subq $128, %rsp
|
||||
; SKX-NEXT: andq $-32, %rsp
|
||||
; SKX-NEXT: subq $64, %rsp
|
||||
; SKX-NEXT: ## kill: def %edi killed %edi def %rdi
|
||||
; SKX-NEXT: vpcmpnleub %ymm1, %ymm0, %k0
|
||||
; SKX-NEXT: vpmovm2w %k0, %zmm0
|
||||
; SKX-NEXT: vmovdqa32 %zmm0, (%rsp)
|
||||
; SKX-NEXT: vpmovm2b %k0, %ymm0
|
||||
; SKX-NEXT: vmovdqa %ymm0, (%rsp)
|
||||
; SKX-NEXT: andl $31, %edi
|
||||
; SKX-NEXT: movzbl (%rsp,%rdi,2), %eax
|
||||
; SKX-NEXT: movq %rsp, %rax
|
||||
; SKX-NEXT: movzbl (%rdi,%rax), %eax
|
||||
; SKX-NEXT: andl $1, %eax
|
||||
; SKX-NEXT: movq %rbp, %rsp
|
||||
; SKX-NEXT: popq %rbp
|
||||
@ -1816,20 +1785,19 @@ define i32 @test_insertelement_variable_v32i1(<32 x i8> %a, i8 %b, i32 %index) {
|
||||
; SKX-NEXT: .cfi_offset %rbp, -16
|
||||
; SKX-NEXT: movq %rsp, %rbp
|
||||
; SKX-NEXT: .cfi_def_cfa_register %rbp
|
||||
; SKX-NEXT: andq $-64, %rsp
|
||||
; SKX-NEXT: subq $128, %rsp
|
||||
; SKX-NEXT: andq $-32, %rsp
|
||||
; SKX-NEXT: subq $64, %rsp
|
||||
; SKX-NEXT: ## kill: def %esi killed %esi def %rsi
|
||||
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; SKX-NEXT: vpcmpnleub %ymm1, %ymm0, %k0
|
||||
; SKX-NEXT: xorl %eax, %eax
|
||||
; SKX-NEXT: testb %dil, %dil
|
||||
; SKX-NEXT: setne %al
|
||||
; SKX-NEXT: vpmovm2w %k0, %zmm0
|
||||
; SKX-NEXT: vmovdqa32 %zmm0, (%rsp)
|
||||
; SKX-NEXT: andl $31, %esi
|
||||
; SKX-NEXT: movw %ax, (%rsp,%rsi,2)
|
||||
; SKX-NEXT: vpsllw $15, (%rsp), %zmm0
|
||||
; SKX-NEXT: vpmovw2m %zmm0, %k0
|
||||
; SKX-NEXT: testb %dil, %dil
|
||||
; SKX-NEXT: vpmovm2b %k0, %ymm0
|
||||
; SKX-NEXT: vmovdqa %ymm0, (%rsp)
|
||||
; SKX-NEXT: movq %rsp, %rax
|
||||
; SKX-NEXT: setne (%rsi,%rax)
|
||||
; SKX-NEXT: vpsllw $7, (%rsp), %ymm0
|
||||
; SKX-NEXT: vpmovb2m %ymm0, %k0
|
||||
; SKX-NEXT: kmovd %k0, %eax
|
||||
; SKX-NEXT: movq %rbp, %rsp
|
||||
; SKX-NEXT: popq %rbp
|
||||
|
Loading…
Reference in New Issue
Block a user