mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
[X86] Make reduceMaskedLoadToScalarLoad/reduceMaskedStoreToScalarStore work for avx512 after type legalization.
After type legalization, the scalar elements of the vXi1 build_vector will have been legalized to i8 by padding with 0s, so a true element is no longer all ones and we can't check for that. Instead, we should just look at bit 0 of the constant. Differential Revision: https://reviews.llvm.org/D87863
This commit is contained in:
parent
5cf94a6388
commit
634488ce52
@ -44454,7 +44454,7 @@ static int getOneTrueElt(SDValue V) {
|
||||
auto *ConstNode = dyn_cast<ConstantSDNode>(Op);
|
||||
if (!ConstNode)
|
||||
return -1;
|
||||
if (ConstNode->getAPIntValue().isAllOnesValue()) {
|
||||
if (ConstNode->getAPIntValue().countTrailingOnes() >= 1) {
|
||||
// If we already found a one, this is too many.
|
||||
if (TrueIndex >= 0)
|
||||
return -1;
|
||||
|
@ -7235,43 +7235,43 @@ define <16 x i64> @load_one_mask_bit_set6(<16 x i64>* %addr, <16 x i64> %val) {
|
||||
;
|
||||
; AVX512F-LABEL: load_one_mask_bit_set6:
|
||||
; AVX512F: ## %bb.0:
|
||||
; AVX512F-NEXT: movb $4, %al
|
||||
; AVX512F-NEXT: kmovw %eax, %k1
|
||||
; AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1}
|
||||
; AVX512F-NEXT: movb $36, %al
|
||||
; AVX512F-NEXT: kmovw %eax, %k1
|
||||
; AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1 {%k1}
|
||||
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm2
|
||||
; AVX512F-NEXT: vpinsrq $0, 16(%rdi), %xmm2, %xmm2
|
||||
; AVX512F-NEXT: vinserti32x4 $1, %xmm2, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: load_one_mask_bit_set6:
|
||||
; AVX512VLDQ: ## %bb.0:
|
||||
; AVX512VLDQ-NEXT: movb $4, %al
|
||||
; AVX512VLDQ-NEXT: kmovw %eax, %k1
|
||||
; AVX512VLDQ-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1}
|
||||
; AVX512VLDQ-NEXT: movb $36, %al
|
||||
; AVX512VLDQ-NEXT: kmovw %eax, %k1
|
||||
; AVX512VLDQ-NEXT: vmovdqu64 64(%rdi), %zmm1 {%k1}
|
||||
; AVX512VLDQ-NEXT: vextracti128 $1, %ymm0, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpinsrq $0, 16(%rdi), %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vinserti32x4 $1, %xmm2, %zmm0, %zmm0
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: load_one_mask_bit_set6:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: movb $4, %al
|
||||
; AVX512VLBW-NEXT: kmovd %eax, %k1
|
||||
; AVX512VLBW-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1}
|
||||
; AVX512VLBW-NEXT: movb $36, %al
|
||||
; AVX512VLBW-NEXT: kmovd %eax, %k1
|
||||
; AVX512VLBW-NEXT: vmovdqu64 64(%rdi), %zmm1 {%k1}
|
||||
; AVX512VLBW-NEXT: vextracti128 $1, %ymm0, %xmm2
|
||||
; AVX512VLBW-NEXT: vpinsrq $0, 16(%rdi), %xmm2, %xmm2
|
||||
; AVX512VLBW-NEXT: vinserti32x4 $1, %xmm2, %zmm0, %zmm0
|
||||
; AVX512VLBW-NEXT: retq
|
||||
;
|
||||
; X86-AVX512-LABEL: load_one_mask_bit_set6:
|
||||
; X86-AVX512: ## %bb.0:
|
||||
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-AVX512-NEXT: movb $4, %cl
|
||||
; X86-AVX512-NEXT: kmovd %ecx, %k1
|
||||
; X86-AVX512-NEXT: vmovdqu64 (%eax), %zmm0 {%k1}
|
||||
; X86-AVX512-NEXT: movb $36, %cl
|
||||
; X86-AVX512-NEXT: kmovd %ecx, %k1
|
||||
; X86-AVX512-NEXT: vmovdqu64 64(%eax), %zmm1 {%k1}
|
||||
; X86-AVX512-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||
; X86-AVX512-NEXT: vmovlps {{.*#+}} xmm2 = mem[0,1],xmm2[2,3]
|
||||
; X86-AVX512-NEXT: vinsertf32x4 $1, %xmm2, %zmm0, %zmm0
|
||||
; X86-AVX512-NEXT: retl
|
||||
%res = call <16 x i64> @llvm.masked.load.v16i64.p0v16i64(<16 x i64>* %addr, i32 4, <16 x i1> <i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false>, <16 x i64> %val)
|
||||
ret <16 x i64> %res
|
||||
|
@ -4943,48 +4943,23 @@ define void @one_mask_bit_set6(<16 x i64>* %addr, <16 x i64> %val) {
|
||||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: one_mask_bit_set6:
|
||||
; AVX512F: ## %bb.0:
|
||||
; AVX512F-NEXT: movb $8, %al
|
||||
; AVX512F-NEXT: kmovw %eax, %k1
|
||||
; AVX512F-NEXT: vmovdqu64 %zmm1, 64(%rdi) {%k1}
|
||||
; AVX512F-NEXT: movb $64, %al
|
||||
; AVX512F-NEXT: kmovw %eax, %k1
|
||||
; AVX512F-NEXT: vmovdqu64 %zmm0, (%rdi) {%k1}
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: one_mask_bit_set6:
|
||||
; AVX512VLDQ: ## %bb.0:
|
||||
; AVX512VLDQ-NEXT: movb $8, %al
|
||||
; AVX512VLDQ-NEXT: kmovw %eax, %k1
|
||||
; AVX512VLDQ-NEXT: vmovdqu64 %zmm1, 64(%rdi) {%k1}
|
||||
; AVX512VLDQ-NEXT: movb $64, %al
|
||||
; AVX512VLDQ-NEXT: kmovw %eax, %k1
|
||||
; AVX512VLDQ-NEXT: vmovdqu64 %zmm0, (%rdi) {%k1}
|
||||
; AVX512VLDQ-NEXT: vzeroupper
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: one_mask_bit_set6:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: movb $8, %al
|
||||
; AVX512VLBW-NEXT: kmovd %eax, %k1
|
||||
; AVX512VLBW-NEXT: vmovdqu64 %zmm1, 64(%rdi) {%k1}
|
||||
; AVX512VLBW-NEXT: movb $64, %al
|
||||
; AVX512VLBW-NEXT: kmovd %eax, %k1
|
||||
; AVX512VLBW-NEXT: vmovdqu64 %zmm0, (%rdi) {%k1}
|
||||
; AVX512VLBW-NEXT: vzeroupper
|
||||
; AVX512VLBW-NEXT: retq
|
||||
; AVX512-LABEL: one_mask_bit_set6:
|
||||
; AVX512: ## %bb.0:
|
||||
; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0
|
||||
; AVX512-NEXT: vmovlps %xmm0, 48(%rdi)
|
||||
; AVX512-NEXT: vextracti128 $1, %ymm1, %xmm0
|
||||
; AVX512-NEXT: vpextrq $1, %xmm0, 88(%rdi)
|
||||
; AVX512-NEXT: vzeroupper
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; X86-AVX512-LABEL: one_mask_bit_set6:
|
||||
; X86-AVX512: ## %bb.0:
|
||||
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-AVX512-NEXT: movb $8, %cl
|
||||
; X86-AVX512-NEXT: kmovd %ecx, %k1
|
||||
; X86-AVX512-NEXT: vmovdqu64 %zmm1, 64(%eax) {%k1}
|
||||
; X86-AVX512-NEXT: movb $64, %cl
|
||||
; X86-AVX512-NEXT: kmovd %ecx, %k1
|
||||
; X86-AVX512-NEXT: vmovdqu64 %zmm0, (%eax) {%k1}
|
||||
; X86-AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0
|
||||
; X86-AVX512-NEXT: vmovlps %xmm0, 48(%eax)
|
||||
; X86-AVX512-NEXT: vextractf128 $1, %ymm1, %xmm0
|
||||
; X86-AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; X86-AVX512-NEXT: vmovlps %xmm0, 88(%eax)
|
||||
; X86-AVX512-NEXT: vzeroupper
|
||||
; X86-AVX512-NEXT: retl
|
||||
call void @llvm.masked.store.v16i64.p0v16i64(<16 x i64> %val, <16 x i64>* %addr, i32 4, <16 x i1><i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false>)
|
||||
|
Loading…
x
Reference in New Issue
Block a user