[X86] Handle all versions of vXi1 insert_vector_elt with a constant index without falling back to shuffles.

We previously only supported inserting into the LSB or MSB, where it was easy to zero that bit and then OR in the new value.
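For reference, the old LSB/MSB special cases behave like the following scalar sketch on a 16-bit mask register (hypothetical helper names; it assumes the target bit is cleared with a zero-filling shift pair, matching the kshift-based zeroing the lowering relies on):

#include <cstdint>

// Clear the target bit with a shift pair, then OR the new bit in.
uint16_t insertLsbOldStyle(uint16_t Vec, bool Elt) {
  Vec = uint16_t(Vec >> 1) << 1; // clear bit 0 (kshiftr + kshiftl)
  return Vec | uint16_t(Elt);    // OR in the new value (kor)
}

uint16_t insertMsbOldStyle(uint16_t Vec, bool Elt) {
  Vec = uint16_t(Vec << 1) >> 1;    // clear bit 15 (kshiftl + kshiftr)
  return Vec | uint16_t(Elt << 15); // OR in the new value (kor)
}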

This change effectively extracts the old value and the new value, xors them together, and then xors that single bit into the correct position in the original vector. Since old ^ (old ^ new) = new, the old value extracted by the first xor cancels against the bit still in the vector, leaving the new value in that position.

The way I've implemented this uses three shifts and two xors, and needs one additional register. We could avoid the additional register at the cost of another shift.
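A minimal scalar sketch of that sequence on a 16-bit mask (the function name is illustrative; the actual lowering builds the equivalent KSHIFTR/KSHIFTL/XOR DAG nodes shown in the diff below, relying on the shifts filling with zeroes):

#include <cstdint>

uint16_t insertBitXorStyle(uint16_t Vec, bool Elt, unsigned Idx) {
  uint16_t Merged = Vec >> Idx;    // old bit now at bit 0 (Idx must be < 16)
  Merged ^= uint16_t(Elt);         // bit 0 = old ^ new
  Merged = uint16_t(Merged << 15); // move to MSB, zeroing the low bits
  Merged >>= 15 - Idx;             // back to position Idx, zeroing the high bits
  return Merged ^ Vec;             // at Idx: old ^ (old ^ new) = new
}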

llvm-svn: 320120
Craig Topper 2017-12-08 00:16:09 +00:00
parent 674cb8ec15
commit 1f33f8f1b9
7 changed files with 6543 additions and 8173 deletions

View File

@@ -14699,21 +14699,14 @@ static SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG,
   // If the kshift instructions of the correct width aren't natively supported
   // then we need to promote the vector to the native size to get the correct
   // zeroing behavior.
-  bool HasNativeShift = true;
   if ((!Subtarget.hasDQI() && NumElems == 8) || (NumElems < 8)) {
-    HasNativeShift = false;
-    // For now don't do this if we are going to end up using the shuffle
-    // below. This minimizes test diffs.
-    // TODO: Remove this restriction once we no longer need a shuffle fallback.
-    if (Vec.isUndef() || IdxVal == 0) {
-      // Need to promote to v16i1, do the insert, then extract back.
-      Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
-                        DAG.getUNDEF(MVT::v16i1), Vec,
-                        DAG.getIntPtrConstant(0, dl));
-      Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v16i1, Vec, Elt, Idx);
-      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VecVT, Op,
-                         DAG.getIntPtrConstant(0, dl));
-    }
+    // Need to promote to v16i1, do the insert, then extract back.
+    Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
+                      DAG.getUNDEF(MVT::v16i1), Vec,
+                      DAG.getIntPtrConstant(0, dl));
+    Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v16i1, Vec, Elt, Idx);
+    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VecVT, Op,
+                       DAG.getIntPtrConstant(0, dl));
   }
 
   SDValue EltInVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Elt);
@@ -14741,7 +14734,7 @@ static SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG,
     return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec);
   }
   // Insertion of one bit into last position
-  if (HasNativeShift && IdxVal == NumElems - 1) {
+  if (IdxVal == NumElems - 1) {
     // Move the bit to the last position inside the vector.
     EltInVec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, EltInVec,
                            DAG.getConstant(IdxVal, dl, MVT::i8));
@@ -14754,12 +14747,20 @@ static SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG,
     return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec);
   }
 
-  // Use shuffle to insert element.
-  SmallVector<int, 64> MaskVec(NumElems);
-  for (unsigned i = 0; i != NumElems; ++i)
-    MaskVec[i] = (i == IdxVal) ? NumElems : i;
-  return DAG.getVectorShuffle(VecVT, dl, Vec, EltInVec, MaskVec);
+  // Move the current value of the bit to be replaced to bit 0.
+  SDValue Merged = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
+                               DAG.getConstant(IdxVal, dl, MVT::i8));
+  // Xor with the new bit.
+  Merged = DAG.getNode(ISD::XOR, dl, VecVT, Merged, EltInVec);
+  // Shift to MSB, filling bottom bits with 0.
+  Merged = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, Merged,
+                       DAG.getConstant(NumElems - 1, dl, MVT::i8));
+  // Shift to the final position, filling upper bits with 0.
+  Merged = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Merged,
+                       DAG.getConstant(NumElems - 1 - IdxVal, dl, MVT::i8));
+  // Xor with original vector to cancel out the original bit value that's
+  // still present.
+  return DAG.getNode(ISD::XOR, dl, VecVT, Merged, Vec);
 }
 
 SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,

View File

@@ -309,31 +309,28 @@ define i16 @test16(i1 *%addr, i16 %a) {
; KNL-LABEL: test16:
; KNL: ## %bb.0:
; KNL-NEXT: movb (%rdi), %al
; KNL-NEXT: kmovw %esi, %k1
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,7,8,9,16,11,12,13,14,15]
; KNL-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; KNL-NEXT: vpslld $31, %zmm2, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %esi, %k0
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kshiftrw $10, %k0, %k2
; KNL-NEXT: kxorw %k1, %k2, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $5, %k1, %k1
; KNL-NEXT: kxorw %k0, %k1, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: def %ax killed %ax killed %eax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test16:
; SKX: ## %bb.0:
; SKX-NEXT: kmovb (%rdi), %k0
; SKX-NEXT: kmovd %esi, %k1
; SKX-NEXT: vpmovm2d %k0, %zmm0
; SKX-NEXT: vpmovm2d %k1, %zmm1
; SKX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,7,8,9,16,11,12,13,14,15]
; SKX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; SKX-NEXT: vpmovd2m %zmm2, %k0
; SKX-NEXT: kshiftrw $10, %k1, %k2
; SKX-NEXT: kxorw %k0, %k2, %k0
; SKX-NEXT: kshiftlw $15, %k0, %k0
; SKX-NEXT: kshiftrw $5, %k0, %k0
; SKX-NEXT: kxorw %k1, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: ## kill: def %ax killed %ax killed %eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
%x = load i1 , i1 * %addr, align 128
%a1 = bitcast i16 %a to <16 x i1>
@@ -346,31 +343,28 @@ define i8 @test17(i1 *%addr, i8 %a) {
; KNL-LABEL: test17:
; KNL: ## %bb.0:
; KNL-NEXT: movb (%rdi), %al
; KNL-NEXT: kmovw %esi, %k1
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,8,5,6,7]
; KNL-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; KNL-NEXT: vpsllq $63, %zmm2, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %esi, %k0
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kshiftrw $4, %k0, %k2
; KNL-NEXT: kxorw %k1, %k2, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $11, %k1, %k1
; KNL-NEXT: kxorw %k0, %k1, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: def %al killed %al killed %eax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test17:
; SKX: ## %bb.0:
; SKX-NEXT: kmovb (%rdi), %k0
; SKX-NEXT: kmovd %esi, %k1
; SKX-NEXT: vpmovm2q %k0, %zmm0
; SKX-NEXT: vpmovm2q %k1, %zmm1
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,8,5,6,7]
; SKX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; SKX-NEXT: vpmovq2m %zmm2, %k0
; SKX-NEXT: kshiftrb $4, %k1, %k2
; SKX-NEXT: kxorb %k0, %k2, %k0
; SKX-NEXT: kshiftlb $7, %k0, %k0
; SKX-NEXT: kshiftrb $3, %k0, %k0
; SKX-NEXT: kxorb %k1, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: ## kill: def %al killed %al killed %eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
%x = load i1 , i1 * %addr, align 128
%a1 = bitcast i8 %a to <8 x i1>
@@ -962,12 +956,12 @@ define i32 @test_insertelement_v32i1(i32 %a, i32 %b, <32 x i32> %x , <32 x i32>
; SKX-NEXT: vpcmpltud %zmm2, %zmm0, %k0
; SKX-NEXT: vpcmpltud %zmm3, %zmm1, %k1
; SKX-NEXT: kunpckwd %k0, %k1, %k0
; SKX-NEXT: vpmovm2w %k0, %zmm0
; SKX-NEXT: kmovd %eax, %k0
; SKX-NEXT: vpmovm2w %k0, %zmm1
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,32,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
; SKX-NEXT: vpermi2w %zmm1, %zmm0, %zmm2
; SKX-NEXT: vpmovw2m %zmm2, %k0
; SKX-NEXT: kshiftrd $4, %k0, %k1
; SKX-NEXT: kmovd %eax, %k2
; SKX-NEXT: kxord %k2, %k1, %k1
; SKX-NEXT: kshiftld $31, %k1, %k1
; SKX-NEXT: kshiftrd $27, %k1, %k1
; SKX-NEXT: kxord %k0, %k1, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
@@ -988,37 +982,33 @@ define i8 @test_iinsertelement_v4i1(i32 %a, i32 %b, <4 x i32> %x , <4 x i32> %y)
; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1
; KNL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; KNL-NEXT: vpextrb $4, %xmm0, %ecx
; KNL-NEXT: kmovw %ecx, %k1
; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT: kmovw %ecx, %k0
; KNL-NEXT: vpextrb $0, %xmm0, %ecx
; KNL-NEXT: andl $1, %ecx
; KNL-NEXT: kmovw %ecx, %k0
; KNL-NEXT: kshiftrw $1, %k0, %k1
; KNL-NEXT: kshiftlw $1, %k1, %k1
; KNL-NEXT: korw %k0, %k1, %k1
; KNL-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; KNL-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; KNL-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; KNL-NEXT: vpsllq $63, %zmm3, %zmm1
; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; KNL-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; KNL-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; KNL-NEXT: vpsllq $63, %zmm3, %zmm1
; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT: kmovw %ecx, %k1
; KNL-NEXT: kshiftrw $1, %k0, %k2
; KNL-NEXT: kshiftlw $1, %k2, %k2
; KNL-NEXT: korw %k1, %k2, %k1
; KNL-NEXT: kshiftrw $1, %k1, %k2
; KNL-NEXT: kxorw %k0, %k2, %k0
; KNL-NEXT: kshiftlw $15, %k0, %k0
; KNL-NEXT: kshiftrw $14, %k0, %k0
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kshiftrw $2, %k0, %k1
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $13, %k1, %k1
; KNL-NEXT: kxorw %k0, %k1, %k0
; KNL-NEXT: kshiftrw $3, %k0, %k1
; KNL-NEXT: vpextrb $12, %xmm0, %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; KNL-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; KNL-NEXT: vpsllq $63, %zmm2, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $12, %k1, %k1
; KNL-NEXT: kxorw %k0, %k1, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: def %al killed %al killed %eax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_iinsertelement_v4i1:
@@ -1026,12 +1016,12 @@ define i8 @test_iinsertelement_v4i1(i32 %a, i32 %b, <4 x i32> %x , <4 x i32> %y)
; SKX-NEXT: cmpl %esi, %edi
; SKX-NEXT: setb %al
; SKX-NEXT: vpcmpltud %xmm1, %xmm0, %k0
; SKX-NEXT: vpmovm2d %k0, %xmm0
; SKX-NEXT: kmovd %eax, %k0
; SKX-NEXT: vpmovm2d %k0, %xmm1
; SKX-NEXT: vpbroadcastq %xmm1, %xmm1
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; SKX-NEXT: vpmovd2m %xmm0, %k0
; SKX-NEXT: kshiftrw $2, %k0, %k1
; SKX-NEXT: kmovd %eax, %k2
; SKX-NEXT: kxorw %k2, %k1, %k1
; SKX-NEXT: kshiftlw $15, %k1, %k1
; SKX-NEXT: kshiftrw $13, %k1, %k1
; SKX-NEXT: kxorw %k0, %k1, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: ## kill: def %al killed %al killed %eax
; SKX-NEXT: retq
@@ -1057,17 +1047,15 @@ define i8 @test_iinsertelement_v2i1(i32 %a, i32 %b, <2 x i64> %x , <2 x i64> %y)
; KNL-NEXT: kmovw %ecx, %k0
; KNL-NEXT: kshiftrw $1, %k0, %k1
; KNL-NEXT: kshiftlw $1, %k1, %k1
; KNL-NEXT: korw %k0, %k1, %k1
; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
; KNL-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; KNL-NEXT: vpsllq $63, %zmm2, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: korw %k0, %k1, %k0
; KNL-NEXT: kshiftrw $1, %k0, %k1
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $14, %k1, %k1
; KNL-NEXT: kxorw %k0, %k1, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: def %al killed %al killed %eax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_iinsertelement_v2i1:
@@ -1075,11 +1063,12 @@ define i8 @test_iinsertelement_v2i1(i32 %a, i32 %b, <2 x i64> %x , <2 x i64> %y)
; SKX-NEXT: cmpl %esi, %edi
; SKX-NEXT: setb %al
; SKX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
; SKX-NEXT: vpmovm2q %k0, %xmm0
; SKX-NEXT: kmovd %eax, %k0
; SKX-NEXT: vpmovm2q %k0, %xmm1
; SKX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SKX-NEXT: vpmovq2m %xmm0, %k0
; SKX-NEXT: kshiftrw $1, %k0, %k1
; SKX-NEXT: kmovd %eax, %k2
; SKX-NEXT: kxorw %k2, %k1, %k1
; SKX-NEXT: kshiftlw $15, %k1, %k1
; SKX-NEXT: kshiftrw $14, %k1, %k1
; SKX-NEXT: kxorw %k0, %k1, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: ## kill: def %al killed %al killed %eax
; SKX-NEXT: retq

View File

@@ -972,14 +972,11 @@ define <64 x i8> @test16(i64 %x) {
; SKX-NEXT: kmovq %rdi, %k0
; SKX-NEXT: movb $1, %al
; SKX-NEXT: kmovd %eax, %k1
; SKX-NEXT: vpmovm2b %k1, %zmm0
; SKX-NEXT: vpsllq $40, %xmm0, %xmm0
; SKX-NEXT: vpmovm2b %k0, %zmm1
; SKX-NEXT: movl $32, %eax
; SKX-NEXT: kmovd %eax, %k1
; SKX-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
; SKX-NEXT: vpmovb2m %zmm0, %k0
; SKX-NEXT: kshiftrq $5, %k0, %k2
; SKX-NEXT: kxorq %k1, %k2, %k1
; SKX-NEXT: kshiftlq $63, %k1, %k1
; SKX-NEXT: kshiftrq $58, %k1, %k1
; SKX-NEXT: kxorq %k0, %k1, %k0
; SKX-NEXT: vpmovm2b %k0, %zmm0
; SKX-NEXT: retq
;
@@ -988,13 +985,11 @@ define <64 x i8> @test16(i64 %x) {
; AVX512BW-NEXT: kmovq %rdi, %k0
; AVX512BW-NEXT: movb $1, %al
; AVX512BW-NEXT: kmovd %eax, %k1
; AVX512BW-NEXT: vpmovm2b %k1, %zmm0
; AVX512BW-NEXT: vpsllq $40, %xmm0, %xmm0
; AVX512BW-NEXT: vpmovm2b %k0, %zmm1
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512BW-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
; AVX512BW-NEXT: kshiftrq $5, %k0, %k2
; AVX512BW-NEXT: kxorq %k1, %k2, %k1
; AVX512BW-NEXT: kshiftlq $63, %k1, %k1
; AVX512BW-NEXT: kshiftrq $58, %k1, %k1
; AVX512BW-NEXT: kxorq %k0, %k1, %k0
; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
; AVX512BW-NEXT: retq
;
@@ -1085,14 +1080,11 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
; SKX-NEXT: cmpl %edx, %esi
; SKX-NEXT: setg %al
; SKX-NEXT: kmovd %eax, %k1
; SKX-NEXT: vpmovm2b %k1, %zmm0
; SKX-NEXT: vpsllq $40, %xmm0, %xmm0
; SKX-NEXT: vpmovm2b %k0, %zmm1
; SKX-NEXT: movl $32, %eax
; SKX-NEXT: kmovd %eax, %k1
; SKX-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
; SKX-NEXT: vpmovb2m %zmm0, %k0
; SKX-NEXT: kshiftrq $5, %k0, %k2
; SKX-NEXT: kxorq %k1, %k2, %k1
; SKX-NEXT: kshiftlq $63, %k1, %k1
; SKX-NEXT: kshiftrq $58, %k1, %k1
; SKX-NEXT: kxorq %k0, %k1, %k0
; SKX-NEXT: vpmovm2b %k0, %zmm0
; SKX-NEXT: retq
;
@@ -1102,13 +1094,11 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
; AVX512BW-NEXT: cmpl %edx, %esi
; AVX512BW-NEXT: setg %al
; AVX512BW-NEXT: kmovd %eax, %k1
; AVX512BW-NEXT: vpmovm2b %k1, %zmm0
; AVX512BW-NEXT: vpsllq $40, %xmm0, %xmm0
; AVX512BW-NEXT: vpmovm2b %k0, %zmm1
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512BW-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
; AVX512BW-NEXT: kshiftrq $5, %k0, %k2
; AVX512BW-NEXT: kxorq %k1, %k2, %k1
; AVX512BW-NEXT: kshiftlq $63, %k1, %k1
; AVX512BW-NEXT: kshiftrq $58, %k1, %k1
; AVX512BW-NEXT: kxorq %k0, %k1, %k0
; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
; AVX512BW-NEXT: retq
;
@@ -1159,24 +1149,22 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
define <8 x i1> @test18(i8 %a, i16 %y) {
; KNL-LABEL: test18:
; KNL: ## %bb.0:
; KNL-NEXT: kmovw %edi, %k2
; KNL-NEXT: kmovw %esi, %k0
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: kmovw %esi, %k1
; KNL-NEXT: kshiftlw $7, %k1, %k2
; KNL-NEXT: kshiftrw $15, %k2, %k2
; KNL-NEXT: kshiftlw $6, %k1, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kshiftlw $6, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k3
; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k3} {z}
; KNL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7]
; KNL-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; KNL-NEXT: vpsllq $63, %zmm2, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k2
; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,8]
; KNL-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; KNL-NEXT: vpsllq $63, %zmm2, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
; KNL-NEXT: kshiftrw $6, %k0, %k3
; KNL-NEXT: kxorw %k1, %k3, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $9, %k1, %k1
; KNL-NEXT: kxorw %k0, %k1, %k0
; KNL-NEXT: kshiftrw $7, %k0, %k1
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $8, %k1, %k1
; KNL-NEXT: kxorw %k0, %k1, %k1
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdw %zmm0, %ymm0
; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %ymm0
@@ -1185,45 +1173,42 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
;
; SKX-LABEL: test18:
; SKX: ## %bb.0:
; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: kmovd %esi, %k2
; SKX-NEXT: kshiftlw $7, %k2, %k0
; SKX-NEXT: kshiftrw $15, %k0, %k0
; SKX-NEXT: kshiftlw $6, %k2, %k2
; SKX-NEXT: kmovd %edi, %k0
; SKX-NEXT: kmovd %esi, %k1
; SKX-NEXT: kshiftlw $7, %k1, %k2
; SKX-NEXT: kshiftrw $15, %k2, %k2
; SKX-NEXT: vpmovm2q %k1, %zmm0
; SKX-NEXT: vpmovm2q %k2, %zmm1
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7]
; SKX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; SKX-NEXT: vpmovq2m %zmm2, %k1
; SKX-NEXT: kshiftlb $1, %k1, %k1
; SKX-NEXT: kshiftlw $6, %k1, %k1
; SKX-NEXT: kshiftrw $15, %k1, %k1
; SKX-NEXT: kshiftrb $6, %k0, %k3
; SKX-NEXT: kxorb %k1, %k3, %k1
; SKX-NEXT: kshiftlb $7, %k1, %k1
; SKX-NEXT: kshiftrb $1, %k1, %k1
; SKX-NEXT: kshiftlb $7, %k0, %k0
; SKX-NEXT: korb %k0, %k1, %k0
; SKX-NEXT: kxorb %k0, %k1, %k0
; SKX-NEXT: kshiftlb $1, %k0, %k0
; SKX-NEXT: kshiftrb $1, %k0, %k0
; SKX-NEXT: kshiftlb $7, %k2, %k1
; SKX-NEXT: korb %k1, %k0, %k0
; SKX-NEXT: vpmovm2w %k0, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test18:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %edi, %k2
; AVX512BW-NEXT: kmovd %esi, %k0
; AVX512BW-NEXT: kshiftlw $7, %k0, %k1
; AVX512BW-NEXT: kmovd %edi, %k0
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: kshiftlw $7, %k1, %k2
; AVX512BW-NEXT: kshiftrw $15, %k2, %k2
; AVX512BW-NEXT: kshiftlw $6, %k1, %k1
; AVX512BW-NEXT: kshiftrw $15, %k1, %k1
; AVX512BW-NEXT: kshiftlw $6, %k0, %k0
; AVX512BW-NEXT: kshiftrw $15, %k0, %k3
; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; AVX512BW-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k3} {z}
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7]
; AVX512BW-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT: vpsllq $63, %zmm2, %zmm0
; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k2
; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; AVX512BW-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,8]
; AVX512BW-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT: vpsllq $63, %zmm2, %zmm0
; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512BW-NEXT: kshiftrw $6, %k0, %k3
; AVX512BW-NEXT: kxorw %k1, %k3, %k1
; AVX512BW-NEXT: kshiftlw $15, %k1, %k1
; AVX512BW-NEXT: kshiftrw $9, %k1, %k1
; AVX512BW-NEXT: kxorw %k0, %k1, %k0
; AVX512BW-NEXT: kshiftrw $7, %k0, %k1
; AVX512BW-NEXT: kxorw %k2, %k1, %k1
; AVX512BW-NEXT: kshiftlw $15, %k1, %k1
; AVX512BW-NEXT: kshiftrw $8, %k1, %k1
; AVX512BW-NEXT: kxorw %k0, %k1, %k0
; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
; AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0
; AVX512BW-NEXT: vzeroupper
@@ -1231,21 +1216,21 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
;
; AVX512DQ-LABEL: test18:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: kmovw %edi, %k1
; AVX512DQ-NEXT: kmovw %esi, %k2
; AVX512DQ-NEXT: kshiftlw $7, %k2, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kshiftlw $6, %k2, %k2
; AVX512DQ-NEXT: kmovw %edi, %k0
; AVX512DQ-NEXT: kmovw %esi, %k1
; AVX512DQ-NEXT: kshiftlw $7, %k1, %k2
; AVX512DQ-NEXT: kshiftrw $15, %k2, %k2
; AVX512DQ-NEXT: vpmovm2q %k1, %zmm0
; AVX512DQ-NEXT: vpmovm2q %k2, %zmm1
; AVX512DQ-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7]
; AVX512DQ-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; AVX512DQ-NEXT: vpmovq2m %zmm2, %k1
; AVX512DQ-NEXT: kshiftlb $1, %k1, %k1
; AVX512DQ-NEXT: kshiftlw $6, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kshiftrb $6, %k0, %k3
; AVX512DQ-NEXT: kxorb %k1, %k3, %k1
; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1
; AVX512DQ-NEXT: kshiftrb $1, %k1, %k1
; AVX512DQ-NEXT: kshiftlb $7, %k0, %k0
; AVX512DQ-NEXT: korb %k0, %k1, %k0
; AVX512DQ-NEXT: kxorb %k0, %k1, %k0
; AVX512DQ-NEXT: kshiftlb $1, %k0, %k0
; AVX512DQ-NEXT: kshiftrb $1, %k0, %k0
; AVX512DQ-NEXT: kshiftlb $7, %k2, %k1
; AVX512DQ-NEXT: korb %k1, %k0, %k0
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT: ## kill: def %xmm0 killed %xmm0 killed %ymm0

View File

@@ -7325,14 +7325,11 @@ define <64 x i8> @vmov_test16(i64 %x) {
; GENERIC-NEXT: kmovq %rdi, %k0 # sched: [1:0.33]
; GENERIC-NEXT: movb $1, %al # sched: [1:0.33]
; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovm2b %k1, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: vpsllq $40, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovm2b %k0, %zmm1 # sched: [1:0.33]
; GENERIC-NEXT: movl $32, %eax # sched: [1:0.33]
; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1} # sched: [2:1.00]
; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:0.33]
; GENERIC-NEXT: kshiftrq $5, %k0, %k2 # sched: [1:1.00]
; GENERIC-NEXT: kxorq %k1, %k2, %k1 # sched: [1:1.00]
; GENERIC-NEXT: kshiftlq $63, %k1, %k1 # sched: [1:1.00]
; GENERIC-NEXT: kshiftrq $58, %k1, %k1 # sched: [1:1.00]
; GENERIC-NEXT: kxorq %k0, %k1, %k0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -7341,14 +7338,11 @@ define <64 x i8> @vmov_test16(i64 %x) {
; SKX-NEXT: kmovq %rdi, %k0 # sched: [1:1.00]
; SKX-NEXT: movb $1, %al # sched: [1:0.25]
; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00]
; SKX-NEXT: vpmovm2b %k1, %zmm0 # sched: [1:0.25]
; SKX-NEXT: vpsllq $40, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpmovm2b %k0, %zmm1 # sched: [1:0.25]
; SKX-NEXT: movl $32, %eax # sched: [1:0.25]
; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00]
; SKX-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1} # sched: [1:0.33]
; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] sched: [3:1.00]
; SKX-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:1.00]
; SKX-NEXT: kshiftrq $5, %k0, %k2 # sched: [3:1.00]
; SKX-NEXT: kxorq %k1, %k2, %k1 # sched: [1:1.00]
; SKX-NEXT: kshiftlq $63, %k1, %k1 # sched: [3:1.00]
; SKX-NEXT: kshiftrq $58, %k1, %k1 # sched: [3:1.00]
; SKX-NEXT: kxorq %k0, %k1, %k0 # sched: [1:1.00]
; SKX-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
%a = bitcast i64 %x to <64 x i1>
@@ -7365,14 +7359,11 @@ define <64 x i8> @vmov_test17(i64 %x, i32 %y, i32 %z) {
; GENERIC-NEXT: cmpl %edx, %esi # sched: [1:0.33]
; GENERIC-NEXT: setg %al # sched: [1:0.50]
; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovm2b %k1, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: vpsllq $40, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovm2b %k0, %zmm1 # sched: [1:0.33]
; GENERIC-NEXT: movl $32, %eax # sched: [1:0.33]
; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1} # sched: [2:1.00]
; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:0.33]
; GENERIC-NEXT: kshiftrq $5, %k0, %k2 # sched: [1:1.00]
; GENERIC-NEXT: kxorq %k1, %k2, %k1 # sched: [1:1.00]
; GENERIC-NEXT: kshiftlq $63, %k1, %k1 # sched: [1:1.00]
; GENERIC-NEXT: kshiftrq $58, %k1, %k1 # sched: [1:1.00]
; GENERIC-NEXT: kxorq %k0, %k1, %k0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -7382,14 +7373,11 @@ define <64 x i8> @vmov_test17(i64 %x, i32 %y, i32 %z) {
; SKX-NEXT: cmpl %edx, %esi # sched: [1:0.25]
; SKX-NEXT: setg %al # sched: [1:0.50]
; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00]
; SKX-NEXT: vpmovm2b %k1, %zmm0 # sched: [1:0.25]
; SKX-NEXT: vpsllq $40, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpmovm2b %k0, %zmm1 # sched: [1:0.25]
; SKX-NEXT: movl $32, %eax # sched: [1:0.25]
; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00]
; SKX-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1} # sched: [1:0.33]
; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] sched: [3:1.00]
; SKX-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:1.00]
; SKX-NEXT: kshiftrq $5, %k0, %k2 # sched: [3:1.00]
; SKX-NEXT: kxorq %k1, %k2, %k1 # sched: [1:1.00]
; SKX-NEXT: kshiftlq $63, %k1, %k1 # sched: [3:1.00]
; SKX-NEXT: kshiftrq $58, %k1, %k1 # sched: [3:1.00]
; SKX-NEXT: kxorq %k0, %k1, %k0 # sched: [1:1.00]
; SKX-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
%a = bitcast i64 %x to <64 x i1>
@@ -7402,44 +7390,42 @@ define <8 x i1> @vmov_test18(i8 %a, i16 %y) {
define <8 x i1> @vmov_test18(i8 %a, i16 %y) {
; GENERIC-LABEL: vmov_test18:
; GENERIC: # %bb.0:
; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: kmovd %esi, %k2 # sched: [1:0.33]
; GENERIC-NEXT: kshiftlw $7, %k2, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kshiftrw $15, %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kshiftlw $6, %k2, %k2 # sched: [1:1.00]
; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: kshiftlw $7, %k1, %k2 # sched: [1:1.00]
; GENERIC-NEXT: kshiftrw $15, %k2, %k2 # sched: [1:1.00]
; GENERIC-NEXT: vpmovm2q %k1, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: vpmovm2q %k2, %zmm1 # sched: [1:0.33]
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7] sched: [4:0.50]
; GENERIC-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 # sched: [1:1.00]
; GENERIC-NEXT: vpmovq2m %zmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: kshiftlb $1, %k1, %k1 # sched: [1:1.00]
; GENERIC-NEXT: kshiftlw $6, %k1, %k1 # sched: [1:1.00]
; GENERIC-NEXT: kshiftrw $15, %k1, %k1 # sched: [1:1.00]
; GENERIC-NEXT: kshiftrb $6, %k0, %k3 # sched: [1:1.00]
; GENERIC-NEXT: kxorb %k1, %k3, %k1 # sched: [1:1.00]
; GENERIC-NEXT: kshiftlb $7, %k1, %k1 # sched: [1:1.00]
; GENERIC-NEXT: kshiftrb $1, %k1, %k1 # sched: [1:1.00]
; GENERIC-NEXT: kshiftlb $7, %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: korb %k0, %k1, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kxorb %k0, %k1, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kshiftlb $1, %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kshiftrb $1, %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kshiftlb $7, %k2, %k1 # sched: [1:1.00]
; GENERIC-NEXT: korb %k1, %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: vzeroupper
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vmov_test18:
; SKX: # %bb.0:
; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
; SKX-NEXT: kmovd %esi, %k2 # sched: [1:1.00]
; SKX-NEXT: kshiftlw $7, %k2, %k0 # sched: [3:1.00]
; SKX-NEXT: kshiftrw $15, %k0, %k0 # sched: [3:1.00]
; SKX-NEXT: kshiftlw $6, %k2, %k2 # sched: [3:1.00]
; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00]
; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00]
; SKX-NEXT: kshiftlw $7, %k1, %k2 # sched: [3:1.00]
; SKX-NEXT: kshiftrw $15, %k2, %k2 # sched: [3:1.00]
; SKX-NEXT: vpmovm2q %k1, %zmm0 # sched: [1:0.25]
; SKX-NEXT: vpmovm2q %k2, %zmm1 # sched: [1:0.25]
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7] sched: [8:0.50]
; SKX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 # sched: [3:1.00]
; SKX-NEXT: vpmovq2m %zmm2, %k1 # sched: [1:1.00]
; SKX-NEXT: kshiftlb $1, %k1, %k1 # sched: [3:1.00]
; SKX-NEXT: kshiftlw $6, %k1, %k1 # sched: [3:1.00]
; SKX-NEXT: kshiftrw $15, %k1, %k1 # sched: [3:1.00]
; SKX-NEXT: kshiftrb $6, %k0, %k3 # sched: [3:1.00]
; SKX-NEXT: kxorb %k1, %k3, %k1 # sched: [1:1.00]
; SKX-NEXT: kshiftlb $7, %k1, %k1 # sched: [3:1.00]
; SKX-NEXT: kshiftrb $1, %k1, %k1 # sched: [3:1.00]
; SKX-NEXT: kshiftlb $7, %k0, %k0 # sched: [3:1.00]
; SKX-NEXT: korb %k0, %k1, %k0 # sched: [1:1.00]
; SKX-NEXT: kxorb %k0, %k1, %k0 # sched: [1:1.00]
; SKX-NEXT: kshiftlb $1, %k0, %k0 # sched: [3:1.00]
; SKX-NEXT: kshiftrb $1, %k0, %k0 # sched: [3:1.00]
; SKX-NEXT: kshiftlb $7, %k2, %k1 # sched: [3:1.00]
; SKX-NEXT: korb %k1, %k0, %k0 # sched: [1:1.00]
; SKX-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.25]
; SKX-NEXT: vzeroupper # sched: [4:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%b = bitcast i8 %a to <8 x i1>
%b1 = bitcast i16 %y to <16 x i1>

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large