
[X86][AVX512] Fold concat(and(x,y),and(z,w)) -> and(concat(x,z),concat(y,w)) for 512-bit vectors

Helps vpternlog folding on non-AVX512BW targets
Simon Pilgrim 2020-08-01 20:34:39 +01:00
parent 6d5e604171
commit 63334e0fdd
11 changed files with 431 additions and 504 deletions
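
The fold is sound because bitwise operations act independently on each lane: joining two narrow AND results is the same as joining the operands first and doing a single wide AND. Below is a minimal standalone C++ sketch (illustrative only, not part of this patch) that checks the identity concat(and(x,y),and(z,w)) == and(concat(x,z),concat(y,w)) on plain integer arrays standing in for the 256-bit halves of a 512-bit vector:

// Illustrative check of the identity behind the fold; array values are arbitrary test data.
#include <array>
#include <cassert>
#include <cstdint>

int main() {
  std::array<uint32_t, 8> x{}, y{}, z{}, w{};
  for (uint32_t i = 0; i != 8; ++i) {
    x[i] = 0x01010101u * i;
    y[i] = ~x[i];
    z[i] = 0xDEADBEEFu ^ i;
    w[i] = 0x0F0F0F0Fu + i;
  }

  // concat(and(x,y), and(z,w)): two narrow ANDs, then join the halves.
  std::array<uint32_t, 16> lhs{};
  for (uint32_t i = 0; i != 8; ++i) {
    lhs[i] = x[i] & y[i];
    lhs[8 + i] = z[i] & w[i];
  }

  // and(concat(x,z), concat(y,w)): join the operands, then one wide AND.
  std::array<uint32_t, 16> a{}, b{}, rhs{};
  for (uint32_t i = 0; i != 8; ++i) {
    a[i] = x[i];
    a[8 + i] = z[i];
    b[i] = y[i];
    b[8 + i] = w[i];
  }
  for (uint32_t i = 0; i != 16; ++i)
    rhs[i] = a[i] & b[i];

  assert(lhs == rhs); // the fold only regroups lanes, it never changes values
  return 0;
}

The same regrouping works for OR, XOR and ANDNP, which is why the new case in combineConcatVectorOps handles all four opcodes.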


@@ -48090,6 +48090,25 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
                           Op0.getOperand(1));
      }
      break;
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
    case X86ISD::ANDNP:
      // TODO: Add 256-bit support.
      if (!IsSplat && VT.is512BitVector()) {
        SmallVector<SDValue, 2> LHS, RHS;
        for (unsigned i = 0; i != NumOps; ++i) {
          LHS.push_back(Ops[i].getOperand(0));
          RHS.push_back(Ops[i].getOperand(1));
        }
        MVT SrcVT = Op0.getOperand(0).getSimpleValueType();
        SrcVT = MVT::getVectorVT(SrcVT.getScalarType(),
                                 NumOps * SrcVT.getVectorNumElements());
        return DAG.getNode(Op0.getOpcode(), DL, VT,
                           DAG.getNode(ISD::CONCAT_VECTORS, DL, SrcVT, LHS),
                           DAG.getNode(ISD::CONCAT_VECTORS, DL, SrcVT, RHS));
      }
      break;
    case X86ISD::PACKSS:
    case X86ISD::PACKUS:
      if (!IsSplat && NumOps == 2 && VT.is256BitVector() &&
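
The payoff is visible in the KNL/AVX512F test diffs below: without AVX512BW a 64 x i8 compare is split into two 256-bit vpcmpeqb instructions, and previously each half was inverted by its own vpternlogq $15; once the halves are concatenated into one 512-bit value first, a single vpternlogq inverts the whole vector. The $15 immediate is the ternary-logic truth table for NOT of the first operand. The sketch below is a hypothetical helper (not part of the LLVM sources) showing how such an immediate can be derived by evaluating the boolean function on the canonical patterns A=0xF0, B=0xCC, C=0xAA, where bit i of the result is the function's value for the input combination encoded by i:

// Hypothetical helper: compute a vpternlog immediate from a 3-input boolean function.
#include <cstdint>
#include <cstdio>

static uint8_t ternlogImm(uint8_t (*f)(uint8_t, uint8_t, uint8_t)) {
  // Evaluating f bitwise on these constants enumerates all 8 input combinations.
  return f(0xF0, 0xCC, 0xAA);
}

int main() {
  // NOT of the first operand -> immediate 15, as in "vpternlogq $15, %zmm0, %zmm0, %zmm0".
  uint8_t notA = ternlogImm([](uint8_t a, uint8_t, uint8_t) -> uint8_t {
    return static_cast<uint8_t>(~a);
  });
  std::printf("NOT(A) immediate: %u\n", static_cast<unsigned>(notA)); // prints 15
  return 0;
}

The other immediates appearing in these tests encode different three-input expressions and can be derived the same way.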


@@ -1692,16 +1692,15 @@ define i64 @test_insertelement_variable_v64i1(<64 x i8> %a, i8 %b, i32 %index) {
; KNL-NEXT: andq $-64, %rsp
; KNL-NEXT: subq $128, %rsp
; KNL-NEXT: ## kill: def $esi killed $esi def $rsi
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm2
; KNL-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; KNL-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; KNL-NEXT: andl $63, %esi
; KNL-NEXT: testb %dil, %dil
; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
; KNL-NEXT: vmovdqa %ymm2, (%rsp)
; KNL-NEXT: vmovdqa64 %zmm0, (%rsp)
; KNL-NEXT: setne (%rsp,%rsi)
; KNL-NEXT: vpmovsxbd (%rsp), %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
@@ -1772,116 +1771,115 @@ define i96 @test_insertelement_variable_v96i1(<96 x i8> %a, i8 %b, i32 %index) {
; KNL-NEXT: subq $192, %rsp
; KNL-NEXT: movl 744(%rbp), %eax
; KNL-NEXT: andl $127, %eax
; KNL-NEXT: vmovd %edi, %xmm0
; KNL-NEXT: vpinsrb $1, %esi, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $4, %r8d, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $5, %r9d, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $6, 16(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $7, 24(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $8, 32(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $9, 40(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $10, 48(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $11, 56(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $12, 64(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $13, 72(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $14, 80(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $15, 88(%rbp), %xmm0, %xmm0
; KNL-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; KNL-NEXT: vpinsrb $1, 232(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $2, 240(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $3, 248(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $4, 256(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $5, 264(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $6, 272(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $7, 280(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $8, 288(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $9, 296(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $10, 304(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $11, 312(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $12, 320(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $13, 328(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $14, 336(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $15, 344(%rbp), %xmm0, %xmm0
; KNL-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; KNL-NEXT: vpinsrb $1, 104(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $2, 112(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $3, 120(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $4, 128(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $5, 136(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $6, 144(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $7, 152(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $8, 160(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $9, 168(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $10, 176(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $11, 184(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $12, 192(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $13, 200(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $14, 208(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $15, 216(%rbp), %xmm1, %xmm1
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; KNL-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; KNL-NEXT: vpinsrb $1, 232(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $2, 240(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $3, 248(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $4, 256(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $5, 264(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $6, 272(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $7, 280(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $8, 288(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $9, 296(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $10, 304(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $11, 312(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $12, 320(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $13, 328(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $14, 336(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $15, 344(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $1, 360(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $2, 368(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $3, 376(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $4, 384(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $5, 392(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $6, 400(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $7, 408(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $8, 416(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $9, 424(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $10, 432(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $11, 440(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $12, 448(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $13, 456(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $14, 464(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $15, 472(%rbp), %xmm1, %xmm1
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
; KNL-NEXT: vpxor %xmm0, %xmm0, %xmm0
; KNL-NEXT: vpcmpeqb %ymm0, %ymm1, %ymm1
; KNL-NEXT: vmovd %edi, %xmm2
; KNL-NEXT: vpinsrb $1, %esi, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $2, %edx, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $3, %ecx, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $4, %r8d, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $5, %r9d, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $6, 16(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $7, 24(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $8, 32(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $9, 40(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $10, 48(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $11, 56(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $12, 64(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $13, 72(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $14, 80(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $15, 88(%rbp), %xmm2, %xmm2
; KNL-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; KNL-NEXT: vpinsrb $1, 360(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $2, 368(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $3, 376(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $4, 384(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $5, 392(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $6, 400(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $7, 408(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $8, 416(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $9, 424(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $10, 432(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $11, 440(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $12, 448(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $13, 456(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $14, 464(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $15, 472(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $1, 104(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $2, 112(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $3, 120(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $4, 128(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $5, 136(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $6, 144(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $7, 152(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $8, 160(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $9, 168(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $10, 176(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $11, 184(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $12, 192(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $13, 200(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $14, 208(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $15, 216(%rbp), %xmm3, %xmm3
; KNL-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; KNL-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm2
; KNL-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
; KNL-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; KNL-NEXT: vpinsrb $1, 488(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $2, 496(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $3, 504(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $4, 512(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $5, 520(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $6, 528(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $7, 536(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $8, 544(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $9, 552(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $10, 560(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $11, 568(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $12, 576(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $13, 584(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $14, 592(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $15, 600(%rbp), %xmm3, %xmm3
; KNL-NEXT: vmovd {{.*#+}} xmm4 = mem[0],zero,zero,zero
; KNL-NEXT: vpinsrb $1, 616(%rbp), %xmm4, %xmm4
; KNL-NEXT: vpinsrb $2, 624(%rbp), %xmm4, %xmm4
; KNL-NEXT: vpinsrb $3, 632(%rbp), %xmm4, %xmm4
; KNL-NEXT: vpinsrb $4, 640(%rbp), %xmm4, %xmm4
; KNL-NEXT: vpinsrb $5, 648(%rbp), %xmm4, %xmm4
; KNL-NEXT: vpinsrb $6, 656(%rbp), %xmm4, %xmm4
; KNL-NEXT: vpinsrb $7, 664(%rbp), %xmm4, %xmm4
; KNL-NEXT: vpinsrb $8, 672(%rbp), %xmm4, %xmm4
; KNL-NEXT: vpinsrb $9, 680(%rbp), %xmm4, %xmm4
; KNL-NEXT: vpinsrb $10, 688(%rbp), %xmm4, %xmm4
; KNL-NEXT: vpinsrb $11, 696(%rbp), %xmm4, %xmm4
; KNL-NEXT: vpinsrb $12, 704(%rbp), %xmm4, %xmm4
; KNL-NEXT: vpinsrb $13, 712(%rbp), %xmm4, %xmm4
; KNL-NEXT: vpinsrb $14, 720(%rbp), %xmm4, %xmm4
; KNL-NEXT: vpinsrb $15, 728(%rbp), %xmm4, %xmm4
; KNL-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
; KNL-NEXT: vpcmpeqb %ymm1, %ymm3, %ymm1
; KNL-NEXT: vpcmpeqb %ymm0, %ymm2, %ymm2
; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1
; KNL-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
; KNL-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; KNL-NEXT: vpinsrb $1, 488(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $2, 496(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $3, 504(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $4, 512(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $5, 520(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $6, 528(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $7, 536(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $8, 544(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $9, 552(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $10, 560(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $11, 568(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $12, 576(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $13, 584(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $14, 592(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $15, 600(%rbp), %xmm2, %xmm2
; KNL-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; KNL-NEXT: vpinsrb $1, 616(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $2, 624(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $3, 632(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $4, 640(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $5, 648(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $6, 656(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $7, 664(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $8, 672(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $9, 680(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $10, 688(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $11, 696(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $12, 704(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $13, 712(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $14, 720(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $15, 728(%rbp), %xmm3, %xmm3
; KNL-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; KNL-NEXT: vpcmpeqb %ymm0, %ymm2, %ymm0
; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; KNL-NEXT: cmpb $0, 736(%rbp)
; KNL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
; KNL-NEXT: vmovdqa %ymm2, {{[0-9]+}}(%rsp)
; KNL-NEXT: vmovdqa %ymm0, (%rsp)
; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
; KNL-NEXT: vmovdqa64 %zmm1, (%rsp)
; KNL-NEXT: setne (%rsp,%rax)
; KNL-NEXT: vpmovsxbd (%rsp), %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
@@ -2079,23 +2077,21 @@ define i128 @test_insertelement_variable_v128i1(<128 x i8> %a, i8 %b, i32 %index
; KNL-NEXT: andq $-64, %rsp
; KNL-NEXT: subq $192, %rsp
; KNL-NEXT: ## kill: def $esi killed $esi def $rsi
; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm3
; KNL-NEXT: vpternlogq $15, %zmm3, %zmm3, %zmm3
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; KNL-NEXT: vpcmpeqb %ymm3, %ymm2, %ymm2
; KNL-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0
; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; KNL-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm4
; KNL-NEXT: vpternlogq $15, %zmm4, %zmm4, %zmm4
; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; KNL-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; KNL-NEXT: vpcmpeqb %ymm3, %ymm2, %ymm2
; KNL-NEXT: vpcmpeqb %ymm3, %ymm1, %ymm1
; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; KNL-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
; KNL-NEXT: andl $127, %esi
; KNL-NEXT: testb %dil, %dil
; KNL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
; KNL-NEXT: vmovdqa %ymm4, {{[0-9]+}}(%rsp)
; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
; KNL-NEXT: vmovdqa %ymm3, (%rsp)
; KNL-NEXT: vmovdqa64 %zmm1, {{[0-9]+}}(%rsp)
; KNL-NEXT: vmovdqa64 %zmm0, (%rsp)
; KNL-NEXT: setne (%rsp,%rsi)
; KNL-NEXT: vpmovsxbd (%rsp), %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0


@@ -1314,18 +1314,18 @@ define <64 x i8> @test_bitreverse_v64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm3
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240]
; AVX512F-NEXT: vpshufb %ymm3, %ymm4, %ymm3
; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm5
; AVX512F-NEXT: vpshufb %ymm5, %ymm4, %ymm4
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15]
; AVX512F-NEXT: vpshufb %ymm1, %ymm5, %ymm1
; AVX512F-NEXT: vpor %ymm1, %ymm3, %ymm1
; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm3
; AVX512F-NEXT: vpshufb %ymm3, %ymm4, %ymm3
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15]
; AVX512F-NEXT: vpshufb %ymm1, %ymm4, %ymm1
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpshufb %ymm0, %ymm5, %ymm0
; AVX512F-NEXT: vpor %ymm0, %ymm3, %ymm0
; AVX512F-NEXT: vpshufb %ymm0, %ymm4, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm0, %zmm3, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: test_bitreverse_v64i8:
@@ -1591,19 +1591,19 @@ define <32 x i16> @test_bitreverse_v32i16(<32 x i16> %a) nounwind {
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm4
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240]
; AVX512F-NEXT: vpshufb %ymm4, %ymm5, %ymm4
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15]
; AVX512F-NEXT: vpshufb %ymm1, %ymm6, %ymm1
; AVX512F-NEXT: vpor %ymm1, %ymm4, %ymm1
; AVX512F-NEXT: vpshufb %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm2
; AVX512F-NEXT: vpshufb %ymm2, %ymm5, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15]
; AVX512F-NEXT: vpshufb %ymm1, %ymm4, %ymm1
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vpshufb %ymm0, %ymm6, %ymm0
; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512F-NEXT: vpshufb %ymm0, %ymm4, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: test_bitreverse_v32i16:
@@ -1887,19 +1887,19 @@ define <16 x i32> @test_bitreverse_v16i32(<16 x i32> %a) nounwind {
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm4
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240]
; AVX512F-NEXT: vpshufb %ymm4, %ymm5, %ymm4
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15]
; AVX512F-NEXT: vpshufb %ymm1, %ymm6, %ymm1
; AVX512F-NEXT: vpor %ymm1, %ymm4, %ymm1
; AVX512F-NEXT: vpshufb %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm2
; AVX512F-NEXT: vpshufb %ymm2, %ymm5, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15]
; AVX512F-NEXT: vpshufb %ymm1, %ymm4, %ymm1
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vpshufb %ymm0, %ymm6, %ymm0
; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512F-NEXT: vpshufb %ymm0, %ymm4, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: test_bitreverse_v16i32:
@@ -2191,19 +2191,19 @@ define <8 x i64> @test_bitreverse_v8i64(<8 x i64> %a) nounwind {
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm4
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240]
; AVX512F-NEXT: vpshufb %ymm4, %ymm5, %ymm4
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15]
; AVX512F-NEXT: vpshufb %ymm1, %ymm6, %ymm1
; AVX512F-NEXT: vpor %ymm1, %ymm4, %ymm1
; AVX512F-NEXT: vpshufb %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm2
; AVX512F-NEXT: vpshufb %ymm2, %ymm5, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15]
; AVX512F-NEXT: vpshufb %ymm1, %ymm4, %ymm1
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vpshufb %ymm0, %ymm6, %ymm0
; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512F-NEXT: vpshufb %ymm0, %ymm4, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: test_bitreverse_v8i64:


@@ -821,31 +821,30 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %
; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vpsllw %xmm4, %ymm3, %ymm3
; AVX512F-NEXT: vpsllw %xmm4, %ymm0, %ymm5
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm5, %zmm3
; AVX512F-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX512F-NEXT: vpsllw %xmm4, %xmm5, %xmm6
; AVX512F-NEXT: vpbroadcastb %xmm6, %ymm6
; AVX512F-NEXT: vpand %ymm6, %ymm3, %ymm3
; AVX512F-NEXT: vpsllw %xmm4, %ymm0, %ymm4
; AVX512F-NEXT: vpand %ymm6, %ymm4, %ymm4
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3
; AVX512F-NEXT: vpsllw %xmm4, %xmm5, %xmm4
; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4
; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm4, %zmm4
; AVX512F-NEXT: vpandq %zmm4, %zmm3, %zmm3
; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512F-NEXT: vpsubb %xmm2, %xmm4, %xmm4
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm6
; AVX512F-NEXT: vpsrlw %xmm4, %ymm6, %ymm6
; AVX512F-NEXT: vpsrlw %xmm4, %xmm5, %xmm5
; AVX512F-NEXT: vpsrlw $8, %xmm5, %xmm5
; AVX512F-NEXT: vpbroadcastb %xmm5, %ymm5
; AVX512F-NEXT: vpand %ymm5, %ymm6, %ymm6
; AVX512F-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512F-NEXT: vpand %ymm5, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm6, %zmm1, %zmm1
; AVX512F-NEXT: vporq %zmm1, %zmm3, %zmm1
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpcmpeqb %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2
; AVX512F-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0
; AVX512F-NEXT: vpsrlw %xmm4, %xmm5, %xmm4
; AVX512F-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4
; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm4, %zmm4
; AVX512F-NEXT: vpternlogq $236, %zmm1, %zmm3, %zmm4
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
; AVX512F-NEXT: vpternlogq $226, %zmm4, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v64i8:
@@ -854,31 +853,30 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %
; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vpsllw %xmm4, %ymm3, %ymm3
; AVX512VL-NEXT: vpsllw %xmm4, %ymm0, %ymm5
; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm5, %zmm3
; AVX512VL-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX512VL-NEXT: vpsllw %xmm4, %xmm5, %xmm6
; AVX512VL-NEXT: vpbroadcastb %xmm6, %ymm6
; AVX512VL-NEXT: vpand %ymm6, %ymm3, %ymm3
; AVX512VL-NEXT: vpsllw %xmm4, %ymm0, %ymm4
; AVX512VL-NEXT: vpand %ymm6, %ymm4, %ymm4
; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3
; AVX512VL-NEXT: vpsllw %xmm4, %xmm5, %xmm4
; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4
; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm4, %zmm4
; AVX512VL-NEXT: vpandq %zmm4, %zmm3, %zmm3
; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512VL-NEXT: vpsubb %xmm2, %xmm4, %xmm4
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm6
; AVX512VL-NEXT: vpsrlw %xmm4, %ymm6, %ymm6
; AVX512VL-NEXT: vpsrlw %xmm4, %xmm5, %xmm5
; AVX512VL-NEXT: vpsrlw $8, %xmm5, %xmm5
; AVX512VL-NEXT: vpbroadcastb %xmm5, %ymm5
; AVX512VL-NEXT: vpand %ymm5, %ymm6, %ymm6
; AVX512VL-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512VL-NEXT: vpand %ymm5, %ymm1, %ymm1
; AVX512VL-NEXT: vinserti64x4 $1, %ymm6, %zmm1, %zmm1
; AVX512VL-NEXT: vporq %zmm1, %zmm3, %zmm1
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2
; AVX512VL-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0
; AVX512VL-NEXT: vpsrlw %xmm4, %xmm5, %xmm4
; AVX512VL-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4
; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm4, %zmm4
; AVX512VL-NEXT: vpternlogq $236, %zmm1, %zmm3, %zmm4
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm1
; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
; AVX512VL-NEXT: vpternlogq $226, %zmm4, %zmm1, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v64i8:
@@ -1510,40 +1508,28 @@ define <32 x i16> @splatconstant_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y) no
define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
; AVX512F-LABEL: splatconstant_funnnel_v64i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT: vpsllw $4, %ymm2, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm2
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm0
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_funnnel_v64i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512VL-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm2, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm2
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm0
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v64i8:


@@ -483,14 +483,13 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vpsllw %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX512F-NEXT: vpsllw %xmm2, %xmm4, %xmm4
; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4
; AVX512F-NEXT: vpand %ymm4, %ymm1, %ymm1
; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm4, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm1
; AVX512F-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-NEXT: vpternlogq $236, %zmm1, %zmm3, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v64i8:
@@ -529,14 +528,13 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vpsllw %xmm2, %ymm1, %ymm1
; AVX512VL-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX512VL-NEXT: vpsllw %xmm2, %xmm4, %xmm4
; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4
; AVX512VL-NEXT: vpand %ymm4, %ymm1, %ymm1
; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512VL-NEXT: vpand %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm1
; AVX512VL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512VL-NEXT: vpternlogq $236, %zmm1, %zmm3, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v64i8:
@@ -886,38 +884,26 @@ define <32 x i16> @splatconstant_funnnel_v32i16(<32 x i16> %x) nounwind {
define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x) nounwind {
; AVX512F-LABEL: splatconstant_funnnel_v64i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm4
; AVX512F-NEXT: vpand %ymm3, %ymm4, %ymm3
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-NEXT: vpsllw $4, %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm2, %zmm0, %zmm0
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT: vpsllw $4, %ymm2, %ymm3
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_funnnel_v64i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm4
; AVX512VL-NEXT: vpand %ymm3, %ymm4, %ymm3
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm1
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512VL-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm2, %zmm0, %zmm0
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm2, %ymm3
; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v64i8:


@@ -805,68 +805,66 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i
define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt) nounwind {
; AVX512F-LABEL: splatvar_funnnel_v64i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm3
; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm4, %ymm3, %ymm3
; AVX512F-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX512F-NEXT: vpsrlw %xmm4, %xmm5, %xmm6
; AVX512F-NEXT: vpsrlw $8, %xmm6, %xmm6
; AVX512F-NEXT: vpbroadcastb %xmm6, %ymm6
; AVX512F-NEXT: vpand %ymm6, %ymm3, %ymm3
; AVX512F-NEXT: vpsrlw %xmm4, %ymm1, %ymm4
; AVX512F-NEXT: vpand %ymm6, %ymm4, %ymm4
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3
; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2
; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm3
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512F-NEXT: vpsubb %xmm2, %xmm4, %xmm4
; AVX512F-NEXT: vpsubb %xmm3, %xmm4, %xmm4
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm6
; AVX512F-NEXT: vpsllw %xmm4, %ymm6, %ymm6
; AVX512F-NEXT: vpsllw %xmm4, %xmm5, %xmm5
; AVX512F-NEXT: vpbroadcastb %xmm5, %ymm5
; AVX512F-NEXT: vpand %ymm5, %ymm6, %ymm6
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm5
; AVX512F-NEXT: vpsllw %xmm4, %ymm5, %ymm5
; AVX512F-NEXT: vpsllw %xmm4, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm5, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm6, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm3, %zmm0, %zmm3
; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm0
; AVX512F-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX512F-NEXT: vpsllw %xmm4, %xmm5, %xmm4
; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4
; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm4, %zmm4
; AVX512F-NEXT: vpandq %zmm4, %zmm0, %zmm0
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm4
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm2, %ymm4, %ymm4
; AVX512F-NEXT: vpsrlw %xmm2, %ymm1, %ymm6
; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm6, %zmm4
; AVX512F-NEXT: vpsrlw %xmm2, %xmm5, %xmm2
; AVX512F-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2
; AVX512F-NEXT: vpternlogq $236, %zmm4, %zmm0, %zmm2
; AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vpcmpeqb %ymm0, %ymm2, %ymm0
; AVX512F-NEXT: vpcmpeqb %ymm0, %ymm3, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-NEXT: vpternlogq $202, %zmm3, %zmm1, %zmm0
; AVX512F-NEXT: vpternlogq $202, %zmm2, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v64i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm3
; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm4, %ymm3, %ymm3
; AVX512VL-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX512VL-NEXT: vpsrlw %xmm4, %xmm5, %xmm6
; AVX512VL-NEXT: vpsrlw $8, %xmm6, %xmm6
; AVX512VL-NEXT: vpbroadcastb %xmm6, %ymm6
; AVX512VL-NEXT: vpand %ymm6, %ymm3, %ymm3
; AVX512VL-NEXT: vpsrlw %xmm4, %ymm1, %ymm4
; AVX512VL-NEXT: vpand %ymm6, %ymm4, %ymm4
; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3
; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm2
; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm3
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512VL-NEXT: vpsubb %xmm2, %xmm4, %xmm4
; AVX512VL-NEXT: vpsubb %xmm3, %xmm4, %xmm4
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm6
; AVX512VL-NEXT: vpsllw %xmm4, %ymm6, %ymm6
; AVX512VL-NEXT: vpsllw %xmm4, %xmm5, %xmm5
; AVX512VL-NEXT: vpbroadcastb %xmm5, %ymm5
; AVX512VL-NEXT: vpand %ymm5, %ymm6, %ymm6
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm5
; AVX512VL-NEXT: vpsllw %xmm4, %ymm5, %ymm5
; AVX512VL-NEXT: vpsllw %xmm4, %ymm0, %ymm0
; AVX512VL-NEXT: vpand %ymm5, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm6, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm3, %zmm0, %zmm3
; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm0
; AVX512VL-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX512VL-NEXT: vpsllw %xmm4, %xmm5, %xmm4
; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4
; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm4, %zmm4
; AVX512VL-NEXT: vpandq %zmm4, %zmm0, %zmm0
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm4
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm2, %ymm4, %ymm4
; AVX512VL-NEXT: vpsrlw %xmm2, %ymm1, %ymm6
; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm6, %zmm4
; AVX512VL-NEXT: vpsrlw %xmm2, %xmm5, %xmm2
; AVX512VL-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm2
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2
; AVX512VL-NEXT: vpternlogq $236, %zmm4, %zmm0, %zmm2
; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpcmpeqb %ymm0, %ymm2, %ymm0
; AVX512VL-NEXT: vpcmpeqb %ymm0, %ymm3, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512VL-NEXT: vpternlogq $202, %zmm3, %zmm1, %zmm0
; AVX512VL-NEXT: vpternlogq $202, %zmm2, %zmm1, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v64i8:
@@ -1494,40 +1492,28 @@ define <32 x i16> @splatconstant_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y) no
define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
; AVX512F-LABEL: splatconstant_funnnel_v64i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT: vpsllw $4, %ymm2, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm2
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm0
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_funnnel_v64i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512VL-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm2, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm2
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm0
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v64i8:


@@ -480,15 +480,14 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
; AVX512F-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX512F-NEXT: vpsrlw %xmm1, %xmm4, %xmm4
; AVX512F-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4
; AVX512F-NEXT: vpand %ymm4, %ymm2, %ymm2
; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm4, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm0, %zmm3, %zmm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm2
; AVX512F-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-NEXT: vpternlogq $236, %zmm2, %zmm3, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v64i8:
@@ -524,15 +523,14 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
; AVX512VL-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX512VL-NEXT: vpsrlw %xmm1, %xmm4, %xmm4
; AVX512VL-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4
; AVX512VL-NEXT: vpand %ymm4, %ymm2, %ymm2
; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpand %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm0, %zmm3, %zmm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm2
; AVX512VL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX512VL-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512VL-NEXT: vpternlogq $236, %zmm2, %zmm3, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v64i8:
@@ -882,38 +880,26 @@ define <32 x i16> @splatconstant_funnnel_v32i16(<32 x i16> %x) nounwind {
define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x) nounwind {
; AVX512F-LABEL: splatconstant_funnnel_v64i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm4
; AVX512F-NEXT: vpand %ymm3, %ymm4, %ymm3
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-NEXT: vpsllw $4, %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm2, %zmm0, %zmm0
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT: vpsllw $4, %ymm2, %ymm3
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_funnnel_v64i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm4
; AVX512VL-NEXT: vpand %ymm3, %ymm4, %ymm3
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm1
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512VL-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm2, %zmm0, %zmm0
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm2, %ymm3
; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v64i8:


@@ -133,38 +133,36 @@ define <32 x i16> @test_div7_32i16(<32 x i16> %a) nounwind {
define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
; AVX512F-LABEL: test_div7_64i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15],ymm1[24],ymm2[24],ymm1[25],ymm2[25],ymm1[26],ymm2[26],ymm1[27],ymm2[27],ymm1[28],ymm2[28],ymm1[29],ymm2[29],ymm1[30],ymm2[30],ymm1[31],ymm2[31]
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37]
; AVX512F-NEXT: vpmullw %ymm4, %ymm3, %ymm3
; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm5 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[16],ymm2[16],ymm1[17],ymm2[17],ymm1[18],ymm2[18],ymm1[19],ymm2[19],ymm1[20],ymm2[20],ymm1[21],ymm2[21],ymm1[22],ymm2[22],ymm1[23],ymm2[23]
; AVX512F-NEXT: vpmullw %ymm4, %ymm5, %ymm5
; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5
; AVX512F-NEXT: vpackuswb %ymm3, %ymm5, %ymm3
; AVX512F-NEXT: vpsubb %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512F-NEXT: vpand %ymm5, %ymm1, %ymm1
; AVX512F-NEXT: vpaddb %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpsrlw $2, %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm6 = ymm0[8],ymm2[8],ymm0[9],ymm2[9],ymm0[10],ymm2[10],ymm0[11],ymm2[11],ymm0[12],ymm2[12],ymm0[13],ymm2[13],ymm0[14],ymm2[14],ymm0[15],ymm2[15],ymm0[24],ymm2[24],ymm0[25],ymm2[25],ymm0[26],ymm2[26],ymm0[27],ymm2[27],ymm0[28],ymm2[28],ymm0[29],ymm2[29],ymm0[30],ymm2[30],ymm0[31],ymm2[31]
; AVX512F-NEXT: vpmullw %ymm4, %ymm6, %ymm6
; AVX512F-NEXT: vpsrlw $8, %ymm6, %ymm6
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm0[0],ymm2[0],ymm0[1],ymm2[1],ymm0[2],ymm2[2],ymm0[3],ymm2[3],ymm0[4],ymm2[4],ymm0[5],ymm2[5],ymm0[6],ymm2[6],ymm0[7],ymm2[7],ymm0[16],ymm2[16],ymm0[17],ymm2[17],ymm0[18],ymm2[18],ymm0[19],ymm2[19],ymm0[20],ymm2[20],ymm0[21],ymm2[21],ymm0[22],ymm2[22],ymm0[23],ymm2[23]
; AVX512F-NEXT: vpmullw %ymm4, %ymm2, %ymm2
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37]
; AVX512F-NEXT: vpmullw %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX512F-NEXT: vpackuswb %ymm6, %ymm2, %ymm2
; AVX512F-NEXT: vpsubb %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
; AVX512F-NEXT: vpmullw %ymm3, %ymm4, %ymm4
; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
; AVX512F-NEXT: vpackuswb %ymm2, %ymm4, %ymm2
; AVX512F-NEXT: vpsubb %ymm2, %ymm0, %ymm4
; AVX512F-NEXT: vpsrlw $1, %ymm4, %ymm4
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512F-NEXT: vpand %ymm5, %ymm4, %ymm4
; AVX512F-NEXT: vpaddb %ymm2, %ymm4, %ymm2
; AVX512F-NEXT: vpsrlw $2, %ymm2, %ymm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; AVX512F-NEXT: vpmullw %ymm3, %ymm4, %ymm4
; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
; AVX512F-NEXT: vpmullw %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm1
; AVX512F-NEXT: vpackuswb %ymm4, %ymm1, %ymm1
; AVX512F-NEXT: vpsubb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $1, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm5, %ymm0, %ymm0
; AVX512F-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $2, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: test_div7_64i8:


@@ -418,22 +418,21 @@ define <64 x i8> @splatvar_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT: vpsllw %xmm2, %ymm3, %ymm4
; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm5
; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm5, %zmm4
; AVX512F-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX512F-NEXT: vpsllw %xmm2, %xmm5, %xmm6
; AVX512F-NEXT: vpbroadcastb %xmm6, %ymm6
; AVX512F-NEXT: vpand %ymm6, %ymm4, %ymm4
; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm2
; AVX512F-NEXT: vpand %ymm6, %ymm2, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2
; AVX512F-NEXT: vpsllw %xmm2, %xmm5, %xmm2
; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2
; AVX512F-NEXT: vpandq %zmm2, %zmm4, %zmm2
; AVX512F-NEXT: vpsrlw %xmm1, %ymm3, %ymm3
; AVX512F-NEXT: vpsrlw %xmm1, %xmm5, %xmm4
; AVX512F-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4
; AVX512F-NEXT: vpand %ymm4, %ymm3, %ymm3
; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm4, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm3
; AVX512F-NEXT: vpsrlw %xmm1, %xmm5, %xmm0
; AVX512F-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-NEXT: vpternlogq $236, %zmm3, %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_rotate_v64i8:
@@ -445,22 +444,21 @@ define <64 x i8> @splatvar_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512VL-NEXT: vpsllw %xmm2, %ymm3, %ymm4
; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm5
; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm5, %zmm4
; AVX512VL-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX512VL-NEXT: vpsllw %xmm2, %xmm5, %xmm6
; AVX512VL-NEXT: vpbroadcastb %xmm6, %ymm6
; AVX512VL-NEXT: vpand %ymm6, %ymm4, %ymm4
; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm2
; AVX512VL-NEXT: vpand %ymm6, %ymm2, %ymm2
; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2
; AVX512VL-NEXT: vpsllw %xmm2, %xmm5, %xmm2
; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm2
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2
; AVX512VL-NEXT: vpandq %zmm2, %zmm4, %zmm2
; AVX512VL-NEXT: vpsrlw %xmm1, %ymm3, %ymm3
; AVX512VL-NEXT: vpsrlw %xmm1, %xmm5, %xmm4
; AVX512VL-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4
; AVX512VL-NEXT: vpand %ymm4, %ymm3, %ymm3
; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpand %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm3
; AVX512VL-NEXT: vpsrlw %xmm1, %xmm5, %xmm0
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX512VL-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512VL-NEXT: vpternlogq $236, %zmm3, %zmm2, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_rotate_v64i8:
@@ -809,38 +807,26 @@ define <32 x i16> @splatconstant_rotate_v32i16(<32 x i16> %a) nounwind {
define <64 x i8> @splatconstant_rotate_v64i8(<64 x i8> %a) nounwind {
; AVX512F-LABEL: splatconstant_rotate_v64i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vpsllw $4, %ymm1, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm4
; AVX512F-NEXT: vpand %ymm3, %ymm4, %ymm3
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT: vpsllw $4, %ymm2, %ymm3
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_rotate_v64i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm4
; AVX512VL-NEXT: vpand %ymm3, %ymm4, %ymm3
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm2, %ymm3
; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_rotate_v64i8:
@@ -947,40 +933,28 @@ define <32 x i16> @splatconstant_rotate_mask_v32i16(<32 x i16> %a) nounwind {
define <64 x i8> @splatconstant_rotate_mask_v64i8(<64 x i8> %a) nounwind {
; AVX512F-LABEL: splatconstant_rotate_mask_v64i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vpsllw $4, %ymm1, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm4
; AVX512F-NEXT: vpand %ymm3, %ymm4, %ymm3
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT: vpsllw $4, %ymm2, %ymm3
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm1
; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm0
; AVX512F-NEXT: vpternlogq $248, {{.*}}(%rip), %zmm1, %zmm0
; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm2
; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm0
; AVX512F-NEXT: vpternlogq $248, {{.*}}(%rip), %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_rotate_mask_v64i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm4
; AVX512VL-NEXT: vpand %ymm3, %ymm4, %ymm3
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm2, %ymm3
; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm1
; AVX512VL-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm0
; AVX512VL-NEXT: vpternlogq $248, {{.*}}(%rip), %zmm1, %zmm0
; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm2
; AVX512VL-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm0
; AVX512VL-NEXT: vpternlogq $248, {{.*}}(%rip), %zmm2, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_rotate_mask_v64i8:

@@ -156,14 +156,14 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX512DQ-NEXT: vpsrlw %xmm1, %xmm3, %xmm3
; AVX512DQ-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX512DQ-NEXT: vpbroadcastb %xmm3, %ymm3
; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512DQ-NEXT: vpsrlw %xmm1, %xmm2, %xmm1
; AVX512DQ-NEXT: vpsrlw $8, %xmm1, %xmm1
; AVX512DQ-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
; AVX512DQ-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v64i8:
@@ -308,13 +308,11 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) nounwind {
define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v64i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512DQ-NEXT: vpsrlw $3, %ymm1, %ymm1
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT: vpsrlw $3, %ymm0, %ymm1
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT: vpsrlw $3, %ymm0, %ymm0
; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_shift_v64i8:

@@ -151,13 +151,13 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT: vpsllw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX512DQ-NEXT: vpsllw %xmm1, %xmm3, %xmm3
; AVX512DQ-NEXT: vpbroadcastb %xmm3, %ymm3
; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512DQ-NEXT: vpsllw %xmm1, %xmm2, %xmm1
; AVX512DQ-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
; AVX512DQ-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v64i8:
@@ -306,13 +306,11 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) nounwind {
define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v64i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512DQ-NEXT: vpsllw $3, %ymm1, %ymm1
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248]
; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT: vpsllw $3, %ymm0, %ymm1
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT: vpsllw $3, %ymm0, %ymm0
; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_shift_v64i8: