[X86][AVX512] Fold concat(and(x,y),and(z,w)) -> and(concat(x,z),concat(y,w)) for 512-bit vectors
Helps vpternlog folding on non-AVX512BW targets
parent 6d5e604171
commit 63334e0fdd
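Before the patch itself, here is a minimal standalone sketch of the algebra the fold relies on: bitwise logic is elementwise, so it distributes over vector concatenation, and concat(and(x,y), and(z,w)) always equals and(concat(x,z), concat(y,w)). This is illustrative C++ only, not LLVM code; the Half/Full aliases and helper names are invented for the example, with 4- and 8-element arrays standing in for 256- and 512-bit registers.

// Sketch (assumed names, not LLVM APIs): verify the identity the combine uses.
#include <array>
#include <cassert>
#include <cstdint>

using Half = std::array<uint64_t, 4>; // stands in for a 256-bit vector
using Full = std::array<uint64_t, 8>; // stands in for a 512-bit vector

static Half andHalf(const Half &A, const Half &B) {
  Half R{};
  for (unsigned I = 0; I != 4; ++I)
    R[I] = A[I] & B[I]; // narrow AND, lane by lane
  return R;
}

static Full concat(const Half &Lo, const Half &Hi) {
  Full R{};
  for (unsigned I = 0; I != 4; ++I) {
    R[I] = Lo[I];     // low half
    R[I + 4] = Hi[I]; // high half
  }
  return R;
}

static Full andFull(const Full &A, const Full &B) {
  Full R{};
  for (unsigned I = 0; I != 8; ++I)
    R[I] = A[I] & B[I]; // wide AND, lane by lane
  return R;
}

int main() {
  Half X{1, 2, 3, 4}, Y{0xF, 0xF, 0xF, 0xF}, Z{5, 6, 7, 8}, W{3, 3, 3, 3};
  // Before the fold: two narrow ANDs, then one concatenation.
  Full Before = concat(andHalf(X, Y), andHalf(Z, W));
  // After the fold: two concatenations, then one wide AND.
  Full After = andFull(concat(X, Z), concat(Y, W));
  assert(Before == After);
  return 0;
}

The same reasoning holds for OR, XOR, and ANDNP, which is why the switch below handles all four opcodes with one body.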
@@ -48090,6 +48090,25 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
                            Op0.getOperand(1));
       }
       break;
+    case ISD::AND:
+    case ISD::OR:
+    case ISD::XOR:
+    case X86ISD::ANDNP:
+      // TODO: Add 256-bit support.
+      if (!IsSplat && VT.is512BitVector()) {
+        SmallVector<SDValue, 2> LHS, RHS;
+        for (unsigned i = 0; i != NumOps; ++i) {
+          LHS.push_back(Ops[i].getOperand(0));
+          RHS.push_back(Ops[i].getOperand(1));
+        }
+        MVT SrcVT = Op0.getOperand(0).getSimpleValueType();
+        SrcVT = MVT::getVectorVT(SrcVT.getScalarType(),
+                                 NumOps * SrcVT.getVectorNumElements());
+        return DAG.getNode(Op0.getOpcode(), DL, VT,
+                           DAG.getNode(ISD::CONCAT_VECTORS, DL, SrcVT, LHS),
+                           DAG.getNode(ISD::CONCAT_VECTORS, DL, SrcVT, RHS));
+      }
+      break;
     case X86ISD::PACKSS:
     case X86ISD::PACKUS:
       if (!IsSplat && NumOps == 2 && VT.is256BitVector() &&
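A note on what the test diffs below reflect: vpternlogq with immediate $15 computes a bitwise NOT, and on KNL-class targets (AVX512F without AVX512BW) byte compares are only legal at 256 bits. By concatenating the two 256-bit compare results before the logic op, the combine lets one 512-bit vpternlogq stand in for a pair of them, which is the pattern visible in the insertelement, funnel-shift, division, and rotate tests that follow.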
@@ -1692,16 +1692,15 @@ define i64 @test_insertelement_variable_v64i1(<64 x i8> %a, i8 %b, i32 %index) {
; KNL-NEXT: andq $-64, %rsp
; KNL-NEXT: subq $128, %rsp
; KNL-NEXT: ## kill: def $esi killed $esi def $rsi
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm2
; KNL-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; KNL-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; KNL-NEXT: andl $63, %esi
; KNL-NEXT: testb %dil, %dil
; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
; KNL-NEXT: vmovdqa %ymm2, (%rsp)
; KNL-NEXT: vmovdqa64 %zmm0, (%rsp)
; KNL-NEXT: setne (%rsp,%rsi)
; KNL-NEXT: vpmovsxbd (%rsp), %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
@@ -1772,116 +1771,115 @@ define i96 @test_insertelement_variable_v96i1(<96 x i8> %a, i8 %b, i32 %index) {
; KNL-NEXT: subq $192, %rsp
; KNL-NEXT: movl 744(%rbp), %eax
; KNL-NEXT: andl $127, %eax
; KNL-NEXT: vmovd %edi, %xmm0
; KNL-NEXT: vpinsrb $1, %esi, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $4, %r8d, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $5, %r9d, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $6, 16(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $7, 24(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $8, 32(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $9, 40(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $10, 48(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $11, 56(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $12, 64(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $13, 72(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $14, 80(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $15, 88(%rbp), %xmm0, %xmm0
; KNL-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; KNL-NEXT: vpinsrb $1, 232(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $2, 240(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $3, 248(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $4, 256(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $5, 264(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $6, 272(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $7, 280(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $8, 288(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $9, 296(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $10, 304(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $11, 312(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $12, 320(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $13, 328(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $14, 336(%rbp), %xmm0, %xmm0
; KNL-NEXT: vpinsrb $15, 344(%rbp), %xmm0, %xmm0
; KNL-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; KNL-NEXT: vpinsrb $1, 104(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $2, 112(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $3, 120(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $4, 128(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $5, 136(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $6, 144(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $7, 152(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $8, 160(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $9, 168(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $10, 176(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $11, 184(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $12, 192(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $13, 200(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $14, 208(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $15, 216(%rbp), %xmm1, %xmm1
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; KNL-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; KNL-NEXT: vpinsrb $1, 232(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $2, 240(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $3, 248(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $4, 256(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $5, 264(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $6, 272(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $7, 280(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $8, 288(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $9, 296(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $10, 304(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $11, 312(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $12, 320(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $13, 328(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $14, 336(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $15, 344(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $1, 360(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $2, 368(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $3, 376(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $4, 384(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $5, 392(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $6, 400(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $7, 408(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $8, 416(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $9, 424(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $10, 432(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $11, 440(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $12, 448(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $13, 456(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $14, 464(%rbp), %xmm1, %xmm1
; KNL-NEXT: vpinsrb $15, 472(%rbp), %xmm1, %xmm1
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
; KNL-NEXT: vpxor %xmm0, %xmm0, %xmm0
; KNL-NEXT: vpcmpeqb %ymm0, %ymm1, %ymm1
; KNL-NEXT: vmovd %edi, %xmm2
; KNL-NEXT: vpinsrb $1, %esi, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $2, %edx, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $3, %ecx, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $4, %r8d, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $5, %r9d, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $6, 16(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $7, 24(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $8, 32(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $9, 40(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $10, 48(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $11, 56(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $12, 64(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $13, 72(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $14, 80(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $15, 88(%rbp), %xmm2, %xmm2
; KNL-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; KNL-NEXT: vpinsrb $1, 360(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $2, 368(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $3, 376(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $4, 384(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $5, 392(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $6, 400(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $7, 408(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $8, 416(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $9, 424(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $10, 432(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $11, 440(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $12, 448(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $13, 456(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $14, 464(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $15, 472(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $1, 104(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $2, 112(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $3, 120(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $4, 128(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $5, 136(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $6, 144(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $7, 152(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $8, 160(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $9, 168(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $10, 176(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $11, 184(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $12, 192(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $13, 200(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $14, 208(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $15, 216(%rbp), %xmm3, %xmm3
; KNL-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; KNL-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm2
; KNL-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
; KNL-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; KNL-NEXT: vpinsrb $1, 488(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $2, 496(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $3, 504(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $4, 512(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $5, 520(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $6, 528(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $7, 536(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $8, 544(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $9, 552(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $10, 560(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $11, 568(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $12, 576(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $13, 584(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $14, 592(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $15, 600(%rbp), %xmm3, %xmm3
; KNL-NEXT: vmovd {{.*#+}} xmm4 = mem[0],zero,zero,zero
; KNL-NEXT: vpinsrb $1, 616(%rbp), %xmm4, %xmm4
; KNL-NEXT: vpinsrb $2, 624(%rbp), %xmm4, %xmm4
; KNL-NEXT: vpinsrb $3, 632(%rbp), %xmm4, %xmm4
; KNL-NEXT: vpinsrb $4, 640(%rbp), %xmm4, %xmm4
; KNL-NEXT: vpinsrb $5, 648(%rbp), %xmm4, %xmm4
; KNL-NEXT: vpinsrb $6, 656(%rbp), %xmm4, %xmm4
; KNL-NEXT: vpinsrb $7, 664(%rbp), %xmm4, %xmm4
; KNL-NEXT: vpinsrb $8, 672(%rbp), %xmm4, %xmm4
; KNL-NEXT: vpinsrb $9, 680(%rbp), %xmm4, %xmm4
; KNL-NEXT: vpinsrb $10, 688(%rbp), %xmm4, %xmm4
; KNL-NEXT: vpinsrb $11, 696(%rbp), %xmm4, %xmm4
; KNL-NEXT: vpinsrb $12, 704(%rbp), %xmm4, %xmm4
; KNL-NEXT: vpinsrb $13, 712(%rbp), %xmm4, %xmm4
; KNL-NEXT: vpinsrb $14, 720(%rbp), %xmm4, %xmm4
; KNL-NEXT: vpinsrb $15, 728(%rbp), %xmm4, %xmm4
; KNL-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
; KNL-NEXT: vpcmpeqb %ymm1, %ymm3, %ymm1
; KNL-NEXT: vpcmpeqb %ymm0, %ymm2, %ymm2
; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1
; KNL-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
; KNL-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; KNL-NEXT: vpinsrb $1, 488(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $2, 496(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $3, 504(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $4, 512(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $5, 520(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $6, 528(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $7, 536(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $8, 544(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $9, 552(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $10, 560(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $11, 568(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $12, 576(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $13, 584(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $14, 592(%rbp), %xmm2, %xmm2
; KNL-NEXT: vpinsrb $15, 600(%rbp), %xmm2, %xmm2
; KNL-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; KNL-NEXT: vpinsrb $1, 616(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $2, 624(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $3, 632(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $4, 640(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $5, 648(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $6, 656(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $7, 664(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $8, 672(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $9, 680(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $10, 688(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $11, 696(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $12, 704(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $13, 712(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $14, 720(%rbp), %xmm3, %xmm3
; KNL-NEXT: vpinsrb $15, 728(%rbp), %xmm3, %xmm3
; KNL-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; KNL-NEXT: vpcmpeqb %ymm0, %ymm2, %ymm0
; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; KNL-NEXT: cmpb $0, 736(%rbp)
; KNL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
; KNL-NEXT: vmovdqa %ymm2, {{[0-9]+}}(%rsp)
; KNL-NEXT: vmovdqa %ymm0, (%rsp)
; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
; KNL-NEXT: vmovdqa64 %zmm1, (%rsp)
; KNL-NEXT: setne (%rsp,%rax)
; KNL-NEXT: vpmovsxbd (%rsp), %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
@@ -2079,23 +2077,21 @@ define i128 @test_insertelement_variable_v128i1(<128 x i8> %a, i8 %b, i32 %index
; KNL-NEXT: andq $-64, %rsp
; KNL-NEXT: subq $192, %rsp
; KNL-NEXT: ## kill: def $esi killed $esi def $rsi
; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm3
; KNL-NEXT: vpternlogq $15, %zmm3, %zmm3, %zmm3
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; KNL-NEXT: vpcmpeqb %ymm3, %ymm2, %ymm2
; KNL-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0
; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; KNL-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm4
; KNL-NEXT: vpternlogq $15, %zmm4, %zmm4, %zmm4
; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; KNL-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; KNL-NEXT: vpcmpeqb %ymm3, %ymm2, %ymm2
; KNL-NEXT: vpcmpeqb %ymm3, %ymm1, %ymm1
; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; KNL-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
; KNL-NEXT: andl $127, %esi
; KNL-NEXT: testb %dil, %dil
; KNL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
; KNL-NEXT: vmovdqa %ymm4, {{[0-9]+}}(%rsp)
; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
; KNL-NEXT: vmovdqa %ymm3, (%rsp)
; KNL-NEXT: vmovdqa64 %zmm1, {{[0-9]+}}(%rsp)
; KNL-NEXT: vmovdqa64 %zmm0, (%rsp)
; KNL-NEXT: setne (%rsp,%rsi)
; KNL-NEXT: vpmovsxbd (%rsp), %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
@@ -1314,18 +1314,18 @@ define <64 x i8> @test_bitreverse_v64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm3
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240]
; AVX512F-NEXT: vpshufb %ymm3, %ymm4, %ymm3
; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm5
; AVX512F-NEXT: vpshufb %ymm5, %ymm4, %ymm4
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15]
; AVX512F-NEXT: vpshufb %ymm1, %ymm5, %ymm1
; AVX512F-NEXT: vpor %ymm1, %ymm3, %ymm1
; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm3
; AVX512F-NEXT: vpshufb %ymm3, %ymm4, %ymm3
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15]
; AVX512F-NEXT: vpshufb %ymm1, %ymm4, %ymm1
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpshufb %ymm0, %ymm5, %ymm0
; AVX512F-NEXT: vpor %ymm0, %ymm3, %ymm0
; AVX512F-NEXT: vpshufb %ymm0, %ymm4, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm0, %zmm3, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: test_bitreverse_v64i8:
@@ -1591,19 +1591,19 @@ define <32 x i16> @test_bitreverse_v32i16(<32 x i16> %a) nounwind {
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm4
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240]
; AVX512F-NEXT: vpshufb %ymm4, %ymm5, %ymm4
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15]
; AVX512F-NEXT: vpshufb %ymm1, %ymm6, %ymm1
; AVX512F-NEXT: vpor %ymm1, %ymm4, %ymm1
; AVX512F-NEXT: vpshufb %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm2
; AVX512F-NEXT: vpshufb %ymm2, %ymm5, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15]
; AVX512F-NEXT: vpshufb %ymm1, %ymm4, %ymm1
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vpshufb %ymm0, %ymm6, %ymm0
; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512F-NEXT: vpshufb %ymm0, %ymm4, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: test_bitreverse_v32i16:
@@ -1887,19 +1887,19 @@ define <16 x i32> @test_bitreverse_v16i32(<16 x i32> %a) nounwind {
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm4
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240]
; AVX512F-NEXT: vpshufb %ymm4, %ymm5, %ymm4
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15]
; AVX512F-NEXT: vpshufb %ymm1, %ymm6, %ymm1
; AVX512F-NEXT: vpor %ymm1, %ymm4, %ymm1
; AVX512F-NEXT: vpshufb %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm2
; AVX512F-NEXT: vpshufb %ymm2, %ymm5, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15]
; AVX512F-NEXT: vpshufb %ymm1, %ymm4, %ymm1
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vpshufb %ymm0, %ymm6, %ymm0
; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512F-NEXT: vpshufb %ymm0, %ymm4, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: test_bitreverse_v16i32:
@@ -2191,19 +2191,19 @@ define <8 x i64> @test_bitreverse_v8i64(<8 x i64> %a) nounwind {
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm4
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240]
; AVX512F-NEXT: vpshufb %ymm4, %ymm5, %ymm4
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15]
; AVX512F-NEXT: vpshufb %ymm1, %ymm6, %ymm1
; AVX512F-NEXT: vpor %ymm1, %ymm4, %ymm1
; AVX512F-NEXT: vpshufb %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm2
; AVX512F-NEXT: vpshufb %ymm2, %ymm5, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15]
; AVX512F-NEXT: vpshufb %ymm1, %ymm4, %ymm1
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vpshufb %ymm0, %ymm6, %ymm0
; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512F-NEXT: vpshufb %ymm0, %ymm4, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: test_bitreverse_v8i64:
@@ -821,31 +821,30 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %
; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vpsllw %xmm4, %ymm3, %ymm3
; AVX512F-NEXT: vpsllw %xmm4, %ymm0, %ymm5
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm5, %zmm3
; AVX512F-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX512F-NEXT: vpsllw %xmm4, %xmm5, %xmm6
; AVX512F-NEXT: vpbroadcastb %xmm6, %ymm6
; AVX512F-NEXT: vpand %ymm6, %ymm3, %ymm3
; AVX512F-NEXT: vpsllw %xmm4, %ymm0, %ymm4
; AVX512F-NEXT: vpand %ymm6, %ymm4, %ymm4
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3
; AVX512F-NEXT: vpsllw %xmm4, %xmm5, %xmm4
; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4
; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm4, %zmm4
; AVX512F-NEXT: vpandq %zmm4, %zmm3, %zmm3
; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512F-NEXT: vpsubb %xmm2, %xmm4, %xmm4
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm6
; AVX512F-NEXT: vpsrlw %xmm4, %ymm6, %ymm6
; AVX512F-NEXT: vpsrlw %xmm4, %xmm5, %xmm5
; AVX512F-NEXT: vpsrlw $8, %xmm5, %xmm5
; AVX512F-NEXT: vpbroadcastb %xmm5, %ymm5
; AVX512F-NEXT: vpand %ymm5, %ymm6, %ymm6
; AVX512F-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512F-NEXT: vpand %ymm5, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm6, %zmm1, %zmm1
; AVX512F-NEXT: vporq %zmm1, %zmm3, %zmm1
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpcmpeqb %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2
; AVX512F-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0
; AVX512F-NEXT: vpsrlw %xmm4, %xmm5, %xmm4
; AVX512F-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4
; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm4, %zmm4
; AVX512F-NEXT: vpternlogq $236, %zmm1, %zmm3, %zmm4
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
; AVX512F-NEXT: vpternlogq $226, %zmm4, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v64i8:
@@ -854,31 +853,30 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %
; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vpsllw %xmm4, %ymm3, %ymm3
; AVX512VL-NEXT: vpsllw %xmm4, %ymm0, %ymm5
; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm5, %zmm3
; AVX512VL-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX512VL-NEXT: vpsllw %xmm4, %xmm5, %xmm6
; AVX512VL-NEXT: vpbroadcastb %xmm6, %ymm6
; AVX512VL-NEXT: vpand %ymm6, %ymm3, %ymm3
; AVX512VL-NEXT: vpsllw %xmm4, %ymm0, %ymm4
; AVX512VL-NEXT: vpand %ymm6, %ymm4, %ymm4
; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3
; AVX512VL-NEXT: vpsllw %xmm4, %xmm5, %xmm4
; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4
; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm4, %zmm4
; AVX512VL-NEXT: vpandq %zmm4, %zmm3, %zmm3
; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512VL-NEXT: vpsubb %xmm2, %xmm4, %xmm4
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm6
; AVX512VL-NEXT: vpsrlw %xmm4, %ymm6, %ymm6
; AVX512VL-NEXT: vpsrlw %xmm4, %xmm5, %xmm5
; AVX512VL-NEXT: vpsrlw $8, %xmm5, %xmm5
; AVX512VL-NEXT: vpbroadcastb %xmm5, %ymm5
; AVX512VL-NEXT: vpand %ymm5, %ymm6, %ymm6
; AVX512VL-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512VL-NEXT: vpand %ymm5, %ymm1, %ymm1
; AVX512VL-NEXT: vinserti64x4 $1, %ymm6, %zmm1, %zmm1
; AVX512VL-NEXT: vporq %zmm1, %zmm3, %zmm1
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2
; AVX512VL-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0
; AVX512VL-NEXT: vpsrlw %xmm4, %xmm5, %xmm4
; AVX512VL-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4
; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm4, %zmm4
; AVX512VL-NEXT: vpternlogq $236, %zmm1, %zmm3, %zmm4
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm1
; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
; AVX512VL-NEXT: vpternlogq $226, %zmm4, %zmm1, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v64i8:
@@ -1510,40 +1508,28 @@ define <32 x i16> @splatconstant_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y) no
define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
; AVX512F-LABEL: splatconstant_funnnel_v64i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT: vpsllw $4, %ymm2, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm2
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm0
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_funnnel_v64i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512VL-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm2, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm2
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm0
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
@@ -483,14 +483,13 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vpsllw %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX512F-NEXT: vpsllw %xmm2, %xmm4, %xmm4
; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4
; AVX512F-NEXT: vpand %ymm4, %ymm1, %ymm1
; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm4, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm1
; AVX512F-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-NEXT: vpternlogq $236, %zmm1, %zmm3, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v64i8:
@@ -529,14 +528,13 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vpsllw %xmm2, %ymm1, %ymm1
; AVX512VL-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX512VL-NEXT: vpsllw %xmm2, %xmm4, %xmm4
; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4
; AVX512VL-NEXT: vpand %ymm4, %ymm1, %ymm1
; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm0
; AVX512VL-NEXT: vpand %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm1
; AVX512VL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpsllw %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512VL-NEXT: vpternlogq $236, %zmm1, %zmm3, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v64i8:
@@ -886,38 +884,26 @@ define <32 x i16> @splatconstant_funnnel_v32i16(<32 x i16> %x) nounwind {
define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x) nounwind {
; AVX512F-LABEL: splatconstant_funnnel_v64i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm4
; AVX512F-NEXT: vpand %ymm3, %ymm4, %ymm3
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-NEXT: vpsllw $4, %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm2, %zmm0, %zmm0
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT: vpsllw $4, %ymm2, %ymm3
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_funnnel_v64i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm4
; AVX512VL-NEXT: vpand %ymm3, %ymm4, %ymm3
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm1
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512VL-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm2, %zmm0, %zmm0
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm2, %ymm3
; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
@@ -805,68 +805,66 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i
define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt) nounwind {
; AVX512F-LABEL: splatvar_funnnel_v64i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm3
; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm4, %ymm3, %ymm3
; AVX512F-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX512F-NEXT: vpsrlw %xmm4, %xmm5, %xmm6
; AVX512F-NEXT: vpsrlw $8, %xmm6, %xmm6
; AVX512F-NEXT: vpbroadcastb %xmm6, %ymm6
; AVX512F-NEXT: vpand %ymm6, %ymm3, %ymm3
; AVX512F-NEXT: vpsrlw %xmm4, %ymm1, %ymm4
; AVX512F-NEXT: vpand %ymm6, %ymm4, %ymm4
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3
; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2
; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm3
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512F-NEXT: vpsubb %xmm2, %xmm4, %xmm4
; AVX512F-NEXT: vpsubb %xmm3, %xmm4, %xmm4
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm6
; AVX512F-NEXT: vpsllw %xmm4, %ymm6, %ymm6
; AVX512F-NEXT: vpsllw %xmm4, %xmm5, %xmm5
; AVX512F-NEXT: vpbroadcastb %xmm5, %ymm5
; AVX512F-NEXT: vpand %ymm5, %ymm6, %ymm6
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm5
; AVX512F-NEXT: vpsllw %xmm4, %ymm5, %ymm5
; AVX512F-NEXT: vpsllw %xmm4, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm5, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm6, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm3, %zmm0, %zmm3
; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm0
; AVX512F-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX512F-NEXT: vpsllw %xmm4, %xmm5, %xmm4
; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4
; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm4, %zmm4
; AVX512F-NEXT: vpandq %zmm4, %zmm0, %zmm0
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm4
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm2, %ymm4, %ymm4
; AVX512F-NEXT: vpsrlw %xmm2, %ymm1, %ymm6
; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm6, %zmm4
; AVX512F-NEXT: vpsrlw %xmm2, %xmm5, %xmm2
; AVX512F-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2
; AVX512F-NEXT: vpternlogq $236, %zmm4, %zmm0, %zmm2
; AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vpcmpeqb %ymm0, %ymm2, %ymm0
; AVX512F-NEXT: vpcmpeqb %ymm0, %ymm3, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-NEXT: vpternlogq $202, %zmm3, %zmm1, %zmm0
; AVX512F-NEXT: vpternlogq $202, %zmm2, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v64i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm3
; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm4, %ymm3, %ymm3
; AVX512VL-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX512VL-NEXT: vpsrlw %xmm4, %xmm5, %xmm6
; AVX512VL-NEXT: vpsrlw $8, %xmm6, %xmm6
; AVX512VL-NEXT: vpbroadcastb %xmm6, %ymm6
; AVX512VL-NEXT: vpand %ymm6, %ymm3, %ymm3
; AVX512VL-NEXT: vpsrlw %xmm4, %ymm1, %ymm4
; AVX512VL-NEXT: vpand %ymm6, %ymm4, %ymm4
; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3
; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm2
; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm3
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512VL-NEXT: vpsubb %xmm2, %xmm4, %xmm4
; AVX512VL-NEXT: vpsubb %xmm3, %xmm4, %xmm4
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm6
; AVX512VL-NEXT: vpsllw %xmm4, %ymm6, %ymm6
; AVX512VL-NEXT: vpsllw %xmm4, %xmm5, %xmm5
; AVX512VL-NEXT: vpbroadcastb %xmm5, %ymm5
; AVX512VL-NEXT: vpand %ymm5, %ymm6, %ymm6
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm5
; AVX512VL-NEXT: vpsllw %xmm4, %ymm5, %ymm5
; AVX512VL-NEXT: vpsllw %xmm4, %ymm0, %ymm0
; AVX512VL-NEXT: vpand %ymm5, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm6, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm3, %zmm0, %zmm3
; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm0
; AVX512VL-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX512VL-NEXT: vpsllw %xmm4, %xmm5, %xmm4
; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4
; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm4, %zmm4
; AVX512VL-NEXT: vpandq %zmm4, %zmm0, %zmm0
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm4
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm2, %ymm4, %ymm4
; AVX512VL-NEXT: vpsrlw %xmm2, %ymm1, %ymm6
; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm6, %zmm4
; AVX512VL-NEXT: vpsrlw %xmm2, %xmm5, %xmm2
; AVX512VL-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm2
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2
; AVX512VL-NEXT: vpternlogq $236, %zmm4, %zmm0, %zmm2
; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpcmpeqb %ymm0, %ymm2, %ymm0
; AVX512VL-NEXT: vpcmpeqb %ymm0, %ymm3, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512VL-NEXT: vpternlogq $202, %zmm3, %zmm1, %zmm0
; AVX512VL-NEXT: vpternlogq $202, %zmm2, %zmm1, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v64i8:
@@ -1494,40 +1492,28 @@ define <32 x i16> @splatconstant_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y) no
define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
; AVX512F-LABEL: splatconstant_funnnel_v64i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT: vpsllw $4, %ymm2, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm2
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm0
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_funnnel_v64i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512VL-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm2, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm2
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm0
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
@@ -480,15 +480,14 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
; AVX512F-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX512F-NEXT: vpsrlw %xmm1, %xmm4, %xmm4
; AVX512F-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4
; AVX512F-NEXT: vpand %ymm4, %ymm2, %ymm2
; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm4, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm0, %zmm3, %zmm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm2
; AVX512F-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-NEXT: vpternlogq $236, %zmm2, %zmm3, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v64i8:
@@ -524,15 +523,14 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
; AVX512VL-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX512VL-NEXT: vpsrlw %xmm1, %xmm4, %xmm4
; AVX512VL-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4
; AVX512VL-NEXT: vpand %ymm4, %ymm2, %ymm2
; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpand %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm0, %zmm3, %zmm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm2
; AVX512VL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX512VL-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512VL-NEXT: vpternlogq $236, %zmm2, %zmm3, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v64i8:
@@ -882,38 +880,26 @@ define <32 x i16> @splatconstant_funnnel_v32i16(<32 x i16> %x) nounwind {
define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x) nounwind {
; AVX512F-LABEL: splatconstant_funnnel_v64i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm4
; AVX512F-NEXT: vpand %ymm3, %ymm4, %ymm3
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-NEXT: vpsllw $4, %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm2, %zmm0, %zmm0
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT: vpsllw $4, %ymm2, %ymm3
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_funnnel_v64i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm4
; AVX512VL-NEXT: vpand %ymm3, %ymm4, %ymm3
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm1
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512VL-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm2, %zmm0, %zmm0
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm2, %ymm3
; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
@@ -133,38 +133,36 @@ define <32 x i16> @test_div7_32i16(<32 x i16> %a) nounwind {
define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
; AVX512F-LABEL: test_div7_64i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15],ymm1[24],ymm2[24],ymm1[25],ymm2[25],ymm1[26],ymm2[26],ymm1[27],ymm2[27],ymm1[28],ymm2[28],ymm1[29],ymm2[29],ymm1[30],ymm2[30],ymm1[31],ymm2[31]
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37]
; AVX512F-NEXT: vpmullw %ymm4, %ymm3, %ymm3
; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm5 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[16],ymm2[16],ymm1[17],ymm2[17],ymm1[18],ymm2[18],ymm1[19],ymm2[19],ymm1[20],ymm2[20],ymm1[21],ymm2[21],ymm1[22],ymm2[22],ymm1[23],ymm2[23]
; AVX512F-NEXT: vpmullw %ymm4, %ymm5, %ymm5
; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5
; AVX512F-NEXT: vpackuswb %ymm3, %ymm5, %ymm3
; AVX512F-NEXT: vpsubb %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512F-NEXT: vpand %ymm5, %ymm1, %ymm1
; AVX512F-NEXT: vpaddb %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpsrlw $2, %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm6 = ymm0[8],ymm2[8],ymm0[9],ymm2[9],ymm0[10],ymm2[10],ymm0[11],ymm2[11],ymm0[12],ymm2[12],ymm0[13],ymm2[13],ymm0[14],ymm2[14],ymm0[15],ymm2[15],ymm0[24],ymm2[24],ymm0[25],ymm2[25],ymm0[26],ymm2[26],ymm0[27],ymm2[27],ymm0[28],ymm2[28],ymm0[29],ymm2[29],ymm0[30],ymm2[30],ymm0[31],ymm2[31]
; AVX512F-NEXT: vpmullw %ymm4, %ymm6, %ymm6
; AVX512F-NEXT: vpsrlw $8, %ymm6, %ymm6
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm0[0],ymm2[0],ymm0[1],ymm2[1],ymm0[2],ymm2[2],ymm0[3],ymm2[3],ymm0[4],ymm2[4],ymm0[5],ymm2[5],ymm0[6],ymm2[6],ymm0[7],ymm2[7],ymm0[16],ymm2[16],ymm0[17],ymm2[17],ymm0[18],ymm2[18],ymm0[19],ymm2[19],ymm0[20],ymm2[20],ymm0[21],ymm2[21],ymm0[22],ymm2[22],ymm0[23],ymm2[23]
; AVX512F-NEXT: vpmullw %ymm4, %ymm2, %ymm2
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37]
; AVX512F-NEXT: vpmullw %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2
; AVX512F-NEXT: vpackuswb %ymm6, %ymm2, %ymm2
; AVX512F-NEXT: vpsubb %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
; AVX512F-NEXT: vpmullw %ymm3, %ymm4, %ymm4
; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
; AVX512F-NEXT: vpackuswb %ymm2, %ymm4, %ymm2
; AVX512F-NEXT: vpsubb %ymm2, %ymm0, %ymm4
; AVX512F-NEXT: vpsrlw $1, %ymm4, %ymm4
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX512F-NEXT: vpand %ymm5, %ymm4, %ymm4
; AVX512F-NEXT: vpaddb %ymm2, %ymm4, %ymm2
; AVX512F-NEXT: vpsrlw $2, %ymm2, %ymm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; AVX512F-NEXT: vpmullw %ymm3, %ymm4, %ymm4
; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
; AVX512F-NEXT: vpmullw %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm1
; AVX512F-NEXT: vpackuswb %ymm4, %ymm1, %ymm1
; AVX512F-NEXT: vpsubb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $1, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm5, %ymm0, %ymm0
; AVX512F-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $2, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: test_div7_64i8:
@ -418,22 +418,21 @@ define <64 x i8> @splatvar_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT: vpsllw %xmm2, %ymm3, %ymm4
; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm5
; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm5, %zmm4
; AVX512F-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX512F-NEXT: vpsllw %xmm2, %xmm5, %xmm6
; AVX512F-NEXT: vpbroadcastb %xmm6, %ymm6
; AVX512F-NEXT: vpand %ymm6, %ymm4, %ymm4
; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm2
; AVX512F-NEXT: vpand %ymm6, %ymm2, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2
; AVX512F-NEXT: vpsllw %xmm2, %xmm5, %xmm2
; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2
; AVX512F-NEXT: vpandq %zmm2, %zmm4, %zmm2
; AVX512F-NEXT: vpsrlw %xmm1, %ymm3, %ymm3
; AVX512F-NEXT: vpsrlw %xmm1, %xmm5, %xmm4
; AVX512F-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4
; AVX512F-NEXT: vpand %ymm4, %ymm3, %ymm3
; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm4, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm3
; AVX512F-NEXT: vpsrlw %xmm1, %xmm5, %xmm0
; AVX512F-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-NEXT: vpternlogq $236, %zmm3, %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_rotate_v64i8:
@ -445,22 +444,21 @@ define <64 x i8> @splatvar_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512VL-NEXT: vpsllw %xmm2, %ymm3, %ymm4
; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm5
; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm5, %zmm4
; AVX512VL-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX512VL-NEXT: vpsllw %xmm2, %xmm5, %xmm6
; AVX512VL-NEXT: vpbroadcastb %xmm6, %ymm6
; AVX512VL-NEXT: vpand %ymm6, %ymm4, %ymm4
; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm2
; AVX512VL-NEXT: vpand %ymm6, %ymm2, %ymm2
; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2
; AVX512VL-NEXT: vpsllw %xmm2, %xmm5, %xmm2
; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm2
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2
; AVX512VL-NEXT: vpandq %zmm2, %zmm4, %zmm2
; AVX512VL-NEXT: vpsrlw %xmm1, %ymm3, %ymm3
; AVX512VL-NEXT: vpsrlw %xmm1, %xmm5, %xmm4
; AVX512VL-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4
; AVX512VL-NEXT: vpand %ymm4, %ymm3, %ymm3
; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpand %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm3
; AVX512VL-NEXT: vpsrlw %xmm1, %xmm5, %xmm0
; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX512VL-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512VL-NEXT: vpternlogq $236, %zmm3, %zmm2, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_rotate_v64i8:
@ -809,38 +807,26 @@ define <32 x i16> @splatconstant_rotate_v32i16(<32 x i16> %a) nounwind {
define <64 x i8> @splatconstant_rotate_v64i8(<64 x i8> %a) nounwind {
; AVX512F-LABEL: splatconstant_rotate_v64i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vpsllw $4, %ymm1, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm4
; AVX512F-NEXT: vpand %ymm3, %ymm4, %ymm3
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT: vpsllw $4, %ymm2, %ymm3
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_rotate_v64i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm4
; AVX512VL-NEXT: vpand %ymm3, %ymm4, %ymm3
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm2, %ymm3
; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_rotate_v64i8:
@ -947,40 +933,28 @@ define <32 x i16> @splatconstant_rotate_mask_v32i16(<32 x i16> %a) nounwind {
define <64 x i8> @splatconstant_rotate_mask_v64i8(<64 x i8> %a) nounwind {
; AVX512F-LABEL: splatconstant_rotate_mask_v64i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vpsllw $4, %ymm1, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm4
; AVX512F-NEXT: vpand %ymm3, %ymm4, %ymm3
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT: vpsllw $4, %ymm2, %ymm3
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm1
; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm0
; AVX512F-NEXT: vpternlogq $248, {{.*}}(%rip), %zmm1, %zmm0
; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm2
; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm0
; AVX512F-NEXT: vpternlogq $248, {{.*}}(%rip), %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_rotate_mask_v64i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm2
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm4
; AVX512VL-NEXT: vpand %ymm3, %ymm4, %ymm3
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VL-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm2, %ymm3
; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm1
; AVX512VL-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm0
; AVX512VL-NEXT: vpternlogq $248, {{.*}}(%rip), %zmm1, %zmm0
; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm2
; AVX512VL-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm0
; AVX512VL-NEXT: vpternlogq $248, {{.*}}(%rip), %zmm2, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_rotate_mask_v64i8:
@ -156,14 +156,14 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX512DQ-NEXT: vpsrlw %xmm1, %xmm3, %xmm3
; AVX512DQ-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX512DQ-NEXT: vpbroadcastb %xmm3, %ymm3
; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512DQ-NEXT: vpsrlw %xmm1, %xmm2, %xmm1
; AVX512DQ-NEXT: vpsrlw $8, %xmm1, %xmm1
; AVX512DQ-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
; AVX512DQ-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v64i8:
@ -308,13 +308,11 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) nounwind {
define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v64i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512DQ-NEXT: vpsrlw $3, %ymm1, %ymm1
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT: vpsrlw $3, %ymm0, %ymm1
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT: vpsrlw $3, %ymm0, %ymm0
; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_shift_v64i8:
@ -151,13 +151,13 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT: vpsllw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX512DQ-NEXT: vpsllw %xmm1, %xmm3, %xmm3
; AVX512DQ-NEXT: vpbroadcastb %xmm3, %ymm3
; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512DQ-NEXT: vpsllw %xmm1, %xmm2, %xmm1
; AVX512DQ-NEXT: vpbroadcastb %xmm1, %ymm1
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
; AVX512DQ-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_shift_v64i8:
@ -306,13 +306,11 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) nounwind {
define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v64i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512DQ-NEXT: vpsllw $3, %ymm1, %ymm1
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248]
; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT: vpsllw $3, %ymm0, %ymm1
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT: vpsllw $3, %ymm0, %ymm0
; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_shift_v64i8: