Mirror of https://github.com/RPCS3/llvm-mirror.git (synced 2024-11-26 04:32:44 +01:00)

commit b7e95cd192 (parent 06fe5a7928)

[X86][AVX512] Added support for VPMOVZX shuffle decoding.

llvm-svn: 260007
@@ -40,6 +40,14 @@ using namespace llvm;
   CASE_AVX_INS_COMMON(Inst, Y, r##src)      \
   CASE_SSE_INS_COMMON(Inst, r##src)
 
+#define CASE_PMOVZX(Inst, src)              \
+  CASE_MASK_INS_COMMON(Inst, Z, r##src)     \
+  CASE_MASK_INS_COMMON(Inst, Z256, r##src)  \
+  CASE_MASK_INS_COMMON(Inst, Z128, r##src)  \
+  CASE_AVX_INS_COMMON(Inst, , r##src)       \
+  CASE_AVX_INS_COMMON(Inst, Y, r##src)      \
+  CASE_SSE_INS_COMMON(Inst, r##src)
+
 #define CASE_UNPCK(Inst, src)               \
   CASE_MASK_INS_COMMON(Inst, Z, r##src)     \
   CASE_MASK_INS_COMMON(Inst, Z256, r##src)  \
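The new CASE_PMOVZX helper is the core of the change: until now the comment printer matched only the hand-listed SSE/AVX/AVX2 PMOVZX opcodes, so the EVEX-encoded AVX-512 forms (Z128/Z256/Z) fell through without a shuffle comment. The following is a minimal standalone sketch of how the macro is expected to expand; the simplified stand-in helper macros and the plain (non-X86::) enum are illustrative only, not the real definitions from this file.

```cpp
// Standalone illustration of CASE_PMOVZX expansion, assuming the
// CASE_*_INS_COMMON helpers paste "case <opcode>:" labels as they do for the
// neighboring CASE_UNPCK users. Opcode names below mimic the X86:: ones.
#include <cstdio>

enum Opcode {
  PMOVZXBWrr, VPMOVZXBWrr, VPMOVZXBWYrr,
  VPMOVZXBWZ128rr, VPMOVZXBWZ256rr, VPMOVZXBWZrr,
};

// Simplified stand-ins for the real helper macros.
#define CASE_MASK_INS_COMMON(Inst, Suffix, src) case V##Inst##Suffix##src:
#define CASE_AVX_INS_COMMON(Inst, Suffix, src)  case V##Inst##Suffix##src:
#define CASE_SSE_INS_COMMON(Inst, src)          case Inst##src:

#define CASE_PMOVZX(Inst, src)                  \
  CASE_MASK_INS_COMMON(Inst, Z, r##src)         \
  CASE_MASK_INS_COMMON(Inst, Z256, r##src)      \
  CASE_MASK_INS_COMMON(Inst, Z128, r##src)      \
  CASE_AVX_INS_COMMON(Inst, , r##src)           \
  CASE_AVX_INS_COMMON(Inst, Y, r##src)          \
  CASE_SSE_INS_COMMON(Inst, r##src)

static const char *describe(Opcode Op) {
  switch (Op) {
  CASE_PMOVZX(PMOVZXBW, r) // one macro covers SSE, AVX, AVX2 and AVX-512 forms
    return "byte -> word zero extension";
  }
  return "unknown";
}

int main() { std::printf("%s\n", describe(VPMOVZXBWZ256rr)); }
```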
@@ -95,46 +103,22 @@ static MVT getZeroExtensionResultType(const MCInst *MI) {
   default:
     llvm_unreachable("Unknown zero extension instruction");
   // zero extension to i16
-  case X86::PMOVZXBWrm:
-  case X86::PMOVZXBWrr:
-  case X86::VPMOVZXBWrm:
-  case X86::VPMOVZXBWrr:
-  case X86::VPMOVZXBWYrm:
-  case X86::VPMOVZXBWYrr:
+  CASE_PMOVZX(PMOVZXBW, m)
+  CASE_PMOVZX(PMOVZXBW, r)
     return getRegOperandVectorVT(MI, MVT::i16, 0);
   // zero extension to i32
-  case X86::PMOVZXBDrm:
-  case X86::PMOVZXBDrr:
-  case X86::VPMOVZXBDrm:
-  case X86::VPMOVZXBDrr:
-  case X86::VPMOVZXBDYrm:
-  case X86::VPMOVZXBDYrr:
-  case X86::PMOVZXWDrm:
-  case X86::PMOVZXWDrr:
-  case X86::VPMOVZXWDrm:
-  case X86::VPMOVZXWDrr:
-  case X86::VPMOVZXWDYrm:
-  case X86::VPMOVZXWDYrr:
+  CASE_PMOVZX(PMOVZXBD, m)
+  CASE_PMOVZX(PMOVZXBD, r)
+  CASE_PMOVZX(PMOVZXWD, m)
+  CASE_PMOVZX(PMOVZXWD, r)
     return getRegOperandVectorVT(MI, MVT::i32, 0);
   // zero extension to i64
-  case X86::PMOVZXBQrm:
-  case X86::PMOVZXBQrr:
-  case X86::VPMOVZXBQrm:
-  case X86::VPMOVZXBQrr:
-  case X86::VPMOVZXBQYrm:
-  case X86::VPMOVZXBQYrr:
-  case X86::PMOVZXWQrm:
-  case X86::PMOVZXWQrr:
-  case X86::VPMOVZXWQrm:
-  case X86::VPMOVZXWQrr:
-  case X86::VPMOVZXWQYrm:
-  case X86::VPMOVZXWQYrr:
-  case X86::PMOVZXDQrm:
-  case X86::PMOVZXDQrr:
-  case X86::VPMOVZXDQrm:
-  case X86::VPMOVZXDQrr:
-  case X86::VPMOVZXDQYrm:
-  case X86::VPMOVZXDQYrr:
+  CASE_PMOVZX(PMOVZXBQ, m)
+  CASE_PMOVZX(PMOVZXBQ, r)
+  CASE_PMOVZX(PMOVZXWQ, m)
+  CASE_PMOVZX(PMOVZXWQ, r)
+  CASE_PMOVZX(PMOVZXDQ, m)
+  CASE_PMOVZX(PMOVZXDQ, r)
     return getRegOperandVectorVT(MI, MVT::i64, 0);
   }
 }
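The switch only picks the destination element type (i16/i32/i64); getRegOperandVectorVT then builds the full destination vector type from the width of destination operand 0, which is what lets one case group serve XMM, YMM and ZMM forms alike. That helper is not shown in this diff, so here is only a hypothetical sketch of the idea, not the LLVM implementation.

```cpp
// Hypothetical reimplementation of the idea behind
// getRegOperandVectorVT(MI, MVT::i16, 0): element type from the opcode group,
// vector width from the destination register class (128/256/512 bits).
#include <cstdio>

struct VT { unsigned EltBits; unsigned NumElts; };

// RegBits would come from whether operand 0 is an XMM, YMM or ZMM register.
static VT getRegOperandVectorVT(unsigned RegBits, unsigned EltBits) {
  return VT{EltBits, RegBits / EltBits};
}

int main() {
  VT DstVT = getRegOperandVectorVT(512, 16); // e.g. a ZMM destination, i16 elts
  std::printf("v%ui%u\n", DstVT.NumElts, DstVT.EltBits); // prints v32i16
}
```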
@@ -689,56 +673,32 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
     Src2Name = getRegName(MI->getOperand(2).getReg());
     break;
 
-  case X86::PMOVZXBWrr:
-  case X86::PMOVZXBDrr:
-  case X86::PMOVZXBQrr:
-  case X86::VPMOVZXBWrr:
-  case X86::VPMOVZXBDrr:
-  case X86::VPMOVZXBQrr:
-  case X86::VPMOVZXBWYrr:
-  case X86::VPMOVZXBDYrr:
-  case X86::VPMOVZXBQYrr:
-    Src1Name = getRegName(MI->getOperand(1).getReg());
+  CASE_PMOVZX(PMOVZXBW, r)
+  CASE_PMOVZX(PMOVZXBD, r)
+  CASE_PMOVZX(PMOVZXBQ, r)
+    Src1Name = getRegName(MI->getOperand(MI->getNumOperands() - 1).getReg());
     // FALL THROUGH.
-  case X86::PMOVZXBWrm:
-  case X86::PMOVZXBDrm:
-  case X86::PMOVZXBQrm:
-  case X86::VPMOVZXBWrm:
-  case X86::VPMOVZXBDrm:
-  case X86::VPMOVZXBQrm:
-  case X86::VPMOVZXBWYrm:
-  case X86::VPMOVZXBDYrm:
-  case X86::VPMOVZXBQYrm:
+  CASE_PMOVZX(PMOVZXBW, m)
+  CASE_PMOVZX(PMOVZXBD, m)
+  CASE_PMOVZX(PMOVZXBQ, m)
     DecodeZeroExtendMask(MVT::i8, getZeroExtensionResultType(MI), ShuffleMask);
     DestName = getRegName(MI->getOperand(0).getReg());
     break;
 
-  case X86::PMOVZXWDrr:
-  case X86::PMOVZXWQrr:
-  case X86::VPMOVZXWDrr:
-  case X86::VPMOVZXWQrr:
-  case X86::VPMOVZXWDYrr:
-  case X86::VPMOVZXWQYrr:
-    Src1Name = getRegName(MI->getOperand(1).getReg());
+  CASE_PMOVZX(PMOVZXWD, r)
+  CASE_PMOVZX(PMOVZXWQ, r)
+    Src1Name = getRegName(MI->getOperand(MI->getNumOperands() - 1).getReg());
     // FALL THROUGH.
-  case X86::PMOVZXWDrm:
-  case X86::PMOVZXWQrm:
-  case X86::VPMOVZXWDrm:
-  case X86::VPMOVZXWQrm:
-  case X86::VPMOVZXWDYrm:
-  case X86::VPMOVZXWQYrm:
+  CASE_PMOVZX(PMOVZXWD, m)
+  CASE_PMOVZX(PMOVZXWQ, m)
     DecodeZeroExtendMask(MVT::i16, getZeroExtensionResultType(MI), ShuffleMask);
     DestName = getRegName(MI->getOperand(0).getReg());
     break;
 
-  case X86::PMOVZXDQrr:
-  case X86::VPMOVZXDQrr:
-  case X86::VPMOVZXDQYrr:
-    Src1Name = getRegName(MI->getOperand(1).getReg());
+  CASE_PMOVZX(PMOVZXDQ, r)
+    Src1Name = getRegName(MI->getOperand(MI->getNumOperands() - 1).getReg());
     // FALL THROUGH.
-  case X86::PMOVZXDQrm:
-  case X86::VPMOVZXDQrm:
-  case X86::VPMOVZXDQYrm:
+  CASE_PMOVZX(PMOVZXDQ, m)
     DecodeZeroExtendMask(MVT::i32, getZeroExtensionResultType(MI), ShuffleMask);
     DestName = getRegName(MI->getOperand(0).getReg());
     break;
 
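DecodeZeroExtendMask builds the shuffle mask that the comment printer renders; judging from the comments in the updated tests below, each destination element takes one source element and pads the rest with zeros. The real function lives in the X86 shuffle-decode helpers and takes MVTs, so the following is only a hedged standalone sketch using plain integers and -1 as the "zero lane" sentinel (SM_SentinelZero in LLVM).

```cpp
// Standalone sketch of the zero-extend shuffle decoding invoked above, e.g.
// DecodeZeroExtendMask(MVT::i8, getZeroExtensionResultType(MI), ShuffleMask).
// Assumption: the mask is expressed in source-element units.
#include <cstdio>
#include <vector>

static void decodeZeroExtendMask(unsigned SrcEltBits, unsigned DstEltBits,
                                 unsigned NumDstElts, std::vector<int> &Mask) {
  unsigned Scale = DstEltBits / SrcEltBits; // e.g. i8 -> i32 gives 4
  for (unsigned i = 0; i != NumDstElts; ++i) {
    Mask.push_back(i);                      // low part comes from source elt i
    for (unsigned j = 1; j != Scale; ++j)
      Mask.push_back(-1);                   // remaining lanes are zero
  }
}

int main() {
  std::vector<int> Mask;
  decodeZeroExtendMask(8, 32, 4, Mask);     // a vpmovzxbd-style extension
  for (int M : Mask)                        // prints: 0 -1 -1 -1 1 -1 -1 -1 ...
    std::printf("%d ", M);
  std::printf("\n");
}
```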
@@ -15,7 +15,7 @@ define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind re
 ; SKX: ## BB#0:
 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
 ; SKX-NEXT: vpmovw2m %xmm0, %k1
-; SKX-NEXT: vpmovzxbw (%rdi), %xmm0 {%k1} {z}
+; SKX-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
 ; SKX-NEXT: retq
 %a = load <8 x i8>,<8 x i8> *%i,align 1
 %x = zext <8 x i8> %a to <8 x i16>
@@ -59,7 +59,7 @@ define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwi
 ; SKX: ## BB#0:
 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
 ; SKX-NEXT: vpmovb2m %xmm0, %k1
-; SKX-NEXT: vpmovzxbw (%rdi), %ymm0 {%k1} {z}
+; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
 ; SKX-NEXT: retq
 %a = load <16 x i8>,<16 x i8> *%i,align 1
 %x = zext <16 x i8> %a to <16 x i16>
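The {{.*#+}} patterns in these checks match the comment string that EmitAnyX86InstComments appends after the instruction. How a decoded mask could be rendered into the "mem[0],zero,..." text matched above is sketched here with a hypothetical helper; the actual printing code in LLVM is not shown in this diff and may differ in detail.

```cpp
// Hypothetical rendering of a decoded zero-extend mask into the comment text
// matched by the checks above, e.g. "xmm0 = mem[0],zero,mem[1],zero".
#include <cstdio>
#include <string>
#include <vector>

static std::string renderMask(const std::string &Dst, const std::string &Src,
                              const std::vector<int> &Mask) {
  std::string S = Dst + " = ";
  for (size_t i = 0; i != Mask.size(); ++i) {
    if (i) S += ",";
    if (Mask[i] < 0)
      S += "zero";                                    // zeroed lane
    else
      S += Src + "[" + std::to_string(Mask[i]) + "]"; // lane copied from source
  }
  return S;
}

int main() {
  // Mask for a byte->word zero extension of 4 elements, source named "mem".
  std::vector<int> Mask = {0, -1, 1, -1, 2, -1, 3, -1};
  std::printf("%s\n", renderMask("xmm0", "mem", Mask).c_str());
  // prints: xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
}
```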
@ -90,15 +90,10 @@ define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwi
|
||||
}
|
||||
|
||||
define <16 x i16> @zext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
|
||||
; KNL-LABEL: zext_16x8_to_16x16:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: zext_16x8_to_16x16:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpmovzxbw %xmm0, %ymm0
|
||||
; SKX-NEXT: retq
|
||||
; ALL-LABEL: zext_16x8_to_16x16:
|
||||
; ALL: ## BB#0:
|
||||
; ALL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; ALL-NEXT: retq
|
||||
%x = zext <16 x i8> %a to <16 x i16>
|
||||
ret <16 x i16> %x
|
||||
}
|
||||
@ -117,7 +112,7 @@ define <16 x i16> @zext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwi
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
|
||||
; SKX-NEXT: vpmovb2m %xmm1, %k1
|
||||
; SKX-NEXT: vpmovzxbw %xmm0, %ymm0 {%k1} {z}
|
||||
; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; SKX-NEXT: retq
|
||||
%x = zext <16 x i8> %a to <16 x i16>
|
||||
%ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
|
||||
@ -175,7 +170,7 @@ define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwi
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
|
||||
; SKX-NEXT: vpmovb2m %ymm0, %k1
|
||||
; SKX-NEXT: vpmovzxbw (%rdi), %zmm0 {%k1} {z}
|
||||
; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero
|
||||
; SKX-NEXT: retq
|
||||
%a = load <32 x i8>,<32 x i8> *%i,align 1
|
||||
%x = zext <32 x i8> %a to <32 x i16>
|
||||
@ -223,7 +218,7 @@ define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
|
||||
;
|
||||
; SKX-LABEL: zext_32x8_to_32x16:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpmovzxbw %ymm0, %zmm0
|
||||
; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
|
||||
; SKX-NEXT: retq
|
||||
%x = zext <32 x i8> %a to <32 x i16>
|
||||
ret <32 x i16> %x
|
||||
@ -250,7 +245,7 @@ define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwi
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpsllw $7, %ymm1, %ymm1
|
||||
; SKX-NEXT: vpmovb2m %ymm1, %k1
|
||||
; SKX-NEXT: vpmovzxbw %ymm0, %zmm0 {%k1} {z}
|
||||
; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
|
||||
; SKX-NEXT: retq
|
||||
%x = zext <32 x i8> %a to <32 x i16>
|
||||
%ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
|
||||
@ -315,7 +310,7 @@ define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind re
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
|
||||
; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1
|
||||
; SKX-NEXT: vpmovzxbd (%rdi), %xmm0 {%k1} {z}
|
||||
; SKX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
|
||||
; SKX-NEXT: retq
|
||||
%a = load <4 x i8>,<4 x i8> *%i,align 1
|
||||
%x = zext <4 x i8> %a to <4 x i32>
|
||||
@ -359,7 +354,7 @@ define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind re
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
|
||||
; SKX-NEXT: vpmovw2m %xmm0, %k1
|
||||
; SKX-NEXT: vpmovzxbd (%rdi), %ymm0 {%k1} {z}
|
||||
; SKX-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
|
||||
; SKX-NEXT: retq
|
||||
%a = load <8 x i8>,<8 x i8> *%i,align 1
|
||||
%x = zext <8 x i8> %a to <8 x i32>
|
||||
@ -396,14 +391,14 @@ define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwi
|
||||
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
|
||||
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
|
||||
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
|
||||
; KNL-NEXT: vpmovzxbd (%rdi), %zmm0 {%k1} {z}
|
||||
; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: zext_16x8mem_to_16x32:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
|
||||
; SKX-NEXT: vpmovb2m %xmm0, %k1
|
||||
; SKX-NEXT: vpmovzxbd (%rdi), %zmm0 {%k1} {z}
|
||||
; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
|
||||
; SKX-NEXT: retq
|
||||
%a = load <16 x i8>,<16 x i8> *%i,align 1
|
||||
%x = zext <16 x i8> %a to <16 x i32>
|
||||
@ -438,14 +433,14 @@ define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounw
|
||||
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
|
||||
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
|
||||
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
|
||||
; KNL-NEXT: vpmovzxbd %xmm0, %zmm0 {%k1} {z}
|
||||
; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: zext_16x8_to_16x32_mask:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
|
||||
; SKX-NEXT: vpmovb2m %xmm1, %k1
|
||||
; SKX-NEXT: vpmovzxbd %xmm0, %zmm0 {%k1} {z}
|
||||
; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
|
||||
; SKX-NEXT: retq
|
||||
%x = zext <16 x i8> %a to <16 x i32>
|
||||
%ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
|
||||
@ -475,7 +470,7 @@ define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounw
|
||||
define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
|
||||
; ALL-LABEL: zext_16x8_to_16x32:
|
||||
; ALL: ## BB#0:
|
||||
; ALL-NEXT: vpmovzxbd %xmm0, %zmm0
|
||||
; ALL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
|
||||
; ALL-NEXT: retq
|
||||
%x = zext <16 x i8> %i to <16 x i32>
|
||||
ret <16 x i32> %x
|
||||
@ -504,7 +499,7 @@ define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind re
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
|
||||
; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1
|
||||
; SKX-NEXT: vpmovzxbq (%rdi), %xmm0 {%k1} {z}
|
||||
; SKX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; SKX-NEXT: retq
|
||||
%a = load <2 x i8>,<2 x i8> *%i,align 1
|
||||
%x = zext <2 x i8> %a to <2 x i64>
|
||||
@ -556,7 +551,7 @@ define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind re
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
|
||||
; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1
|
||||
; SKX-NEXT: vpmovzxbq (%rdi), %ymm0 {%k1} {z}
|
||||
; SKX-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
|
||||
; SKX-NEXT: retq
|
||||
%a = load <4 x i8>,<4 x i8> *%i,align 1
|
||||
%x = zext <4 x i8> %a to <4 x i64>
|
||||
@ -602,14 +597,14 @@ define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind re
|
||||
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
|
||||
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
|
||||
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
|
||||
; KNL-NEXT: vpmovzxbq (%rdi), %zmm0 {%k1} {z}
|
||||
; KNL-NEXT: vpmovzxbq {{.*#+}} zmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: zext_8x8mem_to_8x64:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
|
||||
; SKX-NEXT: vpmovw2m %xmm0, %k1
|
||||
; SKX-NEXT: vpmovzxbq (%rdi), %zmm0 {%k1} {z}
|
||||
; SKX-NEXT: vpmovzxbq {{.*#+}} zmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
|
||||
; SKX-NEXT: retq
|
||||
%a = load <8 x i8>,<8 x i8> *%i,align 1
|
||||
%x = zext <8 x i8> %a to <8 x i64>
|
||||
@ -661,7 +656,7 @@ define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
|
||||
; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1
|
||||
; SKX-NEXT: vpmovzxwd (%rdi), %xmm0 {%k1} {z}
|
||||
; SKX-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
|
||||
; SKX-NEXT: retq
|
||||
%a = load <4 x i16>,<4 x i16> *%i,align 1
|
||||
%x = zext <4 x i16> %a to <4 x i32>
|
||||
@ -716,7 +711,7 @@ define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
|
||||
; SKX-NEXT: vpmovw2m %xmm0, %k1
|
||||
; SKX-NEXT: vpmovzxwd (%rdi), %ymm0 {%k1} {z}
|
||||
; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
|
||||
; SKX-NEXT: retq
|
||||
%a = load <8 x i16>,<8 x i16> *%i,align 1
|
||||
%x = zext <8 x i16> %a to <8 x i32>
|
||||
@ -772,7 +767,7 @@ define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpsllw $15, %xmm1, %xmm1
|
||||
; SKX-NEXT: vpmovw2m %xmm1, %k1
|
||||
; SKX-NEXT: vpmovzxwd %xmm0, %ymm0 {%k1} {z}
|
||||
; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; SKX-NEXT: retq
|
||||
%x = zext <8 x i16> %a to <8 x i32>
|
||||
%ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
|
||||
@ -780,15 +775,10 @@ define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind
|
||||
}
|
||||
|
||||
define <8 x i32> @zext_8x16_to_8x32(<8 x i16> %a ) nounwind readnone {
|
||||
; KNL-LABEL: zext_8x16_to_8x32:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: zext_8x16_to_8x32:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpmovzxwd %xmm0, %ymm0
|
||||
; SKX-NEXT: retq
|
||||
; ALL-LABEL: zext_8x16_to_8x32:
|
||||
; ALL: ## BB#0:
|
||||
; ALL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; ALL-NEXT: retq
|
||||
%x = zext <8 x i16> %a to <8 x i32>
|
||||
ret <8 x i32> %x
|
||||
}
|
||||
@ -799,14 +789,14 @@ define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) noun
|
||||
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
|
||||
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
|
||||
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
|
||||
; KNL-NEXT: vpmovzxwd (%rdi), %zmm0 {%k1} {z}
|
||||
; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: zext_16x16mem_to_16x32:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
|
||||
; SKX-NEXT: vpmovb2m %xmm0, %k1
|
||||
; SKX-NEXT: vpmovzxwd (%rdi), %zmm0 {%k1} {z}
|
||||
; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
|
||||
; SKX-NEXT: retq
|
||||
%a = load <16 x i16>,<16 x i16> *%i,align 1
|
||||
%x = zext <16 x i16> %a to <16 x i32>
|
||||
@ -850,14 +840,14 @@ define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a , <16 x i1> %mask) noun
|
||||
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
|
||||
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
|
||||
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
|
||||
; KNL-NEXT: vpmovzxwd %ymm0, %zmm0 {%k1} {z}
|
||||
; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: zext_16x16_to_16x32mask:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
|
||||
; SKX-NEXT: vpmovb2m %xmm1, %k1
|
||||
; SKX-NEXT: vpmovzxwd %ymm0, %zmm0 {%k1} {z}
|
||||
; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
|
||||
; SKX-NEXT: retq
|
||||
%x = zext <16 x i16> %a to <16 x i32>
|
||||
%ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
|
||||
@ -867,7 +857,7 @@ define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a , <16 x i1> %mask) noun
|
||||
define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %a ) nounwind readnone {
|
||||
; ALL-LABEL: zext_16x16_to_16x32:
|
||||
; ALL: ## BB#0:
|
||||
; ALL-NEXT: vpmovzxwd %ymm0, %zmm0
|
||||
; ALL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
|
||||
; ALL-NEXT: retq
|
||||
%x = zext <16 x i16> %a to <16 x i32>
|
||||
ret <16 x i32> %x
|
||||
@ -887,7 +877,7 @@ define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
|
||||
; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1
|
||||
; SKX-NEXT: vpmovzxwq (%rdi), %xmm0 {%k1} {z}
|
||||
; SKX-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
|
||||
; SKX-NEXT: retq
|
||||
%a = load <2 x i16>,<2 x i16> *%i,align 1
|
||||
%x = zext <2 x i16> %a to <2 x i64>
|
||||
@ -941,7 +931,7 @@ define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
|
||||
; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1
|
||||
; SKX-NEXT: vpmovzxwq (%rdi), %ymm0 {%k1} {z}
|
||||
; SKX-NEXT: vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
|
||||
; SKX-NEXT: retq
|
||||
%a = load <4 x i16>,<4 x i16> *%i,align 1
|
||||
%x = zext <4 x i16> %a to <4 x i64>
|
||||
@ -987,14 +977,14 @@ define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind
|
||||
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
|
||||
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
|
||||
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
|
||||
; KNL-NEXT: vpmovzxwq (%rdi), %zmm0 {%k1} {z}
|
||||
; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: zext_8x16mem_to_8x64:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
|
||||
; SKX-NEXT: vpmovw2m %xmm0, %k1
|
||||
; SKX-NEXT: vpmovzxwq (%rdi), %zmm0 {%k1} {z}
|
||||
; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
|
||||
; SKX-NEXT: retq
|
||||
%a = load <8 x i16>,<8 x i16> *%i,align 1
|
||||
%x = zext <8 x i16> %a to <8 x i64>
|
||||
@ -1039,14 +1029,14 @@ define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a , <8 x i1> %mask) nounwind
|
||||
; KNL-NEXT: vpmovsxwq %xmm1, %zmm1
|
||||
; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
|
||||
; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
|
||||
; KNL-NEXT: vpmovzxwq %xmm0, %zmm0 {%k1} {z}
|
||||
; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: zext_8x16_to_8x64mask:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpsllw $15, %xmm1, %xmm1
|
||||
; SKX-NEXT: vpmovw2m %xmm1, %k1
|
||||
; SKX-NEXT: vpmovzxwq %xmm0, %zmm0 {%k1} {z}
|
||||
; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
|
||||
; SKX-NEXT: retq
|
||||
%x = zext <8 x i16> %a to <8 x i64>
|
||||
%ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
|
||||
@ -1056,7 +1046,7 @@ define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a , <8 x i1> %mask) nounwind
|
||||
define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %a) nounwind readnone {
|
||||
; ALL-LABEL: zext_8x16_to_8x64:
|
||||
; ALL: ## BB#0:
|
||||
; ALL-NEXT: vpmovzxwq %xmm0, %zmm0
|
||||
; ALL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
|
||||
; ALL-NEXT: retq
|
||||
%ret = zext <8 x i16> %a to <8 x i64>
|
||||
ret <8 x i64> %ret
|
||||
@ -1076,7 +1066,7 @@ define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
|
||||
; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1
|
||||
; SKX-NEXT: vpmovzxdq (%rdi), %xmm0 {%k1} {z}
|
||||
; SKX-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
|
||||
; SKX-NEXT: retq
|
||||
%a = load <2 x i32>,<2 x i32> *%i,align 1
|
||||
%x = zext <2 x i32> %a to <2 x i64>
|
||||
@ -1130,7 +1120,7 @@ define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
|
||||
; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1
|
||||
; SKX-NEXT: vpmovzxdq (%rdi), %ymm0 {%k1} {z}
|
||||
; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
|
||||
; SKX-NEXT: retq
|
||||
%a = load <4 x i32>,<4 x i32> *%i,align 1
|
||||
%x = zext <4 x i32> %a to <4 x i64>
|
||||
@ -1193,7 +1183,7 @@ define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpslld $31, %xmm1, %xmm1
|
||||
; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1
|
||||
; SKX-NEXT: vpmovzxdq %xmm0, %ymm0 {%k1} {z}
|
||||
; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SKX-NEXT: retq
|
||||
%x = zext <4 x i32> %a to <4 x i64>
|
||||
%ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
|
||||
@ -1206,14 +1196,14 @@ define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , <8 x i1> %mask) nounwind
|
||||
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
|
||||
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
|
||||
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
|
||||
; KNL-NEXT: vpmovzxdq (%rdi), %zmm0 {%k1} {z}
|
||||
; KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: zext_8x32mem_to_8x64:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
|
||||
; SKX-NEXT: vpmovw2m %xmm0, %k1
|
||||
; SKX-NEXT: vpmovzxdq (%rdi), %zmm0 {%k1} {z}
|
||||
; SKX-NEXT: vpmovzxdq {{.*#+}} zmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
|
||||
; SKX-NEXT: retq
|
||||
%a = load <8 x i32>,<8 x i32> *%i,align 1
|
||||
%x = zext <8 x i32> %a to <8 x i64>
|
||||
@ -1267,14 +1257,14 @@ define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a , <8 x i1> %mask) nounwind
|
||||
; KNL-NEXT: vpmovsxwq %xmm1, %zmm1
|
||||
; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
|
||||
; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
|
||||
; KNL-NEXT: vpmovzxdq %ymm0, %zmm0 {%k1} {z}
|
||||
; KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: zext_8x32_to_8x64mask:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpsllw $15, %xmm1, %xmm1
|
||||
; SKX-NEXT: vpmovw2m %xmm1, %k1
|
||||
; SKX-NEXT: vpmovzxdq %ymm0, %zmm0 {%k1} {z}
|
||||
; SKX-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
|
||||
; SKX-NEXT: retq
|
||||
%x = zext <8 x i32> %a to <8 x i64>
|
||||
%ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
|
||||
|
@ -5025,12 +5025,15 @@ define <8 x i16>@test_int_x86_avx512_mask_pmovzxb_w_128(<16 x i8> %x0, <8 x i16>
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_w_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpmovzxbw %xmm0, %xmm1 {%k1}
|
||||
; CHECK-NEXT: vpmovzxbw %xmm0, %xmm2 {%k1} {z}
|
||||
; CHECK-NEXT: vpmovzxbw %xmm0, %xmm0
|
||||
; CHECK-NEXT: vpaddw %xmm2, %xmm1, %xmm1
|
||||
; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
; CHECK-NEXT: vpmovzxbw %xmm0, %xmm1 {%k1}
|
||||
; CHECK-NEXT: ## xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; CHECK-NEXT: vpmovzxbw %xmm0, %xmm2 {%k1} {z}
|
||||
; CHECK-NEXT: ## xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; CHECK-NEXT: vpmovzxbw %xmm0, %xmm0
|
||||
; CHECK-NEXT: ## xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; CHECK-NEXT: vpaddw %xmm2, %xmm1, %xmm1
|
||||
; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2)
|
||||
%res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> zeroinitializer, i8 %x2)
|
||||
%res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 -1)
|
||||
@ -5044,13 +5047,16 @@ declare <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8>, <16 x i16>, i1
|
||||
define <16 x i16>@test_int_x86_avx512_mask_pmovzxb_w_256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_w_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpmovzxbw %xmm0, %ymm1 {%k1}
|
||||
; CHECK-NEXT: vpmovzxbw %xmm0, %ymm2 {%k1} {z}
|
||||
; CHECK-NEXT: vpmovzxbw %xmm0, %ymm0
|
||||
; CHECK-NEXT: vpaddw %ymm2, %ymm1, %ymm1
|
||||
; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpmovzxbw %xmm0, %ymm1 {%k1}
|
||||
; CHECK-NEXT: ## ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; CHECK-NEXT: vpmovzxbw %xmm0, %ymm2 {%k1} {z}
|
||||
; CHECK-NEXT: ## ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; CHECK-NEXT: vpmovzxbw %xmm0, %ymm0
|
||||
; CHECK-NEXT: ## ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; CHECK-NEXT: vpaddw %ymm2, %ymm1, %ymm1
|
||||
; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2)
|
||||
%res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> zeroinitializer, i16 %x2)
|
||||
%res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 -1)
|
||||
|
@ -7064,8 +7064,11 @@ define <4 x i32>@test_int_x86_avx512_mask_pmovzxb_d_128(<16 x i8> %x0, <4 x i32>
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpmovzxbd %xmm0, %xmm1 {%k1}
|
||||
; CHECK-NEXT: ## xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; CHECK-NEXT: vpmovzxbd %xmm0, %xmm2 {%k1} {z}
|
||||
; CHECK-NEXT: ## xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; CHECK-NEXT: vpmovzxbd %xmm0, %xmm0
|
||||
; CHECK-NEXT: ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
|
||||
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
@ -7084,8 +7087,11 @@ define <8 x i32>@test_int_x86_avx512_mask_pmovzxb_d_256(<16 x i8> %x0, <8 x i32>
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpmovzxbd %xmm0, %ymm1 {%k1}
|
||||
; CHECK-NEXT: ## ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
|
||||
; CHECK-NEXT: vpmovzxbd %xmm0, %ymm2 {%k1} {z}
|
||||
; CHECK-NEXT: ## ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
|
||||
; CHECK-NEXT: vpmovzxbd %xmm0, %ymm0
|
||||
; CHECK-NEXT: ## ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
|
||||
; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
|
||||
; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
@ -7104,8 +7110,11 @@ define <2 x i64>@test_int_x86_avx512_mask_pmovzxb_q_128(<16 x i8> %x0, <2 x i64>
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpmovzxbq %xmm0, %xmm1 {%k1}
|
||||
; CHECK-NEXT: ## xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; CHECK-NEXT: vpmovzxbq %xmm0, %xmm2 {%k1} {z}
|
||||
; CHECK-NEXT: ## xmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; CHECK-NEXT: vpmovzxbq %xmm0, %xmm0
|
||||
; CHECK-NEXT: ## xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
|
||||
; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
@ -7124,8 +7133,11 @@ define <4 x i64>@test_int_x86_avx512_mask_pmovzxb_q_256(<16 x i8> %x0, <4 x i64>
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpmovzxbq %xmm0, %ymm1 {%k1}
|
||||
; CHECK-NEXT: ## ymm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
|
||||
; CHECK-NEXT: vpmovzxbq %xmm0, %ymm2 {%k1} {z}
|
||||
; CHECK-NEXT: ## ymm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
|
||||
; CHECK-NEXT: vpmovzxbq %xmm0, %ymm0
|
||||
; CHECK-NEXT: ## ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
|
||||
; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
|
||||
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
@ -7144,8 +7156,11 @@ define <2 x i64>@test_int_x86_avx512_mask_pmovzxd_q_128(<4 x i32> %x0, <2 x i64>
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpmovzxdq %xmm0, %xmm1 {%k1}
|
||||
; CHECK-NEXT: ## xmm1 = xmm0[0],zero,xmm0[1],zero
|
||||
; CHECK-NEXT: vpmovzxdq %xmm0, %xmm2 {%k1} {z}
|
||||
; CHECK-NEXT: ## xmm2 = xmm0[0],zero,xmm0[1],zero
|
||||
; CHECK-NEXT: vpmovzxdq %xmm0, %xmm0
|
||||
; CHECK-NEXT: ## xmm0 = xmm0[0],zero,xmm0[1],zero
|
||||
; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
|
||||
; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
@ -7164,8 +7179,11 @@ define <4 x i64>@test_int_x86_avx512_mask_pmovzxd_q_256(<4 x i32> %x0, <4 x i64>
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpmovzxdq %xmm0, %ymm1 {%k1}
|
||||
; CHECK-NEXT: ## ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; CHECK-NEXT: vpmovzxdq %xmm0, %ymm2 {%k1} {z}
|
||||
; CHECK-NEXT: ## ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; CHECK-NEXT: vpmovzxdq %xmm0, %ymm0
|
||||
; CHECK-NEXT: ## ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
|
||||
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
@ -7184,8 +7202,11 @@ define <4 x i32>@test_int_x86_avx512_mask_pmovzxw_d_128(<8 x i16> %x0, <4 x i32>
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpmovzxwd %xmm0, %xmm1 {%k1}
|
||||
; CHECK-NEXT: ## xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; CHECK-NEXT: vpmovzxwd %xmm0, %xmm2 {%k1} {z}
|
||||
; CHECK-NEXT: ## xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; CHECK-NEXT: vpmovzxwd %xmm0, %xmm0
|
||||
; CHECK-NEXT: ## xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
|
||||
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
@ -7204,8 +7225,11 @@ define <8 x i32>@test_int_x86_avx512_mask_pmovzxw_d_256(<8 x i16> %x0, <8 x i32>
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpmovzxwd %xmm0, %ymm1 {%k1}
|
||||
; CHECK-NEXT: ## ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; CHECK-NEXT: vpmovzxwd %xmm0, %ymm2 {%k1} {z}
|
||||
; CHECK-NEXT: ## ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; CHECK-NEXT: vpmovzxwd %xmm0, %ymm0
|
||||
; CHECK-NEXT: ## ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
|
||||
; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
@ -7224,8 +7248,11 @@ define <2 x i64>@test_int_x86_avx512_mask_pmovzxw_q_128(<8 x i16> %x0, <2 x i64>
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpmovzxwq %xmm0, %xmm1 {%k1}
|
||||
; CHECK-NEXT: ## xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
|
||||
; CHECK-NEXT: vpmovzxwq %xmm0, %xmm2 {%k1} {z}
|
||||
; CHECK-NEXT: ## xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
|
||||
; CHECK-NEXT: vpmovzxwq %xmm0, %xmm0
|
||||
; CHECK-NEXT: ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
|
||||
; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
|
||||
; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
@ -7244,8 +7271,11 @@ define <4 x i64>@test_int_x86_avx512_mask_pmovzxw_q_256(<8 x i16> %x0, <4 x i64>
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpmovzxwq %xmm0, %ymm1 {%k1}
|
||||
; CHECK-NEXT: ## ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; CHECK-NEXT: vpmovzxwq %xmm0, %ymm2 {%k1} {z}
|
||||
; CHECK-NEXT: ## ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; CHECK-NEXT: vpmovzxwq %xmm0, %ymm0
|
||||
; CHECK-NEXT: ## ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
|
||||
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
|
@ -744,7 +744,7 @@ define <8 x i16> @testv8i16(<8 x i16> %in) nounwind {
|
||||
;
|
||||
; AVX512VLCD-LABEL: testv8i16:
|
||||
; AVX512VLCD: ## BB#0:
|
||||
; AVX512VLCD-NEXT: vpmovzxwd %xmm0, %ymm0
|
||||
; AVX512VLCD-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512VLCD-NEXT: vplzcntd %ymm0, %ymm0
|
||||
; AVX512VLCD-NEXT: vpmovdw %ymm0, %xmm0
|
||||
; AVX512VLCD-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
|
||||
@ -966,7 +966,7 @@ define <8 x i16> @testv8i16u(<8 x i16> %in) nounwind {
|
||||
;
|
||||
; AVX512VLCD-LABEL: testv8i16u:
|
||||
; AVX512VLCD: ## BB#0:
|
||||
; AVX512VLCD-NEXT: vpmovzxwd %xmm0, %ymm0
|
||||
; AVX512VLCD-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512VLCD-NEXT: vplzcntd %ymm0, %ymm0
|
||||
; AVX512VLCD-NEXT: vpmovdw %ymm0, %xmm0
|
||||
; AVX512VLCD-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
|
||||
@ -1472,7 +1472,7 @@ define <16 x i8> @testv16i8(<16 x i8> %in) nounwind {
|
||||
;
|
||||
; AVX512-LABEL: testv16i8:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vpmovzxbd %xmm0, %zmm0
|
||||
; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
|
||||
; AVX512-NEXT: vplzcntd %zmm0, %zmm0
|
||||
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
|
||||
@ -1879,7 +1879,7 @@ define <16 x i8> @testv16i8u(<16 x i8> %in) nounwind {
|
||||
;
|
||||
; AVX512-LABEL: testv16i8u:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vpmovzxbd %xmm0, %zmm0
|
||||
; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
|
||||
; AVX512-NEXT: vplzcntd %zmm0, %zmm0
|
||||
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
|
||||
|
@ -517,7 +517,7 @@ define <16 x i16> @testv16i16(<16 x i16> %in) nounwind {
|
||||
;
|
||||
; AVX512-LABEL: testv16i16:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vpmovzxwd %ymm0, %zmm0
|
||||
; AVX512-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
|
||||
; AVX512-NEXT: vplzcntd %zmm0, %zmm0
|
||||
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; AVX512-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0
|
||||
@ -669,7 +669,7 @@ define <16 x i16> @testv16i16u(<16 x i16> %in) nounwind {
|
||||
;
|
||||
; AVX512-LABEL: testv16i16u:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vpmovzxwd %ymm0, %zmm0
|
||||
; AVX512-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
|
||||
; AVX512-NEXT: vplzcntd %zmm0, %zmm0
|
||||
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; AVX512-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0
|
||||
@ -1016,12 +1016,12 @@ define <32 x i8> @testv32i8(<32 x i8> %in) nounwind {
|
||||
; AVX512VLCD-LABEL: testv32i8:
|
||||
; AVX512VLCD: ## BB#0:
|
||||
; AVX512VLCD-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX512VLCD-NEXT: vpmovzxbd %xmm1, %zmm1
|
||||
; AVX512VLCD-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
|
||||
; AVX512VLCD-NEXT: vplzcntd %zmm1, %zmm1
|
||||
; AVX512VLCD-NEXT: vpmovdb %zmm1, %xmm1
|
||||
; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
|
||||
; AVX512VLCD-NEXT: vpsubb %xmm2, %xmm1, %xmm1
|
||||
; AVX512VLCD-NEXT: vpmovzxbd %xmm0, %zmm0
|
||||
; AVX512VLCD-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
|
||||
; AVX512VLCD-NEXT: vplzcntd %zmm0, %zmm0
|
||||
; AVX512VLCD-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512VLCD-NEXT: vpsubb %xmm2, %xmm0, %xmm0
|
||||
@ -1031,12 +1031,12 @@ define <32 x i8> @testv32i8(<32 x i8> %in) nounwind {
|
||||
; AVX512CD-LABEL: testv32i8:
|
||||
; AVX512CD: ## BB#0:
|
||||
; AVX512CD-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX512CD-NEXT: vpmovzxbd %xmm1, %zmm1
|
||||
; AVX512CD-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
|
||||
; AVX512CD-NEXT: vplzcntd %zmm1, %zmm1
|
||||
; AVX512CD-NEXT: vpmovdb %zmm1, %xmm1
|
||||
; AVX512CD-NEXT: vmovdqa {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
|
||||
; AVX512CD-NEXT: vpsubb %xmm2, %xmm1, %xmm1
|
||||
; AVX512CD-NEXT: vpmovzxbd %xmm0, %zmm0
|
||||
; AVX512CD-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
|
||||
; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
|
||||
; AVX512CD-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512CD-NEXT: vpsubb %xmm2, %xmm0, %xmm0
|
||||
@ -1318,12 +1318,12 @@ define <32 x i8> @testv32i8u(<32 x i8> %in) nounwind {
|
||||
; AVX512VLCD-LABEL: testv32i8u:
|
||||
; AVX512VLCD: ## BB#0:
|
||||
; AVX512VLCD-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX512VLCD-NEXT: vpmovzxbd %xmm1, %zmm1
|
||||
; AVX512VLCD-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
|
||||
; AVX512VLCD-NEXT: vplzcntd %zmm1, %zmm1
|
||||
; AVX512VLCD-NEXT: vpmovdb %zmm1, %xmm1
|
||||
; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
|
||||
; AVX512VLCD-NEXT: vpsubb %xmm2, %xmm1, %xmm1
|
||||
; AVX512VLCD-NEXT: vpmovzxbd %xmm0, %zmm0
|
||||
; AVX512VLCD-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
|
||||
; AVX512VLCD-NEXT: vplzcntd %zmm0, %zmm0
|
||||
; AVX512VLCD-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512VLCD-NEXT: vpsubb %xmm2, %xmm0, %xmm0
|
||||
@ -1333,12 +1333,12 @@ define <32 x i8> @testv32i8u(<32 x i8> %in) nounwind {
|
||||
; AVX512CD-LABEL: testv32i8u:
|
||||
; AVX512CD: ## BB#0:
|
||||
; AVX512CD-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX512CD-NEXT: vpmovzxbd %xmm1, %zmm1
|
||||
; AVX512CD-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
|
||||
; AVX512CD-NEXT: vplzcntd %zmm1, %zmm1
|
||||
; AVX512CD-NEXT: vpmovdb %zmm1, %xmm1
|
||||
; AVX512CD-NEXT: vmovdqa {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
|
||||
; AVX512CD-NEXT: vpsubb %xmm2, %xmm1, %xmm1
|
||||
; AVX512CD-NEXT: vpmovzxbd %xmm0, %zmm0
|
||||
; AVX512CD-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
|
||||
; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
|
||||
; AVX512CD-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; AVX512CD-NEXT: vpsubb %xmm2, %xmm0, %xmm0
|
||||
|
@ -41,12 +41,12 @@ define <16 x i32> @testv16i32u(<16 x i32> %in) nounwind {
|
||||
define <32 x i16> @testv32i16(<32 x i16> %in) nounwind {
|
||||
; AVX512CD-LABEL: testv32i16:
|
||||
; AVX512CD: ## BB#0:
|
||||
; AVX512CD-NEXT: vpmovzxwd %ymm0, %zmm0
|
||||
; AVX512CD-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
|
||||
; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
|
||||
; AVX512CD-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512CD-NEXT: vpsubw %ymm2, %ymm0, %ymm0
|
||||
; AVX512CD-NEXT: vpmovzxwd %ymm1, %zmm1
|
||||
; AVX512CD-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
|
||||
; AVX512CD-NEXT: vplzcntd %zmm1, %zmm1
|
||||
; AVX512CD-NEXT: vpmovdw %zmm1, %ymm1
|
||||
; AVX512CD-NEXT: vpsubw %ymm2, %ymm1, %ymm1
|
||||
@ -55,12 +55,12 @@ define <32 x i16> @testv32i16(<32 x i16> %in) nounwind {
|
||||
; AVX512BW-LABEL: testv32i16:
|
||||
; AVX512BW: ## BB#0:
|
||||
; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
|
||||
; AVX512BW-NEXT: vpmovzxwd %ymm1, %zmm1
|
||||
; AVX512BW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
|
||||
; AVX512BW-NEXT: vplzcntd %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpmovdw %zmm1, %ymm1
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512BW-NEXT: vpsubw %ymm2, %ymm1, %ymm1
|
||||
; AVX512BW-NEXT: vpmovzxwd %ymm0, %zmm0
|
||||
; AVX512BW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
|
||||
; AVX512BW-NEXT: vplzcntd %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; AVX512BW-NEXT: vpsubw %ymm2, %ymm0, %ymm0
|
||||
@ -73,12 +73,12 @@ define <32 x i16> @testv32i16(<32 x i16> %in) nounwind {
|
||||
define <32 x i16> @testv32i16u(<32 x i16> %in) nounwind {
|
||||
; AVX512CD-LABEL: testv32i16u:
|
||||
; AVX512CD: ## BB#0:
|
||||
; AVX512CD-NEXT: vpmovzxwd %ymm0, %zmm0
|
||||
; AVX512CD-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
|
||||
; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
|
||||
; AVX512CD-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512CD-NEXT: vpsubw %ymm2, %ymm0, %ymm0
|
||||
; AVX512CD-NEXT: vpmovzxwd %ymm1, %zmm1
|
||||
; AVX512CD-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
|
||||
; AVX512CD-NEXT: vplzcntd %zmm1, %zmm1
|
||||
; AVX512CD-NEXT: vpmovdw %zmm1, %ymm1
|
||||
; AVX512CD-NEXT: vpsubw %ymm2, %ymm1, %ymm1
|
||||
@ -87,12 +87,12 @@ define <32 x i16> @testv32i16u(<32 x i16> %in) nounwind {
|
||||
; AVX512BW-LABEL: testv32i16u:
|
||||
; AVX512BW: ## BB#0:
|
||||
; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
|
||||
; AVX512BW-NEXT: vpmovzxwd %ymm1, %zmm1
|
||||
; AVX512BW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
|
||||
; AVX512BW-NEXT: vplzcntd %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpmovdw %zmm1, %ymm1
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512BW-NEXT: vpsubw %ymm2, %ymm1, %ymm1
|
||||
; AVX512BW-NEXT: vpmovzxwd %ymm0, %zmm0
|
||||
; AVX512BW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512BW-NEXT: vplzcntd %zmm0, %zmm0
; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
; AVX512BW-NEXT: vpsubw %ymm2, %ymm0, %ymm0
@ -106,22 +106,22 @@ define <64 x i8> @testv64i8(<64 x i8> %in) nounwind {
; AVX512CD-LABEL: testv64i8:
; AVX512CD: ## BB#0:
; AVX512CD-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX512CD-NEXT: vpmovzxbd %xmm2, %zmm2
; AVX512CD-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; AVX512CD-NEXT: vplzcntd %zmm2, %zmm2
; AVX512CD-NEXT: vpmovdb %zmm2, %xmm2
; AVX512CD-NEXT: vmovdqa {{.*#+}} xmm3 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
; AVX512CD-NEXT: vpsubb %xmm3, %xmm2, %xmm2
; AVX512CD-NEXT: vpmovzxbd %xmm0, %zmm0
; AVX512CD-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
; AVX512CD-NEXT: vpmovdb %zmm0, %xmm0
; AVX512CD-NEXT: vpsubb %xmm3, %xmm0, %xmm0
; AVX512CD-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX512CD-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX512CD-NEXT: vpmovzxbd %xmm2, %zmm2
; AVX512CD-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; AVX512CD-NEXT: vplzcntd %zmm2, %zmm2
; AVX512CD-NEXT: vpmovdb %zmm2, %xmm2
; AVX512CD-NEXT: vpsubb %xmm3, %xmm2, %xmm2
; AVX512CD-NEXT: vpmovzxbd %xmm1, %zmm1
; AVX512CD-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; AVX512CD-NEXT: vplzcntd %zmm1, %zmm1
; AVX512CD-NEXT: vpmovdb %zmm1, %xmm1
; AVX512CD-NEXT: vpsubb %xmm3, %xmm1, %xmm1
@ -132,22 +132,22 @@ define <64 x i8> @testv64i8(<64 x i8> %in) nounwind {
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512BW-NEXT: vpmovzxbd %xmm2, %zmm2
; AVX512BW-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; AVX512BW-NEXT: vplzcntd %zmm2, %zmm2
; AVX512BW-NEXT: vpmovdb %zmm2, %xmm2
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
; AVX512BW-NEXT: vpsubb %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT: vpmovzxbd %xmm1, %zmm1
; AVX512BW-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; AVX512BW-NEXT: vplzcntd %zmm1, %zmm1
; AVX512BW-NEXT: vpmovdb %zmm1, %xmm1
; AVX512BW-NEXT: vpsubb %xmm3, %xmm1, %xmm1
; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX512BW-NEXT: vpmovzxbd %xmm2, %zmm2
; AVX512BW-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; AVX512BW-NEXT: vplzcntd %zmm2, %zmm2
; AVX512BW-NEXT: vpmovdb %zmm2, %xmm2
; AVX512BW-NEXT: vpsubb %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT: vpmovzxbd %xmm0, %zmm0
; AVX512BW-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512BW-NEXT: vplzcntd %zmm0, %zmm0
; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
; AVX512BW-NEXT: vpsubb %xmm3, %xmm0, %xmm0
@ -162,22 +162,22 @@ define <64 x i8> @testv64i8u(<64 x i8> %in) nounwind {
; AVX512CD-LABEL: testv64i8u:
; AVX512CD: ## BB#0:
; AVX512CD-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX512CD-NEXT: vpmovzxbd %xmm2, %zmm2
; AVX512CD-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; AVX512CD-NEXT: vplzcntd %zmm2, %zmm2
; AVX512CD-NEXT: vpmovdb %zmm2, %xmm2
; AVX512CD-NEXT: vmovdqa {{.*#+}} xmm3 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
; AVX512CD-NEXT: vpsubb %xmm3, %xmm2, %xmm2
; AVX512CD-NEXT: vpmovzxbd %xmm0, %zmm0
; AVX512CD-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
; AVX512CD-NEXT: vpmovdb %zmm0, %xmm0
; AVX512CD-NEXT: vpsubb %xmm3, %xmm0, %xmm0
; AVX512CD-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX512CD-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX512CD-NEXT: vpmovzxbd %xmm2, %zmm2
; AVX512CD-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; AVX512CD-NEXT: vplzcntd %zmm2, %zmm2
; AVX512CD-NEXT: vpmovdb %zmm2, %xmm2
; AVX512CD-NEXT: vpsubb %xmm3, %xmm2, %xmm2
; AVX512CD-NEXT: vpmovzxbd %xmm1, %zmm1
; AVX512CD-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; AVX512CD-NEXT: vplzcntd %zmm1, %zmm1
; AVX512CD-NEXT: vpmovdb %zmm1, %xmm1
; AVX512CD-NEXT: vpsubb %xmm3, %xmm1, %xmm1
@ -188,22 +188,22 @@ define <64 x i8> @testv64i8u(<64 x i8> %in) nounwind {
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512BW-NEXT: vpmovzxbd %xmm2, %zmm2
; AVX512BW-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; AVX512BW-NEXT: vplzcntd %zmm2, %zmm2
; AVX512BW-NEXT: vpmovdb %zmm2, %xmm2
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
; AVX512BW-NEXT: vpsubb %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT: vpmovzxbd %xmm1, %zmm1
; AVX512BW-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; AVX512BW-NEXT: vplzcntd %zmm1, %zmm1
; AVX512BW-NEXT: vpmovdb %zmm1, %xmm1
; AVX512BW-NEXT: vpsubb %xmm3, %xmm1, %xmm1
; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX512BW-NEXT: vpmovzxbd %xmm2, %zmm2
; AVX512BW-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
; AVX512BW-NEXT: vplzcntd %zmm2, %zmm2
; AVX512BW-NEXT: vpmovdb %zmm2, %xmm2
; AVX512BW-NEXT: vpsubb %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT: vpmovzxbd %xmm0, %zmm0
; AVX512BW-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512BW-NEXT: vplzcntd %zmm0, %zmm0
; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
; AVX512BW-NEXT: vpsubb %xmm3, %xmm0, %xmm0
@ -747,7 +747,7 @@ define <8 x i64> @load_zext_8i8_to_8i64(<8 x i8> *%ptr) {
;
; AVX512-LABEL: load_zext_8i8_to_8i64:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: vpmovzxbq (%rdi), %zmm0
; AVX512-NEXT: vpmovzxbq {{.*#+}} zmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
; AVX512-NEXT: retq
entry:
%X = load <8 x i8>, <8 x i8>* %ptr