1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

[AVX512] Remove the masked vpcmpeq/vcmpgt intrinsics and autoupgrade them to native icmps.

llvm-svn: 273240
This commit is contained in:
Craig Topper 2016-06-21 03:53:24 +00:00
parent 65c96a9dac
commit f599cc3620
11 changed files with 581 additions and 653 deletions

View File

@ -6963,31 +6963,6 @@ let TargetPrefix = "x86" in {
def int_x86_avx512_vcomi_ss : GCCBuiltin<"__builtin_ia32_vcomiss">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pcmpeq_b_512 :
Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pcmpeq_w_512 :
Intrinsic<[llvm_i32_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pcmpeq_d_512 :
Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pcmpeq_q_512 :
Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pcmpgt_b_512:
Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pcmpgt_w_512:
Intrinsic<[llvm_i32_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pcmpgt_d_512:
Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pcmpgt_q_512:
Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_cmp_b_512: GCCBuiltin<"__builtin_ia32_cmpb512_mask">,
Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i32_ty,
@ -7016,32 +6991,6 @@ let TargetPrefix = "x86" in {
llvm_i8_ty], [IntrNoMem]>;
// 256-bit
def int_x86_avx512_mask_pcmpeq_b_256 :
Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pcmpeq_w_256 :
Intrinsic<[llvm_i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pcmpeq_d_256 :
Intrinsic<[llvm_i8_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pcmpeq_q_256 :
Intrinsic<[llvm_i8_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pcmpgt_b_256:
Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pcmpgt_w_256:
Intrinsic<[llvm_i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pcmpgt_d_256:
Intrinsic<[llvm_i8_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pcmpgt_q_256:
Intrinsic<[llvm_i8_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_cmp_b_256: GCCBuiltin<"__builtin_ia32_cmpb256_mask">,
Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty,
llvm_i32_ty], [IntrNoMem]>;
@ -7069,32 +7018,6 @@ let TargetPrefix = "x86" in {
llvm_i8_ty], [IntrNoMem]>;
// 128-bit
def int_x86_avx512_mask_pcmpeq_b_128 :
Intrinsic<[llvm_i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pcmpeq_w_128 :
Intrinsic<[llvm_i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pcmpeq_d_128 :
Intrinsic<[llvm_i8_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pcmpeq_q_128 :
Intrinsic<[llvm_i8_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pcmpgt_b_128:
Intrinsic<[llvm_i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pcmpgt_w_128:
Intrinsic<[llvm_i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pcmpgt_d_128:
Intrinsic<[llvm_i8_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pcmpgt_q_128:
Intrinsic<[llvm_i8_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_cmp_b_128: GCCBuiltin<"__builtin_ia32_cmpb128_mask">,
Intrinsic<[llvm_i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty,
llvm_i16_ty], [IntrNoMem]>;

View File

@ -174,6 +174,8 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Name.startswith("x86.sse2.pcmpgt.") ||
Name.startswith("x86.avx2.pcmpeq.") ||
Name.startswith("x86.avx2.pcmpgt.") ||
Name.startswith("x86.avx512.mask.pcmpeq.") ||
Name.startswith("x86.avx512.mask.pcmpgt.") ||
Name == "x86.sse41.pmaxsb" ||
Name == "x86.sse2.pmaxs.w" ||
Name == "x86.sse41.pmaxsd" ||
@ -541,6 +543,30 @@ static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
return Builder.CreateSelect(Cmp, Op0, Op1);
}
static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
ICmpInst::Predicate Pred) {
Value *Op0 = CI.getArgOperand(0);
unsigned NumElts = Op0->getType()->getVectorNumElements();
Value *Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
Value *Mask = CI.getArgOperand(2);
const auto *C = dyn_cast<Constant>(Mask);
if (!C || !C->isAllOnesValue())
Cmp = Builder.CreateAnd(Cmp, getX86MaskVec(Builder, Mask, NumElts));
if (NumElts < 8) {
uint32_t Indices[8];
for (unsigned i = 0; i != NumElts; ++i)
Indices[i] = i;
for (unsigned i = NumElts; i != 8; ++i)
Indices[i] = NumElts;
Cmp = Builder.CreateShuffleVector(Cmp, UndefValue::get(Cmp->getType()),
Indices);
}
return Builder.CreateBitCast(Cmp, IntegerType::get(CI.getContext(),
std::max(NumElts, 8U)));
}
/// Upgrade a call to an old intrinsic. All argument and return casting must be
/// provided to seamlessly integrate with existing context.
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
@ -567,6 +593,10 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
"pcmpgt");
Rep = Builder.CreateSExt(Rep, CI->getType(), "");
} else if (Name.startswith("llvm.x86.avx512.mask.pcmpeq.")) {
Rep = upgradeMaskedCompare(Builder, *CI, ICmpInst::ICMP_EQ);
} else if (Name.startswith("llvm.x86.avx512.mask.pcmpgt.")) {
Rep = upgradeMaskedCompare(Builder, *CI, ICmpInst::ICMP_SGT);
} else if (Name == "llvm.x86.sse41.pmaxsb" ||
Name == "llvm.x86.sse2.pmaxs.w" ||
Name == "llvm.x86.sse41.pmaxsd" ||

View File

@ -948,30 +948,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VBROADCAST, 0),
X86_INTRINSIC_DATA(avx512_mask_pbroadcast_w_gpr_512, INTR_TYPE_1OP_MASK,
X86ISD::VBROADCAST, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_128, CMP_MASK, X86ISD::PCMPEQM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_256, CMP_MASK, X86ISD::PCMPEQM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_512, CMP_MASK, X86ISD::PCMPEQM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_d_128, CMP_MASK, X86ISD::PCMPEQM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_d_256, CMP_MASK, X86ISD::PCMPEQM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_d_512, CMP_MASK, X86ISD::PCMPEQM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_q_128, CMP_MASK, X86ISD::PCMPEQM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_q_256, CMP_MASK, X86ISD::PCMPEQM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_q_512, CMP_MASK, X86ISD::PCMPEQM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_w_128, CMP_MASK, X86ISD::PCMPEQM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_w_256, CMP_MASK, X86ISD::PCMPEQM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_w_512, CMP_MASK, X86ISD::PCMPEQM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_b_128, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_b_256, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_b_512, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_d_128, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_d_256, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_d_512, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_q_128, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_q_256, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_q_512, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_128, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_256, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_512, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_perm_df_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VPERMI, 0),
X86_INTRINSIC_DATA(avx512_mask_perm_df_512, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VPERMI, 0),
X86_INTRINSIC_DATA(avx512_mask_perm_di_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VPERMI, 0),

View File

@ -276,3 +276,95 @@ define <16 x i32>@test_int_x86_avx512_mask_pshuf_d_512(<16 x i32> %x0, i32 %x1,
%res4 = add <16 x i32> %res3, %res2
ret <16 x i32> %res4
}
define i16 @test_pcmpeq_d(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_pcmpeq_d:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
ret i16 %res
}
define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_d:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
ret i16 %res
}
declare i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32>, <16 x i32>, i16)
define i8 @test_pcmpeq_q(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_pcmpeq_q:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
ret i8 %res
}
define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_q:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64>, <8 x i64>, i8)
define i16 @test_pcmpgt_d(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_pcmpgt_d:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
ret i16 %res
}
define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_d:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
ret i16 %res
}
declare i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32>, <16 x i32>, i16)
define i8 @test_pcmpgt_q(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_pcmpgt_q:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
ret i8 %res
}
define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_q:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64>, <8 x i64>, i8)

View File

@ -970,98 +970,6 @@ define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) {
declare void @llvm.x86.avx512.mask.store.ss(i8*, <4 x float>, i8 )
define i16 @test_pcmpeq_d(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_pcmpeq_d:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
ret i16 %res
}
define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_d:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
ret i16 %res
}
declare i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32>, <16 x i32>, i16)
define i8 @test_pcmpeq_q(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_pcmpeq_q:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
ret i8 %res
}
define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_q:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64>, <8 x i64>, i8)
define i16 @test_pcmpgt_d(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_pcmpgt_d:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
ret i16 %res
}
define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_d:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
ret i16 %res
}
declare i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32>, <16 x i32>, i16)
define i8 @test_pcmpgt_q(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_pcmpgt_q:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
ret i8 %res
}
define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_q:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64>, <8 x i64>, i8)
define <8 x i16> @test_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: test_cmp_d_512:
; CHECK: ## BB#0:

View File

@ -271,3 +271,172 @@ define <32 x i16>@test_int_x86_avx512_mask_pshufl_w_512(<32 x i16> %x0, i32 %x1,
%res4 = add <32 x i16> %res3, %res2
ret <32 x i16> %res4
}
define i64 @test_pcmpeq_b(<64 x i8> %a, <64 x i8> %b) {
; AVX512BW-LABEL: test_pcmpeq_b:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_pcmpeq_b:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $12, %esp
; AVX512F-32-NEXT: .Ltmp0:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, (%esp)
; AVX512F-32-NEXT: movl (%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $12, %esp
; AVX512F-32-NEXT: retl
%res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
ret i64 %res
}
define i64 @test_mask_pcmpeq_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
; AVX512BW-LABEL: test_mask_pcmpeq_b:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovq %rdi, %k1
; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_pcmpeq_b:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $12, %esp
; AVX512F-32-NEXT: .Ltmp1:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, (%esp)
; AVX512F-32-NEXT: movl (%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $12, %esp
; AVX512F-32-NEXT: retl
%res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
ret i64 %res
}
declare i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8>, <64 x i8>, i64)
define i32 @test_pcmpeq_w(<32 x i16> %a, <32 x i16> %b) {
; AVX512BW-LABEL: test_pcmpeq_w:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_pcmpeq_w:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: retl
%res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
ret i32 %res
}
define i32 @test_mask_pcmpeq_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; AVX512BW-LABEL: test_mask_pcmpeq_w:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_pcmpeq_w:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: retl
%res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
ret i32 %res
}
declare i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16>, <32 x i16>, i32)
define i64 @test_pcmpgt_b(<64 x i8> %a, <64 x i8> %b) {
; AVX512BW-LABEL: test_pcmpgt_b:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_pcmpgt_b:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $12, %esp
; AVX512F-32-NEXT: .Ltmp2:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, (%esp)
; AVX512F-32-NEXT: movl (%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $12, %esp
; AVX512F-32-NEXT: retl
%res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
ret i64 %res
}
define i64 @test_mask_pcmpgt_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
; AVX512BW-LABEL: test_mask_pcmpgt_b:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovq %rdi, %k1
; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_pcmpgt_b:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $12, %esp
; AVX512F-32-NEXT: .Ltmp3:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, (%esp)
; AVX512F-32-NEXT: movl (%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $12, %esp
; AVX512F-32-NEXT: retl
%res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
ret i64 %res
}
declare i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8>, <64 x i8>, i64)
define i32 @test_pcmpgt_w(<32 x i16> %a, <32 x i16> %b) {
; AVX512BW-LABEL: test_pcmpgt_w:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_pcmpgt_w:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vpcmpgtw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: retl
%res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
ret i32 %res
}
define i32 @test_mask_pcmpgt_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; AVX512BW-LABEL: test_mask_pcmpgt_w:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_pcmpgt_w:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: retl
%res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
ret i32 %res
}
declare i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16>, <32 x i16>, i32)

View File

@ -2,178 +2,6 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F-32
define i64 @test_pcmpeq_b(<64 x i8> %a, <64 x i8> %b) {
; AVX512BW-LABEL: test_pcmpeq_b:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_pcmpeq_b:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $12, %esp
; AVX512F-32-NEXT: .Ltmp0:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, (%esp)
; AVX512F-32-NEXT: movl (%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $12, %esp
; AVX512F-32-NEXT: retl
%res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
ret i64 %res
}
define i64 @test_mask_pcmpeq_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
; AVX512BW-LABEL: test_mask_pcmpeq_b:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovq %rdi, %k1
; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_pcmpeq_b:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $12, %esp
; AVX512F-32-NEXT: .Ltmp1:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, (%esp)
; AVX512F-32-NEXT: movl (%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $12, %esp
; AVX512F-32-NEXT: retl
%res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
ret i64 %res
}
declare i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8>, <64 x i8>, i64)
define i32 @test_pcmpeq_w(<32 x i16> %a, <32 x i16> %b) {
; AVX512BW-LABEL: test_pcmpeq_w:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_pcmpeq_w:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: retl
%res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
ret i32 %res
}
define i32 @test_mask_pcmpeq_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; AVX512BW-LABEL: test_mask_pcmpeq_w:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_pcmpeq_w:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: retl
%res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
ret i32 %res
}
declare i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16>, <32 x i16>, i32)
define i64 @test_pcmpgt_b(<64 x i8> %a, <64 x i8> %b) {
; AVX512BW-LABEL: test_pcmpgt_b:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_pcmpgt_b:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $12, %esp
; AVX512F-32-NEXT: .Ltmp2:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, (%esp)
; AVX512F-32-NEXT: movl (%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $12, %esp
; AVX512F-32-NEXT: retl
%res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
ret i64 %res
}
define i64 @test_mask_pcmpgt_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
; AVX512BW-LABEL: test_mask_pcmpgt_b:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovq %rdi, %k1
; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_pcmpgt_b:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $12, %esp
; AVX512F-32-NEXT: .Ltmp3:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, (%esp)
; AVX512F-32-NEXT: movl (%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $12, %esp
; AVX512F-32-NEXT: retl
%res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
ret i64 %res
}
declare i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8>, <64 x i8>, i64)
define i32 @test_pcmpgt_w(<32 x i16> %a, <32 x i16> %b) {
; AVX512BW-LABEL: test_pcmpgt_w:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_pcmpgt_w:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vpcmpgtw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: retl
%res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
ret i32 %res
}
define i32 @test_mask_pcmpgt_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; AVX512BW-LABEL: test_mask_pcmpgt_w:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_pcmpgt_w:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: retl
%res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
ret i32 %res
}
declare i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16>, <32 x i16>, i32)
define i64 @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
; AVX512BW-LABEL: test_cmp_b_512:
; AVX512BW: ## BB#0:
@ -205,7 +33,7 @@ define i64 @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
; AVX512F-32-LABEL: test_cmp_b_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $68, %esp
; AVX512F-32-NEXT: .Ltmp4:
; AVX512F-32-NEXT: .Ltmp0:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
@ -291,7 +119,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-LABEL: test_mask_cmp_b_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $68, %esp
; AVX512F-32-NEXT: .Ltmp5:
; AVX512F-32-NEXT: .Ltmp1:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
@ -381,7 +209,7 @@ define i64 @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
; AVX512F-32-LABEL: test_ucmp_b_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $68, %esp
; AVX512F-32-NEXT: .Ltmp6:
; AVX512F-32-NEXT: .Ltmp2:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
; AVX512F-32-NEXT: vpcmpequb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
@ -467,7 +295,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-LABEL: test_mask_x86_avx512_ucmp_b_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $68, %esp
; AVX512F-32-NEXT: .Ltmp7:
; AVX512F-32-NEXT: .Ltmp3:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
@ -2661,7 +2489,7 @@ define i64@test_int_x86_avx512_kunpck_qd(i64 %x0, i64 %x1) {
; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_qd:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $12, %esp
; AVX512F-32-NEXT: .Ltmp8:
; AVX512F-32-NEXT: .Ltmp4:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
@ -2687,7 +2515,7 @@ define i64@test_int_x86_avx512_cvtb2mask_512(<64 x i8> %x0) {
; AVX512F-32-LABEL: test_int_x86_avx512_cvtb2mask_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $12, %esp
; AVX512F-32-NEXT: .Ltmp9:
; AVX512F-32-NEXT: .Ltmp5:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, (%esp)
@ -3149,7 +2977,7 @@ define i64@test_int_x86_avx512_ptestm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x
; AVX512F-32-LABEL: test_int_x86_avx512_ptestm_b_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $20, %esp
; AVX512F-32-NEXT: .Ltmp10:
; AVX512F-32-NEXT: .Ltmp6:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 24
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
@ -3214,7 +3042,7 @@ define i64@test_int_x86_avx512_ptestnm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %
; AVX512F-32-LABEL: test_int_x86_avx512_ptestnm_b_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $20, %esp
; AVX512F-32-NEXT: .Ltmp11:
; AVX512F-32-NEXT: .Ltmp7:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 24
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1

View File

@ -266,3 +266,96 @@ define <16 x i16>@test_int_x86_avx512_mask_pshufl_w_256(<16 x i16> %x0, i32 %x1,
%res4 = add <16 x i16> %res3, %res2
ret <16 x i16> %res4
}
define i32 @test_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: test_pcmpeq_b_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1)
ret i32 %res
}
define i32 @test_mask_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_b_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask)
ret i32 %res
}
declare i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8>, <32 x i8>, i32)
define i16 @test_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_pcmpeq_w_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1)
ret i16 %res
}
define i16 @test_mask_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_w_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask)
ret i16 %res
}
declare i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16>, <16 x i16>, i16)
define i32 @test_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: test_pcmpgt_b_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1)
ret i32 %res
}
define i32 @test_mask_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_b_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x64,0xc1]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask)
ret i32 %res
}
declare i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8>, <32 x i8>, i32)
define i16 @test_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_pcmpgt_w_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x65,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1)
ret i16 %res
}
define i16 @test_mask_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_w_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x65,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask)
ret i16 %res
}
declare i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16>, <16 x i16>, i16)

View File

@ -3,98 +3,6 @@
; 256-bit
define i32 @test_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: test_pcmpeq_b_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1)
ret i32 %res
}
define i32 @test_mask_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_b_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask)
ret i32 %res
}
declare i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8>, <32 x i8>, i32)
define i16 @test_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_pcmpeq_w_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1)
ret i16 %res
}
define i16 @test_mask_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_w_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask)
ret i16 %res
}
declare i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16>, <16 x i16>, i16)
define i32 @test_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: test_pcmpgt_b_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1)
ret i32 %res
}
define i32 @test_mask_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_b_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x64,0xc1]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask)
ret i32 %res
}
declare i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8>, <32 x i8>, i32)
define i16 @test_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_pcmpgt_w_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x65,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1)
ret i16 %res
}
define i16 @test_mask_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_w_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x65,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask)
ret i16 %res
}
declare i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16>, <16 x i16>, i16)
define <8 x i32> @test_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: test_cmp_b_256:
; CHECK: ## BB#0:

View File

@ -558,3 +558,187 @@ define <8 x i32>@test_int_x86_avx512_mask_pshuf_d_256(<8 x i32> %x0, i32 %x1, <8
%res4 = add <8 x i32> %res3, %res2
ret <8 x i32> %res4
}
define i8 @test_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_pcmpeq_d_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1)
ret i8 %res
}
define i8 @test_mask_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_d_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask)
ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32>, <8 x i32>, i8)
define i8 @test_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_pcmpeq_q_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1)
ret i8 %res
}
define i8 @test_mask_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_q_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask)
ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64>, <4 x i64>, i8)
define i8 @test_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_pcmpgt_d_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x66,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1)
ret i8 %res
}
define i8 @test_mask_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_d_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x66,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask)
ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32>, <8 x i32>, i8)
define i8 @test_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_pcmpgt_q_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x37,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1)
ret i8 %res
}
define i8 @test_mask_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_q_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x37,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask)
ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64>, <4 x i64>, i8)
define i8 @test_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_pcmpeq_d_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1)
ret i8 %res
}
define i8 @test_mask_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_d_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask)
ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32>, <4 x i32>, i8)
define i8 @test_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_pcmpeq_q_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x29,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1)
ret i8 %res
}
define i8 @test_mask_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_q_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask)
ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64>, <2 x i64>, i8)
define i8 @test_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_pcmpgt_d_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x66,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1)
ret i8 %res
}
define i8 @test_mask_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_d_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x66,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask)
ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32>, <4 x i32>, i8)
define i8 @test_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_pcmpgt_q_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x37,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1)
ret i8 %res
}
define i8 @test_mask_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_q_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x37,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask)
ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64>, <2 x i64>, i8)

View File

@ -1,100 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding| FileCheck %s
; 256-bit
define i8 @test_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_pcmpeq_d_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1)
ret i8 %res
}
define i8 @test_mask_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_d_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask)
ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32>, <8 x i32>, i8)
define i8 @test_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_pcmpeq_q_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1)
ret i8 %res
}
define i8 @test_mask_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_q_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask)
ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64>, <4 x i64>, i8)
define i8 @test_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_pcmpgt_d_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x66,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1)
ret i8 %res
}
define i8 @test_mask_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_d_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x66,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask)
ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32>, <8 x i32>, i8)
define i8 @test_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_pcmpgt_q_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x37,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1)
ret i8 %res
}
define i8 @test_mask_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_q_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x37,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask)
ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64>, <4 x i64>, i8)
define <8 x i8> @test_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: test_cmp_d_256:
; CHECK: ## BB#0:
@ -549,98 +458,6 @@ declare i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64>, <4 x i64>, i32, i8) nounw
; 128-bit
define i8 @test_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_pcmpeq_d_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1)
ret i8 %res
}
define i8 @test_mask_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_d_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask)
ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32>, <4 x i32>, i8)
define i8 @test_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_pcmpeq_q_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x29,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1)
ret i8 %res
}
define i8 @test_mask_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_q_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask)
ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64>, <2 x i64>, i8)
define i8 @test_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_pcmpgt_d_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x66,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1)
ret i8 %res
}
define i8 @test_mask_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_d_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x66,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask)
ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32>, <4 x i32>, i8)
define i8 @test_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_pcmpgt_q_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x37,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1)
ret i8 %res
}
define i8 @test_mask_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_q_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x37,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask)
ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64>, <2 x i64>, i8)
define <8 x i8> @test_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_cmp_d_128:
; CHECK: ## BB#0:
@ -7955,9 +7772,9 @@ define <8 x i32>@test_int_x86_avx512_mask_psrav8_si_const() {
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqa32 {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
; CHECK-NEXT: ## encoding: [0x62,0xf1,0x7d,0x28,0x6f,0x05,A,A,A,A]
; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI510_0-4, kind: reloc_riprel_4byte
; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI494_0-4, kind: reloc_riprel_4byte
; CHECK-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x46,0x05,A,A,A,A]
; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI510_1-4, kind: reloc_riprel_4byte
; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI494_1-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> <i32 2, i32 9, i32 -12, i32 23, i32 -26, i32 37, i32 -40, i32 51>, <8 x i32> <i32 1, i32 18, i32 35, i32 52, i32 69, i32 15, i32 32, i32 49>, <8 x i32> zeroinitializer, i8 -1)
ret <8 x i32> %res
@ -7988,9 +7805,9 @@ define <2 x i64>@test_int_x86_avx512_mask_psrav_q_128_const(i8 %x3) {
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqa64 {{.*#+}} xmm0 = [2,18446744073709551607]
; CHECK-NEXT: ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0x05,A,A,A,A]
; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI512_0-4, kind: reloc_riprel_4byte
; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI496_0-4, kind: reloc_riprel_4byte
; CHECK-NEXT: vpsravq {{.*}}(%rip), %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x46,0x05,A,A,A,A]
; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI512_1-4, kind: reloc_riprel_4byte
; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI496_1-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> <i64 2, i64 -9>, <2 x i64> <i64 1, i64 90>, <2 x i64> zeroinitializer, i8 -1)
ret <2 x i64> %res