Mirror of https://github.com/RPCS3/llvm-mirror.git, synced 2024-10-19 19:12:56 +02:00
[X86] Remove masking from the 512-bit padds and psubs intrinsics. Use select in IR instead.
llvm-svn: 339842
parent 6334f048ca
commit 13e4c1435f
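In IR terms, the auto-upgrade rewrites the old masked intrinsic into the new unmasked one followed by an explicit select on the bitcast mask, as the updated tests below show. A minimal sketch (the function name is illustrative, not from the commit):

declare <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16>, <32 x i16>)

define <32 x i16> @masked_adds_upgrade_sketch(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
  ; Old form (removed by this commit): mask and pass-through were intrinsic operands:
  ;   %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
  ; New form: unmasked saturated add plus a plain IR select.
  %sat = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b)
  %m = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %m, <32 x i16> %sat, <32 x i16> %passThru
  ret <32 x i16> %res
}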
@@ -3653,18 +3653,18 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
}
// Integer arithmetic ops
let TargetPrefix = "x86" in {
def int_x86_avx512_mask_padds_b_512 : GCCBuiltin<"__builtin_ia32_paddsb512_mask">,
    Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
               llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
def int_x86_avx512_mask_padds_w_512 : GCCBuiltin<"__builtin_ia32_paddsw512_mask">,
    Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
               llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psubs_b_512 : GCCBuiltin<"__builtin_ia32_psubsb512_mask">,
    Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
               llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psubs_w_512 : GCCBuiltin<"__builtin_ia32_psubsw512_mask">,
    Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
               llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_padds_b_512 : GCCBuiltin<"__builtin_ia32_paddsb512">,
    Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty],
              [IntrNoMem]>;
def int_x86_avx512_padds_w_512 : GCCBuiltin<"__builtin_ia32_paddsw512">,
    Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty],
              [IntrNoMem]>;
def int_x86_avx512_psubs_b_512 : GCCBuiltin<"__builtin_ia32_psubsb512">,
    Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty],
              [IntrNoMem]>;
def int_x86_avx512_psubs_w_512 : GCCBuiltin<"__builtin_ia32_psubsw512">,
    Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty],
              [IntrNoMem]>;
def int_x86_avx512_pmulhu_w_512 : GCCBuiltin<"__builtin_ia32_pmulhuw512">,
    Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
               llvm_v32i16_ty], [IntrNoMem, Commutative]>;
@@ -282,14 +282,8 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
Name.startswith("avx512.mask.pror.") || // Added in 7.0
Name.startswith("avx512.mask.prolv.") || // Added in 7.0
Name.startswith("avx512.mask.prol.") || // Added in 7.0
Name == "avx512.mask.padds.b.128" || // Added in 8.0
Name == "avx512.mask.padds.b.256" || // Added in 8.0
Name == "avx512.mask.padds.w.128" || // Added in 8.0
Name == "avx512.mask.padds.w.256" || // Added in 8.0
Name == "avx512.mask.psubs.b.128" || // Added in 8.0
Name == "avx512.mask.psubs.b.256" || // Added in 8.0
Name == "avx512.mask.psubs.w.128" || // Added in 8.0
Name == "avx512.mask.psubs.w.256" || // Added in 8.0
Name.startswith("avx512.mask.padds.") || // Added in 8.0
Name.startswith("avx512.mask.psubs.") || // Added in 8.0
Name == "sse.cvtsi2ss" || // Added in 7.0
Name == "sse.cvtsi642ss" || // Added in 7.0
Name == "sse2.cvtsi2sd" || // Added in 7.0
@@ -1417,10 +1411,14 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
IID = Intrinsic::x86_sse2_padds_b;
else if (VecWidth == 256 && EltWidth == 8)
IID = Intrinsic::x86_avx2_padds_b;
else if (VecWidth == 512 && EltWidth == 8)
IID = Intrinsic::x86_avx512_padds_b_512;
else if (VecWidth == 128 && EltWidth == 16)
IID = Intrinsic::x86_sse2_padds_w;
else if (VecWidth == 256 && EltWidth == 16)
IID = Intrinsic::x86_avx2_padds_w;
else if (VecWidth == 512 && EltWidth == 16)
IID = Intrinsic::x86_avx512_padds_w_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("psubs.")) {
@@ -1428,10 +1426,14 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
IID = Intrinsic::x86_sse2_psubs_b;
else if (VecWidth == 256 && EltWidth == 8)
IID = Intrinsic::x86_avx2_psubs_b;
else if (VecWidth == 512 && EltWidth == 8)
IID = Intrinsic::x86_avx512_psubs_b_512;
else if (VecWidth == 128 && EltWidth == 16)
IID = Intrinsic::x86_sse2_psubs_w;
else if (VecWidth == 256 && EltWidth == 16)
IID = Intrinsic::x86_avx2_psubs_w;
else if (VecWidth == 512 && EltWidth == 16)
IID = Intrinsic::x86_avx512_psubs_w_512;
else
llvm_unreachable("Unexpected intrinsic");
} else
@@ -673,8 +673,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::FMULS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_mul_ss_round, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::FMULS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_padds_b_512, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
X86_INTRINSIC_DATA(avx512_mask_padds_w_512, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_db_128, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNC, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_db_256, INTR_TYPE_1OP_MASK,
@@ -789,8 +787,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::MULTISHIFT, 0),
X86_INTRINSIC_DATA(avx512_mask_pmultishift_qb_512, INTR_TYPE_2OP_MASK,
X86ISD::MULTISHIFT, 0),
X86_INTRINSIC_DATA(avx512_mask_psubs_b_512, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
X86_INTRINSIC_DATA(avx512_mask_psubs_w_512, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
X86_INTRINSIC_DATA(avx512_mask_range_pd_128, INTR_TYPE_3OP_MASK, X86ISD::VRANGE, 0),
X86_INTRINSIC_DATA(avx512_mask_range_pd_256, INTR_TYPE_3OP_MASK, X86ISD::VRANGE, 0),
X86_INTRINSIC_DATA(avx512_mask_range_pd_512, INTR_TYPE_3OP_MASK, X86ISD::VRANGE, X86ISD::VRANGE_RND),
@@ -926,6 +922,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_packsswb_512, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx512_packusdw_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx512_packuswb_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx512_padds_b_512, INTR_TYPE_2OP, X86ISD::ADDS, 0),
X86_INTRINSIC_DATA(avx512_padds_w_512, INTR_TYPE_2OP, X86ISD::ADDS, 0),
X86_INTRINSIC_DATA(avx512_permvar_df_256, VPERM_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx512_permvar_df_512, VPERM_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx512_permvar_di_256, VPERM_2OP, X86ISD::VPERMV, 0),
@@ -1008,6 +1006,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_psrlv_w_128, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx512_psrlv_w_256, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx512_psrlv_w_512, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx512_psubs_b_512, INTR_TYPE_2OP, X86ISD::SUBS, 0),
X86_INTRINSIC_DATA(avx512_psubs_w_512, INTR_TYPE_2OP, X86ISD::SUBS, 0),
X86_INTRINSIC_DATA(avx512_pternlog_d_128, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_pternlog_d_256, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_pternlog_d_512, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
@@ -245,8 +245,7 @@ Instruction *InstCombiner::SimplifyAnyMemSet(AnyMemSetInst *MI) {

static Value *simplifyX86AddsSubs(const IntrinsicInst &II,
                                  InstCombiner::BuilderTy &Builder) {
bool IsAddition = false;
bool IsMasked = false;
bool IsAddition;

switch (II.getIntrinsicID()) {
default: llvm_unreachable("Unexpected intrinsic!");
@@ -254,21 +253,17 @@ static Value *simplifyX86AddsSubs(const IntrinsicInst &II,
case Intrinsic::x86_sse2_padds_w:
case Intrinsic::x86_avx2_padds_b:
case Intrinsic::x86_avx2_padds_w:
IsAddition = true; IsMasked = false;
case Intrinsic::x86_avx512_padds_b_512:
case Intrinsic::x86_avx512_padds_w_512:
IsAddition = true;
break;
case Intrinsic::x86_sse2_psubs_b:
case Intrinsic::x86_sse2_psubs_w:
case Intrinsic::x86_avx2_psubs_b:
case Intrinsic::x86_avx2_psubs_w:
IsAddition = false; IsMasked = false;
break;
case Intrinsic::x86_avx512_mask_padds_b_512:
case Intrinsic::x86_avx512_mask_padds_w_512:
IsAddition = true; IsMasked = true;
break;
case Intrinsic::x86_avx512_mask_psubs_b_512:
case Intrinsic::x86_avx512_mask_psubs_w_512:
IsAddition = false; IsMasked = true;
case Intrinsic::x86_avx512_psubs_b_512:
case Intrinsic::x86_avx512_psubs_w_512:
IsAddition = false;
break;
}

@@ -278,7 +273,7 @@ static Value *simplifyX86AddsSubs(const IntrinsicInst &II,
auto SVT = VT->getElementType();
unsigned NumElems = VT->getNumElements();

if (!Arg0 || !Arg1 || (IsMasked && !isa<Constant>(II.getOperand(2))))
if (!Arg0 || !Arg1)
return nullptr;

SmallVector<Constant *, 64> Result;
@@ -306,21 +301,7 @@ static Value *simplifyX86AddsSubs(const IntrinsicInst &II,
Result.push_back(Constant::getIntegerValue(SVT, ResultElem));
}

Value *ResultVec = ConstantVector::get(Result);

if (II.getNumArgOperands() == 4) { // For masked intrinsics.
Value *Src = II.getOperand(2);
auto Mask = II.getOperand(3);
if (auto *C = dyn_cast<Constant>(Mask))
if (C->isAllOnesValue())
return ResultVec;
auto *MaskTy = VectorType::get(
Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
Mask = Builder.CreateBitCast(Mask, MaskTy);
ResultVec = Builder.CreateSelect(Mask, ResultVec, Src);
}

return ResultVec;
return ConstantVector::get(Result);
}

static Value *simplifyX86immShift(const IntrinsicInst &II,
@@ -2614,15 +2595,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_avx2_padds_w:
case Intrinsic::x86_avx2_psubs_b:
case Intrinsic::x86_avx2_psubs_w:
case Intrinsic::x86_avx512_mask_padds_b_512:
case Intrinsic::x86_avx512_mask_padds_w_512:
case Intrinsic::x86_avx512_mask_psubs_b_512:
case Intrinsic::x86_avx512_mask_psubs_w_512:
case Intrinsic::x86_avx512_padds_b_512:
case Intrinsic::x86_avx512_padds_w_512:
case Intrinsic::x86_avx512_psubs_b_512:
case Intrinsic::x86_avx512_psubs_w_512:
if (Value *V = simplifyX86AddsSubs(*II, Builder))
return replaceInstUsesWith(*II, V);
break;

// Constant fold ashr( <A x Bi>, Ci ).
// Constant fold lshr( <A x Bi>, Ci ).
// Constant fold shl( <A x Bi>, Ci ).
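For reference, a small sketch of the constant folding simplifyX86AddsSubs performs on the unmasked form when both operands are constant vectors (values and function name are illustrative; signed i16 saturation clamps at 32767):

declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>)

define <8 x i16> @fold_example() {
  %r = call <8 x i16> @llvm.x86.sse2.padds.w(
           <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>,
           <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ; instcombine folds %r to a constant vector with every lane saturated at 32767
  ret <8 x i16> %r
}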
@ -3284,3 +3284,399 @@ define <64 x i8> @test_mask_subs_epu8_rmkz_512(<64 x i8> %a, <64 x i8>* %ptr_b,
|
||||
}
|
||||
|
||||
declare <64 x i8> @llvm.x86.avx512.mask.psubus.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
|
||||
|
||||
define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
|
||||
; CHECK-LABEL: test_mask_adds_epi16_rr_512:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0xc1]
|
||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
|
||||
; X86-LABEL: test_mask_adds_epi16_rrk_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0xd1]
|
||||
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_mask_adds_epi16_rrk_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0xd1]
|
||||
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
|
||||
; X86-LABEL: test_mask_adds_epi16_rrkz_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_mask_adds_epi16_rrkz_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
|
||||
; X86-LABEL: test_mask_adds_epi16_rm_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: vpaddsw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0x00]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_mask_adds_epi16_rm_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0x07]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
|
||||
; X86-LABEL: test_mask_adds_epi16_rmk_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: vpaddsw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0x08]
|
||||
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_mask_adds_epi16_rmk_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
|
||||
; X64-NEXT: vpaddsw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0x0f]
|
||||
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
|
||||
; X86-LABEL: test_mask_adds_epi16_rmkz_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: vpaddsw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0x00]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_mask_adds_epi16_rmkz_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
|
||||
; X64-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0x07]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
declare <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
||||
|
||||
define <32 x i16> @test_mask_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
|
||||
; CHECK-LABEL: test_mask_subs_epi16_rr_512:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0xc1]
|
||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
|
||||
; X86-LABEL: test_mask_subs_epi16_rrk_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0xd1]
|
||||
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_mask_subs_epi16_rrk_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0xd1]
|
||||
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
|
||||
; X86-LABEL: test_mask_subs_epi16_rrkz_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_mask_subs_epi16_rrkz_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
|
||||
; X86-LABEL: test_mask_subs_epi16_rm_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: vpsubsw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0x00]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_mask_subs_epi16_rm_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0x07]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
|
||||
; X86-LABEL: test_mask_subs_epi16_rmk_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: vpsubsw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0x08]
|
||||
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_mask_subs_epi16_rmk_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
|
||||
; X64-NEXT: vpsubsw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0x0f]
|
||||
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
|
||||
; X86-LABEL: test_mask_subs_epi16_rmkz_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: vpsubsw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0x00]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_mask_subs_epi16_rmkz_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
|
||||
; X64-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0x07]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
}
|
||||
|
||||
declare <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
||||
|
||||
define <64 x i8> @test_mask_adds_epi8_rr_512(<64 x i8> %a, <64 x i8> %b) {
|
||||
; CHECK-LABEL: test_mask_adds_epi8_rr_512:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vpaddsb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xec,0xc1]
|
||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%res = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1)
|
||||
ret <64 x i8> %res
|
||||
}
|
||||
|
||||
define <64 x i8> @test_mask_adds_epi8_rrk_512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask) {
|
||||
; X86-LABEL: test_mask_adds_epi8_rrk_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vpaddsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xec,0xd1]
|
||||
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_mask_adds_epi8_rrk_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vpaddsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xec,0xd1]
|
||||
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask)
|
||||
ret <64 x i8> %res
|
||||
}
|
||||
|
||||
define <64 x i8> @test_mask_adds_epi8_rrkz_512(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
|
||||
; X86-LABEL: test_mask_adds_epi8_rrkz_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vpaddsb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xec,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_mask_adds_epi8_rrkz_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vpaddsb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xec,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask)
|
||||
ret <64 x i8> %res
|
||||
}
|
||||
|
||||
define <64 x i8> @test_mask_adds_epi8_rm_512(<64 x i8> %a, <64 x i8>* %ptr_b) {
|
||||
; X86-LABEL: test_mask_adds_epi8_rm_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: vpaddsb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xec,0x00]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_mask_adds_epi8_rm_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vpaddsb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xec,0x07]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%b = load <64 x i8>, <64 x i8>* %ptr_b
|
||||
%res = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1)
|
||||
ret <64 x i8> %res
|
||||
}
|
||||
|
||||
define <64 x i8> @test_mask_adds_epi8_rmk_512(<64 x i8> %a, <64 x i8>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
|
||||
; X86-LABEL: test_mask_adds_epi8_rmk_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: vpaddsb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xec,0x08]
|
||||
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_mask_adds_epi8_rmk_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
|
||||
; X64-NEXT: vpaddsb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xec,0x0f]
|
||||
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%b = load <64 x i8>, <64 x i8>* %ptr_b
|
||||
%res = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask)
|
||||
ret <64 x i8> %res
|
||||
}
|
||||
|
||||
define <64 x i8> @test_mask_adds_epi8_rmkz_512(<64 x i8> %a, <64 x i8>* %ptr_b, i64 %mask) {
|
||||
; X86-LABEL: test_mask_adds_epi8_rmkz_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: vpaddsb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xec,0x00]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_mask_adds_epi8_rmkz_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
|
||||
; X64-NEXT: vpaddsb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xec,0x07]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%b = load <64 x i8>, <64 x i8>* %ptr_b
|
||||
%res = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask)
|
||||
ret <64 x i8> %res
|
||||
}
|
||||
|
||||
declare <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
|
||||
|
||||
define <64 x i8> @test_mask_subs_epi8_rr_512(<64 x i8> %a, <64 x i8> %b) {
|
||||
; CHECK-LABEL: test_mask_subs_epi8_rr_512:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vpsubsb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe8,0xc1]
|
||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%res = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1)
|
||||
ret <64 x i8> %res
|
||||
}
|
||||
|
||||
define <64 x i8> @test_mask_subs_epi8_rrk_512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask) {
|
||||
; X86-LABEL: test_mask_subs_epi8_rrk_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vpsubsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe8,0xd1]
|
||||
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_mask_subs_epi8_rrk_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vpsubsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe8,0xd1]
|
||||
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask)
|
||||
ret <64 x i8> %res
|
||||
}
|
||||
|
||||
define <64 x i8> @test_mask_subs_epi8_rrkz_512(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
|
||||
; X86-LABEL: test_mask_subs_epi8_rrkz_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vpsubsb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe8,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_mask_subs_epi8_rrkz_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vpsubsb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe8,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask)
|
||||
ret <64 x i8> %res
|
||||
}
|
||||
|
||||
define <64 x i8> @test_mask_subs_epi8_rm_512(<64 x i8> %a, <64 x i8>* %ptr_b) {
|
||||
; X86-LABEL: test_mask_subs_epi8_rm_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: vpsubsb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe8,0x00]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_mask_subs_epi8_rm_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vpsubsb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe8,0x07]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%b = load <64 x i8>, <64 x i8>* %ptr_b
|
||||
%res = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1)
|
||||
ret <64 x i8> %res
|
||||
}
|
||||
|
||||
define <64 x i8> @test_mask_subs_epi8_rmk_512(<64 x i8> %a, <64 x i8>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
|
||||
; X86-LABEL: test_mask_subs_epi8_rmk_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: vpsubsb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe8,0x08]
|
||||
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_mask_subs_epi8_rmk_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
|
||||
; X64-NEXT: vpsubsb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe8,0x0f]
|
||||
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%b = load <64 x i8>, <64 x i8>* %ptr_b
|
||||
%res = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask)
|
||||
ret <64 x i8> %res
|
||||
}
|
||||
|
||||
define <64 x i8> @test_mask_subs_epi8_rmkz_512(<64 x i8> %a, <64 x i8>* %ptr_b, i64 %mask) {
|
||||
; X86-LABEL: test_mask_subs_epi8_rmkz_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: vpsubsb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe8,0x00]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_mask_subs_epi8_rmkz_512:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
|
||||
; X64-NEXT: vpsubsb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe8,0x07]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%b = load <64 x i8>, <64 x i8>* %ptr_b
|
||||
%res = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask)
|
||||
ret <64 x i8> %res
|
||||
}
|
||||
|
||||
declare <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
|
||||
|
@ -564,8 +564,8 @@ define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0xc1]
|
||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
|
||||
ret <32 x i16> %res
|
||||
%1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b)
|
||||
ret <32 x i16> %1
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
|
||||
@ -582,8 +582,10 @@ define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <3
|
||||
; X64-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0xd1]
|
||||
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
%1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b)
|
||||
%2 = bitcast i32 %mask to <32 x i1>
|
||||
%3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
|
||||
ret <32 x i16> %3
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
|
||||
@ -598,8 +600,10 @@ define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
%1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b)
|
||||
%2 = bitcast i32 %mask to <32 x i1>
|
||||
%3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
|
||||
ret <32 x i16> %3
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
|
||||
@ -614,8 +618,8 @@ define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b
|
||||
; X64-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0x07]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
|
||||
ret <32 x i16> %res
|
||||
%1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b)
|
||||
ret <32 x i16> %1
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
|
||||
@ -634,8 +638,10 @@ define <32 x i16> @test_mask_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_
|
||||
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
%1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b)
|
||||
%2 = bitcast i32 %mask to <32 x i1>
|
||||
%3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
|
||||
ret <32 x i16> %3
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
|
||||
@ -652,19 +658,21 @@ define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr
|
||||
; X64-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0x07]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
%1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b)
|
||||
%2 = bitcast i32 %mask to <32 x i1>
|
||||
%3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
|
||||
ret <32 x i16> %3
|
||||
}
|
||||
|
||||
declare <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
||||
declare <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16>, <32 x i16>)
|
||||
|
||||
define <32 x i16> @test_mask_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
|
||||
; CHECK-LABEL: test_mask_subs_epi16_rr_512:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0xc1]
|
||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
|
||||
ret <32 x i16> %res
|
||||
%1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b)
|
||||
ret <32 x i16> %1
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
|
||||
@ -681,8 +689,10 @@ define <32 x i16> @test_mask_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <3
|
||||
; X64-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0xd1]
|
||||
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
%1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b)
|
||||
%2 = bitcast i32 %mask to <32 x i1>
|
||||
%3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
|
||||
ret <32 x i16> %3
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
|
||||
@ -697,8 +707,10 @@ define <32 x i16> @test_mask_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i
|
||||
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; X64-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
%1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b)
|
||||
%2 = bitcast i32 %mask to <32 x i1>
|
||||
%3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
|
||||
ret <32 x i16> %3
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
|
||||
@ -713,8 +725,8 @@ define <32 x i16> @test_mask_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b
|
||||
; X64-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0x07]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
|
||||
ret <32 x i16> %res
|
||||
%1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b)
|
||||
ret <32 x i16> %1
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
|
||||
@ -733,8 +745,10 @@ define <32 x i16> @test_mask_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_
|
||||
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
%1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b)
|
||||
%2 = bitcast i32 %mask to <32 x i1>
|
||||
%3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
|
||||
ret <32 x i16> %3
|
||||
}
|
||||
|
||||
define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
|
||||
@ -751,11 +765,13 @@ define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr
|
||||
; X64-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0x07]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%b = load <32 x i16>, <32 x i16>* %ptr_b
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
|
||||
ret <32 x i16> %res
|
||||
%1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b)
|
||||
%2 = bitcast i32 %mask to <32 x i1>
|
||||
%3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
|
||||
ret <32 x i16> %3
|
||||
}
|
||||
|
||||
declare <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
||||
declare <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16>, <32 x i16>)
|
||||
|
||||
define <64 x i8> @test_mask_adds_epi8_rr_512(<64 x i8> %a, <64 x i8> %b) {
|
||||
; CHECK-LABEL: test_mask_adds_epi8_rr_512:
|
||||
@ -769,9 +785,7 @@ define <64 x i8> @test_mask_adds_epi8_rr_512(<64 x i8> %a, <64 x i8> %b) {
|
||||
define <64 x i8> @test_mask_adds_epi8_rrk_512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask) {
|
||||
; X86-LABEL: test_mask_adds_epi8_rrk_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04]
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: kunpckdq %k0, %k1, %k1 # encoding: [0xc4,0xe1,0xf4,0x4b,0xc8]
|
||||
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vpaddsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xec,0xd1]
|
||||
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
@ -789,9 +803,7 @@ define <64 x i8> @test_mask_adds_epi8_rrk_512(<64 x i8> %a, <64 x i8> %b, <64 x
|
||||
define <64 x i8> @test_mask_adds_epi8_rrkz_512(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
|
||||
; X86-LABEL: test_mask_adds_epi8_rrkz_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04]
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: kunpckdq %k0, %k1, %k1 # encoding: [0xc4,0xe1,0xf4,0x4b,0xc8]
|
||||
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vpaddsb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xec,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
@ -824,9 +836,7 @@ define <64 x i8> @test_mask_adds_epi8_rmk_512(<64 x i8> %a, <64 x i8>* %ptr_b, <
|
||||
; X86-LABEL: test_mask_adds_epi8_rmk_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x08]
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c]
|
||||
; X86-NEXT: kunpckdq %k0, %k1, %k1 # encoding: [0xc4,0xe1,0xf4,0x4b,0xc8]
|
||||
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: vpaddsb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xec,0x08]
|
||||
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
@ -846,9 +856,7 @@ define <64 x i8> @test_mask_adds_epi8_rmkz_512(<64 x i8> %a, <64 x i8>* %ptr_b,
|
||||
; X86-LABEL: test_mask_adds_epi8_rmkz_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x08]
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c]
|
||||
; X86-NEXT: kunpckdq %k0, %k1, %k1 # encoding: [0xc4,0xe1,0xf4,0x4b,0xc8]
|
||||
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: vpaddsb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xec,0x00]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
@ -876,9 +884,7 @@ define <64 x i8> @test_mask_subs_epi8_rr_512(<64 x i8> %a, <64 x i8> %b) {
|
||||
define <64 x i8> @test_mask_subs_epi8_rrk_512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask) {
|
||||
; X86-LABEL: test_mask_subs_epi8_rrk_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04]
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: kunpckdq %k0, %k1, %k1 # encoding: [0xc4,0xe1,0xf4,0x4b,0xc8]
|
||||
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vpsubsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe8,0xd1]
|
||||
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
@ -896,9 +902,7 @@ define <64 x i8> @test_mask_subs_epi8_rrk_512(<64 x i8> %a, <64 x i8> %b, <64 x
|
||||
define <64 x i8> @test_mask_subs_epi8_rrkz_512(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
|
||||
; X86-LABEL: test_mask_subs_epi8_rrkz_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04]
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: kunpckdq %k0, %k1, %k1 # encoding: [0xc4,0xe1,0xf4,0x4b,0xc8]
|
||||
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vpsubsb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe8,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
@ -931,9 +935,7 @@ define <64 x i8> @test_mask_subs_epi8_rmk_512(<64 x i8> %a, <64 x i8>* %ptr_b, <
|
||||
; X86-LABEL: test_mask_subs_epi8_rmk_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x08]
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c]
|
||||
; X86-NEXT: kunpckdq %k0, %k1, %k1 # encoding: [0xc4,0xe1,0xf4,0x4b,0xc8]
|
||||
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: vpsubsb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe8,0x08]
|
||||
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
@ -953,9 +955,7 @@ define <64 x i8> @test_mask_subs_epi8_rmkz_512(<64 x i8> %a, <64 x i8>* %ptr_b,
|
||||
; X86-LABEL: test_mask_subs_epi8_rmkz_512:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x08]
|
||||
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c]
|
||||
; X86-NEXT: kunpckdq %k0, %k1, %k1 # encoding: [0xc4,0xe1,0xf4,0x4b,0xc8]
|
||||
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
|
||||
; X86-NEXT: vpsubsb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe8,0x00]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||