mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-19 19:12:56 +02:00
[X86] Remove mask arguments from permvar builtins/intrinsics. Use a select in IR instead.
Someday maybe we'll use selects for all intrinsics. llvm-svn: 332824
This commit is contained in:
parent
51d6701090
commit
c9ae8654e1
@ -3259,42 +3259,42 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
}
|
||||
// Permute
|
||||
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
def int_x86_avx512_mask_permvar_df_256 : GCCBuiltin<"__builtin_ia32_permvardf256_mask">,
|
||||
def int_x86_avx512_permvar_df_256 : GCCBuiltin<"__builtin_ia32_permvardf256">,
|
||||
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
|
||||
llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_permvar_df_512 : GCCBuiltin<"__builtin_ia32_permvardf512_mask">,
|
||||
llvm_v4i64_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_permvar_df_512 : GCCBuiltin<"__builtin_ia32_permvardf512">,
|
||||
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty,
|
||||
llvm_v8i64_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_permvar_di_256 : GCCBuiltin<"__builtin_ia32_permvardi256_mask">,
|
||||
llvm_v8i64_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_permvar_di_256 : GCCBuiltin<"__builtin_ia32_permvardi256">,
|
||||
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
|
||||
llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_permvar_di_512 : GCCBuiltin<"__builtin_ia32_permvardi512_mask">,
|
||||
llvm_v4i64_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_permvar_di_512 : GCCBuiltin<"__builtin_ia32_permvardi512">,
|
||||
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
|
||||
llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_permvar_hi_128 : GCCBuiltin<"__builtin_ia32_permvarhi128_mask">,
|
||||
llvm_v8i64_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_permvar_hi_128 : GCCBuiltin<"__builtin_ia32_permvarhi128">,
|
||||
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
|
||||
llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_permvar_hi_256 : GCCBuiltin<"__builtin_ia32_permvarhi256_mask">,
|
||||
llvm_v8i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_permvar_hi_256 : GCCBuiltin<"__builtin_ia32_permvarhi256">,
|
||||
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
|
||||
llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_permvar_hi_512 : GCCBuiltin<"__builtin_ia32_permvarhi512_mask">,
|
||||
llvm_v16i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_permvar_hi_512 : GCCBuiltin<"__builtin_ia32_permvarhi512">,
|
||||
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
|
||||
llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_permvar_qi_128 : GCCBuiltin<"__builtin_ia32_permvarqi128_mask">,
|
||||
llvm_v32i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_permvar_qi_128 : GCCBuiltin<"__builtin_ia32_permvarqi128">,
|
||||
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
|
||||
llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_permvar_qi_256 : GCCBuiltin<"__builtin_ia32_permvarqi256_mask">,
|
||||
llvm_v16i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_permvar_qi_256 : GCCBuiltin<"__builtin_ia32_permvarqi256">,
|
||||
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
|
||||
llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_permvar_qi_512 : GCCBuiltin<"__builtin_ia32_permvarqi512_mask">,
|
||||
llvm_v32i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_permvar_qi_512 : GCCBuiltin<"__builtin_ia32_permvarqi512">,
|
||||
Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty,
|
||||
llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_permvar_sf_512 : GCCBuiltin<"__builtin_ia32_permvarsf512_mask">,
|
||||
llvm_v64i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_permvar_sf_512 : GCCBuiltin<"__builtin_ia32_permvarsf512">,
|
||||
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty,
|
||||
llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_permvar_si_512 : GCCBuiltin<"__builtin_ia32_permvarsi512_mask">,
|
||||
llvm_v16i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_permvar_si_512 : GCCBuiltin<"__builtin_ia32_permvarsi512">,
|
||||
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
|
||||
llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
|
||||
llvm_v16i32_ty], [IntrNoMem]>;
|
||||
}
|
||||
// Pack ops.
|
||||
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
|
@ -182,8 +182,8 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
|
||||
Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
|
||||
Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
|
||||
Name == "avx512.cvtusi2sd" || // Added in 7.0
|
||||
Name == "avx512.mask.permvar.sf.256" || // Added in 7.0
|
||||
Name == "avx512.mask.permvar.si.256" || // Added in 7.0
|
||||
Name.startswith("avx512.mask.permvar.") || // Added in 7.0
|
||||
Name.startswith("avx512.mask.permvar.") || // Added in 7.0
|
||||
Name == "sse2.pmulu.dq" || // Added in 7.0
|
||||
Name == "sse41.pmuldq" || // Added in 7.0
|
||||
Name == "avx2.pmulu.dq" || // Added in 7.0
|
||||
@ -1207,10 +1207,38 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
|
||||
IID = Intrinsic::x86_sse2_cvttps2dq;
|
||||
} else if (Name == "cvttps2dq.256") {
|
||||
IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
|
||||
} else if (Name == "permvar.sf.256") {
|
||||
} else if (Name.startswith("permvar.")) {
|
||||
bool IsFloat = CI.getType()->isFPOrFPVectorTy();
|
||||
if (VecWidth == 256 && EltWidth == 32 && IsFloat)
|
||||
IID = Intrinsic::x86_avx2_permps;
|
||||
} else if (Name == "permvar.si.256") {
|
||||
else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
|
||||
IID = Intrinsic::x86_avx2_permd;
|
||||
else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
|
||||
IID = Intrinsic::x86_avx512_permvar_df_256;
|
||||
else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
|
||||
IID = Intrinsic::x86_avx512_permvar_di_256;
|
||||
else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
|
||||
IID = Intrinsic::x86_avx512_permvar_sf_512;
|
||||
else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
|
||||
IID = Intrinsic::x86_avx512_permvar_si_512;
|
||||
else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
|
||||
IID = Intrinsic::x86_avx512_permvar_df_512;
|
||||
else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
|
||||
IID = Intrinsic::x86_avx512_permvar_di_512;
|
||||
else if (VecWidth == 128 && EltWidth == 16)
|
||||
IID = Intrinsic::x86_avx512_permvar_hi_128;
|
||||
else if (VecWidth == 256 && EltWidth == 16)
|
||||
IID = Intrinsic::x86_avx512_permvar_hi_256;
|
||||
else if (VecWidth == 512 && EltWidth == 16)
|
||||
IID = Intrinsic::x86_avx512_permvar_hi_512;
|
||||
else if (VecWidth == 128 && EltWidth == 8)
|
||||
IID = Intrinsic::x86_avx512_permvar_qi_128;
|
||||
else if (VecWidth == 256 && EltWidth == 8)
|
||||
IID = Intrinsic::x86_avx512_permvar_qi_256;
|
||||
else if (VecWidth == 512 && EltWidth == 8)
|
||||
IID = Intrinsic::x86_avx512_permvar_qi_512;
|
||||
else
|
||||
llvm_unreachable("Unexpected intrinsic");
|
||||
} else
|
||||
return false;
|
||||
|
||||
|
@ -20508,15 +20508,12 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
||||
Src1, Src2, Src3),
|
||||
Mask, PassThru, Subtarget, DAG);
|
||||
}
|
||||
case VPERM_2OP_MASK : {
|
||||
case VPERM_2OP : {
|
||||
SDValue Src1 = Op.getOperand(1);
|
||||
SDValue Src2 = Op.getOperand(2);
|
||||
SDValue PassThru = Op.getOperand(3);
|
||||
SDValue Mask = Op.getOperand(4);
|
||||
|
||||
// Swap Src1 and Src2 in the node creation
|
||||
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,Src2, Src1),
|
||||
Mask, PassThru, Subtarget, DAG);
|
||||
return DAG.getNode(IntrData->Opc0, dl, VT,Src2, Src1);
|
||||
}
|
||||
case VPERM_3OP_MASKZ:
|
||||
case VPERM_3OP_MASK:{
|
||||
@ -20914,13 +20911,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
||||
switch (IntNo) {
|
||||
default: return SDValue(); // Don't custom lower most intrinsics.
|
||||
|
||||
case Intrinsic::x86_avx2_permd:
|
||||
case Intrinsic::x86_avx2_permps:
|
||||
// Operands intentionally swapped. Mask is last operand to intrinsic,
|
||||
// but second operand for node/instruction.
|
||||
return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(),
|
||||
Op.getOperand(2), Op.getOperand(1));
|
||||
|
||||
// ptest and testp intrinsics. The intrinsic these come from are designed to
|
||||
// return an integer value, not just an instruction so lower it to the ptest
|
||||
// or testp pattern and a setcc for the result.
|
||||
|
@ -31,7 +31,7 @@ enum IntrinsicType : uint16_t {
|
||||
FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3,
|
||||
FMA_OP_SCALAR_MASK, FMA_OP_SCALAR_MASKZ, FMA_OP_SCALAR_MASK3,
|
||||
IFMA_OP_MASK, IFMA_OP_MASKZ,
|
||||
VPERM_2OP_MASK, VPERM_3OP_MASK, VPERM_3OP_MASKZ, INTR_TYPE_SCALAR_MASK,
|
||||
VPERM_2OP, VPERM_3OP_MASK, VPERM_3OP_MASKZ, INTR_TYPE_SCALAR_MASK,
|
||||
INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK,
|
||||
COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
|
||||
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
|
||||
@ -406,6 +406,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(avx2_padds_w, INTR_TYPE_2OP, X86ISD::ADDS, 0),
|
||||
X86_INTRINSIC_DATA(avx2_paddus_b, INTR_TYPE_2OP, X86ISD::ADDUS, 0),
|
||||
X86_INTRINSIC_DATA(avx2_paddus_w, INTR_TYPE_2OP, X86ISD::ADDUS, 0),
|
||||
X86_INTRINSIC_DATA(avx2_permd, VPERM_2OP, X86ISD::VPERMV, 0),
|
||||
X86_INTRINSIC_DATA(avx2_permps, VPERM_2OP, X86ISD::VPERMV, 0),
|
||||
X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0),
|
||||
X86_INTRINSIC_DATA(avx2_phadd_w, INTR_TYPE_2OP, X86ISD::HADD, 0),
|
||||
X86_INTRINSIC_DATA(avx2_phsub_d, INTR_TYPE_2OP, X86ISD::HSUB, 0),
|
||||
@ -797,30 +799,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(avx512_mask_paddus_w_128, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_paddus_w_256, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_paddus_w_512, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_permvar_df_256, VPERM_2OP_MASK,
|
||||
X86ISD::VPERMV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_permvar_df_512, VPERM_2OP_MASK,
|
||||
X86ISD::VPERMV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_permvar_di_256, VPERM_2OP_MASK,
|
||||
X86ISD::VPERMV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_permvar_di_512, VPERM_2OP_MASK,
|
||||
X86ISD::VPERMV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_permvar_hi_128, VPERM_2OP_MASK,
|
||||
X86ISD::VPERMV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_permvar_hi_256, VPERM_2OP_MASK,
|
||||
X86ISD::VPERMV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_permvar_hi_512, VPERM_2OP_MASK,
|
||||
X86ISD::VPERMV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_permvar_qi_128, VPERM_2OP_MASK,
|
||||
X86ISD::VPERMV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_permvar_qi_256, VPERM_2OP_MASK,
|
||||
X86ISD::VPERMV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_permvar_qi_512, VPERM_2OP_MASK,
|
||||
X86ISD::VPERMV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_permvar_sf_512, VPERM_2OP_MASK,
|
||||
X86ISD::VPERMV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_permvar_si_512, VPERM_2OP_MASK,
|
||||
X86ISD::VPERMV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pmov_db_128, INTR_TYPE_1OP_MASK,
|
||||
X86ISD::VTRUNC, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pmov_db_256, INTR_TYPE_1OP_MASK,
|
||||
@ -1420,10 +1398,20 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(avx512_packsswb_512, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_packusdw_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_packuswb_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_pmaddubs_w_512, INTR_TYPE_2OP,
|
||||
X86ISD::VPMADDUBSW, 0),
|
||||
X86_INTRINSIC_DATA(avx512_pmaddw_d_512, INTR_TYPE_2OP,
|
||||
X86ISD::VPMADDWD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_permvar_df_256, VPERM_2OP, X86ISD::VPERMV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_permvar_df_512, VPERM_2OP, X86ISD::VPERMV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_permvar_di_256, VPERM_2OP, X86ISD::VPERMV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_permvar_di_512, VPERM_2OP, X86ISD::VPERMV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_permvar_hi_128, VPERM_2OP, X86ISD::VPERMV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_permvar_hi_256, VPERM_2OP, X86ISD::VPERMV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_permvar_hi_512, VPERM_2OP, X86ISD::VPERMV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_permvar_qi_128, VPERM_2OP, X86ISD::VPERMV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_permvar_qi_256, VPERM_2OP, X86ISD::VPERMV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_permvar_qi_512, VPERM_2OP, X86ISD::VPERMV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_permvar_sf_512, VPERM_2OP, X86ISD::VPERMV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_permvar_si_512, VPERM_2OP, X86ISD::VPERMV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_pmaddubs_w_512, INTR_TYPE_2OP, X86ISD::VPMADDUBSW, 0),
|
||||
X86_INTRINSIC_DATA(avx512_pmaddw_d_512, INTR_TYPE_2OP, X86ISD::VPMADDWD, 0),
|
||||
X86_INTRINSIC_DATA(avx512_pmul_hr_sw_512, INTR_TYPE_2OP, X86ISD::MULHRS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_pmulh_w_512, INTR_TYPE_2OP, ISD::MULHS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_pmulhu_w_512, INTR_TYPE_2OP, ISD::MULHU, 0),
|
||||
|
@ -2785,30 +2785,22 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
||||
|
||||
case Intrinsic::x86_avx2_permd:
|
||||
case Intrinsic::x86_avx2_permps:
|
||||
case Intrinsic::x86_avx512_permvar_df_256:
|
||||
case Intrinsic::x86_avx512_permvar_df_512:
|
||||
case Intrinsic::x86_avx512_permvar_di_256:
|
||||
case Intrinsic::x86_avx512_permvar_di_512:
|
||||
case Intrinsic::x86_avx512_permvar_hi_128:
|
||||
case Intrinsic::x86_avx512_permvar_hi_256:
|
||||
case Intrinsic::x86_avx512_permvar_hi_512:
|
||||
case Intrinsic::x86_avx512_permvar_qi_128:
|
||||
case Intrinsic::x86_avx512_permvar_qi_256:
|
||||
case Intrinsic::x86_avx512_permvar_qi_512:
|
||||
case Intrinsic::x86_avx512_permvar_sf_512:
|
||||
case Intrinsic::x86_avx512_permvar_si_512:
|
||||
if (Value *V = simplifyX86vpermv(*II, Builder))
|
||||
return replaceInstUsesWith(*II, V);
|
||||
break;
|
||||
|
||||
case Intrinsic::x86_avx512_mask_permvar_df_256:
|
||||
case Intrinsic::x86_avx512_mask_permvar_df_512:
|
||||
case Intrinsic::x86_avx512_mask_permvar_di_256:
|
||||
case Intrinsic::x86_avx512_mask_permvar_di_512:
|
||||
case Intrinsic::x86_avx512_mask_permvar_hi_128:
|
||||
case Intrinsic::x86_avx512_mask_permvar_hi_256:
|
||||
case Intrinsic::x86_avx512_mask_permvar_hi_512:
|
||||
case Intrinsic::x86_avx512_mask_permvar_qi_128:
|
||||
case Intrinsic::x86_avx512_mask_permvar_qi_256:
|
||||
case Intrinsic::x86_avx512_mask_permvar_qi_512:
|
||||
case Intrinsic::x86_avx512_mask_permvar_sf_512:
|
||||
case Intrinsic::x86_avx512_mask_permvar_si_512:
|
||||
if (Value *V = simplifyX86vpermv(*II, Builder)) {
|
||||
// We simplified the permuting, now create a select for the masking.
|
||||
V = emitX86MaskSelect(II->getArgOperand(3), V, II->getArgOperand(2),
|
||||
Builder);
|
||||
return replaceInstUsesWith(*II, V);
|
||||
}
|
||||
break;
|
||||
|
||||
case Intrinsic::x86_avx_maskload_ps:
|
||||
case Intrinsic::x86_avx_maskload_pd:
|
||||
case Intrinsic::x86_avx_maskload_ps_256:
|
||||
|
@ -4151,3 +4151,83 @@ define <8 x double> @test_x86_vbroadcast_sd_512(i8* %a0) {
|
||||
ret <8 x double> %res
|
||||
}
|
||||
declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8*) nounwind readonly
|
||||
|
||||
declare <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double>, <8 x i64>, <8 x double>, i8)
|
||||
|
||||
define <8 x double>@test_int_x86_avx512_mask_permvar_df_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_df_512:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm3
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
|
||||
; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
|
||||
%res1 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> zeroinitializer, i8 %x3)
|
||||
%res2 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
|
||||
%res3 = fadd <8 x double> %res, %res1
|
||||
%res4 = fadd <8 x double> %res3, %res2
|
||||
ret <8 x double> %res4
|
||||
}
|
||||
|
||||
declare <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
|
||||
|
||||
define <8 x i64>@test_int_x86_avx512_mask_permvar_di_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_di_512:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm3
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
|
||||
; CHECK-NEXT: vpaddq %zmm3, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
|
||||
%res1 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i8 %x3)
|
||||
%res2 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
|
||||
%res3 = add <8 x i64> %res, %res1
|
||||
%res4 = add <8 x i64> %res3, %res2
|
||||
ret <8 x i64> %res4
|
||||
}
|
||||
|
||||
declare <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float>, <16 x i32>, <16 x float>, i16)
|
||||
|
||||
define <16 x float>@test_int_x86_avx512_mask_permvar_sf_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_sf_512:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm3
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
|
||||
; CHECK-NEXT: vaddps %zmm3, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
|
||||
%res1 = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> zeroinitializer, i16 %x3)
|
||||
%res2 = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
|
||||
%res3 = fadd <16 x float> %res, %res1
|
||||
%res4 = fadd <16 x float> %res3, %res2
|
||||
ret <16 x float> %res4
|
||||
}
|
||||
|
||||
declare <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
|
||||
|
||||
define <16 x i32>@test_int_x86_avx512_mask_permvar_si_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_si_512:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm3
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
|
||||
; CHECK-NEXT: vpaddd %zmm3, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
|
||||
%res1 = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> zeroinitializer, i16 %x3)
|
||||
%res2 = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
|
||||
%res3 = add <16 x i32> %res, %res1
|
||||
%res4 = add <16 x i32> %res3, %res2
|
||||
ret <16 x i32> %res4
|
||||
}
|
||||
|
@ -3448,83 +3448,99 @@ define <8 x i64>@test_int_x86_avx512_mask_prol_q_512(<8 x i64> %x0, i32 %x1, <8
|
||||
ret <8 x i64> %res4
|
||||
}
|
||||
|
||||
declare <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double>, <8 x i64>, <8 x double>, i8)
|
||||
declare <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double>, <8 x i64>)
|
||||
|
||||
define <8 x double>@test_int_x86_avx512_mask_permvar_df_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_df_512:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm3
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
|
||||
; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
|
||||
%res1 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> zeroinitializer, i8 %x3)
|
||||
%res2 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
|
||||
%res3 = fadd <8 x double> %res, %res1
|
||||
%res4 = fadd <8 x double> %res3, %res2
|
||||
%1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %x0, <8 x i64> %x1)
|
||||
%2 = bitcast i8 %x3 to <8 x i1>
|
||||
%3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %x2
|
||||
%4 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %x0, <8 x i64> %x1)
|
||||
%5 = bitcast i8 %x3 to <8 x i1>
|
||||
%6 = select <8 x i1> %5, <8 x double> %4, <8 x double> zeroinitializer
|
||||
%7 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %x0, <8 x i64> %x1)
|
||||
%res3 = fadd <8 x double> %3, %6
|
||||
%res4 = fadd <8 x double> %res3, %7
|
||||
ret <8 x double> %res4
|
||||
}
|
||||
|
||||
declare <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
|
||||
declare <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64>, <8 x i64>)
|
||||
|
||||
define <8 x i64>@test_int_x86_avx512_mask_permvar_di_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_di_512:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm3
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
|
||||
; CHECK-NEXT: vpaddq %zmm3, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
|
||||
%res1 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i8 %x3)
|
||||
%res2 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
|
||||
%res3 = add <8 x i64> %res, %res1
|
||||
%res4 = add <8 x i64> %res3, %res2
|
||||
%1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1)
|
||||
%2 = bitcast i8 %x3 to <8 x i1>
|
||||
%3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x2
|
||||
%4 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1)
|
||||
%5 = bitcast i8 %x3 to <8 x i1>
|
||||
%6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer
|
||||
%7 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1)
|
||||
%res3 = add <8 x i64> %3, %6
|
||||
%res4 = add <8 x i64> %res3, %7
|
||||
ret <8 x i64> %res4
|
||||
}
|
||||
|
||||
declare <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float>, <16 x i32>, <16 x float>, i16)
|
||||
declare <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float>, <16 x i32>)
|
||||
|
||||
define <16 x float>@test_int_x86_avx512_mask_permvar_sf_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_sf_512:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm3
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
|
||||
; CHECK-NEXT: vaddps %zmm3, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
|
||||
%res1 = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> zeroinitializer, i16 %x3)
|
||||
%res2 = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
|
||||
%res3 = fadd <16 x float> %res, %res1
|
||||
%res4 = fadd <16 x float> %res3, %res2
|
||||
%1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1)
|
||||
%2 = bitcast i16 %x3 to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %x2
|
||||
%4 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1)
|
||||
%5 = bitcast i16 %x3 to <16 x i1>
|
||||
%6 = select <16 x i1> %5, <16 x float> %4, <16 x float> zeroinitializer
|
||||
%7 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1)
|
||||
%res3 = fadd <16 x float> %3, %6
|
||||
%res4 = fadd <16 x float> %res3, %7
|
||||
ret <16 x float> %res4
|
||||
}
|
||||
|
||||
declare <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
|
||||
declare <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32>, <16 x i32>)
|
||||
|
||||
define <16 x i32>@test_int_x86_avx512_mask_permvar_si_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_si_512:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm3
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
|
||||
; CHECK-NEXT: vpaddd %zmm3, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
|
||||
%res1 = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> zeroinitializer, i16 %x3)
|
||||
%res2 = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
|
||||
%res3 = add <16 x i32> %res, %res1
|
||||
%res4 = add <16 x i32> %res3, %res2
|
||||
%1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1)
|
||||
%2 = bitcast i16 %x3 to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
|
||||
%4 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1)
|
||||
%5 = bitcast i16 %x3 to <16 x i1>
|
||||
%6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer
|
||||
%7 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1)
|
||||
%res3 = add <16 x i32> %3, %6
|
||||
%res4 = add <16 x i32> %res3, %7
|
||||
ret <16 x i32> %res4
|
||||
}
|
||||
|
||||
|
@ -2757,3 +2757,33 @@ define <16 x i32>@test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i1
|
||||
%res2 = add <16 x i32> %res, %res1
|
||||
ret <16 x i32> %res2
|
||||
}
|
||||
|
||||
declare <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
||||
|
||||
define <32 x i16>@test_int_x86_avx512_mask_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
|
||||
; AVX512BW-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
|
||||
; AVX512BW: ## %bb.0:
|
||||
; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm3
|
||||
; AVX512BW-NEXT: kmovd %edi, %k1
|
||||
; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm2 {%k1}
|
||||
; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm0 {%k1} {z}
|
||||
; AVX512BW-NEXT: vpaddw %zmm3, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
|
||||
; AVX512F-32: # %bb.0:
|
||||
; AVX512F-32-NEXT: vpermw %zmm0, %zmm1, %zmm3
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
; AVX512F-32-NEXT: vpermw %zmm0, %zmm1, %zmm2 {%k1}
|
||||
; AVX512F-32-NEXT: vpermw %zmm0, %zmm1, %zmm0 {%k1} {z}
|
||||
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm0, %zmm0
|
||||
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
|
||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
|
||||
%res2 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
|
||||
%res3 = add <32 x i16> %res, %res1
|
||||
%res4 = add <32 x i16> %res3, %res2
|
||||
ret <32 x i16> %res4
|
||||
}
|
||||
|
@ -1573,33 +1573,37 @@ define <32 x i16>@test_int_x86_avx512_mask_psllv32hi(<32 x i16> %x0, <32 x i16>
|
||||
ret <32 x i16> %res4
|
||||
}
|
||||
|
||||
declare <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
||||
declare <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16>, <32 x i16>)
|
||||
|
||||
define <32 x i16>@test_int_x86_avx512_mask_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
|
||||
; AVX512BW-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
|
||||
; AVX512BW: ## %bb.0:
|
||||
; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm3
|
||||
; AVX512BW-NEXT: kmovd %edi, %k1
|
||||
; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm2 {%k1}
|
||||
; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm3 {%k1} {z}
|
||||
; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm2
|
||||
; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm0 {%k1} {z}
|
||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
|
||||
; AVX512BW-NEXT: vpaddw %zmm3, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
|
||||
; AVX512F-32: # %bb.0:
|
||||
; AVX512F-32-NEXT: vpermw %zmm0, %zmm1, %zmm3
|
||||
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||
; AVX512F-32-NEXT: vpermw %zmm0, %zmm1, %zmm2 {%k1}
|
||||
; AVX512F-32-NEXT: vpermw %zmm0, %zmm1, %zmm3 {%k1} {z}
|
||||
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm2
|
||||
; AVX512F-32-NEXT: vpermw %zmm0, %zmm1, %zmm0
|
||||
; AVX512F-32-NEXT: vpermw %zmm0, %zmm1, %zmm0 {%k1} {z}
|
||||
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
|
||||
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm0, %zmm0
|
||||
; AVX512F-32-NEXT: retl
|
||||
%res = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
|
||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
|
||||
%res2 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
|
||||
%res3 = add <32 x i16> %res, %res1
|
||||
%res4 = add <32 x i16> %res3, %res2
|
||||
%1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1)
|
||||
%2 = bitcast i32 %x3 to <32 x i1>
|
||||
%3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
|
||||
%4 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1)
|
||||
%5 = bitcast i32 %x3 to <32 x i1>
|
||||
%6 = select <32 x i1> %5, <32 x i16> %4, <32 x i16> zeroinitializer
|
||||
%7 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1)
|
||||
%res3 = add <32 x i16> %3, %6
|
||||
%res4 = add <32 x i16> %res3, %7
|
||||
ret <32 x i16> %res4
|
||||
}
|
||||
|
||||
|
@ -4010,3 +4010,43 @@ define <8 x i32>@test_int_x86_avx512_mask_pmaddw_d_256(<16 x i16> %x0, <16 x i16
|
||||
%res2 = add <8 x i32> %res, %res1
|
||||
ret <8 x i32> %res2
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
|
||||
|
||||
define <8 x i16>@test_int_x86_avx512_mask_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_hi_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vpermw %xmm0, %xmm1, %xmm3 ## encoding: [0x62,0xf2,0xf5,0x08,0x8d,0xd8]
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermw %xmm0, %xmm1, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x8d,0xd0]
|
||||
; CHECK-NEXT: vpermw %xmm0, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0x8d,0xc0]
|
||||
; CHECK-NEXT: vpaddw %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3]
|
||||
; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
|
||||
%res1 = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
|
||||
%res2 = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
|
||||
%res3 = add <8 x i16> %res, %res1
|
||||
%res4 = add <8 x i16> %res3, %res2
|
||||
ret <8 x i16> %res4
|
||||
}
|
||||
|
||||
declare <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
|
||||
|
||||
define <16 x i16>@test_int_x86_avx512_mask_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_hi_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0xf5,0x28,0x8d,0xd8]
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x8d,0xd0]
|
||||
; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xa9,0x8d,0xc0]
|
||||
; CHECK-NEXT: vpaddw %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3]
|
||||
; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
|
||||
%res1 = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
|
||||
%res2 = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
|
||||
%res3 = add <16 x i16> %res, %res1
|
||||
%res4 = add <16 x i16> %res3, %res2
|
||||
ret <16 x i16> %res4
|
||||
}
|
||||
|
@ -2445,43 +2445,51 @@ define <8 x i16>@test_int_x86_avx512_mask_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1
|
||||
ret <8 x i16> %res4
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
|
||||
declare <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16>, <8 x i16>)
|
||||
|
||||
define <8 x i16>@test_int_x86_avx512_mask_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_hi_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermw %xmm0, %xmm1, %xmm3 ## encoding: [0x62,0xf2,0xf5,0x08,0x8d,0xd8]
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermw %xmm0, %xmm1, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x8d,0xd0]
|
||||
; CHECK-NEXT: vpermw %xmm0, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0x8d,0xc0]
|
||||
; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
|
||||
; CHECK-NEXT: vpaddw %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
|
||||
%res1 = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
|
||||
%res2 = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
|
||||
%res3 = add <8 x i16> %res, %res1
|
||||
%res4 = add <8 x i16> %res3, %res2
|
||||
%1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1)
|
||||
%2 = bitcast i8 %x3 to <8 x i1>
|
||||
%3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2
|
||||
%4 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1)
|
||||
%5 = bitcast i8 %x3 to <8 x i1>
|
||||
%6 = select <8 x i1> %5, <8 x i16> %4, <8 x i16> zeroinitializer
|
||||
%7 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1)
|
||||
%res3 = add <8 x i16> %3, %6
|
||||
%res4 = add <8 x i16> %res3, %7
|
||||
ret <8 x i16> %res4
|
||||
}
|
||||
|
||||
declare <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
|
||||
declare <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16>, <16 x i16>)
|
||||
|
||||
define <16 x i16>@test_int_x86_avx512_mask_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_hi_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0xf5,0x28,0x8d,0xd8]
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x8d,0xd0]
|
||||
; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xa9,0x8d,0xc0]
|
||||
; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
|
||||
; CHECK-NEXT: vpaddw %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
|
||||
%res1 = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
|
||||
%res2 = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
|
||||
%res3 = add <16 x i16> %res, %res1
|
||||
%res4 = add <16 x i16> %res3, %res2
|
||||
%1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1)
|
||||
%2 = bitcast i16 %x3 to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2
|
||||
%4 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1)
|
||||
%5 = bitcast i16 %x3 to <16 x i1>
|
||||
%6 = select <16 x i1> %5, <16 x i16> %4, <16 x i16> zeroinitializer
|
||||
%7 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1)
|
||||
%res3 = add <16 x i16> %3, %6
|
||||
%res4 = add <16 x i16> %res3, %7
|
||||
ret <16 x i16> %res4
|
||||
}
|
||||
|
||||
|
21
test/CodeGen/X86/avx512vbmi-intrinsics-upgrade.ll
Normal file
21
test/CodeGen/X86/avx512vbmi-intrinsics-upgrade.ll
Normal file
@ -0,0 +1,21 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vbmi | FileCheck %s
|
||||
declare <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
|
||||
|
||||
define <64 x i8>@test_int_x86_avx512_mask_permvar_qi_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_qi_512:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vpermb %zmm0, %zmm1, %zmm3
|
||||
; CHECK-NEXT: kmovq %rdi, %k1
|
||||
; CHECK-NEXT: vpermb %zmm0, %zmm1, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vpermb %zmm0, %zmm1, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: vpaddb %zmm3, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vpaddb %zmm0, %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
|
||||
%res1 = call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> zeroinitializer, i64 %x3)
|
||||
%res2 = call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
|
||||
%res3 = add <64 x i8> %res, %res1
|
||||
%res4 = add <64 x i8> %res3, %res2
|
||||
ret <64 x i8> %res4
|
||||
}
|
@ -1,22 +1,27 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vbmi | FileCheck %s
|
||||
declare <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
|
||||
|
||||
declare <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8>, <64 x i8>)
|
||||
|
||||
define <64 x i8>@test_int_x86_avx512_mask_permvar_qi_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_qi_512:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vpermb %zmm0, %zmm1, %zmm3
|
||||
; CHECK-NEXT: kmovq %rdi, %k1
|
||||
; CHECK-NEXT: vpermb %zmm0, %zmm1, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vpermb %zmm0, %zmm1, %zmm3 {%k1} {z}
|
||||
; CHECK-NEXT: vpermb %zmm0, %zmm1, %zmm0
|
||||
; CHECK-NEXT: vpaddb %zmm0, %zmm3, %zmm0
|
||||
; CHECK-NEXT: vpermb %zmm0, %zmm1, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: vpaddb %zmm3, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vpaddb %zmm0, %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
|
||||
%res1 = call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> zeroinitializer, i64 %x3)
|
||||
%res2 = call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
|
||||
%res3 = add <64 x i8> %res, %res1
|
||||
%res4 = add <64 x i8> %res3, %res2
|
||||
%1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %x0, <64 x i8> %x1)
|
||||
%2 = bitcast i64 %x3 to <64 x i1>
|
||||
%3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %x2
|
||||
%4 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %x0, <64 x i8> %x1)
|
||||
%5 = bitcast i64 %x3 to <64 x i1>
|
||||
%6 = select <64 x i1> %5, <64 x i8> %4, <64 x i8> zeroinitializer
|
||||
%7 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %x0, <64 x i8> %x1)
|
||||
%res3 = add <64 x i8> %3, %6
|
||||
%res4 = add <64 x i8> %res3, %7
|
||||
ret <64 x i8> %res4
|
||||
}
|
||||
|
||||
|
42
test/CodeGen/X86/avx512vbmivl-intrinsics-upgrade.ll
Normal file
42
test/CodeGen/X86/avx512vbmivl-intrinsics-upgrade.ll
Normal file
@ -0,0 +1,42 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl -mattr=+avx512vbmi --show-mc-encoding| FileCheck %s
|
||||
|
||||
declare <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
|
||||
|
||||
define <16 x i8>@test_int_x86_avx512_mask_permvar_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_qi_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vpermb %xmm0, %xmm1, %xmm3 ## encoding: [0x62,0xf2,0x75,0x08,0x8d,0xd8]
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermb %xmm0, %xmm1, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x8d,0xd0]
|
||||
; CHECK-NEXT: vpermb %xmm0, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0x8d,0xc0]
|
||||
; CHECK-NEXT: vpaddb %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc3]
|
||||
; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
|
||||
%res1 = call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %x3)
|
||||
%res2 = call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
|
||||
%res3 = add <16 x i8> %res, %res1
|
||||
%res4 = add <16 x i8> %res3, %res2
|
||||
ret <16 x i8> %res4
|
||||
}
|
||||
|
||||
declare <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
|
||||
|
||||
define <32 x i8>@test_int_x86_avx512_mask_permvar_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_qi_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vpermb %ymm0, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0x75,0x28,0x8d,0xd8]
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermb %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x8d,0xd0]
|
||||
; CHECK-NEXT: vpermb %ymm0, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0x8d,0xc0]
|
||||
; CHECK-NEXT: vpaddb %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc3]
|
||||
; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
|
||||
%res1 = call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> zeroinitializer, i32 %x3)
|
||||
%res2 = call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
|
||||
%res3 = add <32 x i8> %res, %res1
|
||||
%res4 = add <32 x i8> %res3, %res2
|
||||
ret <32 x i8> %res4
|
||||
}
|
@ -1,43 +1,51 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl -mattr=+avx512vbmi --show-mc-encoding| FileCheck %s
|
||||
|
||||
declare <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
|
||||
declare <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8>, <16 x i8>)
|
||||
|
||||
define <16 x i8>@test_int_x86_avx512_mask_permvar_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_qi_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermb %xmm0, %xmm1, %xmm3 ## encoding: [0x62,0xf2,0x75,0x08,0x8d,0xd8]
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermb %xmm0, %xmm1, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x8d,0xd0]
|
||||
; CHECK-NEXT: vpermb %xmm0, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0x8d,0xc0]
|
||||
; CHECK-NEXT: vpaddb %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc3]
|
||||
; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
|
||||
%res1 = call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %x3)
|
||||
%res2 = call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
|
||||
%res3 = add <16 x i8> %res, %res1
|
||||
%res4 = add <16 x i8> %res3, %res2
|
||||
%1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1)
|
||||
%2 = bitcast i16 %x3 to <16 x i1>
|
||||
%3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %x2
|
||||
%4 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1)
|
||||
%5 = bitcast i16 %x3 to <16 x i1>
|
||||
%6 = select <16 x i1> %5, <16 x i8> %4, <16 x i8> zeroinitializer
|
||||
%7 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1)
|
||||
%res3 = add <16 x i8> %3, %6
|
||||
%res4 = add <16 x i8> %res3, %7
|
||||
ret <16 x i8> %res4
|
||||
}
|
||||
|
||||
declare <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
|
||||
declare <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8>, <32 x i8>)
|
||||
|
||||
define <32 x i8>@test_int_x86_avx512_mask_permvar_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_qi_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vpermb %ymm0, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0x75,0x28,0x8d,0xd8]
|
||||
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermb %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x8d,0xd0]
|
||||
; CHECK-NEXT: vpermb %ymm0, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0x8d,0xd8]
|
||||
; CHECK-NEXT: vpermb %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0x8d,0xc0]
|
||||
; CHECK-NEXT: vpaddb %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfc,0xc0]
|
||||
; CHECK-NEXT: vpermb %ymm0, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0x8d,0xc0]
|
||||
; CHECK-NEXT: vpaddb %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc3]
|
||||
; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
|
||||
%res1 = call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> zeroinitializer, i32 %x3)
|
||||
%res2 = call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
|
||||
%res3 = add <32 x i8> %res, %res1
|
||||
%res4 = add <32 x i8> %res3, %res2
|
||||
%1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1)
|
||||
%2 = bitcast i32 %x3 to <32 x i1>
|
||||
%3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %x2
|
||||
%4 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1)
|
||||
%5 = bitcast i32 %x3 to <32 x i1>
|
||||
%6 = select <32 x i1> %5, <32 x i8> %4, <32 x i8> zeroinitializer
|
||||
%7 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1)
|
||||
%res3 = add <32 x i8> %3, %6
|
||||
%res4 = add <32 x i8> %res3, %7
|
||||
ret <32 x i8> %res4
|
||||
}
|
||||
|
||||
|
@ -6777,3 +6777,43 @@ define <8 x i32>@test_int_x86_avx512_mask_permvar_si_256(<8 x i32> %x0, <8 x i32
|
||||
%res4 = add <8 x i32> %res3, %res2
|
||||
ret <8 x i32> %res4
|
||||
}
|
||||
|
||||
declare <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double>, <4 x i64>, <4 x double>, i8)
|
||||
|
||||
define <4 x double>@test_int_x86_avx512_mask_permvar_df_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_df_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vpermpd %ymm0, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0xf5,0x28,0x16,0xd8]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermpd %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x16,0xd0]
|
||||
; CHECK-NEXT: vpermpd %ymm0, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xa9,0x16,0xc0]
|
||||
; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
|
||||
; CHECK-NEXT: vaddpd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3)
|
||||
%res1 = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> zeroinitializer, i8 %x3)
|
||||
%res2 = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
|
||||
%res3 = fadd <4 x double> %res, %res1
|
||||
%res4 = fadd <4 x double> %res3, %res2
|
||||
ret <4 x double> %res4
|
||||
}
|
||||
|
||||
declare <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
|
||||
|
||||
define <4 x i64>@test_int_x86_avx512_mask_permvar_di_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_di_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vpermq %ymm0, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0xf5,0x28,0x36,0xd8]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermq %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x36,0xd0]
|
||||
; CHECK-NEXT: vpermq %ymm0, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xa9,0x36,0xc0]
|
||||
; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
|
||||
; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
|
||||
%res1 = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
|
||||
%res2 = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
|
||||
%res3 = add <4 x i64> %res, %res1
|
||||
%res4 = add <4 x i64> %res3, %res2
|
||||
ret <4 x i64> %res4
|
||||
}
|
||||
|
@ -3035,43 +3035,55 @@ define <4 x i64>@test_int_x86_avx512_mask_pror_q_256(<4 x i64> %x0, i32 %x1, <4
|
||||
ret <4 x i64> %res4
|
||||
}
|
||||
|
||||
declare <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double>, <4 x i64>, <4 x double>, i8)
|
||||
declare <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double>, <4 x i64>)
|
||||
|
||||
define <4 x double>@test_int_x86_avx512_mask_permvar_df_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_df_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermpd %ymm0, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0xf5,0x28,0x16,0xd8]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermpd %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x16,0xd0]
|
||||
; CHECK-NEXT: vpermpd %ymm0, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xa9,0x16,0xc0]
|
||||
; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
|
||||
; CHECK-NEXT: vaddpd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3)
|
||||
%res1 = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> zeroinitializer, i8 %x3)
|
||||
%res2 = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
|
||||
%res3 = fadd <4 x double> %res, %res1
|
||||
%res4 = fadd <4 x double> %res3, %res2
|
||||
%1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %x0, <4 x i64> %x1)
|
||||
%2 = bitcast i8 %x3 to <8 x i1>
|
||||
%extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%3 = select <4 x i1> %extract1, <4 x double> %1, <4 x double> %x2
|
||||
%4 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %x0, <4 x i64> %x1)
|
||||
%5 = bitcast i8 %x3 to <8 x i1>
|
||||
%extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%6 = select <4 x i1> %extract, <4 x double> %4, <4 x double> zeroinitializer
|
||||
%7 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %x0, <4 x i64> %x1)
|
||||
%res3 = fadd <4 x double> %3, %6
|
||||
%res4 = fadd <4 x double> %res3, %7
|
||||
ret <4 x double> %res4
|
||||
}
|
||||
|
||||
declare <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
|
||||
declare <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64>, <4 x i64>)
|
||||
|
||||
define <4 x i64>@test_int_x86_avx512_mask_permvar_di_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_permvar_di_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermq %ymm0, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0xf5,0x28,0x36,0xd8]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpermq %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x36,0xd0]
|
||||
; CHECK-NEXT: vpermq %ymm0, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xa9,0x36,0xc0]
|
||||
; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
|
||||
; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
|
||||
%res1 = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
|
||||
%res2 = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
|
||||
%res3 = add <4 x i64> %res, %res1
|
||||
%res4 = add <4 x i64> %res3, %res2
|
||||
%1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1)
|
||||
%2 = bitcast i8 %x3 to <8 x i1>
|
||||
%extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> %x2
|
||||
%4 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1)
|
||||
%5 = bitcast i8 %x3 to <8 x i1>
|
||||
%extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%6 = select <4 x i1> %extract, <4 x i64> %4, <4 x i64> zeroinitializer
|
||||
%7 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1)
|
||||
%res3 = add <4 x i64> %3, %6
|
||||
%res4 = add <4 x i64> %res3, %7
|
||||
ret <4 x i64> %res4
|
||||
}
|
||||
|
||||
|
@ -714,21 +714,21 @@ define <8 x double> @stack_fold_permpdvar(<8 x i64> %a0, <8 x double> %a1) {
|
||||
;CHECK-LABEL: stack_fold_permpdvar
|
||||
;CHECK: vpermpd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %a1, <8 x i64> %a0, <8 x double> undef, i8 -1)
|
||||
%2 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a1, <8 x i64> %a0)
|
||||
; fadd forces execution domain
|
||||
%3 = fadd <8 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0, double 0x0, double 0x0, double 0x0, double 0x0>
|
||||
%3 = fadd <8 x double> %2, zeroinitializer
|
||||
ret <8 x double> %3
|
||||
}
|
||||
declare <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double>, <8 x i64>, <8 x double>, i8) nounwind readonly
|
||||
declare <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double>, <8 x i64>) nounwind readonly
|
||||
|
||||
define <16 x float> @stack_fold_permps(<16 x i32> %a0, <16 x float> %a1) {
|
||||
;CHECK-LABEL: stack_fold_permps
|
||||
;CHECK: vpermps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %a1, <16 x i32> %a0, <16 x float> undef, i16 -1)
|
||||
%2 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a1, <16 x i32> %a0)
|
||||
ret <16 x float> %2
|
||||
}
|
||||
declare <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float>, <16 x i32>, <16 x float>, i16) nounwind readonly
|
||||
declare <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float>, <16 x i32>) nounwind readonly
|
||||
|
||||
define <8 x double> @stack_fold_permilpd_zmm(<8 x double> %a0) {
|
||||
;CHECK-LABEL: stack_fold_permilpd_zmm
|
||||
|
@ -721,12 +721,12 @@ define <4 x double> @stack_fold_permpdvar(<4 x i64> %a0, <4 x double> %a1) {
|
||||
;CHECK-LABEL: stack_fold_permpdvar
|
||||
;CHECK: vpermpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %a1, <4 x i64> %a0, <4 x double> undef, i8 -1)
|
||||
%2 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a1, <4 x i64> %a0)
|
||||
; fadd forces execution domain
|
||||
%3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
|
||||
%3 = fadd <4 x double> %2, zeroinitializer
|
||||
ret <4 x double> %3
|
||||
}
|
||||
declare <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double>, <4 x i64>, <4 x double>, i8) nounwind readonly
|
||||
declare <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double>, <4 x i64>) nounwind readonly
|
||||
|
||||
define <8 x float> @stack_fold_permps(<8 x i32> %a0, <8 x float> %a1) {
|
||||
;CHECK-LABEL: stack_fold_permps
|
||||
|
@ -599,16 +599,16 @@ define <64 x i8> @stack_fold_permbvar(<64 x i8> %a0, <64 x i8> %a1) {
|
||||
;CHECK-LABEL: stack_fold_permbvar
|
||||
;CHECK: vpermb {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %a1, <64 x i8> %a0, <64 x i8> undef, i64 -1)
|
||||
%2 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a1, <64 x i8> %a0)
|
||||
ret <64 x i8> %2
|
||||
}
|
||||
declare <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) nounwind readonly
|
||||
declare <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8>, <64 x i8>) nounwind readonly
|
||||
|
||||
define <64 x i8> @stack_fold_permbvar_mask(<64 x i8>* %passthru, <64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
|
||||
;CHECK-LABEL: stack_fold_permbvar_mask
|
||||
;CHECK: vpermb {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %a1, <64 x i8> %a0, <64 x i8> undef, i64 -1)
|
||||
%2 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a1, <64 x i8> %a0)
|
||||
%3 = bitcast i64 %mask to <64 x i1>
|
||||
; load needed to keep the operation from being scheduled above the asm block
|
||||
%4 = load <64 x i8>, <64 x i8>* %passthru
|
||||
@ -620,7 +620,7 @@ define <64 x i8> @stack_fold_permbvar_maskz(<64 x i8> %a0, <64 x i8> %a1, i64 %m
|
||||
;CHECK-LABEL: stack_fold_permbvar_maskz
|
||||
;CHECK: vpermb {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 64-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %a1, <64 x i8> %a0, <64 x i8> undef, i64 -1)
|
||||
%2 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a1, <64 x i8> %a0)
|
||||
%3 = bitcast i64 %mask to <64 x i1>
|
||||
%4 = select <64 x i1> %3, <64 x i8> %2, <64 x i8> zeroinitializer
|
||||
ret <64 x i8> %4
|
||||
@ -630,12 +630,12 @@ define <16 x i32> @stack_fold_permd(<16 x i32> %a0, <16 x i32> %a1) {
|
||||
;CHECK-LABEL: stack_fold_permd
|
||||
;CHECK: vpermd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %a1, <16 x i32> %a0, <16 x i32> undef, i16 -1)
|
||||
%2 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a1, <16 x i32> %a0)
|
||||
; add forces execution domain
|
||||
%3 = add <16 x i32> %2, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
|
||||
ret <16 x i32> %3
|
||||
}
|
||||
declare <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readonly
|
||||
declare <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32>, <16 x i32>) nounwind readonly
|
||||
|
||||
define <64 x i8> @stack_fold_vpermi2b(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2) {
|
||||
;CHECK-LABEL: stack_fold_vpermi2b
|
||||
@ -711,18 +711,18 @@ define <8 x i64> @stack_fold_permqvar(<8 x i64> %a0, <8 x i64> %a1) {
|
||||
;CHECK-LABEL: stack_fold_permqvar
|
||||
;CHECK: vpermq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %a1, <8 x i64> %a0, <8 x i64> undef, i8 -1)
|
||||
%2 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a1, <8 x i64> %a0)
|
||||
; add forces execution domain
|
||||
%3 = add <8 x i64> %2, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
|
||||
ret <8 x i64> %3
|
||||
}
|
||||
declare <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readonly
|
||||
declare <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64>, <8 x i64>) nounwind readonly
|
||||
|
||||
define <8 x i64> @stack_fold_permqvar_mask(<8 x i64>* %passthru, <8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
|
||||
;CHECK-LABEL: stack_fold_permqvar_mask
|
||||
;CHECK: vpermq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %a1, <8 x i64> %a0, <8 x i64> undef, i8 -1)
|
||||
%2 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a1, <8 x i64> %a0)
|
||||
%3 = bitcast i8 %mask to <8 x i1>
|
||||
; load needed to keep the operation from being scheduled above the asm block
|
||||
%4 = load <8 x i64>, <8 x i64>* %passthru
|
||||
@ -772,16 +772,16 @@ define <32 x i16> @stack_fold_permwvar(<32 x i16> %a0, <32 x i16> %a1) {
|
||||
;CHECK-LABEL: stack_fold_permwvar
|
||||
;CHECK: vpermw {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a1, <32 x i16> %a0, <32 x i16> undef, i32 -1)
|
||||
%2 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a1, <32 x i16> %a0)
|
||||
ret <32 x i16> %2
|
||||
}
|
||||
declare <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) nounwind readonly
|
||||
declare <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16>, <32 x i16>) nounwind readonly
|
||||
|
||||
define <32 x i16> @stack_fold_permwvar_mask(<32 x i16>* %passthru, <32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
|
||||
;CHECK-LABEL: stack_fold_permwvar_mask
|
||||
;CHECK: vpermw {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a1, <32 x i16> %a0, <32 x i16> undef, i32 -1)
|
||||
%2 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a1, <32 x i16> %a0)
|
||||
%3 = bitcast i32 %mask to <32 x i1>
|
||||
; load needed to keep the operation from being scheduled above the asm block
|
||||
%4 = load <32 x i16>, <32 x i16>* %passthru
|
||||
@ -793,7 +793,7 @@ define <32 x i16> @stack_fold_permwvar_maskz(<32 x i16> %a0, <32 x i16> %a1, i32
|
||||
;CHECK-LABEL: stack_fold_permwvar_maskz
|
||||
;CHECK: vpermw {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 64-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a1, <32 x i16> %a0, <32 x i16> undef, i32 -1)
|
||||
%2 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a1, <32 x i16> %a0)
|
||||
%3 = bitcast i32 %mask to <32 x i1>
|
||||
%4 = select <32 x i1> %3, <32 x i16> %2, <32 x i16> zeroinitializer
|
||||
ret <32 x i16> %4
|
||||
|
@ -568,12 +568,12 @@ define <32 x i8> @stack_fold_permbvar(<32 x i8> %a0, <32 x i8> %a1) {
|
||||
;CHECK-LABEL: stack_fold_permbvar
|
||||
;CHECK: vpermb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %a1, <32 x i8> %a0, <32 x i8> undef, i32 -1)
|
||||
%2 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a1, <32 x i8> %a0)
|
||||
; add forces execution domain
|
||||
%3 = add <32 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
|
||||
ret <32 x i8> %3
|
||||
}
|
||||
declare <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) nounwind readonly
|
||||
declare <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8>, <32 x i8>) nounwind readonly
|
||||
|
||||
define <8 x i32> @stack_fold_permd(<8 x i32> %a0, <8 x i32> %a1) {
|
||||
;CHECK-LABEL: stack_fold_permd
|
||||
@ -672,12 +672,12 @@ define <4 x i64> @stack_fold_permqvar(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
;CHECK-LABEL: stack_fold_permqvar
|
||||
;CHECK: vpermq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %a1, <4 x i64> %a0, <4 x i64> undef, i8 -1)
|
||||
%2 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a1, <4 x i64> %a0)
|
||||
; add forces execution domain
|
||||
%3 = add <4 x i64> %2, <i64 1, i64 1, i64 1, i64 1>
|
||||
ret <4 x i64> %3
|
||||
}
|
||||
declare <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) nounwind readonly
|
||||
declare <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64>, <4 x i64>) nounwind readonly
|
||||
|
||||
define <16 x i8> @stack_fold_vpermt2b(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2) {
|
||||
;CHECK-LABEL: stack_fold_vpermt2b
|
||||
@ -755,12 +755,12 @@ define <16 x i16> @stack_fold_permwvar(<16 x i16> %a0, <16 x i16> %a1) {
|
||||
;CHECK-LABEL: stack_fold_permwvar
|
||||
;CHECK: vpermw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
|
||||
%2 = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %a1, <16 x i16> %a0, <16 x i16> undef, i16 -1)
|
||||
%2 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a1, <16 x i16> %a0)
|
||||
; add forces execution domain
|
||||
%3 = add <16 x i16> %2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
|
||||
ret <16 x i16> %3
|
||||
}
|
||||
declare <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) nounwind readonly
|
||||
declare <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16>, <16 x i16>) nounwind readonly
|
||||
|
||||
define <4 x i32> @stack_fold_vplzcntd(<4 x i32> %a0) {
|
||||
;CHECK-LABEL: stack_fold_vplzcntd
|
||||
|
@ -6,10 +6,10 @@ declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x
|
||||
|
||||
declare <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)
|
||||
|
||||
declare <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double>, <8 x i64>, <8 x double>, i8)
|
||||
declare <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
|
||||
declare <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
|
||||
declare <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
|
||||
declare <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double>, <8 x i64>)
|
||||
declare <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64>, <8 x i64>)
|
||||
declare <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32>, <16 x i32>)
|
||||
declare <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16>, <32 x i16>)
|
||||
|
||||
declare <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64>, <8 x double>, <8 x double>, i8)
|
||||
declare <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)
|
||||
@ -33,16 +33,16 @@ define <8 x double> @combine_permvar_8f64_identity(<8 x double> %x0, <8 x double
|
||||
; X64-LABEL: combine_permvar_8f64_identity:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: retq
|
||||
%res0 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x double> %x1, i8 -1)
|
||||
%res1 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %res0, <8 x i64> <i64 7, i64 14, i64 5, i64 12, i64 3, i64 10, i64 1, i64 8>, <8 x double> %res0, i8 -1)
|
||||
ret <8 x double> %res1
|
||||
%1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %x0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
|
||||
%2 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %1, <8 x i64> <i64 7, i64 14, i64 5, i64 12, i64 3, i64 10, i64 1, i64 8>)
|
||||
ret <8 x double> %2
|
||||
}
|
||||
define <8 x double> @combine_permvar_8f64_identity_mask(<8 x double> %x0, <8 x double> %x1, i8 %m) {
|
||||
; X32-LABEL: combine_permvar_8f64_identity_mask:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: vmovapd {{.*#+}} zmm2 = [7,0,6,0,5,0,4,0,3,0,2,0,1,0,0,0]
|
||||
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: kmovd %eax, %k1
|
||||
; X32-NEXT: vmovapd {{.*#+}} zmm2 = [7,0,6,0,5,0,4,0,3,0,2,0,1,0,0,0]
|
||||
; X32-NEXT: vpermpd %zmm0, %zmm2, %zmm1 {%k1}
|
||||
; X32-NEXT: vmovapd {{.*#+}} zmm0 = [7,0,14,0,5,0,12,0,3,0,10,0,1,0,8,0]
|
||||
; X32-NEXT: vpermpd %zmm1, %zmm0, %zmm1 {%k1}
|
||||
@ -51,16 +51,20 @@ define <8 x double> @combine_permvar_8f64_identity_mask(<8 x double> %x0, <8 x d
|
||||
;
|
||||
; X64-LABEL: combine_permvar_8f64_identity_mask:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1
|
||||
; X64-NEXT: vmovapd {{.*#+}} zmm2 = [7,6,5,4,3,2,1,0]
|
||||
; X64-NEXT: kmovd %edi, %k1
|
||||
; X64-NEXT: vpermpd %zmm0, %zmm2, %zmm1 {%k1}
|
||||
; X64-NEXT: vmovapd {{.*#+}} zmm0 = [7,14,5,12,3,10,1,8]
|
||||
; X64-NEXT: vpermpd %zmm1, %zmm0, %zmm1 {%k1}
|
||||
; X64-NEXT: vmovapd %zmm1, %zmm0
|
||||
; X64-NEXT: retq
|
||||
%res0 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x double> %x1, i8 %m)
|
||||
%res1 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %res0, <8 x i64> <i64 7, i64 14, i64 5, i64 12, i64 3, i64 10, i64 1, i64 8>, <8 x double> %res0, i8 %m)
|
||||
ret <8 x double> %res1
|
||||
%1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %x0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
|
||||
%2 = bitcast i8 %m to <8 x i1>
|
||||
%3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %x1
|
||||
%4 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %3, <8 x i64> <i64 7, i64 14, i64 5, i64 12, i64 3, i64 10, i64 1, i64 8>)
|
||||
%5 = bitcast i8 %m to <8 x i1>
|
||||
%6 = select <8 x i1> %5, <8 x double> %4, <8 x double> %3
|
||||
ret <8 x double> %6
|
||||
}
|
||||
|
||||
define <8 x i64> @combine_permvar_8i64_identity(<8 x i64> %x0, <8 x i64> %x1) {
|
||||
@ -71,16 +75,16 @@ define <8 x i64> @combine_permvar_8i64_identity(<8 x i64> %x0, <8 x i64> %x1) {
|
||||
; X64-LABEL: combine_permvar_8i64_identity:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: retq
|
||||
%res0 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x i64> %x1, i8 -1)
|
||||
%res1 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %res0, <8 x i64> <i64 7, i64 14, i64 5, i64 12, i64 3, i64 10, i64 1, i64 8>, <8 x i64> %res0, i8 -1)
|
||||
ret <8 x i64> %res1
|
||||
%1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %x0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
|
||||
%2 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %1, <8 x i64> <i64 7, i64 14, i64 5, i64 12, i64 3, i64 10, i64 1, i64 8>)
|
||||
ret <8 x i64> %2
|
||||
}
|
||||
define <8 x i64> @combine_permvar_8i64_identity_mask(<8 x i64> %x0, <8 x i64> %x1, i8 %m) {
|
||||
; X32-LABEL: combine_permvar_8i64_identity_mask:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,0,6,0,5,0,4,0,3,0,2,0,1,0,0,0]
|
||||
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: kmovd %eax, %k1
|
||||
; X32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,0,6,0,5,0,4,0,3,0,2,0,1,0,0,0]
|
||||
; X32-NEXT: vpermq %zmm0, %zmm2, %zmm1 {%k1}
|
||||
; X32-NEXT: vmovdqa64 {{.*#+}} zmm0 = [7,0,14,0,5,0,12,0,3,0,10,0,1,0,8,0]
|
||||
; X32-NEXT: vpermq %zmm1, %zmm0, %zmm1 {%k1}
|
||||
@ -89,16 +93,20 @@ define <8 x i64> @combine_permvar_8i64_identity_mask(<8 x i64> %x0, <8 x i64> %x
|
||||
;
|
||||
; X64-LABEL: combine_permvar_8i64_identity_mask:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovd %edi, %k1
|
||||
; X64-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,6,5,4,3,2,1,0]
|
||||
; X64-NEXT: kmovd %edi, %k1
|
||||
; X64-NEXT: vpermq %zmm0, %zmm2, %zmm1 {%k1}
|
||||
; X64-NEXT: vmovdqa64 {{.*#+}} zmm0 = [7,14,5,12,3,10,1,8]
|
||||
; X64-NEXT: vpermq %zmm1, %zmm0, %zmm1 {%k1}
|
||||
; X64-NEXT: vmovdqa64 %zmm1, %zmm0
|
||||
; X64-NEXT: retq
|
||||
%res0 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x i64> %x1, i8 %m)
|
||||
%res1 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %res0, <8 x i64> <i64 7, i64 14, i64 5, i64 12, i64 3, i64 10, i64 1, i64 8>, <8 x i64> %res0, i8 %m)
|
||||
ret <8 x i64> %res1
|
||||
%1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %x0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
|
||||
%2 = bitcast i8 %m to <8 x i1>
|
||||
%3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x1
|
||||
%4 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %3, <8 x i64> <i64 7, i64 14, i64 5, i64 12, i64 3, i64 10, i64 1, i64 8>)
|
||||
%5 = bitcast i8 %m to <8 x i1>
|
||||
%6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> %3
|
||||
ret <8 x i64> %6
|
||||
}
|
||||
|
||||
define <8 x double> @combine_vpermt2var_8f64_identity(<8 x double> %x0, <8 x double> %x1) {
|
||||
@ -619,7 +627,7 @@ define <32 x i16> @combine_permvar_as_vpbroadcastw512(<32 x i16> %x0) {
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vpbroadcastw %xmm0, %zmm0
|
||||
; X64-NEXT: retq
|
||||
%1 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> zeroinitializer, <32 x i16> undef, i32 -1)
|
||||
%1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %x0, <32 x i16> zeroinitializer)
|
||||
ret <32 x i16> %1
|
||||
}
|
||||
|
||||
@ -633,7 +641,7 @@ define <16 x i32> @combine_permvar_as_vpbroadcastd512(<16 x i32> %x0) {
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vbroadcastss %xmm0, %zmm0
|
||||
; X64-NEXT: retq
|
||||
%1 = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> zeroinitializer, <16 x i32> undef, i16 -1)
|
||||
%1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %x0, <16 x i32> zeroinitializer)
|
||||
ret <16 x i32> %1
|
||||
}
|
||||
|
||||
@ -647,7 +655,7 @@ define <8 x i64> @combine_permvar_as_vpbroadcastq512(<8 x i64> %x0) {
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vbroadcastsd %xmm0, %zmm0
|
||||
; X64-NEXT: retq
|
||||
%1 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> zeroinitializer, <8 x i64> undef, i8 -1)
|
||||
%1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %x0, <8 x i64> zeroinitializer)
|
||||
ret <8 x i64> %1
|
||||
}
|
||||
|
||||
@ -661,7 +669,7 @@ define <8 x i64> @combine_permvar_8i64_as_permq(<8 x i64> %x0, <8 x i64> %x1) {
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4]
|
||||
; X64-NEXT: retq
|
||||
%1 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> <i64 3, i64 2, i64 1, i64 undef, i64 undef, i64 6, i64 5, i64 4>, <8 x i64> %x1, i8 -1)
|
||||
%1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %x0, <8 x i64> <i64 3, i64 2, i64 1, i64 undef, i64 undef, i64 6, i64 5, i64 4>)
|
||||
ret <8 x i64> %1
|
||||
}
|
||||
define <8 x i64> @combine_permvar_8i64_as_permq_mask(<8 x i64> %x0, <8 x i64> %x1, i8 %m) {
|
||||
@ -679,8 +687,10 @@ define <8 x i64> @combine_permvar_8i64_as_permq_mask(<8 x i64> %x0, <8 x i64> %x
|
||||
; X64-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,2,1,0,7,6,5,4]
|
||||
; X64-NEXT: vmovdqa64 %zmm1, %zmm0
|
||||
; X64-NEXT: retq
|
||||
%1 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> <i64 3, i64 2, i64 1, i64 undef, i64 undef, i64 6, i64 5, i64 4>, <8 x i64> %x1, i8 %m)
|
||||
ret <8 x i64> %1
|
||||
%1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %x0, <8 x i64> <i64 3, i64 2, i64 1, i64 undef, i64 undef, i64 6, i64 5, i64 4>)
|
||||
%2 = bitcast i8 %m to <8 x i1>
|
||||
%3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x1
|
||||
ret <8 x i64> %3
|
||||
}
|
||||
|
||||
define <8 x double> @combine_permvar_8f64_as_permpd(<8 x double> %x0, <8 x double> %x1) {
|
||||
@ -693,7 +703,7 @@ define <8 x double> @combine_permvar_8f64_as_permpd(<8 x double> %x0, <8 x doubl
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4]
|
||||
; X64-NEXT: retq
|
||||
%1 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> <i64 3, i64 2, i64 1, i64 undef, i64 undef, i64 6, i64 5, i64 4>, <8 x double> %x1, i8 -1)
|
||||
%1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %x0, <8 x i64> <i64 3, i64 2, i64 1, i64 undef, i64 undef, i64 6, i64 5, i64 4>)
|
||||
ret <8 x double> %1
|
||||
}
|
||||
define <8 x double> @combine_permvar_8f64_as_permpd_mask(<8 x double> %x0, <8 x double> %x1, i8 %m) {
|
||||
@ -711,8 +721,10 @@ define <8 x double> @combine_permvar_8f64_as_permpd_mask(<8 x double> %x0, <8 x
|
||||
; X64-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,2,1,0,7,6,5,4]
|
||||
; X64-NEXT: vmovapd %zmm1, %zmm0
|
||||
; X64-NEXT: retq
|
||||
%1 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> <i64 3, i64 2, i64 1, i64 undef, i64 undef, i64 6, i64 5, i64 4>, <8 x double> %x1, i8 %m)
|
||||
ret <8 x double> %1
|
||||
%1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %x0, <8 x i64> <i64 3, i64 2, i64 1, i64 undef, i64 undef, i64 6, i64 5, i64 4>)
|
||||
%2 = bitcast i8 %m to <8 x i1>
|
||||
%3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %x1
|
||||
ret <8 x double> %3
|
||||
}
|
||||
|
||||
define <16 x float> @combine_vpermilvar_16f32_230146759A8BCFDE(<16 x float> %x0) {
|
||||
@ -798,8 +810,8 @@ define <32 x i16> @combine_permvar_as_pshuflw(<32 x i16> %a0) {
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vpshuflw {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15,17,16,19,18,20,21,22,23,25,24,27,26,28,29,30,31]
|
||||
; X64-NEXT: retq
|
||||
%res0 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 1, i16 0, i16 3, i16 2, i16 4, i16 5, i16 6, i16 7, i16 9, i16 8, i16 11, i16 10, i16 12, i16 13, i16 14, i16 15, i16 17, i16 16, i16 19, i16 18, i16 20, i16 21, i16 22, i16 23, i16 25, i16 24, i16 27, i16 26, i16 28, i16 29, i16 30, i16 31>, <32 x i16> undef, i32 -1)
|
||||
ret <32 x i16> %res0
|
||||
%1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 1, i16 0, i16 3, i16 2, i16 4, i16 5, i16 6, i16 7, i16 9, i16 8, i16 11, i16 10, i16 12, i16 13, i16 14, i16 15, i16 17, i16 16, i16 19, i16 18, i16 20, i16 21, i16 22, i16 23, i16 25, i16 24, i16 27, i16 26, i16 28, i16 29, i16 30, i16 31>)
|
||||
ret <32 x i16> %1
|
||||
}
|
||||
|
||||
define <32 x i16> @combine_pshufb_as_pshufhw(<32 x i16> %a0) {
|
||||
@ -812,8 +824,8 @@ define <32 x i16> @combine_pshufb_as_pshufhw(<32 x i16> %a0) {
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14,16,17,18,19,21,20,23,22,24,25,26,27,29,28,31,30]
|
||||
; X64-NEXT: retq
|
||||
%res0 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 5, i16 4, i16 7, i16 6, i16 8, i16 9, i16 10, i16 11, i16 13, i16 12, i16 15, i16 14, i16 16, i16 17, i16 18, i16 19, i16 21, i16 20, i16 23, i16 22, i16 24, i16 25, i16 26, i16 27, i16 29, i16 28, i16 31, i16 30>, <32 x i16> undef, i32 -1)
|
||||
ret <32 x i16> %res0
|
||||
%1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 5, i16 4, i16 7, i16 6, i16 8, i16 9, i16 10, i16 11, i16 13, i16 12, i16 15, i16 14, i16 16, i16 17, i16 18, i16 19, i16 21, i16 20, i16 23, i16 22, i16 24, i16 25, i16 26, i16 27, i16 29, i16 28, i16 31, i16 30>)
|
||||
ret <32 x i16> %1
|
||||
}
|
||||
|
||||
define <32 x i16> @combine_vpermi2var_32i16_as_pshufb(<32 x i16> %a0) {
|
||||
@ -826,9 +838,9 @@ define <32 x i16> @combine_vpermi2var_32i16_as_pshufb(<32 x i16> %a0) {
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13,18,19,16,17,22,23,20,21,26,27,24,25,30,31,28,29,34,35,32,33,38,39,36,37,42,43,40,41,46,47,44,45,50,51,48,49,54,55,52,53,58,59,56,57,62,63,60,61]
|
||||
; X64-NEXT: retq
|
||||
%res0 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 1, i16 0, i16 3, i16 2, i16 4, i16 5, i16 6, i16 7, i16 9, i16 8, i16 11, i16 10, i16 12, i16 13, i16 14, i16 15, i16 17, i16 16, i16 19, i16 18, i16 20, i16 21, i16 22, i16 23, i16 25, i16 24, i16 27, i16 26, i16 28, i16 29, i16 30, i16 31>, <32 x i16> undef, i32 -1)
|
||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %res0, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 5, i16 4, i16 7, i16 6, i16 8, i16 9, i16 10, i16 11, i16 13, i16 12, i16 15, i16 14, i16 16, i16 17, i16 18, i16 19, i16 21, i16 20, i16 23, i16 22, i16 24, i16 25, i16 26, i16 27, i16 29, i16 28, i16 31, i16 30>, <32 x i16> undef, i32 -1)
|
||||
ret <32 x i16> %res1
|
||||
%1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 1, i16 0, i16 3, i16 2, i16 4, i16 5, i16 6, i16 7, i16 9, i16 8, i16 11, i16 10, i16 12, i16 13, i16 14, i16 15, i16 17, i16 16, i16 19, i16 18, i16 20, i16 21, i16 22, i16 23, i16 25, i16 24, i16 27, i16 26, i16 28, i16 29, i16 30, i16 31>)
|
||||
%2 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %1, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 5, i16 4, i16 7, i16 6, i16 8, i16 9, i16 10, i16 11, i16 13, i16 12, i16 15, i16 14, i16 16, i16 17, i16 18, i16 19, i16 21, i16 20, i16 23, i16 22, i16 24, i16 25, i16 26, i16 27, i16 29, i16 28, i16 31, i16 30>)
|
||||
ret <32 x i16> %2
|
||||
}
|
||||
|
||||
define <8 x double> @combine_vpermi2var_8f64_identity(<8 x double> %x0, <8 x double> %x1) {
|
||||
@ -1093,8 +1105,8 @@ define <8 x double> @combine_vpermi2var_vpermvar_8f64_as_vperm2_zero(<8 x double
|
||||
; X64-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
|
||||
; X64-NEXT: retq
|
||||
%res0 = shufflevector <8 x double> %x0, <8 x double> zeroinitializer, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
|
||||
%res1 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %res0, <8 x i64> <i64 3, i64 2, i64 1, i64 7, i64 0, i64 6, i64 5, i64 4>, <8 x double> %res0, i8 -1)
|
||||
ret <8 x double> %res1
|
||||
%1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %res0, <8 x i64> <i64 3, i64 2, i64 1, i64 7, i64 0, i64 6, i64 5, i64 4>)
|
||||
ret <8 x double> %1
|
||||
}
|
||||
|
||||
define <16 x float> @combine_vpermi2var_vpermvar_16f32_as_vperm2_zero(<16 x float> %x0) {
|
||||
@ -1128,6 +1140,6 @@ define <8 x i64> @combine_vpermvar_insertion_as_broadcast_v8i64(i64 %a0) {
|
||||
; X64-NEXT: vpbroadcastq %xmm0, %zmm0
|
||||
; X64-NEXT: retq
|
||||
%1 = insertelement <8 x i64> undef, i64 %a0, i32 0
|
||||
%2 = tail call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %1, <8 x i64> zeroinitializer, <8 x i64> undef, i8 -1)
|
||||
%2 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %1, <8 x i64> zeroinitializer)
|
||||
ret <8 x i64> %2
|
||||
}
|
||||
|
@ -2,7 +2,6 @@
|
||||
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefix=X32
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefix=X64
|
||||
|
||||
declare <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
|
||||
declare <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
|
||||
declare <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
|
||||
|
||||
|
@ -2,17 +2,14 @@
|
||||
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512vbmi,+avx512vl | FileCheck %s --check-prefix=X32
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vbmi,+avx512vl | FileCheck %s --check-prefix=X64
|
||||
|
||||
declare <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
|
||||
declare <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
|
||||
declare <16 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
|
||||
declare <16 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
|
||||
|
||||
declare <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
|
||||
declare <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
|
||||
declare <32 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
|
||||
declare <32 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
|
||||
|
||||
declare <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
|
||||
declare <64 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
|
||||
declare <64 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
|
||||
declare <64 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user