[X86][AVX512] Fix DAG & add intrinsics for fixupimm
Cover all widths and types (pd/ps/sd/ss) of the fixupimm instruction and intrinsics.
Differential Revision: http://reviews.llvm.org/D16313
llvm-svn: 258124
This commit is contained in:
parent 2e451b76ef
commit 19e99238a0
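For reference, the new 512-bit masked form is exercised later in the tests with the declaration below; a minimal IR sketch follows (the values %a, %b, %c and the mask %m are placeholders, and the trailing i32 4 selects the "current direction" rounding mode, as in the test calls):

declare <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double>, <8 x double>, <8 x i64>, i32, i8, i32)

  ; placeholder operands %a, %b, %c, %m; imm 5 is the fixup control byte
  %r = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> %a, <8 x double> %b, <8 x i64> %c, i32 5, i8 %m, i32 4)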
@@ -6157,6 +6157,86 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
  def int_x86_avx512_mask_sqrt_ps_512 : GCCBuiltin<"__builtin_ia32_sqrtps512_mask">,
          Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
          llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_fixupimm_pd_128 :
          GCCBuiltin<"__builtin_ia32_fixupimmpd128_mask">,
          Intrinsic<[llvm_v2f64_ty],
          [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_i8_ty],
          [IntrNoMem]>;
  def int_x86_avx512_maskz_fixupimm_pd_128 :
          GCCBuiltin<"__builtin_ia32_fixupimmpd128_maskz">,
          Intrinsic<[llvm_v2f64_ty],
          [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_i8_ty],
          [IntrNoMem]>;
  def int_x86_avx512_mask_fixupimm_pd_256 :
          GCCBuiltin<"__builtin_ia32_fixupimmpd256_mask">,
          Intrinsic<[llvm_v4f64_ty],
          [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_i8_ty],
          [IntrNoMem]>;
  def int_x86_avx512_maskz_fixupimm_pd_256 :
          GCCBuiltin<"__builtin_ia32_fixupimmpd256_maskz">,
          Intrinsic<[llvm_v4f64_ty],
          [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_i8_ty],
          [IntrNoMem]>;
  def int_x86_avx512_mask_fixupimm_pd_512 :
          GCCBuiltin<"__builtin_ia32_fixupimmpd512_mask">,
          Intrinsic<[llvm_v8f64_ty],
          [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8i64_ty, llvm_i32_ty, llvm_i8_ty,
          llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_maskz_fixupimm_pd_512 :
          GCCBuiltin<"__builtin_ia32_fixupimmpd512_maskz">,
          Intrinsic<[llvm_v8f64_ty],
          [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8i64_ty, llvm_i32_ty, llvm_i8_ty,
          llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_fixupimm_ps_128 :
          GCCBuiltin<"__builtin_ia32_fixupimmps128_mask">,
          Intrinsic<[llvm_v4f32_ty],
          [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i8_ty],
          [IntrNoMem]>;
  def int_x86_avx512_maskz_fixupimm_ps_128 :
          GCCBuiltin<"__builtin_ia32_fixupimmps128_maskz">,
          Intrinsic<[llvm_v4f32_ty],
          [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i8_ty],
          [IntrNoMem]>;
  def int_x86_avx512_mask_fixupimm_ps_256 :
          GCCBuiltin<"__builtin_ia32_fixupimmps256_mask">,
          Intrinsic<[llvm_v8f32_ty],
          [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_i8_ty],
          [IntrNoMem]>;
  def int_x86_avx512_maskz_fixupimm_ps_256 :
          GCCBuiltin<"__builtin_ia32_fixupimmps256_maskz">,
          Intrinsic<[llvm_v8f32_ty],
          [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_i8_ty],
          [IntrNoMem]>;
  def int_x86_avx512_mask_fixupimm_ps_512 :
          GCCBuiltin<"__builtin_ia32_fixupimmps512_mask">,
          Intrinsic<[llvm_v16f32_ty],
          [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16i32_ty, llvm_i32_ty,
          llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_maskz_fixupimm_ps_512 :
          GCCBuiltin<"__builtin_ia32_fixupimmps512_maskz">,
          Intrinsic<[llvm_v16f32_ty],
          [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16i32_ty, llvm_i32_ty,
          llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_fixupimm_sd :
          GCCBuiltin<"__builtin_ia32_fixupimmsd_mask">,
          Intrinsic<[llvm_v2f64_ty],
          [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_i8_ty,
          llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_maskz_fixupimm_sd :
          GCCBuiltin<"__builtin_ia32_fixupimmsd_maskz">,
          Intrinsic<[llvm_v2f64_ty],
          [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_i8_ty,
          llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_fixupimm_ss :
          GCCBuiltin<"__builtin_ia32_fixupimmss_mask">,
          Intrinsic<[llvm_v4f32_ty],
          [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i8_ty,
          llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_maskz_fixupimm_ss :
          GCCBuiltin<"__builtin_ia32_fixupimmss_maskz">,
          Intrinsic<[llvm_v4f32_ty],
          [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i8_ty,
          llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_getexp_pd_128 : GCCBuiltin<"__builtin_ia32_getexppd128_mask">,
          Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
          llvm_i8_ty], [IntrNoMem]>;
@@ -17024,6 +17024,35 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
                              Src2, Src1);
    return DAG.getBitcast(VT, Res);
  }
  case FIXUPIMMS:
  case FIXUPIMMS_MASKZ:
  case FIXUPIMM:
  case FIXUPIMM_MASKZ: {
    SDValue Src1 = Op.getOperand(1);
    SDValue Src2 = Op.getOperand(2);
    SDValue Src3 = Op.getOperand(3);
    SDValue Imm = Op.getOperand(4);
    SDValue Mask = Op.getOperand(5);
    SDValue Passthru = (IntrData->Type == FIXUPIMM || IntrData->Type == FIXUPIMMS) ?
                       Src1 : getZeroVector(VT, Subtarget, DAG, dl);
    // We specify 2 possible modes for intrinsics, with/without rounding modes.
    // First, we check if the intrinsic has a rounding mode (7 operands);
    // if not, we set the rounding mode to "current".
    SDValue Rnd;
    if (Op.getNumOperands() == 7)
      Rnd = Op.getOperand(6);
    else
      Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
    if (IntrData->Type == FIXUPIMM || IntrData->Type == FIXUPIMM_MASKZ)
      return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
                                              Src1, Src2, Src3, Imm, Rnd),
                                  Mask, Passthru, Subtarget, DAG);
    else // Scalar - FIXUPIMMS, FIXUPIMMS_MASKZ
      return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
                                              Src1, Src2, Src3, Imm, Rnd),
                                  Mask, Passthru, Subtarget, DAG);
  }
  case CONVERT_TO_MASK: {
    MVT SrcVT = Op.getOperand(1).getSimpleValueType();
    MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements());
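The operand-count check above distinguishes the two intrinsic shapes defined earlier: the 128/256-bit variants carry no rounding argument, while the 512-bit and scalar variants end with an i32 rounding operand. A rough IR sketch of the two call shapes (the values %a, %b, %c and mask %m are placeholders; 4 means "current direction", 8 corresponds to the {sae} form seen in the tests):

  ; VL form: five arguments, so the lowering synthesizes CUR_DIRECTION itself
  %r256 = call <4 x double> @llvm.x86.avx512.mask.fixupimm.pd.256(<4 x double> %a, <4 x double> %b, <4 x i64> %c, i32 5, i8 %m)
  ; 512-bit form: the sixth argument selects the rounding mode explicitly
  %r512 = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> %a2, <8 x double> %b2, <8 x i64> %c2, i32 5, i8 %m, i32 4)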
@@ -20934,6 +20963,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
  case X86ISD::VPERMI:             return "X86ISD::VPERMI";
  case X86ISD::VPTERNLOG:          return "X86ISD::VPTERNLOG";
  case X86ISD::VFIXUPIMM:          return "X86ISD::VFIXUPIMM";
  case X86ISD::VFIXUPIMMS:         return "X86ISD::VFIXUPIMMS";
  case X86ISD::VRANGE:             return "X86ISD::VRANGE";
  case X86ISD::PMULUDQ:            return "X86ISD::PMULUDQ";
  case X86ISD::PMULDQ:             return "X86ISD::PMULDQ";
@@ -402,6 +402,7 @@ namespace llvm {
      VPTERNLOG,
      // Fix Up Special Packed Float32/64 values
      VFIXUPIMM,
      VFIXUPIMMS,
      // Range Restriction Calculation For Packed Pairs of Float32/64 values
      VRANGE,
      // Reduce - Perform Reduction Transformation on scalar\packed FP
@@ -95,6 +95,12 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
                                  "v" # NumElts # "f" # EltSize,
                                  VTName)));

  ValueType IntVT = !cast<ValueType>(
                      !if (!eq (!srl(EltSize,5),0),
                           VTName,
                           !if (!eq(TypeVariantName, "f"),
                                "v" # NumElts # "i" # EltSize,
                                VTName)));
  // The string to specify embedded broadcast in assembly.
  string BroadcastStr = "{1to" # NumElts # "}";
@@ -301,7 +307,7 @@ multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                   !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                   !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                   OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                   (X86select _.KRCWM:$mask, RHS, _.RC:$src1)>;
                   (X86select _.KRCWM:$mask, RHS, _.RC:$src1), X86select>;

multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins,
@@ -6913,19 +6919,6 @@ multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
            opcPd, OpNode, prd>, EVEX_CD8<64, CD8VF>, VEX_W;
}

defm VFIXUPIMMPD : avx512_common_fp_sae_packed_imm<"vfixupimmpd",
                avx512vl_f64_info, 0x54, X86VFixupimm, HasAVX512>,
                AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VFIXUPIMMPS : avx512_common_fp_sae_packed_imm<"vfixupimmps",
                avx512vl_f32_info, 0x54, X86VFixupimm, HasAVX512>,
                AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;

defm VFIXUPIMMSD: avx512_common_fp_sae_scalar_imm<"vfixupimmsd", f64x_info,
                0x55, X86VFixupimm, HasAVX512>,
                AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VFIXUPIMMSS: avx512_common_fp_sae_scalar_imm<"vfixupimmss", f32x_info,
                0x55, X86VFixupimm, HasAVX512>,
                AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
                X86VReduce, HasDQI>, AVX512AIi8Base, EVEX;
@@ -7458,3 +7451,112 @@ multiclass avx512_common_ternlog<string OpcodeStr, AVX512VLVectorVTInfo _>{
defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", avx512vl_i32_info>;
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", avx512vl_i64_info>, VEX_W;

//===----------------------------------------------------------------------===//
// AVX-512 - FixupImm
//===----------------------------------------------------------------------===//

multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86VectorVTInfo _>{
  let Constraints = "$src1 = $dst" in {
    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
          OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
          (OpNode (_.VT _.RC:$src1),
                  (_.VT _.RC:$src2),
                  (_.IntVT _.RC:$src3),
                  (i32 imm:$src4),
                  (i32 FROUND_CURRENT))>;
    let mayLoad = 1 in {
      defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
          OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
          (OpNode (_.VT _.RC:$src1),
                  (_.VT _.RC:$src2),
                  (_.IntVT (bitconvert (_.LdFrag addr:$src3))),
                  (i32 imm:$src4),
                  (i32 FROUND_CURRENT))>;
      defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
          OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
          "$src2, ${src3}"##_.BroadcastStr##", $src4",
          (OpNode (_.VT _.RC:$src1),
                  (_.VT _.RC:$src2),
                  (_.IntVT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
                  (i32 imm:$src4),
                  (i32 FROUND_CURRENT))>, EVEX_B;
    }
  } // Constraints = "$src1 = $dst"
}

multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, X86VectorVTInfo _>{
  let Constraints = "$src1 = $dst" in {
    defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
          OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
          "$src2, $src3, {sae}, $src4",
          (OpNode (_.VT _.RC:$src1),
                  (_.VT _.RC:$src2),
                  (_.IntVT _.RC:$src3),
                  (i32 imm:$src4),
                  (i32 FROUND_NO_EXC))>, EVEX_B;
  }
}

multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86VectorVTInfo _, X86VectorVTInfo _src3VT> {
  let Constraints = "$src1 = $dst", Predicates = [HasAVX512] in {
    defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
          OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
          (OpNode (_.VT _.RC:$src1),
                  (_.VT _.RC:$src2),
                  (_src3VT.VT _src3VT.RC:$src3),
                  (i32 imm:$src4),
                  (i32 FROUND_CURRENT))>;

    defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
          OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
          "$src2, $src3, {sae}, $src4",
          (OpNode (_.VT _.RC:$src1),
                  (_.VT _.RC:$src2),
                  (_src3VT.VT _src3VT.RC:$src3),
                  (i32 imm:$src4),
                  (i32 FROUND_NO_EXC))>, EVEX_B;
    let mayLoad = 1 in
      defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
          OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
          (OpNode (_.VT _.RC:$src1),
                  (_.VT _.RC:$src2),
                  (_src3VT.VT (scalar_to_vector
                              (_src3VT.ScalarLdFrag addr:$src3))),
                  (i32 imm:$src4),
                  (i32 FROUND_CURRENT))>;
  }
}

multiclass avx512_fixupimm_packed_all<AVX512VLVectorVTInfo _Vec>{
  let Predicates = [HasAVX512] in
    defm Z    : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, _Vec.info512>,
                avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, _Vec.info512>,
                AVX512AIi8Base, EVEX_4V, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, _Vec.info128>,
                AVX512AIi8Base, EVEX_4V, EVEX_V128;
    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, _Vec.info256>,
                AVX512AIi8Base, EVEX_4V, EVEX_V256;
  }
}

defm VFIXUPIMMSS : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
                                          f32x_info, v4i32x_info>,
                   AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VFIXUPIMMSD : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
                                          f64x_info, v2i64x_info>,
                   AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VFIXUPIMMPS : avx512_fixupimm_packed_all<avx512vl_f32_info>,
                   EVEX_CD8<32, CD8VF>;
defm VFIXUPIMMPD : avx512_fixupimm_packed_all<avx512vl_f64_info>,
                   EVEX_CD8<64, CD8VF>, VEX_W;
@@ -309,6 +309,10 @@ def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                 SDTCisSameAs<0,2>, SDTCisVT<3, i8>]>;
def SDTFPBinOpImmRound: SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                              SDTCisSameAs<0,2>, SDTCisInt<3>, SDTCisInt<4>]>;
def SDTFPTernaryOpImmRound: SDTypeProfile<1, 5, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                              SDTCisSameAs<0,2>, SDTCisVec<3>, SDTCisInt<4>, SDTCisInt<5>]>;
def SDTFPTernaryOpImmRounds: SDTypeProfile<1, 5, [SDTCisSameAs<0,1>,
                              SDTCisSameAs<0,2>, SDTCisInt<3>, SDTCisInt<4>, SDTCisInt<5>]>;
def SDTFPUnaryOpImmRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                              SDTCisInt<2>, SDTCisInt<3>]>;

@@ -405,7 +409,8 @@ def X86vpternlog : SDNode<"X86ISD::VPTERNLOG", SDTTernlog>;

def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;

def X86VFixupimm       : SDNode<"X86ISD::VFIXUPIMM", SDTFPBinOpImmRound>;
def X86VFixupimm       : SDNode<"X86ISD::VFIXUPIMM", SDTFPTernaryOpImmRound>;
def X86VFixupimmScalar : SDNode<"X86ISD::VFIXUPIMMS", SDTFPTernaryOpImmRounds>;
def X86VRange          : SDNode<"X86ISD::VRANGE", SDTFPBinOpImmRound>;
def X86VReduce         : SDNode<"X86ISD::VREDUCE", SDTFPUnaryOpImmRound>;
def X86VRndScale       : SDNode<"X86ISD::VRNDSCALE", SDTFPUnaryOpImmRound>;
@@ -30,8 +30,8 @@ enum IntrinsicType {
  COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, BRCST_SUBVEC_TO_VEC,
  TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
  EXPAND_FROM_MEM, LOADA, LOADU, STOREA, STOREU, BLEND, INSERT_SUBVEC,
  TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK,
  CONVERT_MASK_TO_VEC, CONVERT_TO_MASK
  TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS,
  FIXUPIMMS_MASKZ, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK
};

struct IntrinsicData {
@@ -810,6 +810,14 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
                     X86ISD::EXPAND, 0),
  X86_INTRINSIC_DATA(avx512_mask_expand_q_512, COMPRESS_EXPAND_IN_REG,
                     X86ISD::EXPAND, 0),
  X86_INTRINSIC_DATA(avx512_mask_fixupimm_pd_128, FIXUPIMM, X86ISD::VFIXUPIMM, 0),
  X86_INTRINSIC_DATA(avx512_mask_fixupimm_pd_256, FIXUPIMM, X86ISD::VFIXUPIMM, 0),
  X86_INTRINSIC_DATA(avx512_mask_fixupimm_pd_512, FIXUPIMM, X86ISD::VFIXUPIMM, 0),
  X86_INTRINSIC_DATA(avx512_mask_fixupimm_ps_128, FIXUPIMM, X86ISD::VFIXUPIMM, 0),
  X86_INTRINSIC_DATA(avx512_mask_fixupimm_ps_256, FIXUPIMM, X86ISD::VFIXUPIMM, 0),
  X86_INTRINSIC_DATA(avx512_mask_fixupimm_ps_512, FIXUPIMM, X86ISD::VFIXUPIMM, 0),
  X86_INTRINSIC_DATA(avx512_mask_fixupimm_sd, FIXUPIMMS, X86ISD::VFIXUPIMMS, 0),
  X86_INTRINSIC_DATA(avx512_mask_fixupimm_ss, FIXUPIMMS, X86ISD::VFIXUPIMMS, 0),
  X86_INTRINSIC_DATA(avx512_mask_fpclass_pd_128, FPCLASS, X86ISD::VFPCLASS, 0),
  X86_INTRINSIC_DATA(avx512_mask_fpclass_pd_256, FPCLASS, X86ISD::VFPCLASS, 0),
  X86_INTRINSIC_DATA(avx512_mask_fpclass_pd_512, FPCLASS, X86ISD::VFPCLASS, 0),
@@ -1842,6 +1850,22 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
  X86_INTRINSIC_DATA(avx512_mask_xor_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
  X86_INTRINSIC_DATA(avx512_mask_xor_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
  X86_INTRINSIC_DATA(avx512_mask_xor_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
  X86_INTRINSIC_DATA(avx512_maskz_fixupimm_pd_128, FIXUPIMM_MASKZ,
                     X86ISD::VFIXUPIMM, 0),
  X86_INTRINSIC_DATA(avx512_maskz_fixupimm_pd_256, FIXUPIMM_MASKZ,
                     X86ISD::VFIXUPIMM, 0),
  X86_INTRINSIC_DATA(avx512_maskz_fixupimm_pd_512, FIXUPIMM_MASKZ,
                     X86ISD::VFIXUPIMM, 0),
  X86_INTRINSIC_DATA(avx512_maskz_fixupimm_ps_128, FIXUPIMM_MASKZ,
                     X86ISD::VFIXUPIMM, 0),
  X86_INTRINSIC_DATA(avx512_maskz_fixupimm_ps_256, FIXUPIMM_MASKZ,
                     X86ISD::VFIXUPIMM, 0),
  X86_INTRINSIC_DATA(avx512_maskz_fixupimm_ps_512, FIXUPIMM_MASKZ,
                     X86ISD::VFIXUPIMM, 0),
  X86_INTRINSIC_DATA(avx512_maskz_fixupimm_sd, FIXUPIMMS_MASKZ,
                     X86ISD::VFIXUPIMMS, 0),
  X86_INTRINSIC_DATA(avx512_maskz_fixupimm_ss, FIXUPIMMS_MASKZ,
                     X86ISD::VFIXUPIMMS, 0),
  X86_INTRINSIC_DATA(avx512_maskz_pternlog_d_128, TERLOG_OP_MASKZ,
                     X86ISD::VPTERNLOG, 0),
  X86_INTRINSIC_DATA(avx512_maskz_pternlog_d_256, TERLOG_OP_MASKZ,
@@ -7000,3 +7000,179 @@ define <16 x i32>@test_int_x86_avx512_mask_permvar_si_512(<16 x i32> %x0, <16 x
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double>, <8 x double>, <8 x i64>, i32, i8, i32)

define <8 x double>@test_int_x86_avx512_mask_fixupimm_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_pd_512
; CHECK: kmovw %edi, %k1
; CHECK: vmovaps %zmm0, %zmm3
; CHECK: vfixupimmpd $4, %zmm2, %zmm1, %zmm3 {%k1}
; CHECK: vpxord %zmm4, %zmm4, %zmm4
; CHECK: vfixupimmpd $5, %zmm2, %zmm1, %zmm4 {%k1} {z}
; CHECK: vfixupimmpd $3, {sae}, %zmm2, %zmm1, %zmm0
; CHECK: vaddpd %zmm4, %zmm3, %zmm1
; CHECK: vaddpd %zmm0, %zmm1, %zmm0

  %res = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 4, i8 %x4, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> zeroinitializer, <8 x double> %x1, <8 x i64> %x2, i32 5, i8 %x4, i32 4)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 3, i8 -1, i32 8)
  %res3 = fadd <8 x double> %res, %res1
  %res4 = fadd <8 x double> %res3, %res2
  ret <8 x double> %res4
}

declare <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double>, <8 x double>, <8 x i64>, i32, i8, i32)

define <8 x double>@test_int_x86_avx512_maskz_fixupimm_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_512
; CHECK: kmovw %edi, %k1
; CHECK: vmovaps %zmm0, %zmm3
; CHECK: vfixupimmpd $3, %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK: vpxord %zmm4, %zmm4, %zmm4
; CHECK: vmovaps %zmm0, %zmm5
; CHECK: vfixupimmpd $5, %zmm4, %zmm1, %zmm5 {%k1} {z}
; CHECK: vfixupimmpd $2, {sae}, %zmm2, %zmm1, %zmm0
; CHECK: vaddpd %zmm5, %zmm3, %zmm1
; CHECK: vaddpd %zmm0, %zmm1, %zmm0

  %res = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 3, i8 %x4, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> zeroinitializer, i32 5, i8 %x4, i32 4)
  %res2 = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 2, i8 -1, i32 8)
  %res3 = fadd <8 x double> %res, %res1
  %res4 = fadd <8 x double> %res3, %res2
  ret <8 x double> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float>, <4 x float>, <4 x i32>, i32, i8, i32)

define <4 x float>@test_int_x86_avx512_mask_fixupimm_ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_ss
; CHECK: kmovw %edi, %k1
; CHECK: vmovaps %zmm0, %zmm3
; CHECK: vfixupimmss $5, %xmm2, %xmm1, %xmm3 {%k1}
; CHECK: vpxor %xmm4, %xmm4, %xmm4
; CHECK: vmovaps %zmm0, %zmm5
; CHECK: vfixupimmss $5, %xmm4, %xmm1, %xmm5 {%k1}
; CHECK: vfixupimmss $5, {sae}, %xmm2, %xmm1, %xmm0
; CHECK: vaddps %xmm5, %xmm3, %xmm1
; CHECK: vaddps %xmm0, %xmm1, %xmm0

  %res = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 5, i8 %x4, i32 4)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 -1, i32 8)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res3, %res2
  ret <4 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float>, <4 x float>, <4 x i32>, i32, i8, i32)

define <4 x float>@test_int_x86_avx512_maskz_fixupimm_ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_ss
; CHECK: kmovw %edi, %k1
; CHECK: vmovaps %zmm0, %zmm3
; CHECK: vfixupimmss $5, %xmm2, %xmm1, %xmm3 {%k1} {z}
; CHECK: vmovaps %zmm0, %zmm4
; CHECK: vfixupimmss $5, %xmm2, %xmm1, %xmm4
; CHECK: vpxor %xmm2, %xmm2, %xmm2
; CHECK: vfixupimmss $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK: vaddps %xmm0, %xmm3, %xmm0
; CHECK: vaddps %xmm4, %xmm0, %xmm0

  %res = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 5, i8 %x4, i32 8)
  %res2 = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 -1, i32 4)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res3, %res2
  ret <4 x float> %res4
}

declare <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float>, <16 x float>, <16 x i32>, i32, i16, i32)

define <16 x float>@test_int_x86_avx512_mask_fixupimm_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_ps_512
; CHECK: kmovw %edi, %k1
; CHECK: vmovaps %zmm0, %zmm3
; CHECK: vfixupimmps $5, %zmm2, %zmm1, %zmm3 {%k1}
; CHECK: vpxord %zmm4, %zmm4, %zmm4
; CHECK: vmovaps %zmm0, %zmm5
; CHECK: vfixupimmps $5, %zmm4, %zmm1, %zmm5 {%k1}
; CHECK: vfixupimmps $5, {sae}, %zmm2, %zmm1, %zmm0
; CHECK: vaddps %zmm5, %zmm3, %zmm1
; CHECK: vaddps %zmm0, %zmm1, %zmm0

  %res = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 %x4, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> zeroinitializer, i32 5, i16 %x4, i32 4)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 -1, i32 8)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res3, %res2
  ret <16 x float> %res4
}

declare <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float>, <16 x float>, <16 x i32>, i32, i16, i32)

define <16 x float>@test_int_x86_avx512_maskz_fixupimm_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_512
; CHECK: kmovw %edi, %k1
; CHECK: vmovaps %zmm0, %zmm3
; CHECK: vfixupimmps $5, %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK: vmovaps %zmm0, %zmm4
; CHECK: vfixupimmps $5, %zmm2, %zmm1, %zmm4
; CHECK: vpxord %zmm2, %zmm2, %zmm2
; CHECK: vfixupimmps $5, {sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK: vaddps %zmm0, %zmm3, %zmm0
; CHECK: vaddps %zmm4, %zmm0, %zmm0

  %res = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 %x4, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> zeroinitializer, i32 5, i16 %x4, i32 8)
  %res2 = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 -1, i32 4)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res3, %res2
  ret <16 x float> %res4
}

declare <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double>, <2 x double>, <2 x i64>, i32, i8, i32)

define <2 x double>@test_int_x86_avx512_mask_fixupimm_sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_sd
; CHECK: kmovw %edi, %k1
; CHECK: vmovaps %zmm0, %zmm3
; CHECK: vfixupimmsd $5, %xmm2, %xmm1, %xmm3 {%k1}
; CHECK: vmovaps %zmm0, %zmm4
; CHECK: vfixupimmsd $5, %xmm2, %xmm1, %xmm4
; CHECK: vpxor %xmm2, %xmm2, %xmm2
; CHECK: vfixupimmsd $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1}
; CHECK: vaddpd %xmm0, %xmm3, %xmm0
; CHECK: vaddpd %xmm4, %xmm0, %xmm0

  %res = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> zeroinitializer, i32 5, i8 %x4, i32 8)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 -1, i32 4)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res3, %res2
  ret <2 x double> %res4
}

declare <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double>, <2 x double>, <2 x i64>, i32, i8, i32)

define <2 x double>@test_int_x86_avx512_maskz_fixupimm_sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_sd
; CHECK: kmovw %edi, %k1
; CHECK: vmovaps %zmm0, %zmm3
; CHECK: vfixupimmsd $5, %xmm2, %xmm1, %xmm3 {%k1} {z}
; CHECK: vpxor %xmm4, %xmm4, %xmm4
; CHECK: vmovaps %zmm0, %zmm5
; CHECK: vfixupimmsd $5, {sae}, %xmm4, %xmm1, %xmm5 {%k1} {z}
; CHECK: vfixupimmsd $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK: vaddpd %xmm5, %xmm3, %xmm1
; CHECK: vaddpd %xmm0, %xmm1, %xmm0

  %res = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> zeroinitializer, i32 5, i8 %x4, i32 8)
  %res2 = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4, i32 8)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res3, %res2
  ret <2 x double> %res4
}
@@ -7690,3 +7690,173 @@ define void@test_int_x86_avx512_mask_store_d_256(i8* %ptr1, i8* %ptr2, <8 x i32>
  call void @llvm.x86.avx512.mask.store.d.256(i8* %ptr2, <8 x i32> %x1, i8 -1)
  ret void
}

declare <2 x double> @llvm.x86.avx512.mask.fixupimm.pd.128(<2 x double>, <2 x double>, <2 x i64>, i32, i8)

define <2 x double>@test_int_x86_avx512_mask_fixupimm_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_pd_128
; CHECK: kmovw %edi, %k1
; CHECK: vmovaps %zmm0, %zmm3
; CHECK: vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1}
; CHECK: vpxor %xmm4, %xmm4, %xmm4
; CHECK: vfixupimmpd $4, %xmm2, %xmm1, %xmm4 {%k1} {z}
; CHECK: vfixupimmpd $3, %xmm2, %xmm1, %xmm0
; CHECK: vaddpd %xmm4, %xmm3, %xmm1
; CHECK: vaddpd %xmm0, %xmm1, %xmm0

  %res = call <2 x double> @llvm.x86.avx512.mask.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.fixupimm.pd.128(<2 x double> zeroinitializer, <2 x double> %x1, <2 x i64> %x2, i32 4, i8 %x4)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 3, i8 -1)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res3, %res2
  ret <2 x double> %res4
}

declare <2 x double> @llvm.x86.avx512.maskz.fixupimm.pd.128(<2 x double>, <2 x double>, <2 x i64>, i32, i8)

define <2 x double>@test_int_x86_avx512_maskz_fixupimm_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_128
; CHECK: kmovw %edi, %k1
; CHECK: vmovaps %zmm0, %zmm3
; CHECK: vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1} {z}
; CHECK: vpxor %xmm2, %xmm2, %xmm2
; CHECK: vfixupimmpd $3, %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK: vaddpd %xmm0, %xmm3, %xmm0
  %res = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4)
  %res1 = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x i64> zeroinitializer, i32 3, i8 %x4)
  ;%res2 = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 4, i8 -1)
  %res3 = fadd <2 x double> %res, %res1
  ;%res4 = fadd <2 x double> %res3, %res2
  ret <2 x double> %res3
}

declare <4 x double> @llvm.x86.avx512.mask.fixupimm.pd.256(<4 x double>, <4 x double>, <4 x i64>, i32, i8)

define <4 x double>@test_int_x86_avx512_mask_fixupimm_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_pd_256
; CHECK: kmovw %edi, %k1
; CHECK: vmovaps %zmm0, %zmm3
; CHECK: vfixupimmpd $4, %ymm2, %ymm1, %ymm3 {%k1}
; CHECK: vpxor %ymm4, %ymm4, %ymm4
; CHECK: vfixupimmpd $5, %ymm2, %ymm1, %ymm4 {%k1} {z}
; CHECK: vfixupimmpd $3, %ymm2, %ymm1, %ymm0
; CHECK: vaddpd %ymm4, %ymm3, %ymm1
; CHECK: vaddpd %ymm0, %ymm1, %ymm0

  %res = call <4 x double> @llvm.x86.avx512.mask.fixupimm.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i32 4, i8 %x4)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.fixupimm.pd.256(<4 x double> zeroinitializer, <4 x double> %x1, <4 x i64> %x2, i32 5, i8 %x4)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.fixupimm.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i32 3, i8 -1)
  %res3 = fadd <4 x double> %res, %res1
  %res4 = fadd <4 x double> %res3, %res2
  ret <4 x double> %res4
}

declare <4 x double> @llvm.x86.avx512.maskz.fixupimm.pd.256(<4 x double>, <4 x double>, <4 x i64>, i32, i8)

define <4 x double>@test_int_x86_avx512_maskz_fixupimm_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_256
; CHECK: kmovw %edi, %k1
; CHECK: vmovaps %zmm0, %zmm3
; CHECK: vfixupimmpd $5, %ymm2, %ymm1, %ymm3 {%k1} {z}
; CHECK: vpxor %ymm4, %ymm4, %ymm4
; CHECK: vmovaps %zmm0, %zmm5
; CHECK: vfixupimmpd $4, %ymm4, %ymm1, %ymm5 {%k1} {z}
; CHECK: vfixupimmpd $3, %ymm2, %ymm1, %ymm0
; CHECK: vaddpd %ymm5, %ymm3, %ymm1
; CHECK: vaddpd %ymm0, %ymm1, %ymm0

  %res = call <4 x double> @llvm.x86.avx512.maskz.fixupimm.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i32 5, i8 %x4)
  %res1 = call <4 x double> @llvm.x86.avx512.maskz.fixupimm.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x i64> zeroinitializer, i32 4, i8 %x4)
  %res2 = call <4 x double> @llvm.x86.avx512.maskz.fixupimm.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i32 3, i8 -1)
  %res3 = fadd <4 x double> %res, %res1
  %res4 = fadd <4 x double> %res3, %res2
  ret <4 x double> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.fixupimm.ps.128(<4 x float>, <4 x float>, <4 x i32>, i32, i8)

define <4 x float>@test_int_x86_avx512_mask_fixupimm_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_ps_128
; CHECK: kmovw %edi, %k1
; CHECK: vmovaps %zmm0, %zmm3
; CHECK: vfixupimmps $5, %xmm2, %xmm1, %xmm3 {%k1}
; CHECK: vmovaps %zmm0, %zmm4
; CHECK: vfixupimmps $5, %xmm2, %xmm1, %xmm4
; CHECK: vpxor %xmm2, %xmm2, %xmm2
; CHECK: vfixupimmps $5, %xmm2, %xmm1, %xmm0 {%k1}
; CHECK: vaddps %xmm0, %xmm3, %xmm0
; CHECK: vaddps %xmm4, %xmm0, %xmm0

  %res = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 5, i8 %x4)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 -1)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res3, %res2
  ret <4 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.maskz.fixupimm.ps.128(<4 x float>, <4 x float>, <4 x i32>, i32, i8)

define <4 x float>@test_int_x86_avx512_maskz_fixupimm_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_128
; CHECK: kmovw %edi, %k1
; CHECK: vmovaps %zmm0, %zmm3
; CHECK: vfixupimmps $5, %xmm2, %xmm1, %xmm3 {%k1} {z}
; CHECK: vmovaps %zmm0, %zmm4
; CHECK: vfixupimmps $5, %xmm2, %xmm1, %xmm4
; CHECK: vpxor %xmm2, %xmm2, %xmm2
; CHECK: vfixupimmps $5, %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK: vaddps %xmm0, %xmm3, %xmm0
; CHECK: vaddps %xmm4, %xmm0, %xmm0

  %res = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4)
  %res1 = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 5, i8 %x4)
  %res2 = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 -1)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res3, %res2
  ret <4 x float> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.fixupimm.ps.256(<8 x float>, <8 x float>, <8 x i32>, i32, i8)

define <8 x float>@test_int_x86_avx512_mask_fixupimm_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_ps_256
; CHECK: kmovw %edi, %k1
; CHECK: vmovaps %zmm0, %zmm3
; CHECK: vfixupimmps $5, %ymm2, %ymm1, %ymm3 {%k1}
; CHECK: vmovaps %zmm0, %zmm4
; CHECK: vfixupimmps $5, %ymm2, %ymm1, %ymm4
; CHECK: vpxor %ymm2, %ymm2, %ymm2
; CHECK: vfixupimmps $5, %ymm2, %ymm1, %ymm0 {%k1}
; CHECK: vaddps %ymm0, %ymm3, %ymm0
; CHECK: vaddps %ymm4, %ymm0, %ymm0

  %res = call <8 x float> @llvm.x86.avx512.mask.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i32 5, i8 %x4)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> zeroinitializer, i32 5, i8 %x4)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i32 5, i8 -1)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res3, %res2
  ret <8 x float> %res4
}

declare <8 x float> @llvm.x86.avx512.maskz.fixupimm.ps.256(<8 x float>, <8 x float>, <8 x i32>, i32, i8)

define <8 x float>@test_int_x86_avx512_maskz_fixupimm_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_256
; CHECK: kmovw %edi, %k1
; CHECK: vmovaps %zmm0, %zmm3
; CHECK: vfixupimmps $5, %ymm2, %ymm1, %ymm3 {%k1} {z}
; CHECK: vmovaps %zmm0, %zmm4
; CHECK: vfixupimmps $5, %ymm2, %ymm1, %ymm4
; CHECK: vpxor %ymm2, %ymm2, %ymm2
; CHECK: vfixupimmps $5, %ymm2, %ymm1, %ymm0 {%k1} {z}
; CHECK: vaddps %ymm0, %ymm3, %ymm0
; CHECK: vaddps %ymm4, %ymm0, %ymm0

  %res = call <8 x float> @llvm.x86.avx512.maskz.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i32 5, i8 %x4)
  %res1 = call <8 x float> @llvm.x86.avx512.maskz.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> zeroinitializer, i32 5, i8 %x4)
  %res2 = call <8 x float> @llvm.x86.avx512.maskz.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i32 5, i8 -1)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res3, %res2
  ret <8 x float> %res4
}