mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
[X86] Convert all uses of WriteFAdd to X86SchedWriteWidths.
In preparation for splitting WriteFAdd by vector width. llvm-svn: 331273
This commit is contained in:
parent
076a6683eb
commit
3ad9ad0ee2
@ -2642,39 +2642,42 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
|
||||
multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
|
||||
bits<8> opc, SDNode OpNode,
|
||||
X86FoldableSchedWrite sched, Predicate prd,
|
||||
X86SchedWriteWidths sched, Predicate prd,
|
||||
string broadcast>{
|
||||
let Predicates = [prd] in {
|
||||
defm Z : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched,
|
||||
defm Z : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.ZMM,
|
||||
_.info512, "{z}", broadcast>, EVEX_V512;
|
||||
}
|
||||
let Predicates = [prd, HasVLX] in {
|
||||
defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched,
|
||||
defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.XMM,
|
||||
_.info128, "{x}", broadcast>, EVEX_V128;
|
||||
defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched,
|
||||
defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.YMM,
|
||||
_.info256, "{y}", broadcast>, EVEX_V256;
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME: Is there a better scheduler class for VFPCLASS?
|
||||
multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
|
||||
bits<8> opcScalar, SDNode VecOpNode, SDNode ScalarOpNode, Predicate prd>{
|
||||
bits<8> opcScalar, SDNode VecOpNode,
|
||||
SDNode ScalarOpNode, X86SchedWriteWidths sched,
|
||||
Predicate prd> {
|
||||
defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
|
||||
VecOpNode, WriteFAdd, prd, "{l}">,
|
||||
VecOpNode, sched, prd, "{l}">,
|
||||
EVEX_CD8<32, CD8VF>;
|
||||
defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
|
||||
VecOpNode, WriteFAdd, prd, "{q}">,
|
||||
VecOpNode, sched, prd, "{q}">,
|
||||
EVEX_CD8<64, CD8VF> , VEX_W;
|
||||
defm SS : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
|
||||
WriteFAdd, f32x_info, prd>,
|
||||
sched.Scl, f32x_info, prd>,
|
||||
EVEX_CD8<32, CD8VT1>;
|
||||
defm SD : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
|
||||
WriteFAdd, f64x_info, prd>,
|
||||
sched.Scl, f64x_info, prd>,
|
||||
EVEX_CD8<64, CD8VT1>, VEX_W;
|
||||
}
|
||||
|
||||
// FIXME: Is there a better scheduler class for VFPCLASS?
|
||||
defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass,
|
||||
X86Vfpclasss, HasDQI>, AVX512AIi8Base,EVEX;
|
||||
X86Vfpclasss, SchedWriteFAdd, HasDQI>,
|
||||
AVX512AIi8Base, EVEX;
|
||||
|
||||
//-----------------------------------------------------------------
|
||||
// Mask register copy, including
|
||||
@ -4811,6 +4814,7 @@ defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AVX-512 FP arithmetic
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
|
||||
SDNode OpNode, SDNode VecNode,
|
||||
X86FoldableSchedWrite sched, bit IsCommutable> {
|
||||
@ -4925,14 +4929,18 @@ multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
VecNode, SaeNode, sched, IsCommutable>,
|
||||
XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
|
||||
}
|
||||
defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds, WriteFAdd, 1>;
|
||||
defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds, WriteFMul, 1>;
|
||||
defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds, WriteFAdd, 0>;
|
||||
defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds, WriteFDiv, 0>;
|
||||
defm VMIN : avx512_binop_s_sae <0x5D, "vmin", X86fmin, X86fmins, X86fminRnds,
|
||||
WriteFCmp, 0>;
|
||||
defm VMAX : avx512_binop_s_sae <0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds,
|
||||
WriteFCmp, 0>;
|
||||
defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds,
|
||||
SchedWriteFAdd.Scl, 1>;
|
||||
defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds,
|
||||
SchedWriteFMul.Scl, 1>;
|
||||
defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds,
|
||||
SchedWriteFAdd.Scl, 0>;
|
||||
defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds,
|
||||
SchedWriteFDiv.Scl, 0>;
|
||||
defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminRnds,
|
||||
SchedWriteFCmp.Scl, 0>;
|
||||
defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds,
|
||||
SchedWriteFCmp.Scl, 0>;
|
||||
|
||||
// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
|
||||
// X86fminc and X86fmaxc instead of X86fmin and X86fmax
|
||||
@ -5247,33 +5255,36 @@ multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
}
|
||||
}
|
||||
|
||||
multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr, SDNode OpNode, SDNode OpNodeScal> {
|
||||
defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, WriteFAdd, v16f32_info>,
|
||||
avx512_fp_round_packed<opc, OpcodeStr, OpNode, WriteFAdd, v16f32_info>,
|
||||
multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
|
||||
SDNode OpNode, SDNode OpNodeScal,
|
||||
X86SchedWriteWidths sched> {
|
||||
defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.ZMM, v16f32_info>,
|
||||
avx512_fp_round_packed<opc, OpcodeStr, OpNode, sched.ZMM, v16f32_info>,
|
||||
EVEX_V512, EVEX_CD8<32, CD8VF>;
|
||||
defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, WriteFAdd, v8f64_info>,
|
||||
avx512_fp_round_packed<opc, OpcodeStr, OpNode, WriteFAdd, v8f64_info>,
|
||||
defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.ZMM, v8f64_info>,
|
||||
avx512_fp_round_packed<opc, OpcodeStr, OpNode, sched.ZMM, v8f64_info>,
|
||||
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||
defm SSZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, WriteFAdd, f32x_info>,
|
||||
avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info, OpNodeScal, WriteFAdd>,
|
||||
defm SSZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, sched.Scl, f32x_info>,
|
||||
avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info, OpNodeScal, sched.Scl>,
|
||||
EVEX_4V,EVEX_CD8<32, CD8VT1>;
|
||||
defm SDZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, WriteFAdd, f64x_info>,
|
||||
avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info, OpNodeScal, WriteFAdd>,
|
||||
defm SDZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, sched.Scl, f64x_info>,
|
||||
avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info, OpNodeScal, sched.Scl>,
|
||||
EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
|
||||
|
||||
// Define only if AVX512VL feature is present.
|
||||
let Predicates = [HasVLX] in {
|
||||
defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, WriteFAdd, v4f32x_info>,
|
||||
defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.XMM, v4f32x_info>,
|
||||
EVEX_V128, EVEX_CD8<32, CD8VF>;
|
||||
defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, WriteFAdd, v8f32x_info>,
|
||||
defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.YMM, v8f32x_info>,
|
||||
EVEX_V256, EVEX_CD8<32, CD8VF>;
|
||||
defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, WriteFAdd, v2f64x_info>,
|
||||
defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.XMM, v2f64x_info>,
|
||||
EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||
defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, WriteFAdd, v4f64x_info>,
|
||||
defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.YMM, v4f64x_info>,
|
||||
EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||
}
|
||||
}
|
||||
defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef, X86scalefs>, T8PD;
|
||||
defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef, X86scalefs,
|
||||
SchedWriteFAdd>, T8PD;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AVX-512 VPTESTM instructions
|
||||
@ -7947,14 +7958,14 @@ multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
}
|
||||
|
||||
let Predicates = [HasERI] in {
|
||||
defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, WriteFRcp>,
|
||||
T8PD, EVEX_4V;
|
||||
defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, WriteFRsqrt>,
|
||||
defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, SchedWriteFRcp.Scl>,
|
||||
T8PD, EVEX_4V;
|
||||
defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s,
|
||||
SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V;
|
||||
}
|
||||
|
||||
defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds, WriteFAdd>,
|
||||
T8PD, EVEX_4V;
|
||||
defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds,
|
||||
SchedWriteFAdd.Scl>, T8PD, EVEX_4V;
|
||||
/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
|
||||
|
||||
multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
|
||||
@ -7992,38 +8003,38 @@ multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
|
||||
}
|
||||
|
||||
multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
X86FoldableSchedWrite sched> {
|
||||
defm PS : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched>,
|
||||
avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched>,
|
||||
X86SchedWriteWidths sched> {
|
||||
defm PS : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
|
||||
avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
|
||||
T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
|
||||
defm PD : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched>,
|
||||
avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched>,
|
||||
defm PD : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
|
||||
avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
|
||||
T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||
}
|
||||
|
||||
multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
|
||||
SDNode OpNode, X86FoldableSchedWrite sched> {
|
||||
SDNode OpNode, X86SchedWriteWidths sched> {
|
||||
// Define only if AVX512VL feature is present.
|
||||
let Predicates = [HasVLX] in {
|
||||
defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode, sched>,
|
||||
defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode, sched.XMM>,
|
||||
EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
|
||||
defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode, sched>,
|
||||
defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode, sched.YMM>,
|
||||
EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
|
||||
defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode, sched>,
|
||||
defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode, sched.XMM>,
|
||||
EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
|
||||
defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode, sched>,
|
||||
defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode, sched.YMM>,
|
||||
EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
|
||||
}
|
||||
}
|
||||
let Predicates = [HasERI] in {
|
||||
|
||||
defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, WriteFRsqrt>, EVEX;
|
||||
defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, WriteFRcp>, EVEX;
|
||||
defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, WriteFAdd>, EVEX;
|
||||
let Predicates = [HasERI] in {
|
||||
defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, SchedWriteFRsqrt>, EVEX;
|
||||
defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, SchedWriteFRcp>, EVEX;
|
||||
defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, SchedWriteFAdd>, EVEX;
|
||||
}
|
||||
defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, WriteFAdd>,
|
||||
defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SchedWriteFAdd>,
|
||||
avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd,
|
||||
WriteFAdd>, EVEX;
|
||||
SchedWriteFAdd>, EVEX;
|
||||
|
||||
multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
|
||||
X86FoldableSchedWrite sched, X86VectorVTInfo _>{
|
||||
@ -8234,12 +8245,15 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
|
||||
}
|
||||
}
|
||||
|
||||
defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", WriteFAdd,
|
||||
f32x_info>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VT1>;
|
||||
defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless",
|
||||
SchedWriteFAdd.Scl, f32x_info>,
|
||||
AVX512AIi8Base, EVEX_4V,
|
||||
EVEX_CD8<32, CD8VT1>;
|
||||
|
||||
defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", WriteFAdd,
|
||||
f64x_info>, VEX_W, AVX512AIi8Base, EVEX_4V,
|
||||
EVEX_CD8<64, CD8VT1>;
|
||||
defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd",
|
||||
SchedWriteFAdd.Scl, f64x_info>,
|
||||
VEX_W, AVX512AIi8Base, EVEX_4V,
|
||||
EVEX_CD8<64, CD8VT1>;
|
||||
|
||||
//-------------------------------------------------
|
||||
// Integer truncate and extend operations
|
||||
@ -9128,17 +9142,17 @@ multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
|
||||
|
||||
multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
|
||||
AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
|
||||
SDNode OpNodeRnd, X86FoldableSchedWrite sched, Predicate prd>{
|
||||
SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
|
||||
let Predicates = [prd] in {
|
||||
defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched,
|
||||
defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM,
|
||||
_.info512>,
|
||||
avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd,
|
||||
sched, _.info512>, EVEX_V512;
|
||||
sched.ZMM, _.info512>, EVEX_V512;
|
||||
}
|
||||
let Predicates = [prd, HasVLX] in {
|
||||
defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched,
|
||||
defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM,
|
||||
_.info128>, EVEX_V128;
|
||||
defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched,
|
||||
defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM,
|
||||
_.info256>, EVEX_V256;
|
||||
}
|
||||
}
|
||||
@ -9273,17 +9287,17 @@ multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode
|
||||
|
||||
multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
|
||||
AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
|
||||
SDNode OpNodeRnd, X86FoldableSchedWrite sched, Predicate prd>{
|
||||
SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
|
||||
let Predicates = [prd] in {
|
||||
defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched, _.info512>,
|
||||
avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd, sched, _.info512>,
|
||||
defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
|
||||
avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd, sched.ZMM, _.info512>,
|
||||
EVEX_V512;
|
||||
|
||||
}
|
||||
let Predicates = [prd, HasVLX] in {
|
||||
defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched, _.info128>,
|
||||
defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
|
||||
EVEX_V128;
|
||||
defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched, _.info256>,
|
||||
defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
|
||||
EVEX_V256;
|
||||
}
|
||||
}
|
||||
@ -9320,16 +9334,16 @@ multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
|
||||
|
||||
multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
|
||||
X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
|
||||
SDNode OpNodeRnd, X86FoldableSchedWrite sched, Predicate prd>{
|
||||
SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd> {
|
||||
let Predicates = [prd] in {
|
||||
defm Z128 : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched, _>,
|
||||
avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeRnd, sched, _>;
|
||||
defm Z128 : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
|
||||
avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeRnd, sched.XMM, _>;
|
||||
}
|
||||
}
|
||||
|
||||
multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
|
||||
bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
|
||||
SDNode OpNodeRnd, X86FoldableSchedWrite sched, Predicate prd>{
|
||||
SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
|
||||
defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
|
||||
opcPs, OpNode, OpNodeRnd, sched, prd>,
|
||||
EVEX_CD8<32, CD8VF>;
|
||||
@ -9339,43 +9353,43 @@ multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
|
||||
}
|
||||
|
||||
defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
|
||||
X86VReduce, X86VReduceRnd, WriteFAdd, HasDQI>,
|
||||
X86VReduce, X86VReduceRnd, SchedWriteFAdd, HasDQI>,
|
||||
AVX512AIi8Base, EVEX;
|
||||
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
|
||||
X86VRndScale, X86VRndScaleRnd, WriteFAdd, HasAVX512>,
|
||||
X86VRndScale, X86VRndScaleRnd, SchedWriteFAdd, HasAVX512>,
|
||||
AVX512AIi8Base, EVEX;
|
||||
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
|
||||
X86VGetMant, X86VGetMantRnd, WriteFAdd, HasAVX512>,
|
||||
X86VGetMant, X86VGetMantRnd, SchedWriteFAdd, HasAVX512>,
|
||||
AVX512AIi8Base, EVEX;
|
||||
|
||||
defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
|
||||
0x50, X86VRange, X86VRangeRnd,
|
||||
WriteFAdd, HasDQI>,
|
||||
SchedWriteFAdd, HasDQI>,
|
||||
AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
|
||||
defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
|
||||
0x50, X86VRange, X86VRangeRnd,
|
||||
WriteFAdd, HasDQI>,
|
||||
SchedWriteFAdd, HasDQI>,
|
||||
AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
|
||||
|
||||
defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
|
||||
f64x_info, 0x51, X86Ranges, X86RangesRnd, WriteFAdd, HasDQI>,
|
||||
f64x_info, 0x51, X86Ranges, X86RangesRnd, SchedWriteFAdd, HasDQI>,
|
||||
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
|
||||
defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
|
||||
0x51, X86Ranges, X86RangesRnd, WriteFAdd, HasDQI>,
|
||||
0x51, X86Ranges, X86RangesRnd, SchedWriteFAdd, HasDQI>,
|
||||
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
|
||||
|
||||
defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
|
||||
0x57, X86Reduces, X86ReducesRnd, WriteFAdd, HasDQI>,
|
||||
0x57, X86Reduces, X86ReducesRnd, SchedWriteFAdd, HasDQI>,
|
||||
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
|
||||
defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
|
||||
0x57, X86Reduces, X86ReducesRnd, WriteFAdd, HasDQI>,
|
||||
0x57, X86Reduces, X86ReducesRnd, SchedWriteFAdd, HasDQI>,
|
||||
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
|
||||
|
||||
defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
|
||||
0x27, X86GetMants, X86GetMantsRnd, WriteFAdd, HasAVX512>,
|
||||
0x27, X86GetMants, X86GetMantsRnd, SchedWriteFAdd, HasAVX512>,
|
||||
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
|
||||
defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
|
||||
0x27, X86GetMants, X86GetMantsRnd, WriteFAdd, HasAVX512>,
|
||||
0x27, X86GetMants, X86GetMantsRnd, SchedWriteFAdd, HasAVX512>,
|
||||
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
|
||||
|
||||
let Predicates = [HasAVX512] in {
|
||||
@ -10520,33 +10534,32 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
}
|
||||
}
|
||||
|
||||
multiclass avx512_fixupimm_packed_all<X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _Vec> {
|
||||
multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
|
||||
AVX512VLVectorVTInfo _Vec> {
|
||||
let Predicates = [HasAVX512] in
|
||||
defm Z : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched,
|
||||
defm Z : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.ZMM,
|
||||
_Vec.info512>,
|
||||
avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, sched,
|
||||
avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, sched.ZMM,
|
||||
_Vec.info512>, AVX512AIi8Base, EVEX_4V, EVEX_V512;
|
||||
let Predicates = [HasAVX512, HasVLX] in {
|
||||
defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched,
|
||||
defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.XMM,
|
||||
_Vec.info128>, AVX512AIi8Base, EVEX_4V, EVEX_V128;
|
||||
defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched,
|
||||
defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.YMM,
|
||||
_Vec.info256>, AVX512AIi8Base, EVEX_4V, EVEX_V256;
|
||||
}
|
||||
}
|
||||
|
||||
defm VFIXUPIMMSS : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
|
||||
WriteFAdd, f32x_info, v4i32x_info>,
|
||||
SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
|
||||
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
|
||||
defm VFIXUPIMMSD : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
|
||||
WriteFAdd, f64x_info, v2i64x_info>,
|
||||
SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
|
||||
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
|
||||
defm VFIXUPIMMPS : avx512_fixupimm_packed_all<WriteFAdd, avx512vl_f32_info>,
|
||||
defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info>,
|
||||
EVEX_CD8<32, CD8VF>;
|
||||
defm VFIXUPIMMPD : avx512_fixupimm_packed_all<WriteFAdd, avx512vl_f64_info>,
|
||||
defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info>,
|
||||
EVEX_CD8<64, CD8VF>, VEX_W;
|
||||
|
||||
|
||||
|
||||
// Patterns used to select SSE scalar fp arithmetic instructions from
|
||||
// either:
|
||||
//
|
||||
|
@ -2508,99 +2508,99 @@ def : Pat<(X86fandn VR128:$src1, (memopv4f32 addr:$src2)),
|
||||
/// FIXME: once all 256-bit intrinsics are matched, clean up and refactor those
|
||||
/// classes below
|
||||
multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr,
|
||||
SDNode OpNode, X86FoldableSchedWrite sched> {
|
||||
SDNode OpNode, X86SchedWriteWidths sched> {
|
||||
let Predicates = [HasAVX, NoVLX] in {
|
||||
defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
|
||||
VR128, v4f32, f128mem, loadv4f32,
|
||||
SSEPackedSingle, sched, 0>, PS, VEX_4V, VEX_WIG;
|
||||
SSEPackedSingle, sched.XMM, 0>, PS, VEX_4V, VEX_WIG;
|
||||
defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
|
||||
VR128, v2f64, f128mem, loadv2f64,
|
||||
SSEPackedDouble, sched, 0>, PD, VEX_4V, VEX_WIG;
|
||||
SSEPackedDouble, sched.XMM, 0>, PD, VEX_4V, VEX_WIG;
|
||||
|
||||
defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"),
|
||||
OpNode, VR256, v8f32, f256mem, loadv8f32,
|
||||
SSEPackedSingle, sched, 0>, PS, VEX_4V, VEX_L, VEX_WIG;
|
||||
SSEPackedSingle, sched.YMM, 0>, PS, VEX_4V, VEX_L, VEX_WIG;
|
||||
defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"),
|
||||
OpNode, VR256, v4f64, f256mem, loadv4f64,
|
||||
SSEPackedDouble, sched, 0>, PD, VEX_4V, VEX_L, VEX_WIG;
|
||||
SSEPackedDouble, sched.YMM, 0>, PD, VEX_4V, VEX_L, VEX_WIG;
|
||||
}
|
||||
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
|
||||
v4f32, f128mem, memopv4f32, SSEPackedSingle,
|
||||
sched>, PS;
|
||||
sched.XMM>, PS;
|
||||
defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
|
||||
v2f64, f128mem, memopv2f64, SSEPackedDouble,
|
||||
sched>, PD;
|
||||
sched.XMM>, PD;
|
||||
}
|
||||
}
|
||||
|
||||
multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
X86FoldableSchedWrite sched> {
|
||||
X86SchedWriteWidths sched> {
|
||||
defm V#NAME#SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
|
||||
OpNode, FR32, f32mem, SSEPackedSingle, sched, 0>,
|
||||
OpNode, FR32, f32mem, SSEPackedSingle, sched.Scl, 0>,
|
||||
XS, VEX_4V, VEX_LIG, VEX_WIG;
|
||||
defm V#NAME#SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
|
||||
OpNode, FR64, f64mem, SSEPackedDouble, sched, 0>,
|
||||
OpNode, FR64, f64mem, SSEPackedDouble, sched.Scl, 0>,
|
||||
XD, VEX_4V, VEX_LIG, VEX_WIG;
|
||||
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
|
||||
OpNode, FR32, f32mem, SSEPackedSingle,
|
||||
sched>, XS;
|
||||
sched.Scl>, XS;
|
||||
defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
|
||||
OpNode, FR64, f64mem, SSEPackedDouble,
|
||||
sched>, XD;
|
||||
sched.Scl>, XD;
|
||||
}
|
||||
}
|
||||
|
||||
multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
|
||||
SDPatternOperator OpNode,
|
||||
X86FoldableSchedWrite sched> {
|
||||
X86SchedWriteWidths sched> {
|
||||
defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v4f32,
|
||||
!strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
|
||||
SSEPackedSingle, sched, 0>, XS, VEX_4V, VEX_LIG, VEX_WIG;
|
||||
SSEPackedSingle, sched.Scl, 0>, XS, VEX_4V, VEX_LIG, VEX_WIG;
|
||||
defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v2f64,
|
||||
!strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
|
||||
SSEPackedDouble, sched, 0>, XD, VEX_4V, VEX_LIG, VEX_WIG;
|
||||
SSEPackedDouble, sched.Scl, 0>, XD, VEX_4V, VEX_LIG, VEX_WIG;
|
||||
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
defm SS : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v4f32,
|
||||
!strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
|
||||
SSEPackedSingle, sched>, XS;
|
||||
SSEPackedSingle, sched.Scl>, XS;
|
||||
defm SD : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v2f64,
|
||||
!strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
|
||||
SSEPackedDouble, sched>, XD;
|
||||
SSEPackedDouble, sched.Scl>, XD;
|
||||
}
|
||||
}
|
||||
|
||||
// Binary Arithmetic instructions
|
||||
defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, WriteFAdd>,
|
||||
basic_sse12_fp_binop_s<0x58, "add", fadd, WriteFAdd>,
|
||||
basic_sse12_fp_binop_s_int<0x58, "add", null_frag, WriteFAdd>;
|
||||
defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, WriteFMul>,
|
||||
basic_sse12_fp_binop_s<0x59, "mul", fmul, WriteFMul>,
|
||||
basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, WriteFMul>;
|
||||
defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SchedWriteFAdd>,
|
||||
basic_sse12_fp_binop_s<0x58, "add", fadd, SchedWriteFAdd>,
|
||||
basic_sse12_fp_binop_s_int<0x58, "add", null_frag, SchedWriteFAdd>;
|
||||
defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SchedWriteFMul>,
|
||||
basic_sse12_fp_binop_s<0x59, "mul", fmul, SchedWriteFMul>,
|
||||
basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, SchedWriteFMul>;
|
||||
let isCommutable = 0 in {
|
||||
defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, WriteFAdd>,
|
||||
basic_sse12_fp_binop_s<0x5C, "sub", fsub, WriteFAdd>,
|
||||
basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag, WriteFAdd>;
|
||||
defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, WriteFDiv>,
|
||||
basic_sse12_fp_binop_s<0x5E, "div", fdiv, WriteFDiv>,
|
||||
basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, WriteFDiv>;
|
||||
defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, WriteFCmp>,
|
||||
basic_sse12_fp_binop_s<0x5F, "max", X86fmax, WriteFCmp>,
|
||||
basic_sse12_fp_binop_s_int<0x5F, "max", X86fmaxs, WriteFCmp>;
|
||||
defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, WriteFCmp>,
|
||||
basic_sse12_fp_binop_s<0x5D, "min", X86fmin, WriteFCmp>,
|
||||
basic_sse12_fp_binop_s_int<0x5D, "min", X86fmins, WriteFCmp>;
|
||||
defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SchedWriteFAdd>,
|
||||
basic_sse12_fp_binop_s<0x5C, "sub", fsub, SchedWriteFAdd>,
|
||||
basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag, SchedWriteFAdd>;
|
||||
defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SchedWriteFDiv>,
|
||||
basic_sse12_fp_binop_s<0x5E, "div", fdiv, SchedWriteFDiv>,
|
||||
basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, SchedWriteFDiv>;
|
||||
defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SchedWriteFCmp>,
|
||||
basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SchedWriteFCmp>,
|
||||
basic_sse12_fp_binop_s_int<0x5F, "max", X86fmaxs, SchedWriteFCmp>;
|
||||
defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SchedWriteFCmp>,
|
||||
basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SchedWriteFCmp>,
|
||||
basic_sse12_fp_binop_s_int<0x5D, "min", X86fmins, SchedWriteFCmp>;
|
||||
}
|
||||
|
||||
let isCodeGenOnly = 1 in {
|
||||
defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, WriteFCmp>,
|
||||
basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, WriteFCmp>;
|
||||
defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, WriteFCmp>,
|
||||
basic_sse12_fp_binop_s<0x5D, "min", X86fminc, WriteFCmp>;
|
||||
defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SchedWriteFCmp>,
|
||||
basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SchedWriteFCmp>;
|
||||
defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SchedWriteFCmp>,
|
||||
basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SchedWriteFCmp>;
|
||||
}
|
||||
|
||||
// Patterns used to select SSE scalar fp arithmetic instructions from
|
||||
@ -4417,28 +4417,28 @@ multiclass sse3_addsub<string OpcodeStr, ValueType vt, RegisterClass RC,
|
||||
let Predicates = [HasAVX] in {
|
||||
let ExeDomain = SSEPackedSingle in {
|
||||
defm VADDSUBPS : sse3_addsub<"vaddsubps", v4f32, VR128, f128mem,
|
||||
WriteFAdd, loadv4f32, 0>, XD, VEX_4V,
|
||||
VEX_WIG;
|
||||
SchedWriteFAdd.XMM, loadv4f32, 0>,
|
||||
XD, VEX_4V, VEX_WIG;
|
||||
defm VADDSUBPSY : sse3_addsub<"vaddsubps", v8f32, VR256, f256mem,
|
||||
WriteFAdd, loadv8f32, 0>, XD, VEX_4V,
|
||||
VEX_L, VEX_WIG;
|
||||
SchedWriteFAdd.YMM, loadv8f32, 0>,
|
||||
XD, VEX_4V, VEX_L, VEX_WIG;
|
||||
}
|
||||
let ExeDomain = SSEPackedDouble in {
|
||||
defm VADDSUBPD : sse3_addsub<"vaddsubpd", v2f64, VR128, f128mem,
|
||||
WriteFAdd, loadv2f64, 0>, PD, VEX_4V,
|
||||
VEX_WIG;
|
||||
SchedWriteFAdd.XMM, loadv2f64, 0>,
|
||||
PD, VEX_4V, VEX_WIG;
|
||||
defm VADDSUBPDY : sse3_addsub<"vaddsubpd", v4f64, VR256, f256mem,
|
||||
WriteFAdd, loadv4f64, 0>, PD, VEX_4V,
|
||||
VEX_L, VEX_WIG;
|
||||
SchedWriteFAdd.YMM, loadv4f64, 0>,
|
||||
PD, VEX_4V, VEX_L, VEX_WIG;
|
||||
}
|
||||
}
|
||||
let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in {
|
||||
let ExeDomain = SSEPackedSingle in
|
||||
defm ADDSUBPS : sse3_addsub<"addsubps", v4f32, VR128, f128mem, WriteFAdd,
|
||||
memopv4f32>, XD;
|
||||
defm ADDSUBPS : sse3_addsub<"addsubps", v4f32, VR128, f128mem,
|
||||
SchedWriteFAdd.XMM, memopv4f32>, XD;
|
||||
let ExeDomain = SSEPackedDouble in
|
||||
defm ADDSUBPD : sse3_addsub<"addsubpd", v2f64, VR128, f128mem, WriteFAdd,
|
||||
memopv2f64>, PD;
|
||||
defm ADDSUBPD : sse3_addsub<"addsubpd", v2f64, VR128, f128mem,
|
||||
SchedWriteFAdd.XMM, memopv2f64>, PD;
|
||||
}
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
@ -5500,26 +5500,27 @@ let Predicates = [HasAVX, NoVLX] in {
|
||||
let ExeDomain = SSEPackedSingle in {
|
||||
// Intrinsic form
|
||||
defm VROUNDPS : sse41_fp_unop_p<0x08, "vroundps", f128mem, VR128, v4f32,
|
||||
loadv4f32, X86VRndScale, WriteFAdd>,
|
||||
loadv4f32, X86VRndScale, SchedWriteFAdd.XMM>,
|
||||
VEX, VEX_WIG;
|
||||
defm VROUNDPSY : sse41_fp_unop_p<0x08, "vroundps", f256mem, VR256, v8f32,
|
||||
loadv8f32, X86VRndScale, WriteFAdd>,
|
||||
loadv8f32, X86VRndScale, SchedWriteFAdd.YMM>,
|
||||
VEX, VEX_L, VEX_WIG;
|
||||
}
|
||||
|
||||
let ExeDomain = SSEPackedDouble in {
|
||||
defm VROUNDPD : sse41_fp_unop_p<0x09, "vroundpd", f128mem, VR128, v2f64,
|
||||
loadv2f64, X86VRndScale, WriteFAdd>,
|
||||
loadv2f64, X86VRndScale, SchedWriteFAdd.XMM>,
|
||||
VEX, VEX_WIG;
|
||||
defm VROUNDPDY : sse41_fp_unop_p<0x09, "vroundpd", f256mem, VR256, v4f64,
|
||||
loadv4f64, X86VRndScale, WriteFAdd>,
|
||||
loadv4f64, X86VRndScale, SchedWriteFAdd.YMM>,
|
||||
VEX, VEX_L, VEX_WIG;
|
||||
}
|
||||
}
|
||||
let Predicates = [HasAVX, NoAVX512] in {
|
||||
defm VROUND : sse41_fp_binop_s<0x0A, 0x0B, "vround", WriteFAdd, v4f32, v2f64,
|
||||
X86RndScales, 0>, VEX_4V, VEX_LIG, VEX_WIG;
|
||||
defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround", WriteFAdd>,
|
||||
defm VROUND : sse41_fp_binop_s<0x0A, 0x0B, "vround", SchedWriteFAdd.Scl,
|
||||
v4f32, v2f64, X86RndScales, 0>,
|
||||
VEX_4V, VEX_LIG, VEX_WIG;
|
||||
defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround", SchedWriteFAdd.Scl>,
|
||||
VEX_4V, VEX_LIG, VEX_WIG;
|
||||
}
|
||||
|
||||
@ -5594,15 +5595,15 @@ let Predicates = [HasAVX, NoVLX] in {
|
||||
|
||||
let ExeDomain = SSEPackedSingle in
|
||||
defm ROUNDPS : sse41_fp_unop_p<0x08, "roundps", f128mem, VR128, v4f32,
|
||||
memopv4f32, X86VRndScale, WriteFAdd>;
|
||||
memopv4f32, X86VRndScale, SchedWriteFAdd.XMM>;
|
||||
let ExeDomain = SSEPackedDouble in
|
||||
defm ROUNDPD : sse41_fp_unop_p<0x09, "roundpd", f128mem, VR128, v2f64,
|
||||
memopv2f64, X86VRndScale, WriteFAdd>;
|
||||
memopv2f64, X86VRndScale, SchedWriteFAdd.XMM>;
|
||||
|
||||
defm ROUND : sse41_fp_unop_s<0x0A, 0x0B, "round", WriteFAdd>;
|
||||
defm ROUND : sse41_fp_unop_s<0x0A, 0x0B, "round", SchedWriteFAdd.Scl>;
|
||||
|
||||
let Constraints = "$src1 = $dst" in
|
||||
defm ROUND : sse41_fp_binop_s<0x0A, 0x0B, "round", WriteFAdd,
|
||||
defm ROUND : sse41_fp_binop_s<0x0A, 0x0B, "round", SchedWriteFAdd.Scl,
|
||||
v4f32, v2f64, X86RndScales>;
|
||||
|
||||
let Predicates = [UseSSE41] in {
|
||||
@ -5996,15 +5997,15 @@ let Predicates = [HasAVX] in {
|
||||
let ExeDomain = SSEPackedSingle in
|
||||
defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
|
||||
VR128, loadv4f32, f128mem, 0,
|
||||
WriteFAdd>, VEX_4V, VEX_WIG;
|
||||
SchedWriteFAdd.XMM>, VEX_4V, VEX_WIG;
|
||||
let ExeDomain = SSEPackedDouble in
|
||||
defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
|
||||
VR128, loadv2f64, f128mem, 0,
|
||||
WriteFAdd>, VEX_4V, VEX_WIG;
|
||||
SchedWriteFAdd.XMM>, VEX_4V, VEX_WIG;
|
||||
let ExeDomain = SSEPackedSingle in
|
||||
defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
|
||||
VR256, loadv8f32, i256mem, 0,
|
||||
WriteFAdd>, VEX_4V, VEX_L, VEX_WIG;
|
||||
SchedWriteFAdd.YMM>, VEX_4V, VEX_L, VEX_WIG;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX2] in {
|
||||
@ -6024,11 +6025,11 @@ let Constraints = "$src1 = $dst" in {
|
||||
let ExeDomain = SSEPackedSingle in
|
||||
defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
|
||||
VR128, memopv4f32, f128mem, 1,
|
||||
WriteFAdd>;
|
||||
SchedWriteFAdd.XMM>;
|
||||
let ExeDomain = SSEPackedDouble in
|
||||
defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
|
||||
VR128, memopv2f64, f128mem, 1,
|
||||
WriteFAdd>;
|
||||
SchedWriteFAdd.XMM>;
|
||||
}
|
||||
|
||||
/// SS41I_blend_rmi - SSE 4.1 blend with 8-bit immediate
|
||||
|
@ -208,6 +208,10 @@ def SchedWriteFMul
|
||||
: X86SchedWriteWidths<WriteFMul, WriteFMul, WriteFMul, WriteFMul>;
|
||||
def SchedWriteFDiv
|
||||
: X86SchedWriteWidths<WriteFDiv, WriteFDiv, WriteFDiv, WriteFDiv>;
|
||||
def SchedWriteFRcp
|
||||
: X86SchedWriteWidths<WriteFRcp, WriteFRcp, WriteFRcp, WriteFRcp>;
|
||||
def SchedWriteFRsqrt
|
||||
: X86SchedWriteWidths<WriteFRsqrt, WriteFRsqrt, WriteFRsqrt, WriteFRsqrt>;
|
||||
def SchedWriteFLogic
|
||||
: X86SchedWriteWidths<WriteFLogic, WriteFLogic, WriteFLogicY, WriteFLogicY>;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user