1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 11:13:28 +01:00

[X86] Convert all uses of WriteFAdd to X86SchedWriteWidths.

In preparation for splitting WriteFAdd by vector width.

llvm-svn: 331273
This commit is contained in:
Simon Pilgrim 2018-05-01 15:57:17 +00:00
parent 076a6683eb
commit 3ad9ad0ee2
3 changed files with 180 additions and 162 deletions

View File

@ -2642,39 +2642,42 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
bits<8> opc, SDNode OpNode,
X86FoldableSchedWrite sched, Predicate prd,
X86SchedWriteWidths sched, Predicate prd,
string broadcast>{
let Predicates = [prd] in {
defm Z : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched,
defm Z : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.ZMM,
_.info512, "{z}", broadcast>, EVEX_V512;
}
let Predicates = [prd, HasVLX] in {
defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched,
defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.XMM,
_.info128, "{x}", broadcast>, EVEX_V128;
defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched,
defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.YMM,
_.info256, "{y}", broadcast>, EVEX_V256;
}
}
// FIXME: Is there a better scheduler class for VFPCLASS?
multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
bits<8> opcScalar, SDNode VecOpNode, SDNode ScalarOpNode, Predicate prd>{
bits<8> opcScalar, SDNode VecOpNode,
SDNode ScalarOpNode, X86SchedWriteWidths sched,
Predicate prd> {
defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
VecOpNode, WriteFAdd, prd, "{l}">,
VecOpNode, sched, prd, "{l}">,
EVEX_CD8<32, CD8VF>;
defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
VecOpNode, WriteFAdd, prd, "{q}">,
VecOpNode, sched, prd, "{q}">,
EVEX_CD8<64, CD8VF> , VEX_W;
defm SS : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
WriteFAdd, f32x_info, prd>,
sched.Scl, f32x_info, prd>,
EVEX_CD8<32, CD8VT1>;
defm SD : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
WriteFAdd, f64x_info, prd>,
sched.Scl, f64x_info, prd>,
EVEX_CD8<64, CD8VT1>, VEX_W;
}
// FIXME: Is there a better scheduler class for VFPCLASS?
defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass,
X86Vfpclasss, HasDQI>, AVX512AIi8Base,EVEX;
X86Vfpclasss, SchedWriteFAdd, HasDQI>,
AVX512AIi8Base, EVEX;
//-----------------------------------------------------------------
// Mask register copy, including
@ -4811,6 +4814,7 @@ defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
//===----------------------------------------------------------------------===//
// AVX-512 FP arithmetic
//===----------------------------------------------------------------------===//
multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
SDNode OpNode, SDNode VecNode,
X86FoldableSchedWrite sched, bit IsCommutable> {
@ -4925,14 +4929,18 @@ multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
VecNode, SaeNode, sched, IsCommutable>,
XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds, WriteFAdd, 1>;
defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds, WriteFMul, 1>;
defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds, WriteFAdd, 0>;
defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds, WriteFDiv, 0>;
defm VMIN : avx512_binop_s_sae <0x5D, "vmin", X86fmin, X86fmins, X86fminRnds,
WriteFCmp, 0>;
defm VMAX : avx512_binop_s_sae <0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds,
WriteFCmp, 0>;
defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds,
SchedWriteFAdd.Scl, 1>;
defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds,
SchedWriteFMul.Scl, 1>;
defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds,
SchedWriteFAdd.Scl, 0>;
defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds,
SchedWriteFDiv.Scl, 0>;
defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminRnds,
SchedWriteFCmp.Scl, 0>;
defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds,
SchedWriteFCmp.Scl, 0>;
// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
// X86fminc and X86fmaxc instead of X86fmin and X86fmax
@ -5247,33 +5255,36 @@ multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
}
multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr, SDNode OpNode, SDNode OpNodeScal> {
defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, WriteFAdd, v16f32_info>,
avx512_fp_round_packed<opc, OpcodeStr, OpNode, WriteFAdd, v16f32_info>,
multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
SDNode OpNode, SDNode OpNodeScal,
X86SchedWriteWidths sched> {
defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.ZMM, v16f32_info>,
avx512_fp_round_packed<opc, OpcodeStr, OpNode, sched.ZMM, v16f32_info>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, WriteFAdd, v8f64_info>,
avx512_fp_round_packed<opc, OpcodeStr, OpNode, WriteFAdd, v8f64_info>,
defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.ZMM, v8f64_info>,
avx512_fp_round_packed<opc, OpcodeStr, OpNode, sched.ZMM, v8f64_info>,
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm SSZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, WriteFAdd, f32x_info>,
avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info, OpNodeScal, WriteFAdd>,
defm SSZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, sched.Scl, f32x_info>,
avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info, OpNodeScal, sched.Scl>,
EVEX_4V,EVEX_CD8<32, CD8VT1>;
defm SDZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, WriteFAdd, f64x_info>,
avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info, OpNodeScal, WriteFAdd>,
defm SDZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, sched.Scl, f64x_info>,
avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info, OpNodeScal, sched.Scl>,
EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
// Define only if AVX512VL feature is present.
let Predicates = [HasVLX] in {
defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, WriteFAdd, v4f32x_info>,
defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.XMM, v4f32x_info>,
EVEX_V128, EVEX_CD8<32, CD8VF>;
defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, WriteFAdd, v8f32x_info>,
defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.YMM, v8f32x_info>,
EVEX_V256, EVEX_CD8<32, CD8VF>;
defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, WriteFAdd, v2f64x_info>,
defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.XMM, v2f64x_info>,
EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, WriteFAdd, v4f64x_info>,
defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.YMM, v4f64x_info>,
EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
}
}
defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef, X86scalefs>, T8PD;
defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef, X86scalefs,
SchedWriteFAdd>, T8PD;
//===----------------------------------------------------------------------===//
// AVX-512 VPTESTM instructions
@ -7947,14 +7958,14 @@ multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
let Predicates = [HasERI] in {
defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, WriteFRcp>,
T8PD, EVEX_4V;
defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, WriteFRsqrt>,
defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, SchedWriteFRcp.Scl>,
T8PD, EVEX_4V;
defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s,
SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V;
}
defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds, WriteFAdd>,
T8PD, EVEX_4V;
defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds,
SchedWriteFAdd.Scl>, T8PD, EVEX_4V;
/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
@ -7992,38 +8003,38 @@ multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
}
multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched> {
defm PS : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched>,
avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched>,
X86SchedWriteWidths sched> {
defm PS : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm PD : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched>,
avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched>,
defm PD : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86FoldableSchedWrite sched> {
SDNode OpNode, X86SchedWriteWidths sched> {
// Define only if AVX512VL feature is present.
let Predicates = [HasVLX] in {
defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode, sched>,
defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode, sched.XMM>,
EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode, sched>,
defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode, sched.YMM>,
EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode, sched>,
defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode, sched.XMM>,
EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode, sched>,
defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode, sched.YMM>,
EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
}
}
let Predicates = [HasERI] in {
defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, WriteFRsqrt>, EVEX;
defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, WriteFRcp>, EVEX;
defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, WriteFAdd>, EVEX;
let Predicates = [HasERI] in {
defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, SchedWriteFRsqrt>, EVEX;
defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, SchedWriteFRcp>, EVEX;
defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, SchedWriteFAdd>, EVEX;
}
defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, WriteFAdd>,
defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SchedWriteFAdd>,
avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd,
WriteFAdd>, EVEX;
SchedWriteFAdd>, EVEX;
multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _>{
@ -8234,12 +8245,15 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
}
}
defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", WriteFAdd,
f32x_info>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless",
SchedWriteFAdd.Scl, f32x_info>,
AVX512AIi8Base, EVEX_4V,
EVEX_CD8<32, CD8VT1>;
defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", WriteFAdd,
f64x_info>, VEX_W, AVX512AIi8Base, EVEX_4V,
EVEX_CD8<64, CD8VT1>;
defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd",
SchedWriteFAdd.Scl, f64x_info>,
VEX_W, AVX512AIi8Base, EVEX_4V,
EVEX_CD8<64, CD8VT1>;
//-------------------------------------------------
// Integer truncate and extend operations
@ -9128,17 +9142,17 @@ multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
SDNode OpNodeRnd, X86FoldableSchedWrite sched, Predicate prd>{
SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
let Predicates = [prd] in {
defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched,
defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM,
_.info512>,
avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd,
sched, _.info512>, EVEX_V512;
sched.ZMM, _.info512>, EVEX_V512;
}
let Predicates = [prd, HasVLX] in {
defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched,
defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM,
_.info128>, EVEX_V128;
defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched,
defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM,
_.info256>, EVEX_V256;
}
}
@ -9273,17 +9287,17 @@ multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode
multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
SDNode OpNodeRnd, X86FoldableSchedWrite sched, Predicate prd>{
SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
let Predicates = [prd] in {
defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched, _.info512>,
avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd, sched, _.info512>,
defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd, sched.ZMM, _.info512>,
EVEX_V512;
}
let Predicates = [prd, HasVLX] in {
defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched, _.info128>,
defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
EVEX_V128;
defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched, _.info256>,
defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
EVEX_V256;
}
}
@ -9320,16 +9334,16 @@ multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
SDNode OpNodeRnd, X86FoldableSchedWrite sched, Predicate prd>{
SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd> {
let Predicates = [prd] in {
defm Z128 : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched, _>,
avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeRnd, sched, _>;
defm Z128 : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeRnd, sched.XMM, _>;
}
}
multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
SDNode OpNodeRnd, X86FoldableSchedWrite sched, Predicate prd>{
SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
opcPs, OpNode, OpNodeRnd, sched, prd>,
EVEX_CD8<32, CD8VF>;
@ -9339,43 +9353,43 @@ multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
}
defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
X86VReduce, X86VReduceRnd, WriteFAdd, HasDQI>,
X86VReduce, X86VReduceRnd, SchedWriteFAdd, HasDQI>,
AVX512AIi8Base, EVEX;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
X86VRndScale, X86VRndScaleRnd, WriteFAdd, HasAVX512>,
X86VRndScale, X86VRndScaleRnd, SchedWriteFAdd, HasAVX512>,
AVX512AIi8Base, EVEX;
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
X86VGetMant, X86VGetMantRnd, WriteFAdd, HasAVX512>,
X86VGetMant, X86VGetMantRnd, SchedWriteFAdd, HasAVX512>,
AVX512AIi8Base, EVEX;
defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
0x50, X86VRange, X86VRangeRnd,
WriteFAdd, HasDQI>,
SchedWriteFAdd, HasDQI>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
0x50, X86VRange, X86VRangeRnd,
WriteFAdd, HasDQI>,
SchedWriteFAdd, HasDQI>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
f64x_info, 0x51, X86Ranges, X86RangesRnd, WriteFAdd, HasDQI>,
f64x_info, 0x51, X86Ranges, X86RangesRnd, SchedWriteFAdd, HasDQI>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
0x51, X86Ranges, X86RangesRnd, WriteFAdd, HasDQI>,
0x51, X86Ranges, X86RangesRnd, SchedWriteFAdd, HasDQI>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
0x57, X86Reduces, X86ReducesRnd, WriteFAdd, HasDQI>,
0x57, X86Reduces, X86ReducesRnd, SchedWriteFAdd, HasDQI>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
0x57, X86Reduces, X86ReducesRnd, WriteFAdd, HasDQI>,
0x57, X86Reduces, X86ReducesRnd, SchedWriteFAdd, HasDQI>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
0x27, X86GetMants, X86GetMantsRnd, WriteFAdd, HasAVX512>,
0x27, X86GetMants, X86GetMantsRnd, SchedWriteFAdd, HasAVX512>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
0x27, X86GetMants, X86GetMantsRnd, WriteFAdd, HasAVX512>,
0x27, X86GetMants, X86GetMantsRnd, SchedWriteFAdd, HasAVX512>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
let Predicates = [HasAVX512] in {
@ -10520,33 +10534,32 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
}
multiclass avx512_fixupimm_packed_all<X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _Vec> {
multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
AVX512VLVectorVTInfo _Vec> {
let Predicates = [HasAVX512] in
defm Z : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched,
defm Z : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.ZMM,
_Vec.info512>,
avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, sched,
avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, sched.ZMM,
_Vec.info512>, AVX512AIi8Base, EVEX_4V, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched,
defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.XMM,
_Vec.info128>, AVX512AIi8Base, EVEX_4V, EVEX_V128;
defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched,
defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.YMM,
_Vec.info256>, AVX512AIi8Base, EVEX_4V, EVEX_V256;
}
}
defm VFIXUPIMMSS : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
WriteFAdd, f32x_info, v4i32x_info>,
SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VFIXUPIMMSD : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
WriteFAdd, f64x_info, v2i64x_info>,
SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VFIXUPIMMPS : avx512_fixupimm_packed_all<WriteFAdd, avx512vl_f32_info>,
defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info>,
EVEX_CD8<32, CD8VF>;
defm VFIXUPIMMPD : avx512_fixupimm_packed_all<WriteFAdd, avx512vl_f64_info>,
defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info>,
EVEX_CD8<64, CD8VF>, VEX_W;
// Patterns used to select SSE scalar fp arithmetic instructions from
// either:
//

View File

@ -2508,99 +2508,99 @@ def : Pat<(X86fandn VR128:$src1, (memopv4f32 addr:$src2)),
/// FIXME: once all 256-bit intrinsics are matched, cleanup and refactor those
/// classes below
multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86FoldableSchedWrite sched> {
SDNode OpNode, X86SchedWriteWidths sched> {
let Predicates = [HasAVX, NoVLX] in {
defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
VR128, v4f32, f128mem, loadv4f32,
SSEPackedSingle, sched, 0>, PS, VEX_4V, VEX_WIG;
SSEPackedSingle, sched.XMM, 0>, PS, VEX_4V, VEX_WIG;
defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
VR128, v2f64, f128mem, loadv2f64,
SSEPackedDouble, sched, 0>, PD, VEX_4V, VEX_WIG;
SSEPackedDouble, sched.XMM, 0>, PD, VEX_4V, VEX_WIG;
defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"),
OpNode, VR256, v8f32, f256mem, loadv8f32,
SSEPackedSingle, sched, 0>, PS, VEX_4V, VEX_L, VEX_WIG;
SSEPackedSingle, sched.YMM, 0>, PS, VEX_4V, VEX_L, VEX_WIG;
defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"),
OpNode, VR256, v4f64, f256mem, loadv4f64,
SSEPackedDouble, sched, 0>, PD, VEX_4V, VEX_L, VEX_WIG;
SSEPackedDouble, sched.YMM, 0>, PD, VEX_4V, VEX_L, VEX_WIG;
}
let Constraints = "$src1 = $dst" in {
defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
v4f32, f128mem, memopv4f32, SSEPackedSingle,
sched>, PS;
sched.XMM>, PS;
defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
v2f64, f128mem, memopv2f64, SSEPackedDouble,
sched>, PD;
sched.XMM>, PD;
}
}
multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched> {
X86SchedWriteWidths sched> {
defm V#NAME#SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
OpNode, FR32, f32mem, SSEPackedSingle, sched, 0>,
OpNode, FR32, f32mem, SSEPackedSingle, sched.Scl, 0>,
XS, VEX_4V, VEX_LIG, VEX_WIG;
defm V#NAME#SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
OpNode, FR64, f64mem, SSEPackedDouble, sched, 0>,
OpNode, FR64, f64mem, SSEPackedDouble, sched.Scl, 0>,
XD, VEX_4V, VEX_LIG, VEX_WIG;
let Constraints = "$src1 = $dst" in {
defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
OpNode, FR32, f32mem, SSEPackedSingle,
sched>, XS;
sched.Scl>, XS;
defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
OpNode, FR64, f64mem, SSEPackedDouble,
sched>, XD;
sched.Scl>, XD;
}
}
multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNode,
X86FoldableSchedWrite sched> {
X86SchedWriteWidths sched> {
defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v4f32,
!strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
SSEPackedSingle, sched, 0>, XS, VEX_4V, VEX_LIG, VEX_WIG;
SSEPackedSingle, sched.Scl, 0>, XS, VEX_4V, VEX_LIG, VEX_WIG;
defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v2f64,
!strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
SSEPackedDouble, sched, 0>, XD, VEX_4V, VEX_LIG, VEX_WIG;
SSEPackedDouble, sched.Scl, 0>, XD, VEX_4V, VEX_LIG, VEX_WIG;
let Constraints = "$src1 = $dst" in {
defm SS : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v4f32,
!strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
SSEPackedSingle, sched>, XS;
SSEPackedSingle, sched.Scl>, XS;
defm SD : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v2f64,
!strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
SSEPackedDouble, sched>, XD;
SSEPackedDouble, sched.Scl>, XD;
}
}
// Binary Arithmetic instructions
defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, WriteFAdd>,
basic_sse12_fp_binop_s<0x58, "add", fadd, WriteFAdd>,
basic_sse12_fp_binop_s_int<0x58, "add", null_frag, WriteFAdd>;
defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, WriteFMul>,
basic_sse12_fp_binop_s<0x59, "mul", fmul, WriteFMul>,
basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, WriteFMul>;
defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SchedWriteFAdd>,
basic_sse12_fp_binop_s<0x58, "add", fadd, SchedWriteFAdd>,
basic_sse12_fp_binop_s_int<0x58, "add", null_frag, SchedWriteFAdd>;
defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SchedWriteFMul>,
basic_sse12_fp_binop_s<0x59, "mul", fmul, SchedWriteFMul>,
basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, SchedWriteFMul>;
let isCommutable = 0 in {
defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, WriteFAdd>,
basic_sse12_fp_binop_s<0x5C, "sub", fsub, WriteFAdd>,
basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag, WriteFAdd>;
defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, WriteFDiv>,
basic_sse12_fp_binop_s<0x5E, "div", fdiv, WriteFDiv>,
basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, WriteFDiv>;
defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, WriteFCmp>,
basic_sse12_fp_binop_s<0x5F, "max", X86fmax, WriteFCmp>,
basic_sse12_fp_binop_s_int<0x5F, "max", X86fmaxs, WriteFCmp>;
defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, WriteFCmp>,
basic_sse12_fp_binop_s<0x5D, "min", X86fmin, WriteFCmp>,
basic_sse12_fp_binop_s_int<0x5D, "min", X86fmins, WriteFCmp>;
defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SchedWriteFAdd>,
basic_sse12_fp_binop_s<0x5C, "sub", fsub, SchedWriteFAdd>,
basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag, SchedWriteFAdd>;
defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SchedWriteFDiv>,
basic_sse12_fp_binop_s<0x5E, "div", fdiv, SchedWriteFDiv>,
basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, SchedWriteFDiv>;
defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SchedWriteFCmp>,
basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SchedWriteFCmp>,
basic_sse12_fp_binop_s_int<0x5F, "max", X86fmaxs, SchedWriteFCmp>;
defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SchedWriteFCmp>,
basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SchedWriteFCmp>,
basic_sse12_fp_binop_s_int<0x5D, "min", X86fmins, SchedWriteFCmp>;
}
let isCodeGenOnly = 1 in {
defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, WriteFCmp>,
basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, WriteFCmp>;
defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, WriteFCmp>,
basic_sse12_fp_binop_s<0x5D, "min", X86fminc, WriteFCmp>;
defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SchedWriteFCmp>,
basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SchedWriteFCmp>;
defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SchedWriteFCmp>,
basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SchedWriteFCmp>;
}
// Patterns used to select SSE scalar fp arithmetic instructions from
@ -4417,28 +4417,28 @@ multiclass sse3_addsub<string OpcodeStr, ValueType vt, RegisterClass RC,
let Predicates = [HasAVX] in {
let ExeDomain = SSEPackedSingle in {
defm VADDSUBPS : sse3_addsub<"vaddsubps", v4f32, VR128, f128mem,
WriteFAdd, loadv4f32, 0>, XD, VEX_4V,
VEX_WIG;
SchedWriteFAdd.XMM, loadv4f32, 0>,
XD, VEX_4V, VEX_WIG;
defm VADDSUBPSY : sse3_addsub<"vaddsubps", v8f32, VR256, f256mem,
WriteFAdd, loadv8f32, 0>, XD, VEX_4V,
VEX_L, VEX_WIG;
SchedWriteFAdd.YMM, loadv8f32, 0>,
XD, VEX_4V, VEX_L, VEX_WIG;
}
let ExeDomain = SSEPackedDouble in {
defm VADDSUBPD : sse3_addsub<"vaddsubpd", v2f64, VR128, f128mem,
WriteFAdd, loadv2f64, 0>, PD, VEX_4V,
VEX_WIG;
SchedWriteFAdd.XMM, loadv2f64, 0>,
PD, VEX_4V, VEX_WIG;
defm VADDSUBPDY : sse3_addsub<"vaddsubpd", v4f64, VR256, f256mem,
WriteFAdd, loadv4f64, 0>, PD, VEX_4V,
VEX_L, VEX_WIG;
SchedWriteFAdd.YMM, loadv4f64, 0>,
PD, VEX_4V, VEX_L, VEX_WIG;
}
}
let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in {
let ExeDomain = SSEPackedSingle in
defm ADDSUBPS : sse3_addsub<"addsubps", v4f32, VR128, f128mem, WriteFAdd,
memopv4f32>, XD;
defm ADDSUBPS : sse3_addsub<"addsubps", v4f32, VR128, f128mem,
SchedWriteFAdd.XMM, memopv4f32>, XD;
let ExeDomain = SSEPackedDouble in
defm ADDSUBPD : sse3_addsub<"addsubpd", v2f64, VR128, f128mem, WriteFAdd,
memopv2f64>, PD;
defm ADDSUBPD : sse3_addsub<"addsubpd", v2f64, VR128, f128mem,
SchedWriteFAdd.XMM, memopv2f64>, PD;
}
//===---------------------------------------------------------------------===//
@ -5500,26 +5500,27 @@ let Predicates = [HasAVX, NoVLX] in {
let ExeDomain = SSEPackedSingle in {
// Intrinsic form
defm VROUNDPS : sse41_fp_unop_p<0x08, "vroundps", f128mem, VR128, v4f32,
loadv4f32, X86VRndScale, WriteFAdd>,
loadv4f32, X86VRndScale, SchedWriteFAdd.XMM>,
VEX, VEX_WIG;
defm VROUNDPSY : sse41_fp_unop_p<0x08, "vroundps", f256mem, VR256, v8f32,
loadv8f32, X86VRndScale, WriteFAdd>,
loadv8f32, X86VRndScale, SchedWriteFAdd.YMM>,
VEX, VEX_L, VEX_WIG;
}
let ExeDomain = SSEPackedDouble in {
defm VROUNDPD : sse41_fp_unop_p<0x09, "vroundpd", f128mem, VR128, v2f64,
loadv2f64, X86VRndScale, WriteFAdd>,
loadv2f64, X86VRndScale, SchedWriteFAdd.XMM>,
VEX, VEX_WIG;
defm VROUNDPDY : sse41_fp_unop_p<0x09, "vroundpd", f256mem, VR256, v4f64,
loadv4f64, X86VRndScale, WriteFAdd>,
loadv4f64, X86VRndScale, SchedWriteFAdd.YMM>,
VEX, VEX_L, VEX_WIG;
}
}
let Predicates = [HasAVX, NoAVX512] in {
defm VROUND : sse41_fp_binop_s<0x0A, 0x0B, "vround", WriteFAdd, v4f32, v2f64,
X86RndScales, 0>, VEX_4V, VEX_LIG, VEX_WIG;
defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround", WriteFAdd>,
defm VROUND : sse41_fp_binop_s<0x0A, 0x0B, "vround", SchedWriteFAdd.Scl,
v4f32, v2f64, X86RndScales, 0>,
VEX_4V, VEX_LIG, VEX_WIG;
defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround", SchedWriteFAdd.Scl>,
VEX_4V, VEX_LIG, VEX_WIG;
}
@ -5594,15 +5595,15 @@ let Predicates = [HasAVX, NoVLX] in {
let ExeDomain = SSEPackedSingle in
defm ROUNDPS : sse41_fp_unop_p<0x08, "roundps", f128mem, VR128, v4f32,
memopv4f32, X86VRndScale, WriteFAdd>;
memopv4f32, X86VRndScale, SchedWriteFAdd.XMM>;
let ExeDomain = SSEPackedDouble in
defm ROUNDPD : sse41_fp_unop_p<0x09, "roundpd", f128mem, VR128, v2f64,
memopv2f64, X86VRndScale, WriteFAdd>;
memopv2f64, X86VRndScale, SchedWriteFAdd.XMM>;
defm ROUND : sse41_fp_unop_s<0x0A, 0x0B, "round", WriteFAdd>;
defm ROUND : sse41_fp_unop_s<0x0A, 0x0B, "round", SchedWriteFAdd.Scl>;
let Constraints = "$src1 = $dst" in
defm ROUND : sse41_fp_binop_s<0x0A, 0x0B, "round", WriteFAdd,
defm ROUND : sse41_fp_binop_s<0x0A, 0x0B, "round", SchedWriteFAdd.Scl,
v4f32, v2f64, X86RndScales>;
let Predicates = [UseSSE41] in {
@ -5996,15 +5997,15 @@ let Predicates = [HasAVX] in {
let ExeDomain = SSEPackedSingle in
defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
VR128, loadv4f32, f128mem, 0,
WriteFAdd>, VEX_4V, VEX_WIG;
SchedWriteFAdd.XMM>, VEX_4V, VEX_WIG;
let ExeDomain = SSEPackedDouble in
defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
VR128, loadv2f64, f128mem, 0,
WriteFAdd>, VEX_4V, VEX_WIG;
SchedWriteFAdd.XMM>, VEX_4V, VEX_WIG;
let ExeDomain = SSEPackedSingle in
defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
VR256, loadv8f32, i256mem, 0,
WriteFAdd>, VEX_4V, VEX_L, VEX_WIG;
SchedWriteFAdd.YMM>, VEX_4V, VEX_L, VEX_WIG;
}
let Predicates = [HasAVX2] in {
@ -6024,11 +6025,11 @@ let Constraints = "$src1 = $dst" in {
let ExeDomain = SSEPackedSingle in
defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
VR128, memopv4f32, f128mem, 1,
WriteFAdd>;
SchedWriteFAdd.XMM>;
let ExeDomain = SSEPackedDouble in
defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
VR128, memopv2f64, f128mem, 1,
WriteFAdd>;
SchedWriteFAdd.XMM>;
}
/// SS41I_blend_rmi - SSE 4.1 blend with 8-bit immediate

View File

@ -208,6 +208,10 @@ def SchedWriteFMul
: X86SchedWriteWidths<WriteFMul, WriteFMul, WriteFMul, WriteFMul>;
// Per-width scheduler-class bundles. Instruction multiclasses take one of
// these as an X86SchedWriteWidths parameter and select a member per vector
// width (read elsewhere in this change as sched.Scl / .XMM / .YMM / .ZMM).
// NOTE(review): the positional argument order is presumably
// <Scl, XMM, YMM, ZMM> -- confirm against the X86SchedWriteWidths class.

// FP divide: all four slots currently use the same WriteFDiv class.
def SchedWriteFDiv
: X86SchedWriteWidths<WriteFDiv, WriteFDiv, WriteFDiv, WriteFDiv>;
// Bundle around WriteFRcp (passed to the VRCP28 defms in this change); all
// four slots are the same class.
def SchedWriteFRcp
: X86SchedWriteWidths<WriteFRcp, WriteFRcp, WriteFRcp, WriteFRcp>;
// Bundle around WriteFRsqrt (passed to the VRSQRT28 defms in this change);
// all four slots are the same class.
def SchedWriteFRsqrt
: X86SchedWriteWidths<WriteFRsqrt, WriteFRsqrt, WriteFRsqrt, WriteFRsqrt>;
// FP logic: the first two slots use WriteFLogic and the last two use
// WriteFLogicY, so this bundle already distinguishes classes by slot.
def SchedWriteFLogic
: X86SchedWriteWidths<WriteFLogic, WriteFLogic, WriteFLogicY, WriteFLogicY>;