mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
[X86][AVX512] Tag RCP/RSQRT/GETEXP instructions scheduler classes (REVERSION)
Accidental commit of incomplete patch llvm-svn: 319346
This commit is contained in:
parent
1e7c2158fe
commit
e19432f57e
@ -4549,7 +4549,7 @@ multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
|
||||
"$src2, $src1", "$src1, $src2",
|
||||
(_.VT (VecNode _.RC:$src1, _.RC:$src2,
|
||||
(i32 FROUND_CURRENT))),
|
||||
itins.rr>, Sched<[itins.Sched]>;
|
||||
itins.rr>;
|
||||
|
||||
defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
|
||||
@ -4557,21 +4557,20 @@ multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
|
||||
(_.VT (VecNode _.RC:$src1,
|
||||
_.ScalarIntMemCPat:$src2,
|
||||
(i32 FROUND_CURRENT))),
|
||||
itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
|
||||
itins.rm>;
|
||||
let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
|
||||
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
|
||||
(ins _.FRC:$src1, _.FRC:$src2),
|
||||
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
|
||||
itins.rr>, Sched<[itins.Sched]> {
|
||||
itins.rr> {
|
||||
let isCommutable = IsCommutable;
|
||||
}
|
||||
def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
|
||||
(ins _.FRC:$src1, _.ScalarMemOp:$src2),
|
||||
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set _.FRC:$dst, (OpNode _.FRC:$src1,
|
||||
(_.ScalarLdFrag addr:$src2)))], itins.rm>,
|
||||
Sched<[itins.Sched.Folded, ReadAfterLd]>;
|
||||
(_.ScalarLdFrag addr:$src2)))], itins.rm>;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -4584,7 +4583,7 @@ multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo
|
||||
"$rc, $src2, $src1", "$src1, $src2, $rc",
|
||||
(VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
|
||||
(i32 imm:$rc)), itins.rr, IsCommutable>,
|
||||
EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
|
||||
EVEX_B, EVEX_RC;
|
||||
}
|
||||
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
|
||||
SDNode OpNode, SDNode VecNode, SDNode SaeNode,
|
||||
@ -4594,36 +4593,35 @@ multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
|
||||
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
|
||||
"$src2, $src1", "$src1, $src2",
|
||||
(_.VT (VecNode _.RC:$src1, _.RC:$src2)),
|
||||
itins.rr>, Sched<[itins.Sched]>;
|
||||
itins.rr>;
|
||||
|
||||
defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
|
||||
"$src2, $src1", "$src1, $src2",
|
||||
(_.VT (VecNode _.RC:$src1,
|
||||
_.ScalarIntMemCPat:$src2)),
|
||||
itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
|
||||
itins.rm>;
|
||||
|
||||
let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
|
||||
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
|
||||
(ins _.FRC:$src1, _.FRC:$src2),
|
||||
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
|
||||
itins.rr>, Sched<[itins.Sched]> {
|
||||
itins.rr> {
|
||||
let isCommutable = IsCommutable;
|
||||
}
|
||||
def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
|
||||
(ins _.FRC:$src1, _.ScalarMemOp:$src2),
|
||||
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set _.FRC:$dst, (OpNode _.FRC:$src1,
|
||||
(_.ScalarLdFrag addr:$src2)))], itins.rm>,
|
||||
Sched<[itins.Sched.Folded, ReadAfterLd]>;
|
||||
(_.ScalarLdFrag addr:$src2)))], itins.rm>;
|
||||
}
|
||||
|
||||
defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
|
||||
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
|
||||
(SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
|
||||
(i32 FROUND_NO_EXC))>, EVEX_B, Sched<[itins.Sched]>;
|
||||
(i32 FROUND_NO_EXC))>, EVEX_B;
|
||||
}
|
||||
}
|
||||
|
||||
@ -7366,34 +7364,32 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in {
|
||||
|
||||
/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
|
||||
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
OpndItins itins, X86VectorVTInfo _> {
|
||||
X86VectorVTInfo _> {
|
||||
let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
|
||||
defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
|
||||
"$src2, $src1", "$src1, $src2",
|
||||
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)), itins.rr>,
|
||||
EVEX_4V, Sched<[itins.Sched]>;
|
||||
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, EVEX_4V;
|
||||
defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
|
||||
"$src2, $src1", "$src1, $src2",
|
||||
(OpNode (_.VT _.RC:$src1),
|
||||
_.ScalarIntMemCPat:$src2), itins.rm>, EVEX_4V,
|
||||
Sched<[itins.Sched.Folded, ReadAfterLd]>;
|
||||
_.ScalarIntMemCPat:$src2)>, EVEX_4V;
|
||||
}
|
||||
}
|
||||
|
||||
defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SSE_RCPS, f32x_info>,
|
||||
defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, f32x_info>,
|
||||
EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable;
|
||||
defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SSE_RCPS, f64x_info>,
|
||||
defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, f64x_info>,
|
||||
VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable;
|
||||
defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, SSE_RSQRTSS, f32x_info>,
|
||||
defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, f32x_info>,
|
||||
EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable;
|
||||
defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, SSE_RSQRTSS, f64x_info>,
|
||||
defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, f64x_info>,
|
||||
VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable;
|
||||
|
||||
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
|
||||
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
OpndItins itins, X86VectorVTInfo _> {
|
||||
X86VectorVTInfo _> {
|
||||
let ExeDomain = _.ExeDomain in {
|
||||
defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src), OpcodeStr, "$src", "$src",
|
||||
@ -7411,36 +7407,35 @@ multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
}
|
||||
}
|
||||
|
||||
multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
SizeItins itins> {
|
||||
defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, itins.s,
|
||||
v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
|
||||
defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, itins.d,
|
||||
v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||
multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode> {
|
||||
defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, v16f32_info>,
|
||||
EVEX_V512, EVEX_CD8<32, CD8VF>;
|
||||
defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, v8f64_info>,
|
||||
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||
|
||||
// Define only if AVX512VL feature is present.
|
||||
let Predicates = [HasVLX] in {
|
||||
defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
|
||||
OpNode, itins.s, v4f32x_info>,
|
||||
OpNode, v4f32x_info>,
|
||||
EVEX_V128, EVEX_CD8<32, CD8VF>;
|
||||
defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
|
||||
OpNode, itins.s, v8f32x_info>,
|
||||
OpNode, v8f32x_info>,
|
||||
EVEX_V256, EVEX_CD8<32, CD8VF>;
|
||||
defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
|
||||
OpNode, itins.d, v2f64x_info>,
|
||||
OpNode, v2f64x_info>,
|
||||
EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||
defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
|
||||
OpNode, itins.d, v4f64x_info>,
|
||||
OpNode, v4f64x_info>,
|
||||
EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||
}
|
||||
}
|
||||
|
||||
defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SSE_RSQRT_P>;
|
||||
defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SSE_RCP_P>;
|
||||
defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14>;
|
||||
defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14>;
|
||||
|
||||
/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
|
||||
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
|
||||
SDNode OpNode, OpndItins itins> {
|
||||
SDNode OpNode> {
|
||||
let ExeDomain = _.ExeDomain in {
|
||||
defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
|
||||
@ -7462,27 +7457,23 @@ multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
|
||||
}
|
||||
}
|
||||
|
||||
multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
SizeItins itins> {
|
||||
defm SS : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, itins.s>,
|
||||
multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode> {
|
||||
defm SS : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode>,
|
||||
EVEX_CD8<32, CD8VT1>;
|
||||
defm SD : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, itins.d>,
|
||||
defm SD : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode>,
|
||||
EVEX_CD8<64, CD8VT1>, VEX_W;
|
||||
}
|
||||
|
||||
let Predicates = [HasERI] in {
|
||||
defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, SSE_RCP_S>,
|
||||
T8PD, EVEX_4V;
|
||||
defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, SSE_RSQRT_S>,
|
||||
T8PD, EVEX_4V;
|
||||
defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s>, T8PD, EVEX_4V;
|
||||
defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s>, T8PD, EVEX_4V;
|
||||
}
|
||||
|
||||
defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds, SSE_ALU_ITINS_S>,
|
||||
T8PD, EVEX_4V;
|
||||
defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds>, T8PD, EVEX_4V;
|
||||
/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
|
||||
|
||||
multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
|
||||
SDNode OpNode, OpndItins itins> {
|
||||
SDNode OpNode> {
|
||||
let ExeDomain = _.ExeDomain in {
|
||||
defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src), OpcodeStr, "$src", "$src",
|
||||
@ -7503,7 +7494,7 @@ multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
|
||||
}
|
||||
}
|
||||
multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
|
||||
SDNode OpNode, OpndItins itins> {
|
||||
SDNode OpNode> {
|
||||
let ExeDomain = _.ExeDomain in
|
||||
defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src), OpcodeStr,
|
||||
@ -7511,39 +7502,37 @@ multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
|
||||
(OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC))>, EVEX_B;
|
||||
}
|
||||
|
||||
multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
SizeItins itins> {
|
||||
defm PS : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, itins.s>,
|
||||
avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode, itins.s>,
|
||||
multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode> {
|
||||
defm PS : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode>,
|
||||
avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode>,
|
||||
T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
|
||||
defm PD : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, itins.d>,
|
||||
avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode, itins.d>,
|
||||
defm PD : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode>,
|
||||
avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode>,
|
||||
T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||
}
|
||||
|
||||
multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
|
||||
SDNode OpNode, SizeItins itins> {
|
||||
SDNode OpNode> {
|
||||
// Define only if AVX512VL feature is present.
|
||||
let Predicates = [HasVLX] in {
|
||||
defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode, itins.s>,
|
||||
defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode>,
|
||||
EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
|
||||
defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode, itins.s>,
|
||||
defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode>,
|
||||
EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
|
||||
defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode, itins.d>,
|
||||
defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode>,
|
||||
EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
|
||||
defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode, itins.d>,
|
||||
defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode>,
|
||||
EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
|
||||
}
|
||||
}
|
||||
let Predicates = [HasERI] in {
|
||||
|
||||
defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, SSE_RSQRT_P>, EVEX;
|
||||
defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, SSE_RCP_P>, EVEX;
|
||||
defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, SSE_ALU_ITINS_P>, EVEX;
|
||||
defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28>, EVEX;
|
||||
defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28>, EVEX;
|
||||
defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2>, EVEX;
|
||||
}
|
||||
defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SSE_ALU_ITINS_P>,
|
||||
avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd,
|
||||
SSE_ALU_ITINS_P>, EVEX;
|
||||
defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd>,
|
||||
avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd> , EVEX;
|
||||
|
||||
multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr, OpndItins itins,
|
||||
X86VectorVTInfo _>{
|
||||
|
@ -3040,14 +3040,6 @@ def SSE_RSQRTSS : OpndItins<
|
||||
>;
|
||||
}
|
||||
|
||||
def SSE_RSQRT_P : SizeItins<
|
||||
SSE_RSQRTPS, SSE_RSQRTPS
|
||||
>;
|
||||
|
||||
def SSE_RSQRT_S : SizeItins<
|
||||
SSE_RSQRTSS, SSE_RSQRTSS
|
||||
>;
|
||||
|
||||
let Sched = WriteFRcp in {
|
||||
def SSE_RCPP : OpndItins<
|
||||
IIC_SSE_RCPP_RR, IIC_SSE_RCPP_RM
|
||||
@ -3058,14 +3050,6 @@ def SSE_RCPS : OpndItins<
|
||||
>;
|
||||
}
|
||||
|
||||
def SSE_RCP_P : SizeItins<
|
||||
SSE_RCPP, SSE_RCPP
|
||||
>;
|
||||
|
||||
def SSE_RCP_S : SizeItins<
|
||||
SSE_RCPS, SSE_RCPS
|
||||
>;
|
||||
|
||||
/// sse_fp_unop_s - SSE1 unops in scalar form
|
||||
/// For the non-AVX defs, we need $src1 to be tied to $dst because
|
||||
/// the HW instructions are 2 operand / destructive.
|
||||
|
@ -380,12 +380,12 @@ define float @f32_two_step_2(float %x) #2 {
|
||||
;
|
||||
; SKX-LABEL: f32_two_step_2:
|
||||
; SKX: # BB#0:
|
||||
; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
|
||||
; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm2 # sched: [4:1.00]
|
||||
; SKX-NEXT: vmovaps %xmm2, %xmm3 # sched: [1:1.00]
|
||||
; SKX-NEXT: vfnmadd213ss %xmm1, %xmm0, %xmm3 # sched: [4:0.33]
|
||||
; SKX-NEXT: vfmadd132ss %xmm2, %xmm2, %xmm3 # sched: [4:0.33]
|
||||
; SKX-NEXT: vfnmadd213ss %xmm1, %xmm3, %xmm0 # sched: [4:0.33]
|
||||
; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
|
||||
; SKX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
|
||||
; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
|
||||
; SKX-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [4:0.33]
|
||||
; SKX-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 # sched: [4:0.33]
|
||||
; SKX-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 # sched: [4:0.33]
|
||||
; SKX-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 # sched: [4:0.33]
|
||||
; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
|
||||
; SKX-NEXT: retq # sched: [7:1.00]
|
||||
|
Loading…
Reference in New Issue
Block a user