1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 11:13:28 +01:00

[X86][AVX512] Tag RCP/RSQRT/GETEXP instructions scheduler classes (REVERSION)

Accidental commit of incomplete patch

llvm-svn: 319346
This commit is contained in:
Simon Pilgrim 2017-11-29 19:37:38 +00:00
parent 1e7c2158fe
commit e19432f57e
3 changed files with 59 additions and 86 deletions

View File

@ -4549,7 +4549,7 @@ multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
"$src2, $src1", "$src1, $src2",
(_.VT (VecNode _.RC:$src1, _.RC:$src2,
(i32 FROUND_CURRENT))),
itins.rr>, Sched<[itins.Sched]>;
itins.rr>;
defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
@ -4557,21 +4557,20 @@ multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
(_.VT (VecNode _.RC:$src1,
_.ScalarIntMemCPat:$src2,
(i32 FROUND_CURRENT))),
itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
itins.rm>;
let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
itins.rr>, Sched<[itins.Sched]> {
itins.rr> {
let isCommutable = IsCommutable;
}
def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.ScalarMemOp:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src2)))], itins.rm>,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
(_.ScalarLdFrag addr:$src2)))], itins.rm>;
}
}
}
@ -4584,7 +4583,7 @@ multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo
"$rc, $src2, $src1", "$src1, $src2, $rc",
(VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(i32 imm:$rc)), itins.rr, IsCommutable>,
EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
EVEX_B, EVEX_RC;
}
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
SDNode OpNode, SDNode VecNode, SDNode SaeNode,
@ -4594,36 +4593,35 @@ multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (VecNode _.RC:$src1, _.RC:$src2)),
itins.rr>, Sched<[itins.Sched]>;
itins.rr>;
defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (VecNode _.RC:$src1,
_.ScalarIntMemCPat:$src2)),
itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
itins.rm>;
let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
itins.rr>, Sched<[itins.Sched]> {
itins.rr> {
let isCommutable = IsCommutable;
}
def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.ScalarMemOp:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src2)))], itins.rm>,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
(_.ScalarLdFrag addr:$src2)))], itins.rm>;
}
defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
(SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(i32 FROUND_NO_EXC))>, EVEX_B, Sched<[itins.Sched]>;
(i32 FROUND_NO_EXC))>, EVEX_B;
}
}
@ -7366,34 +7364,32 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in {
/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpndItins itins, X86VectorVTInfo _> {
X86VectorVTInfo _> {
let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)), itins.rr>,
EVEX_4V, Sched<[itins.Sched]>;
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, EVEX_4V;
defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1),
_.ScalarIntMemCPat:$src2), itins.rm>, EVEX_4V,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
_.ScalarIntMemCPat:$src2)>, EVEX_4V;
}
}
defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SSE_RCPS, f32x_info>,
defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, f32x_info>,
EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable;
defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SSE_RCPS, f64x_info>,
defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, f64x_info>,
VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable;
defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, SSE_RSQRTSS, f32x_info>,
defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, f32x_info>,
EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable;
defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, SSE_RSQRTSS, f64x_info>,
defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, f64x_info>,
VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable;
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpndItins itins, X86VectorVTInfo _> {
X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr, "$src", "$src",
@ -7411,36 +7407,35 @@ multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
}
multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
SizeItins itins> {
defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, itins.s,
v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, itins.d,
v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode> {
defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, v16f32_info>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, v8f64_info>,
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// Define only if AVX512VL feature is present.
let Predicates = [HasVLX] in {
defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
OpNode, itins.s, v4f32x_info>,
OpNode, v4f32x_info>,
EVEX_V128, EVEX_CD8<32, CD8VF>;
defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
OpNode, itins.s, v8f32x_info>,
OpNode, v8f32x_info>,
EVEX_V256, EVEX_CD8<32, CD8VF>;
defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
OpNode, itins.d, v2f64x_info>,
OpNode, v2f64x_info>,
EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
OpNode, itins.d, v4f64x_info>,
OpNode, v4f64x_info>,
EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
}
}
defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SSE_RSQRT_P>;
defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SSE_RCP_P>;
defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14>;
defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14>;
/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
SDNode OpNode, OpndItins itins> {
SDNode OpNode> {
let ExeDomain = _.ExeDomain in {
defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
@ -7462,27 +7457,23 @@ multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
}
}
multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
SizeItins itins> {
defm SS : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, itins.s>,
multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode> {
defm SS : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode>,
EVEX_CD8<32, CD8VT1>;
defm SD : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, itins.d>,
defm SD : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode>,
EVEX_CD8<64, CD8VT1>, VEX_W;
}
let Predicates = [HasERI] in {
defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, SSE_RCP_S>,
T8PD, EVEX_4V;
defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, SSE_RSQRT_S>,
T8PD, EVEX_4V;
defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s>, T8PD, EVEX_4V;
defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s>, T8PD, EVEX_4V;
}
defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds, SSE_ALU_ITINS_S>,
T8PD, EVEX_4V;
defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds>, T8PD, EVEX_4V;
/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
SDNode OpNode, OpndItins itins> {
SDNode OpNode> {
let ExeDomain = _.ExeDomain in {
defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr, "$src", "$src",
@ -7503,7 +7494,7 @@ multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
}
}
multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
SDNode OpNode, OpndItins itins> {
SDNode OpNode> {
let ExeDomain = _.ExeDomain in
defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr,
@ -7511,39 +7502,37 @@ multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
(OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC))>, EVEX_B;
}
multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
SizeItins itins> {
defm PS : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, itins.s>,
avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode, itins.s>,
multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode> {
defm PS : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode>,
avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode>,
T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm PD : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, itins.d>,
avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode, itins.d>,
defm PD : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode>,
avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode>,
T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
SDNode OpNode, SizeItins itins> {
SDNode OpNode> {
// Define only if AVX512VL feature is present.
let Predicates = [HasVLX] in {
defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode, itins.s>,
defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode>,
EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode, itins.s>,
defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode>,
EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode, itins.d>,
defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode>,
EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode, itins.d>,
defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode>,
EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
}
}
let Predicates = [HasERI] in {
defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, SSE_RSQRT_P>, EVEX;
defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, SSE_RCP_P>, EVEX;
defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, SSE_ALU_ITINS_P>, EVEX;
defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28>, EVEX;
defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28>, EVEX;
defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2>, EVEX;
}
defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SSE_ALU_ITINS_P>,
avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd,
SSE_ALU_ITINS_P>, EVEX;
defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd>,
avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd> , EVEX;
multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr, OpndItins itins,
X86VectorVTInfo _>{

View File

@ -3040,14 +3040,6 @@ def SSE_RSQRTSS : OpndItins<
>;
}
def SSE_RSQRT_P : SizeItins<
SSE_RSQRTPS, SSE_RSQRTPS
>;
def SSE_RSQRT_S : SizeItins<
SSE_RSQRTSS, SSE_RSQRTSS
>;
let Sched = WriteFRcp in {
def SSE_RCPP : OpndItins<
IIC_SSE_RCPP_RR, IIC_SSE_RCPP_RM
@ -3058,14 +3050,6 @@ def SSE_RCPS : OpndItins<
>;
}
def SSE_RCP_P : SizeItins<
SSE_RCPP, SSE_RCPP
>;
def SSE_RCP_S : SizeItins<
SSE_RCPS, SSE_RCPS
>;
/// sse_fp_unop_s - SSE1 unops in scalar form
/// For the non-AVX defs, we need $src1 to be tied to $dst because
/// the HW instructions are 2 operand / destructive.

View File

@ -380,12 +380,12 @@ define float @f32_two_step_2(float %x) #2 {
;
; SKX-LABEL: f32_two_step_2:
; SKX: # BB#0:
; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm2 # sched: [4:1.00]
; SKX-NEXT: vmovaps %xmm2, %xmm3 # sched: [1:1.00]
; SKX-NEXT: vfnmadd213ss %xmm1, %xmm0, %xmm3 # sched: [4:0.33]
; SKX-NEXT: vfmadd132ss %xmm2, %xmm2, %xmm3 # sched: [4:0.33]
; SKX-NEXT: vfnmadd213ss %xmm1, %xmm3, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
; SKX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; SKX-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [4:0.33]
; SKX-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 # sched: [4:0.33]
; SKX-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]