1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-24 19:52:54 +01:00

[X86] Add FP comparison scheduler classes

Split the VCMP/VMAX/VMIN instructions off to WriteFCmp, and the VCOMIS instructions off to WriteFCom, instead of assuming they match WriteFAdd.

Differential Revision: https://reviews.llvm.org/D45656

llvm-svn: 330179
This commit is contained in:
Simon Pilgrim 2018-04-17 07:22:44 +00:00
parent d6a6778f3e
commit 8ae32b4f07
14 changed files with 117 additions and 327 deletions

View File

@ -2051,10 +2051,10 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd,
let Predicates = [HasAVX512] in {
let ExeDomain = SSEPackedSingle in
defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd,
WriteFAdd>, AVX512XSIi8Base;
WriteFCmp>, AVX512XSIi8Base;
let ExeDomain = SSEPackedDouble in
defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd,
WriteFAdd>, AVX512XDIi8Base, VEX_W;
WriteFCmp>, AVX512XDIi8Base, VEX_W;
}
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
@ -2511,9 +2511,9 @@ multiclass avx512_vcmp<X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
}
}
defm VCMPPD : avx512_vcmp<WriteFAdd, avx512vl_f64_info>,
defm VCMPPD : avx512_vcmp<WriteFCmp, avx512vl_f64_info>,
AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VCMPPS : avx512_vcmp<WriteFAdd, avx512vl_f32_info>,
defm VCMPPS : avx512_vcmp<WriteFCmp, avx512vl_f32_info>,
AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
@ -4906,9 +4906,9 @@ defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds, WriteFMul, 1>;
defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds, WriteFAdd, 0>;
defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds, WriteFDiv, 0>;
defm VMIN : avx512_binop_s_sae <0x5D, "vmin", X86fmin, X86fmins, X86fminRnds,
WriteFAdd, 0>;
WriteFCmp, 0>;
defm VMAX : avx512_binop_s_sae <0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds,
WriteFAdd, 0>;
WriteFCmp, 0>;
// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
// X86fminc and X86fmaxc instead of X86fmin and X86fmax
@ -4932,19 +4932,19 @@ multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
}
}
defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
WriteFAdd>, XS, EVEX_4V, VEX_LIG,
WriteFCmp>, XS, EVEX_4V, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
WriteFAdd>, XD, VEX_W, EVEX_4V, VEX_LIG,
WriteFCmp>, XD, VEX_W, EVEX_4V, VEX_LIG,
EVEX_CD8<64, CD8VT1>;
defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
WriteFAdd>, XS, EVEX_4V, VEX_LIG,
WriteFCmp>, XS, EVEX_4V, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
WriteFAdd>, XD, VEX_W, EVEX_4V, VEX_LIG,
WriteFCmp>, XD, VEX_W, EVEX_4V, VEX_LIG,
EVEX_CD8<64, CD8VT1>;
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
@ -5050,13 +5050,13 @@ defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512, WriteFAdd>,
avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, WriteFAdd>;
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512, WriteFDiv>,
avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, WriteFDiv>;
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512, WriteFAdd, 0>,
avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, WriteFAdd>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512, WriteFAdd, 0>,
avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, WriteFAdd>;
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512, WriteFCmp, 0>,
avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, WriteFCmp>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512, WriteFCmp, 0>,
avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, WriteFCmp>;
let isCodeGenOnly = 1 in {
defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512, WriteFAdd, 1>;
defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512, WriteFAdd, 1>;
defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512, WriteFCmp, 1>;
defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512, WriteFCmp, 1>;
}
defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI, WriteFAdd, 1>;
defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI, WriteFAdd, 0>;
@ -7732,44 +7732,44 @@ multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
}
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", WriteFAdd>,
defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", WriteFCom>,
AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", WriteFAdd>,
defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", WriteFCom>,
AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", WriteFAdd>,
defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", WriteFCom>,
AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", WriteFAdd>,
defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", WriteFCom>,
AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
}
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
"ucomiss", WriteFAdd>, PS, EVEX, VEX_LIG,
"ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
"ucomisd", WriteFAdd>, PD, EVEX,
"ucomisd", WriteFCom>, PD, EVEX,
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
let Pattern = []<dag> in {
defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32,
"comiss", WriteFAdd>, PS, EVEX, VEX_LIG,
"comiss", WriteFCom>, PS, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64,
"comisd", WriteFAdd>, PD, EVEX,
"comisd", WriteFCom>, PD, EVEX,
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
}
let isCodeGenOnly = 1 in {
defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
sse_load_f32, "ucomiss", WriteFAdd>, PS, EVEX, VEX_LIG,
sse_load_f32, "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
sse_load_f64, "ucomisd", WriteFAdd>, PD, EVEX,
sse_load_f64, "ucomisd", WriteFCom>, PD, EVEX,
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
sse_load_f32, "comiss", WriteFAdd>, PS, EVEX, VEX_LIG,
sse_load_f32, "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
sse_load_f64, "comisd", WriteFAdd>, PD, EVEX,
sse_load_f64, "comisd", WriteFCom>, PD, EVEX,
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
}
}

View File

@ -277,6 +277,8 @@ def SUB_FPrST0 : FPrST0PInst<MRM5r, "fsub{r}p\t$op">;
def SUB_FST0r : FPST0rInst <MRM4r, "fsub\t$op">;
def SUBR_FrST0 : FPrST0Inst <MRM4r, "fsub{|r}\t{%st(0), $op|$op, st(0)}">;
def SUBR_FPrST0 : FPrST0PInst<MRM4r, "fsub{|r}p\t$op">;
} // SchedRW
let SchedRW = [WriteFCom] in {
def COM_FST0r : FPST0rInst <MRM2r, "fcom\t$op">;
def COMP_FST0r : FPST0rInst <MRM3r, "fcomp\t$op">;
} // SchedRW
@ -320,7 +322,7 @@ defm SIN : FPUnary<fsin, MRM_FE, "fsin">;
defm COS : FPUnary<fcos, MRM_FF, "fcos">;
}
let SchedRW = [WriteFAdd] in {
let SchedRW = [WriteFCom] in {
let hasSideEffects = 0 in {
def TST_Fp32 : FpIf32<(outs), (ins RFP32:$src), OneArgFP, []>;
def TST_Fp64 : FpIf64<(outs), (ins RFP64:$src), OneArgFP, []>;
@ -333,7 +335,7 @@ def TST_F : FPI<0xD9, MRM_E4, (outs), (ins), "ftst">;
// Versions of FP instructions that take a single memory operand. Added for the
// disassembler; remove as they are included with patterns elsewhere.
let SchedRW = [WriteFAddLd] in {
let SchedRW = [WriteFComLd] in {
def FCOM32m : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom{s}\t$src">;
def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp{s}\t$src">;
@ -568,7 +570,7 @@ def LD_F1 : FPI<0xD9, MRM_E8, (outs), (ins), "fld1">;
}
// Floating point compares.
let SchedRW = [WriteFAdd] in {
let SchedRW = [WriteFCom] in {
def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
[(set FPSW, (trunc (X86cmp RFP32:$lhs, RFP32:$rhs)))]>;
def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
@ -578,7 +580,7 @@ def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
} // SchedRW
} // Defs = [FPSW]
let SchedRW = [WriteFAdd] in {
let SchedRW = [WriteFCom] in {
// CC = ST(0) cmp ST(i)
let Defs = [EFLAGS, FPSW] in {
def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,

View File

@ -1854,23 +1854,23 @@ let ExeDomain = SSEPackedSingle in
defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, AVXCC, X86cmps, f32, loadf32,
"cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
WriteFAdd>, XS, VEX_4V, VEX_LIG, VEX_WIG;
WriteFCmp>, XS, VEX_4V, VEX_LIG, VEX_WIG;
let ExeDomain = SSEPackedDouble in
defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, AVXCC, X86cmps, f64, loadf64,
"cmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
WriteFAdd>, // same latency as 32 bit compare
WriteFCmp>, // same latency as 32 bit compare
XD, VEX_4V, VEX_LIG, VEX_WIG;
let Constraints = "$src1 = $dst" in {
let ExeDomain = SSEPackedSingle in
defm CMPSS : sse12_cmp_scalar<FR32, f32mem, SSECC, X86cmps, f32, loadf32,
"cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
"cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}", WriteFAdd>, XS;
"cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}", WriteFCmp>, XS;
let ExeDomain = SSEPackedDouble in
defm CMPSD : sse12_cmp_scalar<FR64, f64mem, SSECC, X86cmps, f64, loadf64,
"cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
"cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}", WriteFAdd>, XD;
"cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}", WriteFCmp>, XD;
}
multiclass sse12_cmp_scalar_int<Operand memop, Operand CC,
@ -1894,21 +1894,21 @@ let isCodeGenOnly = 1 in {
let ExeDomain = SSEPackedSingle in
defm VCMPSS : sse12_cmp_scalar_int<ssmem, AVXCC, int_x86_sse_cmp_ss,
"cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
WriteFAdd, sse_load_f32>, XS, VEX_4V;
WriteFCmp, sse_load_f32>, XS, VEX_4V;
let ExeDomain = SSEPackedDouble in
defm VCMPSD : sse12_cmp_scalar_int<sdmem, AVXCC, int_x86_sse2_cmp_sd,
"cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}",
WriteFAdd, sse_load_f64>, // same latency as f32
WriteFCmp, sse_load_f64>, // same latency as f32
XD, VEX_4V;
let Constraints = "$src1 = $dst" in {
let ExeDomain = SSEPackedSingle in
defm CMPSS : sse12_cmp_scalar_int<ssmem, SSECC, int_x86_sse_cmp_ss,
"cmp${cc}ss\t{$src, $dst|$dst, $src}",
WriteFAdd, sse_load_f32>, XS;
WriteFCmp, sse_load_f32>, XS;
let ExeDomain = SSEPackedDouble in
defm CMPSD : sse12_cmp_scalar_int<sdmem, SSECC, int_x86_sse2_cmp_sd,
"cmp${cc}sd\t{$src, $dst|$dst, $src}",
WriteFAdd, sse_load_f64>, XD;
WriteFCmp, sse_load_f64>, XD;
}
}
@ -1951,49 +1951,49 @@ let mayLoad = 1 in
let Defs = [EFLAGS] in {
defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
"ucomiss", WriteFAdd>, PS, VEX, VEX_LIG, VEX_WIG;
"ucomiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG;
defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
"ucomisd", WriteFAdd>, PD, VEX, VEX_LIG, VEX_WIG;
"ucomisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG;
let Pattern = []<dag> in {
defm VCOMISS : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32,
"comiss", WriteFAdd>, PS, VEX, VEX_LIG, VEX_WIG;
"comiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG;
defm VCOMISD : sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64,
"comisd", WriteFAdd>, PD, VEX, VEX_LIG, VEX_WIG;
"comisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG;
}
let isCodeGenOnly = 1 in {
defm VUCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
sse_load_f32, "ucomiss", WriteFAdd>, PS, VEX, VEX_WIG;
sse_load_f32, "ucomiss", WriteFCom>, PS, VEX, VEX_WIG;
defm VUCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
sse_load_f64, "ucomisd", WriteFAdd>, PD, VEX, VEX_WIG;
sse_load_f64, "ucomisd", WriteFCom>, PD, VEX, VEX_WIG;
defm VCOMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
sse_load_f32, "comiss", WriteFAdd>, PS, VEX, VEX_WIG;
sse_load_f32, "comiss", WriteFCom>, PS, VEX, VEX_WIG;
defm VCOMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
sse_load_f64, "comisd", WriteFAdd>, PD, VEX, VEX_WIG;
sse_load_f64, "comisd", WriteFCom>, PD, VEX, VEX_WIG;
}
defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
"ucomiss", WriteFAdd>, PS;
"ucomiss", WriteFCom>, PS;
defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
"ucomisd", WriteFAdd>, PD;
"ucomisd", WriteFCom>, PD;
let Pattern = []<dag> in {
defm COMISS : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32,
"comiss", WriteFAdd>, PS;
"comiss", WriteFCom>, PS;
defm COMISD : sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64,
"comisd", WriteFAdd>, PD;
"comisd", WriteFCom>, PD;
}
let isCodeGenOnly = 1 in {
defm UCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
sse_load_f32, "ucomiss", WriteFAdd>, PS;
sse_load_f32, "ucomiss", WriteFCom>, PS;
defm UCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
sse_load_f64, "ucomisd", WriteFAdd>, PD;
sse_load_f64, "ucomisd", WriteFCom>, PD;
defm COMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
sse_load_f32, "comiss", WriteFAdd>, PS;
sse_load_f32, "comiss", WriteFCom>, PS;
defm COMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
sse_load_f64, "comisd", WriteFAdd>, PD;
sse_load_f64, "comisd", WriteFCom>, PD;
}
} // Defs = [EFLAGS]
@ -2028,28 +2028,28 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
defm VCMPPS : sse12_cmp_packed<VR128, f128mem, AVXCC, v4f32,
"cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
WriteFAdd, SSEPackedSingle, loadv4f32>, PS, VEX_4V, VEX_WIG;
WriteFCmp, SSEPackedSingle, loadv4f32>, PS, VEX_4V, VEX_WIG;
defm VCMPPD : sse12_cmp_packed<VR128, f128mem, AVXCC, v2f64,
"cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
WriteFAdd, SSEPackedDouble, loadv2f64>, PD, VEX_4V, VEX_WIG;
WriteFCmp, SSEPackedDouble, loadv2f64>, PD, VEX_4V, VEX_WIG;
defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, AVXCC, v8f32,
"cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
WriteFAdd, SSEPackedSingle, loadv8f32>, PS, VEX_4V, VEX_L;
WriteFCmp, SSEPackedSingle, loadv8f32>, PS, VEX_4V, VEX_L;
defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, AVXCC, v4f64,
"cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
WriteFAdd, SSEPackedDouble, loadv4f64>, PD, VEX_4V, VEX_L;
WriteFCmp, SSEPackedDouble, loadv4f64>, PD, VEX_4V, VEX_L;
let Constraints = "$src1 = $dst" in {
defm CMPPS : sse12_cmp_packed<VR128, f128mem, SSECC, v4f32,
"cmp${cc}ps\t{$src2, $dst|$dst, $src2}",
"cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
WriteFAdd, SSEPackedSingle, memopv4f32>, PS;
WriteFCmp, SSEPackedSingle, memopv4f32>, PS;
defm CMPPD : sse12_cmp_packed<VR128, f128mem, SSECC, v2f64,
"cmp${cc}pd\t{$src2, $dst|$dst, $src2}",
"cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
WriteFAdd, SSEPackedDouble, memopv2f64>, PD;
WriteFCmp, SSEPackedDouble, memopv2f64>, PD;
}
def CommutableCMPCC : PatLeaf<(imm), [{
@ -2583,19 +2583,19 @@ let isCommutable = 0 in {
defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, WriteFDiv>,
basic_sse12_fp_binop_s<0x5E, "div", fdiv, WriteFDiv>,
basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, WriteFDiv>;
defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, WriteFAdd>,
basic_sse12_fp_binop_s<0x5F, "max", X86fmax, WriteFAdd>,
basic_sse12_fp_binop_s_int<0x5F, "max", X86fmaxs, WriteFAdd>;
defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, WriteFAdd>,
basic_sse12_fp_binop_s<0x5D, "min", X86fmin, WriteFAdd>,
basic_sse12_fp_binop_s_int<0x5D, "min", X86fmins, WriteFAdd>;
defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, WriteFCmp>,
basic_sse12_fp_binop_s<0x5F, "max", X86fmax, WriteFCmp>,
basic_sse12_fp_binop_s_int<0x5F, "max", X86fmaxs, WriteFCmp>;
defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, WriteFCmp>,
basic_sse12_fp_binop_s<0x5D, "min", X86fmin, WriteFCmp>,
basic_sse12_fp_binop_s_int<0x5D, "min", X86fmins, WriteFCmp>;
}
let isCodeGenOnly = 1 in {
defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, WriteFAdd>,
basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, WriteFAdd>;
defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, WriteFAdd>,
basic_sse12_fp_binop_s<0x5D, "min", X86fminc, WriteFAdd>;
defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, WriteFCmp>,
basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, WriteFCmp>;
defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, WriteFCmp>,
basic_sse12_fp_binop_s<0x5D, "min", X86fminc, WriteFCmp>;
}
// Patterns used to select SSE scalar fp arithmetic instructions from

View File

@ -154,7 +154,9 @@ def : WriteRes<WriteFLoad, [BWPort23]> { let Latency = 5; }
def : WriteRes<WriteFStore, [BWPort237, BWPort4]>;
def : WriteRes<WriteFMove, [BWPort5]>;
defm : BWWriteResPair<WriteFAdd, [BWPort1], 3>; // Floating point add/sub/compare.
defm : BWWriteResPair<WriteFAdd, [BWPort1], 3>; // Floating point add/sub.
defm : BWWriteResPair<WriteFCmp, [BWPort1], 3>; // Floating point compare.
defm : BWWriteResPair<WriteFCom, [BWPort1], 3>; // Floating point compare to flags.
defm : BWWriteResPair<WriteFMul, [BWPort0], 5>; // Floating point multiplication.
defm : BWWriteResPair<WriteFDiv, [BWPort0], 12>; // 10-14 cycles. // Floating point division.
defm : BWWriteResPair<WriteFSqrt, [BWPort0], 15>; // Floating point square root.
@ -843,29 +845,13 @@ def: InstRW<[BWWriteResGroup27], (instregex "ADD_FPrST0",
"(V?)ADDSSrr",
"(V?)ADDSUBPD(Y?)rr",
"(V?)ADDSUBPS(Y?)rr",
"(V?)CMPPD(Y?)rri",
"(V?)CMPPS(Y?)rri",
"(V?)CMPSDrr",
"(V?)CMPSSrr",
"(V?)COMISDrr",
"(V?)COMISSrr",
"(V?)CVTDQ2PS(Y?)rr",
"(V?)CVTPS2DQ(Y?)rr",
"(V?)CVTTPS2DQ(Y?)rr",
"(V?)MAX(C?)PD(Y?)rr",
"(V?)MAX(C?)PS(Y?)rr",
"(V?)MAX(C?)SDrr",
"(V?)MAX(C?)SSrr",
"(V?)MIN(C?)PD(Y?)rr",
"(V?)MIN(C?)PS(Y?)rr",
"(V?)MIN(C?)SDrr",
"(V?)MIN(C?)SSrr",
"(V?)SUBPD(Y?)rr",
"(V?)SUBPS(Y?)rr",
"(V?)SUBSDrr",
"(V?)SUBSSrr",
"(V?)UCOMISDrr",
"(V?)UCOMISSrr")>;
"(V?)SUBSSrr")>;
def BWWriteResGroup27_16 : SchedWriteRes<[BWPort1, BWPort0156]> {
let Latency = 3;
@ -1832,29 +1818,13 @@ def: InstRW<[BWWriteResGroup91], (instregex "MMX_CVTPI2PSirm",
"(V?)ADDSSrm",
"(V?)ADDSUBPDrm",
"(V?)ADDSUBPSrm",
"(V?)CMPPDrmi",
"(V?)CMPPSrmi",
"(V?)CMPSDrm",
"(V?)CMPSSrm",
"(V?)COMISDrm",
"(V?)COMISSrm",
"(V?)CVTDQ2PSrm",
"(V?)CVTPS2DQrm",
"(V?)CVTTPS2DQrm",
"(V?)MAX(C?)PDrm",
"(V?)MAX(C?)PSrm",
"(V?)MAX(C?)SDrm",
"(V?)MAX(C?)SSrm",
"(V?)MIN(C?)PDrm",
"(V?)MIN(C?)PSrm",
"(V?)MIN(C?)SDrm",
"(V?)MIN(C?)SSrm",
"(V?)SUBPDrm",
"(V?)SUBPSrm",
"(V?)SUBSDrm",
"(V?)SUBSSrm",
"(V?)UCOMISDrm",
"(V?)UCOMISSrm")>;
"(V?)SUBSSrm")>;
def BWWriteResGroup91_16 : SchedWriteRes<[BWPort1, BWPort0156, BWPort23]> {
let Latency = 8;

View File

@ -149,6 +149,8 @@ def : WriteRes<WriteFLoad, [HWPort23]> { let Latency = 5; }
def : WriteRes<WriteFMove, [HWPort5]>;
defm : HWWriteResPair<WriteFAdd, [HWPort1], 3>;
defm : HWWriteResPair<WriteFCmp, [HWPort1], 3, [1], 1, 6>;
defm : HWWriteResPair<WriteFCom, [HWPort1], 3>;
defm : HWWriteResPair<WriteFMul, [HWPort0], 5>;
defm : HWWriteResPair<WriteFDiv, [HWPort0], 12>; // 10-14 cycles.
defm : HWWriteResPair<WriteFRcp, [HWPort0], 5>;
@ -1041,16 +1043,12 @@ def: InstRW<[HWWriteResGroup12], (instregex "FCOM32m",
"(V?)ADDSSrm",
"(V?)CMPSDrm",
"(V?)CMPSSrm",
"(V?)COMISDrm",
"(V?)COMISSrm",
"(V?)MAX(C?)SDrm",
"(V?)MAX(C?)SSrm",
"(V?)MIN(C?)SDrm",
"(V?)MIN(C?)SSrm",
"(V?)SUBSDrm",
"(V?)SUBSSrm",
"(V?)UCOMISDrm",
"(V?)UCOMISSrm")>;
"(V?)SUBSSrm")>;
def HWWriteResGroup13 : SchedWriteRes<[HWPort5,HWPort23]> {
let Latency = 7;
@ -1730,29 +1728,13 @@ def: InstRW<[HWWriteResGroup50], (instregex "ADD_FPrST0",
"(V?)ADDSSrr",
"(V?)ADDSUBPD(Y?)rr",
"(V?)ADDSUBPS(Y?)rr",
"(V?)CMPPD(Y?)rri",
"(V?)CMPPS(Y?)rri",
"(V?)CMPSDrr",
"(V?)CMPSSrr",
"(V?)COMISDrr",
"(V?)COMISSrr",
"(V?)CVTDQ2PS(Y?)rr",
"(V?)CVTPS2DQ(Y?)rr",
"(V?)CVTTPS2DQ(Y?)rr",
"(V?)MAX(C?)PD(Y?)rr",
"(V?)MAX(C?)PS(Y?)rr",
"(V?)MAX(C?)SDrr",
"(V?)MAX(C?)SSrr",
"(V?)MIN(C?)PD(Y?)rr",
"(V?)MIN(C?)PS(Y?)rr",
"(V?)MIN(C?)SDrr",
"(V?)MIN(C?)SSrr",
"(V?)SUBPD(Y?)rr",
"(V?)SUBPS(Y?)rr",
"(V?)SUBSDrr",
"(V?)SUBSSrr",
"(V?)UCOMISDrr",
"(V?)UCOMISSrr")>;
"(V?)SUBSSrr")>;
def HWWriteResGroup50_16i : SchedWriteRes<[HWPort1, HWPort0156]> {
let Latency = 3;
@ -1804,15 +1786,9 @@ def: InstRW<[HWWriteResGroup52], (instregex "(V?)ADDPDrm",
"(V?)ADDPSrm",
"(V?)ADDSUBPDrm",
"(V?)ADDSUBPSrm",
"(V?)CMPPDrmi",
"(V?)CMPPSrmi",
"(V?)CVTDQ2PSrm",
"(V?)CVTPS2DQrm",
"(V?)CVTTPS2DQrm",
"(V?)MAX(C?)PDrm",
"(V?)MAX(C?)PSrm",
"(V?)MIN(C?)PDrm",
"(V?)MIN(C?)PSrm",
"(V?)SUBPDrm",
"(V?)SUBPSrm")>;

View File

@ -139,6 +139,8 @@ def : WriteRes<WriteFLoad, [SBPort23]> { let Latency = 6; }
def : WriteRes<WriteFMove, [SBPort5]>;
defm : SBWriteResPair<WriteFAdd, [SBPort1], 3>;
defm : SBWriteResPair<WriteFCmp, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteFCom, [SBPort1], 3>;
defm : SBWriteResPair<WriteFMul, [SBPort0], 5>;
defm : SBWriteResPair<WriteFDiv, [SBPort0], 24>;
defm : SBWriteResPair<WriteFRcp, [SBPort0], 5>;
@ -685,21 +687,9 @@ def: InstRW<[SBWriteResGroup21], (instregex "ADD_FPrST0",
"(V?)ADDSSrr",
"(V?)ADDSUBPD(Y?)rr",
"(V?)ADDSUBPS(Y?)rr",
"(V?)CMPPD(Y?)rri",
"(V?)CMPPS(Y?)rri",
"(V?)CMPSDrr",
"(V?)CMPSSrr",
"(V?)CVTDQ2PS(Y?)rr",
"(V?)CVTPS2DQ(Y?)rr",
"(V?)CVTTPS2DQ(Y?)rr",
"(V?)MAX(C?)PD(Y?)rr",
"(V?)MAX(C?)PS(Y?)rr",
"(V?)MAX(C?)SDrr",
"(V?)MAX(C?)SSrr",
"(V?)MIN(C?)PD(Y?)rr",
"(V?)MIN(C?)PS(Y?)rr",
"(V?)MIN(C?)SDrr",
"(V?)MIN(C?)SSrr",
"(V?)ROUNDPD(Y?)r",
"(V?)ROUNDPS(Y?)r",
"(V?)ROUNDSDr",
@ -1562,23 +1552,11 @@ def: InstRW<[SBWriteResGroup90], (instregex "MMX_CVTPI2PSirm",
"(V?)ADDSSrm",
"(V?)ADDSUBPDrm",
"(V?)ADDSUBPSrm",
"(V?)CMPPDrmi",
"(V?)CMPPSrmi",
"(V?)CMPSDrm",
"(V?)CMPSSrm",
"(V?)CVTDQ2PSrm",
"(V?)CVTPS2DQrm",
"(V?)CVTSI642SDrm",
"(V?)CVTSI2SDrm",
"(V?)CVTTPS2DQrm",
"(V?)MAX(C?)PDrm",
"(V?)MAX(C?)PSrm",
"(V?)MAX(C?)SDrm",
"(V?)MAX(C?)SSrm",
"(V?)MIN(C?)PDrm",
"(V?)MIN(C?)PSrm",
"(V?)MIN(C?)SDrm",
"(V?)MIN(C?)SSrm",
"(V?)ROUNDPDm",
"(V?)ROUNDPSm",
"(V?)ROUNDSDm",

View File

@ -151,7 +151,9 @@ def : WriteRes<WriteFLoad, [SKLPort23]> { let Latency = 6; }
def : WriteRes<WriteFStore, [SKLPort237, SKLPort4]>;
def : WriteRes<WriteFMove, [SKLPort015]>;
defm : SKLWriteResPair<WriteFAdd, [SKLPort1], 3>; // Floating point add/sub/compare.
defm : SKLWriteResPair<WriteFAdd, [SKLPort1], 3>; // Floating point add/sub.
defm : SKLWriteResPair<WriteFCmp, [SKLPort01], 4, [1], 1, 6>; // Floating point compare.
defm : SKLWriteResPair<WriteFCom, [SKLPort0], 2>; // Floating point compare to flags.
defm : SKLWriteResPair<WriteFMul, [SKLPort0], 5>; // Floating point multiplication.
defm : SKLWriteResPair<WriteFDiv, [SKLPort0], 12>; // 10-14 cycles. // Floating point division.
defm : SKLWriteResPair<WriteFSqrt, [SKLPort0], 15>; // Floating point square root.
@ -672,14 +674,10 @@ def SKLWriteResGroup12 : SchedWriteRes<[SKLPort0]> {
}
def: InstRW<[SKLWriteResGroup12], (instregex "MMX_MOVD64from64rr",
"MMX_MOVD64grr",
"(V?)COMISDrr",
"(V?)COMISSrr",
"(V?)MOVPDI2DIrr",
"(V?)MOVPQIto64rr",
"VTESTPD(Y?)rr",
"VTESTPS(Y?)rr",
"(V?)UCOMISDrr",
"(V?)UCOMISSrr")>;
"VTESTPS(Y?)rr")>;
def SKLWriteResGroup13 : SchedWriteRes<[SKLPort5]> {
let Latency = 2;
@ -1067,21 +1065,9 @@ def: InstRW<[SKLWriteResGroup48], (instregex "(V?)ADDPD(Y?)rr",
"(V?)ADDSSrr",
"(V?)ADDSUBPD(Y?)rr",
"(V?)ADDSUBPS(Y?)rr",
"(V?)CMPPD(Y?)rri",
"(V?)CMPPS(Y?)rri",
"(V?)CMPSDrr",
"(V?)CMPSSrr",
"(V?)CVTDQ2PS(Y?)rr",
"(V?)CVTPS2DQ(Y?)rr",
"(V?)CVTTPS2DQ(Y?)rr",
"(V?)MAX(C?)PD(Y?)rr",
"(V?)MAX(C?)PS(Y?)rr",
"(V?)MAX(C?)SDrr",
"(V?)MAX(C?)SSrr",
"(V?)MIN(C?)PD(Y?)rr",
"(V?)MIN(C?)PS(Y?)rr",
"(V?)MIN(C?)SDrr",
"(V?)MIN(C?)SSrr",
"(V?)MULPD(Y?)rr",
"(V?)MULPS(Y?)rr",
"(V?)MULSDrr",
@ -1547,16 +1533,6 @@ def SKLWriteResGroup86 : SchedWriteRes<[SKLPort0,SKLPort5]> {
}
def: InstRW<[SKLWriteResGroup86], (instregex "VCVTDQ2PDYrr")>;
def SKLWriteResGroup87 : SchedWriteRes<[SKLPort0,SKLPort23]> {
let Latency = 7;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup87], (instregex "(V?)COMISDrm",
"(V?)COMISSrm",
"(V?)UCOMISDrm",
"(V?)UCOMISSrm")>;
def SKLWriteResGroup88 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let Latency = 7;
let NumMicroOps = 2;
@ -2196,17 +2172,11 @@ def: InstRW<[SKLWriteResGroup134], (instregex "(V?)ADDPDrm",
"(V?)ADDPSrm",
"(V?)ADDSUBPDrm",
"(V?)ADDSUBPSrm",
"(V?)CMPPDrmi",
"(V?)CMPPSrmi",
"(V?)CVTDQ2PSrm",
"(V?)CVTPH2PSYrm",
"(V?)CVTPS2DQrm",
"(V?)CVTSS2SDrm",
"(V?)CVTTPS2DQrm",
"(V?)MAX(C?)PDrm",
"(V?)MAX(C?)PSrm",
"(V?)MIN(C?)PDrm",
"(V?)MIN(C?)PSrm",
"(V?)MULPDrm",
"(V?)MULPSrm",
"(V?)PHMINPOSUWrm",

View File

@ -151,7 +151,9 @@ def : WriteRes<WriteFLoad, [SKXPort23]> { let Latency = 5; }
def : WriteRes<WriteFStore, [SKXPort237, SKXPort4]>;
def : WriteRes<WriteFMove, [SKXPort015]>;
defm : SKXWriteResPair<WriteFAdd, [SKXPort1], 3>; // Floating point add/sub/compare.
defm : SKXWriteResPair<WriteFAdd, [SKXPort1], 3>; // Floating point add/sub.
defm : SKXWriteResPair<WriteFCmp, [SKXPort015], 4, [1], 1, 6>; // Floating point compare.
defm : SKXWriteResPair<WriteFCom, [SKXPort0], 2>; // Floating point compare to flags.
defm : SKXWriteResPair<WriteFMul, [SKXPort0], 5>; // Floating point multiplication.
defm : SKXWriteResPair<WriteFDiv, [SKXPort0], 12>; // 10-14 cycles. // Floating point division.
defm : SKXWriteResPair<WriteFSqrt, [SKXPort0], 15>; // Floating point square root.
@ -1406,18 +1408,10 @@ def SKXWriteResGroup12 : SchedWriteRes<[SKXPort0]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
def: InstRW<[SKXWriteResGroup12], (instregex "COMISDrr",
"COMISSrr",
"MMX_MOVD64from64rr",
def: InstRW<[SKXWriteResGroup12], (instregex "MMX_MOVD64from64rr",
"MMX_MOVD64grr",
"MOVPDI2DIrr",
"MOVPQIto64rr",
"UCOMISDrr",
"UCOMISSrr",
"VCOMISDZrr(b?)",
"VCOMISDrr",
"VCOMISSZrr(b?)",
"VCOMISSrr",
"VMOVPDI2DIZrr",
"VMOVPDI2DIrr",
"VMOVPQIto64Zrr",
@ -1425,11 +1419,7 @@ def: InstRW<[SKXWriteResGroup12], (instregex "COMISDrr",
"VTESTPDYrr",
"VTESTPDrr",
"VTESTPSYrr",
"VTESTPSrr",
"VUCOMISDZrr(b?)",
"VUCOMISDrr",
"VUCOMISSZrr(b?)",
"VUCOMISSrr")>;
"VTESTPSrr")>;
def SKXWriteResGroup13 : SchedWriteRes<[SKXPort5]> {
let Latency = 2;
@ -2162,21 +2152,9 @@ def: InstRW<[SKXWriteResGroup50], (instregex "ADDPDrr",
"ADDSSrr",
"ADDSUBPDrr",
"ADDSUBPSrr",
"CMPPDrri",
"CMPPSrri",
"CMPSDrr",
"CMPSSrr",
"CVTDQ2PSrr",
"CVTPS2DQrr",
"CVTTPS2DQrr",
"MAX(C?)PDrr",
"MAX(C?)PSrr",
"MAX(C?)SDrr",
"MAX(C?)SSrr",
"MIN(C?)PDrr",
"MIN(C?)PSrr",
"MIN(C?)SDrr",
"MIN(C?)SSrr",
"MULPDrr",
"MULPSrr",
"MULSDrr",
@ -2212,12 +2190,6 @@ def: InstRW<[SKXWriteResGroup50], (instregex "ADDPDrr",
"VADDSUBPDrr",
"VADDSUBPSYrr",
"VADDSUBPSrr",
"VCMPPDYrri",
"VCMPPDrri",
"VCMPPSYrri",
"VCMPPSrri",
"VCMPSDrr",
"VCMPSSrr",
"VCVTDQ2PSYrr",
"VCVTDQ2PSZ128rr",
"VCVTDQ2PSZ256rr",
@ -2284,34 +2256,6 @@ def: InstRW<[SKXWriteResGroup50], (instregex "ADDPDrr",
"VGETMANTPSZrri",
"VGETMANTSDZ128rri",
"VGETMANTSSZ128rri",
"VMAX(C?)PDYrr",
"VMAX(C?)PDZ128rr",
"VMAX(C?)PDZ256rr",
"VMAX(C?)PDZrr",
"VMAX(C?)PDrr",
"VMAX(C?)PSYrr",
"VMAX(C?)PSZ128rr",
"VMAX(C?)PSZ256rr",
"VMAX(C?)PSZrr",
"VMAX(C?)PSrr",
"VMAX(C?)SDZrr",
"VMAX(C?)SDrr",
"VMAX(C?)SSZrr",
"VMAX(C?)SSrr",
"VMIN(C?)PDYrr",
"VMIN(C?)PDZ128rr",
"VMIN(C?)PDZ256rr",
"VMIN(C?)PDZrr",
"VMIN(C?)PDrr",
"VMIN(C?)PSYrr",
"VMIN(C?)PSZ128rr",
"VMIN(C?)PSZ256rr",
"VMIN(C?)PSZrr",
"VMIN(C?)PSrr",
"VMIN(C?)SDZrr",
"VMIN(C?)SDrr",
"VMIN(C?)SSZrr",
"VMIN(C?)SSrr",
"VMULPDYrr",
"VMULPDZ128rr",
"VMULPDZ256rr",
@ -3145,24 +3089,6 @@ def SKXWriteResGroup90 : SchedWriteRes<[SKXPort0,SKXPort5]> {
}
def: InstRW<[SKXWriteResGroup90], (instregex "VCVTDQ2PDYrr")>;
def SKXWriteResGroup91 : SchedWriteRes<[SKXPort0,SKXPort23]> {
let Latency = 7;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup91], (instregex "COMISDrm",
"COMISSrm",
"UCOMISDrm",
"UCOMISSrm",
"VCOMISDZrm(b?)",
"VCOMISDrm",
"VCOMISSZrm(b?)",
"VCOMISSrm",
"VUCOMISDZrm(b?)",
"VUCOMISDrm",
"VUCOMISSZrm(b?)",
"VUCOMISSrm")>;
def SKXWriteResGroup92 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 7;
let NumMicroOps = 2;
@ -4744,16 +4670,10 @@ def: InstRW<[SKXWriteResGroup149], (instregex "ADDPDrm",
"ADDPSrm",
"ADDSUBPDrm",
"ADDSUBPSrm",
"CMPPDrmi",
"CMPPSrmi",
"CVTDQ2PSrm",
"CVTPS2DQrm",
"CVTSS2SDrm",
"CVTTPS2DQrm",
"MAX(C?)PDrm",
"MAX(C?)PSrm",
"MIN(C?)PDrm",
"MIN(C?)PSrm",
"MULPDrm",
"MULPSrm",
"PHMINPOSUWrm",
@ -4775,8 +4695,6 @@ def: InstRW<[SKXWriteResGroup149], (instregex "ADDPDrm",
"VADDSSZrm",
"VADDSUBPDrm",
"VADDSUBPSrm",
"VCMPPDrmi",
"VCMPPSrmi",
"VCVTDQ2PDZ128rm(b?)",
"VCVTDQ2PSZ128rm(b?)",
"VCVTDQ2PSrm",
@ -4817,18 +4735,6 @@ def: InstRW<[SKXWriteResGroup149], (instregex "ADDPDrm",
"VGETMANTPSZ128rm(b?)i",
"VGETMANTSDZ128rmi(b?)",
"VGETMANTSSZ128rmi(b?)",
"VMAX(C?)PDZ128rm(b?)",
"VMAX(C?)PDrm",
"VMAX(C?)PSZ128rm(b?)",
"VMAX(C?)PSrm",
"VMAX(C?)SDZrm",
"VMAX(C?)SSZrm",
"VMIN(C?)PDZ128rm(b?)",
"VMIN(C?)PDrm",
"VMIN(C?)PSZ128rm(b?)",
"VMIN(C?)PSrm",
"VMIN(C?)SDZrm",
"VMIN(C?)SSZrm",
"VMULPDZ128rm(b?)",
"VMULPDrm",
"VMULPSZ128rm(b?)",

View File

@ -78,7 +78,9 @@ defm WriteJump : X86SchedWritePair;
def WriteFLoad : SchedWrite;
def WriteFStore : SchedWrite;
def WriteFMove : SchedWrite;
defm WriteFAdd : X86SchedWritePair; // Floating point add/sub/compare.
defm WriteFAdd : X86SchedWritePair; // Floating point add/sub.
defm WriteFCmp : X86SchedWritePair; // Floating point compare.
defm WriteFCom : X86SchedWritePair; // Floating point compare to flags.
defm WriteFMul : X86SchedWritePair; // Floating point multiplication.
defm WriteFDiv : X86SchedWritePair; // Floating point division.
defm WriteFSqrt : X86SchedWritePair; // Floating point square root.

View File

@ -203,6 +203,8 @@ def : WriteRes<WriteFStore, [AtomPort0]>;
def : WriteRes<WriteFMove, [AtomPort01]>;
defm : AtomWriteResPair<WriteFAdd, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFCmp, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFCom, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFMul, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteFRcp, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteFRsqrt, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;

View File

@ -294,6 +294,8 @@ def : WriteRes<WriteFStore, [JSAGU, JFPU1, JSTC]>;
def : WriteRes<WriteFMove, [JFPU01, JFPX]>;
defm : JWriteResFpuPair<WriteFAdd, [JFPU0, JFPA], 3>;
defm : JWriteResFpuPair<WriteFCmp, [JFPU0, JFPA], 2>;
defm : JWriteResFpuPair<WriteFCom, [JFPU0, JFPA, JALU0], 3>;
defm : JWriteResFpuPair<WriteFMul, [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFMA, [JFPU1, JFPM], 2>; // NOTE: Doesn't exist on Jaguar.
defm : JWriteResFpuPair<WriteFRcp, [JFPU1, JFPM], 2>;
@ -704,28 +706,6 @@ def JWriteVMOVNTPYSt: SchedWriteRes<[JFPU1, JSTC, JSAGU]> {
}
def : InstRW<[JWriteVMOVNTPYSt], (instrs VMOVNTDQYmr, VMOVNTPDYmr, VMOVNTPSYmr)>;
def JWriteFComi : SchedWriteRes<[JFPU0, JFPA, JALU0]> {
let Latency = 3;
}
def : InstRW<[JWriteFComi], (instregex "(V)?(U)?COMIS(D|S)rr")>;
def JWriteFComiLd : SchedWriteRes<[JLAGU, JFPU0, JFPA, JALU0]> {
let Latency = 8;
}
def : InstRW<[JWriteFComiLd], (instregex "(V)?(U)?COMIS(D|S)rm")>;
def JWriteFCmp: SchedWriteRes<[JFPU0, JFPA]> {
let Latency = 2;
}
def : InstRW<[JWriteFCmp], (instregex "(V)?M(AX|IN)(P|S)(D|S)rr",
"(V)?CMPP(S|D)rri", "(V)?CMPS(S|D)rr")>;
def JWriteFCmpLd: SchedWriteRes<[JLAGU, JFPU0, JFPA]> {
let Latency = 7;
}
def : InstRW<[JWriteFCmpLd], (instregex "(V)?M(AX|IN)(P|S)(D|S)rm",
"(V)?CMPP(S|D)rmi", "(V)?CMPS(S|D)rm")>;
def JWriteFCmpY: SchedWriteRes<[JFPU0, JFPA]> {
let Latency = 2;
let ResourceCycles = [2, 2];

View File

@ -125,6 +125,8 @@ def : WriteRes<WriteFLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteFMove, [SLM_FPC_RSV01]>;
defm : SLMWriteResPair<WriteFAdd, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFCmp, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFCom, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFMul, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : SLMWriteResPair<WriteFDiv, [SLM_FPC_RSV0, SLMFPDivider], 34, [1,34]>;
defm : SLMWriteResPair<WriteFRcp, [SLM_FPC_RSV0], 5>;

View File

@ -193,6 +193,8 @@ def : WriteRes<WriteFLoad, [ZnAGU]> { let Latency = 8; }
defm : ZnWriteResFpuPair<WriteFHAdd, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFAdd, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFCmp, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFCom, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFBlend, [ZnFPU01], 1>;
defm : ZnWriteResFpuPair<WriteFVarBlend, [ZnFPU01], 1>;
defm : ZnWriteResFpuPair<WriteVarBlend, [ZnFPU0], 1>;

View File

@ -1187,16 +1187,16 @@ define void @test_fcomi_fcomip() optsize {
; SKYLAKE-LABEL: test_fcomi_fcomip:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
; SKYLAKE-NEXT: fcomi %st(3) # sched: [3:1.00]
; SKYLAKE-NEXT: fcompi %st(3) # sched: [3:1.00]
; SKYLAKE-NEXT: fcomi %st(3) # sched: [2:1.00]
; SKYLAKE-NEXT: fcompi %st(3) # sched: [2:1.00]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: retl # sched: [6:0.50]
;
; SKX-LABEL: test_fcomi_fcomip:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: fcomi %st(3) # sched: [3:1.00]
; SKX-NEXT: fcompi %st(3) # sched: [3:1.00]
; SKX-NEXT: fcomi %st(3) # sched: [2:1.00]
; SKX-NEXT: fcompi %st(3) # sched: [2:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
@ -5073,14 +5073,14 @@ define void @test_ftst() optsize {
; SKYLAKE-LABEL: test_ftst:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
; SKYLAKE-NEXT: ftst # sched: [3:1.00]
; SKYLAKE-NEXT: ftst # sched: [2:1.00]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: retl # sched: [6:0.50]
;
; SKX-LABEL: test_ftst:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: ftst # sched: [3:1.00]
; SKX-NEXT: ftst # sched: [2:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
@ -5175,7 +5175,7 @@ define void @test_fucom_fucomp_fucompp() optsize {
; SKYLAKE-NEXT: fucom %st(3) # sched: [1:1.00]
; SKYLAKE-NEXT: fucomp %st(1) # sched: [1:1.00]
; SKYLAKE-NEXT: fucomp %st(3) # sched: [1:1.00]
; SKYLAKE-NEXT: fucompp # sched: [3:1.00]
; SKYLAKE-NEXT: fucompp # sched: [2:1.00]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: retl # sched: [6:0.50]
;
@ -5186,7 +5186,7 @@ define void @test_fucom_fucomp_fucompp() optsize {
; SKX-NEXT: fucom %st(3) # sched: [1:1.00]
; SKX-NEXT: fucomp %st(1) # sched: [1:1.00]
; SKX-NEXT: fucomp %st(3) # sched: [1:1.00]
; SKX-NEXT: fucompp # sched: [3:1.00]
; SKX-NEXT: fucompp # sched: [2:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
@ -5267,16 +5267,16 @@ define void @test_fucomi_fucomip() optsize {
; SKYLAKE-LABEL: test_fucomi_fucomip:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: #APP
; SKYLAKE-NEXT: fucomi %st(3) # sched: [3:1.00]
; SKYLAKE-NEXT: fucompi %st(3) # sched: [3:1.00]
; SKYLAKE-NEXT: fucomi %st(3) # sched: [2:1.00]
; SKYLAKE-NEXT: fucompi %st(3) # sched: [2:1.00]
; SKYLAKE-NEXT: #NO_APP
; SKYLAKE-NEXT: retl # sched: [6:0.50]
;
; SKX-LABEL: test_fucomi_fucomip:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: fucomi %st(3) # sched: [3:1.00]
; SKX-NEXT: fucompi %st(3) # sched: [3:1.00]
; SKX-NEXT: fucomi %st(3) # sched: [2:1.00]
; SKX-NEXT: fucompi %st(3) # sched: [2:1.00]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;