From 8ae32b4f079ac43d0f838c2b7ebe9c26c9dd562b Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 17 Apr 2018 07:22:44 +0000 Subject: [PATCH] [X86] Add FP comparison scheduler classes Split VCMP/VMAX/VMIN instructions off to WriteFCmp and VCOMIS instructions off to WriteFCom instead of assuming they match WriteFAdd Differential Revision: https://reviews.llvm.org/D45656 llvm-svn: 330179 --- lib/Target/X86/X86InstrAVX512.td | 56 ++++++------- lib/Target/X86/X86InstrFPStack.td | 10 ++- lib/Target/X86/X86InstrSSE.td | 80 +++++++++--------- lib/Target/X86/X86SchedBroadwell.td | 40 ++------- lib/Target/X86/X86SchedHaswell.td | 32 +------- lib/Target/X86/X86SchedSandyBridge.td | 26 +----- lib/Target/X86/X86SchedSkylakeClient.td | 38 +-------- lib/Target/X86/X86SchedSkylakeServer.td | 104 ++---------------------- lib/Target/X86/X86Schedule.td | 4 +- lib/Target/X86/X86ScheduleAtom.td | 2 + lib/Target/X86/X86ScheduleBtVer2.td | 24 +----- lib/Target/X86/X86ScheduleSLM.td | 2 + lib/Target/X86/X86ScheduleZnver1.td | 2 + test/CodeGen/X86/x87-schedule.ll | 24 +++--- 14 files changed, 117 insertions(+), 327 deletions(-) diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index c5a76da3a5b..bd718ad19e5 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -2051,10 +2051,10 @@ multiclass avx512_cmp_scalar, AVX512XSIi8Base; + WriteFCmp>, AVX512XSIi8Base; let ExeDomain = SSEPackedDouble in defm VCMPSDZ : avx512_cmp_scalar, AVX512XDIi8Base, VEX_W; + WriteFCmp>, AVX512XDIi8Base, VEX_W; } multiclass avx512_icmp_packed opc, string OpcodeStr, PatFrag OpNode, @@ -2511,9 +2511,9 @@ multiclass avx512_vcmp { } } -defm VCMPPD : avx512_vcmp, +defm VCMPPD : avx512_vcmp, AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; -defm VCMPPS : avx512_vcmp, +defm VCMPPS : avx512_vcmp, AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; @@ -4906,9 +4906,9 @@ defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds, WriteFMul, 1>; defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds, WriteFAdd, 0>; defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds, WriteFDiv, 0>; defm VMIN : avx512_binop_s_sae <0x5D, "vmin", X86fmin, X86fmins, X86fminRnds, - WriteFAdd, 0>; + WriteFCmp, 0>; defm VMAX : avx512_binop_s_sae <0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds, - WriteFAdd, 0>; + WriteFCmp, 0>; // MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use // X86fminc and X86fmaxc instead of X86fmin and X86fmax @@ -4932,19 +4932,19 @@ multiclass avx512_comutable_binop_s opc, string OpcodeStr, } } defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc, - WriteFAdd>, XS, EVEX_4V, VEX_LIG, + WriteFCmp>, XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc, - WriteFAdd>, XD, VEX_W, EVEX_4V, VEX_LIG, + WriteFCmp>, XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc, - WriteFAdd>, XS, EVEX_4V, VEX_LIG, + WriteFCmp>, XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc, - WriteFAdd>, XD, VEX_W, EVEX_4V, VEX_LIG, + WriteFCmp>, XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; multiclass avx512_fp_packed opc, string OpcodeStr, SDPatternOperator OpNode, @@ -5050,13 +5050,13 @@ defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512, WriteFAdd>, avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, WriteFAdd>; defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512, WriteFDiv>, avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, WriteFDiv>; -defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512, WriteFAdd, 0>, - avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, WriteFAdd>; -defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512, WriteFAdd, 0>, - avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, WriteFAdd>; +defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512, WriteFCmp, 0>, + avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, WriteFCmp>; +defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512, WriteFCmp, 0>, + avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, WriteFCmp>; let isCodeGenOnly = 1 in { - defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512, WriteFAdd, 1>; - defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512, WriteFAdd, 1>; + defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512, WriteFCmp, 1>; + defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512, WriteFCmp, 1>; } defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI, WriteFAdd, 1>; defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI, WriteFAdd, 0>; @@ -7732,44 +7732,44 @@ multiclass avx512_ord_cmp_sae opc, X86VectorVTInfo _, } let Defs = [EFLAGS], Predicates = [HasAVX512] in { - defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", WriteFAdd>, + defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", WriteFCom>, AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; - defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", WriteFAdd>, + defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", WriteFCom>, AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; - defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", WriteFAdd>, + defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", WriteFCom>, AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; - defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", WriteFAdd>, + defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", WriteFCom>, AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; } let Defs = [EFLAGS], Predicates = [HasAVX512] in { defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32, - "ucomiss", WriteFAdd>, PS, EVEX, VEX_LIG, + "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64, - "ucomisd", WriteFAdd>, PD, EVEX, + "ucomisd", WriteFCom>, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; let Pattern = [] in { defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32, - "comiss", WriteFAdd>, PS, EVEX, VEX_LIG, + "comiss", WriteFCom>, PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64, - "comisd", WriteFAdd>, PD, EVEX, + "comisd", WriteFCom>, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; } let isCodeGenOnly = 1 in { defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem, - sse_load_f32, "ucomiss", WriteFAdd>, PS, EVEX, VEX_LIG, + sse_load_f32, "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem, - sse_load_f64, "ucomisd", WriteFAdd>, PD, EVEX, + sse_load_f64, "ucomisd", WriteFCom>, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem, - sse_load_f32, "comiss", WriteFAdd>, PS, EVEX, VEX_LIG, + sse_load_f32, "comiss", WriteFCom>, PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem, - sse_load_f64, "comisd", WriteFAdd>, PD, EVEX, + sse_load_f64, "comisd", WriteFCom>, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; } } diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index 23f986d2dee..19a5b406158 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -277,6 +277,8 @@ def SUB_FPrST0 : FPrST0PInst; def SUB_FST0r : FPST0rInst ; def SUBR_FrST0 : FPrST0Inst ; def SUBR_FPrST0 : FPrST0PInst; +} // SchedRW +let SchedRW = [WriteFCom] in { def COM_FST0r : FPST0rInst ; def COMP_FST0r : FPST0rInst ; } // SchedRW @@ -320,7 +322,7 @@ defm SIN : FPUnary; defm COS : FPUnary; } -let SchedRW = [WriteFAdd] in { +let SchedRW = [WriteFCom] in { let hasSideEffects = 0 in { def TST_Fp32 : FpIf32<(outs), (ins RFP32:$src), OneArgFP, []>; def TST_Fp64 : FpIf64<(outs), (ins RFP64:$src), OneArgFP, []>; @@ -333,7 +335,7 @@ def TST_F : FPI<0xD9, MRM_E4, (outs), (ins), "ftst">; // Versions of FP instructions that take a single memory operand. Added for the // disassembler; remove as they are included with patterns elsewhere. -let SchedRW = [WriteFAddLd] in { +let SchedRW = [WriteFComLd] in { def FCOM32m : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom{s}\t$src">; def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp{s}\t$src">; @@ -568,7 +570,7 @@ def LD_F1 : FPI<0xD9, MRM_E8, (outs), (ins), "fld1">; } // Floating point compares. -let SchedRW = [WriteFAdd] in { +let SchedRW = [WriteFCom] in { def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP, [(set FPSW, (trunc (X86cmp RFP32:$lhs, RFP32:$rhs)))]>; def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP, @@ -578,7 +580,7 @@ def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP, } // SchedRW } // Defs = [FPSW] -let SchedRW = [WriteFAdd] in { +let SchedRW = [WriteFCom] in { // CC = ST(0) cmp ST(i) let Defs = [EFLAGS, FPSW] in { def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP, diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 558903e9308..5b4f29c8059 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -1854,23 +1854,23 @@ let ExeDomain = SSEPackedSingle in defm VCMPSS : sse12_cmp_scalar, XS, VEX_4V, VEX_LIG, VEX_WIG; + WriteFCmp>, XS, VEX_4V, VEX_LIG, VEX_WIG; let ExeDomain = SSEPackedDouble in defm VCMPSD : sse12_cmp_scalar, // same latency as 32 bit compare + WriteFCmp>, // same latency as 32 bit compare XD, VEX_4V, VEX_LIG, VEX_WIG; let Constraints = "$src1 = $dst" in { let ExeDomain = SSEPackedSingle in defm CMPSS : sse12_cmp_scalar, XS; + "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}", WriteFCmp>, XS; let ExeDomain = SSEPackedDouble in defm CMPSD : sse12_cmp_scalar, XD; + "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}", WriteFCmp>, XD; } multiclass sse12_cmp_scalar_int, XS, VEX_4V; + WriteFCmp, sse_load_f32>, XS, VEX_4V; let ExeDomain = SSEPackedDouble in defm VCMPSD : sse12_cmp_scalar_int, // same latency as f32 + WriteFCmp, sse_load_f64>, // same latency as f32 XD, VEX_4V; let Constraints = "$src1 = $dst" in { let ExeDomain = SSEPackedSingle in defm CMPSS : sse12_cmp_scalar_int, XS; + WriteFCmp, sse_load_f32>, XS; let ExeDomain = SSEPackedDouble in defm CMPSD : sse12_cmp_scalar_int, XD; + WriteFCmp, sse_load_f64>, XD; } } @@ -1951,49 +1951,49 @@ let mayLoad = 1 in let Defs = [EFLAGS] in { defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, - "ucomiss", WriteFAdd>, PS, VEX, VEX_LIG, VEX_WIG; + "ucomiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG; defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, - "ucomisd", WriteFAdd>, PD, VEX, VEX_LIG, VEX_WIG; + "ucomisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG; let Pattern = [] in { defm VCOMISS : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32, - "comiss", WriteFAdd>, PS, VEX, VEX_LIG, VEX_WIG; + "comiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG; defm VCOMISD : sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64, - "comisd", WriteFAdd>, PD, VEX, VEX_LIG, VEX_WIG; + "comisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG; } let isCodeGenOnly = 1 in { defm VUCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem, - sse_load_f32, "ucomiss", WriteFAdd>, PS, VEX, VEX_WIG; + sse_load_f32, "ucomiss", WriteFCom>, PS, VEX, VEX_WIG; defm VUCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem, - sse_load_f64, "ucomisd", WriteFAdd>, PD, VEX, VEX_WIG; + sse_load_f64, "ucomisd", WriteFCom>, PD, VEX, VEX_WIG; defm VCOMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem, - sse_load_f32, "comiss", WriteFAdd>, PS, VEX, VEX_WIG; + sse_load_f32, "comiss", WriteFCom>, PS, VEX, VEX_WIG; defm VCOMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem, - sse_load_f64, "comisd", WriteFAdd>, PD, VEX, VEX_WIG; + sse_load_f64, "comisd", WriteFCom>, PD, VEX, VEX_WIG; } defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, - "ucomiss", WriteFAdd>, PS; + "ucomiss", WriteFCom>, PS; defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, - "ucomisd", WriteFAdd>, PD; + "ucomisd", WriteFCom>, PD; let Pattern = [] in { defm COMISS : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32, - "comiss", WriteFAdd>, PS; + "comiss", WriteFCom>, PS; defm COMISD : sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64, - "comisd", WriteFAdd>, PD; + "comisd", WriteFCom>, PD; } let isCodeGenOnly = 1 in { defm UCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem, - sse_load_f32, "ucomiss", WriteFAdd>, PS; + sse_load_f32, "ucomiss", WriteFCom>, PS; defm UCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem, - sse_load_f64, "ucomisd", WriteFAdd>, PD; + sse_load_f64, "ucomisd", WriteFCom>, PD; defm COMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem, - sse_load_f32, "comiss", WriteFAdd>, PS; + sse_load_f32, "comiss", WriteFCom>, PS; defm COMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem, - sse_load_f64, "comisd", WriteFAdd>, PD; + sse_load_f64, "comisd", WriteFCom>, PD; } } // Defs = [EFLAGS] @@ -2028,28 +2028,28 @@ multiclass sse12_cmp_packed, PS, VEX_4V, VEX_WIG; + WriteFCmp, SSEPackedSingle, loadv4f32>, PS, VEX_4V, VEX_WIG; defm VCMPPD : sse12_cmp_packed, PD, VEX_4V, VEX_WIG; + WriteFCmp, SSEPackedDouble, loadv2f64>, PD, VEX_4V, VEX_WIG; defm VCMPPSY : sse12_cmp_packed, PS, VEX_4V, VEX_L; + WriteFCmp, SSEPackedSingle, loadv8f32>, PS, VEX_4V, VEX_L; defm VCMPPDY : sse12_cmp_packed, PD, VEX_4V, VEX_L; + WriteFCmp, SSEPackedDouble, loadv4f64>, PD, VEX_4V, VEX_L; let Constraints = "$src1 = $dst" in { defm CMPPS : sse12_cmp_packed, PS; + WriteFCmp, SSEPackedSingle, memopv4f32>, PS; defm CMPPD : sse12_cmp_packed, PD; + WriteFCmp, SSEPackedDouble, memopv2f64>, PD; } def CommutableCMPCC : PatLeaf<(imm), [{ @@ -2583,19 +2583,19 @@ let isCommutable = 0 in { defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, WriteFDiv>, basic_sse12_fp_binop_s<0x5E, "div", fdiv, WriteFDiv>, basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, WriteFDiv>; - defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, WriteFAdd>, - basic_sse12_fp_binop_s<0x5F, "max", X86fmax, WriteFAdd>, - basic_sse12_fp_binop_s_int<0x5F, "max", X86fmaxs, WriteFAdd>; - defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, WriteFAdd>, - basic_sse12_fp_binop_s<0x5D, "min", X86fmin, WriteFAdd>, - basic_sse12_fp_binop_s_int<0x5D, "min", X86fmins, WriteFAdd>; + defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, WriteFCmp>, + basic_sse12_fp_binop_s<0x5F, "max", X86fmax, WriteFCmp>, + basic_sse12_fp_binop_s_int<0x5F, "max", X86fmaxs, WriteFCmp>; + defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, WriteFCmp>, + basic_sse12_fp_binop_s<0x5D, "min", X86fmin, WriteFCmp>, + basic_sse12_fp_binop_s_int<0x5D, "min", X86fmins, WriteFCmp>; } let isCodeGenOnly = 1 in { - defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, WriteFAdd>, - basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, WriteFAdd>; - defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, WriteFAdd>, - basic_sse12_fp_binop_s<0x5D, "min", X86fminc, WriteFAdd>; + defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, WriteFCmp>, + basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, WriteFCmp>; + defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, WriteFCmp>, + basic_sse12_fp_binop_s<0x5D, "min", X86fminc, WriteFCmp>; } // Patterns used to select SSE scalar fp arithmetic instructions from diff --git a/lib/Target/X86/X86SchedBroadwell.td b/lib/Target/X86/X86SchedBroadwell.td index 79a25959262..6c1f6fc8c13 100755 --- a/lib/Target/X86/X86SchedBroadwell.td +++ b/lib/Target/X86/X86SchedBroadwell.td @@ -154,7 +154,9 @@ def : WriteRes { let Latency = 5; } def : WriteRes; def : WriteRes; -defm : BWWriteResPair; // Floating point add/sub/compare. +defm : BWWriteResPair; // Floating point add/sub. +defm : BWWriteResPair; // Floating point compare. +defm : BWWriteResPair; // Floating point compare to flags. defm : BWWriteResPair; // Floating point multiplication. defm : BWWriteResPair; // 10-14 cycles. // Floating point division. defm : BWWriteResPair; // Floating point square root. @@ -843,29 +845,13 @@ def: InstRW<[BWWriteResGroup27], (instregex "ADD_FPrST0", "(V?)ADDSSrr", "(V?)ADDSUBPD(Y?)rr", "(V?)ADDSUBPS(Y?)rr", - "(V?)CMPPD(Y?)rri", - "(V?)CMPPS(Y?)rri", - "(V?)CMPSDrr", - "(V?)CMPSSrr", - "(V?)COMISDrr", - "(V?)COMISSrr", "(V?)CVTDQ2PS(Y?)rr", "(V?)CVTPS2DQ(Y?)rr", "(V?)CVTTPS2DQ(Y?)rr", - "(V?)MAX(C?)PD(Y?)rr", - "(V?)MAX(C?)PS(Y?)rr", - "(V?)MAX(C?)SDrr", - "(V?)MAX(C?)SSrr", - "(V?)MIN(C?)PD(Y?)rr", - "(V?)MIN(C?)PS(Y?)rr", - "(V?)MIN(C?)SDrr", - "(V?)MIN(C?)SSrr", "(V?)SUBPD(Y?)rr", "(V?)SUBPS(Y?)rr", "(V?)SUBSDrr", - "(V?)SUBSSrr", - "(V?)UCOMISDrr", - "(V?)UCOMISSrr")>; + "(V?)SUBSSrr")>; def BWWriteResGroup27_16 : SchedWriteRes<[BWPort1, BWPort0156]> { let Latency = 3; @@ -1832,29 +1818,13 @@ def: InstRW<[BWWriteResGroup91], (instregex "MMX_CVTPI2PSirm", "(V?)ADDSSrm", "(V?)ADDSUBPDrm", "(V?)ADDSUBPSrm", - "(V?)CMPPDrmi", - "(V?)CMPPSrmi", - "(V?)CMPSDrm", - "(V?)CMPSSrm", - "(V?)COMISDrm", - "(V?)COMISSrm", "(V?)CVTDQ2PSrm", "(V?)CVTPS2DQrm", "(V?)CVTTPS2DQrm", - "(V?)MAX(C?)PDrm", - "(V?)MAX(C?)PSrm", - "(V?)MAX(C?)SDrm", - "(V?)MAX(C?)SSrm", - "(V?)MIN(C?)PDrm", - "(V?)MIN(C?)PSrm", - "(V?)MIN(C?)SDrm", - "(V?)MIN(C?)SSrm", "(V?)SUBPDrm", "(V?)SUBPSrm", "(V?)SUBSDrm", - "(V?)SUBSSrm", - "(V?)UCOMISDrm", - "(V?)UCOMISSrm")>; + "(V?)SUBSSrm")>; def BWWriteResGroup91_16 : SchedWriteRes<[BWPort1, BWPort0156, BWPort23]> { let Latency = 8; diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td index 8022ddad111..d25420c420d 100644 --- a/lib/Target/X86/X86SchedHaswell.td +++ b/lib/Target/X86/X86SchedHaswell.td @@ -149,6 +149,8 @@ def : WriteRes { let Latency = 5; } def : WriteRes; defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; // 10-14 cycles. defm : HWWriteResPair; @@ -1041,16 +1043,12 @@ def: InstRW<[HWWriteResGroup12], (instregex "FCOM32m", "(V?)ADDSSrm", "(V?)CMPSDrm", "(V?)CMPSSrm", - "(V?)COMISDrm", - "(V?)COMISSrm", "(V?)MAX(C?)SDrm", "(V?)MAX(C?)SSrm", "(V?)MIN(C?)SDrm", "(V?)MIN(C?)SSrm", "(V?)SUBSDrm", - "(V?)SUBSSrm", - "(V?)UCOMISDrm", - "(V?)UCOMISSrm")>; + "(V?)SUBSSrm")>; def HWWriteResGroup13 : SchedWriteRes<[HWPort5,HWPort23]> { let Latency = 7; @@ -1730,29 +1728,13 @@ def: InstRW<[HWWriteResGroup50], (instregex "ADD_FPrST0", "(V?)ADDSSrr", "(V?)ADDSUBPD(Y?)rr", "(V?)ADDSUBPS(Y?)rr", - "(V?)CMPPD(Y?)rri", - "(V?)CMPPS(Y?)rri", - "(V?)CMPSDrr", - "(V?)CMPSSrr", - "(V?)COMISDrr", - "(V?)COMISSrr", "(V?)CVTDQ2PS(Y?)rr", "(V?)CVTPS2DQ(Y?)rr", "(V?)CVTTPS2DQ(Y?)rr", - "(V?)MAX(C?)PD(Y?)rr", - "(V?)MAX(C?)PS(Y?)rr", - "(V?)MAX(C?)SDrr", - "(V?)MAX(C?)SSrr", - "(V?)MIN(C?)PD(Y?)rr", - "(V?)MIN(C?)PS(Y?)rr", - "(V?)MIN(C?)SDrr", - "(V?)MIN(C?)SSrr", "(V?)SUBPD(Y?)rr", "(V?)SUBPS(Y?)rr", "(V?)SUBSDrr", - "(V?)SUBSSrr", - "(V?)UCOMISDrr", - "(V?)UCOMISSrr")>; + "(V?)SUBSSrr")>; def HWWriteResGroup50_16i : SchedWriteRes<[HWPort1, HWPort0156]> { let Latency = 3; @@ -1804,15 +1786,9 @@ def: InstRW<[HWWriteResGroup52], (instregex "(V?)ADDPDrm", "(V?)ADDPSrm", "(V?)ADDSUBPDrm", "(V?)ADDSUBPSrm", - "(V?)CMPPDrmi", - "(V?)CMPPSrmi", "(V?)CVTDQ2PSrm", "(V?)CVTPS2DQrm", "(V?)CVTTPS2DQrm", - "(V?)MAX(C?)PDrm", - "(V?)MAX(C?)PSrm", - "(V?)MIN(C?)PDrm", - "(V?)MIN(C?)PSrm", "(V?)SUBPDrm", "(V?)SUBPSrm")>; diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td index aefbfb64cf8..c95771b4c2c 100644 --- a/lib/Target/X86/X86SchedSandyBridge.td +++ b/lib/Target/X86/X86SchedSandyBridge.td @@ -139,6 +139,8 @@ def : WriteRes { let Latency = 6; } def : WriteRes; defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; @@ -685,21 +687,9 @@ def: InstRW<[SBWriteResGroup21], (instregex "ADD_FPrST0", "(V?)ADDSSrr", "(V?)ADDSUBPD(Y?)rr", "(V?)ADDSUBPS(Y?)rr", - "(V?)CMPPD(Y?)rri", - "(V?)CMPPS(Y?)rri", - "(V?)CMPSDrr", - "(V?)CMPSSrr", "(V?)CVTDQ2PS(Y?)rr", "(V?)CVTPS2DQ(Y?)rr", "(V?)CVTTPS2DQ(Y?)rr", - "(V?)MAX(C?)PD(Y?)rr", - "(V?)MAX(C?)PS(Y?)rr", - "(V?)MAX(C?)SDrr", - "(V?)MAX(C?)SSrr", - "(V?)MIN(C?)PD(Y?)rr", - "(V?)MIN(C?)PS(Y?)rr", - "(V?)MIN(C?)SDrr", - "(V?)MIN(C?)SSrr", "(V?)ROUNDPD(Y?)r", "(V?)ROUNDPS(Y?)r", "(V?)ROUNDSDr", @@ -1562,23 +1552,11 @@ def: InstRW<[SBWriteResGroup90], (instregex "MMX_CVTPI2PSirm", "(V?)ADDSSrm", "(V?)ADDSUBPDrm", "(V?)ADDSUBPSrm", - "(V?)CMPPDrmi", - "(V?)CMPPSrmi", - "(V?)CMPSDrm", - "(V?)CMPSSrm", "(V?)CVTDQ2PSrm", "(V?)CVTPS2DQrm", "(V?)CVTSI642SDrm", "(V?)CVTSI2SDrm", "(V?)CVTTPS2DQrm", - "(V?)MAX(C?)PDrm", - "(V?)MAX(C?)PSrm", - "(V?)MAX(C?)SDrm", - "(V?)MAX(C?)SSrm", - "(V?)MIN(C?)PDrm", - "(V?)MIN(C?)PSrm", - "(V?)MIN(C?)SDrm", - "(V?)MIN(C?)SSrm", "(V?)ROUNDPDm", "(V?)ROUNDPSm", "(V?)ROUNDSDm", diff --git a/lib/Target/X86/X86SchedSkylakeClient.td b/lib/Target/X86/X86SchedSkylakeClient.td index 6511206992d..b36f5797bb5 100644 --- a/lib/Target/X86/X86SchedSkylakeClient.td +++ b/lib/Target/X86/X86SchedSkylakeClient.td @@ -151,7 +151,9 @@ def : WriteRes { let Latency = 6; } def : WriteRes; def : WriteRes; -defm : SKLWriteResPair; // Floating point add/sub/compare. +defm : SKLWriteResPair; // Floating point add/sub. +defm : SKLWriteResPair; // Floating point compare. +defm : SKLWriteResPair; // Floating point compare to flags. defm : SKLWriteResPair; // Floating point multiplication. defm : SKLWriteResPair; // 10-14 cycles. // Floating point division. defm : SKLWriteResPair; // Floating point square root. @@ -672,14 +674,10 @@ def SKLWriteResGroup12 : SchedWriteRes<[SKLPort0]> { } def: InstRW<[SKLWriteResGroup12], (instregex "MMX_MOVD64from64rr", "MMX_MOVD64grr", - "(V?)COMISDrr", - "(V?)COMISSrr", "(V?)MOVPDI2DIrr", "(V?)MOVPQIto64rr", "VTESTPD(Y?)rr", - "VTESTPS(Y?)rr", - "(V?)UCOMISDrr", - "(V?)UCOMISSrr")>; + "VTESTPS(Y?)rr")>; def SKLWriteResGroup13 : SchedWriteRes<[SKLPort5]> { let Latency = 2; @@ -1067,21 +1065,9 @@ def: InstRW<[SKLWriteResGroup48], (instregex "(V?)ADDPD(Y?)rr", "(V?)ADDSSrr", "(V?)ADDSUBPD(Y?)rr", "(V?)ADDSUBPS(Y?)rr", - "(V?)CMPPD(Y?)rri", - "(V?)CMPPS(Y?)rri", - "(V?)CMPSDrr", - "(V?)CMPSSrr", "(V?)CVTDQ2PS(Y?)rr", "(V?)CVTPS2DQ(Y?)rr", "(V?)CVTTPS2DQ(Y?)rr", - "(V?)MAX(C?)PD(Y?)rr", - "(V?)MAX(C?)PS(Y?)rr", - "(V?)MAX(C?)SDrr", - "(V?)MAX(C?)SSrr", - "(V?)MIN(C?)PD(Y?)rr", - "(V?)MIN(C?)PS(Y?)rr", - "(V?)MIN(C?)SDrr", - "(V?)MIN(C?)SSrr", "(V?)MULPD(Y?)rr", "(V?)MULPS(Y?)rr", "(V?)MULSDrr", @@ -1547,16 +1533,6 @@ def SKLWriteResGroup86 : SchedWriteRes<[SKLPort0,SKLPort5]> { } def: InstRW<[SKLWriteResGroup86], (instregex "VCVTDQ2PDYrr")>; -def SKLWriteResGroup87 : SchedWriteRes<[SKLPort0,SKLPort23]> { - let Latency = 7; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup87], (instregex "(V?)COMISDrm", - "(V?)COMISSrm", - "(V?)UCOMISDrm", - "(V?)UCOMISSrm")>; - def SKLWriteResGroup88 : SchedWriteRes<[SKLPort5,SKLPort23]> { let Latency = 7; let NumMicroOps = 2; @@ -2196,17 +2172,11 @@ def: InstRW<[SKLWriteResGroup134], (instregex "(V?)ADDPDrm", "(V?)ADDPSrm", "(V?)ADDSUBPDrm", "(V?)ADDSUBPSrm", - "(V?)CMPPDrmi", - "(V?)CMPPSrmi", "(V?)CVTDQ2PSrm", "(V?)CVTPH2PSYrm", "(V?)CVTPS2DQrm", "(V?)CVTSS2SDrm", "(V?)CVTTPS2DQrm", - "(V?)MAX(C?)PDrm", - "(V?)MAX(C?)PSrm", - "(V?)MIN(C?)PDrm", - "(V?)MIN(C?)PSrm", "(V?)MULPDrm", "(V?)MULPSrm", "(V?)PHMINPOSUWrm", diff --git a/lib/Target/X86/X86SchedSkylakeServer.td b/lib/Target/X86/X86SchedSkylakeServer.td index 5a79f240273..283a3ed37e5 100755 --- a/lib/Target/X86/X86SchedSkylakeServer.td +++ b/lib/Target/X86/X86SchedSkylakeServer.td @@ -151,7 +151,9 @@ def : WriteRes { let Latency = 5; } def : WriteRes; def : WriteRes; -defm : SKXWriteResPair; // Floating point add/sub/compare. +defm : SKXWriteResPair; // Floating point add/sub. +defm : SKXWriteResPair; // Floating point compare. +defm : SKXWriteResPair; // Floating point compare to flags. defm : SKXWriteResPair; // Floating point multiplication. defm : SKXWriteResPair; // 10-14 cycles. // Floating point division. defm : SKXWriteResPair; // Floating point square root. @@ -1406,18 +1408,10 @@ def SKXWriteResGroup12 : SchedWriteRes<[SKXPort0]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKXWriteResGroup12], (instregex "COMISDrr", - "COMISSrr", - "MMX_MOVD64from64rr", +def: InstRW<[SKXWriteResGroup12], (instregex "MMX_MOVD64from64rr", "MMX_MOVD64grr", "MOVPDI2DIrr", "MOVPQIto64rr", - "UCOMISDrr", - "UCOMISSrr", - "VCOMISDZrr(b?)", - "VCOMISDrr", - "VCOMISSZrr(b?)", - "VCOMISSrr", "VMOVPDI2DIZrr", "VMOVPDI2DIrr", "VMOVPQIto64Zrr", @@ -1425,11 +1419,7 @@ def: InstRW<[SKXWriteResGroup12], (instregex "COMISDrr", "VTESTPDYrr", "VTESTPDrr", "VTESTPSYrr", - "VTESTPSrr", - "VUCOMISDZrr(b?)", - "VUCOMISDrr", - "VUCOMISSZrr(b?)", - "VUCOMISSrr")>; + "VTESTPSrr")>; def SKXWriteResGroup13 : SchedWriteRes<[SKXPort5]> { let Latency = 2; @@ -2162,21 +2152,9 @@ def: InstRW<[SKXWriteResGroup50], (instregex "ADDPDrr", "ADDSSrr", "ADDSUBPDrr", "ADDSUBPSrr", - "CMPPDrri", - "CMPPSrri", - "CMPSDrr", - "CMPSSrr", "CVTDQ2PSrr", "CVTPS2DQrr", "CVTTPS2DQrr", - "MAX(C?)PDrr", - "MAX(C?)PSrr", - "MAX(C?)SDrr", - "MAX(C?)SSrr", - "MIN(C?)PDrr", - "MIN(C?)PSrr", - "MIN(C?)SDrr", - "MIN(C?)SSrr", "MULPDrr", "MULPSrr", "MULSDrr", @@ -2212,12 +2190,6 @@ def: InstRW<[SKXWriteResGroup50], (instregex "ADDPDrr", "VADDSUBPDrr", "VADDSUBPSYrr", "VADDSUBPSrr", - "VCMPPDYrri", - "VCMPPDrri", - "VCMPPSYrri", - "VCMPPSrri", - "VCMPSDrr", - "VCMPSSrr", "VCVTDQ2PSYrr", "VCVTDQ2PSZ128rr", "VCVTDQ2PSZ256rr", @@ -2284,34 +2256,6 @@ def: InstRW<[SKXWriteResGroup50], (instregex "ADDPDrr", "VGETMANTPSZrri", "VGETMANTSDZ128rri", "VGETMANTSSZ128rri", - "VMAX(C?)PDYrr", - "VMAX(C?)PDZ128rr", - "VMAX(C?)PDZ256rr", - "VMAX(C?)PDZrr", - "VMAX(C?)PDrr", - "VMAX(C?)PSYrr", - "VMAX(C?)PSZ128rr", - "VMAX(C?)PSZ256rr", - "VMAX(C?)PSZrr", - "VMAX(C?)PSrr", - "VMAX(C?)SDZrr", - "VMAX(C?)SDrr", - "VMAX(C?)SSZrr", - "VMAX(C?)SSrr", - "VMIN(C?)PDYrr", - "VMIN(C?)PDZ128rr", - "VMIN(C?)PDZ256rr", - "VMIN(C?)PDZrr", - "VMIN(C?)PDrr", - "VMIN(C?)PSYrr", - "VMIN(C?)PSZ128rr", - "VMIN(C?)PSZ256rr", - "VMIN(C?)PSZrr", - "VMIN(C?)PSrr", - "VMIN(C?)SDZrr", - "VMIN(C?)SDrr", - "VMIN(C?)SSZrr", - "VMIN(C?)SSrr", "VMULPDYrr", "VMULPDZ128rr", "VMULPDZ256rr", @@ -3145,24 +3089,6 @@ def SKXWriteResGroup90 : SchedWriteRes<[SKXPort0,SKXPort5]> { } def: InstRW<[SKXWriteResGroup90], (instregex "VCVTDQ2PDYrr")>; -def SKXWriteResGroup91 : SchedWriteRes<[SKXPort0,SKXPort23]> { - let Latency = 7; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKXWriteResGroup91], (instregex "COMISDrm", - "COMISSrm", - "UCOMISDrm", - "UCOMISSrm", - "VCOMISDZrm(b?)", - "VCOMISDrm", - "VCOMISSZrm(b?)", - "VCOMISSrm", - "VUCOMISDZrm(b?)", - "VUCOMISDrm", - "VUCOMISSZrm(b?)", - "VUCOMISSrm")>; - def SKXWriteResGroup92 : SchedWriteRes<[SKXPort5,SKXPort23]> { let Latency = 7; let NumMicroOps = 2; @@ -4744,16 +4670,10 @@ def: InstRW<[SKXWriteResGroup149], (instregex "ADDPDrm", "ADDPSrm", "ADDSUBPDrm", "ADDSUBPSrm", - "CMPPDrmi", - "CMPPSrmi", "CVTDQ2PSrm", "CVTPS2DQrm", "CVTSS2SDrm", "CVTTPS2DQrm", - "MAX(C?)PDrm", - "MAX(C?)PSrm", - "MIN(C?)PDrm", - "MIN(C?)PSrm", "MULPDrm", "MULPSrm", "PHMINPOSUWrm", @@ -4775,8 +4695,6 @@ def: InstRW<[SKXWriteResGroup149], (instregex "ADDPDrm", "VADDSSZrm", "VADDSUBPDrm", "VADDSUBPSrm", - "VCMPPDrmi", - "VCMPPSrmi", "VCVTDQ2PDZ128rm(b?)", "VCVTDQ2PSZ128rm(b?)", "VCVTDQ2PSrm", @@ -4817,18 +4735,6 @@ def: InstRW<[SKXWriteResGroup149], (instregex "ADDPDrm", "VGETMANTPSZ128rm(b?)i", "VGETMANTSDZ128rmi(b?)", "VGETMANTSSZ128rmi(b?)", - "VMAX(C?)PDZ128rm(b?)", - "VMAX(C?)PDrm", - "VMAX(C?)PSZ128rm(b?)", - "VMAX(C?)PSrm", - "VMAX(C?)SDZrm", - "VMAX(C?)SSZrm", - "VMIN(C?)PDZ128rm(b?)", - "VMIN(C?)PDrm", - "VMIN(C?)PSZ128rm(b?)", - "VMIN(C?)PSrm", - "VMIN(C?)SDZrm", - "VMIN(C?)SSZrm", "VMULPDZ128rm(b?)", "VMULPDrm", "VMULPSZ128rm(b?)", diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td index a3d715dbcb2..cb709e9f698 100644 --- a/lib/Target/X86/X86Schedule.td +++ b/lib/Target/X86/X86Schedule.td @@ -78,7 +78,9 @@ defm WriteJump : X86SchedWritePair; def WriteFLoad : SchedWrite; def WriteFStore : SchedWrite; def WriteFMove : SchedWrite; -defm WriteFAdd : X86SchedWritePair; // Floating point add/sub/compare. +defm WriteFAdd : X86SchedWritePair; // Floating point add/sub. +defm WriteFCmp : X86SchedWritePair; // Floating point compare. +defm WriteFCom : X86SchedWritePair; // Floating point compare to flags. defm WriteFMul : X86SchedWritePair; // Floating point multiplication. defm WriteFDiv : X86SchedWritePair; // Floating point division. defm WriteFSqrt : X86SchedWritePair; // Floating point square root. diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td index 8305c6c1e94..fa398d0e785 100644 --- a/lib/Target/X86/X86ScheduleAtom.td +++ b/lib/Target/X86/X86ScheduleAtom.td @@ -203,6 +203,8 @@ def : WriteRes; def : WriteRes; defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; diff --git a/lib/Target/X86/X86ScheduleBtVer2.td b/lib/Target/X86/X86ScheduleBtVer2.td index 2ab593af326..3e4c0a6d5e0 100644 --- a/lib/Target/X86/X86ScheduleBtVer2.td +++ b/lib/Target/X86/X86ScheduleBtVer2.td @@ -294,6 +294,8 @@ def : WriteRes; def : WriteRes; defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; defm : JWriteResFpuPair; defm : JWriteResFpuPair; // NOTE: Doesn't exist on Jaguar. defm : JWriteResFpuPair; @@ -704,28 +706,6 @@ def JWriteVMOVNTPYSt: SchedWriteRes<[JFPU1, JSTC, JSAGU]> { } def : InstRW<[JWriteVMOVNTPYSt], (instrs VMOVNTDQYmr, VMOVNTPDYmr, VMOVNTPSYmr)>; -def JWriteFComi : SchedWriteRes<[JFPU0, JFPA, JALU0]> { - let Latency = 3; -} -def : InstRW<[JWriteFComi], (instregex "(V)?(U)?COMIS(D|S)rr")>; - -def JWriteFComiLd : SchedWriteRes<[JLAGU, JFPU0, JFPA, JALU0]> { - let Latency = 8; -} -def : InstRW<[JWriteFComiLd], (instregex "(V)?(U)?COMIS(D|S)rm")>; - -def JWriteFCmp: SchedWriteRes<[JFPU0, JFPA]> { - let Latency = 2; -} -def : InstRW<[JWriteFCmp], (instregex "(V)?M(AX|IN)(P|S)(D|S)rr", - "(V)?CMPP(S|D)rri", "(V)?CMPS(S|D)rr")>; - -def JWriteFCmpLd: SchedWriteRes<[JLAGU, JFPU0, JFPA]> { - let Latency = 7; -} -def : InstRW<[JWriteFCmpLd], (instregex "(V)?M(AX|IN)(P|S)(D|S)rm", - "(V)?CMPP(S|D)rmi", "(V)?CMPS(S|D)rm")>; - def JWriteFCmpY: SchedWriteRes<[JFPU0, JFPA]> { let Latency = 2; let ResourceCycles = [2, 2]; diff --git a/lib/Target/X86/X86ScheduleSLM.td b/lib/Target/X86/X86ScheduleSLM.td index 8147c94a2e7..8e913b48a12 100644 --- a/lib/Target/X86/X86ScheduleSLM.td +++ b/lib/Target/X86/X86ScheduleSLM.td @@ -125,6 +125,8 @@ def : WriteRes { let Latency = 3; } def : WriteRes; defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; diff --git a/lib/Target/X86/X86ScheduleZnver1.td b/lib/Target/X86/X86ScheduleZnver1.td index 2b775b44316..dc9438c8dae 100644 --- a/lib/Target/X86/X86ScheduleZnver1.td +++ b/lib/Target/X86/X86ScheduleZnver1.td @@ -193,6 +193,8 @@ def : WriteRes { let Latency = 8; } defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; +defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; defm : ZnWriteResFpuPair; diff --git a/test/CodeGen/X86/x87-schedule.ll b/test/CodeGen/X86/x87-schedule.ll index 5d01286cc60..bd58c18c68b 100644 --- a/test/CodeGen/X86/x87-schedule.ll +++ b/test/CodeGen/X86/x87-schedule.ll @@ -1187,16 +1187,16 @@ define void @test_fcomi_fcomip() optsize { ; SKYLAKE-LABEL: test_fcomi_fcomip: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fcomi %st(3) # sched: [3:1.00] -; SKYLAKE-NEXT: fcompi %st(3) # sched: [3:1.00] +; SKYLAKE-NEXT: fcomi %st(3) # sched: [2:1.00] +; SKYLAKE-NEXT: fcompi %st(3) # sched: [2:1.00] ; SKYLAKE-NEXT: #NO_APP ; SKYLAKE-NEXT: retl # sched: [6:0.50] ; ; SKX-LABEL: test_fcomi_fcomip: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: fcomi %st(3) # sched: [3:1.00] -; SKX-NEXT: fcompi %st(3) # sched: [3:1.00] +; SKX-NEXT: fcomi %st(3) # sched: [2:1.00] +; SKX-NEXT: fcompi %st(3) # sched: [2:1.00] ; SKX-NEXT: #NO_APP ; SKX-NEXT: retl # sched: [6:0.50] ; @@ -5073,14 +5073,14 @@ define void @test_ftst() optsize { ; SKYLAKE-LABEL: test_ftst: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: ftst # sched: [3:1.00] +; SKYLAKE-NEXT: ftst # sched: [2:1.00] ; SKYLAKE-NEXT: #NO_APP ; SKYLAKE-NEXT: retl # sched: [6:0.50] ; ; SKX-LABEL: test_ftst: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: ftst # sched: [3:1.00] +; SKX-NEXT: ftst # sched: [2:1.00] ; SKX-NEXT: #NO_APP ; SKX-NEXT: retl # sched: [6:0.50] ; @@ -5175,7 +5175,7 @@ define void @test_fucom_fucomp_fucompp() optsize { ; SKYLAKE-NEXT: fucom %st(3) # sched: [1:1.00] ; SKYLAKE-NEXT: fucomp %st(1) # sched: [1:1.00] ; SKYLAKE-NEXT: fucomp %st(3) # sched: [1:1.00] -; SKYLAKE-NEXT: fucompp # sched: [3:1.00] +; SKYLAKE-NEXT: fucompp # sched: [2:1.00] ; SKYLAKE-NEXT: #NO_APP ; SKYLAKE-NEXT: retl # sched: [6:0.50] ; @@ -5186,7 +5186,7 @@ define void @test_fucom_fucomp_fucompp() optsize { ; SKX-NEXT: fucom %st(3) # sched: [1:1.00] ; SKX-NEXT: fucomp %st(1) # sched: [1:1.00] ; SKX-NEXT: fucomp %st(3) # sched: [1:1.00] -; SKX-NEXT: fucompp # sched: [3:1.00] +; SKX-NEXT: fucompp # sched: [2:1.00] ; SKX-NEXT: #NO_APP ; SKX-NEXT: retl # sched: [6:0.50] ; @@ -5267,16 +5267,16 @@ define void @test_fucomi_fucomip() optsize { ; SKYLAKE-LABEL: test_fucomi_fucomip: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fucomi %st(3) # sched: [3:1.00] -; SKYLAKE-NEXT: fucompi %st(3) # sched: [3:1.00] +; SKYLAKE-NEXT: fucomi %st(3) # sched: [2:1.00] +; SKYLAKE-NEXT: fucompi %st(3) # sched: [2:1.00] ; SKYLAKE-NEXT: #NO_APP ; SKYLAKE-NEXT: retl # sched: [6:0.50] ; ; SKX-LABEL: test_fucomi_fucomip: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: fucomi %st(3) # sched: [3:1.00] -; SKX-NEXT: fucompi %st(3) # sched: [3:1.00] +; SKX-NEXT: fucomi %st(3) # sched: [2:1.00] +; SKX-NEXT: fucompi %st(3) # sched: [2:1.00] ; SKX-NEXT: #NO_APP ; SKX-NEXT: retl # sched: [6:0.50] ;