mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 04:02:41 +01:00
[X86] Add FP comparison scheduler classes
Split VCMP/VMAX/VMIN instructions off to WriteFCmp and VCOMIS instructions off to WriteFCom instead of assuming they match WriteFAdd.
Differential Revision: https://reviews.llvm.org/D45656
llvm-svn: 330179
This commit is contained in:
parent
d6a6778f3e
commit
8ae32b4f07
@ -2051,10 +2051,10 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd,
|
||||
let Predicates = [HasAVX512] in {
|
||||
let ExeDomain = SSEPackedSingle in
|
||||
defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd,
|
||||
WriteFAdd>, AVX512XSIi8Base;
|
||||
WriteFCmp>, AVX512XSIi8Base;
|
||||
let ExeDomain = SSEPackedDouble in
|
||||
defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd,
|
||||
WriteFAdd>, AVX512XDIi8Base, VEX_W;
|
||||
WriteFCmp>, AVX512XDIi8Base, VEX_W;
|
||||
}
|
||||
|
||||
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
|
||||
@ -2511,9 +2511,9 @@ multiclass avx512_vcmp<X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
|
||||
}
|
||||
}
|
||||
|
||||
defm VCMPPD : avx512_vcmp<WriteFAdd, avx512vl_f64_info>,
|
||||
defm VCMPPD : avx512_vcmp<WriteFCmp, avx512vl_f64_info>,
|
||||
AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
|
||||
defm VCMPPS : avx512_vcmp<WriteFAdd, avx512vl_f32_info>,
|
||||
defm VCMPPS : avx512_vcmp<WriteFCmp, avx512vl_f32_info>,
|
||||
AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
|
||||
|
||||
|
||||
@ -4906,9 +4906,9 @@ defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds, WriteFMul, 1>;
|
||||
defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds, WriteFAdd, 0>;
|
||||
defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds, WriteFDiv, 0>;
|
||||
defm VMIN : avx512_binop_s_sae <0x5D, "vmin", X86fmin, X86fmins, X86fminRnds,
|
||||
WriteFAdd, 0>;
|
||||
WriteFCmp, 0>;
|
||||
defm VMAX : avx512_binop_s_sae <0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds,
|
||||
WriteFAdd, 0>;
|
||||
WriteFCmp, 0>;
|
||||
|
||||
// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
|
||||
// X86fminc and X86fmaxc instead of X86fmin and X86fmax
|
||||
@ -4932,19 +4932,19 @@ multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
|
||||
}
|
||||
}
|
||||
defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
|
||||
WriteFAdd>, XS, EVEX_4V, VEX_LIG,
|
||||
WriteFCmp>, XS, EVEX_4V, VEX_LIG,
|
||||
EVEX_CD8<32, CD8VT1>;
|
||||
|
||||
defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
|
||||
WriteFAdd>, XD, VEX_W, EVEX_4V, VEX_LIG,
|
||||
WriteFCmp>, XD, VEX_W, EVEX_4V, VEX_LIG,
|
||||
EVEX_CD8<64, CD8VT1>;
|
||||
|
||||
defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
|
||||
WriteFAdd>, XS, EVEX_4V, VEX_LIG,
|
||||
WriteFCmp>, XS, EVEX_4V, VEX_LIG,
|
||||
EVEX_CD8<32, CD8VT1>;
|
||||
|
||||
defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
|
||||
WriteFAdd>, XD, VEX_W, EVEX_4V, VEX_LIG,
|
||||
WriteFCmp>, XD, VEX_W, EVEX_4V, VEX_LIG,
|
||||
EVEX_CD8<64, CD8VT1>;
|
||||
|
||||
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
|
||||
@ -5050,13 +5050,13 @@ defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512, WriteFAdd>,
|
||||
avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, WriteFAdd>;
|
||||
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512, WriteFDiv>,
|
||||
avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, WriteFDiv>;
|
||||
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512, WriteFAdd, 0>,
|
||||
avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, WriteFAdd>;
|
||||
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512, WriteFAdd, 0>,
|
||||
avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, WriteFAdd>;
|
||||
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512, WriteFCmp, 0>,
|
||||
avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, WriteFCmp>;
|
||||
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512, WriteFCmp, 0>,
|
||||
avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, WriteFCmp>;
|
||||
let isCodeGenOnly = 1 in {
|
||||
defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512, WriteFAdd, 1>;
|
||||
defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512, WriteFAdd, 1>;
|
||||
defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512, WriteFCmp, 1>;
|
||||
defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512, WriteFCmp, 1>;
|
||||
}
|
||||
defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI, WriteFAdd, 1>;
|
||||
defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI, WriteFAdd, 0>;
|
||||
@ -7732,44 +7732,44 @@ multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
|
||||
}
|
||||
|
||||
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
|
||||
defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", WriteFAdd>,
|
||||
defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", WriteFCom>,
|
||||
AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
|
||||
defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", WriteFAdd>,
|
||||
defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", WriteFCom>,
|
||||
AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
|
||||
defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", WriteFAdd>,
|
||||
defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", WriteFCom>,
|
||||
AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
|
||||
defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", WriteFAdd>,
|
||||
defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", WriteFCom>,
|
||||
AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
|
||||
}
|
||||
|
||||
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
|
||||
defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
|
||||
"ucomiss", WriteFAdd>, PS, EVEX, VEX_LIG,
|
||||
"ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
|
||||
EVEX_CD8<32, CD8VT1>;
|
||||
defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
|
||||
"ucomisd", WriteFAdd>, PD, EVEX,
|
||||
"ucomisd", WriteFCom>, PD, EVEX,
|
||||
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
|
||||
let Pattern = []<dag> in {
|
||||
defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32,
|
||||
"comiss", WriteFAdd>, PS, EVEX, VEX_LIG,
|
||||
"comiss", WriteFCom>, PS, EVEX, VEX_LIG,
|
||||
EVEX_CD8<32, CD8VT1>;
|
||||
defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64,
|
||||
"comisd", WriteFAdd>, PD, EVEX,
|
||||
"comisd", WriteFCom>, PD, EVEX,
|
||||
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
|
||||
}
|
||||
let isCodeGenOnly = 1 in {
|
||||
defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
|
||||
sse_load_f32, "ucomiss", WriteFAdd>, PS, EVEX, VEX_LIG,
|
||||
sse_load_f32, "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
|
||||
EVEX_CD8<32, CD8VT1>;
|
||||
defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
|
||||
sse_load_f64, "ucomisd", WriteFAdd>, PD, EVEX,
|
||||
sse_load_f64, "ucomisd", WriteFCom>, PD, EVEX,
|
||||
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
|
||||
|
||||
defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
|
||||
sse_load_f32, "comiss", WriteFAdd>, PS, EVEX, VEX_LIG,
|
||||
sse_load_f32, "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
|
||||
EVEX_CD8<32, CD8VT1>;
|
||||
defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
|
||||
sse_load_f64, "comisd", WriteFAdd>, PD, EVEX,
|
||||
sse_load_f64, "comisd", WriteFCom>, PD, EVEX,
|
||||
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
|
||||
}
|
||||
}
|
||||
|
@ -277,6 +277,8 @@ def SUB_FPrST0 : FPrST0PInst<MRM5r, "fsub{r}p\t$op">;
|
||||
def SUB_FST0r : FPST0rInst <MRM4r, "fsub\t$op">;
|
||||
def SUBR_FrST0 : FPrST0Inst <MRM4r, "fsub{|r}\t{%st(0), $op|$op, st(0)}">;
|
||||
def SUBR_FPrST0 : FPrST0PInst<MRM4r, "fsub{|r}p\t$op">;
|
||||
} // SchedRW
|
||||
let SchedRW = [WriteFCom] in {
|
||||
def COM_FST0r : FPST0rInst <MRM2r, "fcom\t$op">;
|
||||
def COMP_FST0r : FPST0rInst <MRM3r, "fcomp\t$op">;
|
||||
} // SchedRW
|
||||
@ -320,7 +322,7 @@ defm SIN : FPUnary<fsin, MRM_FE, "fsin">;
|
||||
defm COS : FPUnary<fcos, MRM_FF, "fcos">;
|
||||
}
|
||||
|
||||
let SchedRW = [WriteFAdd] in {
|
||||
let SchedRW = [WriteFCom] in {
|
||||
let hasSideEffects = 0 in {
|
||||
def TST_Fp32 : FpIf32<(outs), (ins RFP32:$src), OneArgFP, []>;
|
||||
def TST_Fp64 : FpIf64<(outs), (ins RFP64:$src), OneArgFP, []>;
|
||||
@ -333,7 +335,7 @@ def TST_F : FPI<0xD9, MRM_E4, (outs), (ins), "ftst">;
|
||||
|
||||
// Versions of FP instructions that take a single memory operand. Added for the
|
||||
// disassembler; remove as they are included with patterns elsewhere.
|
||||
let SchedRW = [WriteFAddLd] in {
|
||||
let SchedRW = [WriteFComLd] in {
|
||||
def FCOM32m : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom{s}\t$src">;
|
||||
def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp{s}\t$src">;
|
||||
|
||||
@ -568,7 +570,7 @@ def LD_F1 : FPI<0xD9, MRM_E8, (outs), (ins), "fld1">;
|
||||
}
|
||||
|
||||
// Floating point compares.
|
||||
let SchedRW = [WriteFAdd] in {
|
||||
let SchedRW = [WriteFCom] in {
|
||||
def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
|
||||
[(set FPSW, (trunc (X86cmp RFP32:$lhs, RFP32:$rhs)))]>;
|
||||
def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
|
||||
@ -578,7 +580,7 @@ def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
|
||||
} // SchedRW
|
||||
} // Defs = [FPSW]
|
||||
|
||||
let SchedRW = [WriteFAdd] in {
|
||||
let SchedRW = [WriteFCom] in {
|
||||
// CC = ST(0) cmp ST(i)
|
||||
let Defs = [EFLAGS, FPSW] in {
|
||||
def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
|
||||
|
@ -1854,23 +1854,23 @@ let ExeDomain = SSEPackedSingle in
|
||||
defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, AVXCC, X86cmps, f32, loadf32,
|
||||
"cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
"cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
|
||||
WriteFAdd>, XS, VEX_4V, VEX_LIG, VEX_WIG;
|
||||
WriteFCmp>, XS, VEX_4V, VEX_LIG, VEX_WIG;
|
||||
let ExeDomain = SSEPackedDouble in
|
||||
defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, AVXCC, X86cmps, f64, loadf64,
|
||||
"cmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
"cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
|
||||
WriteFAdd>, // same latency as 32 bit compare
|
||||
WriteFCmp>, // same latency as 32 bit compare
|
||||
XD, VEX_4V, VEX_LIG, VEX_WIG;
|
||||
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
let ExeDomain = SSEPackedSingle in
|
||||
defm CMPSS : sse12_cmp_scalar<FR32, f32mem, SSECC, X86cmps, f32, loadf32,
|
||||
"cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
|
||||
"cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}", WriteFAdd>, XS;
|
||||
"cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}", WriteFCmp>, XS;
|
||||
let ExeDomain = SSEPackedDouble in
|
||||
defm CMPSD : sse12_cmp_scalar<FR64, f64mem, SSECC, X86cmps, f64, loadf64,
|
||||
"cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
|
||||
"cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}", WriteFAdd>, XD;
|
||||
"cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}", WriteFCmp>, XD;
|
||||
}
|
||||
|
||||
multiclass sse12_cmp_scalar_int<Operand memop, Operand CC,
|
||||
@ -1894,21 +1894,21 @@ let isCodeGenOnly = 1 in {
|
||||
let ExeDomain = SSEPackedSingle in
|
||||
defm VCMPSS : sse12_cmp_scalar_int<ssmem, AVXCC, int_x86_sse_cmp_ss,
|
||||
"cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
|
||||
WriteFAdd, sse_load_f32>, XS, VEX_4V;
|
||||
WriteFCmp, sse_load_f32>, XS, VEX_4V;
|
||||
let ExeDomain = SSEPackedDouble in
|
||||
defm VCMPSD : sse12_cmp_scalar_int<sdmem, AVXCC, int_x86_sse2_cmp_sd,
|
||||
"cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}",
|
||||
WriteFAdd, sse_load_f64>, // same latency as f32
|
||||
WriteFCmp, sse_load_f64>, // same latency as f32
|
||||
XD, VEX_4V;
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
let ExeDomain = SSEPackedSingle in
|
||||
defm CMPSS : sse12_cmp_scalar_int<ssmem, SSECC, int_x86_sse_cmp_ss,
|
||||
"cmp${cc}ss\t{$src, $dst|$dst, $src}",
|
||||
WriteFAdd, sse_load_f32>, XS;
|
||||
WriteFCmp, sse_load_f32>, XS;
|
||||
let ExeDomain = SSEPackedDouble in
|
||||
defm CMPSD : sse12_cmp_scalar_int<sdmem, SSECC, int_x86_sse2_cmp_sd,
|
||||
"cmp${cc}sd\t{$src, $dst|$dst, $src}",
|
||||
WriteFAdd, sse_load_f64>, XD;
|
||||
WriteFCmp, sse_load_f64>, XD;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1951,49 +1951,49 @@ let mayLoad = 1 in
|
||||
|
||||
let Defs = [EFLAGS] in {
|
||||
defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
|
||||
"ucomiss", WriteFAdd>, PS, VEX, VEX_LIG, VEX_WIG;
|
||||
"ucomiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG;
|
||||
defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
|
||||
"ucomisd", WriteFAdd>, PD, VEX, VEX_LIG, VEX_WIG;
|
||||
"ucomisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG;
|
||||
let Pattern = []<dag> in {
|
||||
defm VCOMISS : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32,
|
||||
"comiss", WriteFAdd>, PS, VEX, VEX_LIG, VEX_WIG;
|
||||
"comiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG;
|
||||
defm VCOMISD : sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64,
|
||||
"comisd", WriteFAdd>, PD, VEX, VEX_LIG, VEX_WIG;
|
||||
"comisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG;
|
||||
}
|
||||
|
||||
let isCodeGenOnly = 1 in {
|
||||
defm VUCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
|
||||
sse_load_f32, "ucomiss", WriteFAdd>, PS, VEX, VEX_WIG;
|
||||
sse_load_f32, "ucomiss", WriteFCom>, PS, VEX, VEX_WIG;
|
||||
defm VUCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
|
||||
sse_load_f64, "ucomisd", WriteFAdd>, PD, VEX, VEX_WIG;
|
||||
sse_load_f64, "ucomisd", WriteFCom>, PD, VEX, VEX_WIG;
|
||||
|
||||
defm VCOMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
|
||||
sse_load_f32, "comiss", WriteFAdd>, PS, VEX, VEX_WIG;
|
||||
sse_load_f32, "comiss", WriteFCom>, PS, VEX, VEX_WIG;
|
||||
defm VCOMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
|
||||
sse_load_f64, "comisd", WriteFAdd>, PD, VEX, VEX_WIG;
|
||||
sse_load_f64, "comisd", WriteFCom>, PD, VEX, VEX_WIG;
|
||||
}
|
||||
defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
|
||||
"ucomiss", WriteFAdd>, PS;
|
||||
"ucomiss", WriteFCom>, PS;
|
||||
defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
|
||||
"ucomisd", WriteFAdd>, PD;
|
||||
"ucomisd", WriteFCom>, PD;
|
||||
|
||||
let Pattern = []<dag> in {
|
||||
defm COMISS : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32,
|
||||
"comiss", WriteFAdd>, PS;
|
||||
"comiss", WriteFCom>, PS;
|
||||
defm COMISD : sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64,
|
||||
"comisd", WriteFAdd>, PD;
|
||||
"comisd", WriteFCom>, PD;
|
||||
}
|
||||
|
||||
let isCodeGenOnly = 1 in {
|
||||
defm UCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
|
||||
sse_load_f32, "ucomiss", WriteFAdd>, PS;
|
||||
sse_load_f32, "ucomiss", WriteFCom>, PS;
|
||||
defm UCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
|
||||
sse_load_f64, "ucomisd", WriteFAdd>, PD;
|
||||
sse_load_f64, "ucomisd", WriteFCom>, PD;
|
||||
|
||||
defm COMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
|
||||
sse_load_f32, "comiss", WriteFAdd>, PS;
|
||||
sse_load_f32, "comiss", WriteFCom>, PS;
|
||||
defm COMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
|
||||
sse_load_f64, "comisd", WriteFAdd>, PD;
|
||||
sse_load_f64, "comisd", WriteFCom>, PD;
|
||||
}
|
||||
} // Defs = [EFLAGS]
|
||||
|
||||
@ -2028,28 +2028,28 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
|
||||
defm VCMPPS : sse12_cmp_packed<VR128, f128mem, AVXCC, v4f32,
|
||||
"cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
"cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
|
||||
WriteFAdd, SSEPackedSingle, loadv4f32>, PS, VEX_4V, VEX_WIG;
|
||||
WriteFCmp, SSEPackedSingle, loadv4f32>, PS, VEX_4V, VEX_WIG;
|
||||
defm VCMPPD : sse12_cmp_packed<VR128, f128mem, AVXCC, v2f64,
|
||||
"cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
"cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
|
||||
WriteFAdd, SSEPackedDouble, loadv2f64>, PD, VEX_4V, VEX_WIG;
|
||||
WriteFCmp, SSEPackedDouble, loadv2f64>, PD, VEX_4V, VEX_WIG;
|
||||
defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, AVXCC, v8f32,
|
||||
"cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
"cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
|
||||
WriteFAdd, SSEPackedSingle, loadv8f32>, PS, VEX_4V, VEX_L;
|
||||
WriteFCmp, SSEPackedSingle, loadv8f32>, PS, VEX_4V, VEX_L;
|
||||
defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, AVXCC, v4f64,
|
||||
"cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
"cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
|
||||
WriteFAdd, SSEPackedDouble, loadv4f64>, PD, VEX_4V, VEX_L;
|
||||
WriteFCmp, SSEPackedDouble, loadv4f64>, PD, VEX_4V, VEX_L;
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
defm CMPPS : sse12_cmp_packed<VR128, f128mem, SSECC, v4f32,
|
||||
"cmp${cc}ps\t{$src2, $dst|$dst, $src2}",
|
||||
"cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
|
||||
WriteFAdd, SSEPackedSingle, memopv4f32>, PS;
|
||||
WriteFCmp, SSEPackedSingle, memopv4f32>, PS;
|
||||
defm CMPPD : sse12_cmp_packed<VR128, f128mem, SSECC, v2f64,
|
||||
"cmp${cc}pd\t{$src2, $dst|$dst, $src2}",
|
||||
"cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
|
||||
WriteFAdd, SSEPackedDouble, memopv2f64>, PD;
|
||||
WriteFCmp, SSEPackedDouble, memopv2f64>, PD;
|
||||
}
|
||||
|
||||
def CommutableCMPCC : PatLeaf<(imm), [{
|
||||
@ -2583,19 +2583,19 @@ let isCommutable = 0 in {
|
||||
defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, WriteFDiv>,
|
||||
basic_sse12_fp_binop_s<0x5E, "div", fdiv, WriteFDiv>,
|
||||
basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, WriteFDiv>;
|
||||
defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, WriteFAdd>,
|
||||
basic_sse12_fp_binop_s<0x5F, "max", X86fmax, WriteFAdd>,
|
||||
basic_sse12_fp_binop_s_int<0x5F, "max", X86fmaxs, WriteFAdd>;
|
||||
defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, WriteFAdd>,
|
||||
basic_sse12_fp_binop_s<0x5D, "min", X86fmin, WriteFAdd>,
|
||||
basic_sse12_fp_binop_s_int<0x5D, "min", X86fmins, WriteFAdd>;
|
||||
defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, WriteFCmp>,
|
||||
basic_sse12_fp_binop_s<0x5F, "max", X86fmax, WriteFCmp>,
|
||||
basic_sse12_fp_binop_s_int<0x5F, "max", X86fmaxs, WriteFCmp>;
|
||||
defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, WriteFCmp>,
|
||||
basic_sse12_fp_binop_s<0x5D, "min", X86fmin, WriteFCmp>,
|
||||
basic_sse12_fp_binop_s_int<0x5D, "min", X86fmins, WriteFCmp>;
|
||||
}
|
||||
|
||||
let isCodeGenOnly = 1 in {
|
||||
defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, WriteFAdd>,
|
||||
basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, WriteFAdd>;
|
||||
defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, WriteFAdd>,
|
||||
basic_sse12_fp_binop_s<0x5D, "min", X86fminc, WriteFAdd>;
|
||||
defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, WriteFCmp>,
|
||||
basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, WriteFCmp>;
|
||||
defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, WriteFCmp>,
|
||||
basic_sse12_fp_binop_s<0x5D, "min", X86fminc, WriteFCmp>;
|
||||
}
|
||||
|
||||
// Patterns used to select SSE scalar fp arithmetic instructions from
|
||||
|
@ -154,7 +154,9 @@ def : WriteRes<WriteFLoad, [BWPort23]> { let Latency = 5; }
|
||||
def : WriteRes<WriteFStore, [BWPort237, BWPort4]>;
|
||||
def : WriteRes<WriteFMove, [BWPort5]>;
|
||||
|
||||
defm : BWWriteResPair<WriteFAdd, [BWPort1], 3>; // Floating point add/sub/compare.
|
||||
defm : BWWriteResPair<WriteFAdd, [BWPort1], 3>; // Floating point add/sub.
|
||||
defm : BWWriteResPair<WriteFCmp, [BWPort1], 3>; // Floating point compare.
|
||||
defm : BWWriteResPair<WriteFCom, [BWPort1], 3>; // Floating point compare to flags.
|
||||
defm : BWWriteResPair<WriteFMul, [BWPort0], 5>; // Floating point multiplication.
|
||||
defm : BWWriteResPair<WriteFDiv, [BWPort0], 12>; // 10-14 cycles. // Floating point division.
|
||||
defm : BWWriteResPair<WriteFSqrt, [BWPort0], 15>; // Floating point square root.
|
||||
@ -843,29 +845,13 @@ def: InstRW<[BWWriteResGroup27], (instregex "ADD_FPrST0",
|
||||
"(V?)ADDSSrr",
|
||||
"(V?)ADDSUBPD(Y?)rr",
|
||||
"(V?)ADDSUBPS(Y?)rr",
|
||||
"(V?)CMPPD(Y?)rri",
|
||||
"(V?)CMPPS(Y?)rri",
|
||||
"(V?)CMPSDrr",
|
||||
"(V?)CMPSSrr",
|
||||
"(V?)COMISDrr",
|
||||
"(V?)COMISSrr",
|
||||
"(V?)CVTDQ2PS(Y?)rr",
|
||||
"(V?)CVTPS2DQ(Y?)rr",
|
||||
"(V?)CVTTPS2DQ(Y?)rr",
|
||||
"(V?)MAX(C?)PD(Y?)rr",
|
||||
"(V?)MAX(C?)PS(Y?)rr",
|
||||
"(V?)MAX(C?)SDrr",
|
||||
"(V?)MAX(C?)SSrr",
|
||||
"(V?)MIN(C?)PD(Y?)rr",
|
||||
"(V?)MIN(C?)PS(Y?)rr",
|
||||
"(V?)MIN(C?)SDrr",
|
||||
"(V?)MIN(C?)SSrr",
|
||||
"(V?)SUBPD(Y?)rr",
|
||||
"(V?)SUBPS(Y?)rr",
|
||||
"(V?)SUBSDrr",
|
||||
"(V?)SUBSSrr",
|
||||
"(V?)UCOMISDrr",
|
||||
"(V?)UCOMISSrr")>;
|
||||
"(V?)SUBSSrr")>;
|
||||
|
||||
def BWWriteResGroup27_16 : SchedWriteRes<[BWPort1, BWPort0156]> {
|
||||
let Latency = 3;
|
||||
@ -1832,29 +1818,13 @@ def: InstRW<[BWWriteResGroup91], (instregex "MMX_CVTPI2PSirm",
|
||||
"(V?)ADDSSrm",
|
||||
"(V?)ADDSUBPDrm",
|
||||
"(V?)ADDSUBPSrm",
|
||||
"(V?)CMPPDrmi",
|
||||
"(V?)CMPPSrmi",
|
||||
"(V?)CMPSDrm",
|
||||
"(V?)CMPSSrm",
|
||||
"(V?)COMISDrm",
|
||||
"(V?)COMISSrm",
|
||||
"(V?)CVTDQ2PSrm",
|
||||
"(V?)CVTPS2DQrm",
|
||||
"(V?)CVTTPS2DQrm",
|
||||
"(V?)MAX(C?)PDrm",
|
||||
"(V?)MAX(C?)PSrm",
|
||||
"(V?)MAX(C?)SDrm",
|
||||
"(V?)MAX(C?)SSrm",
|
||||
"(V?)MIN(C?)PDrm",
|
||||
"(V?)MIN(C?)PSrm",
|
||||
"(V?)MIN(C?)SDrm",
|
||||
"(V?)MIN(C?)SSrm",
|
||||
"(V?)SUBPDrm",
|
||||
"(V?)SUBPSrm",
|
||||
"(V?)SUBSDrm",
|
||||
"(V?)SUBSSrm",
|
||||
"(V?)UCOMISDrm",
|
||||
"(V?)UCOMISSrm")>;
|
||||
"(V?)SUBSSrm")>;
|
||||
|
||||
def BWWriteResGroup91_16 : SchedWriteRes<[BWPort1, BWPort0156, BWPort23]> {
|
||||
let Latency = 8;
|
||||
|
@ -149,6 +149,8 @@ def : WriteRes<WriteFLoad, [HWPort23]> { let Latency = 5; }
|
||||
def : WriteRes<WriteFMove, [HWPort5]>;
|
||||
|
||||
defm : HWWriteResPair<WriteFAdd, [HWPort1], 3>;
|
||||
defm : HWWriteResPair<WriteFCmp, [HWPort1], 3, [1], 1, 6>;
|
||||
defm : HWWriteResPair<WriteFCom, [HWPort1], 3>;
|
||||
defm : HWWriteResPair<WriteFMul, [HWPort0], 5>;
|
||||
defm : HWWriteResPair<WriteFDiv, [HWPort0], 12>; // 10-14 cycles.
|
||||
defm : HWWriteResPair<WriteFRcp, [HWPort0], 5>;
|
||||
@ -1041,16 +1043,12 @@ def: InstRW<[HWWriteResGroup12], (instregex "FCOM32m",
|
||||
"(V?)ADDSSrm",
|
||||
"(V?)CMPSDrm",
|
||||
"(V?)CMPSSrm",
|
||||
"(V?)COMISDrm",
|
||||
"(V?)COMISSrm",
|
||||
"(V?)MAX(C?)SDrm",
|
||||
"(V?)MAX(C?)SSrm",
|
||||
"(V?)MIN(C?)SDrm",
|
||||
"(V?)MIN(C?)SSrm",
|
||||
"(V?)SUBSDrm",
|
||||
"(V?)SUBSSrm",
|
||||
"(V?)UCOMISDrm",
|
||||
"(V?)UCOMISSrm")>;
|
||||
"(V?)SUBSSrm")>;
|
||||
|
||||
def HWWriteResGroup13 : SchedWriteRes<[HWPort5,HWPort23]> {
|
||||
let Latency = 7;
|
||||
@ -1730,29 +1728,13 @@ def: InstRW<[HWWriteResGroup50], (instregex "ADD_FPrST0",
|
||||
"(V?)ADDSSrr",
|
||||
"(V?)ADDSUBPD(Y?)rr",
|
||||
"(V?)ADDSUBPS(Y?)rr",
|
||||
"(V?)CMPPD(Y?)rri",
|
||||
"(V?)CMPPS(Y?)rri",
|
||||
"(V?)CMPSDrr",
|
||||
"(V?)CMPSSrr",
|
||||
"(V?)COMISDrr",
|
||||
"(V?)COMISSrr",
|
||||
"(V?)CVTDQ2PS(Y?)rr",
|
||||
"(V?)CVTPS2DQ(Y?)rr",
|
||||
"(V?)CVTTPS2DQ(Y?)rr",
|
||||
"(V?)MAX(C?)PD(Y?)rr",
|
||||
"(V?)MAX(C?)PS(Y?)rr",
|
||||
"(V?)MAX(C?)SDrr",
|
||||
"(V?)MAX(C?)SSrr",
|
||||
"(V?)MIN(C?)PD(Y?)rr",
|
||||
"(V?)MIN(C?)PS(Y?)rr",
|
||||
"(V?)MIN(C?)SDrr",
|
||||
"(V?)MIN(C?)SSrr",
|
||||
"(V?)SUBPD(Y?)rr",
|
||||
"(V?)SUBPS(Y?)rr",
|
||||
"(V?)SUBSDrr",
|
||||
"(V?)SUBSSrr",
|
||||
"(V?)UCOMISDrr",
|
||||
"(V?)UCOMISSrr")>;
|
||||
"(V?)SUBSSrr")>;
|
||||
|
||||
def HWWriteResGroup50_16i : SchedWriteRes<[HWPort1, HWPort0156]> {
|
||||
let Latency = 3;
|
||||
@ -1804,15 +1786,9 @@ def: InstRW<[HWWriteResGroup52], (instregex "(V?)ADDPDrm",
|
||||
"(V?)ADDPSrm",
|
||||
"(V?)ADDSUBPDrm",
|
||||
"(V?)ADDSUBPSrm",
|
||||
"(V?)CMPPDrmi",
|
||||
"(V?)CMPPSrmi",
|
||||
"(V?)CVTDQ2PSrm",
|
||||
"(V?)CVTPS2DQrm",
|
||||
"(V?)CVTTPS2DQrm",
|
||||
"(V?)MAX(C?)PDrm",
|
||||
"(V?)MAX(C?)PSrm",
|
||||
"(V?)MIN(C?)PDrm",
|
||||
"(V?)MIN(C?)PSrm",
|
||||
"(V?)SUBPDrm",
|
||||
"(V?)SUBPSrm")>;
|
||||
|
||||
|
@ -139,6 +139,8 @@ def : WriteRes<WriteFLoad, [SBPort23]> { let Latency = 6; }
|
||||
def : WriteRes<WriteFMove, [SBPort5]>;
|
||||
|
||||
defm : SBWriteResPair<WriteFAdd, [SBPort1], 3>;
|
||||
defm : SBWriteResPair<WriteFCmp, [SBPort1], 3, [1], 1, 6>;
|
||||
defm : SBWriteResPair<WriteFCom, [SBPort1], 3>;
|
||||
defm : SBWriteResPair<WriteFMul, [SBPort0], 5>;
|
||||
defm : SBWriteResPair<WriteFDiv, [SBPort0], 24>;
|
||||
defm : SBWriteResPair<WriteFRcp, [SBPort0], 5>;
|
||||
@ -685,21 +687,9 @@ def: InstRW<[SBWriteResGroup21], (instregex "ADD_FPrST0",
|
||||
"(V?)ADDSSrr",
|
||||
"(V?)ADDSUBPD(Y?)rr",
|
||||
"(V?)ADDSUBPS(Y?)rr",
|
||||
"(V?)CMPPD(Y?)rri",
|
||||
"(V?)CMPPS(Y?)rri",
|
||||
"(V?)CMPSDrr",
|
||||
"(V?)CMPSSrr",
|
||||
"(V?)CVTDQ2PS(Y?)rr",
|
||||
"(V?)CVTPS2DQ(Y?)rr",
|
||||
"(V?)CVTTPS2DQ(Y?)rr",
|
||||
"(V?)MAX(C?)PD(Y?)rr",
|
||||
"(V?)MAX(C?)PS(Y?)rr",
|
||||
"(V?)MAX(C?)SDrr",
|
||||
"(V?)MAX(C?)SSrr",
|
||||
"(V?)MIN(C?)PD(Y?)rr",
|
||||
"(V?)MIN(C?)PS(Y?)rr",
|
||||
"(V?)MIN(C?)SDrr",
|
||||
"(V?)MIN(C?)SSrr",
|
||||
"(V?)ROUNDPD(Y?)r",
|
||||
"(V?)ROUNDPS(Y?)r",
|
||||
"(V?)ROUNDSDr",
|
||||
@ -1562,23 +1552,11 @@ def: InstRW<[SBWriteResGroup90], (instregex "MMX_CVTPI2PSirm",
|
||||
"(V?)ADDSSrm",
|
||||
"(V?)ADDSUBPDrm",
|
||||
"(V?)ADDSUBPSrm",
|
||||
"(V?)CMPPDrmi",
|
||||
"(V?)CMPPSrmi",
|
||||
"(V?)CMPSDrm",
|
||||
"(V?)CMPSSrm",
|
||||
"(V?)CVTDQ2PSrm",
|
||||
"(V?)CVTPS2DQrm",
|
||||
"(V?)CVTSI642SDrm",
|
||||
"(V?)CVTSI2SDrm",
|
||||
"(V?)CVTTPS2DQrm",
|
||||
"(V?)MAX(C?)PDrm",
|
||||
"(V?)MAX(C?)PSrm",
|
||||
"(V?)MAX(C?)SDrm",
|
||||
"(V?)MAX(C?)SSrm",
|
||||
"(V?)MIN(C?)PDrm",
|
||||
"(V?)MIN(C?)PSrm",
|
||||
"(V?)MIN(C?)SDrm",
|
||||
"(V?)MIN(C?)SSrm",
|
||||
"(V?)ROUNDPDm",
|
||||
"(V?)ROUNDPSm",
|
||||
"(V?)ROUNDSDm",
|
||||
|
@ -151,7 +151,9 @@ def : WriteRes<WriteFLoad, [SKLPort23]> { let Latency = 6; }
|
||||
def : WriteRes<WriteFStore, [SKLPort237, SKLPort4]>;
|
||||
def : WriteRes<WriteFMove, [SKLPort015]>;
|
||||
|
||||
defm : SKLWriteResPair<WriteFAdd, [SKLPort1], 3>; // Floating point add/sub/compare.
|
||||
defm : SKLWriteResPair<WriteFAdd, [SKLPort1], 3>; // Floating point add/sub.
|
||||
defm : SKLWriteResPair<WriteFCmp, [SKLPort01], 4, [1], 1, 6>; // Floating point compare.
|
||||
defm : SKLWriteResPair<WriteFCom, [SKLPort0], 2>; // Floating point compare to flags.
|
||||
defm : SKLWriteResPair<WriteFMul, [SKLPort0], 5>; // Floating point multiplication.
|
||||
defm : SKLWriteResPair<WriteFDiv, [SKLPort0], 12>; // 10-14 cycles. // Floating point division.
|
||||
defm : SKLWriteResPair<WriteFSqrt, [SKLPort0], 15>; // Floating point square root.
|
||||
@ -672,14 +674,10 @@ def SKLWriteResGroup12 : SchedWriteRes<[SKLPort0]> {
|
||||
}
|
||||
def: InstRW<[SKLWriteResGroup12], (instregex "MMX_MOVD64from64rr",
|
||||
"MMX_MOVD64grr",
|
||||
"(V?)COMISDrr",
|
||||
"(V?)COMISSrr",
|
||||
"(V?)MOVPDI2DIrr",
|
||||
"(V?)MOVPQIto64rr",
|
||||
"VTESTPD(Y?)rr",
|
||||
"VTESTPS(Y?)rr",
|
||||
"(V?)UCOMISDrr",
|
||||
"(V?)UCOMISSrr")>;
|
||||
"VTESTPS(Y?)rr")>;
|
||||
|
||||
def SKLWriteResGroup13 : SchedWriteRes<[SKLPort5]> {
|
||||
let Latency = 2;
|
||||
@ -1067,21 +1065,9 @@ def: InstRW<[SKLWriteResGroup48], (instregex "(V?)ADDPD(Y?)rr",
|
||||
"(V?)ADDSSrr",
|
||||
"(V?)ADDSUBPD(Y?)rr",
|
||||
"(V?)ADDSUBPS(Y?)rr",
|
||||
"(V?)CMPPD(Y?)rri",
|
||||
"(V?)CMPPS(Y?)rri",
|
||||
"(V?)CMPSDrr",
|
||||
"(V?)CMPSSrr",
|
||||
"(V?)CVTDQ2PS(Y?)rr",
|
||||
"(V?)CVTPS2DQ(Y?)rr",
|
||||
"(V?)CVTTPS2DQ(Y?)rr",
|
||||
"(V?)MAX(C?)PD(Y?)rr",
|
||||
"(V?)MAX(C?)PS(Y?)rr",
|
||||
"(V?)MAX(C?)SDrr",
|
||||
"(V?)MAX(C?)SSrr",
|
||||
"(V?)MIN(C?)PD(Y?)rr",
|
||||
"(V?)MIN(C?)PS(Y?)rr",
|
||||
"(V?)MIN(C?)SDrr",
|
||||
"(V?)MIN(C?)SSrr",
|
||||
"(V?)MULPD(Y?)rr",
|
||||
"(V?)MULPS(Y?)rr",
|
||||
"(V?)MULSDrr",
|
||||
@ -1547,16 +1533,6 @@ def SKLWriteResGroup86 : SchedWriteRes<[SKLPort0,SKLPort5]> {
|
||||
}
|
||||
def: InstRW<[SKLWriteResGroup86], (instregex "VCVTDQ2PDYrr")>;
|
||||
|
||||
def SKLWriteResGroup87 : SchedWriteRes<[SKLPort0,SKLPort23]> {
|
||||
let Latency = 7;
|
||||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [1,1];
|
||||
}
|
||||
def: InstRW<[SKLWriteResGroup87], (instregex "(V?)COMISDrm",
|
||||
"(V?)COMISSrm",
|
||||
"(V?)UCOMISDrm",
|
||||
"(V?)UCOMISSrm")>;
|
||||
|
||||
def SKLWriteResGroup88 : SchedWriteRes<[SKLPort5,SKLPort23]> {
|
||||
let Latency = 7;
|
||||
let NumMicroOps = 2;
|
||||
@ -2196,17 +2172,11 @@ def: InstRW<[SKLWriteResGroup134], (instregex "(V?)ADDPDrm",
|
||||
"(V?)ADDPSrm",
|
||||
"(V?)ADDSUBPDrm",
|
||||
"(V?)ADDSUBPSrm",
|
||||
"(V?)CMPPDrmi",
|
||||
"(V?)CMPPSrmi",
|
||||
"(V?)CVTDQ2PSrm",
|
||||
"(V?)CVTPH2PSYrm",
|
||||
"(V?)CVTPS2DQrm",
|
||||
"(V?)CVTSS2SDrm",
|
||||
"(V?)CVTTPS2DQrm",
|
||||
"(V?)MAX(C?)PDrm",
|
||||
"(V?)MAX(C?)PSrm",
|
||||
"(V?)MIN(C?)PDrm",
|
||||
"(V?)MIN(C?)PSrm",
|
||||
"(V?)MULPDrm",
|
||||
"(V?)MULPSrm",
|
||||
"(V?)PHMINPOSUWrm",
|
||||
|
@ -151,7 +151,9 @@ def : WriteRes<WriteFLoad, [SKXPort23]> { let Latency = 5; }
|
||||
def : WriteRes<WriteFStore, [SKXPort237, SKXPort4]>;
|
||||
def : WriteRes<WriteFMove, [SKXPort015]>;
|
||||
|
||||
defm : SKXWriteResPair<WriteFAdd, [SKXPort1], 3>; // Floating point add/sub/compare.
|
||||
defm : SKXWriteResPair<WriteFAdd, [SKXPort1], 3>; // Floating point add/sub.
|
||||
defm : SKXWriteResPair<WriteFCmp, [SKXPort015], 4, [1], 1, 6>; // Floating point compare.
|
||||
defm : SKXWriteResPair<WriteFCom, [SKXPort0], 2>; // Floating point compare to flags.
|
||||
defm : SKXWriteResPair<WriteFMul, [SKXPort0], 5>; // Floating point multiplication.
|
||||
defm : SKXWriteResPair<WriteFDiv, [SKXPort0], 12>; // 10-14 cycles. // Floating point division.
|
||||
defm : SKXWriteResPair<WriteFSqrt, [SKXPort0], 15>; // Floating point square root.
|
||||
@ -1406,18 +1408,10 @@ def SKXWriteResGroup12 : SchedWriteRes<[SKXPort0]> {
|
||||
let NumMicroOps = 1;
|
||||
let ResourceCycles = [1];
|
||||
}
|
||||
def: InstRW<[SKXWriteResGroup12], (instregex "COMISDrr",
|
||||
"COMISSrr",
|
||||
"MMX_MOVD64from64rr",
|
||||
def: InstRW<[SKXWriteResGroup12], (instregex "MMX_MOVD64from64rr",
|
||||
"MMX_MOVD64grr",
|
||||
"MOVPDI2DIrr",
|
||||
"MOVPQIto64rr",
|
||||
"UCOMISDrr",
|
||||
"UCOMISSrr",
|
||||
"VCOMISDZrr(b?)",
|
||||
"VCOMISDrr",
|
||||
"VCOMISSZrr(b?)",
|
||||
"VCOMISSrr",
|
||||
"VMOVPDI2DIZrr",
|
||||
"VMOVPDI2DIrr",
|
||||
"VMOVPQIto64Zrr",
|
||||
@ -1425,11 +1419,7 @@ def: InstRW<[SKXWriteResGroup12], (instregex "COMISDrr",
|
||||
"VTESTPDYrr",
|
||||
"VTESTPDrr",
|
||||
"VTESTPSYrr",
|
||||
"VTESTPSrr",
|
||||
"VUCOMISDZrr(b?)",
|
||||
"VUCOMISDrr",
|
||||
"VUCOMISSZrr(b?)",
|
||||
"VUCOMISSrr")>;
|
||||
"VTESTPSrr")>;
|
||||
|
||||
def SKXWriteResGroup13 : SchedWriteRes<[SKXPort5]> {
|
||||
let Latency = 2;
|
||||
@ -2162,21 +2152,9 @@ def: InstRW<[SKXWriteResGroup50], (instregex "ADDPDrr",
|
||||
"ADDSSrr",
|
||||
"ADDSUBPDrr",
|
||||
"ADDSUBPSrr",
|
||||
"CMPPDrri",
|
||||
"CMPPSrri",
|
||||
"CMPSDrr",
|
||||
"CMPSSrr",
|
||||
"CVTDQ2PSrr",
|
||||
"CVTPS2DQrr",
|
||||
"CVTTPS2DQrr",
|
||||
"MAX(C?)PDrr",
|
||||
"MAX(C?)PSrr",
|
||||
"MAX(C?)SDrr",
|
||||
"MAX(C?)SSrr",
|
||||
"MIN(C?)PDrr",
|
||||
"MIN(C?)PSrr",
|
||||
"MIN(C?)SDrr",
|
||||
"MIN(C?)SSrr",
|
||||
"MULPDrr",
|
||||
"MULPSrr",
|
||||
"MULSDrr",
|
||||
@ -2212,12 +2190,6 @@ def: InstRW<[SKXWriteResGroup50], (instregex "ADDPDrr",
|
||||
"VADDSUBPDrr",
|
||||
"VADDSUBPSYrr",
|
||||
"VADDSUBPSrr",
|
||||
"VCMPPDYrri",
|
||||
"VCMPPDrri",
|
||||
"VCMPPSYrri",
|
||||
"VCMPPSrri",
|
||||
"VCMPSDrr",
|
||||
"VCMPSSrr",
|
||||
"VCVTDQ2PSYrr",
|
||||
"VCVTDQ2PSZ128rr",
|
||||
"VCVTDQ2PSZ256rr",
|
||||
@ -2284,34 +2256,6 @@ def: InstRW<[SKXWriteResGroup50], (instregex "ADDPDrr",
|
||||
"VGETMANTPSZrri",
|
||||
"VGETMANTSDZ128rri",
|
||||
"VGETMANTSSZ128rri",
|
||||
"VMAX(C?)PDYrr",
|
||||
"VMAX(C?)PDZ128rr",
|
||||
"VMAX(C?)PDZ256rr",
|
||||
"VMAX(C?)PDZrr",
|
||||
"VMAX(C?)PDrr",
|
||||
"VMAX(C?)PSYrr",
|
||||
"VMAX(C?)PSZ128rr",
|
||||
"VMAX(C?)PSZ256rr",
|
||||
"VMAX(C?)PSZrr",
|
||||
"VMAX(C?)PSrr",
|
||||
"VMAX(C?)SDZrr",
|
||||
"VMAX(C?)SDrr",
|
||||
"VMAX(C?)SSZrr",
|
||||
"VMAX(C?)SSrr",
|
||||
"VMIN(C?)PDYrr",
|
||||
"VMIN(C?)PDZ128rr",
|
||||
"VMIN(C?)PDZ256rr",
|
||||
"VMIN(C?)PDZrr",
|
||||
"VMIN(C?)PDrr",
|
||||
"VMIN(C?)PSYrr",
|
||||
"VMIN(C?)PSZ128rr",
|
||||
"VMIN(C?)PSZ256rr",
|
||||
"VMIN(C?)PSZrr",
|
||||
"VMIN(C?)PSrr",
|
||||
"VMIN(C?)SDZrr",
|
||||
"VMIN(C?)SDrr",
|
||||
"VMIN(C?)SSZrr",
|
||||
"VMIN(C?)SSrr",
|
||||
"VMULPDYrr",
|
||||
"VMULPDZ128rr",
|
||||
"VMULPDZ256rr",
|
||||
@ -3145,24 +3089,6 @@ def SKXWriteResGroup90 : SchedWriteRes<[SKXPort0,SKXPort5]> {
|
||||
}
|
||||
def: InstRW<[SKXWriteResGroup90], (instregex "VCVTDQ2PDYrr")>;
|
||||
|
||||
def SKXWriteResGroup91 : SchedWriteRes<[SKXPort0,SKXPort23]> {
|
||||
let Latency = 7;
|
||||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [1,1];
|
||||
}
|
||||
def: InstRW<[SKXWriteResGroup91], (instregex "COMISDrm",
|
||||
"COMISSrm",
|
||||
"UCOMISDrm",
|
||||
"UCOMISSrm",
|
||||
"VCOMISDZrm(b?)",
|
||||
"VCOMISDrm",
|
||||
"VCOMISSZrm(b?)",
|
||||
"VCOMISSrm",
|
||||
"VUCOMISDZrm(b?)",
|
||||
"VUCOMISDrm",
|
||||
"VUCOMISSZrm(b?)",
|
||||
"VUCOMISSrm")>;
|
||||
|
||||
def SKXWriteResGroup92 : SchedWriteRes<[SKXPort5,SKXPort23]> {
|
||||
let Latency = 7;
|
||||
let NumMicroOps = 2;
|
||||
@ -4744,16 +4670,10 @@ def: InstRW<[SKXWriteResGroup149], (instregex "ADDPDrm",
|
||||
"ADDPSrm",
|
||||
"ADDSUBPDrm",
|
||||
"ADDSUBPSrm",
|
||||
"CMPPDrmi",
|
||||
"CMPPSrmi",
|
||||
"CVTDQ2PSrm",
|
||||
"CVTPS2DQrm",
|
||||
"CVTSS2SDrm",
|
||||
"CVTTPS2DQrm",
|
||||
"MAX(C?)PDrm",
|
||||
"MAX(C?)PSrm",
|
||||
"MIN(C?)PDrm",
|
||||
"MIN(C?)PSrm",
|
||||
"MULPDrm",
|
||||
"MULPSrm",
|
||||
"PHMINPOSUWrm",
|
||||
@ -4775,8 +4695,6 @@ def: InstRW<[SKXWriteResGroup149], (instregex "ADDPDrm",
|
||||
"VADDSSZrm",
|
||||
"VADDSUBPDrm",
|
||||
"VADDSUBPSrm",
|
||||
"VCMPPDrmi",
|
||||
"VCMPPSrmi",
|
||||
"VCVTDQ2PDZ128rm(b?)",
|
||||
"VCVTDQ2PSZ128rm(b?)",
|
||||
"VCVTDQ2PSrm",
|
||||
@ -4817,18 +4735,6 @@ def: InstRW<[SKXWriteResGroup149], (instregex "ADDPDrm",
|
||||
"VGETMANTPSZ128rm(b?)i",
|
||||
"VGETMANTSDZ128rmi(b?)",
|
||||
"VGETMANTSSZ128rmi(b?)",
|
||||
"VMAX(C?)PDZ128rm(b?)",
|
||||
"VMAX(C?)PDrm",
|
||||
"VMAX(C?)PSZ128rm(b?)",
|
||||
"VMAX(C?)PSrm",
|
||||
"VMAX(C?)SDZrm",
|
||||
"VMAX(C?)SSZrm",
|
||||
"VMIN(C?)PDZ128rm(b?)",
|
||||
"VMIN(C?)PDrm",
|
||||
"VMIN(C?)PSZ128rm(b?)",
|
||||
"VMIN(C?)PSrm",
|
||||
"VMIN(C?)SDZrm",
|
||||
"VMIN(C?)SSZrm",
|
||||
"VMULPDZ128rm(b?)",
|
||||
"VMULPDrm",
|
||||
"VMULPSZ128rm(b?)",
|
||||
|
@ -78,7 +78,9 @@ defm WriteJump : X86SchedWritePair;
|
||||
def WriteFLoad : SchedWrite;
|
||||
def WriteFStore : SchedWrite;
|
||||
def WriteFMove : SchedWrite;
|
||||
defm WriteFAdd : X86SchedWritePair; // Floating point add/sub/compare.
|
||||
defm WriteFAdd : X86SchedWritePair; // Floating point add/sub.
|
||||
defm WriteFCmp : X86SchedWritePair; // Floating point compare.
|
||||
defm WriteFCom : X86SchedWritePair; // Floating point compare to flags.
|
||||
defm WriteFMul : X86SchedWritePair; // Floating point multiplication.
|
||||
defm WriteFDiv : X86SchedWritePair; // Floating point division.
|
||||
defm WriteFSqrt : X86SchedWritePair; // Floating point square root.
|
||||
|
@ -203,6 +203,8 @@ def : WriteRes<WriteFStore, [AtomPort0]>;
|
||||
def : WriteRes<WriteFMove, [AtomPort01]>;
|
||||
|
||||
defm : AtomWriteResPair<WriteFAdd, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
|
||||
defm : AtomWriteResPair<WriteFCmp, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
|
||||
defm : AtomWriteResPair<WriteFCom, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
|
||||
defm : AtomWriteResPair<WriteFMul, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
|
||||
defm : AtomWriteResPair<WriteFRcp, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
|
||||
defm : AtomWriteResPair<WriteFRsqrt, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
|
||||
|
@ -294,6 +294,8 @@ def : WriteRes<WriteFStore, [JSAGU, JFPU1, JSTC]>;
|
||||
def : WriteRes<WriteFMove, [JFPU01, JFPX]>;
|
||||
|
||||
defm : JWriteResFpuPair<WriteFAdd, [JFPU0, JFPA], 3>;
|
||||
defm : JWriteResFpuPair<WriteFCmp, [JFPU0, JFPA], 2>;
|
||||
defm : JWriteResFpuPair<WriteFCom, [JFPU0, JFPA, JALU0], 3>;
|
||||
defm : JWriteResFpuPair<WriteFMul, [JFPU1, JFPM], 2>;
|
||||
defm : JWriteResFpuPair<WriteFMA, [JFPU1, JFPM], 2>; // NOTE: Doesn't exist on Jaguar.
|
||||
defm : JWriteResFpuPair<WriteFRcp, [JFPU1, JFPM], 2>;
|
||||
@ -704,28 +706,6 @@ def JWriteVMOVNTPYSt: SchedWriteRes<[JFPU1, JSTC, JSAGU]> {
|
||||
}
|
||||
def : InstRW<[JWriteVMOVNTPYSt], (instrs VMOVNTDQYmr, VMOVNTPDYmr, VMOVNTPSYmr)>;
|
||||
|
||||
def JWriteFComi : SchedWriteRes<[JFPU0, JFPA, JALU0]> {
|
||||
let Latency = 3;
|
||||
}
|
||||
def : InstRW<[JWriteFComi], (instregex "(V)?(U)?COMIS(D|S)rr")>;
|
||||
|
||||
def JWriteFComiLd : SchedWriteRes<[JLAGU, JFPU0, JFPA, JALU0]> {
|
||||
let Latency = 8;
|
||||
}
|
||||
def : InstRW<[JWriteFComiLd], (instregex "(V)?(U)?COMIS(D|S)rm")>;
|
||||
|
||||
def JWriteFCmp: SchedWriteRes<[JFPU0, JFPA]> {
|
||||
let Latency = 2;
|
||||
}
|
||||
def : InstRW<[JWriteFCmp], (instregex "(V)?M(AX|IN)(P|S)(D|S)rr",
|
||||
"(V)?CMPP(S|D)rri", "(V)?CMPS(S|D)rr")>;
|
||||
|
||||
def JWriteFCmpLd: SchedWriteRes<[JLAGU, JFPU0, JFPA]> {
|
||||
let Latency = 7;
|
||||
}
|
||||
def : InstRW<[JWriteFCmpLd], (instregex "(V)?M(AX|IN)(P|S)(D|S)rm",
|
||||
"(V)?CMPP(S|D)rmi", "(V)?CMPS(S|D)rm")>;
|
||||
|
||||
def JWriteFCmpY: SchedWriteRes<[JFPU0, JFPA]> {
|
||||
let Latency = 2;
|
||||
let ResourceCycles = [2, 2];
|
||||
|
@ -125,6 +125,8 @@ def : WriteRes<WriteFLoad, [SLM_MEC_RSV]> { let Latency = 3; }
|
||||
def : WriteRes<WriteFMove, [SLM_FPC_RSV01]>;
|
||||
|
||||
defm : SLMWriteResPair<WriteFAdd, [SLM_FPC_RSV1], 3>;
|
||||
defm : SLMWriteResPair<WriteFCmp, [SLM_FPC_RSV1], 3>;
|
||||
defm : SLMWriteResPair<WriteFCom, [SLM_FPC_RSV1], 3>;
|
||||
defm : SLMWriteResPair<WriteFMul, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
|
||||
defm : SLMWriteResPair<WriteFDiv, [SLM_FPC_RSV0, SLMFPDivider], 34, [1,34]>;
|
||||
defm : SLMWriteResPair<WriteFRcp, [SLM_FPC_RSV0], 5>;
|
||||
|
@ -193,6 +193,8 @@ def : WriteRes<WriteFLoad, [ZnAGU]> { let Latency = 8; }
|
||||
|
||||
defm : ZnWriteResFpuPair<WriteFHAdd, [ZnFPU0], 3>;
|
||||
defm : ZnWriteResFpuPair<WriteFAdd, [ZnFPU0], 3>;
|
||||
defm : ZnWriteResFpuPair<WriteFCmp, [ZnFPU0], 3>;
|
||||
defm : ZnWriteResFpuPair<WriteFCom, [ZnFPU0], 3>;
|
||||
defm : ZnWriteResFpuPair<WriteFBlend, [ZnFPU01], 1>;
|
||||
defm : ZnWriteResFpuPair<WriteFVarBlend, [ZnFPU01], 1>;
|
||||
defm : ZnWriteResFpuPair<WriteVarBlend, [ZnFPU0], 1>;
|
||||
|
@ -1187,16 +1187,16 @@ define void @test_fcomi_fcomip() optsize {
|
||||
; SKYLAKE-LABEL: test_fcomi_fcomip:
|
||||
; SKYLAKE: # %bb.0:
|
||||
; SKYLAKE-NEXT: #APP
|
||||
; SKYLAKE-NEXT: fcomi %st(3) # sched: [3:1.00]
|
||||
; SKYLAKE-NEXT: fcompi %st(3) # sched: [3:1.00]
|
||||
; SKYLAKE-NEXT: fcomi %st(3) # sched: [2:1.00]
|
||||
; SKYLAKE-NEXT: fcompi %st(3) # sched: [2:1.00]
|
||||
; SKYLAKE-NEXT: #NO_APP
|
||||
; SKYLAKE-NEXT: retl # sched: [6:0.50]
|
||||
;
|
||||
; SKX-LABEL: test_fcomi_fcomip:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: #APP
|
||||
; SKX-NEXT: fcomi %st(3) # sched: [3:1.00]
|
||||
; SKX-NEXT: fcompi %st(3) # sched: [3:1.00]
|
||||
; SKX-NEXT: fcomi %st(3) # sched: [2:1.00]
|
||||
; SKX-NEXT: fcompi %st(3) # sched: [2:1.00]
|
||||
; SKX-NEXT: #NO_APP
|
||||
; SKX-NEXT: retl # sched: [6:0.50]
|
||||
;
|
||||
@ -5073,14 +5073,14 @@ define void @test_ftst() optsize {
|
||||
; SKYLAKE-LABEL: test_ftst:
|
||||
; SKYLAKE: # %bb.0:
|
||||
; SKYLAKE-NEXT: #APP
|
||||
; SKYLAKE-NEXT: ftst # sched: [3:1.00]
|
||||
; SKYLAKE-NEXT: ftst # sched: [2:1.00]
|
||||
; SKYLAKE-NEXT: #NO_APP
|
||||
; SKYLAKE-NEXT: retl # sched: [6:0.50]
|
||||
;
|
||||
; SKX-LABEL: test_ftst:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: #APP
|
||||
; SKX-NEXT: ftst # sched: [3:1.00]
|
||||
; SKX-NEXT: ftst # sched: [2:1.00]
|
||||
; SKX-NEXT: #NO_APP
|
||||
; SKX-NEXT: retl # sched: [6:0.50]
|
||||
;
|
||||
@ -5175,7 +5175,7 @@ define void @test_fucom_fucomp_fucompp() optsize {
|
||||
; SKYLAKE-NEXT: fucom %st(3) # sched: [1:1.00]
|
||||
; SKYLAKE-NEXT: fucomp %st(1) # sched: [1:1.00]
|
||||
; SKYLAKE-NEXT: fucomp %st(3) # sched: [1:1.00]
|
||||
; SKYLAKE-NEXT: fucompp # sched: [3:1.00]
|
||||
; SKYLAKE-NEXT: fucompp # sched: [2:1.00]
|
||||
; SKYLAKE-NEXT: #NO_APP
|
||||
; SKYLAKE-NEXT: retl # sched: [6:0.50]
|
||||
;
|
||||
@ -5186,7 +5186,7 @@ define void @test_fucom_fucomp_fucompp() optsize {
|
||||
; SKX-NEXT: fucom %st(3) # sched: [1:1.00]
|
||||
; SKX-NEXT: fucomp %st(1) # sched: [1:1.00]
|
||||
; SKX-NEXT: fucomp %st(3) # sched: [1:1.00]
|
||||
; SKX-NEXT: fucompp # sched: [3:1.00]
|
||||
; SKX-NEXT: fucompp # sched: [2:1.00]
|
||||
; SKX-NEXT: #NO_APP
|
||||
; SKX-NEXT: retl # sched: [6:0.50]
|
||||
;
|
||||
@ -5267,16 +5267,16 @@ define void @test_fucomi_fucomip() optsize {
|
||||
; SKYLAKE-LABEL: test_fucomi_fucomip:
|
||||
; SKYLAKE: # %bb.0:
|
||||
; SKYLAKE-NEXT: #APP
|
||||
; SKYLAKE-NEXT: fucomi %st(3) # sched: [3:1.00]
|
||||
; SKYLAKE-NEXT: fucompi %st(3) # sched: [3:1.00]
|
||||
; SKYLAKE-NEXT: fucomi %st(3) # sched: [2:1.00]
|
||||
; SKYLAKE-NEXT: fucompi %st(3) # sched: [2:1.00]
|
||||
; SKYLAKE-NEXT: #NO_APP
|
||||
; SKYLAKE-NEXT: retl # sched: [6:0.50]
|
||||
;
|
||||
; SKX-LABEL: test_fucomi_fucomip:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: #APP
|
||||
; SKX-NEXT: fucomi %st(3) # sched: [3:1.00]
|
||||
; SKX-NEXT: fucompi %st(3) # sched: [3:1.00]
|
||||
; SKX-NEXT: fucomi %st(3) # sched: [2:1.00]
|
||||
; SKX-NEXT: fucompi %st(3) # sched: [2:1.00]
|
||||
; SKX-NEXT: #NO_APP
|
||||
; SKX-NEXT: retl # sched: [6:0.50]
|
||||
;
|
||||
|
Loading…
Reference in New Issue
Block a user