1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

[X86] Add WriteDPPD/WriteDPPS dot product scheduler classes

llvm-svn: 331489
This commit is contained in:
Simon Pilgrim 2018-05-03 22:31:19 +00:00
parent 66c6126889
commit 575c3c5874
11 changed files with 42 additions and 232 deletions

View File

@ -6026,15 +6026,15 @@ let Predicates = [HasAVX] in {
let ExeDomain = SSEPackedSingle in
defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
VR128, loadv4f32, f128mem, 0,
SchedWriteFAdd.XMM>, VEX_4V, VEX_WIG;
SchedWriteDPPS.XMM>, VEX_4V, VEX_WIG;
let ExeDomain = SSEPackedDouble in
defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
VR128, loadv2f64, f128mem, 0,
SchedWriteFAdd.XMM>, VEX_4V, VEX_WIG;
SchedWriteDPPD.XMM>, VEX_4V, VEX_WIG;
let ExeDomain = SSEPackedSingle in
defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
VR256, loadv8f32, i256mem, 0,
SchedWriteFAdd.YMM>, VEX_4V, VEX_L, VEX_WIG;
SchedWriteDPPS.YMM>, VEX_4V, VEX_L, VEX_WIG;
}
let Predicates = [HasAVX2] in {
@ -6055,11 +6055,11 @@ let Constraints = "$src1 = $dst" in {
let ExeDomain = SSEPackedSingle in
defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
VR128, memopv4f32, f128mem, 1,
SchedWriteFAdd.XMM>;
SchedWriteDPPS.XMM>;
let ExeDomain = SSEPackedDouble in
defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
VR128, memopv2f64, f128mem, 1,
SchedWriteFAdd.XMM>;
SchedWriteDPPD.XMM>;
}
/// SS41I_blend_rmi - SSE 4.1 blend with 8-bit immediate

View File

@ -173,6 +173,9 @@ defm : BWWriteResPair<WriteFRsqrtY,[BWPort0], 5, [1], 1, 7>; // Floating point
defm : BWWriteResPair<WriteFMA, [BWPort01], 5, [1], 1, 5>; // Fused Multiply Add.
defm : BWWriteResPair<WriteFMAS, [BWPort01], 5, [1], 1, 5>; // Fused Multiply Add (Scalar).
defm : BWWriteResPair<WriteFMAY, [BWPort01], 5, [1], 1, 6>; // Fused Multiply Add (YMM/ZMM).
defm : BWWriteResPair<WriteDPPD, [BWPort0,BWPort1,BWPort5], 9, [1,1,1], 3, 5>; // Floating point double dot product.
defm : BWWriteResPair<WriteDPPS, [BWPort0,BWPort1,BWPort5], 14, [2,1,1], 4, 5>; // Floating point single dot product.
defm : BWWriteResPair<WriteDPPSY, [BWPort0,BWPort1,BWPort5], 14, [2,1,1], 4, 6>; // Floating point single dot product (YMM).
defm : BWWriteResPair<WriteFSign, [BWPort5], 1>; // Floating point fabs/fchs.
defm : BWWriteResPair<WriteFLogic, [BWPort5], 1, [1], 1, 5>; // Floating point and/or/xor logicals.
defm : BWWriteResPair<WriteFLogicY, [BWPort5], 1, [1], 1, 6>; // Floating point and/or/xor logicals (YMM/ZMM).
@ -1267,13 +1270,6 @@ def: InstRW<[BWWriteResGroup102], (instregex "VPERM2F128rm",
"VPMOVZXDQYrm",
"VPMOVZXWQYrm")>;
def BWWriteResGroup104 : SchedWriteRes<[BWPort0,BWPort1,BWPort5]> {
let Latency = 9;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
def: InstRW<[BWWriteResGroup104], (instregex "(V?)DPPDrri")>;
def BWWriteResGroup105 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> {
let Latency = 9;
let NumMicroOps = 3;
@ -1501,20 +1497,6 @@ def BWWriteResGroup141 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> {
}
def: InstRW<[BWWriteResGroup141], (instregex "MUL_FI(16|32)m")>;
def BWWriteResGroup142 : SchedWriteRes<[BWPort0,BWPort1,BWPort5]> {
let Latency = 14;
let NumMicroOps = 4;
let ResourceCycles = [2,1,1];
}
def: InstRW<[BWWriteResGroup142], (instregex "(V?)DPPS(Y?)rri")>;
def BWWriteResGroup143 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23]> {
let Latency = 14;
let NumMicroOps = 4;
let ResourceCycles = [1,1,1,1];
}
def: InstRW<[BWWriteResGroup143], (instregex "(V?)DPPDrmi")>;
def BWWriteResGroup144 : SchedWriteRes<[BWPort1,BWPort6,BWPort23,BWPort0156]> {
let Latency = 14;
let NumMicroOps = 8;
@ -1620,13 +1602,6 @@ def BWWriteResGroup161 : SchedWriteRes<[BWPort0,BWPort23,BWFPDivider]> {
def: InstRW<[BWWriteResGroup161], (instregex "(V?)DIVPDrm",
"(V?)DIVSDrm")>;
def BWWriteResGroup163 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23]> {
let Latency = 19;
let NumMicroOps = 5;
let ResourceCycles = [2,1,1,1];
}
def: InstRW<[BWWriteResGroup163], (instregex "(V?)DPPSrmi")>;
def BWWriteResGroup165 : SchedWriteRes<[BWPort0]> {
let Latency = 20;
let NumMicroOps = 1;
@ -1636,13 +1611,6 @@ def: InstRW<[BWWriteResGroup165], (instregex "DIV_FPrST0",
"DIV_FST0r",
"DIV_FrST0")>;
def BWWriteResGroup166 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23]> {
let Latency = 20;
let NumMicroOps = 5;
let ResourceCycles = [2,1,1,1];
}
def: InstRW<[BWWriteResGroup166], (instregex "VDPPSYrmi")>;
def BWWriteResGroup167 : SchedWriteRes<[BWPort4,BWPort5,BWPort6,BWPort23,BWPort237,BWPort06,BWPort0156]> {
let Latency = 20;
let NumMicroOps = 8;

View File

@ -169,6 +169,9 @@ defm : HWWriteResPair<WriteCvtF2F, [HWPort1], 3>;
defm : HWWriteResPair<WriteFMA, [HWPort01], 5, [1], 1, 6>;
defm : HWWriteResPair<WriteFMAS, [HWPort01], 5, [1], 1, 5>;
defm : HWWriteResPair<WriteFMAY, [HWPort01], 5, [1], 1, 7>;
// Dot-product scheduler classes. The reg-reg numbers (latency, resource
// cycles, micro-ops) equal the per-instruction HWWriteResGroup116/140
// overrides that this change removes.
// NOTE(review): the trailing argument looks like the extra latency of the
// folded-load form (9+6=15, 14+6=20, 14+7=21 match the removed rmi groups) —
// confirm against the HWWriteResPair definition.
defm : HWWriteResPair<WriteDPPD, [HWPort0,HWPort1,HWPort5], 9, [1,1,1], 3, 6>; // Floating point double dot product.
defm : HWWriteResPair<WriteDPPS, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4, 6>; // Floating point single dot product.
defm : HWWriteResPair<WriteDPPSY, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4, 7>; // Floating point single dot product (YMM).
defm : HWWriteResPair<WriteFSign, [HWPort0], 1>;
defm : HWWriteResPair<WriteFLogic, [HWPort5], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteFLogicY, [HWPort5], 1, [1], 1, 7>;
@ -1829,20 +1832,6 @@ def HWWriteResGroup115 : SchedWriteRes<[HWPort0,HWPort1,HWPort23]> {
}
def: InstRW<[HWWriteResGroup115], (instregex "MUL_FI(16|32)m")>;
def HWWriteResGroup116 : SchedWriteRes<[HWPort0,HWPort1,HWPort5]> {
let Latency = 9;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
def: InstRW<[HWWriteResGroup116], (instregex "(V?)DPPDrri")>;
def HWWriteResGroup117 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort23]> {
let Latency = 15;
let NumMicroOps = 4;
let ResourceCycles = [1,1,1,1];
}
def: InstRW<[HWWriteResGroup117], (instregex "(V?)DPPDrmi")>;
def HWWriteResGroup120 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort06,HWPort15,HWPort0156]> {
let Latency = 16;
let NumMicroOps = 10;
@ -1932,27 +1921,6 @@ def HWWriteResGroup138 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> {
}
def: InstRW<[HWWriteResGroup138], (instregex "(V?)SQRTPSm")>;
def HWWriteResGroup140 : SchedWriteRes<[HWPort0,HWPort1,HWPort5]> {
let Latency = 14;
let NumMicroOps = 4;
let ResourceCycles = [2,1,1];
}
def: InstRW<[HWWriteResGroup140], (instregex "(V?)DPPS(Y?)rri")>;
def HWWriteResGroup141 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort23]> {
let Latency = 20;
let NumMicroOps = 5;
let ResourceCycles = [2,1,1,1];
}
def: InstRW<[HWWriteResGroup141], (instregex "(V?)DPPSrmi")>;
def HWWriteResGroup141_1 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort23]> {
let Latency = 21;
let NumMicroOps = 5;
let ResourceCycles = [2,1,1,1];
}
def: InstRW<[HWWriteResGroup141_1], (instregex "VDPPSYrmi")>;
def HWWriteResGroup142 : SchedWriteRes<[HWPort1,HWPort06,HWPort15,HWPort0156]> {
let Latency = 14;
let NumMicroOps = 10;

View File

@ -153,6 +153,9 @@ defm : SBWriteResPair<WriteFRsqrt, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WriteFRsqrtY,[SBPort0], 5, [1], 1, 7>;
defm : SBWriteResPair<WriteFSqrt, [SBPort0], 14, [1], 1, 5>;
defm : SBWriteResPair<WriteFSqrtY, [SBPort0], 14, [1], 1, 7>;
// Dot-product scheduler classes. The reg-reg numbers (latency, resource
// cycles, micro-ops) equal the per-instruction SBWriteResGroup92/112
// overrides that this change removes.
// NOTE(review): the trailing argument looks like the extra latency of the
// folded-load form (9+6=15, 12+6=18, 12+7=19 match the removed rmi groups) —
// confirm against the SBWriteResPair definition.
defm : SBWriteResPair<WriteDPPD, [SBPort0,SBPort1,SBPort5], 9, [1,1,1], 3, 6>; // Floating point double dot product.
defm : SBWriteResPair<WriteDPPS, [SBPort0,SBPort1,SBPort5], 12, [1,2,1], 4, 6>; // Floating point single dot product.
defm : SBWriteResPair<WriteDPPSY, [SBPort0,SBPort1,SBPort5], 12, [1,2,1], 4, 7>; // Floating point single dot product (YMM).
defm : SBWriteResPair<WriteCvtF2I, [SBPort1], 3>;
defm : SBWriteResPair<WriteCvtI2F, [SBPort1], 4>;
defm : SBWriteResPair<WriteCvtF2F, [SBPort1], 3>;
@ -1168,13 +1171,6 @@ def SBWriteResGroup91 : SchedWriteRes<[SBPort23,SBPort05]> {
def: InstRW<[SBWriteResGroup91], (instregex "VMASKMOVPDYrm",
"VMASKMOVPSYrm")>;
def SBWriteResGroup92 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> {
let Latency = 9;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
def: InstRW<[SBWriteResGroup92], (instregex "(V?)DPPDrri")>;
def SBWriteResGroup93 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
let Latency = 9;
let NumMicroOps = 3;
@ -1358,13 +1354,6 @@ def SBWriteResGroup111 : SchedWriteRes<[SBPort0,SBPort23]> {
}
def: InstRW<[SBWriteResGroup111], (instregex "MUL_F(32|64)m")>;
def SBWriteResGroup112 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> {
let Latency = 12;
let NumMicroOps = 4;
let ResourceCycles = [1,2,1];
}
def: InstRW<[SBWriteResGroup112], (instregex "(V?)DPPS(Y?)rri")>;
def SBWriteResGroup114 : SchedWriteRes<[SBPort1,SBPort23]> {
let Latency = 13;
let NumMicroOps = 3;
@ -1397,27 +1386,6 @@ def SBWriteResGroup119 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
}
def: InstRW<[SBWriteResGroup119], (instregex "MUL_FI(16|32)m")>;
def SBWriteResGroup120 : SchedWriteRes<[SBPort0,SBPort1,SBPort5,SBPort23]> {
let Latency = 15;
let NumMicroOps = 4;
let ResourceCycles = [1,1,1,1];
}
def: InstRW<[SBWriteResGroup120], (instregex "(V?)DPPDrmi")>;
def SBWriteResGroup121 : SchedWriteRes<[SBPort0,SBPort1,SBPort5,SBPort23]> {
let Latency = 18;
let NumMicroOps = 5;
let ResourceCycles = [1,2,1,1];
}
def: InstRW<[SBWriteResGroup121], (instregex "(V?)DPPSrmi")>;
def SBWriteResGroup122 : SchedWriteRes<[SBPort0,SBPort1,SBPort5,SBPort23]> {
let Latency = 19;
let NumMicroOps = 5;
let ResourceCycles = [1,2,1,1];
}
def: InstRW<[SBWriteResGroup122], (instregex "VDPPSYrmi")>;
def SBWriteResGroup123 : SchedWriteRes<[SBPort0,SBPort23,SBFPDivider]> {
let Latency = 20;
let NumMicroOps = 2;

View File

@ -169,6 +169,9 @@ defm : SKLWriteResPair<WriteFRsqrtY,[SKLPort0], 4, [1], 1, 5>; // Floating point
defm : SKLWriteResPair<WriteFMA, [SKLPort01], 4, [1], 1, 6>; // Fused Multiply Add.
defm : SKLWriteResPair<WriteFMAS, [SKLPort01], 4, [1], 1, 5>; // Fused Multiply Add (Scalar).
defm : SKLWriteResPair<WriteFMAY, [SKLPort01], 4, [1], 1, 7>; // Fused Multiply Add (YMM/ZMM).
defm : SKLWriteResPair<WriteDPPD, [SKLPort5,SKLPort01], 9, [1,2], 3, 6>; // Floating point double dot product.
defm : SKLWriteResPair<WriteDPPS, [SKLPort5,SKLPort01], 13, [1,3], 4, 6>; // Floating point single dot product.
defm : SKLWriteResPair<WriteDPPSY, [SKLPort5,SKLPort01], 13, [1,3], 4, 7>; // Floating point single dot product (YMM).
defm : SKLWriteResPair<WriteFSign, [SKLPort0], 1>; // Floating point fabs/fchs.
defm : SKLWriteResPair<WriteFLogic, [SKLPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals.
defm : SKLWriteResPair<WriteFLogicY, [SKLPort015], 1, [1], 1, 7>; // Floating point and/or/xor logicals (YMM/ZMM).
@ -1521,13 +1524,6 @@ def: InstRW<[SKLWriteResGroup123], (instregex "MMX_CVTPS2PIirm",
"VCVTPH2PSrm",
"(V?)CVTPS2PDrm")>;
def SKLWriteResGroup124 : SchedWriteRes<[SKLPort5,SKLPort01]> {
let Latency = 9;
let NumMicroOps = 3;
let ResourceCycles = [1,2];
}
def: InstRW<[SKLWriteResGroup124], (instregex "(V?)DPPDrri")>;
def SKLWriteResGroup126 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
let Latency = 9;
let NumMicroOps = 3;
@ -1785,13 +1781,6 @@ def SKLWriteResGroup163 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
}
def: InstRW<[SKLWriteResGroup163], (instregex "VCVTDQ2PDYrm")>;
def SKLWriteResGroup164 : SchedWriteRes<[SKLPort5,SKLPort01]> {
let Latency = 13;
let NumMicroOps = 4;
let ResourceCycles = [1,3];
}
def: InstRW<[SKLWriteResGroup164], (instregex "(V?)DPPS(Y?)rri")>;
def SKLWriteResGroup166 : SchedWriteRes<[SKLPort0,SKLFPDivider]> {
let Latency = 14;
let NumMicroOps = 1;
@ -1848,13 +1837,6 @@ def SKLWriteResGroup172 : SchedWriteRes<[SKLPort23,SKLPort01]> {
def: InstRW<[SKLWriteResGroup172], (instregex "VROUNDPDYm",
"VROUNDPSYm")>;
def SKLWriteResGroup173 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort01]> {
let Latency = 15;
let NumMicroOps = 4;
let ResourceCycles = [1,1,2];
}
def: InstRW<[SKLWriteResGroup173], (instregex "(V?)DPPDrmi")>;
def SKLWriteResGroup174 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort06,SKLPort15,SKLPort0156]> {
let Latency = 15;
let NumMicroOps = 10;
@ -1961,13 +1943,6 @@ def SKLWriteResGroup186_1 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> {
}
def: InstRW<[SKLWriteResGroup186_1], (instregex "VSQRTPSYm")>;
def SKLWriteResGroup187 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort01]> {
let Latency = 19;
let NumMicroOps = 5;
let ResourceCycles = [1,1,3];
}
def: InstRW<[SKLWriteResGroup187], (instregex "(V?)DPPSrmi")>;
def SKLWriteResGroup189 : SchedWriteRes<[SKLPort0]> {
let Latency = 20;
let NumMicroOps = 1;
@ -1984,13 +1959,6 @@ def SKLWriteResGroup190 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> {
}
def: InstRW<[SKLWriteResGroup190], (instregex "(V?)DIVPDrm")>;
def SKLWriteResGroup191 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort01]> {
let Latency = 20;
let NumMicroOps = 5;
let ResourceCycles = [1,1,3];
}
def: InstRW<[SKLWriteResGroup191], (instregex "VDPPSYrmi")>;
def SKLWriteResGroup192 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort6,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> {
let Latency = 20;
let NumMicroOps = 8;

View File

@ -169,6 +169,9 @@ defm : SKXWriteResPair<WriteFRsqrtY,[SKXPort0], 4, [1], 1, 7>; // Floating poin
defm : SKXWriteResPair<WriteFMA, [SKXPort015], 4, [1], 1, 6>; // Fused Multiply Add.
defm : SKXWriteResPair<WriteFMAS, [SKXPort015], 4, [1], 1, 5>; // Fused Multiply Add (Scalar).
defm : SKXWriteResPair<WriteFMAY, [SKXPort015], 4, [1], 1, 7>; // Fused Multiply Add (YMM/ZMM).
defm : SKXWriteResPair<WriteDPPD, [SKXPort5,SKXPort015], 9, [1,2], 3, 6>; // Floating point double dot product.
defm : SKXWriteResPair<WriteDPPS, [SKXPort5,SKXPort015], 13, [1,3], 4, 6>; // Floating point single dot product.
defm : SKXWriteResPair<WriteDPPSY,[SKXPort5,SKXPort015], 13, [1,3], 4, 7>; // Floating point single dot product (YMM).
defm : SKXWriteResPair<WriteFSign, [SKXPort0], 1>; // Floating point fabs/fchs.
defm : SKXWriteResPair<WriteFLogic, [SKXPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals.
defm : SKXWriteResPair<WriteFLogicY, [SKXPort015], 1, [1], 1, 7>; // Floating point and/or/xor logicals (YMM/ZMM).
@ -2516,13 +2519,6 @@ def: InstRW<[SKXWriteResGroup138], (instregex "VRCP14PDZr(b?)",
"VRSQRT14PDZr(b?)",
"VRSQRT14PSZr(b?)")>;
def SKXWriteResGroup139 : SchedWriteRes<[SKXPort5,SKXPort015]> {
let Latency = 9;
let NumMicroOps = 3;
let ResourceCycles = [1,2];
}
def: InstRW<[SKXWriteResGroup139], (instregex "(V?)DPPDrri")>;
def SKXWriteResGroup141 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
let Latency = 9;
let NumMicroOps = 3;
@ -2988,15 +2984,6 @@ def SKXWriteResGroup181 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
}
def: InstRW<[SKXWriteResGroup181], (instregex "VCVTDQ2PDYrm")>;
def SKXWriteResGroup182 : SchedWriteRes<[SKXPort5,SKXPort015]> {
let Latency = 13;
let NumMicroOps = 4;
let ResourceCycles = [1,3];
}
def: InstRW<[SKXWriteResGroup182], (instregex "DPPSrri",
"VDPPSYrri",
"VDPPSrri")>;
def SKXWriteResGroup183 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
let Latency = 13;
let NumMicroOps = 4;
@ -3092,13 +3079,6 @@ def: InstRW<[SKXWriteResGroup192], (instregex "VRNDSCALEPDZ256rm(b?)i",
"VROUNDPDYm",
"VROUNDPSYm")>;
def SKXWriteResGroup193 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
let Latency = 15;
let NumMicroOps = 4;
let ResourceCycles = [1,1,2];
}
def: InstRW<[SKXWriteResGroup193], (instregex "(V?)DPPDrmi")>;
def SKXWriteResGroup194 : SchedWriteRes<[SKXPort1,SKXPort5,SKXPort01,SKXPort23,SKXPort015]> {
let Latency = 15;
let NumMicroOps = 8;
@ -3244,13 +3224,6 @@ def SKXWriteResGroup211 : SchedWriteRes<[SKXPort23,SKXPort015]> {
def: InstRW<[SKXWriteResGroup211], (instregex "VPMULLQZ256rm(b?)",
"VPMULLQZrm(b?)")>;
def SKXWriteResGroup212 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
let Latency = 19;
let NumMicroOps = 5;
let ResourceCycles = [1,1,3];
}
def: InstRW<[SKXWriteResGroup212], (instregex "(V?)DPPSrmi")>;
def SKXWriteResGroup214 : SchedWriteRes<[]> {
let Latency = 20;
let NumMicroOps = 0;
@ -3275,13 +3248,6 @@ def SKXWriteResGroup216 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> {
}
def: InstRW<[SKXWriteResGroup216], (instregex "(V?)DIVPD(Z128)?rm")>;
def SKXWriteResGroup217 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
let Latency = 20;
let NumMicroOps = 5;
let ResourceCycles = [1,1,3];
}
def: InstRW<[SKXWriteResGroup217], (instregex "VDPPSYrmi")>;
def SKXWriteResGroup218 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
let Latency = 20;
let NumMicroOps = 5;

View File

@ -119,6 +119,9 @@ defm WriteFRsqrtY: X86SchedWritePair; // Floating point reciprocal square root e
defm WriteFMA : X86SchedWritePair; // Fused Multiply Add.
defm WriteFMAS : X86SchedWritePair; // Fused Multiply Add (Scalar).
defm WriteFMAY : X86SchedWritePair; // Fused Multiply Add (YMM/ZMM).
defm WriteDPPD : X86SchedWritePair; // Floating point double dot product.
defm WriteDPPS : X86SchedWritePair; // Floating point single dot product.
defm WriteDPPSY : X86SchedWritePair; // Floating point single dot product (YMM).
defm WriteFSign : X86SchedWritePair; // Floating point fabs/fchs.
defm WriteFLogic : X86SchedWritePair; // Floating point and/or/xor logicals.
defm WriteFLogicY : X86SchedWritePair; // Floating point and/or/xor logicals (YMM/ZMM).
@ -243,6 +246,10 @@ def SchedWriteFMul
: X86SchedWriteWidths<WriteFMul, WriteFMul, WriteFMulY, WriteFMulY>;
def SchedWriteFMA
: X86SchedWriteWidths<WriteFMAS, WriteFMA, WriteFMAY, WriteFMAY>;
// Per-width scheduler class tables for the dot-product instructions.
// NOTE(review): the four slots are presumably Scalar, XMM, YMM, ZMM, by
// analogy with SchedWriteFMA above — confirm against X86SchedWriteWidths.
//
// DPPD: every slot maps to WriteDPPD; the instruction definitions in this
// change only reference the .XMM member (DPPD / VDPPD).
def SchedWriteDPPD
: X86SchedWriteWidths<WriteDPPD, WriteDPPD, WriteDPPD, WriteDPPD>;
// DPPS: the first two slots use WriteDPPS and the last two use WriteDPPSY;
// the instruction definitions in this change reference .XMM (DPPS / VDPPS)
// and .YMM (VDPPSY).
def SchedWriteDPPS
: X86SchedWriteWidths<WriteDPPS, WriteDPPS, WriteDPPSY, WriteDPPSY>;
def SchedWriteFDiv
: X86SchedWriteWidths<WriteFDiv, WriteFDiv, WriteFDivY, WriteFDivY>;
def SchedWriteFSqrt

View File

@ -227,6 +227,9 @@ defm : AtomWriteResPair<WriteFVarShuffleY, [AtomPort0], [AtomPort0]>; // NOTE
defm : AtomWriteResPair<WriteFMA, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
defm : AtomWriteResPair<WriteFMAS, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
defm : AtomWriteResPair<WriteFMAY, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
defm : AtomWriteResPair<WriteDPPD, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
defm : AtomWriteResPair<WriteDPPS, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
defm : AtomWriteResPair<WriteDPPSY, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
defm : AtomWriteResPair<WriteFBlend, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
defm : AtomWriteResPair<WriteFBlendY, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
defm : AtomWriteResPair<WriteFVarBlend, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.

View File

@ -325,6 +325,9 @@ defm : JWriteResYMMPair<WriteFMulY, [JFPU1, JFPM], 2, [2,2], 2>;
defm : JWriteResFpuPair<WriteFMA, [JFPU1, JFPM], 2>; // NOTE: Doesn't exist on Jaguar.
defm : JWriteResFpuPair<WriteFMAS, [JFPU1, JFPM], 2>; // NOTE: Doesn't exist on Jaguar.
defm : JWriteResFpuPair<WriteFMAY, [JFPU1, JFPM], 2>; // NOTE: Doesn't exist on Jaguar.
// Dot-product scheduler classes. Latency, resource cycles and micro-op counts
// equal the JWriteDPPD / JWriteDPPS / JWriteVDPPSY per-instruction overrides
// that this change removes.
// NOTE(review): the removed *Ld overrides added 5 cycles of latency and a
// JLAGU resource for the folded-load forms; presumably the pair multiclasses
// supply the same — confirm against JWriteResFpuPair / JWriteResYMMPair.
defm : JWriteResFpuPair<WriteDPPD, [JFPU1, JFPM, JFPA], 9, [1, 3, 3], 3>; // Floating point double dot product.
defm : JWriteResFpuPair<WriteDPPS, [JFPU1, JFPM, JFPA], 11, [1, 3, 3], 5>; // Floating point single dot product.
defm : JWriteResYMMPair<WriteDPPSY, [JFPU1, JFPM, JFPA], 12, [2, 6, 6], 10>; // Floating point single dot product (YMM).
defm : JWriteResFpuPair<WriteFRcp, [JFPU1, JFPM], 2>;
defm : JWriteResYMMPair<WriteFRcpY, [JFPU1, JFPM], 2, [2,2], 2>;
defm : JWriteResFpuPair<WriteFRsqrt, [JFPU1, JFPM], 2>;
@ -482,38 +485,6 @@ defm : JWriteResFpuPair<WritePHAddY, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteCLMul, [JFPU0, JVIMUL], 2>;
////////////////////////////////////////////////////////////////////////////////
// SSE4.1 instructions.
////////////////////////////////////////////////////////////////////////////////
def JWriteDPPS: SchedWriteRes<[JFPU1, JFPM, JFPA]> {
let Latency = 11;
let ResourceCycles = [1, 3, 3];
let NumMicroOps = 5;
}
def : InstRW<[JWriteDPPS], (instrs DPPSrri, VDPPSrri)>;
def JWriteDPPSLd: SchedWriteRes<[JLAGU, JFPU1, JFPM, JFPA]> {
let Latency = 16;
let ResourceCycles = [1, 1, 3, 3];
let NumMicroOps = 5;
}
def : InstRW<[JWriteDPPSLd], (instrs DPPSrmi, VDPPSrmi)>;
def JWriteDPPD: SchedWriteRes<[JFPU1, JFPM, JFPA]> {
let Latency = 9;
let ResourceCycles = [1, 3, 3];
let NumMicroOps = 3;
}
def : InstRW<[JWriteDPPD], (instrs DPPDrri, VDPPDrri)>;
def JWriteDPPDLd: SchedWriteRes<[JLAGU, JFPU1, JFPM, JFPA]> {
let Latency = 14;
let ResourceCycles = [1, 1, 3, 3];
let NumMicroOps = 3;
}
def : InstRW<[JWriteDPPDLd], (instrs DPPDrmi, VDPPDrmi)>;
////////////////////////////////////////////////////////////////////////////////
// SSE4A instructions.
////////////////////////////////////////////////////////////////////////////////
@ -560,20 +531,6 @@ def : InstRW<[JWriteCVTPH2PSYLd], (instrs VCVTPH2PSYrm)>;
// AVX instructions.
////////////////////////////////////////////////////////////////////////////////
def JWriteVDPPSY: SchedWriteRes<[JFPU1, JFPM, JFPA]> {
let Latency = 12;
let ResourceCycles = [2, 6, 6];
let NumMicroOps = 10;
}
def : InstRW<[JWriteVDPPSY], (instrs VDPPSYrri)>;
def JWriteVDPPSYLd: SchedWriteRes<[JLAGU, JFPU1, JFPM, JFPA]> {
let Latency = 17;
let ResourceCycles = [2, 2, 6, 6];
let NumMicroOps = 10;
}
def : InstRW<[JWriteVDPPSYLd, ReadAfterLd], (instrs VDPPSYrmi)>;
def JWriteVMULYPD: SchedWriteRes<[JFPU1, JFPM]> {
let Latency = 4;
let ResourceCycles = [2, 4];

View File

@ -144,6 +144,9 @@ defm : SLMWriteResPair<WriteFRsqrt, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFRsqrtY,[SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFSqrt, [SLM_FPC_RSV0], 15>;
defm : SLMWriteResPair<WriteFSqrtY, [SLM_FPC_RSV0], 15>;
// Dot-product scheduler classes.
// NOTE(review): all three entries share one resource and latency, and no
// per-instruction DPPD/DPPS override for this model is visible in this
// change — confirm these values are measured rather than placeholders.
defm : SLMWriteResPair<WriteDPPD, [SLM_FPC_RSV1], 3>; // Floating point double dot product.
defm : SLMWriteResPair<WriteDPPS, [SLM_FPC_RSV1], 3>; // Floating point single dot product.
defm : SLMWriteResPair<WriteDPPSY, [SLM_FPC_RSV1], 3>; // Floating point single dot product (YMM).
defm : SLMWriteResPair<WriteCvtF2I, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2F, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtF2F, [SLM_FPC_RSV01], 4>;

View File

@ -1540,17 +1540,19 @@ def : InstRW<[ZnWriteROUNDm], (instregex "(V?)ROUND(S|P)(S|D)(Y?)m")>;
// DPPS.
// x,x,i / v,v,v,i.
def : InstRW<[WriteMicrocoded], (instregex "(V?)DPPS(Y?)rri")>;
def : SchedAlias<WriteDPPS, ZnWriteMicrocoded>;
def : SchedAlias<WriteDPPSY, ZnWriteMicrocoded>;
// x,m,i / v,v,m,i.
def : InstRW<[WriteMicrocoded], (instregex "(V?)DPPS(Y?)rmi")>;
def : SchedAlias<WriteDPPSLd, ZnWriteMicrocoded>;
def : SchedAlias<WriteDPPSYLd,ZnWriteMicrocoded>;
// DPPD.
// x,x,i.
def : InstRW<[WriteMicrocoded], (instregex "(V?)DPPDrri")>;
def : SchedAlias<WriteDPPD, ZnWriteMicrocoded>;
// x,m,i.
def : InstRW<[WriteMicrocoded], (instregex "(V?)DPPDrmi")>;
def : SchedAlias<WriteDPPDLd, ZnWriteMicrocoded>;
// VSQRTPS.
// y,y.