1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-23 13:02:52 +02:00

[X86] Split WriteVecIMul/WriteVecPMULLD/WriteMPSAD/WritePSADBW into XMM and YMM/ZMM scheduler classes

Also retagged the VDBPSADBW instructions as SchedWritePSADBW instead of SchedWriteVecIMul, which matches the behaviour on SkylakeServer (the only model that supports these instructions).

llvm-svn: 331445
This commit is contained in:
Simon Pilgrim 2018-05-03 10:31:20 +00:00
parent 61aa16d663
commit b7289046cc
15 changed files with 134 additions and 301 deletions

View File

@ -9711,7 +9711,7 @@ let Predicates = [HasVLX, HasBWI] in {
}
defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
SchedWriteVecIMul, avx512vl_i16_info, avx512vl_i8_info>,
SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
EVEX_CD8<8, CD8VF>;
multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,

View File

@ -203,8 +203,10 @@ defm : BWWriteResPair<WriteVecALU, [BWPort15], 1>; // Vector integer ALU op,
defm : BWWriteResPair<WriteVecLogic, [BWPort015], 1, [1], 1, 5>; // Vector integer and/or/xor.
defm : BWWriteResPair<WriteVecLogicY,[BWPort015], 1, [1], 1, 6>; // Vector integer and/or/xor (YMM/ZMM).
defm : BWWriteResPair<WriteVecShift, [BWPort0], 1>; // Vector integer shifts.
defm : BWWriteResPair<WriteVecIMul, [BWPort0], 5>; // Vector integer multiply.
defm : BWWriteResPair<WritePMULLD, [BWPort0], 10, [2], 2, 5>; // PMULLD
defm : BWWriteResPair<WriteVecIMul, [BWPort0], 5, [1], 1, 5>; // Vector integer multiply.
defm : BWWriteResPair<WriteVecIMulY, [BWPort0], 5, [1], 1, 6>; // Vector integer multiply (YMM/ZMM).
defm : BWWriteResPair<WritePMULLD, [BWPort0], 10, [2], 2, 5>; // Vector PMULLD.
defm : BWWriteResPair<WritePMULLDY, [BWPort0], 10, [2], 2, 6>; // Vector PMULLD (YMM/ZMM).
defm : BWWriteResPair<WriteShuffle, [BWPort5], 1, [1], 1, 5>; // Vector shuffles.
defm : BWWriteResPair<WriteShuffleY, [BWPort5], 1, [1], 1, 6>; // Vector shuffles (YMM/ZMM).
defm : BWWriteResPair<WriteVarShuffle, [BWPort5], 1, [1], 1, 5>; // Vector variable shuffles.
@ -214,8 +216,10 @@ defm : BWWriteResPair<WriteBlendY, [BWPort5], 1, [1], 1, 6>; // Vector blends (Y
defm : BWWriteResPair<WriteVarBlend, [BWPort5], 2, [2], 2, 5>; // Vector variable blends.
defm : BWWriteResPair<WriteVarBlendY, [BWPort5], 2, [2], 2, 6>; // Vector variable blends (YMM/ZMM).
defm : BWWriteResPair<WriteMPSAD, [BWPort0, BWPort5], 7, [1, 2], 3, 5>; // Vector MPSAD.
defm : BWWriteResPair<WritePSADBW, [BWPort0], 5>; // Vector PSADBW.
defm : BWWriteResPair<WritePHMINPOS, [BWPort0], 5>; // Vector PHMINPOS.
defm : BWWriteResPair<WriteMPSADY, [BWPort0, BWPort5], 7, [1, 2], 3, 6>; // Vector MPSAD (YMM/ZMM).
defm : BWWriteResPair<WritePSADBW, [BWPort0], 5, [1], 1, 5>; // Vector PSADBW.
defm : BWWriteResPair<WritePSADBWY, [BWPort0], 5, [1], 1, 6>; // Vector PSADBW (YMM/ZMM).
defm : BWWriteResPair<WritePHMINPOS, [BWPort0], 5>; // Vector PHMINPOS.
// Vector insert/extract operations.
def : WriteRes<WriteVecInsert, [BWPort5]> {
@ -1504,16 +1508,7 @@ def BWWriteResGroup123 : SchedWriteRes<[BWPort0,BWPort23]> {
let ResourceCycles = [1,1];
}
def: InstRW<[BWWriteResGroup123], (instregex "MUL_F(32|64)m",
"VPCMPGTQYrm",
"VPMADDUBSWYrm",
"VPMADDWDYrm",
"VPMULDQYrm",
"VPMULHRSWYrm",
"VPMULHUWYrm",
"VPMULHWYrm",
"VPMULLWYrm",
"VPMULUDQYrm",
"VPSADBWYrm")>;
"VPCMPGTQYrm")>;
def BWWriteResGroup126 : SchedWriteRes<[BWPort0,BWPort015]> {
let Latency = 11;
@ -1594,13 +1589,6 @@ def BWWriteResGroup137_1 : SchedWriteRes<[BWPort0,BWFPDivider]> {
}
def: InstRW<[BWWriteResGroup137_1], (instregex "(V?)SQRTSSr")>;
def BWWriteResGroup138 : SchedWriteRes<[BWPort0,BWPort5,BWPort23]> {
let Latency = 13;
let NumMicroOps = 4;
let ResourceCycles = [1,2,1];
}
def: InstRW<[BWWriteResGroup138], (instregex "VMPSADBWYrmi")>;
def BWWriteResGroup139 : SchedWriteRes<[BWPort0,BWFPDivider]> {
let Latency = 14;
let NumMicroOps = 1;
@ -1681,13 +1669,6 @@ def BWWriteResGroup150 : SchedWriteRes<[BWPort0,BWPort23,BWFPDivider]> {
def: InstRW<[BWWriteResGroup150], (instregex "(V?)DIVPSrm",
"(V?)DIVSSrm")>;
def BWWriteResGroup151 : SchedWriteRes<[BWPort0,BWPort23]> {
let Latency = 16;
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
def: InstRW<[BWWriteResGroup151], (instregex "VPMULLDYrm")>;
def BWWriteResGroup153 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06,BWPort15,BWPort0156]> {
let Latency = 16;
let NumMicroOps = 14;

View File

@ -198,8 +198,10 @@ defm : HWWriteResPair<WriteVecShift, [HWPort0], 1>;
defm : HWWriteResPair<WriteVecLogic, [HWPort015], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteVecLogicY,[HWPort015], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteVecALU, [HWPort15], 1>;
defm : HWWriteResPair<WriteVecIMul, [HWPort0], 5>;
defm : HWWriteResPair<WriteVecIMul, [HWPort0], 5, [1], 1, 6>;
defm : HWWriteResPair<WriteVecIMulY, [HWPort0], 5, [1], 1, 7>;
defm : HWWriteResPair<WritePMULLD, [HWPort0], 10, [2], 2, 6>;
defm : HWWriteResPair<WritePMULLDY, [HWPort0], 10, [2], 2, 7>;
defm : HWWriteResPair<WriteShuffle, [HWPort5], 1, [1], 1, 5>;
defm : HWWriteResPair<WriteShuffleY, [HWPort5], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteVarShuffle, [HWPort5], 1, [1], 1, 6>;
@ -212,7 +214,9 @@ defm : HWWriteResPair<WriteVarBlend, [HWPort5], 2, [2], 2, 6>;
defm : HWWriteResPair<WriteVarBlendY, [HWPort5], 2, [2], 2, 7>;
defm : HWWriteResPair<WriteVarVecShift, [HWPort0, HWPort5], 2, [2, 1]>;
defm : HWWriteResPair<WriteMPSAD, [HWPort0, HWPort5], 7, [1, 2], 3, 6>;
defm : HWWriteResPair<WritePSADBW, [HWPort0], 5>;
defm : HWWriteResPair<WriteMPSADY, [HWPort0, HWPort5], 7, [1, 2], 3, 7>;
defm : HWWriteResPair<WritePSADBW, [HWPort0], 5, [1], 1, 6>;
defm : HWWriteResPair<WritePSADBWY, [HWPort0], 5, [1], 1, 7>;
defm : HWWriteResPair<WritePHMINPOS, [HWPort0], 5, [1], 1, 6>;
// Vector insert/extract operations.
@ -1799,15 +1803,6 @@ def HWWriteResGroup91_2 : SchedWriteRes<[HWPort0,HWPort23]> {
let ResourceCycles = [1,1];
}
def: InstRW<[HWWriteResGroup91_2], (instregex "(V?)PCMPGTQrm",
"(V?)PMADDUBSWrm",
"(V?)PMADDWDrm",
"(V?)PMULDQrm",
"(V?)PMULHRSWrm",
"(V?)PMULHUWrm",
"(V?)PMULHWrm",
"(V?)PMULLWrm",
"(V?)PMULUDQrm",
"(V?)PSADBWrm",
"(V?)RCPPSm",
"(V?)RSQRTPSm")>;
@ -1817,16 +1812,21 @@ def HWWriteResGroup91_3 : SchedWriteRes<[HWPort0,HWPort23]> {
let ResourceCycles = [1,1];
}
def: InstRW<[HWWriteResGroup91_3], (instregex "MUL_F(32|64)m",
"VPCMPGTQYrm",
"VPMADDUBSWYrm",
"VPMADDWDYrm",
"VPMULDQYrm",
"VPMULHRSWYrm",
"VPMULHUWYrm",
"VPMULHWYrm",
"VPMULLWYrm",
"VPMULUDQYrm",
"VPSADBWYrm")>;
"VPCMPGTQYrm")>;
def HWWriteResGroup91_5 : SchedWriteRes<[HWPort0,HWPort23]> {
let Latency = 10;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
def: InstRW<[HWWriteResGroup91_5], (instregex "MMX_PMADDUBSWrm",
"MMX_PMADDWDirm",
"MMX_PMULHRSWrm",
"MMX_PMULHUWirm",
"MMX_PMULHWirm",
"MMX_PMULLWirm",
"MMX_PMULUDQirm",
"MMX_PSADBWirm")>;
def HWWriteResGroup92_2 : SchedWriteRes<[HWPort01,HWPort23]> {
let Latency = 10;
@ -1966,13 +1966,6 @@ def HWWriteResGroup109 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort06,HWPo
def: InstRW<[HWWriteResGroup109], (instregex "SHLD(16|32|64)mrCL",
"SHRD(16|32|64)mrCL")>;
def HWWriteResGroup113_1 : SchedWriteRes<[HWPort0,HWPort5,HWPort23]> {
let Latency = 14;
let NumMicroOps = 4;
let ResourceCycles = [1,2,1];
}
def: InstRW<[HWWriteResGroup113_1], (instregex "VMPSADBWYrmi")>;
def HWWriteResGroup114 : SchedWriteRes<[HWPort6,HWPort06,HWPort15,HWPort0156]> {
let Latency = 7;
let NumMicroOps = 7;
@ -2001,13 +1994,6 @@ def HWWriteResGroup117 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort23]> {
}
def: InstRW<[HWWriteResGroup117], (instregex "(V?)DPPDrmi")>;
def HWWriteResGroup119_1 : SchedWriteRes<[HWPort0,HWPort23]> {
let Latency = 17;
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
def: InstRW<[HWWriteResGroup119_1], (instregex "VPMULLDYrm")>;
def HWWriteResGroup120 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort06,HWPort15,HWPort0156]> {
let Latency = 16;
let NumMicroOps = 10;

View File

@ -178,8 +178,10 @@ defm : SBWriteResPair<WriteVecShift, [SBPort5], 1>;
defm : SBWriteResPair<WriteVecLogic, [SBPort015], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteVecLogicY,[SBPort015], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteVecALU, [SBPort1], 3>;
defm : SBWriteResPair<WriteVecIMul, [SBPort0], 5>;
defm : SBWriteResPair<WritePMULLD, [SBPort0], 5, [1], 1, 6>; // TODO this is probably wrong for 256/512-bit for the "generic" model
defm : SBWriteResPair<WriteVecIMul, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WriteVecIMulY, [SBPort0], 5, [1], 1, 7>;
defm : SBWriteResPair<WritePMULLD, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WritePMULLDY, [SBPort0], 5, [1], 1, 7>; // TODO this is probably wrong for 256/512-bit for the "generic" model
defm : SBWriteResPair<WriteShuffle, [SBPort5], 1, [1], 1, 5>;
defm : SBWriteResPair<WriteShuffleY, [SBPort5], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteVarShuffle, [SBPort15], 1, [1], 1, 6>;
@ -188,8 +190,10 @@ defm : SBWriteResPair<WriteBlend, [SBPort15], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteBlendY, [SBPort15], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteVarBlend, [SBPort15], 2, [2], 2, 6>;
defm : SBWriteResPair<WriteVarBlendY,[SBPort15], 2, [2], 2, 7>;
defm : SBWriteResPair<WriteMPSAD, [SBPort0, SBPort15], 7, [1,2], 3, 6>;
defm : SBWriteResPair<WritePSADBW, [SBPort0], 5>;
defm : SBWriteResPair<WriteMPSAD, [SBPort0, SBPort15], 7, [1,2], 3, 6>;
defm : SBWriteResPair<WriteMPSADY, [SBPort0, SBPort15], 7, [1,2], 3, 7>;
defm : SBWriteResPair<WritePSADBW, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WritePSADBWY, [SBPort0], 5, [1], 1, 7>;
defm : SBWriteResPair<WritePHMINPOS, [SBPort0], 5, [1], 1, 6>;
// Vector insert/extract operations.
@ -1227,21 +1231,6 @@ def SBWriteResGroup88 : SchedWriteRes<[SBPort4,SBPort23,SBPort05,SBPort015]> {
def: InstRW<[SBWriteResGroup88], (instregex "SHLD(16|32|64)mri8",
"SHRD(16|32|64)mri8")>;
def SBWriteResGroup89 : SchedWriteRes<[SBPort0,SBPort23]> {
let Latency = 11;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
def: InstRW<[SBWriteResGroup89], (instregex "(V?)PMADDUBSWrm",
"(V?)PMADDWDrm",
"(V?)PMULDQrm",
"(V?)PMULHRSWrm",
"(V?)PMULHUWrm",
"(V?)PMULHWrm",
"(V?)PMULLWrm",
"(V?)PMULUDQrm",
"(V?)PSADBWrm")>;
def SBWriteResGroup89_2 : SchedWriteRes<[SBPort0,SBPort23]> {
let Latency = 10;
let NumMicroOps = 2;

View File

@ -199,8 +199,10 @@ defm : SKLWriteResPair<WriteVecALU, [SKLPort15], 1>; // Vector integer ALU op
defm : SKLWriteResPair<WriteVecLogic, [SKLPort015], 1, [1], 1, 6>; // Vector integer and/or/xor.
defm : SKLWriteResPair<WriteVecLogicY,[SKLPort015], 1, [1], 1, 7>; // Vector integer and/or/xor (YMM/ZMM).
defm : SKLWriteResPair<WriteVecShift, [SKLPort0], 1>; // Vector integer shifts.
defm : SKLWriteResPair<WriteVecIMul, [SKLPort0], 5>; // Vector integer multiply.
defm : SKLWriteResPair<WritePMULLD, [SKLPort01], 10, [2], 2, 6>;
defm : SKLWriteResPair<WriteVecIMul, [SKLPort01], 4, [1], 1, 6>; // Vector integer multiply.
defm : SKLWriteResPair<WriteVecIMulY, [SKLPort01], 4, [1], 1, 7>; // Vector integer multiply (YMM/ZMM).
defm : SKLWriteResPair<WritePMULLD, [SKLPort01], 10, [2], 2, 6>; // Vector PMULLD.
defm : SKLWriteResPair<WritePMULLDY, [SKLPort01], 10, [2], 2, 7>; // Vector PMULLD (YMM/ZMM).
defm : SKLWriteResPair<WriteShuffle, [SKLPort5], 1, [1], 1, 5>; // Vector shuffles.
defm : SKLWriteResPair<WriteShuffleY, [SKLPort5], 1, [1], 1, 7>; // Vector shuffles (YMM/ZMM).
defm : SKLWriteResPair<WriteVarShuffle, [SKLPort5], 1, [1], 1, 6>; // Vector variable shuffles.
@ -210,7 +212,9 @@ defm : SKLWriteResPair<WriteBlendY, [SKLPort5], 1, [1], 1, 7>; // Vector blends
defm : SKLWriteResPair<WriteVarBlend, [SKLPort015], 2, [2], 2, 6>; // Vector variable blends.
defm : SKLWriteResPair<WriteVarBlendY, [SKLPort015], 2, [2], 2, 6>; // Vector variable blends (YMM/ZMM).
defm : SKLWriteResPair<WriteMPSAD, [SKLPort5], 4, [2], 2, 6>; // Vector MPSAD.
defm : SKLWriteResPair<WritePSADBW, [SKLPort5], 3>; // Vector PSADBW.
defm : SKLWriteResPair<WriteMPSADY, [SKLPort5], 4, [2], 2, 7>; // Vector MPSAD (YMM/ZMM).
defm : SKLWriteResPair<WritePSADBW, [SKLPort5], 3, [1], 1, 6>; // Vector PSADBW.
defm : SKLWriteResPair<WritePSADBWY, [SKLPort5], 3, [1], 1, 7>; // Vector PSADBW (YMM/ZMM).
defm : SKLWriteResPair<WritePHMINPOS, [SKLPort01], 4, [1], 1, 6>; // Vector PHMINPOS.
// Vector insert/extract operations.
@ -918,15 +922,7 @@ def SKLWriteResGroup48 : SchedWriteRes<[SKLPort01]> {
}
def: InstRW<[SKLWriteResGroup48], (instregex "(V?)CVTDQ2PS(Y?)rr",
"(V?)CVTPS2DQ(Y?)rr",
"(V?)CVTTPS2DQ(Y?)rr",
"(V?)PMADDUBSW(Y?)rr",
"(V?)PMADDWD(Y?)rr",
"(V?)PMULDQ(Y?)rr",
"(V?)PMULHRSW(Y?)rr",
"(V?)PMULHUW(Y?)rr",
"(V?)PMULHW(Y?)rr",
"(V?)PMULLW(Y?)rr",
"(V?)PMULUDQ(Y?)rr")>;
"(V?)CVTTPS2DQ(Y?)rr")>;
def SKLWriteResGroup51 : SchedWriteRes<[SKLPort1,SKLPort5]> {
let Latency = 4;
@ -1506,6 +1502,7 @@ def: InstRW<[SKLWriteResGroup108], (instregex "FCOM32m",
"FCOM64m",
"FCOMP32m",
"FCOMP64m",
"MMX_PSADBWirm", // TODO - SKLWriteResGroup120??
"VPBROADCASTBYrm",
"VPBROADCASTWYrm",
"VPMOVSXBDYrm",
@ -1669,8 +1666,7 @@ def: InstRW<[SKLWriteResGroup121], (instregex "(V?)PCMPGTQrm",
"VPMOVSXBWYrm",
"VPMOVSXDQYrm",
"VPMOVSXWDYrm",
"VPMOVZXWDYrm",
"(V?)PSADBWrm")>;
"VPMOVZXWDYrm")>;
def SKLWriteResGroup122 : SchedWriteRes<[SKLPort01,SKLPort23]> {
let Latency = 9;
@ -1775,8 +1771,7 @@ def: InstRW<[SKLWriteResGroup133], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
"VPMOVZXBQYrm",
"VPMOVZXBWYrm",
"VPMOVZXDQYrm",
"VPMOVZXWQYrm",
"VPSADBWYrm")>;
"VPMOVZXWQYrm")>;
def SKLWriteResGroup134 : SchedWriteRes<[SKLPort01,SKLPort23]> {
let Latency = 10;
@ -1787,15 +1782,7 @@ def: InstRW<[SKLWriteResGroup134], (instregex "(V?)CVTDQ2PSrm",
"(V?)CVTPH2PSYrm",
"(V?)CVTPS2DQrm",
"(V?)CVTSS2SDrm",
"(V?)CVTTPS2DQrm",
"(V?)PMADDUBSWrm",
"(V?)PMADDWDrm",
"(V?)PMULDQrm",
"(V?)PMULHRSWrm",
"(V?)PMULHUWrm",
"(V?)PMULHWrm",
"(V?)PMULLWrm",
"(V?)PMULUDQrm")>;
"(V?)CVTTPS2DQrm")>;
def SKLWriteResGroup138 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
let Latency = 10;
@ -1883,15 +1870,7 @@ def SKLWriteResGroup147 : SchedWriteRes<[SKLPort01,SKLPort23]> {
def: InstRW<[SKLWriteResGroup147], (instregex "VCVTDQ2PSYrm",
"VCVTPS2DQYrm",
"VCVTPS2PDYrm",
"VCVTTPS2DQYrm",
"VPMADDUBSWYrm",
"VPMADDWDYrm",
"VPMULDQYrm",
"VPMULHRSWYrm",
"VPMULHUWYrm",
"VPMULHWYrm",
"VPMULLWYrm",
"VPMULUDQYrm")>;
"VCVTTPS2DQYrm")>;
def SKLWriteResGroup149 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let Latency = 11;
@ -1901,8 +1880,7 @@ def SKLWriteResGroup149 : SchedWriteRes<[SKLPort5,SKLPort23]> {
def: InstRW<[SKLWriteResGroup149], (instregex "FICOM16m",
"FICOM32m",
"FICOMP16m",
"FICOMP32m",
"VMPSADBWYrmi")>;
"FICOMP32m")>;
def SKLWriteResGroup150 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
let Latency = 11;
@ -2065,13 +2043,6 @@ def SKLWriteResGroup172 : SchedWriteRes<[SKLPort23,SKLPort01]> {
def: InstRW<[SKLWriteResGroup172], (instregex "VROUNDPDYm",
"VROUNDPSYm")>;
def SKLWriteResGroup172_2 : SchedWriteRes<[SKLPort23,SKLPort01]> {
let Latency = 17;
let NumMicroOps = 3;
let ResourceCycles = [1,2];
}
def: InstRW<[SKLWriteResGroup172_2], (instregex "VPMULLDYrm")>;
def SKLWriteResGroup173 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort01]> {
let Latency = 15;
let NumMicroOps = 4;

View File

@ -199,8 +199,10 @@ defm : SKXWriteResPair<WriteVecALU, [SKXPort15], 1>; // Vector integer ALU op
defm : SKXWriteResPair<WriteVecLogic, [SKXPort015], 1, [1], 1, 6>; // Vector integer and/or/xor.
defm : SKXWriteResPair<WriteVecLogicY,[SKXPort015], 1, [1], 1, 7>; // Vector integer and/or/xor (YMM/ZMM).
defm : SKXWriteResPair<WriteVecShift, [SKXPort0], 1>; // Vector integer shifts.
defm : SKXWriteResPair<WriteVecIMul, [SKXPort0], 5>; // Vector integer multiply.
defm : SKXWriteResPair<WritePMULLD, [SKXPort015], 10, [2], 2, 6>; // Vector integer multiply.
defm : SKXWriteResPair<WriteVecIMul, [SKXPort015], 4, [1], 1, 6>; // Vector integer multiply.
defm : SKXWriteResPair<WriteVecIMulY, [SKXPort015], 4, [1], 1, 7>; // Vector integer multiply (YMM/ZMM).
defm : SKXWriteResPair<WritePMULLD, [SKXPort015], 10, [2], 2, 6>; // Vector PMULLD.
defm : SKXWriteResPair<WritePMULLDY, [SKXPort015], 10, [2], 2, 7>; // Vector PMULLD (YMM/ZMM).
defm : SKXWriteResPair<WriteShuffle, [SKXPort5], 1, [1], 1, 5>; // Vector shuffles.
defm : SKXWriteResPair<WriteShuffleY, [SKXPort5], 1, [1], 1, 7>; // Vector shuffles (YMM/ZMM).
defm : SKXWriteResPair<WriteVarShuffle, [SKXPort5], 1, [1], 1, 6>; // Vector variable shuffles.
@ -209,8 +211,10 @@ defm : SKXWriteResPair<WriteBlend, [SKXPort5], 1, [1], 1, 6>; // Vector blends.
defm : SKXWriteResPair<WriteBlendY,[SKXPort5], 1, [1], 1, 7>; // Vector blends (YMM/ZMM).
defm : SKXWriteResPair<WriteVarBlend, [SKXPort015], 2, [2], 2, 6>; // Vector variable blends.
defm : SKXWriteResPair<WriteVarBlendY,[SKXPort015], 2, [2], 2, 6>; // Vector variable blends (YMM/ZMM).
defm : SKXWriteResPair<WriteMPSAD, [SKXPort5], 4, [2], 2, 6>; // Vector MPSAD.
defm : SKXWriteResPair<WritePSADBW, [SKXPort5], 3, [1], 1, 6>; // Vector PSADBW.
defm : SKXWriteResPair<WriteMPSAD, [SKXPort5], 4, [2], 2, 6>; // Vector MPSAD.
defm : SKXWriteResPair<WriteMPSADY, [SKXPort5], 4, [2], 2, 7>; // Vector MPSAD (YMM/ZMM).
defm : SKXWriteResPair<WritePSADBW, [SKXPort5], 3, [1], 1, 6>; // Vector PSADBW.
defm : SKXWriteResPair<WritePSADBWY, [SKXPort5], 3, [1], 1, 7>; // Vector PSADBW (YMM/ZMM).
defm : SKXWriteResPair<WritePHMINPOS, [SKXPort015], 4, [1], 1, 6>; // Vector PHMINPOS.
// Vector insert/extract operations.
@ -1214,9 +1218,7 @@ def: InstRW<[SKXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_FPrST0",
"VCMPPSZrri",
"VCMPSDZrr",
"VCMPSSZrr",
"VDBPSADBWZ128rri",
"VDBPSADBWZ256rri",
"VDBPSADBWZrri",
"VDBPSADBWZrri", // TODO: 512-bit ops require ports 0/1 to be joined.
"VFPCLASSPDZ128rr",
"VFPCLASSPDZ256rr",
"VFPCLASSPDZrr",
@ -1518,47 +1520,7 @@ def: InstRW<[SKXWriteResGroup50], (instregex "VCVTDQ2PSYrr",
"VPLZCNTDZrr",
"VPLZCNTQZ128rr",
"VPLZCNTQZ256rr",
"VPLZCNTQZrr",
"VPMADDUBSWYrr",
"VPMADDUBSWZ128rr",
"VPMADDUBSWZ256rr",
"VPMADDUBSWZrr",
"(V?)PMADDUBSWrr",
"VPMADDWDYrr",
"VPMADDWDZ128rr",
"VPMADDWDZ256rr",
"VPMADDWDZrr",
"(V?)PMADDWDrr",
"VPMULDQYrr",
"VPMULDQZ128rr",
"VPMULDQZ256rr",
"VPMULDQZrr",
"(V?)PMULDQrr",
"VPMULHRSWYrr",
"VPMULHRSWZ128rr",
"VPMULHRSWZ256rr",
"VPMULHRSWZrr",
"(V?)PMULHRSWrr",
"VPMULHUWYrr",
"VPMULHUWZ128rr",
"VPMULHUWZ256rr",
"VPMULHUWZrr",
"(V?)PMULHUWrr",
"VPMULHWYrr",
"VPMULHWZ128rr",
"VPMULHWZ256rr",
"VPMULHWZrr",
"(V?)PMULHWrr",
"VPMULLWYrr",
"VPMULLWZ128rr",
"VPMULLWZ256rr",
"VPMULLWZrr",
"(V?)PMULLWrr",
"VPMULUDQYrr",
"VPMULUDQZ128rr",
"VPMULUDQZ256rr",
"VPMULUDQZrr",
"(V?)PMULUDQrr")>;
"VPLZCNTQZrr")>;
def SKXWriteResGroup51 : SchedWriteRes<[SKXPort5]> {
let Latency = 4;
@ -3060,7 +3022,6 @@ def: InstRW<[SKXWriteResGroup136], (instregex "VALIGNDZ128rm(b?)i",
"VCMPPSZ128rm(b?)i",
"VCMPSDZrm",
"VCMPSSZrm",
"VDBPSADBWZ128rmi(b?)",
"VFPCLASSSSrm(b?)",
"VPCMPBZ128rmi(b?)",
"VPCMPDZ128rmi(b?)",
@ -3107,7 +3068,6 @@ def: InstRW<[SKXWriteResGroup136], (instregex "VALIGNDZ128rm(b?)i",
"VPMOVZXWDYrm",
"VPMOVZXWDZ128rm(b?)",
"VPMOVZXWQZ128rm(b?)",
"VPSADBWZ128rm(b?)",
"VPTESTMBZ128rm(b?)",
"VPTESTMDZ128rm(b?)",
"VPTESTMQZ128rm(b?)",
@ -3219,8 +3179,6 @@ def: InstRW<[SKXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
"VCMPPDZrm(b?)i",
"VCMPPSZ256rm(b?)i",
"VCMPPSZrm(b?)i",
"VDBPSADBWZ256rmi(b?)",
"VDBPSADBWZrmi(b?)",
"VPCMPBZ256rmi(b?)",
"VPCMPBZrmi(b?)",
"VPCMPDZ256rmi(b?)",
@ -3267,9 +3225,6 @@ def: InstRW<[SKXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
"VPMOVZXBWYrm",
"VPMOVZXDQYrm",
"VPMOVZXWQYrm",
"VPSADBWYrm",
"VPSADBWZ256rm(b?)",
"VPSADBWZrm(b?)",
"VPTESTMBZ256rm(b?)",
"VPTESTMBZrm(b?)",
"VPTESTMDZ256rm(b?)",
@ -3296,14 +3251,6 @@ def: InstRW<[SKXWriteResGroup149], (instregex "CVTDQ2PSrm",
"CVTPS2DQrm",
"CVTSS2SDrm",
"CVTTPS2DQrm",
"PMADDUBSWrm",
"PMADDWDrm",
"PMULDQrm",
"PMULHRSWrm",
"PMULHUWrm",
"PMULHWrm",
"PMULLWrm",
"PMULUDQrm",
"VCVTDQ2PDZ128rm(b?)",
"VCVTDQ2PSZ128rm(b?)",
"VCVTDQ2PSrm",
@ -3333,23 +3280,7 @@ def: InstRW<[SKXWriteResGroup149], (instregex "CVTDQ2PSrm",
"VCVTUQQ2PDZ128rm(b?)",
"VCVTUQQ2PSZ128rm(b?)",
"VPLZCNTDZ128rm(b?)",
"VPLZCNTQZ128rm(b?)",
"VPMADDUBSWZ128rm(b?)",
"VPMADDUBSWrm",
"VPMADDWDZ128rm(b?)",
"VPMADDWDrm",
"VPMULDQZ128rm(b?)",
"VPMULDQrm",
"VPMULHRSWZ128rm(b?)",
"VPMULHRSWrm",
"VPMULHUWZ128rm(b?)",
"VPMULHUWrm",
"VPMULHWZ128rm(b?)",
"VPMULHWrm",
"VPMULLWZ128rm(b?)",
"VPMULLWrm",
"VPMULUDQZ128rm(b?)",
"VPMULUDQrm")>;
"VPLZCNTQZ128rm(b?)")>;
def SKXWriteResGroup151 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 10;
@ -3487,31 +3418,7 @@ def: InstRW<[SKXWriteResGroup161], (instregex "VCVTDQ2PDZ256rm(b?)",
"VPLZCNTDZ256rm(b?)",
"VPLZCNTDZrm(b?)",
"VPLZCNTQZ256rm(b?)",
"VPLZCNTQZrm(b?)",
"VPMADDUBSWYrm",
"VPMADDUBSWZ256rm(b?)",
"VPMADDUBSWZrm(b?)",
"VPMADDWDYrm",
"VPMADDWDZ256rm(b?)",
"VPMADDWDZrm(b?)",
"VPMULDQYrm",
"VPMULDQZ256rm(b?)",
"VPMULDQZrm(b?)",
"VPMULHRSWYrm",
"VPMULHRSWZ256rm(b?)",
"VPMULHRSWZrm(b?)",
"VPMULHUWYrm",
"VPMULHUWZ256rm(b?)",
"VPMULHUWZrm(b?)",
"VPMULHWYrm",
"VPMULHWZ256rm(b?)",
"VPMULHWZrm(b?)",
"VPMULLWYrm",
"VPMULLWZ256rm(b?)",
"VPMULLWZrm(b?)",
"VPMULUDQYrm",
"VPMULUDQZ256rm(b?)",
"VPMULUDQZrm(b?)")>;
"VPLZCNTQZrm(b?)")>;
def SKXWriteResGroup162 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 11;
@ -3526,7 +3433,6 @@ def: InstRW<[SKXWriteResGroup162], (instregex "FICOM16m",
"VEXPANDPDZrm(b?)",
"VEXPANDPSZ256rm(b?)",
"VEXPANDPSZrm(b?)",
"VMPSADBWYrmi",
"VPEXPANDDZ256rm(b?)",
"VPEXPANDDZrm(b?)",
"VPEXPANDQZ256rm(b?)",
@ -3805,15 +3711,6 @@ def: InstRW<[SKXWriteResGroup192], (instregex "VRNDSCALEPDZ256rm(b?)i",
"VROUNDPDYm",
"VROUNDPSYm")>;
def SKXWriteResGroup192_2 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let Latency = 17;
let NumMicroOps = 3;
let ResourceCycles = [1,2];
}
def: InstRW<[SKXWriteResGroup192_2], (instregex "VPMULLDYrm",
"VPMULLDZ256rm(b?)",
"VPMULLDZrm(b?)")>;
def SKXWriteResGroup193 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
let Latency = 15;
let NumMicroOps = 4;

View File

@ -137,7 +137,9 @@ defm WriteVecLogic : X86SchedWritePair; // Vector integer and/or/xor logicals.
defm WriteVecLogicY: X86SchedWritePair; // Vector integer and/or/xor logicals (YMM/ZMM).
defm WriteVecShift : X86SchedWritePair; // Vector integer shifts.
defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply.
defm WritePMULLD : X86SchedWritePair; // PMULLD
defm WriteVecIMulY : X86SchedWritePair; // Vector integer multiply (YMM/ZMM).
defm WritePMULLD : X86SchedWritePair; // Vector PMULLD.
defm WritePMULLDY : X86SchedWritePair; // Vector PMULLD (YMM/ZMM).
defm WriteShuffle : X86SchedWritePair; // Vector shuffles.
defm WriteShuffleY : X86SchedWritePair; // Vector shuffles (YMM/ZMM).
defm WriteVarShuffle : X86SchedWritePair; // Vector variable shuffles.
@ -146,8 +148,10 @@ defm WriteBlend : X86SchedWritePair; // Vector blends.
defm WriteBlendY : X86SchedWritePair; // Vector blends (YMM/ZMM).
defm WriteVarBlend : X86SchedWritePair; // Vector variable blends.
defm WriteVarBlendY : X86SchedWritePair; // Vector variable blends (YMM/ZMM).
defm WritePSADBW : X86SchedWritePair; // Vector PSADBW.
defm WriteMPSAD : X86SchedWritePair; // Vector MPSAD.
defm WritePSADBW : X86SchedWritePair; // Vector PSADBW.
defm WritePSADBWY : X86SchedWritePair; // Vector PSADBW (YMM/ZMM).
defm WriteMPSAD : X86SchedWritePair; // Vector MPSAD.
defm WriteMPSADY : X86SchedWritePair; // Vector MPSAD (YMM/ZMM).
defm WritePHMINPOS : X86SchedWritePair; // Vector PHMINPOS.
// Vector insert/extract operations.
@ -259,16 +263,16 @@ def SchedWriteVarVecShift
WriteVarVecShift, WriteVarVecShift>;
def SchedWriteVecIMul
: X86SchedWriteWidths<WriteVecIMul, WriteVecIMul,
WriteVecIMul, WriteVecIMul>;
WriteVecIMulY, WriteVecIMulY>;
def SchedWritePMULLD
: X86SchedWriteWidths<WritePMULLD, WritePMULLD,
WritePMULLD, WritePMULLD>;
WritePMULLDY, WritePMULLDY>;
def SchedWriteMPSAD
: X86SchedWriteWidths<WriteMPSAD, WriteMPSAD,
WriteMPSAD, WriteMPSAD>;
WriteMPSADY, WriteMPSADY>;
def SchedWritePSADBW
: X86SchedWriteWidths<WritePSADBW, WritePSADBW,
WritePSADBW, WritePSADBW>;
WritePSADBWY, WritePSADBWY>;
def SchedWriteShuffle
: X86SchedWriteWidths<WriteShuffle, WriteShuffle,

View File

@ -256,10 +256,14 @@ defm : AtomWriteResPair<WriteVecLogic, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecLogicY, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecShift, [AtomPort01], [AtomPort01], 2, 3, [2], [3]>;
defm : AtomWriteResPair<WriteVecIMul, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteVecIMulY, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WritePMULLD, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WritePMULLDY, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WritePHMINPOS, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteMPSAD, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteMPSADY, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WritePSADBW, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WritePSADBWY, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteShuffle, [AtomPort0], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteShuffleY, [AtomPort0], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVarShuffle, [AtomPort01], [AtomPort01], 4, 5, [4], [5]>;

View File

@ -406,9 +406,13 @@ def : WriteRes<WriteVecMove, [JFPU01, JVALU]>;
defm : JWriteResFpuPair<WriteVecALU, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecShift, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecIMul, [JFPU0, JVIMUL], 2>;
defm : JWriteResFpuPair<WriteVecIMulY, [JFPU0, JVIMUL], 2>;
defm : JWriteResFpuPair<WritePMULLD, [JFPU0, JFPU01, JVIMUL, JVALU], 4, [2, 1, 2, 1], 3>;
defm : JWriteResFpuPair<WritePMULLDY, [JFPU0, JFPU01, JVIMUL, JVALU], 4, [2, 1, 2, 1], 3>;
defm : JWriteResFpuPair<WriteMPSAD, [JFPU0, JVIMUL], 3, [1, 2]>;
defm : JWriteResFpuPair<WriteMPSADY, [JFPU0, JVIMUL], 3, [1, 2]>;
defm : JWriteResFpuPair<WritePSADBW, [JFPU01, JVALU], 2>;
defm : JWriteResFpuPair<WritePSADBWY, [JFPU01, JVALU], 2>;
defm : JWriteResFpuPair<WritePHMINPOS, [JFPU0, JVALU], 2>;
defm : JWriteResFpuPair<WriteShuffle, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteShuffleY, [JFPU01, JVALU], 1>;

View File

@ -167,9 +167,11 @@ defm : SLMWriteResPair<WriteVecLogic, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecLogicY,[SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecALU, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecIMul, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WriteVecIMulY, [SLM_FPC_RSV0], 4>;
// FIXME: The below is closer to correct, but caused some perf regressions.
//defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 11, [11], 7>;
defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WritePMULLDY, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WriteShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteShuffleY, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVarShuffle, [SLM_FPC_RSV0], 1>;
@ -177,7 +179,9 @@ defm : SLMWriteResPair<WriteVarShuffleY, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteBlend, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteBlendY, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteMPSAD, [SLM_FPC_RSV0], 7>;
defm : SLMWriteResPair<WritePSADBW, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WriteMPSADY, [SLM_FPC_RSV0], 7>;
defm : SLMWriteResPair<WritePSADBW, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WritePSADBWY, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WritePHMINPOS, [SLM_FPC_RSV0], 4>;
// Vector insert/extract operations.

View File

@ -240,7 +240,9 @@ defm : ZnWriteResFpuPair<WriteVecLogicY, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WritePHAdd, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecALU, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecIMul, [ZnFPU0], 4>;
defm : ZnWriteResFpuPair<WriteVecIMulY, [ZnFPU0], 4>;
defm : ZnWriteResFpuPair<WritePMULLD, [ZnFPU0], 4>; // FIXME
defm : ZnWriteResFpuPair<WritePMULLDY, [ZnFPU0], 5, [2]>; // FIXME
defm : ZnWriteResFpuPair<WriteShuffle, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteShuffleY, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVarShuffle, [ZnFPU], 1>;
@ -250,6 +252,7 @@ defm : ZnWriteResFpuPair<WriteBlendY, [ZnFPU01], 1>;
defm : ZnWriteResFpuPair<WriteShuffle256, [ZnFPU], 2>;
defm : ZnWriteResFpuPair<WriteVarShuffle256, [ZnFPU], 2>;
defm : ZnWriteResFpuPair<WritePSADBW, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WritePSADBWY, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WritePHMINPOS, [ZnFPU0], 4>;
// Vector Shift Operations
@ -291,7 +294,9 @@ let Latency = 100 in {
def : WriteRes<WriteMicrocoded, []>;
def : WriteRes<WriteSystem, []>;
def : WriteRes<WriteMPSAD, []>;
def : WriteRes<WriteMPSADY, []>;
def : WriteRes<WriteMPSADLd, []>;
def : WriteRes<WriteMPSADYLd, []>;
def : WriteRes<WriteCLMul, []>;
def : WriteRes<WriteCLMulLd, []>;
def : WriteRes<WritePCmpIStrM, []>;
@ -1042,18 +1047,6 @@ def : InstRW<[ZnWritePCMPGTQm], (instregex "(V?)PCMPGTQrm")>;
def : InstRW<[ZnWritePCMPGTQYm], (instregex "(V?)PCMPGTQYrm")>;
// PMULLD.
// x,x.
def ZnWritePMULLDr : SchedWriteRes<[ZnFPU0]> {
let Latency = 4;
}
// ymm.
def ZnWritePMULLDYr : SchedWriteRes<[ZnFPU0]> {
let Latency = 5;
let ResourceCycles = [2];
}
def : InstRW<[ZnWritePMULLDr], (instregex "(V?)PMULLDrr")>;
def : InstRW<[ZnWritePMULLDYr], (instregex "(V?)PMULLDYrr")>;
// x,m.
def ZnWritePMULLDm : SchedWriteRes<[ZnAGU, ZnFPU0]> {
let Latency = 11;

View File

@ -15,7 +15,7 @@ define i64 @test_pavgusb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
; CHECK-LABEL: test_pavgusb:
; CHECK: # %bb.0:
; CHECK-NEXT: pavgusb %mm1, %mm0 # sched: [5:1.00]
; CHECK-NEXT: pavgusb (%rdi), %mm0 # sched: [10:1.00]
; CHECK-NEXT: pavgusb (%rdi), %mm0 # sched: [11:1.00]
; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33]
; CHECK-NEXT: retq # sched: [1:1.00]
%1 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %a0, x86_mmx %a1)
@ -345,7 +345,7 @@ define i64 @test_pmulhrw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
; CHECK-LABEL: test_pmulhrw:
; CHECK: # %bb.0:
; CHECK-NEXT: pmulhrw %mm1, %mm0 # sched: [5:1.00]
; CHECK-NEXT: pmulhrw (%rdi), %mm0 # sched: [10:1.00]
; CHECK-NEXT: pmulhrw (%rdi), %mm0 # sched: [11:1.00]
; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33]
; CHECK-NEXT: retq # sched: [1:1.00]
%1 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %a0, x86_mmx %a1)

View File

@ -609,7 +609,7 @@ define <16 x i16> @test_mpsadbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_mpsadbw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [7:1.00]
; GENERIC-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [13:1.00]
; GENERIC-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [14:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_mpsadbw:
@ -3297,7 +3297,7 @@ define <16 x i16> @test_pmaddubsw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2)
; GENERIC-LABEL: test_pmaddubsw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; GENERIC-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmaddubsw:
@ -3341,7 +3341,7 @@ define <8 x i32> @test_pmaddwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2)
; GENERIC-LABEL: test_pmaddwd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; GENERIC-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmaddwd:
@ -4738,7 +4738,7 @@ define <4 x i64> @test_pmuldq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_pmuldq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; GENERIC-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmuldq:
@ -4782,7 +4782,7 @@ define <16 x i16> @test_pmulhrsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2
; GENERIC-LABEL: test_pmulhrsw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; GENERIC-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmulhrsw:
@ -4825,7 +4825,7 @@ define <16 x i16> @test_pmulhuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2)
; GENERIC-LABEL: test_pmulhuw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; GENERIC-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmulhuw:
@ -4868,7 +4868,7 @@ define <16 x i16> @test_pmulhw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2)
; GENERIC-LABEL: test_pmulhw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; GENERIC-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmulhw:
@ -4911,7 +4911,7 @@ define <8 x i32> @test_pmulld(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_pmulld:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
; GENERIC-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmulld:
@ -4953,7 +4953,7 @@ define <16 x i16> @test_pmullw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2)
; GENERIC-LABEL: test_pmullw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; GENERIC-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmullw:
@ -4995,7 +4995,7 @@ define <4 x i64> @test_pmuludq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_pmuludq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; GENERIC-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmuludq:
@ -5088,7 +5088,7 @@ define <4 x i64> @test_psadbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_psadbw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; GENERIC-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psadbw:

View File

@ -12,7 +12,7 @@ define <4 x i32> @test_sha1msg1(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; GENERIC-LABEL: test_sha1msg1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: sha1msg1 %xmm1, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT: sha1msg1 (%rdi), %xmm0 # sched: [10:1.00]
; GENERIC-NEXT: sha1msg1 (%rdi), %xmm0 # sched: [11:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GOLDMONT-LABEL: test_sha1msg1:
@ -23,8 +23,8 @@ define <4 x i32> @test_sha1msg1(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
;
; CANNONLAKE-LABEL: test_sha1msg1:
; CANNONLAKE: # %bb.0:
; CANNONLAKE-NEXT: sha1msg1 %xmm1, %xmm0 # sched: [5:1.00]
; CANNONLAKE-NEXT: sha1msg1 (%rdi), %xmm0 # sched: [10:1.00]
; CANNONLAKE-NEXT: sha1msg1 %xmm1, %xmm0 # sched: [4:0.33]
; CANNONLAKE-NEXT: sha1msg1 (%rdi), %xmm0 # sched: [10:0.50]
; CANNONLAKE-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_sha1msg1:
@ -43,7 +43,7 @@ define <4 x i32> @test_sha1msg2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; GENERIC-LABEL: test_sha1msg2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: sha1msg2 %xmm1, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT: sha1msg2 (%rdi), %xmm0 # sched: [10:1.00]
; GENERIC-NEXT: sha1msg2 (%rdi), %xmm0 # sched: [11:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GOLDMONT-LABEL: test_sha1msg2:
@ -54,8 +54,8 @@ define <4 x i32> @test_sha1msg2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
;
; CANNONLAKE-LABEL: test_sha1msg2:
; CANNONLAKE: # %bb.0:
; CANNONLAKE-NEXT: sha1msg2 %xmm1, %xmm0 # sched: [5:1.00]
; CANNONLAKE-NEXT: sha1msg2 (%rdi), %xmm0 # sched: [10:1.00]
; CANNONLAKE-NEXT: sha1msg2 %xmm1, %xmm0 # sched: [4:0.33]
; CANNONLAKE-NEXT: sha1msg2 (%rdi), %xmm0 # sched: [10:0.50]
; CANNONLAKE-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_sha1msg2:
@ -74,7 +74,7 @@ define <4 x i32> @test_sha1nexte(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; GENERIC-LABEL: test_sha1nexte:
; GENERIC: # %bb.0:
; GENERIC-NEXT: sha1nexte %xmm1, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT: sha1nexte (%rdi), %xmm0 # sched: [10:1.00]
; GENERIC-NEXT: sha1nexte (%rdi), %xmm0 # sched: [11:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GOLDMONT-LABEL: test_sha1nexte:
@ -85,8 +85,8 @@ define <4 x i32> @test_sha1nexte(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
;
; CANNONLAKE-LABEL: test_sha1nexte:
; CANNONLAKE: # %bb.0:
; CANNONLAKE-NEXT: sha1nexte %xmm1, %xmm0 # sched: [5:1.00]
; CANNONLAKE-NEXT: sha1nexte (%rdi), %xmm0 # sched: [10:1.00]
; CANNONLAKE-NEXT: sha1nexte %xmm1, %xmm0 # sched: [4:0.33]
; CANNONLAKE-NEXT: sha1nexte (%rdi), %xmm0 # sched: [10:0.50]
; CANNONLAKE-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_sha1nexte:
@ -105,7 +105,7 @@ define <4 x i32> @test_sha1rnds4(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; GENERIC-LABEL: test_sha1rnds4:
; GENERIC: # %bb.0:
; GENERIC-NEXT: sha1rnds4 $3, %xmm1, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # sched: [10:1.00]
; GENERIC-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # sched: [11:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GOLDMONT-LABEL: test_sha1rnds4:
@ -116,8 +116,8 @@ define <4 x i32> @test_sha1rnds4(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
;
; CANNONLAKE-LABEL: test_sha1rnds4:
; CANNONLAKE: # %bb.0:
; CANNONLAKE-NEXT: sha1rnds4 $3, %xmm1, %xmm0 # sched: [5:1.00]
; CANNONLAKE-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # sched: [10:1.00]
; CANNONLAKE-NEXT: sha1rnds4 $3, %xmm1, %xmm0 # sched: [4:0.33]
; CANNONLAKE-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # sched: [10:0.50]
; CANNONLAKE-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_sha1rnds4:
@ -140,7 +140,7 @@ define <4 x i32> @test_sha256msg1(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2)
; GENERIC-LABEL: test_sha256msg1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: sha256msg1 %xmm1, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT: sha256msg1 (%rdi), %xmm0 # sched: [10:1.00]
; GENERIC-NEXT: sha256msg1 (%rdi), %xmm0 # sched: [11:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GOLDMONT-LABEL: test_sha256msg1:
@ -151,8 +151,8 @@ define <4 x i32> @test_sha256msg1(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2)
;
; CANNONLAKE-LABEL: test_sha256msg1:
; CANNONLAKE: # %bb.0:
; CANNONLAKE-NEXT: sha256msg1 %xmm1, %xmm0 # sched: [5:1.00]
; CANNONLAKE-NEXT: sha256msg1 (%rdi), %xmm0 # sched: [10:1.00]
; CANNONLAKE-NEXT: sha256msg1 %xmm1, %xmm0 # sched: [4:0.33]
; CANNONLAKE-NEXT: sha256msg1 (%rdi), %xmm0 # sched: [10:0.50]
; CANNONLAKE-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_sha256msg1:
@ -171,7 +171,7 @@ define <4 x i32> @test_sha256msg2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2)
; GENERIC-LABEL: test_sha256msg2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: sha256msg2 %xmm1, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT: sha256msg2 (%rdi), %xmm0 # sched: [10:1.00]
; GENERIC-NEXT: sha256msg2 (%rdi), %xmm0 # sched: [11:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GOLDMONT-LABEL: test_sha256msg2:
@ -182,8 +182,8 @@ define <4 x i32> @test_sha256msg2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2)
;
; CANNONLAKE-LABEL: test_sha256msg2:
; CANNONLAKE: # %bb.0:
; CANNONLAKE-NEXT: sha256msg2 %xmm1, %xmm0 # sched: [5:1.00]
; CANNONLAKE-NEXT: sha256msg2 (%rdi), %xmm0 # sched: [10:1.00]
; CANNONLAKE-NEXT: sha256msg2 %xmm1, %xmm0 # sched: [4:0.33]
; CANNONLAKE-NEXT: sha256msg2 (%rdi), %xmm0 # sched: [10:0.50]
; CANNONLAKE-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_sha256msg2:
@ -204,7 +204,7 @@ define <4 x i32> @test_sha256rnds2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2,
; GENERIC-NEXT: movaps %xmm0, %xmm3 # sched: [1:1.00]
; GENERIC-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [5:1.00]
; GENERIC-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [10:1.00]
; GENERIC-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [11:1.00]
; GENERIC-NEXT: movaps %xmm3, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -221,8 +221,8 @@ define <4 x i32> @test_sha256rnds2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2,
; CANNONLAKE: # %bb.0:
; CANNONLAKE-NEXT: vmovaps %xmm0, %xmm3 # sched: [1:0.33]
; CANNONLAKE-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33]
; CANNONLAKE-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [5:1.00]
; CANNONLAKE-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [10:1.00]
; CANNONLAKE-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [4:0.33]
; CANNONLAKE-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [10:0.50]
; CANNONLAKE-NEXT: vmovaps %xmm3, %xmm0 # sched: [1:0.33]
; CANNONLAKE-NEXT: retq # sched: [7:1.00]
;

View File

@ -724,7 +724,7 @@ define void @test_vpmacsswd(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i6
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT: vpmacsswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; GENERIC-NEXT: vpmacsswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -744,7 +744,7 @@ define void @test_vpmacssww(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i6
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vpmacssww %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT: vpmacssww %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; GENERIC-NEXT: vpmacssww %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -764,7 +764,7 @@ define void @test_vpmacswd(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vpmacswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT: vpmacswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; GENERIC-NEXT: vpmacswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -784,7 +784,7 @@ define void @test_vpmacsww(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vpmacsww %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT: vpmacsww %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; GENERIC-NEXT: vpmacsww %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -804,7 +804,7 @@ define void @test_vpmadcsswd(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT: vpmadcsswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; GENERIC-NEXT: vpmadcsswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -824,7 +824,7 @@ define void @test_vpmadcswd(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i6
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT: vpmadcswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; GENERIC-NEXT: vpmadcswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;