1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-23 13:02:52 +02:00

[X86] Split WriteVecIMul/WritePMULLD/WriteMPSAD/WritePSADBW into XMM and YMM/ZMM scheduler classes

Also retagged the VDBPSADBW instructions as SchedWritePSADBW instead of SchedWriteVecIMul, which matches their behaviour on SkylakeServer (the only scheduler model that supports these instructions).

llvm-svn: 331445
This commit is contained in:
Simon Pilgrim 2018-05-03 10:31:20 +00:00
parent 61aa16d663
commit b7289046cc
15 changed files with 134 additions and 301 deletions

View File

@ -9711,7 +9711,7 @@ let Predicates = [HasVLX, HasBWI] in {
} }
defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw", defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
SchedWriteVecIMul, avx512vl_i16_info, avx512vl_i8_info>, SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
EVEX_CD8<8, CD8VF>; EVEX_CD8<8, CD8VF>;
multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,

View File

@ -203,8 +203,10 @@ defm : BWWriteResPair<WriteVecALU, [BWPort15], 1>; // Vector integer ALU op,
defm : BWWriteResPair<WriteVecLogic, [BWPort015], 1, [1], 1, 5>; // Vector integer and/or/xor. defm : BWWriteResPair<WriteVecLogic, [BWPort015], 1, [1], 1, 5>; // Vector integer and/or/xor.
defm : BWWriteResPair<WriteVecLogicY,[BWPort015], 1, [1], 1, 6>; // Vector integer and/or/xor (YMM/ZMM). defm : BWWriteResPair<WriteVecLogicY,[BWPort015], 1, [1], 1, 6>; // Vector integer and/or/xor (YMM/ZMM).
defm : BWWriteResPair<WriteVecShift, [BWPort0], 1>; // Vector integer shifts. defm : BWWriteResPair<WriteVecShift, [BWPort0], 1>; // Vector integer shifts.
defm : BWWriteResPair<WriteVecIMul, [BWPort0], 5>; // Vector integer multiply. defm : BWWriteResPair<WriteVecIMul, [BWPort0], 5, [1], 1, 5>; // Vector integer multiply.
defm : BWWriteResPair<WritePMULLD, [BWPort0], 10, [2], 2, 5>; // PMULLD defm : BWWriteResPair<WriteVecIMulY, [BWPort0], 5, [1], 1, 6>; // Vector integer multiply (YMM/ZMM).
defm : BWWriteResPair<WritePMULLD, [BWPort0], 10, [2], 2, 5>; // Vector PMULLD.
defm : BWWriteResPair<WritePMULLDY, [BWPort0], 10, [2], 2, 6>; // Vector PMULLD (YMM/ZMM).
defm : BWWriteResPair<WriteShuffle, [BWPort5], 1, [1], 1, 5>; // Vector shuffles. defm : BWWriteResPair<WriteShuffle, [BWPort5], 1, [1], 1, 5>; // Vector shuffles.
defm : BWWriteResPair<WriteShuffleY, [BWPort5], 1, [1], 1, 6>; // Vector shuffles (YMM/ZMM). defm : BWWriteResPair<WriteShuffleY, [BWPort5], 1, [1], 1, 6>; // Vector shuffles (YMM/ZMM).
defm : BWWriteResPair<WriteVarShuffle, [BWPort5], 1, [1], 1, 5>; // Vector variable shuffles. defm : BWWriteResPair<WriteVarShuffle, [BWPort5], 1, [1], 1, 5>; // Vector variable shuffles.
@ -214,8 +216,10 @@ defm : BWWriteResPair<WriteBlendY, [BWPort5], 1, [1], 1, 6>; // Vector blends (Y
defm : BWWriteResPair<WriteVarBlend, [BWPort5], 2, [2], 2, 5>; // Vector variable blends. defm : BWWriteResPair<WriteVarBlend, [BWPort5], 2, [2], 2, 5>; // Vector variable blends.
defm : BWWriteResPair<WriteVarBlendY, [BWPort5], 2, [2], 2, 6>; // Vector variable blends (YMM/ZMM). defm : BWWriteResPair<WriteVarBlendY, [BWPort5], 2, [2], 2, 6>; // Vector variable blends (YMM/ZMM).
defm : BWWriteResPair<WriteMPSAD, [BWPort0, BWPort5], 7, [1, 2], 3, 5>; // Vector MPSAD. defm : BWWriteResPair<WriteMPSAD, [BWPort0, BWPort5], 7, [1, 2], 3, 5>; // Vector MPSAD.
defm : BWWriteResPair<WritePSADBW, [BWPort0], 5>; // Vector PSADBW. defm : BWWriteResPair<WriteMPSADY, [BWPort0, BWPort5], 7, [1, 2], 3, 6>; // Vector MPSAD (YMM/ZMM).
defm : BWWriteResPair<WritePHMINPOS, [BWPort0], 5>; // Vector PHMINPOS. defm : BWWriteResPair<WritePSADBW, [BWPort0], 5, [1], 1, 5>; // Vector PSADBW.
defm : BWWriteResPair<WritePSADBWY, [BWPort0], 5, [1], 1, 6>; // Vector PSADBW (YMM/ZMM).
defm : BWWriteResPair<WritePHMINPOS, [BWPort0], 5>; // Vector PHMINPOS.
// Vector insert/extract operations. // Vector insert/extract operations.
def : WriteRes<WriteVecInsert, [BWPort5]> { def : WriteRes<WriteVecInsert, [BWPort5]> {
@ -1504,16 +1508,7 @@ def BWWriteResGroup123 : SchedWriteRes<[BWPort0,BWPort23]> {
let ResourceCycles = [1,1]; let ResourceCycles = [1,1];
} }
def: InstRW<[BWWriteResGroup123], (instregex "MUL_F(32|64)m", def: InstRW<[BWWriteResGroup123], (instregex "MUL_F(32|64)m",
"VPCMPGTQYrm", "VPCMPGTQYrm")>;
"VPMADDUBSWYrm",
"VPMADDWDYrm",
"VPMULDQYrm",
"VPMULHRSWYrm",
"VPMULHUWYrm",
"VPMULHWYrm",
"VPMULLWYrm",
"VPMULUDQYrm",
"VPSADBWYrm")>;
def BWWriteResGroup126 : SchedWriteRes<[BWPort0,BWPort015]> { def BWWriteResGroup126 : SchedWriteRes<[BWPort0,BWPort015]> {
let Latency = 11; let Latency = 11;
@ -1594,13 +1589,6 @@ def BWWriteResGroup137_1 : SchedWriteRes<[BWPort0,BWFPDivider]> {
} }
def: InstRW<[BWWriteResGroup137_1], (instregex "(V?)SQRTSSr")>; def: InstRW<[BWWriteResGroup137_1], (instregex "(V?)SQRTSSr")>;
def BWWriteResGroup138 : SchedWriteRes<[BWPort0,BWPort5,BWPort23]> {
let Latency = 13;
let NumMicroOps = 4;
let ResourceCycles = [1,2,1];
}
def: InstRW<[BWWriteResGroup138], (instregex "VMPSADBWYrmi")>;
def BWWriteResGroup139 : SchedWriteRes<[BWPort0,BWFPDivider]> { def BWWriteResGroup139 : SchedWriteRes<[BWPort0,BWFPDivider]> {
let Latency = 14; let Latency = 14;
let NumMicroOps = 1; let NumMicroOps = 1;
@ -1681,13 +1669,6 @@ def BWWriteResGroup150 : SchedWriteRes<[BWPort0,BWPort23,BWFPDivider]> {
def: InstRW<[BWWriteResGroup150], (instregex "(V?)DIVPSrm", def: InstRW<[BWWriteResGroup150], (instregex "(V?)DIVPSrm",
"(V?)DIVSSrm")>; "(V?)DIVSSrm")>;
def BWWriteResGroup151 : SchedWriteRes<[BWPort0,BWPort23]> {
let Latency = 16;
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
def: InstRW<[BWWriteResGroup151], (instregex "VPMULLDYrm")>;
def BWWriteResGroup153 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06,BWPort15,BWPort0156]> { def BWWriteResGroup153 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06,BWPort15,BWPort0156]> {
let Latency = 16; let Latency = 16;
let NumMicroOps = 14; let NumMicroOps = 14;

View File

@ -198,8 +198,10 @@ defm : HWWriteResPair<WriteVecShift, [HWPort0], 1>;
defm : HWWriteResPair<WriteVecLogic, [HWPort015], 1, [1], 1, 6>; defm : HWWriteResPair<WriteVecLogic, [HWPort015], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteVecLogicY,[HWPort015], 1, [1], 1, 7>; defm : HWWriteResPair<WriteVecLogicY,[HWPort015], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteVecALU, [HWPort15], 1>; defm : HWWriteResPair<WriteVecALU, [HWPort15], 1>;
defm : HWWriteResPair<WriteVecIMul, [HWPort0], 5>; defm : HWWriteResPair<WriteVecIMul, [HWPort0], 5, [1], 1, 6>;
defm : HWWriteResPair<WriteVecIMulY, [HWPort0], 5, [1], 1, 7>;
defm : HWWriteResPair<WritePMULLD, [HWPort0], 10, [2], 2, 6>; defm : HWWriteResPair<WritePMULLD, [HWPort0], 10, [2], 2, 6>;
defm : HWWriteResPair<WritePMULLDY, [HWPort0], 10, [2], 2, 7>;
defm : HWWriteResPair<WriteShuffle, [HWPort5], 1, [1], 1, 5>; defm : HWWriteResPair<WriteShuffle, [HWPort5], 1, [1], 1, 5>;
defm : HWWriteResPair<WriteShuffleY, [HWPort5], 1, [1], 1, 7>; defm : HWWriteResPair<WriteShuffleY, [HWPort5], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteVarShuffle, [HWPort5], 1, [1], 1, 6>; defm : HWWriteResPair<WriteVarShuffle, [HWPort5], 1, [1], 1, 6>;
@ -212,7 +214,9 @@ defm : HWWriteResPair<WriteVarBlend, [HWPort5], 2, [2], 2, 6>;
defm : HWWriteResPair<WriteVarBlendY, [HWPort5], 2, [2], 2, 7>; defm : HWWriteResPair<WriteVarBlendY, [HWPort5], 2, [2], 2, 7>;
defm : HWWriteResPair<WriteVarVecShift, [HWPort0, HWPort5], 2, [2, 1]>; defm : HWWriteResPair<WriteVarVecShift, [HWPort0, HWPort5], 2, [2, 1]>;
defm : HWWriteResPair<WriteMPSAD, [HWPort0, HWPort5], 7, [1, 2], 3, 6>; defm : HWWriteResPair<WriteMPSAD, [HWPort0, HWPort5], 7, [1, 2], 3, 6>;
defm : HWWriteResPair<WritePSADBW, [HWPort0], 5>; defm : HWWriteResPair<WriteMPSADY, [HWPort0, HWPort5], 7, [1, 2], 3, 7>;
defm : HWWriteResPair<WritePSADBW, [HWPort0], 5, [1], 1, 6>;
defm : HWWriteResPair<WritePSADBWY, [HWPort0], 5, [1], 1, 7>;
defm : HWWriteResPair<WritePHMINPOS, [HWPort0], 5, [1], 1, 6>; defm : HWWriteResPair<WritePHMINPOS, [HWPort0], 5, [1], 1, 6>;
// Vector insert/extract operations. // Vector insert/extract operations.
@ -1799,15 +1803,6 @@ def HWWriteResGroup91_2 : SchedWriteRes<[HWPort0,HWPort23]> {
let ResourceCycles = [1,1]; let ResourceCycles = [1,1];
} }
def: InstRW<[HWWriteResGroup91_2], (instregex "(V?)PCMPGTQrm", def: InstRW<[HWWriteResGroup91_2], (instregex "(V?)PCMPGTQrm",
"(V?)PMADDUBSWrm",
"(V?)PMADDWDrm",
"(V?)PMULDQrm",
"(V?)PMULHRSWrm",
"(V?)PMULHUWrm",
"(V?)PMULHWrm",
"(V?)PMULLWrm",
"(V?)PMULUDQrm",
"(V?)PSADBWrm",
"(V?)RCPPSm", "(V?)RCPPSm",
"(V?)RSQRTPSm")>; "(V?)RSQRTPSm")>;
@ -1817,16 +1812,21 @@ def HWWriteResGroup91_3 : SchedWriteRes<[HWPort0,HWPort23]> {
let ResourceCycles = [1,1]; let ResourceCycles = [1,1];
} }
def: InstRW<[HWWriteResGroup91_3], (instregex "MUL_F(32|64)m", def: InstRW<[HWWriteResGroup91_3], (instregex "MUL_F(32|64)m",
"VPCMPGTQYrm", "VPCMPGTQYrm")>;
"VPMADDUBSWYrm",
"VPMADDWDYrm", def HWWriteResGroup91_5 : SchedWriteRes<[HWPort0,HWPort23]> {
"VPMULDQYrm", let Latency = 10;
"VPMULHRSWYrm", let NumMicroOps = 2;
"VPMULHUWYrm", let ResourceCycles = [1,1];
"VPMULHWYrm", }
"VPMULLWYrm", def: InstRW<[HWWriteResGroup91_5], (instregex "MMX_PMADDUBSWrm",
"VPMULUDQYrm", "MMX_PMADDWDirm",
"VPSADBWYrm")>; "MMX_PMULHRSWrm",
"MMX_PMULHUWirm",
"MMX_PMULHWirm",
"MMX_PMULLWirm",
"MMX_PMULUDQirm",
"MMX_PSADBWirm")>;
def HWWriteResGroup92_2 : SchedWriteRes<[HWPort01,HWPort23]> { def HWWriteResGroup92_2 : SchedWriteRes<[HWPort01,HWPort23]> {
let Latency = 10; let Latency = 10;
@ -1966,13 +1966,6 @@ def HWWriteResGroup109 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort06,HWPo
def: InstRW<[HWWriteResGroup109], (instregex "SHLD(16|32|64)mrCL", def: InstRW<[HWWriteResGroup109], (instregex "SHLD(16|32|64)mrCL",
"SHRD(16|32|64)mrCL")>; "SHRD(16|32|64)mrCL")>;
def HWWriteResGroup113_1 : SchedWriteRes<[HWPort0,HWPort5,HWPort23]> {
let Latency = 14;
let NumMicroOps = 4;
let ResourceCycles = [1,2,1];
}
def: InstRW<[HWWriteResGroup113_1], (instregex "VMPSADBWYrmi")>;
def HWWriteResGroup114 : SchedWriteRes<[HWPort6,HWPort06,HWPort15,HWPort0156]> { def HWWriteResGroup114 : SchedWriteRes<[HWPort6,HWPort06,HWPort15,HWPort0156]> {
let Latency = 7; let Latency = 7;
let NumMicroOps = 7; let NumMicroOps = 7;
@ -2001,13 +1994,6 @@ def HWWriteResGroup117 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort23]> {
} }
def: InstRW<[HWWriteResGroup117], (instregex "(V?)DPPDrmi")>; def: InstRW<[HWWriteResGroup117], (instregex "(V?)DPPDrmi")>;
def HWWriteResGroup119_1 : SchedWriteRes<[HWPort0,HWPort23]> {
let Latency = 17;
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
def: InstRW<[HWWriteResGroup119_1], (instregex "VPMULLDYrm")>;
def HWWriteResGroup120 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort06,HWPort15,HWPort0156]> { def HWWriteResGroup120 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort06,HWPort15,HWPort0156]> {
let Latency = 16; let Latency = 16;
let NumMicroOps = 10; let NumMicroOps = 10;

View File

@ -178,8 +178,10 @@ defm : SBWriteResPair<WriteVecShift, [SBPort5], 1>;
defm : SBWriteResPair<WriteVecLogic, [SBPort015], 1, [1], 1, 6>; defm : SBWriteResPair<WriteVecLogic, [SBPort015], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteVecLogicY,[SBPort015], 1, [1], 1, 7>; defm : SBWriteResPair<WriteVecLogicY,[SBPort015], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteVecALU, [SBPort1], 3>; defm : SBWriteResPair<WriteVecALU, [SBPort1], 3>;
defm : SBWriteResPair<WriteVecIMul, [SBPort0], 5>; defm : SBWriteResPair<WriteVecIMul, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WritePMULLD, [SBPort0], 5, [1], 1, 6>; // TODO this is probably wrong for 256/512-bit for the "generic" model defm : SBWriteResPair<WriteVecIMulY, [SBPort0], 5, [1], 1, 7>;
defm : SBWriteResPair<WritePMULLD, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WritePMULLDY, [SBPort0], 5, [1], 1, 7>; // TODO this is probably wrong for 256/512-bit for the "generic" model
defm : SBWriteResPair<WriteShuffle, [SBPort5], 1, [1], 1, 5>; defm : SBWriteResPair<WriteShuffle, [SBPort5], 1, [1], 1, 5>;
defm : SBWriteResPair<WriteShuffleY, [SBPort5], 1, [1], 1, 7>; defm : SBWriteResPair<WriteShuffleY, [SBPort5], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteVarShuffle, [SBPort15], 1, [1], 1, 6>; defm : SBWriteResPair<WriteVarShuffle, [SBPort15], 1, [1], 1, 6>;
@ -188,8 +190,10 @@ defm : SBWriteResPair<WriteBlend, [SBPort15], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteBlendY, [SBPort15], 1, [1], 1, 7>; defm : SBWriteResPair<WriteBlendY, [SBPort15], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteVarBlend, [SBPort15], 2, [2], 2, 6>; defm : SBWriteResPair<WriteVarBlend, [SBPort15], 2, [2], 2, 6>;
defm : SBWriteResPair<WriteVarBlendY,[SBPort15], 2, [2], 2, 7>; defm : SBWriteResPair<WriteVarBlendY,[SBPort15], 2, [2], 2, 7>;
defm : SBWriteResPair<WriteMPSAD, [SBPort0, SBPort15], 7, [1,2], 3, 6>; defm : SBWriteResPair<WriteMPSAD, [SBPort0, SBPort15], 7, [1,2], 3, 6>;
defm : SBWriteResPair<WritePSADBW, [SBPort0], 5>; defm : SBWriteResPair<WriteMPSADY, [SBPort0, SBPort15], 7, [1,2], 3, 7>;
defm : SBWriteResPair<WritePSADBW, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WritePSADBWY, [SBPort0], 5, [1], 1, 7>;
defm : SBWriteResPair<WritePHMINPOS, [SBPort0], 5, [1], 1, 6>; defm : SBWriteResPair<WritePHMINPOS, [SBPort0], 5, [1], 1, 6>;
// Vector insert/extract operations. // Vector insert/extract operations.
@ -1227,21 +1231,6 @@ def SBWriteResGroup88 : SchedWriteRes<[SBPort4,SBPort23,SBPort05,SBPort015]> {
def: InstRW<[SBWriteResGroup88], (instregex "SHLD(16|32|64)mri8", def: InstRW<[SBWriteResGroup88], (instregex "SHLD(16|32|64)mri8",
"SHRD(16|32|64)mri8")>; "SHRD(16|32|64)mri8")>;
def SBWriteResGroup89 : SchedWriteRes<[SBPort0,SBPort23]> {
let Latency = 11;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
def: InstRW<[SBWriteResGroup89], (instregex "(V?)PMADDUBSWrm",
"(V?)PMADDWDrm",
"(V?)PMULDQrm",
"(V?)PMULHRSWrm",
"(V?)PMULHUWrm",
"(V?)PMULHWrm",
"(V?)PMULLWrm",
"(V?)PMULUDQrm",
"(V?)PSADBWrm")>;
def SBWriteResGroup89_2 : SchedWriteRes<[SBPort0,SBPort23]> { def SBWriteResGroup89_2 : SchedWriteRes<[SBPort0,SBPort23]> {
let Latency = 10; let Latency = 10;
let NumMicroOps = 2; let NumMicroOps = 2;

View File

@ -199,8 +199,10 @@ defm : SKLWriteResPair<WriteVecALU, [SKLPort15], 1>; // Vector integer ALU op
defm : SKLWriteResPair<WriteVecLogic, [SKLPort015], 1, [1], 1, 6>; // Vector integer and/or/xor. defm : SKLWriteResPair<WriteVecLogic, [SKLPort015], 1, [1], 1, 6>; // Vector integer and/or/xor.
defm : SKLWriteResPair<WriteVecLogicY,[SKLPort015], 1, [1], 1, 7>; // Vector integer and/or/xor (YMM/ZMM). defm : SKLWriteResPair<WriteVecLogicY,[SKLPort015], 1, [1], 1, 7>; // Vector integer and/or/xor (YMM/ZMM).
defm : SKLWriteResPair<WriteVecShift, [SKLPort0], 1>; // Vector integer shifts. defm : SKLWriteResPair<WriteVecShift, [SKLPort0], 1>; // Vector integer shifts.
defm : SKLWriteResPair<WriteVecIMul, [SKLPort0], 5>; // Vector integer multiply. defm : SKLWriteResPair<WriteVecIMul, [SKLPort01], 4, [1], 1, 6>; // Vector integer multiply.
defm : SKLWriteResPair<WritePMULLD, [SKLPort01], 10, [2], 2, 6>; defm : SKLWriteResPair<WriteVecIMulY, [SKLPort01], 4, [1], 1, 7>; // Vector integer multiply (YMM/ZMM).
defm : SKLWriteResPair<WritePMULLD, [SKLPort01], 10, [2], 2, 6>; // Vector PMULLD.
defm : SKLWriteResPair<WritePMULLDY, [SKLPort01], 10, [2], 2, 7>; // Vector PMULLD (YMM/ZMM).
defm : SKLWriteResPair<WriteShuffle, [SKLPort5], 1, [1], 1, 5>; // Vector shuffles. defm : SKLWriteResPair<WriteShuffle, [SKLPort5], 1, [1], 1, 5>; // Vector shuffles.
defm : SKLWriteResPair<WriteShuffleY, [SKLPort5], 1, [1], 1, 7>; // Vector shuffles (YMM/ZMM). defm : SKLWriteResPair<WriteShuffleY, [SKLPort5], 1, [1], 1, 7>; // Vector shuffles (YMM/ZMM).
defm : SKLWriteResPair<WriteVarShuffle, [SKLPort5], 1, [1], 1, 6>; // Vector shuffles. defm : SKLWriteResPair<WriteVarShuffle, [SKLPort5], 1, [1], 1, 6>; // Vector shuffles.
@ -210,7 +212,9 @@ defm : SKLWriteResPair<WriteBlendY, [SKLPort5], 1, [1], 1, 7>; // Vector blends
defm : SKLWriteResPair<WriteVarBlend, [SKLPort015], 2, [2], 2, 6>; // Vector variable blends. defm : SKLWriteResPair<WriteVarBlend, [SKLPort015], 2, [2], 2, 6>; // Vector variable blends.
defm : SKLWriteResPair<WriteVarBlendY, [SKLPort015], 2, [2], 2, 6>; // Vector variable blends (YMM/ZMM). defm : SKLWriteResPair<WriteVarBlendY, [SKLPort015], 2, [2], 2, 6>; // Vector variable blends (YMM/ZMM).
defm : SKLWriteResPair<WriteMPSAD, [SKLPort5], 4, [2], 2, 6>; // Vector MPSAD. defm : SKLWriteResPair<WriteMPSAD, [SKLPort5], 4, [2], 2, 6>; // Vector MPSAD.
defm : SKLWriteResPair<WritePSADBW, [SKLPort5], 3>; // Vector PSADBW. defm : SKLWriteResPair<WriteMPSADY, [SKLPort5], 4, [2], 2, 7>; // Vector MPSAD (YMM/ZMM).
defm : SKLWriteResPair<WritePSADBW, [SKLPort5], 3, [1], 1, 6>; // Vector PSADBW.
defm : SKLWriteResPair<WritePSADBWY, [SKLPort5], 3, [1], 1, 7>; // Vector PSADBW (YMM/ZMM).
defm : SKLWriteResPair<WritePHMINPOS, [SKLPort01], 4, [1], 1, 6>; // Vector PHMINPOS. defm : SKLWriteResPair<WritePHMINPOS, [SKLPort01], 4, [1], 1, 6>; // Vector PHMINPOS.
// Vector insert/extract operations. // Vector insert/extract operations.
@ -918,15 +922,7 @@ def SKLWriteResGroup48 : SchedWriteRes<[SKLPort01]> {
} }
def: InstRW<[SKLWriteResGroup48], (instregex "(V?)CVTDQ2PS(Y?)rr", def: InstRW<[SKLWriteResGroup48], (instregex "(V?)CVTDQ2PS(Y?)rr",
"(V?)CVTPS2DQ(Y?)rr", "(V?)CVTPS2DQ(Y?)rr",
"(V?)CVTTPS2DQ(Y?)rr", "(V?)CVTTPS2DQ(Y?)rr")>;
"(V?)PMADDUBSW(Y?)rr",
"(V?)PMADDWD(Y?)rr",
"(V?)PMULDQ(Y?)rr",
"(V?)PMULHRSW(Y?)rr",
"(V?)PMULHUW(Y?)rr",
"(V?)PMULHW(Y?)rr",
"(V?)PMULLW(Y?)rr",
"(V?)PMULUDQ(Y?)rr")>;
def SKLWriteResGroup51 : SchedWriteRes<[SKLPort1,SKLPort5]> { def SKLWriteResGroup51 : SchedWriteRes<[SKLPort1,SKLPort5]> {
let Latency = 4; let Latency = 4;
@ -1506,6 +1502,7 @@ def: InstRW<[SKLWriteResGroup108], (instregex "FCOM32m",
"FCOM64m", "FCOM64m",
"FCOMP32m", "FCOMP32m",
"FCOMP64m", "FCOMP64m",
"MMX_PSADBWirm", // TODO - SKLWriteResGroup120??
"VPBROADCASTBYrm", "VPBROADCASTBYrm",
"VPBROADCASTWYrm", "VPBROADCASTWYrm",
"VPMOVSXBDYrm", "VPMOVSXBDYrm",
@ -1669,8 +1666,7 @@ def: InstRW<[SKLWriteResGroup121], (instregex "(V?)PCMPGTQrm",
"VPMOVSXBWYrm", "VPMOVSXBWYrm",
"VPMOVSXDQYrm", "VPMOVSXDQYrm",
"VPMOVSXWDYrm", "VPMOVSXWDYrm",
"VPMOVZXWDYrm", "VPMOVZXWDYrm")>;
"(V?)PSADBWrm")>;
def SKLWriteResGroup122 : SchedWriteRes<[SKLPort01,SKLPort23]> { def SKLWriteResGroup122 : SchedWriteRes<[SKLPort01,SKLPort23]> {
let Latency = 9; let Latency = 9;
@ -1775,8 +1771,7 @@ def: InstRW<[SKLWriteResGroup133], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
"VPMOVZXBQYrm", "VPMOVZXBQYrm",
"VPMOVZXBWYrm", "VPMOVZXBWYrm",
"VPMOVZXDQYrm", "VPMOVZXDQYrm",
"VPMOVZXWQYrm", "VPMOVZXWQYrm")>;
"VPSADBWYrm")>;
def SKLWriteResGroup134 : SchedWriteRes<[SKLPort01,SKLPort23]> { def SKLWriteResGroup134 : SchedWriteRes<[SKLPort01,SKLPort23]> {
let Latency = 10; let Latency = 10;
@ -1787,15 +1782,7 @@ def: InstRW<[SKLWriteResGroup134], (instregex "(V?)CVTDQ2PSrm",
"(V?)CVTPH2PSYrm", "(V?)CVTPH2PSYrm",
"(V?)CVTPS2DQrm", "(V?)CVTPS2DQrm",
"(V?)CVTSS2SDrm", "(V?)CVTSS2SDrm",
"(V?)CVTTPS2DQrm", "(V?)CVTTPS2DQrm")>;
"(V?)PMADDUBSWrm",
"(V?)PMADDWDrm",
"(V?)PMULDQrm",
"(V?)PMULHRSWrm",
"(V?)PMULHUWrm",
"(V?)PMULHWrm",
"(V?)PMULLWrm",
"(V?)PMULUDQrm")>;
def SKLWriteResGroup138 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { def SKLWriteResGroup138 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
let Latency = 10; let Latency = 10;
@ -1883,15 +1870,7 @@ def SKLWriteResGroup147 : SchedWriteRes<[SKLPort01,SKLPort23]> {
def: InstRW<[SKLWriteResGroup147], (instregex "VCVTDQ2PSYrm", def: InstRW<[SKLWriteResGroup147], (instregex "VCVTDQ2PSYrm",
"VCVTPS2DQYrm", "VCVTPS2DQYrm",
"VCVTPS2PDYrm", "VCVTPS2PDYrm",
"VCVTTPS2DQYrm", "VCVTTPS2DQYrm")>;
"VPMADDUBSWYrm",
"VPMADDWDYrm",
"VPMULDQYrm",
"VPMULHRSWYrm",
"VPMULHUWYrm",
"VPMULHWYrm",
"VPMULLWYrm",
"VPMULUDQYrm")>;
def SKLWriteResGroup149 : SchedWriteRes<[SKLPort5,SKLPort23]> { def SKLWriteResGroup149 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let Latency = 11; let Latency = 11;
@ -1901,8 +1880,7 @@ def SKLWriteResGroup149 : SchedWriteRes<[SKLPort5,SKLPort23]> {
def: InstRW<[SKLWriteResGroup149], (instregex "FICOM16m", def: InstRW<[SKLWriteResGroup149], (instregex "FICOM16m",
"FICOM32m", "FICOM32m",
"FICOMP16m", "FICOMP16m",
"FICOMP32m", "FICOMP32m")>;
"VMPSADBWYrmi")>;
def SKLWriteResGroup150 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { def SKLWriteResGroup150 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
let Latency = 11; let Latency = 11;
@ -2065,13 +2043,6 @@ def SKLWriteResGroup172 : SchedWriteRes<[SKLPort23,SKLPort01]> {
def: InstRW<[SKLWriteResGroup172], (instregex "VROUNDPDYm", def: InstRW<[SKLWriteResGroup172], (instregex "VROUNDPDYm",
"VROUNDPSYm")>; "VROUNDPSYm")>;
def SKLWriteResGroup172_2 : SchedWriteRes<[SKLPort23,SKLPort01]> {
let Latency = 17;
let NumMicroOps = 3;
let ResourceCycles = [1,2];
}
def: InstRW<[SKLWriteResGroup172_2], (instregex "VPMULLDYrm")>;
def SKLWriteResGroup173 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort01]> { def SKLWriteResGroup173 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort01]> {
let Latency = 15; let Latency = 15;
let NumMicroOps = 4; let NumMicroOps = 4;

View File

@ -199,8 +199,10 @@ defm : SKXWriteResPair<WriteVecALU, [SKXPort15], 1>; // Vector integer ALU op
defm : SKXWriteResPair<WriteVecLogic, [SKXPort015], 1, [1], 1, 6>; // Vector integer and/or/xor. defm : SKXWriteResPair<WriteVecLogic, [SKXPort015], 1, [1], 1, 6>; // Vector integer and/or/xor.
defm : SKXWriteResPair<WriteVecLogicY,[SKXPort015], 1, [1], 1, 7>; // Vector integer and/or/xor (YMM/ZMM). defm : SKXWriteResPair<WriteVecLogicY,[SKXPort015], 1, [1], 1, 7>; // Vector integer and/or/xor (YMM/ZMM).
defm : SKXWriteResPair<WriteVecShift, [SKXPort0], 1>; // Vector integer shifts. defm : SKXWriteResPair<WriteVecShift, [SKXPort0], 1>; // Vector integer shifts.
defm : SKXWriteResPair<WriteVecIMul, [SKXPort0], 5>; // Vector integer multiply. defm : SKXWriteResPair<WriteVecIMul, [SKXPort015], 4, [1], 1, 6>; // Vector integer multiply.
defm : SKXWriteResPair<WritePMULLD, [SKXPort015], 10, [2], 2, 6>; // Vector integer multiply. defm : SKXWriteResPair<WriteVecIMulY, [SKXPort015], 4, [1], 1, 7>; // Vector integer multiply (YMM/ZMM).
defm : SKXWriteResPair<WritePMULLD, [SKXPort015], 10, [2], 2, 6>; // Vector PMULLD.
defm : SKXWriteResPair<WritePMULLDY, [SKXPort015], 10, [2], 2, 7>; // Vector PMULLD (YMM/ZMM).
defm : SKXWriteResPair<WriteShuffle, [SKXPort5], 1, [1], 1, 5>; // Vector shuffles. defm : SKXWriteResPair<WriteShuffle, [SKXPort5], 1, [1], 1, 5>; // Vector shuffles.
defm : SKXWriteResPair<WriteShuffleY, [SKXPort5], 1, [1], 1, 7>; // Vector shuffles (YMM/ZMM). defm : SKXWriteResPair<WriteShuffleY, [SKXPort5], 1, [1], 1, 7>; // Vector shuffles (YMM/ZMM).
defm : SKXWriteResPair<WriteVarShuffle, [SKXPort5], 1, [1], 1, 6>; // Vector variable shuffles. defm : SKXWriteResPair<WriteVarShuffle, [SKXPort5], 1, [1], 1, 6>; // Vector variable shuffles.
@ -209,8 +211,10 @@ defm : SKXWriteResPair<WriteBlend, [SKXPort5], 1, [1], 1, 6>; // Vector blends.
defm : SKXWriteResPair<WriteBlendY,[SKXPort5], 1, [1], 1, 7>; // Vector blends (YMM/ZMM). defm : SKXWriteResPair<WriteBlendY,[SKXPort5], 1, [1], 1, 7>; // Vector blends (YMM/ZMM).
defm : SKXWriteResPair<WriteVarBlend, [SKXPort015], 2, [2], 2, 6>; // Vector variable blends. defm : SKXWriteResPair<WriteVarBlend, [SKXPort015], 2, [2], 2, 6>; // Vector variable blends.
defm : SKXWriteResPair<WriteVarBlendY,[SKXPort015], 2, [2], 2, 6>; // Vector variable blends (YMM/ZMM). defm : SKXWriteResPair<WriteVarBlendY,[SKXPort015], 2, [2], 2, 6>; // Vector variable blends (YMM/ZMM).
defm : SKXWriteResPair<WriteMPSAD, [SKXPort5], 4, [2], 2, 6>; // Vector MPSAD. defm : SKXWriteResPair<WriteMPSAD, [SKXPort5], 4, [2], 2, 6>; // Vector MPSAD.
defm : SKXWriteResPair<WritePSADBW, [SKXPort5], 3, [1], 1, 6>; // Vector PSADBW. defm : SKXWriteResPair<WriteMPSADY, [SKXPort5], 4, [2], 2, 7>; // Vector MPSAD (YMM/ZMM).
defm : SKXWriteResPair<WritePSADBW, [SKXPort5], 3, [1], 1, 6>; // Vector PSADBW.
defm : SKXWriteResPair<WritePSADBWY, [SKXPort5], 3, [1], 1, 7>; // Vector PSADBW (YMM/ZMM).
defm : SKXWriteResPair<WritePHMINPOS, [SKXPort015], 4, [1], 1, 6>; // Vector PHMINPOS. defm : SKXWriteResPair<WritePHMINPOS, [SKXPort015], 4, [1], 1, 6>; // Vector PHMINPOS.
// Vector insert/extract operations. // Vector insert/extract operations.
@ -1214,9 +1218,7 @@ def: InstRW<[SKXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_FPrST0",
"VCMPPSZrri", "VCMPPSZrri",
"VCMPSDZrr", "VCMPSDZrr",
"VCMPSSZrr", "VCMPSSZrr",
"VDBPSADBWZ128rri", "VDBPSADBWZrri", // TODO: 512-bit ops require ports 0/1 to be joined.
"VDBPSADBWZ256rri",
"VDBPSADBWZrri",
"VFPCLASSPDZ128rr", "VFPCLASSPDZ128rr",
"VFPCLASSPDZ256rr", "VFPCLASSPDZ256rr",
"VFPCLASSPDZrr", "VFPCLASSPDZrr",
@ -1518,47 +1520,7 @@ def: InstRW<[SKXWriteResGroup50], (instregex "VCVTDQ2PSYrr",
"VPLZCNTDZrr", "VPLZCNTDZrr",
"VPLZCNTQZ128rr", "VPLZCNTQZ128rr",
"VPLZCNTQZ256rr", "VPLZCNTQZ256rr",
"VPLZCNTQZrr", "VPLZCNTQZrr")>;
"VPMADDUBSWYrr",
"VPMADDUBSWZ128rr",
"VPMADDUBSWZ256rr",
"VPMADDUBSWZrr",
"(V?)PMADDUBSWrr",
"VPMADDWDYrr",
"VPMADDWDZ128rr",
"VPMADDWDZ256rr",
"VPMADDWDZrr",
"(V?)PMADDWDrr",
"VPMULDQYrr",
"VPMULDQZ128rr",
"VPMULDQZ256rr",
"VPMULDQZrr",
"(V?)PMULDQrr",
"VPMULHRSWYrr",
"VPMULHRSWZ128rr",
"VPMULHRSWZ256rr",
"VPMULHRSWZrr",
"(V?)PMULHRSWrr",
"VPMULHUWYrr",
"VPMULHUWZ128rr",
"VPMULHUWZ256rr",
"VPMULHUWZrr",
"(V?)PMULHUWrr",
"VPMULHWYrr",
"VPMULHWZ128rr",
"VPMULHWZ256rr",
"VPMULHWZrr",
"(V?)PMULHWrr",
"VPMULLWYrr",
"VPMULLWZ128rr",
"VPMULLWZ256rr",
"VPMULLWZrr",
"(V?)PMULLWrr",
"VPMULUDQYrr",
"VPMULUDQZ128rr",
"VPMULUDQZ256rr",
"VPMULUDQZrr",
"(V?)PMULUDQrr")>;
def SKXWriteResGroup51 : SchedWriteRes<[SKXPort5]> { def SKXWriteResGroup51 : SchedWriteRes<[SKXPort5]> {
let Latency = 4; let Latency = 4;
@ -3060,7 +3022,6 @@ def: InstRW<[SKXWriteResGroup136], (instregex "VALIGNDZ128rm(b?)i",
"VCMPPSZ128rm(b?)i", "VCMPPSZ128rm(b?)i",
"VCMPSDZrm", "VCMPSDZrm",
"VCMPSSZrm", "VCMPSSZrm",
"VDBPSADBWZ128rmi(b?)",
"VFPCLASSSSrm(b?)", "VFPCLASSSSrm(b?)",
"VPCMPBZ128rmi(b?)", "VPCMPBZ128rmi(b?)",
"VPCMPDZ128rmi(b?)", "VPCMPDZ128rmi(b?)",
@ -3107,7 +3068,6 @@ def: InstRW<[SKXWriteResGroup136], (instregex "VALIGNDZ128rm(b?)i",
"VPMOVZXWDYrm", "VPMOVZXWDYrm",
"VPMOVZXWDZ128rm(b?)", "VPMOVZXWDZ128rm(b?)",
"VPMOVZXWQZ128rm(b?)", "VPMOVZXWQZ128rm(b?)",
"VPSADBWZ128rm(b?)",
"VPTESTMBZ128rm(b?)", "VPTESTMBZ128rm(b?)",
"VPTESTMDZ128rm(b?)", "VPTESTMDZ128rm(b?)",
"VPTESTMQZ128rm(b?)", "VPTESTMQZ128rm(b?)",
@ -3219,8 +3179,6 @@ def: InstRW<[SKXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
"VCMPPDZrm(b?)i", "VCMPPDZrm(b?)i",
"VCMPPSZ256rm(b?)i", "VCMPPSZ256rm(b?)i",
"VCMPPSZrm(b?)i", "VCMPPSZrm(b?)i",
"VDBPSADBWZ256rmi(b?)",
"VDBPSADBWZrmi(b?)",
"VPCMPBZ256rmi(b?)", "VPCMPBZ256rmi(b?)",
"VPCMPBZrmi(b?)", "VPCMPBZrmi(b?)",
"VPCMPDZ256rmi(b?)", "VPCMPDZ256rmi(b?)",
@ -3267,9 +3225,6 @@ def: InstRW<[SKXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
"VPMOVZXBWYrm", "VPMOVZXBWYrm",
"VPMOVZXDQYrm", "VPMOVZXDQYrm",
"VPMOVZXWQYrm", "VPMOVZXWQYrm",
"VPSADBWYrm",
"VPSADBWZ256rm(b?)",
"VPSADBWZrm(b?)",
"VPTESTMBZ256rm(b?)", "VPTESTMBZ256rm(b?)",
"VPTESTMBZrm(b?)", "VPTESTMBZrm(b?)",
"VPTESTMDZ256rm(b?)", "VPTESTMDZ256rm(b?)",
@ -3296,14 +3251,6 @@ def: InstRW<[SKXWriteResGroup149], (instregex "CVTDQ2PSrm",
"CVTPS2DQrm", "CVTPS2DQrm",
"CVTSS2SDrm", "CVTSS2SDrm",
"CVTTPS2DQrm", "CVTTPS2DQrm",
"PMADDUBSWrm",
"PMADDWDrm",
"PMULDQrm",
"PMULHRSWrm",
"PMULHUWrm",
"PMULHWrm",
"PMULLWrm",
"PMULUDQrm",
"VCVTDQ2PDZ128rm(b?)", "VCVTDQ2PDZ128rm(b?)",
"VCVTDQ2PSZ128rm(b?)", "VCVTDQ2PSZ128rm(b?)",
"VCVTDQ2PSrm", "VCVTDQ2PSrm",
@ -3333,23 +3280,7 @@ def: InstRW<[SKXWriteResGroup149], (instregex "CVTDQ2PSrm",
"VCVTUQQ2PDZ128rm(b?)", "VCVTUQQ2PDZ128rm(b?)",
"VCVTUQQ2PSZ128rm(b?)", "VCVTUQQ2PSZ128rm(b?)",
"VPLZCNTDZ128rm(b?)", "VPLZCNTDZ128rm(b?)",
"VPLZCNTQZ128rm(b?)", "VPLZCNTQZ128rm(b?)")>;
"VPMADDUBSWZ128rm(b?)",
"VPMADDUBSWrm",
"VPMADDWDZ128rm(b?)",
"VPMADDWDrm",
"VPMULDQZ128rm(b?)",
"VPMULDQrm",
"VPMULHRSWZ128rm(b?)",
"VPMULHRSWrm",
"VPMULHUWZ128rm(b?)",
"VPMULHUWrm",
"VPMULHWZ128rm(b?)",
"VPMULHWrm",
"VPMULLWZ128rm(b?)",
"VPMULLWrm",
"VPMULUDQZ128rm(b?)",
"VPMULUDQrm")>;
def SKXWriteResGroup151 : SchedWriteRes<[SKXPort5,SKXPort23]> { def SKXWriteResGroup151 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 10; let Latency = 10;
@ -3487,31 +3418,7 @@ def: InstRW<[SKXWriteResGroup161], (instregex "VCVTDQ2PDZ256rm(b?)",
"VPLZCNTDZ256rm(b?)", "VPLZCNTDZ256rm(b?)",
"VPLZCNTDZrm(b?)", "VPLZCNTDZrm(b?)",
"VPLZCNTQZ256rm(b?)", "VPLZCNTQZ256rm(b?)",
"VPLZCNTQZrm(b?)", "VPLZCNTQZrm(b?)")>;
"VPMADDUBSWYrm",
"VPMADDUBSWZ256rm(b?)",
"VPMADDUBSWZrm(b?)",
"VPMADDWDYrm",
"VPMADDWDZ256rm(b?)",
"VPMADDWDZrm(b?)",
"VPMULDQYrm",
"VPMULDQZ256rm(b?)",
"VPMULDQZrm(b?)",
"VPMULHRSWYrm",
"VPMULHRSWZ256rm(b?)",
"VPMULHRSWZrm(b?)",
"VPMULHUWYrm",
"VPMULHUWZ256rm(b?)",
"VPMULHUWZrm(b?)",
"VPMULHWYrm",
"VPMULHWZ256rm(b?)",
"VPMULHWZrm(b?)",
"VPMULLWYrm",
"VPMULLWZ256rm(b?)",
"VPMULLWZrm(b?)",
"VPMULUDQYrm",
"VPMULUDQZ256rm(b?)",
"VPMULUDQZrm(b?)")>;
def SKXWriteResGroup162 : SchedWriteRes<[SKXPort5,SKXPort23]> { def SKXWriteResGroup162 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 11; let Latency = 11;
@ -3526,7 +3433,6 @@ def: InstRW<[SKXWriteResGroup162], (instregex "FICOM16m",
"VEXPANDPDZrm(b?)", "VEXPANDPDZrm(b?)",
"VEXPANDPSZ256rm(b?)", "VEXPANDPSZ256rm(b?)",
"VEXPANDPSZrm(b?)", "VEXPANDPSZrm(b?)",
"VMPSADBWYrmi",
"VPEXPANDDZ256rm(b?)", "VPEXPANDDZ256rm(b?)",
"VPEXPANDDZrm(b?)", "VPEXPANDDZrm(b?)",
"VPEXPANDQZ256rm(b?)", "VPEXPANDQZ256rm(b?)",
@ -3805,15 +3711,6 @@ def: InstRW<[SKXWriteResGroup192], (instregex "VRNDSCALEPDZ256rm(b?)i",
"VROUNDPDYm", "VROUNDPDYm",
"VROUNDPSYm")>; "VROUNDPSYm")>;
def SKXWriteResGroup192_2 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let Latency = 17;
let NumMicroOps = 3;
let ResourceCycles = [1,2];
}
def: InstRW<[SKXWriteResGroup192_2], (instregex "VPMULLDYrm",
"VPMULLDZ256rm(b?)",
"VPMULLDZrm(b?)")>;
def SKXWriteResGroup193 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { def SKXWriteResGroup193 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
let Latency = 15; let Latency = 15;
let NumMicroOps = 4; let NumMicroOps = 4;

View File

@ -137,7 +137,9 @@ defm WriteVecLogic : X86SchedWritePair; // Vector integer and/or/xor logicals.
defm WriteVecLogicY: X86SchedWritePair; // Vector integer and/or/xor logicals (YMM/ZMM). defm WriteVecLogicY: X86SchedWritePair; // Vector integer and/or/xor logicals (YMM/ZMM).
defm WriteVecShift : X86SchedWritePair; // Vector integer shifts. defm WriteVecShift : X86SchedWritePair; // Vector integer shifts.
defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply. defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply.
defm WritePMULLD : X86SchedWritePair; // PMULLD defm WriteVecIMulY : X86SchedWritePair; // Vector integer multiply (YMM/ZMM).
defm WritePMULLD : X86SchedWritePair; // Vector PMULLD.
defm WritePMULLDY : X86SchedWritePair; // Vector PMULLD (YMM/ZMM).
defm WriteShuffle : X86SchedWritePair; // Vector shuffles. defm WriteShuffle : X86SchedWritePair; // Vector shuffles.
defm WriteShuffleY : X86SchedWritePair; // Vector shuffles (YMM/ZMM). defm WriteShuffleY : X86SchedWritePair; // Vector shuffles (YMM/ZMM).
defm WriteVarShuffle : X86SchedWritePair; // Vector variable shuffles. defm WriteVarShuffle : X86SchedWritePair; // Vector variable shuffles.
@ -146,8 +148,10 @@ defm WriteBlend : X86SchedWritePair; // Vector blends.
defm WriteBlendY : X86SchedWritePair; // Vector blends (YMM/ZMM). defm WriteBlendY : X86SchedWritePair; // Vector blends (YMM/ZMM).
defm WriteVarBlend : X86SchedWritePair; // Vector variable blends. defm WriteVarBlend : X86SchedWritePair; // Vector variable blends.
defm WriteVarBlendY : X86SchedWritePair; // Vector variable blends (YMM/ZMM). defm WriteVarBlendY : X86SchedWritePair; // Vector variable blends (YMM/ZMM).
defm WritePSADBW : X86SchedWritePair; // Vector PSADBW. defm WritePSADBW : X86SchedWritePair; // Vector PSADBW.
defm WriteMPSAD : X86SchedWritePair; // Vector MPSAD. defm WritePSADBWY : X86SchedWritePair; // Vector PSADBW (YMM/ZMM).
defm WriteMPSAD : X86SchedWritePair; // Vector MPSAD.
defm WriteMPSADY : X86SchedWritePair; // Vector MPSAD (YMM/ZMM).
defm WritePHMINPOS : X86SchedWritePair; // Vector PHMINPOS. defm WritePHMINPOS : X86SchedWritePair; // Vector PHMINPOS.
// Vector insert/extract operations. // Vector insert/extract operations.
@ -259,16 +263,16 @@ def SchedWriteVarVecShift
WriteVarVecShift, WriteVarVecShift>; WriteVarVecShift, WriteVarVecShift>;
def SchedWriteVecIMul def SchedWriteVecIMul
: X86SchedWriteWidths<WriteVecIMul, WriteVecIMul, : X86SchedWriteWidths<WriteVecIMul, WriteVecIMul,
WriteVecIMul, WriteVecIMul>; WriteVecIMulY, WriteVecIMulY>;
def SchedWritePMULLD def SchedWritePMULLD
: X86SchedWriteWidths<WritePMULLD, WritePMULLD, : X86SchedWriteWidths<WritePMULLD, WritePMULLD,
WritePMULLD, WritePMULLD>; WritePMULLDY, WritePMULLDY>;
def SchedWriteMPSAD def SchedWriteMPSAD
: X86SchedWriteWidths<WriteMPSAD, WriteMPSAD, : X86SchedWriteWidths<WriteMPSAD, WriteMPSAD,
WriteMPSAD, WriteMPSAD>; WriteMPSADY, WriteMPSADY>;
def SchedWritePSADBW def SchedWritePSADBW
: X86SchedWriteWidths<WritePSADBW, WritePSADBW, : X86SchedWriteWidths<WritePSADBW, WritePSADBW,
WritePSADBW, WritePSADBW>; WritePSADBWY, WritePSADBWY>;
def SchedWriteShuffle def SchedWriteShuffle
: X86SchedWriteWidths<WriteShuffle, WriteShuffle, : X86SchedWriteWidths<WriteShuffle, WriteShuffle,

View File

@ -256,10 +256,14 @@ defm : AtomWriteResPair<WriteVecLogic, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecLogicY, [AtomPort01], [AtomPort0], 1, 1>; defm : AtomWriteResPair<WriteVecLogicY, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecShift, [AtomPort01], [AtomPort01], 2, 3, [2], [3]>; defm : AtomWriteResPair<WriteVecShift, [AtomPort01], [AtomPort01], 2, 3, [2], [3]>;
defm : AtomWriteResPair<WriteVecIMul, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>; defm : AtomWriteResPair<WriteVecIMul, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteVecIMulY, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WritePMULLD, [AtomPort01], [AtomPort0], 1, 1>; defm : AtomWriteResPair<WritePMULLD, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WritePMULLDY, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WritePHMINPOS, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>; defm : AtomWriteResPair<WritePHMINPOS, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteMPSAD, [AtomPort01], [AtomPort0], 1, 1>; defm : AtomWriteResPair<WriteMPSAD, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteMPSADY, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WritePSADBW, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>; defm : AtomWriteResPair<WritePSADBW, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WritePSADBWY, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteShuffle, [AtomPort0], [AtomPort0], 1, 1>; defm : AtomWriteResPair<WriteShuffle, [AtomPort0], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteShuffleY, [AtomPort0], [AtomPort0], 1, 1>; defm : AtomWriteResPair<WriteShuffleY, [AtomPort0], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVarShuffle, [AtomPort01], [AtomPort01], 4, 5, [4], [5]>; defm : AtomWriteResPair<WriteVarShuffle, [AtomPort01], [AtomPort01], 4, 5, [4], [5]>;

View File

@ -406,9 +406,13 @@ def : WriteRes<WriteVecMove, [JFPU01, JVALU]>;
defm : JWriteResFpuPair<WriteVecALU, [JFPU01, JVALU], 1>; defm : JWriteResFpuPair<WriteVecALU, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecShift, [JFPU01, JVALU], 1>; defm : JWriteResFpuPair<WriteVecShift, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecIMul, [JFPU0, JVIMUL], 2>; defm : JWriteResFpuPair<WriteVecIMul, [JFPU0, JVIMUL], 2>;
defm : JWriteResFpuPair<WriteVecIMulY, [JFPU0, JVIMUL], 2>;
defm : JWriteResFpuPair<WritePMULLD, [JFPU0, JFPU01, JVIMUL, JVALU], 4, [2, 1, 2, 1], 3>; defm : JWriteResFpuPair<WritePMULLD, [JFPU0, JFPU01, JVIMUL, JVALU], 4, [2, 1, 2, 1], 3>;
defm : JWriteResFpuPair<WritePMULLDY, [JFPU0, JFPU01, JVIMUL, JVALU], 4, [2, 1, 2, 1], 3>;
defm : JWriteResFpuPair<WriteMPSAD, [JFPU0, JVIMUL], 3, [1, 2]>; defm : JWriteResFpuPair<WriteMPSAD, [JFPU0, JVIMUL], 3, [1, 2]>;
defm : JWriteResFpuPair<WriteMPSADY, [JFPU0, JVIMUL], 3, [1, 2]>;
defm : JWriteResFpuPair<WritePSADBW, [JFPU01, JVALU], 2>; defm : JWriteResFpuPair<WritePSADBW, [JFPU01, JVALU], 2>;
defm : JWriteResFpuPair<WritePSADBWY, [JFPU01, JVALU], 2>;
defm : JWriteResFpuPair<WritePHMINPOS, [JFPU0, JVALU], 2>; defm : JWriteResFpuPair<WritePHMINPOS, [JFPU0, JVALU], 2>;
defm : JWriteResFpuPair<WriteShuffle, [JFPU01, JVALU], 1>; defm : JWriteResFpuPair<WriteShuffle, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteShuffleY, [JFPU01, JVALU], 1>; defm : JWriteResFpuPair<WriteShuffleY, [JFPU01, JVALU], 1>;

View File

@ -167,9 +167,11 @@ defm : SLMWriteResPair<WriteVecLogic, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecLogicY,[SLM_FPC_RSV01], 1>; defm : SLMWriteResPair<WriteVecLogicY,[SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecALU, [SLM_FPC_RSV01], 1>; defm : SLMWriteResPair<WriteVecALU, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecIMul, [SLM_FPC_RSV0], 4>; defm : SLMWriteResPair<WriteVecIMul, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WriteVecIMulY, [SLM_FPC_RSV0], 4>;
// FIXME: The below is closer to correct, but caused some perf regressions. // FIXME: The below is closer to correct, but caused some perf regressions.
//defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 11, [11], 7>; //defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 11, [11], 7>;
defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 4>; defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WritePMULLDY, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WriteShuffle, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteShuffleY, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteShuffleY, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVarShuffle, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteVarShuffle, [SLM_FPC_RSV0], 1>;
@ -177,7 +179,9 @@ defm : SLMWriteResPair<WriteVarShuffleY, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteBlend, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteBlend, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteBlendY, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteBlendY, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteMPSAD, [SLM_FPC_RSV0], 7>; defm : SLMWriteResPair<WriteMPSAD, [SLM_FPC_RSV0], 7>;
defm : SLMWriteResPair<WritePSADBW, [SLM_FPC_RSV0], 4>; defm : SLMWriteResPair<WriteMPSADY, [SLM_FPC_RSV0], 7>;
defm : SLMWriteResPair<WritePSADBW, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WritePSADBWY, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WritePHMINPOS, [SLM_FPC_RSV0], 4>; defm : SLMWriteResPair<WritePHMINPOS, [SLM_FPC_RSV0], 4>;
// Vector insert/extract operations. // Vector insert/extract operations.

View File

@ -240,7 +240,9 @@ defm : ZnWriteResFpuPair<WriteVecLogicY, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WritePHAdd, [ZnFPU], 1>; defm : ZnWriteResFpuPair<WritePHAdd, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecALU, [ZnFPU], 1>; defm : ZnWriteResFpuPair<WriteVecALU, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecIMul, [ZnFPU0], 4>; defm : ZnWriteResFpuPair<WriteVecIMul, [ZnFPU0], 4>;
defm : ZnWriteResFpuPair<WriteVecIMulY, [ZnFPU0], 4>;
defm : ZnWriteResFpuPair<WritePMULLD, [ZnFPU0], 4>; // FIXME defm : ZnWriteResFpuPair<WritePMULLD, [ZnFPU0], 4>; // FIXME
defm : ZnWriteResFpuPair<WritePMULLDY, [ZnFPU0], 5, [2]>; // FIXME
defm : ZnWriteResFpuPair<WriteShuffle, [ZnFPU], 1>; defm : ZnWriteResFpuPair<WriteShuffle, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteShuffleY, [ZnFPU], 1>; defm : ZnWriteResFpuPair<WriteShuffleY, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVarShuffle, [ZnFPU], 1>; defm : ZnWriteResFpuPair<WriteVarShuffle, [ZnFPU], 1>;
@ -250,6 +252,7 @@ defm : ZnWriteResFpuPair<WriteBlendY, [ZnFPU01], 1>;
defm : ZnWriteResFpuPair<WriteShuffle256, [ZnFPU], 2>; defm : ZnWriteResFpuPair<WriteShuffle256, [ZnFPU], 2>;
defm : ZnWriteResFpuPair<WriteVarShuffle256, [ZnFPU], 2>; defm : ZnWriteResFpuPair<WriteVarShuffle256, [ZnFPU], 2>;
defm : ZnWriteResFpuPair<WritePSADBW, [ZnFPU0], 3>; defm : ZnWriteResFpuPair<WritePSADBW, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WritePSADBWY, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WritePHMINPOS, [ZnFPU0], 4>; defm : ZnWriteResFpuPair<WritePHMINPOS, [ZnFPU0], 4>;
// Vector Shift Operations // Vector Shift Operations
@ -291,7 +294,9 @@ let Latency = 100 in {
def : WriteRes<WriteMicrocoded, []>; def : WriteRes<WriteMicrocoded, []>;
def : WriteRes<WriteSystem, []>; def : WriteRes<WriteSystem, []>;
def : WriteRes<WriteMPSAD, []>; def : WriteRes<WriteMPSAD, []>;
def : WriteRes<WriteMPSADY, []>;
def : WriteRes<WriteMPSADLd, []>; def : WriteRes<WriteMPSADLd, []>;
def : WriteRes<WriteMPSADYLd, []>;
def : WriteRes<WriteCLMul, []>; def : WriteRes<WriteCLMul, []>;
def : WriteRes<WriteCLMulLd, []>; def : WriteRes<WriteCLMulLd, []>;
def : WriteRes<WritePCmpIStrM, []>; def : WriteRes<WritePCmpIStrM, []>;
@ -1042,18 +1047,6 @@ def : InstRW<[ZnWritePCMPGTQm], (instregex "(V?)PCMPGTQrm")>;
def : InstRW<[ZnWritePCMPGTQYm], (instregex "(V?)PCMPGTQYrm")>; def : InstRW<[ZnWritePCMPGTQYm], (instregex "(V?)PCMPGTQYrm")>;
// PMULLD. // PMULLD.
// x,x.
def ZnWritePMULLDr : SchedWriteRes<[ZnFPU0]> {
let Latency = 4;
}
// ymm.
def ZnWritePMULLDYr : SchedWriteRes<[ZnFPU0]> {
let Latency = 5;
let ResourceCycles = [2];
}
def : InstRW<[ZnWritePMULLDr], (instregex "(V?)PMULLDrr")>;
def : InstRW<[ZnWritePMULLDYr], (instregex "(V?)PMULLDYrr")>;
// x,m. // x,m.
def ZnWritePMULLDm : SchedWriteRes<[ZnAGU, ZnFPU0]> { def ZnWritePMULLDm : SchedWriteRes<[ZnAGU, ZnFPU0]> {
let Latency = 11; let Latency = 11;

View File

@ -15,7 +15,7 @@ define i64 @test_pavgusb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
; CHECK-LABEL: test_pavgusb: ; CHECK-LABEL: test_pavgusb:
; CHECK: # %bb.0: ; CHECK: # %bb.0:
; CHECK-NEXT: pavgusb %mm1, %mm0 # sched: [5:1.00] ; CHECK-NEXT: pavgusb %mm1, %mm0 # sched: [5:1.00]
; CHECK-NEXT: pavgusb (%rdi), %mm0 # sched: [10:1.00] ; CHECK-NEXT: pavgusb (%rdi), %mm0 # sched: [11:1.00]
; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33]
; CHECK-NEXT: retq # sched: [1:1.00] ; CHECK-NEXT: retq # sched: [1:1.00]
%1 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %a0, x86_mmx %a1) %1 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %a0, x86_mmx %a1)
@ -345,7 +345,7 @@ define i64 @test_pmulhrw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
; CHECK-LABEL: test_pmulhrw: ; CHECK-LABEL: test_pmulhrw:
; CHECK: # %bb.0: ; CHECK: # %bb.0:
; CHECK-NEXT: pmulhrw %mm1, %mm0 # sched: [5:1.00] ; CHECK-NEXT: pmulhrw %mm1, %mm0 # sched: [5:1.00]
; CHECK-NEXT: pmulhrw (%rdi), %mm0 # sched: [10:1.00] ; CHECK-NEXT: pmulhrw (%rdi), %mm0 # sched: [11:1.00]
; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] ; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33]
; CHECK-NEXT: retq # sched: [1:1.00] ; CHECK-NEXT: retq # sched: [1:1.00]
%1 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %a0, x86_mmx %a1) %1 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %a0, x86_mmx %a1)

View File

@ -609,7 +609,7 @@ define <16 x i16> @test_mpsadbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_mpsadbw: ; GENERIC-LABEL: test_mpsadbw:
; GENERIC: # %bb.0: ; GENERIC: # %bb.0:
; GENERIC-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [7:1.00] ; GENERIC-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [7:1.00]
; GENERIC-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [13:1.00] ; GENERIC-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [14:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_mpsadbw: ; HASWELL-LABEL: test_mpsadbw:
@ -3297,7 +3297,7 @@ define <16 x i16> @test_pmaddubsw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2)
; GENERIC-LABEL: test_pmaddubsw: ; GENERIC-LABEL: test_pmaddubsw:
; GENERIC: # %bb.0: ; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] ; GENERIC-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [10:1.00] ; GENERIC-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pmaddubsw: ; HASWELL-LABEL: test_pmaddubsw:
@ -3341,7 +3341,7 @@ define <8 x i32> @test_pmaddwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2)
; GENERIC-LABEL: test_pmaddwd: ; GENERIC-LABEL: test_pmaddwd:
; GENERIC: # %bb.0: ; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [5:1.00] ; GENERIC-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] ; GENERIC-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pmaddwd: ; HASWELL-LABEL: test_pmaddwd:
@ -4738,7 +4738,7 @@ define <4 x i64> @test_pmuldq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_pmuldq: ; GENERIC-LABEL: test_pmuldq:
; GENERIC: # %bb.0: ; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00] ; GENERIC-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [10:1.00] ; GENERIC-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pmuldq: ; HASWELL-LABEL: test_pmuldq:
@ -4782,7 +4782,7 @@ define <16 x i16> @test_pmulhrsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2
; GENERIC-LABEL: test_pmulhrsw: ; GENERIC-LABEL: test_pmulhrsw:
; GENERIC: # %bb.0: ; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] ; GENERIC-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [10:1.00] ; GENERIC-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pmulhrsw: ; HASWELL-LABEL: test_pmulhrsw:
@ -4825,7 +4825,7 @@ define <16 x i16> @test_pmulhuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2)
; GENERIC-LABEL: test_pmulhuw: ; GENERIC-LABEL: test_pmulhuw:
; GENERIC: # %bb.0: ; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] ; GENERIC-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [10:1.00] ; GENERIC-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pmulhuw: ; HASWELL-LABEL: test_pmulhuw:
@ -4868,7 +4868,7 @@ define <16 x i16> @test_pmulhw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2)
; GENERIC-LABEL: test_pmulhw: ; GENERIC-LABEL: test_pmulhw:
; GENERIC: # %bb.0: ; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] ; GENERIC-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [10:1.00] ; GENERIC-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pmulhw: ; HASWELL-LABEL: test_pmulhw:
@ -4911,7 +4911,7 @@ define <8 x i32> @test_pmulld(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_pmulld: ; GENERIC-LABEL: test_pmulld:
; GENERIC: # %bb.0: ; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [5:1.00] ; GENERIC-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [11:1.00] ; GENERIC-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pmulld: ; HASWELL-LABEL: test_pmulld:
@ -4953,7 +4953,7 @@ define <16 x i16> @test_pmullw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2)
; GENERIC-LABEL: test_pmullw: ; GENERIC-LABEL: test_pmullw:
; GENERIC: # %bb.0: ; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] ; GENERIC-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [10:1.00] ; GENERIC-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pmullw: ; HASWELL-LABEL: test_pmullw:
@ -4995,7 +4995,7 @@ define <4 x i64> @test_pmuludq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_pmuludq: ; GENERIC-LABEL: test_pmuludq:
; GENERIC: # %bb.0: ; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [5:1.00] ; GENERIC-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [10:1.00] ; GENERIC-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pmuludq: ; HASWELL-LABEL: test_pmuludq:
@ -5088,7 +5088,7 @@ define <4 x i64> @test_psadbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_psadbw: ; GENERIC-LABEL: test_psadbw:
; GENERIC: # %bb.0: ; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] ; GENERIC-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [10:1.00] ; GENERIC-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_psadbw: ; HASWELL-LABEL: test_psadbw:

View File

@ -12,7 +12,7 @@ define <4 x i32> @test_sha1msg1(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; GENERIC-LABEL: test_sha1msg1: ; GENERIC-LABEL: test_sha1msg1:
; GENERIC: # %bb.0: ; GENERIC: # %bb.0:
; GENERIC-NEXT: sha1msg1 %xmm1, %xmm0 # sched: [5:1.00] ; GENERIC-NEXT: sha1msg1 %xmm1, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT: sha1msg1 (%rdi), %xmm0 # sched: [10:1.00] ; GENERIC-NEXT: sha1msg1 (%rdi), %xmm0 # sched: [11:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00]
; ;
; GOLDMONT-LABEL: test_sha1msg1: ; GOLDMONT-LABEL: test_sha1msg1:
@ -23,8 +23,8 @@ define <4 x i32> @test_sha1msg1(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; ;
; CANNONLAKE-LABEL: test_sha1msg1: ; CANNONLAKE-LABEL: test_sha1msg1:
; CANNONLAKE: # %bb.0: ; CANNONLAKE: # %bb.0:
; CANNONLAKE-NEXT: sha1msg1 %xmm1, %xmm0 # sched: [5:1.00] ; CANNONLAKE-NEXT: sha1msg1 %xmm1, %xmm0 # sched: [4:0.33]
; CANNONLAKE-NEXT: sha1msg1 (%rdi), %xmm0 # sched: [10:1.00] ; CANNONLAKE-NEXT: sha1msg1 (%rdi), %xmm0 # sched: [10:0.50]
; CANNONLAKE-NEXT: retq # sched: [7:1.00] ; CANNONLAKE-NEXT: retq # sched: [7:1.00]
; ;
; ZNVER1-LABEL: test_sha1msg1: ; ZNVER1-LABEL: test_sha1msg1:
@ -43,7 +43,7 @@ define <4 x i32> @test_sha1msg2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; GENERIC-LABEL: test_sha1msg2: ; GENERIC-LABEL: test_sha1msg2:
; GENERIC: # %bb.0: ; GENERIC: # %bb.0:
; GENERIC-NEXT: sha1msg2 %xmm1, %xmm0 # sched: [5:1.00] ; GENERIC-NEXT: sha1msg2 %xmm1, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT: sha1msg2 (%rdi), %xmm0 # sched: [10:1.00] ; GENERIC-NEXT: sha1msg2 (%rdi), %xmm0 # sched: [11:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00]
; ;
; GOLDMONT-LABEL: test_sha1msg2: ; GOLDMONT-LABEL: test_sha1msg2:
@ -54,8 +54,8 @@ define <4 x i32> @test_sha1msg2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; ;
; CANNONLAKE-LABEL: test_sha1msg2: ; CANNONLAKE-LABEL: test_sha1msg2:
; CANNONLAKE: # %bb.0: ; CANNONLAKE: # %bb.0:
; CANNONLAKE-NEXT: sha1msg2 %xmm1, %xmm0 # sched: [5:1.00] ; CANNONLAKE-NEXT: sha1msg2 %xmm1, %xmm0 # sched: [4:0.33]
; CANNONLAKE-NEXT: sha1msg2 (%rdi), %xmm0 # sched: [10:1.00] ; CANNONLAKE-NEXT: sha1msg2 (%rdi), %xmm0 # sched: [10:0.50]
; CANNONLAKE-NEXT: retq # sched: [7:1.00] ; CANNONLAKE-NEXT: retq # sched: [7:1.00]
; ;
; ZNVER1-LABEL: test_sha1msg2: ; ZNVER1-LABEL: test_sha1msg2:
@ -74,7 +74,7 @@ define <4 x i32> @test_sha1nexte(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; GENERIC-LABEL: test_sha1nexte: ; GENERIC-LABEL: test_sha1nexte:
; GENERIC: # %bb.0: ; GENERIC: # %bb.0:
; GENERIC-NEXT: sha1nexte %xmm1, %xmm0 # sched: [5:1.00] ; GENERIC-NEXT: sha1nexte %xmm1, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT: sha1nexte (%rdi), %xmm0 # sched: [10:1.00] ; GENERIC-NEXT: sha1nexte (%rdi), %xmm0 # sched: [11:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00]
; ;
; GOLDMONT-LABEL: test_sha1nexte: ; GOLDMONT-LABEL: test_sha1nexte:
@ -85,8 +85,8 @@ define <4 x i32> @test_sha1nexte(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; ;
; CANNONLAKE-LABEL: test_sha1nexte: ; CANNONLAKE-LABEL: test_sha1nexte:
; CANNONLAKE: # %bb.0: ; CANNONLAKE: # %bb.0:
; CANNONLAKE-NEXT: sha1nexte %xmm1, %xmm0 # sched: [5:1.00] ; CANNONLAKE-NEXT: sha1nexte %xmm1, %xmm0 # sched: [4:0.33]
; CANNONLAKE-NEXT: sha1nexte (%rdi), %xmm0 # sched: [10:1.00] ; CANNONLAKE-NEXT: sha1nexte (%rdi), %xmm0 # sched: [10:0.50]
; CANNONLAKE-NEXT: retq # sched: [7:1.00] ; CANNONLAKE-NEXT: retq # sched: [7:1.00]
; ;
; ZNVER1-LABEL: test_sha1nexte: ; ZNVER1-LABEL: test_sha1nexte:
@ -105,7 +105,7 @@ define <4 x i32> @test_sha1rnds4(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; GENERIC-LABEL: test_sha1rnds4: ; GENERIC-LABEL: test_sha1rnds4:
; GENERIC: # %bb.0: ; GENERIC: # %bb.0:
; GENERIC-NEXT: sha1rnds4 $3, %xmm1, %xmm0 # sched: [5:1.00] ; GENERIC-NEXT: sha1rnds4 $3, %xmm1, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # sched: [10:1.00] ; GENERIC-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # sched: [11:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00]
; ;
; GOLDMONT-LABEL: test_sha1rnds4: ; GOLDMONT-LABEL: test_sha1rnds4:
@ -116,8 +116,8 @@ define <4 x i32> @test_sha1rnds4(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; ;
; CANNONLAKE-LABEL: test_sha1rnds4: ; CANNONLAKE-LABEL: test_sha1rnds4:
; CANNONLAKE: # %bb.0: ; CANNONLAKE: # %bb.0:
; CANNONLAKE-NEXT: sha1rnds4 $3, %xmm1, %xmm0 # sched: [5:1.00] ; CANNONLAKE-NEXT: sha1rnds4 $3, %xmm1, %xmm0 # sched: [4:0.33]
; CANNONLAKE-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # sched: [10:1.00] ; CANNONLAKE-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # sched: [10:0.50]
; CANNONLAKE-NEXT: retq # sched: [7:1.00] ; CANNONLAKE-NEXT: retq # sched: [7:1.00]
; ;
; ZNVER1-LABEL: test_sha1rnds4: ; ZNVER1-LABEL: test_sha1rnds4:
@ -140,7 +140,7 @@ define <4 x i32> @test_sha256msg1(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2)
; GENERIC-LABEL: test_sha256msg1: ; GENERIC-LABEL: test_sha256msg1:
; GENERIC: # %bb.0: ; GENERIC: # %bb.0:
; GENERIC-NEXT: sha256msg1 %xmm1, %xmm0 # sched: [5:1.00] ; GENERIC-NEXT: sha256msg1 %xmm1, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT: sha256msg1 (%rdi), %xmm0 # sched: [10:1.00] ; GENERIC-NEXT: sha256msg1 (%rdi), %xmm0 # sched: [11:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00]
; ;
; GOLDMONT-LABEL: test_sha256msg1: ; GOLDMONT-LABEL: test_sha256msg1:
@ -151,8 +151,8 @@ define <4 x i32> @test_sha256msg1(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2)
; ;
; CANNONLAKE-LABEL: test_sha256msg1: ; CANNONLAKE-LABEL: test_sha256msg1:
; CANNONLAKE: # %bb.0: ; CANNONLAKE: # %bb.0:
; CANNONLAKE-NEXT: sha256msg1 %xmm1, %xmm0 # sched: [5:1.00] ; CANNONLAKE-NEXT: sha256msg1 %xmm1, %xmm0 # sched: [4:0.33]
; CANNONLAKE-NEXT: sha256msg1 (%rdi), %xmm0 # sched: [10:1.00] ; CANNONLAKE-NEXT: sha256msg1 (%rdi), %xmm0 # sched: [10:0.50]
; CANNONLAKE-NEXT: retq # sched: [7:1.00] ; CANNONLAKE-NEXT: retq # sched: [7:1.00]
; ;
; ZNVER1-LABEL: test_sha256msg1: ; ZNVER1-LABEL: test_sha256msg1:
@ -171,7 +171,7 @@ define <4 x i32> @test_sha256msg2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2)
; GENERIC-LABEL: test_sha256msg2: ; GENERIC-LABEL: test_sha256msg2:
; GENERIC: # %bb.0: ; GENERIC: # %bb.0:
; GENERIC-NEXT: sha256msg2 %xmm1, %xmm0 # sched: [5:1.00] ; GENERIC-NEXT: sha256msg2 %xmm1, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT: sha256msg2 (%rdi), %xmm0 # sched: [10:1.00] ; GENERIC-NEXT: sha256msg2 (%rdi), %xmm0 # sched: [11:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00]
; ;
; GOLDMONT-LABEL: test_sha256msg2: ; GOLDMONT-LABEL: test_sha256msg2:
@ -182,8 +182,8 @@ define <4 x i32> @test_sha256msg2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2)
; ;
; CANNONLAKE-LABEL: test_sha256msg2: ; CANNONLAKE-LABEL: test_sha256msg2:
; CANNONLAKE: # %bb.0: ; CANNONLAKE: # %bb.0:
; CANNONLAKE-NEXT: sha256msg2 %xmm1, %xmm0 # sched: [5:1.00] ; CANNONLAKE-NEXT: sha256msg2 %xmm1, %xmm0 # sched: [4:0.33]
; CANNONLAKE-NEXT: sha256msg2 (%rdi), %xmm0 # sched: [10:1.00] ; CANNONLAKE-NEXT: sha256msg2 (%rdi), %xmm0 # sched: [10:0.50]
; CANNONLAKE-NEXT: retq # sched: [7:1.00] ; CANNONLAKE-NEXT: retq # sched: [7:1.00]
; ;
; ZNVER1-LABEL: test_sha256msg2: ; ZNVER1-LABEL: test_sha256msg2:
@ -204,7 +204,7 @@ define <4 x i32> @test_sha256rnds2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2,
; GENERIC-NEXT: movaps %xmm0, %xmm3 # sched: [1:1.00] ; GENERIC-NEXT: movaps %xmm0, %xmm3 # sched: [1:1.00]
; GENERIC-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [5:1.00] ; GENERIC-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [5:1.00]
; GENERIC-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [10:1.00] ; GENERIC-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [11:1.00]
; GENERIC-NEXT: movaps %xmm3, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: movaps %xmm3, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00]
; ;
@ -221,8 +221,8 @@ define <4 x i32> @test_sha256rnds2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2,
; CANNONLAKE: # %bb.0: ; CANNONLAKE: # %bb.0:
; CANNONLAKE-NEXT: vmovaps %xmm0, %xmm3 # sched: [1:0.33] ; CANNONLAKE-NEXT: vmovaps %xmm0, %xmm3 # sched: [1:0.33]
; CANNONLAKE-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33] ; CANNONLAKE-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33]
; CANNONLAKE-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [5:1.00] ; CANNONLAKE-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [4:0.33]
; CANNONLAKE-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [10:1.00] ; CANNONLAKE-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [10:0.50]
; CANNONLAKE-NEXT: vmovaps %xmm3, %xmm0 # sched: [1:0.33] ; CANNONLAKE-NEXT: vmovaps %xmm3, %xmm0 # sched: [1:0.33]
; CANNONLAKE-NEXT: retq # sched: [7:1.00] ; CANNONLAKE-NEXT: retq # sched: [7:1.00]
; ;

View File

@ -724,7 +724,7 @@ define void @test_vpmacsswd(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i6
; GENERIC: # %bb.0: ; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP ; GENERIC-NEXT: #APP
; GENERIC-NEXT: vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; GENERIC-NEXT: vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT: vpmacsswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00] ; GENERIC-NEXT: vpmacsswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00]
; ;
@ -744,7 +744,7 @@ define void @test_vpmacssww(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i6
; GENERIC: # %bb.0: ; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP ; GENERIC-NEXT: #APP
; GENERIC-NEXT: vpmacssww %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; GENERIC-NEXT: vpmacssww %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT: vpmacssww %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00] ; GENERIC-NEXT: vpmacssww %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00]
; ;
@ -764,7 +764,7 @@ define void @test_vpmacswd(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64
; GENERIC: # %bb.0: ; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP ; GENERIC-NEXT: #APP
; GENERIC-NEXT: vpmacswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; GENERIC-NEXT: vpmacswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT: vpmacswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00] ; GENERIC-NEXT: vpmacswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00]
; ;
@ -784,7 +784,7 @@ define void @test_vpmacsww(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64
; GENERIC: # %bb.0: ; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP ; GENERIC-NEXT: #APP
; GENERIC-NEXT: vpmacsww %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; GENERIC-NEXT: vpmacsww %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT: vpmacsww %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00] ; GENERIC-NEXT: vpmacsww %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00]
; ;
@ -804,7 +804,7 @@ define void @test_vpmadcsswd(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i
; GENERIC: # %bb.0: ; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP ; GENERIC-NEXT: #APP
; GENERIC-NEXT: vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; GENERIC-NEXT: vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT: vpmadcsswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00] ; GENERIC-NEXT: vpmadcsswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00]
; ;
@ -824,7 +824,7 @@ define void @test_vpmadcswd(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i6
; GENERIC: # %bb.0: ; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP ; GENERIC-NEXT: #APP
; GENERIC-NEXT: vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; GENERIC-NEXT: vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT: vpmadcswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:1.00] ; GENERIC-NEXT: vpmadcswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00]
; ;