1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

[X86] Fix skylake server scheduling info.

Summary:
This fixes most of the scheduling info for SKX vector operations.
I had to split a lot of the YMM/ZMM classes into separate classes for YMM and ZMM.

The before/after llvm-exegesis analyses are in the Phabricator diff.

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D47721

llvm-svn: 334407
This commit is contained in:
Clement Courbet 2018-06-11 14:37:53 +00:00
parent 75085cc5d9
commit 33cf554056
36 changed files with 2072 additions and 1639 deletions

View File

@ -8014,8 +8014,8 @@ multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
let Predicates = [HasAVX512] in
defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64,
WriteCvtPH2PSY>,
avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSY>,
WriteCvtPH2PSZ>,
avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
let Predicates = [HasVLX] in {
@ -8068,8 +8068,8 @@ multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
let Predicates = [HasAVX512] in {
defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PH>,
WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
let Predicates = [HasVLX] in {
defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,

View File

@ -189,25 +189,31 @@ defm : X86WriteRes<WriteFMoveY, [BWPort5], 1, [1], 1>;
// Each *Z (ZMM) class below is declared separately through
// X86WriteResPairUnsupported, so the *Y entries describe YMM only.
defm : BWWriteResPair<WriteFAdd, [BWPort1], 3, [1], 1, 5>; // Floating point add/sub.
defm : BWWriteResPair<WriteFAddX, [BWPort1], 3, [1], 1, 5>; // Floating point add/sub (XMM).
defm : BWWriteResPair<WriteFAddY, [BWPort1], 3, [1], 1, 6>; // Floating point add/sub (YMM).
defm : X86WriteResPairUnsupported<WriteFAddZ>;
defm : BWWriteResPair<WriteFAdd64, [BWPort1], 3, [1], 1, 5>; // Floating point double add/sub.
defm : BWWriteResPair<WriteFAdd64X, [BWPort1], 3, [1], 1, 5>; // Floating point double add/sub (XMM).
defm : BWWriteResPair<WriteFAdd64Y, [BWPort1], 3, [1], 1, 6>; // Floating point double add/sub (YMM).
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
defm : BWWriteResPair<WriteFCmp, [BWPort1], 3, [1], 1, 5>; // Floating point compare.
defm : BWWriteResPair<WriteFCmpX, [BWPort1], 3, [1], 1, 5>; // Floating point compare (XMM).
defm : BWWriteResPair<WriteFCmpY, [BWPort1], 3, [1], 1, 6>; // Floating point compare (YMM).
defm : X86WriteResPairUnsupported<WriteFCmpZ>;
defm : BWWriteResPair<WriteFCmp64, [BWPort1], 3, [1], 1, 5>; // Floating point double compare.
defm : BWWriteResPair<WriteFCmp64X, [BWPort1], 3, [1], 1, 5>; // Floating point double compare (XMM).
defm : BWWriteResPair<WriteFCmp64Y, [BWPort1], 3, [1], 1, 6>; // Floating point double compare (YMM).
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : BWWriteResPair<WriteFCom, [BWPort1], 3>; // Floating point compare to flags.
defm : BWWriteResPair<WriteFMul, [BWPort01], 3, [1], 1, 5>; // Floating point multiplication.
defm : BWWriteResPair<WriteFMulX, [BWPort01], 3, [1], 1, 5>; // Floating point multiplication (XMM).
defm : BWWriteResPair<WriteFMulY, [BWPort01], 3, [1], 1, 6>; // Floating point multiplication (YMM).
defm : X86WriteResPairUnsupported<WriteFMulZ>;
defm : BWWriteResPair<WriteFMul64, [BWPort01], 3, [1], 1, 5>; // Floating point double multiplication.
defm : BWWriteResPair<WriteFMul64X, [BWPort01], 3, [1], 1, 5>; // Floating point double multiplication (XMM).
defm : BWWriteResPair<WriteFMul64Y, [BWPort01], 3, [1], 1, 6>; // Floating point double multiplication (YMM).
defm : X86WriteResPairUnsupported<WriteFMul64Z>;
//defm : BWWriteResPair<WriteFDiv, [BWPort0,BWFPDivider], 11, [1,3], 1, 5>; // Floating point division.
defm : BWWriteResPair<WriteFDivX, [BWPort0,BWFPDivider], 11, [1,5], 1, 5>; // Floating point division (XMM).
@ -233,34 +239,45 @@ defm : BWWriteResPair<WriteFSqrt80, [BWPort0,BWFPDivider], 23, [1,9]>; // Float
// Each *Z (ZMM) class below is declared separately as unsupported,
// so the *Y entries describe YMM only.
defm : BWWriteResPair<WriteFRcp, [BWPort0], 5, [1], 1, 5>; // Floating point reciprocal estimate.
defm : BWWriteResPair<WriteFRcpX, [BWPort0], 5, [1], 1, 5>; // Floating point reciprocal estimate (XMM).
defm : BWWriteResPair<WriteFRcpY, [BWPort0,BWPort015], 11, [2,1], 3, 6>; // Floating point reciprocal estimate (YMM).
defm : X86WriteResPairUnsupported<WriteFRcpZ>;
defm : BWWriteResPair<WriteFRsqrt, [BWPort0], 5, [1], 1, 5>; // Floating point reciprocal square root estimate.
defm : BWWriteResPair<WriteFRsqrtX,[BWPort0], 5, [1], 1, 5>; // Floating point reciprocal square root estimate (XMM).
defm : BWWriteResPair<WriteFRsqrtY,[BWPort0,BWPort015], 11, [2,1], 3, 6>; // Floating point reciprocal square root estimate (YMM).
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
defm : BWWriteResPair<WriteFMA, [BWPort01], 5, [1], 1, 5>; // Fused Multiply Add.
defm : BWWriteResPair<WriteFMAX, [BWPort01], 5, [1], 1, 5>; // Fused Multiply Add (XMM).
defm : BWWriteResPair<WriteFMAY, [BWPort01], 5, [1], 1, 6>; // Fused Multiply Add (YMM).
defm : X86WriteResPairUnsupported<WriteFMAZ>;
defm : BWWriteResPair<WriteDPPD, [BWPort0,BWPort1,BWPort5], 9, [1,1,1], 3, 5>; // Floating point double dot product.
defm : BWWriteResPair<WriteDPPS, [BWPort0,BWPort1,BWPort5], 14, [2,1,1], 4, 5>; // Floating point single dot product.
defm : BWWriteResPair<WriteDPPSY, [BWPort0,BWPort1,BWPort5], 14, [2,1,1], 4, 6>; // Floating point single dot product (YMM).
defm : X86WriteResPairUnsupported<WriteDPPSZ>;
defm : BWWriteResPair<WriteFSign, [BWPort5], 1>; // Floating point fabs/fchs.
defm : X86WriteRes<WriteFRnd, [BWPort23], 6, [1], 1>; // Floating point rounding.
defm : X86WriteRes<WriteFRndY, [BWPort23], 6, [1], 1>; // Floating point rounding (YMM).
defm : X86WriteResPairUnsupported<WriteFRndZ>;
defm : X86WriteRes<WriteFRndLd, [BWPort1,BWPort23], 11, [2,1], 3>;
defm : X86WriteRes<WriteFRndYLd, [BWPort1,BWPort23], 12, [2,1], 3>;
defm : BWWriteResPair<WriteFLogic, [BWPort5], 1, [1], 1, 5>; // Floating point and/or/xor logicals.
defm : BWWriteResPair<WriteFLogicY, [BWPort5], 1, [1], 1, 6>; // Floating point and/or/xor logicals (YMM).
defm : X86WriteResPairUnsupported<WriteFLogicZ>;
defm : BWWriteResPair<WriteFTest, [BWPort0], 1, [1], 1, 5>; // Floating point TEST instructions.
defm : BWWriteResPair<WriteFTestY, [BWPort0], 1, [1], 1, 6>; // Floating point TEST instructions (YMM).
defm : X86WriteResPairUnsupported<WriteFTestZ>;
defm : BWWriteResPair<WriteFShuffle, [BWPort5], 1, [1], 1, 5>; // Floating point vector shuffles.
defm : BWWriteResPair<WriteFShuffleY, [BWPort5], 1, [1], 1, 6>; // Floating point vector shuffles (YMM).
defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
defm : BWWriteResPair<WriteFVarShuffle, [BWPort5], 1, [1], 1, 5>; // Floating point vector variable shuffles.
defm : BWWriteResPair<WriteFVarShuffleY, [BWPort5], 1, [1], 1, 6>; // Floating point vector variable shuffles (YMM).
defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
defm : BWWriteResPair<WriteFBlend, [BWPort015], 1, [1], 1, 5>; // Floating point vector blends.
defm : BWWriteResPair<WriteFBlendY, [BWPort015], 1, [1], 1, 6>; // Floating point vector blends (YMM).
defm : X86WriteResPairUnsupported<WriteFBlendZ>;
defm : BWWriteResPair<WriteFVarBlend, [BWPort5], 2, [2], 2, 5>; // Fp vector variable blends.
defm : BWWriteResPair<WriteFVarBlendY, [BWPort5], 2, [2], 2, 6>; // Fp vector variable blends (YMM).
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
// FMA Scheduling helper class.
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
@ -291,31 +308,42 @@ defm : X86WriteRes<WriteEMMS, [BWPort01,BWPort15,BWPort015,BWPort0156
// Each *Z (ZMM) class below is declared separately as unsupported,
// so the *Y entries describe YMM only.
defm : BWWriteResPair<WriteVecALU, [BWPort15], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
defm : BWWriteResPair<WriteVecALUX, [BWPort15], 1, [1], 1, 5>; // Vector integer ALU op, no logicals (XMM).
defm : BWWriteResPair<WriteVecALUY, [BWPort15], 1, [1], 1, 6>; // Vector integer ALU op, no logicals (YMM).
defm : X86WriteResPairUnsupported<WriteVecALUZ>;
defm : BWWriteResPair<WriteVecLogic, [BWPort015], 1, [1], 1, 5>; // Vector integer and/or/xor.
defm : BWWriteResPair<WriteVecLogicX,[BWPort015], 1, [1], 1, 5>; // Vector integer and/or/xor (XMM).
defm : BWWriteResPair<WriteVecLogicY,[BWPort015], 1, [1], 1, 6>; // Vector integer and/or/xor (YMM).
defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
defm : BWWriteResPair<WriteVecTest, [BWPort0,BWPort5], 2, [1,1], 2, 5>; // Vector integer TEST instructions.
defm : BWWriteResPair<WriteVecTestY, [BWPort0,BWPort5], 4, [1,1], 2, 6>; // Vector integer TEST instructions (YMM).
defm : X86WriteResPairUnsupported<WriteVecTestZ>;
defm : BWWriteResPair<WriteVecIMul, [BWPort0], 5, [1], 1, 5>; // Vector integer multiply.
defm : BWWriteResPair<WriteVecIMulX, [BWPort0], 5, [1], 1, 5>; // Vector integer multiply (XMM).
defm : BWWriteResPair<WriteVecIMulY, [BWPort0], 5, [1], 1, 6>; // Vector integer multiply (YMM).
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
defm : BWWriteResPair<WritePMULLD, [BWPort0], 10, [2], 2, 5>; // Vector PMULLD.
defm : BWWriteResPair<WritePMULLDY, [BWPort0], 10, [2], 2, 6>; // Vector PMULLD (YMM).
defm : X86WriteResPairUnsupported<WritePMULLDZ>;
defm : BWWriteResPair<WriteShuffle, [BWPort5], 1, [1], 1, 5>; // Vector shuffles.
defm : BWWriteResPair<WriteShuffleX, [BWPort5], 1, [1], 1, 5>; // Vector shuffles (XMM).
defm : BWWriteResPair<WriteShuffleY, [BWPort5], 1, [1], 1, 6>; // Vector shuffles (YMM).
defm : X86WriteResPairUnsupported<WriteShuffleZ>;
defm : BWWriteResPair<WriteVarShuffle, [BWPort5], 1, [1], 1, 5>; // Vector variable shuffles.
defm : BWWriteResPair<WriteVarShuffleX,[BWPort5], 1, [1], 1, 5>; // Vector variable shuffles (XMM).
defm : BWWriteResPair<WriteVarShuffleY,[BWPort5], 1, [1], 1, 6>; // Vector variable shuffles (YMM).
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : BWWriteResPair<WriteBlend, [BWPort5], 1, [1], 1, 5>; // Vector blends.
defm : BWWriteResPair<WriteBlendY, [BWPort5], 1, [1], 1, 6>; // Vector blends (YMM).
defm : X86WriteResPairUnsupported<WriteBlendZ>;
defm : BWWriteResPair<WriteVarBlend, [BWPort5], 2, [2], 2, 5>; // Vector variable blends.
defm : BWWriteResPair<WriteVarBlendY, [BWPort5], 2, [2], 2, 6>; // Vector variable blends (YMM).
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
defm : BWWriteResPair<WriteMPSAD, [BWPort0, BWPort5], 7, [1, 2], 3, 5>; // Vector MPSAD.
defm : BWWriteResPair<WriteMPSADY, [BWPort0, BWPort5], 7, [1, 2], 3, 6>; // Vector MPSAD (YMM).
defm : X86WriteResPairUnsupported<WriteMPSADZ>;
defm : BWWriteResPair<WritePSADBW, [BWPort0], 5, [1], 1, 5>; // Vector PSADBW.
defm : BWWriteResPair<WritePSADBWX, [BWPort0], 5, [1], 1, 5>; // Vector PSADBW (XMM).
defm : BWWriteResPair<WritePSADBWY, [BWPort0], 5, [1], 1, 6>; // Vector PSADBW (YMM).
defm : X86WriteResPairUnsupported<WritePSADBWZ>;
defm : BWWriteResPair<WritePHMINPOS, [BWPort0], 5>; // Vector PHMINPOS.
// Vector integer shifts.
@ -323,12 +351,15 @@ defm : BWWriteResPair<WriteVecShift, [BWPort0], 1, [1], 1, 5>;
// Each *Z (ZMM) class below is declared separately as unsupported,
// so the *Y entries describe YMM only.
defm : BWWriteResPair<WriteVecShiftX, [BWPort0,BWPort5], 2, [1,1], 2, 5>;
// WriteVecShiftY lists its register and load (YLd) forms as two explicit entries.
defm : X86WriteRes<WriteVecShiftY, [BWPort0,BWPort5], 4, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftYLd, [BWPort0,BWPort23], 7, [1,1], 2>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
defm : BWWriteResPair<WriteVecShiftImm, [BWPort0], 1, [1], 1, 5>;
defm : BWWriteResPair<WriteVecShiftImmX, [BWPort0], 1, [1], 1, 5>; // Vector integer immediate shifts (XMM).
defm : BWWriteResPair<WriteVecShiftImmY, [BWPort0], 1, [1], 1, 6>; // Vector integer immediate shifts (YMM).
defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
defm : BWWriteResPair<WriteVarVecShift, [BWPort0, BWPort5], 3, [2,1], 3, 5>; // Variable vector shifts.
defm : BWWriteResPair<WriteVarVecShiftY, [BWPort0, BWPort5], 3, [2,1], 3, 6>; // Variable vector shifts (YMM).
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
// Vector insert/extract operations.
def : WriteRes<WriteVecInsert, [BWPort5]> {
@ -354,33 +385,43 @@ def : WriteRes<WriteVecExtractSt, [BWPort4,BWPort5,BWPort237]> {
// Conversion classes. Each *Z (ZMM) class is declared separately as
// unsupported, right after its *Y (YMM) counterpart.
defm : BWWriteResPair<WriteCvtSS2I, [BWPort1], 3>;
defm : BWWriteResPair<WriteCvtPS2I, [BWPort1], 3>;
defm : BWWriteResPair<WriteCvtPS2IY, [BWPort1], 3>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
defm : BWWriteResPair<WriteCvtSD2I, [BWPort1], 3>;
defm : BWWriteResPair<WriteCvtPD2I, [BWPort1], 3>;
defm : BWWriteResPair<WriteCvtPD2IY, [BWPort1], 3>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
defm : BWWriteResPair<WriteCvtI2SS, [BWPort1], 4>;
defm : BWWriteResPair<WriteCvtI2PS, [BWPort1], 4>;
defm : BWWriteResPair<WriteCvtI2PSY, [BWPort1], 4>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
defm : BWWriteResPair<WriteCvtI2SD, [BWPort1], 4>;
defm : BWWriteResPair<WriteCvtI2PD, [BWPort1], 4>;
defm : BWWriteResPair<WriteCvtI2PDY, [BWPort1], 4>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
defm : BWWriteResPair<WriteCvtSS2SD, [BWPort1], 3>;
defm : BWWriteResPair<WriteCvtPS2PD, [BWPort1], 3>;
defm : BWWriteResPair<WriteCvtPS2PDY, [BWPort1], 3>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
defm : BWWriteResPair<WriteCvtSD2SS, [BWPort1], 3>;
defm : BWWriteResPair<WriteCvtPD2PS, [BWPort1], 3>;
defm : BWWriteResPair<WriteCvtPD2PSY, [BWPort1], 3>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
// Half-precision conversions: register, load (Ld) and store (St) forms are
// listed as individual entries, so the ZMM forms use X86WriteResUnsupported
// one entry at a time.
defm : X86WriteRes<WriteCvtPH2PS, [BWPort0,BWPort5], 2, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSY, [BWPort0,BWPort5], 2, [1,1], 2>;
defm : X86WriteResUnsupported<WriteCvtPH2PSZ>;
defm : X86WriteRes<WriteCvtPH2PSLd, [BWPort0,BWPort23], 6, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSYLd, [BWPort0,BWPort23], 6, [1,1], 2>;
defm : X86WriteResUnsupported<WriteCvtPH2PSZLd>;
defm : X86WriteRes<WriteCvtPS2PH, [BWPort1,BWPort5], 4, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PHY, [BWPort1,BWPort5], 6, [1,1], 2>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
defm : X86WriteRes<WriteCvtPS2PHSt, [BWPort1,BWPort4,BWPort237], 5, [1,1,1], 3>;
defm : X86WriteRes<WriteCvtPS2PHYSt, [BWPort1,BWPort4,BWPort237], 7, [1,1,1], 3>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
// Strings instructions.

View File

@ -185,25 +185,31 @@ defm : X86WriteRes<WriteEMMS, [HWPort01,HWPort15,HWPort015,HWPort0156],
// NOTE(review): every *Z (ZMM) entry below repeats the numbers of the *Y
// entry directly above it and is tagged "Unsupported = 1"; presumably a
// placeholder for a class this model should treat as unsupported - confirm.
defm : HWWriteResPair<WriteFAdd, [HWPort1], 3, [1], 1, 5>;
defm : HWWriteResPair<WriteFAddX, [HWPort1], 3, [1], 1, 6>;
defm : HWWriteResPair<WriteFAddY, [HWPort1], 3, [1], 1, 7>;
defm : HWWriteResPair<WriteFAddZ, [HWPort1], 3, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteFAdd64, [HWPort1], 3, [1], 1, 5>;
defm : HWWriteResPair<WriteFAdd64X, [HWPort1], 3, [1], 1, 6>;
defm : HWWriteResPair<WriteFAdd64Y, [HWPort1], 3, [1], 1, 7>;
defm : HWWriteResPair<WriteFAdd64Z, [HWPort1], 3, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteFCmp, [HWPort1], 3, [1], 1, 5>;
defm : HWWriteResPair<WriteFCmpX, [HWPort1], 3, [1], 1, 6>;
defm : HWWriteResPair<WriteFCmpY, [HWPort1], 3, [1], 1, 7>;
defm : HWWriteResPair<WriteFCmpZ, [HWPort1], 3, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteFCmp64, [HWPort1], 3, [1], 1, 5>;
defm : HWWriteResPair<WriteFCmp64X, [HWPort1], 3, [1], 1, 6>;
defm : HWWriteResPair<WriteFCmp64Y, [HWPort1], 3, [1], 1, 7>;
defm : HWWriteResPair<WriteFCmp64Z, [HWPort1], 3, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteFCom, [HWPort1], 3>;
defm : HWWriteResPair<WriteFMul, [HWPort01], 5, [1], 1, 5>;
defm : HWWriteResPair<WriteFMulX, [HWPort01], 5, [1], 1, 6>;
defm : HWWriteResPair<WriteFMulY, [HWPort01], 5, [1], 1, 7>;
defm : HWWriteResPair<WriteFMulZ, [HWPort01], 5, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteFMul64, [HWPort01], 5, [1], 1, 5>;
defm : HWWriteResPair<WriteFMul64X, [HWPort01], 5, [1], 1, 6>;
defm : HWWriteResPair<WriteFMul64Y, [HWPort01], 5, [1], 1, 7>;
defm : HWWriteResPair<WriteFMul64Z, [HWPort01], 5, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteFDiv, [HWPort0,HWFPDivider], 13, [1,7], 1, 5>;
defm : HWWriteResPair<WriteFDivX, [HWPort0,HWFPDivider], 13, [1,7], 1, 6>;
@ -217,10 +223,12 @@ defm : HWWriteResPair<WriteFDiv64Z, [HWPort0,HWPort15,HWFPDivider], 35, [2,1,28]
// NOTE(review): the *Z (ZMM) entries repeat the *Y numbers and are tagged
// "Unsupported = 1"; presumably placeholders - confirm.
defm : HWWriteResPair<WriteFRcp, [HWPort0], 5, [1], 1, 5>;
defm : HWWriteResPair<WriteFRcpX, [HWPort0], 5, [1], 1, 6>;
defm : HWWriteResPair<WriteFRcpY, [HWPort0,HWPort015], 11, [2,1], 3, 7>;
defm : HWWriteResPair<WriteFRcpZ, [HWPort0,HWPort015], 11, [2,1], 3, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteFRsqrt, [HWPort0], 5, [1], 1, 5>;
defm : HWWriteResPair<WriteFRsqrtX,[HWPort0], 5, [1], 1, 6>;
defm : HWWriteResPair<WriteFRsqrtY,[HWPort0,HWPort015], 11, [2,1], 3, 7>;
defm : HWWriteResPair<WriteFRsqrtZ,[HWPort0,HWPort015], 11, [2,1], 3, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteFSqrt, [HWPort0,HWFPDivider], 11, [1,7], 1, 5>;
defm : HWWriteResPair<WriteFSqrtX, [HWPort0,HWFPDivider], 11, [1,7], 1, 6>;
@ -235,60 +243,80 @@ defm : HWWriteResPair<WriteFSqrt80, [HWPort0,HWFPDivider], 23, [1,17]>;
// NOTE(review): every *Z (ZMM) entry below repeats the numbers of the *Y
// entry directly above it and is tagged "Unsupported = 1"; presumably a
// placeholder for a class this model should treat as unsupported - confirm.
defm : HWWriteResPair<WriteFMA, [HWPort01], 5, [1], 1, 5>;
defm : HWWriteResPair<WriteFMAX, [HWPort01], 5, [1], 1, 6>;
defm : HWWriteResPair<WriteFMAY, [HWPort01], 5, [1], 1, 7>;
defm : HWWriteResPair<WriteFMAZ, [HWPort01], 5, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteDPPD, [HWPort0,HWPort1,HWPort5], 9, [1,1,1], 3, 6>;
defm : HWWriteResPair<WriteDPPS, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4, 6>;
defm : HWWriteResPair<WriteDPPSY, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4, 7>;
defm : HWWriteResPair<WriteDPPSZ, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteFSign, [HWPort0], 1>;
// Rounding lists its register and load (Ld) forms as separate explicit entries.
defm : X86WriteRes<WriteFRnd, [HWPort23], 6, [1], 1>;
defm : X86WriteRes<WriteFRndY, [HWPort23], 6, [1], 1>;
defm : X86WriteRes<WriteFRndZ, [HWPort23], 6, [1], 1>; // Unsupported = 1
defm : X86WriteRes<WriteFRndLd, [HWPort1,HWPort23], 12, [2,1], 3>;
defm : X86WriteRes<WriteFRndYLd, [HWPort1,HWPort23], 13, [2,1], 3>;
defm : X86WriteRes<WriteFRndZLd, [HWPort1,HWPort23], 13, [2,1], 3>; // Unsupported = 1
defm : HWWriteResPair<WriteFLogic, [HWPort5], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteFLogicY, [HWPort5], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteFLogicZ, [HWPort5], 1, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteFTest, [HWPort0], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteFTestY, [HWPort0], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteFTestZ, [HWPort0], 1, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteFShuffle, [HWPort5], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteFShuffleY, [HWPort5], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteFShuffleZ, [HWPort5], 1, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteFVarShuffle, [HWPort5], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteFVarShuffleY, [HWPort5], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteFVarShuffleZ, [HWPort5], 1, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteFBlend, [HWPort015], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteFBlendY, [HWPort015], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteFBlendZ, [HWPort015], 1, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteFShuffle256, [HWPort5], 3, [1], 1, 7>;
defm : HWWriteResPair<WriteFVarShuffle256, [HWPort5], 3, [1], 1, 7>;
defm : HWWriteResPair<WriteFVarBlend, [HWPort5], 2, [2], 2, 6>;
defm : HWWriteResPair<WriteFVarBlendY, [HWPort5], 2, [2], 2, 7>;
defm : HWWriteResPair<WriteFVarBlendZ, [HWPort5], 2, [2], 2, 7>; // Unsupported = 1
// Conversion between integer and float.
// NOTE(review): every *Z (ZMM) entry below repeats the numbers of the
// matching *Y entry and is tagged "Unsupported = 1"; presumably a
// placeholder for a class this model should treat as unsupported - confirm.
defm : HWWriteResPair<WriteCvtSD2I, [HWPort1], 3>;
defm : HWWriteResPair<WriteCvtPD2I, [HWPort1], 3>;
defm : HWWriteResPair<WriteCvtPD2IY, [HWPort1], 3>;
defm : HWWriteResPair<WriteCvtPD2IZ, [HWPort1], 3>; // Unsupported = 1
defm : HWWriteResPair<WriteCvtSS2I, [HWPort1], 3>;
defm : HWWriteResPair<WriteCvtPS2I, [HWPort1], 3>;
defm : HWWriteResPair<WriteCvtPS2IY, [HWPort1], 3>;
defm : HWWriteResPair<WriteCvtPS2IZ, [HWPort1], 3>; // Unsupported = 1
defm : HWWriteResPair<WriteCvtI2SD, [HWPort1], 4>;
defm : HWWriteResPair<WriteCvtI2PD, [HWPort1], 4>;
defm : HWWriteResPair<WriteCvtI2PDY, [HWPort1], 4>;
defm : HWWriteResPair<WriteCvtI2PDZ, [HWPort1], 4>; // Unsupported = 1
defm : HWWriteResPair<WriteCvtI2SS, [HWPort1], 4>;
defm : HWWriteResPair<WriteCvtI2PS, [HWPort1], 4>;
defm : HWWriteResPair<WriteCvtI2PSY, [HWPort1], 4>;
defm : HWWriteResPair<WriteCvtI2PSZ, [HWPort1], 4>; // Unsupported = 1
defm : HWWriteResPair<WriteCvtSS2SD, [HWPort1], 3>;
defm : HWWriteResPair<WriteCvtPS2PD, [HWPort1], 3>;
defm : HWWriteResPair<WriteCvtPS2PDY, [HWPort1], 3>;
defm : HWWriteResPair<WriteCvtPS2PDZ, [HWPort1], 3>; // Unsupported = 1
defm : HWWriteResPair<WriteCvtSD2SS, [HWPort1], 3>;
defm : HWWriteResPair<WriteCvtPD2PS, [HWPort1], 3>;
defm : HWWriteResPair<WriteCvtPD2PSY, [HWPort1], 3>;
defm : HWWriteResPair<WriteCvtPD2PSZ, [HWPort1], 3>; // Unsupported = 1
// Half-precision conversions: register, load (Ld) and store (St) forms are
// listed as individual entries.
defm : X86WriteRes<WriteCvtPH2PS, [HWPort0,HWPort5], 2, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSY, [HWPort0,HWPort5], 2, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSZ, [HWPort0,HWPort5], 2, [1,1], 2>; // Unsupported = 1
defm : X86WriteRes<WriteCvtPH2PSLd, [HWPort0,HWPort23], 6, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSYLd, [HWPort0,HWPort23], 7, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSZLd, [HWPort0,HWPort23], 7, [1,1], 2>; // Unsupported = 1
defm : X86WriteRes<WriteCvtPS2PH, [HWPort1,HWPort5], 4, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PHY, [HWPort1,HWPort5], 6, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PHZ, [HWPort1,HWPort5], 6, [1,1], 2>; // Unsupported = 1
defm : X86WriteRes<WriteCvtPS2PHSt, [HWPort1,HWPort4,HWPort5,HWPort237], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteCvtPS2PHYSt, [HWPort1,HWPort4,HWPort5,HWPort237], 7, [1,1,1,1], 4>;
defm : X86WriteRes<WriteCvtPS2PHZSt, [HWPort1,HWPort4,HWPort5,HWPort237], 7, [1,1,1,1], 4>; // Unsupported = 1
// Vector integer operations.
defm : X86WriteRes<WriteVecLoad, [HWPort23], 5, [1], 1>;
@ -314,46 +342,61 @@ defm : X86WriteRes<WriteVecMoveFromGpr, [HWPort5], 1, [1], 1>;
// NOTE(review): every *Z (ZMM) entry below repeats the numbers of the *Y
// entry directly above it and is tagged "Unsupported = 1"; presumably a
// placeholder for a class this model should treat as unsupported - confirm.
defm : HWWriteResPair<WriteVecLogic, [HWPort015], 1, [1], 1, 5>;
defm : HWWriteResPair<WriteVecLogicX,[HWPort015], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteVecLogicY,[HWPort015], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteVecLogicZ,[HWPort015], 1, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteVecTest, [HWPort0,HWPort5], 2, [1,1], 2, 6>;
defm : HWWriteResPair<WriteVecTestY, [HWPort0,HWPort5], 4, [1,1], 2, 7>;
defm : HWWriteResPair<WriteVecTestZ, [HWPort0,HWPort5], 4, [1,1], 2, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteVecALU, [HWPort15], 1, [1], 1, 5>;
defm : HWWriteResPair<WriteVecALUX, [HWPort15], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteVecALUY, [HWPort15], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteVecALUZ, [HWPort15], 1, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteVecIMul, [HWPort0], 5, [1], 1, 5>;
defm : HWWriteResPair<WriteVecIMulX, [HWPort0], 5, [1], 1, 6>;
defm : HWWriteResPair<WriteVecIMulY, [HWPort0], 5, [1], 1, 7>;
defm : HWWriteResPair<WriteVecIMulZ, [HWPort0], 5, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WritePMULLD, [HWPort0], 10, [2], 2, 6>;
defm : HWWriteResPair<WritePMULLDY, [HWPort0], 10, [2], 2, 7>;
defm : HWWriteResPair<WritePMULLDZ, [HWPort0], 10, [2], 2, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteShuffle, [HWPort5], 1, [1], 1, 5>;
defm : HWWriteResPair<WriteShuffleX, [HWPort5], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteShuffleY, [HWPort5], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteShuffleZ, [HWPort5], 1, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteVarShuffle, [HWPort5], 1, [1], 1, 5>;
defm : HWWriteResPair<WriteVarShuffleX,[HWPort5], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteVarShuffleY,[HWPort5], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteVarShuffleZ,[HWPort5], 1, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteBlend, [HWPort5], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteBlendY, [HWPort5], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteBlendZ, [HWPort5], 1, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteShuffle256, [HWPort5], 3, [1], 1, 7>;
defm : HWWriteResPair<WriteVarShuffle256, [HWPort5], 3, [1], 1, 7>;
defm : HWWriteResPair<WriteVarBlend, [HWPort5], 2, [2], 2, 6>;
defm : HWWriteResPair<WriteVarBlendY, [HWPort5], 2, [2], 2, 7>;
defm : HWWriteResPair<WriteVarBlendZ, [HWPort5], 2, [2], 2, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteMPSAD, [HWPort0, HWPort5], 7, [1, 2], 3, 6>;
defm : HWWriteResPair<WriteMPSADY, [HWPort0, HWPort5], 7, [1, 2], 3, 7>;
defm : HWWriteResPair<WriteMPSADZ, [HWPort0, HWPort5], 7, [1, 2], 3, 7>; // Unsupported = 1
defm : HWWriteResPair<WritePSADBW, [HWPort0], 5, [1], 1, 5>;
defm : HWWriteResPair<WritePSADBWX, [HWPort0], 5, [1], 1, 6>;
defm : HWWriteResPair<WritePSADBWY, [HWPort0], 5, [1], 1, 7>;
defm : HWWriteResPair<WritePSADBWZ, [HWPort0], 5, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WritePHMINPOS, [HWPort0], 5, [1], 1, 6>;
// Vector integer shifts.
// NOTE(review): the *Z (ZMM) entries repeat the numbers of the matching *Y
// entries and are tagged "Unsupported = 1"; presumably placeholders - confirm.
defm : HWWriteResPair<WriteVecShift, [HWPort0], 1, [1], 1, 5>;
defm : HWWriteResPair<WriteVecShiftX, [HWPort0,HWPort5], 2, [1,1], 2, 6>;
// WriteVecShiftY/Z list their register and load (Ld) forms as explicit entries.
defm : X86WriteRes<WriteVecShiftY, [HWPort0,HWPort5], 4, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftZ, [HWPort0,HWPort5], 4, [1,1], 2>; // Unsupported = 1
defm : X86WriteRes<WriteVecShiftYLd, [HWPort0,HWPort23], 8, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftZLd, [HWPort0,HWPort23], 8, [1,1], 2>; // Unsupported = 1
defm : HWWriteResPair<WriteVecShiftImm, [HWPort0], 1, [1], 1, 5>;
defm : HWWriteResPair<WriteVecShiftImmX, [HWPort0], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteVecShiftImmY, [HWPort0], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteVecShiftImmZ, [HWPort0], 1, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteVarVecShift, [HWPort0, HWPort5], 3, [2,1], 3, 6>;
defm : HWWriteResPair<WriteVarVecShiftY, [HWPort0, HWPort5], 3, [2,1], 3, 7>;
defm : HWWriteResPair<WriteVarVecShiftZ, [HWPort0, HWPort5], 3, [2,1], 3, 7>; // Unsupported = 1
// Vector insert/extract operations.
def : WriteRes<WriteVecInsert, [HWPort5]> {

View File

@ -176,25 +176,31 @@ defm : X86WriteRes<WriteEMMS, [SBPort015], 31, [31], 31>;
// NOTE(review): every *Z (ZMM) entry below repeats the numbers of the *Y
// entry directly above it and is tagged "Unsupported = 1"; presumably a
// placeholder for a class this model should treat as unsupported - confirm.
defm : SBWriteResPair<WriteFAdd, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteFAddX, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteFAddY, [SBPort1], 3, [1], 1, 7>;
defm : SBWriteResPair<WriteFAddZ, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFAdd64, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteFAdd64X, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteFAdd64Y, [SBPort1], 3, [1], 1, 7>;
defm : SBWriteResPair<WriteFAdd64Z, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFCmp, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteFCmpX, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteFCmpY, [SBPort1], 3, [1], 1, 7>;
defm : SBWriteResPair<WriteFCmpZ, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFCmp64, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteFCmp64X, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteFCmp64Y, [SBPort1], 3, [1], 1, 7>;
defm : SBWriteResPair<WriteFCmp64Z, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFCom, [SBPort1], 3>;
defm : SBWriteResPair<WriteFMul, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WriteFMulX, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WriteFMulY, [SBPort0], 5, [1], 1, 7>;
defm : SBWriteResPair<WriteFMulZ, [SBPort0], 5, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFMul64, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WriteFMul64X, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WriteFMul64Y, [SBPort0], 5, [1], 1, 7>;
defm : SBWriteResPair<WriteFMul64Z, [SBPort0], 5, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFDiv, [SBPort0,SBFPDivider], 14, [1,14], 1, 6>;
defm : SBWriteResPair<WriteFDivX, [SBPort0,SBFPDivider], 14, [1,14], 1, 6>;
@ -208,10 +214,12 @@ defm : SBWriteResPair<WriteFDiv64Z, [SBPort0,SBPort05,SBFPDivider], 45, [2,1,44]
// NOTE(review): the *Z (ZMM) entries repeat the *Y numbers and are tagged
// "Unsupported = 1"; presumably placeholders - confirm.
defm : SBWriteResPair<WriteFRcp, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WriteFRcpX, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WriteFRcpY, [SBPort0,SBPort05], 7, [2,1], 3, 7>;
defm : SBWriteResPair<WriteFRcpZ, [SBPort0,SBPort05], 7, [2,1], 3, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFRsqrt, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WriteFRsqrtX,[SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WriteFRsqrtY,[SBPort0,SBPort05], 7, [2,1], 3, 7>;
defm : SBWriteResPair<WriteFRsqrtZ,[SBPort0,SBPort05], 7, [2,1], 3, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFSqrt, [SBPort0,SBFPDivider], 14, [1,14], 1, 6>;
defm : SBWriteResPair<WriteFSqrtX, [SBPort0,SBFPDivider], 14, [1,14], 1, 6>;
@ -226,58 +234,78 @@ defm : SBWriteResPair<WriteFSqrt80, [SBPort0,SBFPDivider], 24, [1,24], 1, 6>;
// NOTE(review): every *Z (ZMM) entry below repeats the numbers of the *Y
// entry directly above it and is tagged "Unsupported = 1"; presumably a
// placeholder for a class this model should treat as unsupported - confirm.
defm : SBWriteResPair<WriteDPPD, [SBPort0,SBPort1,SBPort5], 9, [1,1,1], 3, 6>;
defm : SBWriteResPair<WriteDPPS, [SBPort0,SBPort1,SBPort5], 12, [1,2,1], 4, 6>;
defm : SBWriteResPair<WriteDPPSY, [SBPort0,SBPort1,SBPort5], 12, [1,2,1], 4, 7>;
defm : SBWriteResPair<WriteDPPSZ, [SBPort0,SBPort1,SBPort5], 12, [1,2,1], 4, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFSign, [SBPort5], 1>;
defm : SBWriteResPair<WriteFRnd, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteFRndY, [SBPort1], 3, [1], 1, 7>;
defm : SBWriteResPair<WriteFRndZ, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFLogic, [SBPort5], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteFLogicY, [SBPort5], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteFLogicZ, [SBPort5], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFTest, [SBPort0], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteFTestY, [SBPort0], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteFTestZ, [SBPort0], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFShuffle, [SBPort5], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteFShuffleY,[SBPort5], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteFShuffleZ,[SBPort5], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFVarShuffle, [SBPort5], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteFVarShuffleY,[SBPort5], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteFVarShuffleZ,[SBPort5], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFBlend, [SBPort05], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteFBlendY, [SBPort05], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteFBlendZ, [SBPort05], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFVarBlend, [SBPort05], 2, [2], 2, 6>;
defm : SBWriteResPair<WriteFVarBlendY,[SBPort05], 2, [2], 2, 7>;
defm : SBWriteResPair<WriteFVarBlendZ,[SBPort05], 2, [2], 2, 7>; // Unsupported = 1
// Conversion between integer and float.
// Sandy Bridge (SB*) entries. Classes written with SBWriteResPair take one
// entry; classes written with X86WriteRes have a separate Ld- or St-suffixed
// entry for the memory form (except WriteCvtI2SD, whose Ld entry follows the
// WriteCvtI2PD* register entries).
// Every Z-suffixed entry repeats the arguments of the matching Y-suffixed
// entry directly above it and is tagged "Unsupported = 1".
defm : SBWriteResPair<WriteCvtSS2I, [SBPort0,SBPort1], 5, [1,1], 2>;
defm : SBWriteResPair<WriteCvtPS2I, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteCvtPS2IY, [SBPort1], 3, [1], 1, 7>;
defm : SBWriteResPair<WriteCvtPS2IZ, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteCvtSD2I, [SBPort0,SBPort1], 5, [1,1], 2>;
defm : SBWriteResPair<WriteCvtPD2I, [SBPort1,SBPort5], 4, [1,1], 2, 6>;
defm : X86WriteRes<WriteCvtPD2IY, [SBPort1,SBPort5], 4, [1,1], 2>;
defm : X86WriteRes<WriteCvtPD2IZ, [SBPort1,SBPort5], 4, [1,1], 2>; // Unsupported = 1
defm : X86WriteRes<WriteCvtPD2IYLd, [SBPort1,SBPort5,SBPort23], 11, [1,1,1], 3>;
defm : X86WriteRes<WriteCvtPD2IZLd, [SBPort1,SBPort5,SBPort23], 11, [1,1,1], 3>; // Unsupported = 1
defm : X86WriteRes<WriteCvtI2SS, [SBPort1,SBPort5], 5, [1,2], 3>;
defm : X86WriteRes<WriteCvtI2SSLd, [SBPort1,SBPort5,SBPort23], 10, [1,1,1], 3>;
defm : SBWriteResPair<WriteCvtI2PS, [SBPort1], 3, [1], 1, 6>;
defm : SBWriteResPair<WriteCvtI2PSY, [SBPort1], 3, [1], 1, 7>;
defm : SBWriteResPair<WriteCvtI2PSZ, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1
defm : X86WriteRes<WriteCvtI2SD, [SBPort1,SBPort5], 4, [1,1], 2>;
defm : X86WriteRes<WriteCvtI2PD, [SBPort1,SBPort5], 4, [1,1], 2>;
defm : X86WriteRes<WriteCvtI2PDY, [SBPort1,SBPort5], 4, [1,1], 2>;
defm : X86WriteRes<WriteCvtI2PDZ, [SBPort1,SBPort5], 4, [1,1], 2>; // Unsupported = 1
defm : X86WriteRes<WriteCvtI2SDLd, [SBPort1,SBPort23], 9, [1,1], 2>;
defm : X86WriteRes<WriteCvtI2PDLd, [SBPort1,SBPort5,SBPort23], 10, [1,1,1], 3>;
defm : X86WriteRes<WriteCvtI2PDYLd, [SBPort1,SBPort5,SBPort23], 10, [1,1,1], 3>;
defm : X86WriteRes<WriteCvtI2PDZLd, [SBPort1,SBPort5,SBPort23], 10, [1,1,1], 3>; // Unsupported = 1
defm : SBWriteResPair<WriteCvtSS2SD, [SBPort0], 1, [1], 1, 6>;
defm : X86WriteRes<WriteCvtPS2PD, [SBPort0,SBPort5], 2, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PDY, [SBPort0,SBPort5], 2, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PDZ, [SBPort0,SBPort5], 2, [1,1], 2>; // Unsupported = 1
defm : X86WriteRes<WriteCvtPS2PDLd, [SBPort0,SBPort23], 7, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PDYLd, [SBPort0,SBPort23], 7, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PDZLd, [SBPort0,SBPort23], 7, [1,1], 2>; // Unsupported = 1
defm : SBWriteResPair<WriteCvtSD2SS, [SBPort1,SBPort5], 4, [1,1], 2, 6>;
defm : SBWriteResPair<WriteCvtPD2PS, [SBPort1,SBPort5], 4, [1,1], 2, 6>;
defm : SBWriteResPair<WriteCvtPD2PSY, [SBPort1,SBPort5], 4, [1,1], 2, 7>;
defm : SBWriteResPair<WriteCvtPD2PSZ, [SBPort1,SBPort5], 4, [1,1], 2, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteCvtPH2PS, [SBPort1], 3>;
defm : SBWriteResPair<WriteCvtPH2PSY, [SBPort1], 3>;
defm : SBWriteResPair<WriteCvtPH2PSZ, [SBPort1], 3>; // Unsupported = 1
defm : X86WriteRes<WriteCvtPS2PH, [SBPort1], 3, [1], 1>;
defm : X86WriteRes<WriteCvtPS2PHY, [SBPort1], 3, [1], 1>;
defm : X86WriteRes<WriteCvtPS2PHZ, [SBPort1], 3, [1], 1>; // Unsupported = 1
defm : X86WriteRes<WriteCvtPS2PHSt, [SBPort1, SBPort23, SBPort4], 4, [1,1,1], 1>;
defm : X86WriteRes<WriteCvtPS2PHYSt, [SBPort1, SBPort23, SBPort4], 4, [1,1,1], 1>;
defm : X86WriteRes<WriteCvtPS2PHZSt, [SBPort1, SBPort23, SBPort4], 4, [1,1,1], 1>; // Unsupported = 1
// Vector integer operations.
// Only the vector load entry of this section is visible before the hunk ends.
defm : X86WriteRes<WriteVecLoad, [SBPort23], 5, [1], 1>;
@@ -303,42 +331,56 @@ defm : X86WriteRes<WriteVecMoveFromGpr, [SBPort5], 1, [1], 1>;
// Sandy Bridge (SB*) entries for the vector integer logic, test, ALU,
// multiply, shuffle, blend, MPSAD, PSADBW and PHMINPOS write classes.
// Every Z-suffixed entry repeats the arguments of the Y-suffixed entry directly
// above it and is tagged "Unsupported = 1".
defm : SBWriteResPair<WriteVecLogic, [SBPort015], 1, [1], 1, 5>;
defm : SBWriteResPair<WriteVecLogicX,[SBPort015], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteVecLogicY,[SBPort015], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteVecLogicZ,[SBPort015], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteVecTest, [SBPort0,SBPort5], 2, [1,1], 2, 6>;
defm : SBWriteResPair<WriteVecTestY, [SBPort0,SBPort5], 2, [1,1], 2, 7>;
defm : SBWriteResPair<WriteVecTestZ, [SBPort0,SBPort5], 2, [1,1], 2, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteVecALU, [SBPort1], 3, [1], 1, 5>;
defm : SBWriteResPair<WriteVecALUX, [SBPort15], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteVecALUY, [SBPort15], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteVecALUZ, [SBPort15], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteVecIMul, [SBPort0], 5, [1], 1, 5>;
defm : SBWriteResPair<WriteVecIMulX, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WriteVecIMulY, [SBPort0], 5, [1], 1, 7>;
defm : SBWriteResPair<WriteVecIMulZ, [SBPort0], 5, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WritePMULLD, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WritePMULLDY, [SBPort0], 5, [1], 1, 7>; // TODO this is probably wrong for 256/512-bit for the "generic" model
defm : SBWriteResPair<WritePMULLDZ, [SBPort0], 5, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteShuffle, [SBPort5], 1, [1], 1, 5>;
defm : SBWriteResPair<WriteShuffleX, [SBPort15], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteShuffleY, [SBPort5], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteShuffleZ, [SBPort5], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteVarShuffle, [SBPort15], 1, [1], 1, 5>;
defm : SBWriteResPair<WriteVarShuffleX, [SBPort15], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteVarShuffleY, [SBPort15], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteVarShuffleZ, [SBPort15], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteBlend, [SBPort15], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteBlendY, [SBPort15], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteBlendZ, [SBPort15], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteVarBlend, [SBPort15], 2, [2], 2, 6>;
defm : SBWriteResPair<WriteVarBlendY,[SBPort15], 2, [2], 2, 7>;
defm : SBWriteResPair<WriteVarBlendZ,[SBPort15], 2, [2], 2, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteMPSAD, [SBPort0, SBPort15], 7, [1,2], 3, 6>;
defm : SBWriteResPair<WriteMPSADY, [SBPort0, SBPort15], 7, [1,2], 3, 7>;
defm : SBWriteResPair<WriteMPSADZ, [SBPort0, SBPort15], 7, [1,2], 3, 7>; // Unsupported = 1
defm : SBWriteResPair<WritePSADBW, [SBPort0], 5, [1], 1, 5>;
defm : SBWriteResPair<WritePSADBWX, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WritePSADBWY, [SBPort0], 5, [1], 1, 7>;
defm : SBWriteResPair<WritePSADBWZ, [SBPort0], 5, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WritePHMINPOS, [SBPort0], 5, [1], 1, 6>;
// Vector integer shifts.
// Sandy Bridge (SB*) entries for the shift, immediate-shift and variable-shift
// write classes. Every Z-suffixed entry repeats the arguments of the
// Y-suffixed entry directly above it and is tagged "Unsupported = 1".
defm : SBWriteResPair<WriteVecShift, [SBPort5], 1, [1], 1, 5>;
defm : SBWriteResPair<WriteVecShiftX, [SBPort0,SBPort15], 2, [1,1], 2, 6>;
defm : SBWriteResPair<WriteVecShiftY, [SBPort0,SBPort15], 4, [1,1], 2, 7>;
defm : SBWriteResPair<WriteVecShiftZ, [SBPort0,SBPort15], 4, [1,1], 2, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteVecShiftImm, [SBPort5], 1, [1], 1, 5>;
defm : SBWriteResPair<WriteVecShiftImmX, [SBPort0], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteVecShiftImmY, [SBPort0], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteVecShiftImmZ, [SBPort0], 1, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteVarVecShift, [SBPort0], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteVarVecShiftY, [SBPort0], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteVarVecShiftZ, [SBPort0], 1, [1], 1, 7>; // Unsupported = 1
// Vector insert/extract operations.
def : WriteRes<WriteVecInsert, [SBPort5,SBPort15]> {
@@ -365,9 +407,11 @@ def : WriteRes<WriteVecExtractSt, [SBPort4,SBPort23,SBPort15]> {
// Sandy Bridge (SB*) entries for the FP (WriteFHAdd*) and integer
// (WritePHAdd*) horizontal add write classes. Each Z-suffixed entry repeats
// the arguments of the Y-suffixed entry above it and is tagged
// "Unsupported = 1".
defm : SBWriteResPair<WriteFHAdd, [SBPort1,SBPort5], 5, [1,2], 3, 6>;
defm : SBWriteResPair<WriteFHAddY, [SBPort1,SBPort5], 5, [1,2], 3, 7>;
defm : SBWriteResPair<WriteFHAddZ, [SBPort1,SBPort5], 5, [1,2], 3, 7>; // Unsupported = 1
defm : SBWriteResPair<WritePHAdd, [SBPort15], 3, [3], 3, 5>;
defm : SBWriteResPair<WritePHAddX, [SBPort15], 3, [3], 3, 6>;
defm : SBWriteResPair<WritePHAddY, [SBPort15], 3, [3], 3, 7>;
defm : SBWriteResPair<WritePHAddZ, [SBPort15], 3, [3], 3, 7>; // Unsupported = 1
////////////////////////////////////////////////////////////////////////////////
// String instructions.
@@ -484,6 +528,7 @@ defm : SBWriteResPair<WriteVarShuffle256, [SBPort5], 1, [1], 1, 7>;
// Sandy Bridge (SB*) entries for the WriteFMA* write classes; all four use the
// same port group and value, and the Z entry is tagged "Unsupported = 1".
defm : SBWriteResPair<WriteFMA, [SBPort01], 5>;
defm : SBWriteResPair<WriteFMAX, [SBPort01], 5>;
defm : SBWriteResPair<WriteFMAY, [SBPort01], 5>;
defm : SBWriteResPair<WriteFMAZ, [SBPort01], 5>; // Unsupported = 1
// Remaining SNB instrs.

View File

@@ -183,76 +183,93 @@ defm : X86WriteRes<WriteFMoveY, [SKLPort015], 1, [1], 1>;
// Skylake client (SKL*) entries for EMMS and the FP add/sub and compare write
// classes.
// In this diff hunk the X and Y entries appear twice: first with a trailing
// description comment, then again without it, followed by a Z-suffixed class
// registered through X86WriteResPairUnsupported.
defm : X86WriteRes<WriteEMMS, [SKLPort05,SKLPort0156], 10, [9,1], 10>;
defm : SKLWriteResPair<WriteFAdd, [SKLPort01], 4, [1], 1, 5>; // Floating point add/sub.
defm : SKLWriteResPair<WriteFAddX, [SKLPort01], 4, [1], 1, 6>; // Floating point add/sub (XMM).
defm : SKLWriteResPair<WriteFAddY, [SKLPort01], 4, [1], 1, 7>; // Floating point add/sub (YMM/ZMM).
defm : SKLWriteResPair<WriteFAddX, [SKLPort01], 4, [1], 1, 6>;
defm : SKLWriteResPair<WriteFAddY, [SKLPort01], 4, [1], 1, 7>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;
defm : SKLWriteResPair<WriteFAdd64, [SKLPort01], 4, [1], 1, 5>; // Floating point double add/sub.
defm : SKLWriteResPair<WriteFAdd64X, [SKLPort01], 4, [1], 1, 6>; // Floating point double add/sub (XMM).
defm : SKLWriteResPair<WriteFAdd64Y, [SKLPort01], 4, [1], 1, 7>; // Floating point double add/sub (YMM/ZMM).
defm : SKLWriteResPair<WriteFAdd64X, [SKLPort01], 4, [1], 1, 6>;
defm : SKLWriteResPair<WriteFAdd64Y, [SKLPort01], 4, [1], 1, 7>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
defm : SKLWriteResPair<WriteFCmp, [SKLPort01], 4, [1], 1, 5>; // Floating point compare.
defm : SKLWriteResPair<WriteFCmpX, [SKLPort01], 4, [1], 1, 6>; // Floating point compare (XMM).
defm : SKLWriteResPair<WriteFCmpY, [SKLPort01], 4, [1], 1, 7>; // Floating point compare (YMM/ZMM).
defm : SKLWriteResPair<WriteFCmpX, [SKLPort01], 4, [1], 1, 6>;
defm : SKLWriteResPair<WriteFCmpY, [SKLPort01], 4, [1], 1, 7>;
defm : X86WriteResPairUnsupported<WriteFCmpZ>;
defm : SKLWriteResPair<WriteFCmp64, [SKLPort01], 4, [1], 1, 5>; // Floating point double compare.
defm : SKLWriteResPair<WriteFCmp64X, [SKLPort01], 4, [1], 1, 6>; // Floating point double compare (XMM).
defm : SKLWriteResPair<WriteFCmp64Y, [SKLPort01], 4, [1], 1, 7>; // Floating point double compare (YMM/ZMM).
defm : SKLWriteResPair<WriteFCmp64X, [SKLPort01], 4, [1], 1, 6>;
defm : SKLWriteResPair<WriteFCmp64Y, [SKLPort01], 4, [1], 1, 7>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : SKLWriteResPair<WriteFCom, [SKLPort0], 2>; // Floating point compare to flags.
// Skylake client (SKL*) entries for the FP multiply, divide and square root
// write classes. Divide and square root entries also list SKLFPDivider as a
// resource.
// In this diff hunk the X and Y entries appear twice (with and without the
// trailing description comment); several divide entries are commented out in
// both copies. Each group ends with a Z-suffixed class registered through
// X86WriteResPairUnsupported.
defm : SKLWriteResPair<WriteFMul, [SKLPort01], 4, [1], 1, 5>; // Floating point multiplication.
defm : SKLWriteResPair<WriteFMulX, [SKLPort01], 4, [1], 1, 6>; // Floating point multiplication (XMM).
defm : SKLWriteResPair<WriteFMulY, [SKLPort01], 4, [1], 1, 7>; // Floating point multiplication (YMM/ZMM).
defm : SKLWriteResPair<WriteFMulX, [SKLPort01], 4, [1], 1, 6>;
defm : SKLWriteResPair<WriteFMulY, [SKLPort01], 4, [1], 1, 7>;
defm : X86WriteResPairUnsupported<WriteFMulZ>;
defm : SKLWriteResPair<WriteFMul64, [SKLPort01], 4, [1], 1, 5>; // Floating point double multiplication.
defm : SKLWriteResPair<WriteFMul64X, [SKLPort01], 4, [1], 1, 6>; // Floating point double multiplication (XMM).
defm : SKLWriteResPair<WriteFMul64Y, [SKLPort01], 4, [1], 1, 7>; // Floating point double multiplication (YMM/ZMM).
defm : SKLWriteResPair<WriteFMul64X, [SKLPort01], 4, [1], 1, 6>;
defm : SKLWriteResPair<WriteFMul64Y, [SKLPort01], 4, [1], 1, 7>;
defm : X86WriteResPairUnsupported<WriteFMul64Z>;
defm : SKLWriteResPair<WriteFDiv, [SKLPort0,SKLFPDivider], 11, [1,3], 1, 5>; // Floating point division.
//defm : SKLWriteResPair<WriteFDivX, [SKLPort0,SKLFPDivider], 11, [1,3], 1, 6>; // Floating point division (XMM).
defm : SKLWriteResPair<WriteFDivY, [SKLPort0,SKLFPDivider], 11, [1,5], 1, 7>; // Floating point division (YMM).
//defm : SKLWriteResPair<WriteFDivX, [SKLPort0,SKLFPDivider], 11, [1,3], 1, 6>;
defm : SKLWriteResPair<WriteFDivY, [SKLPort0,SKLFPDivider], 11, [1,5], 1, 7>;
defm : X86WriteResPairUnsupported<WriteFDivZ>;
//defm : SKLWriteResPair<WriteFDiv64, [SKLPort0,SKLFPDivider], 14, [1,3], 1, 5>; // Floating point double division.
//defm : SKLWriteResPair<WriteFDiv64X, [SKLPort0,SKLFPDivider], 14, [1,3], 1, 6>; // Floating point double division (XMM).
//defm : SKLWriteResPair<WriteFDiv64Y, [SKLPort0,SKLFPDivider], 14, [1,5], 1, 7>; // Floating point double division (YMM).
//defm : SKLWriteResPair<WriteFDiv64X, [SKLPort0,SKLFPDivider], 14, [1,3], 1, 6>;
//defm : SKLWriteResPair<WriteFDiv64Y, [SKLPort0,SKLFPDivider], 14, [1,5], 1, 7>;
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
defm : SKLWriteResPair<WriteFSqrt, [SKLPort0,SKLFPDivider], 12, [1,3], 1, 5>; // Floating point square root.
defm : SKLWriteResPair<WriteFSqrtX, [SKLPort0,SKLFPDivider], 12, [1,3], 1, 6>; // Floating point square root (XMM).
defm : SKLWriteResPair<WriteFSqrtY, [SKLPort0,SKLFPDivider], 12, [1,6], 1, 7>; // Floating point square root (YMM).
defm : SKLWriteResPair<WriteFSqrtX, [SKLPort0,SKLFPDivider], 12, [1,3], 1, 6>;
defm : SKLWriteResPair<WriteFSqrtY, [SKLPort0,SKLFPDivider], 12, [1,6], 1, 7>;
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
defm : SKLWriteResPair<WriteFSqrt64, [SKLPort0,SKLFPDivider], 18, [1,6], 1, 5>; // Floating point double square root.
defm : SKLWriteResPair<WriteFSqrt64X, [SKLPort0,SKLFPDivider], 18, [1,6], 1, 6>; // Floating point double square root (XMM).
defm : SKLWriteResPair<WriteFSqrt64Y, [SKLPort0,SKLFPDivider], 18, [1,12],1, 7>; // Floating point double square root (YMM).
defm : SKLWriteResPair<WriteFSqrt64X, [SKLPort0,SKLFPDivider], 18, [1,6], 1, 6>;
defm : SKLWriteResPair<WriteFSqrt64Y, [SKLPort0,SKLFPDivider], 18, [1,12],1, 7>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
defm : SKLWriteResPair<WriteFSqrt80, [SKLPort0,SKLFPDivider], 21, [1,7]>; // Floating point long double square root.
// Skylake client (SKL*) entries for the reciprocal estimate, reciprocal square
// root estimate, fused multiply-add and dot product write classes.
// In this diff hunk several entries appear twice (with and without the
// trailing description comment); each group ends with a Z-suffixed class
// registered through X86WriteResPairUnsupported.
defm : SKLWriteResPair<WriteFRcp, [SKLPort0], 4, [1], 1, 5>; // Floating point reciprocal estimate.
defm : SKLWriteResPair<WriteFRcpX, [SKLPort0], 4, [1], 1, 6>; // Floating point reciprocal estimate (XMM).
defm : SKLWriteResPair<WriteFRcpY, [SKLPort0], 4, [1], 1, 7>; // Floating point reciprocal estimate (YMM/ZMM).
defm : SKLWriteResPair<WriteFRcpX, [SKLPort0], 4, [1], 1, 6>;
defm : SKLWriteResPair<WriteFRcpY, [SKLPort0], 4, [1], 1, 7>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;
defm : SKLWriteResPair<WriteFRsqrt, [SKLPort0], 4, [1], 1, 5>; // Floating point reciprocal square root estimate.
defm : SKLWriteResPair<WriteFRsqrtX,[SKLPort0], 4, [1], 1, 6>; // Floating point reciprocal square root estimate (XMM).
defm : SKLWriteResPair<WriteFRsqrtY,[SKLPort0], 4, [1], 1, 7>; // Floating point reciprocal square root estimate (YMM/ZMM).
defm : SKLWriteResPair<WriteFRsqrtX,[SKLPort0], 4, [1], 1, 6>;
defm : SKLWriteResPair<WriteFRsqrtY,[SKLPort0], 4, [1], 1, 7>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
defm : SKLWriteResPair<WriteFMA, [SKLPort01], 4, [1], 1, 5>; // Fused Multiply Add.
defm : SKLWriteResPair<WriteFMAX, [SKLPort01], 4, [1], 1, 6>; // Fused Multiply Add (XMM).
defm : SKLWriteResPair<WriteFMAY, [SKLPort01], 4, [1], 1, 7>; // Fused Multiply Add (YMM/ZMM).
defm : SKLWriteResPair<WriteFMAX, [SKLPort01], 4, [1], 1, 6>;
defm : SKLWriteResPair<WriteFMAY, [SKLPort01], 4, [1], 1, 7>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;
defm : SKLWriteResPair<WriteDPPD, [SKLPort5,SKLPort01], 9, [1,2], 3, 6>; // Floating point double dot product.
defm : SKLWriteResPair<WriteDPPS, [SKLPort5,SKLPort01], 13, [1,3], 4, 6>; // Floating point single dot product.
defm : SKLWriteResPair<WriteDPPSY, [SKLPort5,SKLPort01], 13, [1,3], 4, 7>; // Floating point single dot product (YMM).
defm : SKLWriteResPair<WriteDPPS, [SKLPort5,SKLPort01], 13, [1,3], 4, 6>;
defm : SKLWriteResPair<WriteDPPSY, [SKLPort5,SKLPort01], 13, [1,3], 4, 7>;
defm : X86WriteResPairUnsupported<WriteDPPSZ>;
// Skylake client (SKL*) entries for the FP sign, rounding, logic, test,
// shuffle and blend write classes.
// In this diff hunk each Y entry appears twice (with and without the trailing
// description comment) and is followed by a Z-suffixed class registered
// through X86WriteResPairUnsupported.
defm : SKLWriteResPair<WriteFSign, [SKLPort0], 1>; // Floating point fabs/fchs.
defm : SKLWriteResPair<WriteFRnd, [SKLPort01], 8, [2], 2, 6>; // Floating point rounding.
defm : SKLWriteResPair<WriteFRndY, [SKLPort01], 8, [2], 2, 7>; // Floating point rounding (YMM/ZMM).
defm : SKLWriteResPair<WriteFRndY, [SKLPort01], 8, [2], 2, 7>;
defm : X86WriteResPairUnsupported<WriteFRndZ>;
defm : SKLWriteResPair<WriteFLogic, [SKLPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals.
defm : SKLWriteResPair<WriteFLogicY, [SKLPort015], 1, [1], 1, 7>; // Floating point and/or/xor logicals (YMM/ZMM).
defm : SKLWriteResPair<WriteFLogicY, [SKLPort015], 1, [1], 1, 7>;
defm : X86WriteResPairUnsupported<WriteFLogicZ>;
defm : SKLWriteResPair<WriteFTest, [SKLPort0], 2, [1], 1, 6>; // Floating point TEST instructions.
defm : SKLWriteResPair<WriteFTestY, [SKLPort0], 2, [1], 1, 7>; // Floating point TEST instructions (YMM/ZMM).
defm : SKLWriteResPair<WriteFTestY, [SKLPort0], 2, [1], 1, 7>;
defm : X86WriteResPairUnsupported<WriteFTestZ>;
defm : SKLWriteResPair<WriteFShuffle, [SKLPort5], 1, [1], 1, 6>; // Floating point vector shuffles.
defm : SKLWriteResPair<WriteFShuffleY, [SKLPort5], 1, [1], 1, 7>; // Floating point vector shuffles (YMM/ZMM).
defm : SKLWriteResPair<WriteFShuffleY, [SKLPort5], 1, [1], 1, 7>;
defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
defm : SKLWriteResPair<WriteFVarShuffle, [SKLPort5], 1, [1], 1, 6>; // Floating point vector shuffles.
defm : SKLWriteResPair<WriteFVarShuffleY, [SKLPort5], 1, [1], 1, 7>; // Floating point vector shuffles.
defm : SKLWriteResPair<WriteFVarShuffleY, [SKLPort5], 1, [1], 1, 7>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
defm : SKLWriteResPair<WriteFBlend, [SKLPort015], 1, [1], 1, 6>; // Floating point vector blends.
defm : SKLWriteResPair<WriteFBlendY, [SKLPort015], 1, [1], 1, 7>; // Floating point vector blends.
defm : SKLWriteResPair<WriteFBlendY, [SKLPort015], 1, [1], 1, 7>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;
defm : SKLWriteResPair<WriteFVarBlend, [SKLPort015], 2, [2], 2, 6>; // Fp vector variable blends.
defm : SKLWriteResPair<WriteFVarBlendY,[SKLPort015], 2, [2], 2, 7>; // Fp vector variable blends.
defm : SKLWriteResPair<WriteFVarBlendY,[SKLPort015], 2, [2], 2, 7>;
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
// FMA Scheduling helper class.
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
@@ -279,33 +296,44 @@ defm : X86WriteRes<WriteVecMoveToGpr, [SKLPort0], 2, [1], 1>;
// Skylake client (SKL*) entries for the vector move-from-GPR, integer ALU,
// logic, test, multiply and PMULLD write classes.
// In this diff hunk the X/Y entries appear twice (with and without the
// trailing description comment); each group ends with a Z-suffixed class
// registered through X86WriteResPairUnsupported.
defm : X86WriteRes<WriteVecMoveFromGpr, [SKLPort5], 1, [1], 1>;
defm : SKLWriteResPair<WriteVecALU, [SKLPort05], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
defm : SKLWriteResPair<WriteVecALUX, [SKLPort01], 1, [1], 1, 6>; // Vector integer ALU op, no logicals (XMM).
defm : SKLWriteResPair<WriteVecALUY, [SKLPort01], 1, [1], 1, 7>; // Vector integer ALU op, no logicals (YMM/ZMM).
defm : SKLWriteResPair<WriteVecALUX, [SKLPort01], 1, [1], 1, 6>;
defm : SKLWriteResPair<WriteVecALUY, [SKLPort01], 1, [1], 1, 7>;
defm : X86WriteResPairUnsupported<WriteVecALUZ>;
defm : SKLWriteResPair<WriteVecLogic, [SKLPort05], 1, [1], 1, 5>; // Vector integer and/or/xor.
defm : SKLWriteResPair<WriteVecLogicX,[SKLPort015], 1, [1], 1, 6>; // Vector integer and/or/xor (XMM).
defm : SKLWriteResPair<WriteVecLogicY,[SKLPort015], 1, [1], 1, 7>; // Vector integer and/or/xor (YMM/ZMM).
defm : SKLWriteResPair<WriteVecLogicX,[SKLPort015], 1, [1], 1, 6>;
defm : SKLWriteResPair<WriteVecLogicY,[SKLPort015], 1, [1], 1, 7>;
defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
defm : SKLWriteResPair<WriteVecTest, [SKLPort0,SKLPort5], 3, [1,1], 2, 6>; // Vector integer TEST instructions.
defm : SKLWriteResPair<WriteVecTestY, [SKLPort0,SKLPort5], 3, [1,1], 2, 7>; // Vector integer TEST instructions (YMM/ZMM).
defm : SKLWriteResPair<WriteVecTestY, [SKLPort0,SKLPort5], 3, [1,1], 2, 7>;
defm : X86WriteResPairUnsupported<WriteVecTestZ>;
defm : SKLWriteResPair<WriteVecIMul, [SKLPort0] , 4, [1], 1, 5>; // Vector integer multiply.
defm : SKLWriteResPair<WriteVecIMulX, [SKLPort01], 4, [1], 1, 6>; // Vector integer multiply (XMM).
defm : SKLWriteResPair<WriteVecIMulY, [SKLPort01], 4, [1], 1, 7>; // Vector integer multiply (YMM/ZMM).
defm : SKLWriteResPair<WriteVecIMulX, [SKLPort01], 4, [1], 1, 6>;
defm : SKLWriteResPair<WriteVecIMulY, [SKLPort01], 4, [1], 1, 7>;
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
defm : SKLWriteResPair<WritePMULLD, [SKLPort01], 10, [2], 2, 6>; // Vector PMULLD.
defm : SKLWriteResPair<WritePMULLDY, [SKLPort01], 10, [2], 2, 7>; // Vector PMULLD (YMM/ZMM).
defm : SKLWriteResPair<WritePMULLDY, [SKLPort01], 10, [2], 2, 7>;
defm : X86WriteResPairUnsupported<WritePMULLDZ>;
// Skylake client (SKL*) entries for the vector shuffle, blend, MPSAD, PSADBW
// and PHMINPOS write classes.
// In this diff hunk the X/Y entries appear twice (with and without the
// trailing description comment); each group ends with a Z-suffixed class
// registered through X86WriteResPairUnsupported.
// NOTE(review): both copies of WriteVarBlendY end in 6, while every other
// Y-suffixed SKLWriteResPair entry in this block ends in 7 -- confirm whether
// that last argument is intentional.
defm : SKLWriteResPair<WriteShuffle, [SKLPort5], 1, [1], 1, 5>; // Vector shuffles.
defm : SKLWriteResPair<WriteShuffleX, [SKLPort5], 1, [1], 1, 6>; // Vector shuffles (XMM).
defm : SKLWriteResPair<WriteShuffleY, [SKLPort5], 1, [1], 1, 7>; // Vector shuffles (YMM/ZMM).
defm : SKLWriteResPair<WriteShuffleX, [SKLPort5], 1, [1], 1, 6>;
defm : SKLWriteResPair<WriteShuffleY, [SKLPort5], 1, [1], 1, 7>;
defm : X86WriteResPairUnsupported<WriteShuffleZ>;
defm : SKLWriteResPair<WriteVarShuffle, [SKLPort5], 1, [1], 1, 5>; // Vector shuffles.
defm : SKLWriteResPair<WriteVarShuffleX, [SKLPort5], 1, [1], 1, 6>; // Vector shuffles (XMM).
defm : SKLWriteResPair<WriteVarShuffleY, [SKLPort5], 1, [1], 1, 7>; // Vector shuffles (YMM/ZMM).
defm : SKLWriteResPair<WriteVarShuffleX, [SKLPort5], 1, [1], 1, 6>;
defm : SKLWriteResPair<WriteVarShuffleY, [SKLPort5], 1, [1], 1, 7>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : SKLWriteResPair<WriteBlend, [SKLPort5], 1, [1], 1, 6>; // Vector blends.
defm : SKLWriteResPair<WriteBlendY, [SKLPort5], 1, [1], 1, 7>; // Vector blends (YMM/ZMM).
defm : SKLWriteResPair<WriteBlendY, [SKLPort5], 1, [1], 1, 7>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;
defm : SKLWriteResPair<WriteVarBlend, [SKLPort015], 2, [2], 2, 6>; // Vector variable blends.
defm : SKLWriteResPair<WriteVarBlendY, [SKLPort015], 2, [2], 2, 6>; // Vector variable blends (YMM/ZMM).
defm : SKLWriteResPair<WriteVarBlendY, [SKLPort015], 2, [2], 2, 6>;
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
defm : SKLWriteResPair<WriteMPSAD, [SKLPort5], 4, [2], 2, 6>; // Vector MPSAD.
defm : SKLWriteResPair<WriteMPSADY, [SKLPort5], 4, [2], 2, 7>; // Vector MPSAD (YMM/ZMM).
defm : SKLWriteResPair<WriteMPSADY, [SKLPort5], 4, [2], 2, 7>;
defm : X86WriteResPairUnsupported<WriteMPSADZ>;
defm : SKLWriteResPair<WritePSADBW, [SKLPort5], 3, [1], 1, 5>; // Vector PSADBW.
defm : SKLWriteResPair<WritePSADBWX, [SKLPort5], 3, [1], 1, 6>; // Vector PSADBW (XMM).
defm : SKLWriteResPair<WritePSADBWY, [SKLPort5], 3, [1], 1, 7>; // Vector PSADBW (YMM/ZMM).
defm : SKLWriteResPair<WritePSADBWX, [SKLPort5], 3, [1], 1, 6>;
defm : SKLWriteResPair<WritePSADBWY, [SKLPort5], 3, [1], 1, 7>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;
defm : SKLWriteResPair<WritePHMINPOS, [SKLPort01], 4, [1], 1, 6>; // Vector PHMINPOS.
// Vector integer shifts.
@@ -314,12 +342,15 @@ defm : X86WriteRes<WriteVecShiftX, [SKLPort5,SKLPort01], 2, [1,1], 2>;
// Skylake client (SKL*) entries for the vector shift, immediate-shift and
// variable-shift write classes. WriteVecShiftX/Y use X86WriteRes with separate
// Ld-suffixed entries for the memory forms.
// In this diff hunk the WriteVecShiftImm, WriteVecShiftImmX/Y and
// WriteVarVecShiftY entries appear twice, differing only in their trailing
// description comments; each group ends with a Z-suffixed class registered
// through X86WriteResPairUnsupported.
defm : X86WriteRes<WriteVecShiftY, [SKLPort5,SKLPort01], 4, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftXLd, [SKLPort01,SKLPort23], 7, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftYLd, [SKLPort01,SKLPort23], 8, [1,1], 2>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
defm : SKLWriteResPair<WriteVecShiftImm, [SKLPort0], 1, [1], 1, 5>;
defm : SKLWriteResPair<WriteVecShiftImmX, [SKLPort01], 1, [1], 1, 6>; // Vector integer immediate shifts (XMM).
defm : SKLWriteResPair<WriteVecShiftImmY, [SKLPort01], 1, [1], 1, 7>; // Vector integer immediate shifts (YMM/ZMM).
defm : SKLWriteResPair<WriteVecShiftImm, [SKLPort0], 1, [1], 1, 5>; // Vector integer immediate shifts.
defm : SKLWriteResPair<WriteVecShiftImmX, [SKLPort01], 1, [1], 1, 6>;
defm : SKLWriteResPair<WriteVecShiftImmY, [SKLPort01], 1, [1], 1, 7>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
defm : SKLWriteResPair<WriteVarVecShift, [SKLPort01], 1, [1], 1, 6>; // Variable vector shifts.
defm : SKLWriteResPair<WriteVarVecShiftY, [SKLPort01], 1, [1], 1, 7>; // Variable vector shifts (YMM/ZMM).
defm : SKLWriteResPair<WriteVarVecShiftY, [SKLPort01], 1, [1], 1, 7>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
// Vector insert/extract operations.
def : WriteRes<WriteVecInsert, [SKLPort5]> {
@@ -346,33 +377,43 @@ def : WriteRes<WriteVecExtractSt, [SKLPort4,SKLPort5,SKLPort237]> {
// Skylake client (SKL*) entries for the integer/float and float/float
// conversion write classes.
// Classes written with SKLWriteResPair get their Z variant through
// X86WriteResPairUnsupported. The PH2PS and PS2PH classes use X86WriteRes
// with separate Ld- or St-suffixed entries, so each of their Z variants is
// registered individually through X86WriteResUnsupported.
defm : SKLWriteResPair<WriteCvtSS2I, [SKLPort1], 3>;
defm : SKLWriteResPair<WriteCvtPS2I, [SKLPort1], 3>;
defm : SKLWriteResPair<WriteCvtPS2IY, [SKLPort1], 3>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
defm : SKLWriteResPair<WriteCvtSD2I, [SKLPort1], 3>;
defm : SKLWriteResPair<WriteCvtPD2I, [SKLPort1], 3>;
defm : SKLWriteResPair<WriteCvtPD2IY, [SKLPort1], 3>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
defm : SKLWriteResPair<WriteCvtI2SS, [SKLPort1], 4>;
defm : SKLWriteResPair<WriteCvtI2PS, [SKLPort1], 4>;
defm : SKLWriteResPair<WriteCvtI2PSY, [SKLPort1], 4>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
defm : SKLWriteResPair<WriteCvtI2SD, [SKLPort1], 4>;
defm : SKLWriteResPair<WriteCvtI2PD, [SKLPort1], 4>;
defm : SKLWriteResPair<WriteCvtI2PDY, [SKLPort1], 4>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
defm : SKLWriteResPair<WriteCvtSS2SD, [SKLPort1], 3>;
defm : SKLWriteResPair<WriteCvtPS2PD, [SKLPort1], 3>;
defm : SKLWriteResPair<WriteCvtPS2PDY, [SKLPort1], 3>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
defm : SKLWriteResPair<WriteCvtSD2SS, [SKLPort1], 3>;
defm : SKLWriteResPair<WriteCvtPD2PS, [SKLPort1], 3>;
defm : SKLWriteResPair<WriteCvtPD2PSY, [SKLPort1], 3>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
defm : X86WriteRes<WriteCvtPH2PS, [SKLPort5,SKLPort015], 5, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSY, [SKLPort5,SKLPort01], 7, [1,1], 2>;
defm : X86WriteResUnsupported<WriteCvtPH2PSZ>;
defm : X86WriteRes<WriteCvtPH2PSLd, [SKLPort23,SKLPort01], 9, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSYLd, [SKLPort23,SKLPort01], 10, [1,1], 2>;
defm : X86WriteResUnsupported<WriteCvtPH2PSZLd>;
defm : X86WriteRes<WriteCvtPS2PH, [SKLPort5,SKLPort015], 5, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PHY, [SKLPort5,SKLPort01], 7, [1,1], 2>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
defm : X86WriteRes<WriteCvtPS2PHSt, [SKLPort4,SKLPort5,SKLPort237,SKLPort01], 6, [1,1,1,1], 4>;
defm : X86WriteRes<WriteCvtPS2PHYSt, [SKLPort4,SKLPort5,SKLPort237,SKLPort01], 8, [1,1,1,1], 4>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
// Strings instructions.

View File

@@ -182,77 +182,94 @@ defm : X86WriteRes<WriteFMoveX, [SKXPort015], 1, [1], 1>;
// Skylake server (SKX*) entries for FP move (YMM), EMMS and the FP add/sub,
// compare and multiply write classes.
// In this diff hunk each group appears twice: the first copy lists SKXPort015
// for every entry and has no Z-suffixed class; the second copy lists SKXPort01
// for the scalar, X and Y entries and adds a Z-suffixed entry on SKXPort05.
defm : X86WriteRes<WriteFMoveY, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteEMMS, [SKXPort05,SKXPort0156], 10, [9,1], 10>;
defm : SKXWriteResPair<WriteFAdd, [SKXPort015], 4, [1], 1, 5>; // Floating point add/sub.
defm : SKXWriteResPair<WriteFAddX, [SKXPort015], 4, [1], 1, 6>; // Floating point add/sub (XMM).
defm : SKXWriteResPair<WriteFAddY, [SKXPort015], 4, [1], 1, 7>; // Floating point add/sub (YMM/ZMM).
defm : SKXWriteResPair<WriteFAdd64, [SKXPort015], 4, [1], 1, 5>; // Floating point double add/sub.
defm : SKXWriteResPair<WriteFAdd64X, [SKXPort015], 4, [1], 1, 6>; // Floating point double add/sub (XMM).
defm : SKXWriteResPair<WriteFAdd64Y, [SKXPort015], 4, [1], 1, 7>; // Floating point double add/sub (YMM/ZMM).
defm : SKXWriteResPair<WriteFAdd, [SKXPort01], 4, [1], 1, 5>; // Floating point add/sub.
defm : SKXWriteResPair<WriteFAddX, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFAddY, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFAddZ, [SKXPort05], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFAdd64, [SKXPort01], 4, [1], 1, 5>; // Floating point double add/sub.
defm : SKXWriteResPair<WriteFAdd64X, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFAdd64Y, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFAdd64Z, [SKXPort05], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFCmp, [SKXPort015], 4, [1], 1, 5>; // Floating point compare.
defm : SKXWriteResPair<WriteFCmpX, [SKXPort015], 4, [1], 1, 6>; // Floating point compare (XMM).
defm : SKXWriteResPair<WriteFCmpY, [SKXPort015], 4, [1], 1, 7>; // Floating point compare (YMM/ZMM).
defm : SKXWriteResPair<WriteFCmp64, [SKXPort015], 4, [1], 1, 5>; // Floating point double compare.
defm : SKXWriteResPair<WriteFCmp64X, [SKXPort015], 4, [1], 1, 6>; // Floating point double compare (XMM).
defm : SKXWriteResPair<WriteFCmp64Y, [SKXPort015], 4, [1], 1, 7>; // Floating point double compare (YMM/ZMM).
defm : SKXWriteResPair<WriteFCmp, [SKXPort01], 4, [1], 1, 5>; // Floating point compare.
defm : SKXWriteResPair<WriteFCmpX, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFCmpY, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFCmpZ, [SKXPort05], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFCmp64, [SKXPort01], 4, [1], 1, 5>; // Floating point double compare.
defm : SKXWriteResPair<WriteFCmp64X, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFCmp64Y, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFCmp64Z, [SKXPort05], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFCom, [SKXPort0], 2>; // Floating point compare to flags.
defm : SKXWriteResPair<WriteFMul, [SKXPort015], 4, [1], 1, 5>; // Floating point multiplication.
defm : SKXWriteResPair<WriteFMulX, [SKXPort015], 4, [1], 1, 6>; // Floating point multiplication (XMM).
defm : SKXWriteResPair<WriteFMulY, [SKXPort015], 4, [1], 1, 7>; // Floating point multiplication (YMM/ZMM).
defm : SKXWriteResPair<WriteFMul64, [SKXPort015], 4, [1], 1, 5>; // Floating point double multiplication.
defm : SKXWriteResPair<WriteFMul64X, [SKXPort015], 4, [1], 1, 6>; // Floating point double multiplication (XMM).
defm : SKXWriteResPair<WriteFMul64Y, [SKXPort015], 4, [1], 1, 7>; // Floating point double multiplication (YMM/ZMM).
defm : SKXWriteResPair<WriteFMul, [SKXPort01], 4, [1], 1, 5>; // Floating point multiplication.
defm : SKXWriteResPair<WriteFMulX, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFMulY, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFMulZ, [SKXPort05], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFMul64, [SKXPort01], 4, [1], 1, 5>; // Floating point double multiplication.
defm : SKXWriteResPair<WriteFMul64X, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFMul64Y, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFMul64Z, [SKXPort05], 4, [1], 1, 7>;
// Skylake server (SKX*) entries for the FP divide, square root and reciprocal
// estimate write classes. Divide and square root entries also list
// SKXFPDivider as a resource; the Z-suffixed entries additionally list
// SKXPort5.
// In this diff hunk the X/Y/Z entries appear twice, differing only in their
// trailing description comments; several divide entries are commented out in
// both copies.
// NOTE(review): the "10-14 cycles" remark is repeated on every divide entry,
// including the Z entries whose third argument is 18 or 23 -- it looks
// copy-pasted; confirm which entries it actually applies to.
defm : SKXWriteResPair<WriteFDiv, [SKXPort0,SKXFPDivider], 11, [1,3], 1, 5>; // 10-14 cycles. // Floating point division.
//defm : SKXWriteResPair<WriteFDivX, [SKXPort0,SKXFPDivider], 11, [1,3], 1, 6>; // 10-14 cycles. // Floating point division (XMM).
defm : SKXWriteResPair<WriteFDivY, [SKXPort0,SKXFPDivider], 11, [1,5], 1, 7>; // 10-14 cycles. // Floating point division (YMM).
defm : SKXWriteResPair<WriteFDivZ, [SKXPort0,SKXPort5,SKXFPDivider], 18, [2,1,10], 3, 7>; // 10-14 cycles. // Floating point division (ZMM).
//defm : SKXWriteResPair<WriteFDivX, [SKXPort0,SKXFPDivider], 11, [1,3], 1, 6>; // 10-14 cycles.
defm : SKXWriteResPair<WriteFDivY, [SKXPort0,SKXFPDivider], 11, [1,5], 1, 7>; // 10-14 cycles.
defm : SKXWriteResPair<WriteFDivZ, [SKXPort0,SKXPort5,SKXFPDivider], 18, [2,1,10], 3, 7>; // 10-14 cycles.
//defm : SKXWriteResPair<WriteFDiv64, [SKXPort0,SKXFPDivider], 14, [1,3], 1, 5>; // 10-14 cycles. // Floating point division.
//defm : SKXWriteResPair<WriteFDiv64X, [SKXPort0,SKXFPDivider], 14, [1,3], 1, 6>; // 10-14 cycles. // Floating point division (XMM).
//defm : SKXWriteResPair<WriteFDiv64Y, [SKXPort0,SKXFPDivider], 14, [1,5], 1, 7>; // 10-14 cycles. // Floating point division (YMM).
defm : SKXWriteResPair<WriteFDiv64Z, [SKXPort0,SKXPort5,SKXFPDivider], 23, [2,1,16], 3, 7>; // 10-14 cycles. // Floating point division (ZMM).
//defm : SKXWriteResPair<WriteFDiv64X, [SKXPort0,SKXFPDivider], 14, [1,3], 1, 6>; // 10-14 cycles.
//defm : SKXWriteResPair<WriteFDiv64Y, [SKXPort0,SKXFPDivider], 14, [1,5], 1, 7>; // 10-14 cycles.
defm : SKXWriteResPair<WriteFDiv64Z, [SKXPort0,SKXPort5,SKXFPDivider], 23, [2,1,16], 3, 7>; // 10-14 cycles.
defm : SKXWriteResPair<WriteFSqrt, [SKXPort0,SKXFPDivider], 12, [1,3], 1, 5>; // Floating point square root.
defm : SKXWriteResPair<WriteFSqrtX, [SKXPort0,SKXFPDivider], 12, [1,3], 1, 6>; // Floating point square root (XMM).
defm : SKXWriteResPair<WriteFSqrtY, [SKXPort0,SKXFPDivider], 12, [1,6], 1, 7>; // Floating point square root (YMM).
defm : SKXWriteResPair<WriteFSqrtZ, [SKXPort0,SKXPort5,SKXFPDivider], 20, [2,1,12], 3, 7>; // Floating point square root (ZMM).
defm : SKXWriteResPair<WriteFSqrtX, [SKXPort0,SKXFPDivider], 12, [1,3], 1, 6>;
defm : SKXWriteResPair<WriteFSqrtY, [SKXPort0,SKXFPDivider], 12, [1,6], 1, 7>;
defm : SKXWriteResPair<WriteFSqrtZ, [SKXPort0,SKXPort5,SKXFPDivider], 20, [2,1,12], 3, 7>;
defm : SKXWriteResPair<WriteFSqrt64, [SKXPort0,SKXFPDivider], 18, [1,6], 1, 5>; // Floating point double square root.
defm : SKXWriteResPair<WriteFSqrt64X, [SKXPort0,SKXFPDivider], 18, [1,6], 1, 6>; // Floating point double square root (XMM).
defm : SKXWriteResPair<WriteFSqrt64Y, [SKXPort0,SKXFPDivider], 18, [1,12],1, 7>; // Floating point double square root (YMM).
defm : SKXWriteResPair<WriteFSqrt64Z, [SKXPort0,SKXPort5,SKXFPDivider], 32, [2,1,24], 3, 7>; // Floating point double square root (ZMM).
defm : SKXWriteResPair<WriteFSqrt64X, [SKXPort0,SKXFPDivider], 18, [1,6], 1, 6>;
defm : SKXWriteResPair<WriteFSqrt64Y, [SKXPort0,SKXFPDivider], 18, [1,12],1, 7>;
defm : SKXWriteResPair<WriteFSqrt64Z, [SKXPort0,SKXPort5,SKXFPDivider], 32, [2,1,24], 3, 7>;
defm : SKXWriteResPair<WriteFSqrt80, [SKXPort0,SKXFPDivider], 21, [1,7]>; // Floating point long double square root.
defm : SKXWriteResPair<WriteFRcp, [SKXPort0], 4, [1], 1, 5>; // Floating point reciprocal estimate.
defm : SKXWriteResPair<WriteFRcpX, [SKXPort0], 4, [1], 1, 6>; // Floating point reciprocal estimate (XMM).
defm : SKXWriteResPair<WriteFRcpY, [SKXPort0], 4, [1], 1, 7>; // Floating point reciprocal estimate (YMM/ZMM).
defm : SKXWriteResPair<WriteFRcpX, [SKXPort0], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFRcpY, [SKXPort0], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFRcpZ, [SKXPort0,SKXPort5], 4, [2,1], 3, 7>;
defm : SKXWriteResPair<WriteFRsqrt, [SKXPort0], 4, [1], 1, 5>; // Floating point reciprocal square root estimate.
defm : SKXWriteResPair<WriteFRsqrtX,[SKXPort0], 4, [1], 1, 6>; // Floating point reciprocal square root estimate (XMM).
defm : SKXWriteResPair<WriteFRsqrtY,[SKXPort0], 4, [1], 1, 7>; // Floating point reciprocal square root estimate (YMM/ZMM).
defm : SKXWriteResPair<WriteFRsqrtX,[SKXPort0], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFRsqrtY,[SKXPort0], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFRsqrtZ,[SKXPort0,SKXPort5], 9, [2,1], 3, 7>;
defm : SKXWriteResPair<WriteFMA, [SKXPort015], 4, [1], 1, 5>; // Fused Multiply Add.
defm : SKXWriteResPair<WriteFMAX, [SKXPort015], 4, [1], 1, 6>; // Fused Multiply Add (XMM).
defm : SKXWriteResPair<WriteFMAY, [SKXPort015], 4, [1], 1, 7>; // Fused Multiply Add (YMM/ZMM).
defm : SKXWriteResPair<WriteFMA, [SKXPort01], 4, [1], 1, 5>; // Fused Multiply Add.
defm : SKXWriteResPair<WriteFMAX, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFMAY, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFMAZ, [SKXPort05], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteDPPD, [SKXPort5,SKXPort015], 9, [1,2], 3, 6>; // Floating point double dot product.
defm : SKXWriteResPair<WriteDPPS, [SKXPort5,SKXPort015], 13, [1,3], 4, 6>; // Floating point single dot product.
defm : SKXWriteResPair<WriteDPPSY,[SKXPort5,SKXPort015], 13, [1,3], 4, 7>; // Floating point single dot product (YMM).
defm : SKXWriteResPair<WriteDPPS, [SKXPort5,SKXPort015], 13, [1,3], 4, 6>;
defm : SKXWriteResPair<WriteDPPSY,[SKXPort5,SKXPort015], 13, [1,3], 4, 7>;
defm : SKXWriteResPair<WriteDPPSZ,[SKXPort5,SKXPort015], 13, [1,3], 4, 7>;
defm : SKXWriteResPair<WriteFSign, [SKXPort0], 1>; // Floating point fabs/fchs.
defm : SKXWriteResPair<WriteFRnd, [SKXPort015], 8, [2], 2, 6>; // Floating point rounding.
defm : SKXWriteResPair<WriteFRndY, [SKXPort015], 8, [2], 2, 7>; // Floating point rounding (YMM/ZMM).
defm : SKXWriteResPair<WriteFRnd, [SKXPort01], 8, [2], 2, 6>; // Floating point rounding.
defm : SKXWriteResPair<WriteFRndY, [SKXPort01], 8, [2], 2, 7>;
defm : SKXWriteResPair<WriteFRndZ, [SKXPort05], 8, [2], 2, 7>;
defm : SKXWriteResPair<WriteFLogic, [SKXPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals.
defm : SKXWriteResPair<WriteFLogicY, [SKXPort015], 1, [1], 1, 7>; // Floating point and/or/xor logicals (YMM/ZMM).
defm : SKXWriteResPair<WriteFLogicY, [SKXPort015], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFLogicZ, [SKXPort05], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFTest, [SKXPort0], 2, [1], 1, 6>; // Floating point TEST instructions.
defm : SKXWriteResPair<WriteFTestY, [SKXPort0], 2, [1], 1, 7>; // Floating point TEST instructions (YMM/ZMM).
defm : SKXWriteResPair<WriteFTestY, [SKXPort0], 2, [1], 1, 7>;
defm : SKXWriteResPair<WriteFTestZ, [SKXPort0], 2, [1], 1, 7>;
defm : SKXWriteResPair<WriteFShuffle, [SKXPort5], 1, [1], 1, 6>; // Floating point vector shuffles.
defm : SKXWriteResPair<WriteFShuffleY, [SKXPort5], 1, [1], 1, 7>; // Floating point vector shuffles (YMM/ZMM).
defm : SKXWriteResPair<WriteFShuffleY, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFShuffleZ, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFVarShuffle, [SKXPort5], 1, [1], 1, 6>; // Floating point vector variable shuffles.
defm : SKXWriteResPair<WriteFVarShuffleY, [SKXPort5], 1, [1], 1, 7>; // Floating point vector variable shuffles.
defm : SKXWriteResPair<WriteFVarShuffleY, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFVarShuffleZ, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFBlend, [SKXPort015], 1, [1], 1, 6>; // Floating point vector blends.
defm : SKXWriteResPair<WriteFBlendY,[SKXPort015], 1, [1], 1, 7>; // Floating point vector blends.
defm : SKXWriteResPair<WriteFBlendY,[SKXPort015], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFBlendZ,[SKXPort015], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteFVarBlend, [SKXPort015], 2, [2], 2, 6>; // Fp vector variable blends.
defm : SKXWriteResPair<WriteFVarBlendY,[SKXPort015], 2, [2], 2, 7>; // Fp vector variable blends.
defm : SKXWriteResPair<WriteFVarBlendY,[SKXPort015], 2, [2], 2, 7>;
defm : SKXWriteResPair<WriteFVarBlendZ,[SKXPort015], 2, [2], 2, 7>;
// FMA Scheduling helper class.
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
@ -279,47 +296,62 @@ defm : X86WriteRes<WriteVecMoveToGpr, [SKXPort0], 2, [1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [SKXPort5], 1, [1], 1>;
defm : SKXWriteResPair<WriteVecALU, [SKXPort05], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
defm : SKXWriteResPair<WriteVecALUX, [SKXPort01], 1, [1], 1, 6>; // Vector integer ALU op, no logicals (XMM).
defm : SKXWriteResPair<WriteVecALUY, [SKXPort01], 1, [1], 1, 7>; // Vector integer ALU op, no logicals (YMM/ZMM).
defm : SKXWriteResPair<WriteVecALUX, [SKXPort01], 1, [1], 1, 6>;
defm : SKXWriteResPair<WriteVecALUY, [SKXPort01], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVecALUZ, [SKXPort0], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVecLogic, [SKXPort05], 1, [1], 1, 5>; // Vector integer and/or/xor.
defm : SKXWriteResPair<WriteVecLogicX,[SKXPort015], 1, [1], 1, 6>; // Vector integer and/or/xor (XMM).
defm : SKXWriteResPair<WriteVecLogicY,[SKXPort015], 1, [1], 1, 7>; // Vector integer and/or/xor (YMM/ZMM).
defm : SKXWriteResPair<WriteVecLogicX,[SKXPort015], 1, [1], 1, 6>;
defm : SKXWriteResPair<WriteVecLogicY,[SKXPort015], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVecLogicZ,[SKXPort05], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVecTest, [SKXPort0,SKXPort5], 3, [1,1], 2, 6>; // Vector integer TEST instructions.
defm : SKXWriteResPair<WriteVecTestY, [SKXPort0,SKXPort5], 3, [1,1], 2, 7>; // Vector integer TEST instructions (YMM/ZMM).
defm : SKXWriteResPair<WriteVecTestY, [SKXPort0,SKXPort5], 3, [1,1], 2, 7>;
defm : SKXWriteResPair<WriteVecTestZ, [SKXPort0,SKXPort5], 3, [1,1], 2, 7>;
defm : SKXWriteResPair<WriteVecIMul, [SKXPort0], 4, [1], 1, 5>; // Vector integer multiply.
defm : SKXWriteResPair<WriteVecIMulX, [SKXPort015], 4, [1], 1, 6>; // Vector integer multiply (XMM).
defm : SKXWriteResPair<WriteVecIMulY, [SKXPort015], 4, [1], 1, 7>; // Vector integer multiply (YMM/ZMM).
defm : SKXWriteResPair<WritePMULLD, [SKXPort015], 10, [2], 2, 6>; // Vector PMULLD.
defm : SKXWriteResPair<WritePMULLDY, [SKXPort015], 10, [2], 2, 7>; // Vector PMULLD (YMM/ZMM).
defm : SKXWriteResPair<WriteVecIMulX, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteVecIMulY, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteVecIMulZ, [SKXPort05], 4, [1], 1, 7>;
defm : SKXWriteResPair<WritePMULLD, [SKXPort01], 10, [2], 2, 6>; // Vector PMULLD.
defm : SKXWriteResPair<WritePMULLDY, [SKXPort01], 10, [2], 2, 7>;
defm : SKXWriteResPair<WritePMULLDZ, [SKXPort05], 10, [2], 2, 7>;
defm : SKXWriteResPair<WriteShuffle, [SKXPort5], 1, [1], 1, 5>; // Vector shuffles.
defm : SKXWriteResPair<WriteShuffleX, [SKXPort5], 1, [1], 1, 6>; // Vector shuffles (XMM).
defm : SKXWriteResPair<WriteShuffleY, [SKXPort5], 1, [1], 1, 7>; // Vector shuffles (YMM/ZMM).
defm : SKXWriteResPair<WriteShuffleX, [SKXPort5], 1, [1], 1, 6>;
defm : SKXWriteResPair<WriteShuffleY, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteShuffleZ, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVarShuffle, [SKXPort5], 1, [1], 1, 5>; // Vector variable shuffles.
defm : SKXWriteResPair<WriteVarShuffleX, [SKXPort5], 1, [1], 1, 6>; // Vector variable shuffles (XMM).
defm : SKXWriteResPair<WriteVarShuffleY, [SKXPort5], 1, [1], 1, 7>; // Vector variable shuffles (YMM/ZMM).
defm : SKXWriteResPair<WriteVarShuffleX, [SKXPort5], 1, [1], 1, 6>;
defm : SKXWriteResPair<WriteVarShuffleY, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVarShuffleZ, [SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteBlend, [SKXPort5], 1, [1], 1, 6>; // Vector blends.
defm : SKXWriteResPair<WriteBlendY,[SKXPort5], 1, [1], 1, 7>; // Vector blends (YMM/ZMM).
defm : SKXWriteResPair<WriteBlendY,[SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteBlendZ,[SKXPort5], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVarBlend, [SKXPort015], 2, [2], 2, 6>; // Vector variable blends.
defm : SKXWriteResPair<WriteVarBlendY,[SKXPort015], 2, [2], 2, 6>; // Vector variable blends (YMM/ZMM).
defm : SKXWriteResPair<WriteVarBlendY,[SKXPort015], 2, [2], 2, 6>;
defm : SKXWriteResPair<WriteVarBlendZ,[SKXPort05], 2, [1], 1, 6>;
defm : SKXWriteResPair<WriteMPSAD, [SKXPort5], 4, [2], 2, 6>; // Vector MPSAD.
defm : SKXWriteResPair<WriteMPSADY, [SKXPort5], 4, [2], 2, 7>; // Vector MPSAD.
defm : SKXWriteResPair<WriteMPSADY, [SKXPort5], 4, [2], 2, 7>;
defm : SKXWriteResPair<WriteMPSADZ, [SKXPort5], 4, [2], 2, 7>;
defm : SKXWriteResPair<WritePSADBW, [SKXPort5], 3, [1], 1, 5>; // Vector PSADBW.
defm : SKXWriteResPair<WritePSADBWX, [SKXPort5], 3, [1], 1, 6>; // Vector PSADBW.
defm : SKXWriteResPair<WritePSADBWY, [SKXPort5], 3, [1], 1, 7>; // Vector PSADBW.
defm : SKXWriteResPair<WritePHMINPOS, [SKXPort015], 4, [1], 1, 6>; // Vector PHMINPOS.
defm : SKXWriteResPair<WritePSADBWX, [SKXPort5], 3, [1], 1, 6>;
defm : SKXWriteResPair<WritePSADBWY, [SKXPort5], 3, [1], 1, 7>;
defm : SKXWriteResPair<WritePSADBWZ, [SKXPort5], 3, [1], 1, 7>;
defm : SKXWriteResPair<WritePHMINPOS, [SKXPort0], 4, [1], 1, 6>; // Vector PHMINPOS.
// Vector integer shifts.
defm : SKXWriteResPair<WriteVecShift, [SKXPort0], 1, [1], 1, 5>;
defm : X86WriteRes<WriteVecShiftX, [SKXPort5,SKXPort01], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftY, [SKXPort5,SKXPort01], 4, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftZ, [SKXPort5,SKXPort0], 4, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftXLd, [SKXPort01,SKXPort23], 7, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftYLd, [SKXPort01,SKXPort23], 8, [1,1], 2>;
defm : X86WriteRes<WriteVecShiftZLd, [SKXPort0,SKXPort23], 8, [1,1], 2>;
defm : SKXWriteResPair<WriteVecShiftImm, [SKXPort0], 1, [1], 1, 5>;
defm : SKXWriteResPair<WriteVecShiftImmX, [SKXPort01], 1, [1], 1, 6>; // Vector integer immediate shifts (XMM).
defm : SKXWriteResPair<WriteVecShiftImmY, [SKXPort01], 1, [1], 1, 7>; // Vector integer immediate shifts (YMM/ZMM).
defm : SKXWriteResPair<WriteVecShiftImmX, [SKXPort01], 1, [1], 1, 6>; // Vector integer immediate shifts.
defm : SKXWriteResPair<WriteVecShiftImmY, [SKXPort01], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVecShiftImmZ, [SKXPort0], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVarVecShift, [SKXPort01], 1, [1], 1, 6>; // Variable vector shifts.
defm : SKXWriteResPair<WriteVarVecShiftY, [SKXPort01], 1, [1], 1, 7>; // Variable vector shifts (YMM/ZMM).
defm : SKXWriteResPair<WriteVarVecShiftY, [SKXPort01], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVarVecShiftZ, [SKXPort0], 1, [1], 1, 7>;
// Vector insert/extract operations.
def : WriteRes<WriteVecInsert, [SKXPort5]> {
@ -343,36 +375,46 @@ def : WriteRes<WriteVecExtractSt, [SKXPort4,SKXPort5,SKXPort237]> {
}
// Conversion between integer and float.
defm : SKXWriteResPair<WriteCvtSS2I, [SKXPort0,SKXPort015], 6, [1,1], 2>;
defm : SKXWriteResPair<WriteCvtPS2I, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteCvtPS2IY, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteCvtSD2I, [SKXPort0,SKXPort015], 6, [1,1], 2>;
defm : SKXWriteResPair<WriteCvtPD2I, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteCvtPD2IY, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteCvtSS2I, [SKXPort01], 6, [2], 2>; // Needs more work: DD vs DQ.
defm : SKXWriteResPair<WriteCvtPS2I, [SKXPort01], 3>;
defm : SKXWriteResPair<WriteCvtPS2IY, [SKXPort01], 3>;
defm : SKXWriteResPair<WriteCvtPS2IZ, [SKXPort05], 3>;
defm : SKXWriteResPair<WriteCvtSD2I, [SKXPort01], 6, [2], 2>;
defm : SKXWriteResPair<WriteCvtPD2I, [SKXPort01], 3>;
defm : SKXWriteResPair<WriteCvtPD2IY, [SKXPort01], 3>;
defm : SKXWriteResPair<WriteCvtPD2IZ, [SKXPort05], 3>;
defm : SKXWriteResPair<WriteCvtI2SS, [SKXPort1], 4>;
defm : SKXWriteResPair<WriteCvtI2PS, [SKXPort1], 4>;
defm : SKXWriteResPair<WriteCvtI2PSY, [SKXPort1], 4>;
defm : SKXWriteResPair<WriteCvtI2PS, [SKXPort01], 4>;
defm : SKXWriteResPair<WriteCvtI2PSY, [SKXPort01], 4>;
defm : SKXWriteResPair<WriteCvtI2PSZ, [SKXPort05], 4>; // Needs more work: DD vs DQ.
defm : SKXWriteResPair<WriteCvtI2SD, [SKXPort1], 4>;
defm : SKXWriteResPair<WriteCvtI2PD, [SKXPort0,SKXPort5], 5, [1,1], 2>;
defm : SKXWriteResPair<WriteCvtI2PDY, [SKXPort1], 4>;
defm : SKXWriteResPair<WriteCvtI2PD, [SKXPort01], 4>;
defm : SKXWriteResPair<WriteCvtI2PDY, [SKXPort01], 4>;
defm : SKXWriteResPair<WriteCvtI2PDZ, [SKXPort05], 4>;
defm : SKXWriteResPair<WriteCvtSS2SD, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteCvtPS2PD, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteCvtPS2PDY, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteCvtPS2PDY, [SKXPort5,SKXPort01], 3, [1,1], 2>;
defm : SKXWriteResPair<WriteCvtPS2PDZ, [SKXPort05], 3, [2], 2>;
defm : SKXWriteResPair<WriteCvtSD2SS, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteCvtPD2PS, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteCvtPD2PSY, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteCvtPD2PSY, [SKXPort5,SKXPort01], 3, [1,1], 2>;
defm : SKXWriteResPair<WriteCvtPD2PSZ, [SKXPort05], 3, [2], 2>;
defm : X86WriteRes<WriteCvtPH2PS, [SKXPort5,SKXPort015], 5, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSY, [SKXPort5,SKXPort015], 7, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSLd, [SKXPort23,SKXPort015], 9, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSYLd, [SKXPort23,SKXPort015], 10, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PS, [SKXPort5,SKXPort01], 5, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSY, [SKXPort5,SKXPort01], 7, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSZ, [SKXPort5,SKXPort0], 7, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSLd, [SKXPort23,SKXPort01], 9, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSYLd, [SKXPort23,SKXPort01], 10, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSZLd, [SKXPort23,SKXPort05], 10, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PH, [SKXPort5,SKXPort015], 5, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PHY, [SKXPort5,SKXPort015], 7, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PHSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort015], 6, [1,1,1,1], 4>;
defm : X86WriteRes<WriteCvtPS2PHYSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort015], 8, [1,1,1,1], 4>;
defm : X86WriteRes<WriteCvtPS2PH, [SKXPort5,SKXPort01], 5, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PHY, [SKXPort5,SKXPort01], 7, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PHZ, [SKXPort5,SKXPort05], 7, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PHSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort01], 6, [1,1,1,1], 4>;
defm : X86WriteRes<WriteCvtPS2PHYSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort01], 8, [1,1,1,1], 4>;
defm : X86WriteRes<WriteCvtPS2PHZSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort05], 8, [1,1,1,1], 4>;
// Strings instructions.
@ -589,15 +631,15 @@ def SKXWriteResGroup9 : SchedWriteRes<[SKXPort015]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
def: InstRW<[SKXWriteResGroup9], (instregex "VBLENDMPD(Z|Z128|Z256)rr",
"VBLENDMPS(Z|Z128|Z256)rr",
def: InstRW<[SKXWriteResGroup9], (instregex "VBLENDMPD(Z128|Z256)rr",
"VBLENDMPS(Z128|Z256)rr",
"VPADD(B|D|Q|W)(Y|Z|Z128|Z256)rr",
"(V?)PADD(B|D|Q|W)rr",
"VPBLENDD(Y?)rri",
"VPBLENDMB(Z|Z128|Z256)rr",
"VPBLENDMD(Z|Z128|Z256)rr",
"VPBLENDMQ(Z|Z128|Z256)rr",
"VPBLENDMW(Z|Z128|Z256)rr",
"VPBLENDMB(Z128|Z256)rr",
"VPBLENDMD(Z128|Z256)rr",
"VPBLENDMQ(Z128|Z256)rr",
"VPBLENDMW(Z128|Z256)rr",
"VPSUB(B|D|Q|W)(Y|Z|Z128|Z256)rr",
"(V?)PSUB(B|D|Q|W)rr",
"VPTERNLOGD(Z|Z128|Z256)rri",
@ -611,8 +653,7 @@ def SKXWriteResGroup10 : SchedWriteRes<[SKXPort0156]> {
def: InstRW<[SKXWriteResGroup10], (instrs CBW, CWDE, CDQE,
CMC, STC)>;
def: InstRW<[SKXWriteResGroup10], (instrs LAHF, SAHF)>; // TODO: This doesn't match Agner's data
def: InstRW<[SKXWriteResGroup10], (instregex "NOOP",
"SGDT64m",
def: InstRW<[SKXWriteResGroup10], (instregex "SGDT64m",
"SIDT64m",
"SMSW16m",
"STRm",
@ -739,8 +780,7 @@ def SKXWriteResGroup30 : SchedWriteRes<[SKXPort0]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
def: InstRW<[SKXWriteResGroup30], (instregex "KADD(B|D|Q|W)rr",
"KMOV(B|D|Q|W)rk",
def: InstRW<[SKXWriteResGroup30], (instregex "KMOV(B|D|Q|W)rk",
"KORTEST(B|D|Q|W)rr",
"KTEST(B|D|Q|W)rr")>;
@ -768,6 +808,7 @@ def SKXWriteResGroup32 : SchedWriteRes<[SKXPort5]> {
let ResourceCycles = [1];
}
def: InstRW<[SKXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0)",
"KADD(B|D|Q|W)rr",
"KSHIFTL(B|D|Q|W)ri",
"KSHIFTR(B|D|Q|W)ri",
"KUNPCKBWrr",
@ -907,26 +948,44 @@ def SKXWriteResGroup49 : SchedWriteRes<[SKXPort0]> {
}
def: InstRW<[SKXWriteResGroup49], (instregex "MUL_(FPrST0|FST0r|FrST0)")>;
def SKXWriteResGroup50 : SchedWriteRes<[SKXPort015]> {
def SKXWriteResGroup50 : SchedWriteRes<[SKXPort01]> {
let Latency = 4;
let NumMicroOps = 1;
let ResourceCycles = [1];
}
def: InstRW<[SKXWriteResGroup50], (instregex "VCVTDQ2PS(Y|Z|Z128|Z256)rr",
def: InstRW<[SKXWriteResGroup50], (instregex "VCVTDQ2PS(Y|Z128|Z256)rr",
"(V?)CVTDQ2PSrr",
"VCVTPD2QQ(Z|Z128|Z256)rr",
"VCVTPD2UQQ(Z|Z128|Z256)rr",
"VCVTPS2DQ(Y|Z|Z128|Z256)rr",
"VCVTPD2QQ(Z128|Z256)rr",
"VCVTPD2UQQ(Z128|Z256)rr",
"VCVTPS2DQ(Y|Z128|Z256)rr",
"(V?)CVTPS2DQrr",
"VCVTPS2UDQ(Z|Z128|Z256)rr",
"VCVTQQ2PD(Z|Z128|Z256)rr",
"VCVTTPD2QQ(Z|Z128|Z256)rr",
"VCVTTPD2UQQ(Z|Z128|Z256)rr",
"VCVTTPS2DQ(Y|Z|Z128|Z256)rr",
"VCVTPS2UDQ(Z128|Z256)rr",
"VCVTQQ2PD(Z128|Z256)rr",
"VCVTTPD2QQ(Z128|Z256)rr",
"VCVTTPD2UQQ(Z128|Z256)rr",
"VCVTTPS2DQ(Y|Z128|Z256)rr", // Only the bare Z form leaves this group; the Y form stays, as for VCVTPS2DQ/VCVTDQ2PS.
"(V?)CVTTPS2DQrr",
"VCVTTPS2UDQ(Z|Z128|Z256)rr",
"VCVTUDQ2PS(Z|Z128|Z256)rr",
"VCVTUQQ2PD(Z|Z128|Z256)rr")>;
"VCVTTPS2UDQ(Z128|Z256)rr",
"VCVTUDQ2PS(Z128|Z256)rr",
"VCVTUQQ2PD(Z128|Z256)rr")>;
// Scheduling class with latency 4, a single micro-op, and one resource cycle
// on SKXPort05. It is bound below, by exact instruction name (`instrs`, not a
// regex), to the register-register conversions carrying the bare `Z` suffix.
// NOTE(review): the bare `Z` suffix presumably denotes the 512-bit (ZMM)
// encodings that this change splits away from the YMM classes — confirm.
def SKXWriteResGroup50z : SchedWriteRes<[SKXPort05]> {
let Latency = 4;
let NumMicroOps = 1;
let ResourceCycles = [1];
}
def: InstRW<[SKXWriteResGroup50z], (instrs VCVTDQ2PSZrr,
VCVTPD2QQZrr,
VCVTPD2UQQZrr,
VCVTPS2DQZrr,
VCVTPS2UDQZrr,
VCVTQQ2PDZrr,
VCVTTPD2QQZrr,
VCVTTPD2UQQZrr,
VCVTTPS2DQZrr,
VCVTTPS2UDQZrr,
VCVTUDQ2PSZrr,
VCVTUQQ2PDZrr)>;
def SKXWriteResGroup51 : SchedWriteRes<[SKXPort5]> {
let Latency = 4;
@ -1010,13 +1069,6 @@ def: InstRW<[SKXWriteResGroup58], (instregex "MOVSX(16|32|64)rm16",
"MOVZX(16|32|64)rm8",
"(V?)MOVDDUPrm")>; // TODO: Should this be SKXWriteResGroup71?
def SKXWriteResGroup59 : SchedWriteRes<[SKXPort015]> {
let Latency = 5;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
def: InstRW<[SKXWriteResGroup59], (instregex "VCVTSD2SSZrr")>;
def SKXWriteResGroup61 : SchedWriteRes<[SKXPort5,SKXPort015]> {
let Latency = 5;
let NumMicroOps = 2;
@ -1035,7 +1087,7 @@ def: InstRW<[SKXWriteResGroup61], (instregex "MMX_CVT(T?)PD2PIirr",
"VCVTPS2QQZ128rr",
"VCVTPS2UQQZ128rr",
"VCVTQQ2PSZ128rr",
"(V?)CVTSD2SSrr",
"(V?)CVTSD2SS(Z?)rr",
"(V?)CVTSI(64)?2SDrr",
"VCVTSI2SSZrr",
"(V?)CVTSI2SSrr",
@ -1136,7 +1188,7 @@ def: InstRW<[SKXWriteResGroup71], (instregex "VBROADCASTSSrm",
"VPBROADCASTDrm",
"VPBROADCASTQrm")>;
def SKXWriteResGroup72 : SchedWriteRes<[SKXPort0]> {
def SKXWriteResGroup72 : SchedWriteRes<[SKXPort5]> {
let Latency = 6;
let NumMicroOps = 2;
let ResourceCycles = [2];
@ -1286,7 +1338,7 @@ def: InstRW<[SKXWriteResGroup89], (instregex "LD_F(32|64|80)m",
"VPBROADCASTDYrm",
"VPBROADCASTQYrm")>;
def SKXWriteResGroup90 : SchedWriteRes<[SKXPort0,SKXPort5]> {
def SKXWriteResGroup90 : SchedWriteRes<[SKXPort01,SKXPort5]> {
let Latency = 7;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
@ -1318,21 +1370,40 @@ def SKXWriteResGroup93 : SchedWriteRes<[SKXPort5,SKXPort015]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup93], (instregex "VCVTDQ2PD(Z|Z256)rr",
"VCVTPD2DQ(Y|Z|Z256)rr",
"VCVTPD2PS(Y|Z|Z256)rr",
"VCVTPD2UDQ(Z|Z256)rr",
"VCVTPS2PD(Y|Z|Z256)rr",
"VCVTPS2QQ(Z|Z256)rr",
"VCVTPS2UQQ(Z|Z256)rr",
"VCVTQQ2PS(Z|Z256)rr",
"VCVTTPD2DQ(Y|Z|Z256)rr",
"VCVTTPD2UDQ(Z|Z256)rr",
"VCVTTPS2QQ(Z|Z256)rr",
"VCVTTPS2UQQ(Z|Z256)rr",
"VCVTUDQ2PD(Z|Z256)rr",
"VCVTUQQ2PS(Z|Z256)rr")>;
def: InstRW<[SKXWriteResGroup93], (instregex "VCVTDQ2PDZ256rr",
"VCVTPD2DQ(Y|Z256)rr",
"VCVTPD2PS(Y|Z256)rr",
"VCVTPD2UDQZ256rr",
"VCVTPS2PD(Y|Z256)rr",
"VCVTPS2QQZ256rr",
"VCVTPS2UQQZ256rr",
"VCVTQQ2PSZ256rr",
"VCVTTPD2DQ(Y|Z256)rr",
"VCVTTPD2UDQZ256rr",
"VCVTTPS2QQZ256rr",
"VCVTTPS2UQQZ256rr",
"VCVTUDQ2PDZ256rr",
"VCVTUQQ2PSZ256rr")>;
// Scheduling class with latency 7 and two micro-ops, consuming one resource
// cycle on SKXPort5 and one on SKXPort05 (ResourceCycles entries are listed in
// the same order as the resources). It is bound below, by exact instruction
// name, to the register-register conversions carrying the bare `Z` suffix.
// NOTE(review): the bare `Z` suffix presumably denotes the 512-bit (ZMM)
// encodings that this change splits away from the YMM classes — confirm.
def SKXWriteResGroup93z : SchedWriteRes<[SKXPort5,SKXPort05]> {
let Latency = 7;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup93z], (instrs VCVTDQ2PDZrr,
VCVTPD2DQZrr,
VCVTPD2PSZrr,
VCVTPD2UDQZrr,
VCVTPS2PDZrr,
VCVTPS2QQZrr,
VCVTPS2UQQZrr,
VCVTQQ2PSZrr,
VCVTTPD2DQZrr,
VCVTTPD2UDQZrr,
VCVTTPS2QQZrr,
VCVTTPS2UQQZrr,
VCVTUDQ2PDZrr,
VCVTUQQ2PSZrr)>;
def SKXWriteResGroup95 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let Latency = 7;
@ -1405,8 +1476,8 @@ def SKXWriteResGroup100 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort015]> {
let ResourceCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup100], (instregex "VCVTSS2USI64Zrr",
"VCVTTSS2SI(64)?Zrr",
"(V?)CVTTSS2SI(64)?rr",
"(V?)CVTSS2SI64(Z?)rr",
"(V?)CVTTSS2SI64(Z?)rr",
"VCVTTSS2USI64Zrr")>;
def SKXWriteResGroup101 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort05]> {
@ -1751,16 +1822,6 @@ def SKXWriteResGroup137 : SchedWriteRes<[SKXPort23,SKXPort015]> {
def: InstRW<[SKXWriteResGroup137], (instregex "MMX_CVT(T?)PS2PIirm",
"(V?)CVTPS2PDrm")>;
def SKXWriteResGroup138 : SchedWriteRes<[SKXPort0,SKXPort015]> {
let Latency = 9;
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
def: InstRW<[SKXWriteResGroup138], (instregex "VRCP14PDZr(b?)",
"VRCP14PSZr(b?)",
"VRSQRT14PDZr(b?)",
"VRSQRT14PSZr(b?)")>;
def SKXWriteResGroup142 : SchedWriteRes<[SKXPort1,SKXPort5,SKXPort23]> {
let Latency = 9;
let NumMicroOps = 3;
@ -2009,12 +2070,19 @@ def SKXWriteResGroup171 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
}
def: InstRW<[SKXWriteResGroup171], (instrs LOOPE, LOOPNE)>;
def SKXWriteResGroup174 : SchedWriteRes<[SKXPort015]> {
def SKXWriteResGroup174 : SchedWriteRes<[SKXPort01]> {
let Latency = 12;
let NumMicroOps = 3;
let ResourceCycles = [3];
}
def: InstRW<[SKXWriteResGroup174], (instregex "VPMULLQ(Z|Z128|Z256)rr")>;
def: InstRW<[SKXWriteResGroup174], (instregex "VPMULLQ(Z128|Z256)rr")>;
// Scheduling class with latency 12 and three micro-ops, occupying SKXPort05
// for three resource cycles. It is bound below to instructions matching the
// regex "VPMULLQZrr"; the Z128/Z256 forms are matched by SKXWriteResGroup174.
// NOTE(review): whether this regex also picks up longer names that merely
// start with "VPMULLQZrr" depends on TableGen's instregex anchoring — confirm.
def SKXWriteResGroup174z : SchedWriteRes<[SKXPort05]> {
let Latency = 12;
let NumMicroOps = 3;
let ResourceCycles = [3];
}
def: InstRW<[SKXWriteResGroup174z], (instregex "VPMULLQZrr")>;
def SKXWriteResGroup175 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 12;
@ -2143,16 +2211,6 @@ def SKXWriteResGroup195 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort06
}
def: InstRW<[SKXWriteResGroup195], (instregex "RCL(8|16|32|64)mCL")>;
def SKXWriteResGroup198 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015]> {
let Latency = 16;
let NumMicroOps = 4;
let ResourceCycles = [2,1,1];
}
def: InstRW<[SKXWriteResGroup198], (instregex "VRCP14PDZm(b?)",
"VRCP14PSZm(b?)",
"VRSQRT14PDZm(b?)",
"VRSQRT14PSZm(b?)")>;
def SKXWriteResGroup199 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06,SKXPort15,SKXPort0156]> {
let Latency = 16;
let NumMicroOps = 14;

View File

@ -62,7 +62,6 @@ multiclass X86WriteResPairUnsupported<X86FoldableSchedWrite SchedRW> {
}
}
// Multiclass that wraps X86FoldableSchedWrite for each vector width.
class X86SchedWriteWidths<X86FoldableSchedWrite sScl,
X86FoldableSchedWrite s128,
@ -177,23 +176,29 @@ def WriteFMoveY : SchedWrite;
defm WriteFAdd : X86SchedWritePair; // Floating point add/sub.
defm WriteFAddX : X86SchedWritePair; // Floating point add/sub (XMM).
defm WriteFAddY : X86SchedWritePair; // Floating point add/sub (YMM/ZMM).
defm WriteFAddY : X86SchedWritePair; // Floating point add/sub (YMM).
defm WriteFAddZ : X86SchedWritePair; // Floating point add/sub (ZMM).
defm WriteFAdd64 : X86SchedWritePair; // Floating point double add/sub.
defm WriteFAdd64X : X86SchedWritePair; // Floating point double add/sub (XMM).
defm WriteFAdd64Y : X86SchedWritePair; // Floating point double add/sub (YMM/ZMM).
defm WriteFAdd64Y : X86SchedWritePair; // Floating point double add/sub (YMM).
defm WriteFAdd64Z : X86SchedWritePair; // Floating point double add/sub (ZMM).
defm WriteFCmp : X86SchedWritePair; // Floating point compare.
defm WriteFCmpX : X86SchedWritePair; // Floating point compare (XMM).
defm WriteFCmpY : X86SchedWritePair; // Floating point compare (YMM/ZMM).
defm WriteFCmpY : X86SchedWritePair; // Floating point compare (YMM).
defm WriteFCmpZ : X86SchedWritePair; // Floating point compare (ZMM).
defm WriteFCmp64 : X86SchedWritePair; // Floating point double compare.
defm WriteFCmp64X : X86SchedWritePair; // Floating point double compare (XMM).
defm WriteFCmp64Y : X86SchedWritePair; // Floating point double compare (YMM/ZMM).
defm WriteFCmp64Y : X86SchedWritePair; // Floating point double compare (YMM).
defm WriteFCmp64Z : X86SchedWritePair; // Floating point double compare (ZMM).
defm WriteFCom : X86SchedWritePair; // Floating point compare to flags.
defm WriteFMul : X86SchedWritePair; // Floating point multiplication.
defm WriteFMulX : X86SchedWritePair; // Floating point multiplication (XMM).
defm WriteFMulY : X86SchedWritePair; // Floating point multiplication (YMM/ZMM).
defm WriteFMulY : X86SchedWritePair; // Floating point multiplication (YMM).
defm WriteFMulZ : X86SchedWritePair; // Floating point multiplication (ZMM).
defm WriteFMul64 : X86SchedWritePair; // Floating point double multiplication.
defm WriteFMul64X : X86SchedWritePair; // Floating point double multiplication (XMM).
defm WriteFMul64Y : X86SchedWritePair; // Floating point double multiplication (YMM/ZMM).
defm WriteFMul64Y : X86SchedWritePair; // Floating point double multiplication (YMM).
defm WriteFMul64Z : X86SchedWritePair; // Floating point double multiplication (ZMM).
defm WriteFDiv : X86SchedWritePair; // Floating point division.
defm WriteFDivX : X86SchedWritePair; // Floating point division (XMM).
defm WriteFDivY : X86SchedWritePair; // Floating point division (YMM).
@ -213,41 +218,54 @@ defm WriteFSqrt64Z : X86SchedWritePair; // Floating point double square root (ZM
defm WriteFSqrt80 : X86SchedWritePair; // Floating point long double square root.
defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal estimate.
defm WriteFRcpX : X86SchedWritePair; // Floating point reciprocal estimate (XMM).
defm WriteFRcpY : X86SchedWritePair; // Floating point reciprocal estimate (YMM/ZMM).
defm WriteFRcpY : X86SchedWritePair; // Floating point reciprocal estimate (YMM).
defm WriteFRcpZ : X86SchedWritePair; // Floating point reciprocal estimate (ZMM).
defm WriteFRsqrt : X86SchedWritePair; // Floating point reciprocal square root estimate.
defm WriteFRsqrtX: X86SchedWritePair; // Floating point reciprocal square root estimate (XMM).
defm WriteFRsqrtY: X86SchedWritePair; // Floating point reciprocal square root estimate (YMM/ZMM).
defm WriteFRsqrtY: X86SchedWritePair; // Floating point reciprocal square root estimate (YMM).
defm WriteFRsqrtZ: X86SchedWritePair; // Floating point reciprocal square root estimate (ZMM).
defm WriteFMA : X86SchedWritePair; // Fused Multiply Add.
defm WriteFMAX : X86SchedWritePair; // Fused Multiply Add (XMM).
defm WriteFMAY : X86SchedWritePair; // Fused Multiply Add (YMM/ZMM).
defm WriteFMAY : X86SchedWritePair; // Fused Multiply Add (YMM).
defm WriteFMAZ : X86SchedWritePair; // Fused Multiply Add (ZMM).
defm WriteDPPD : X86SchedWritePair; // Floating point double dot product.
defm WriteDPPS : X86SchedWritePair; // Floating point single dot product.
defm WriteDPPSY : X86SchedWritePair; // Floating point single dot product (YMM).
defm WriteDPPSZ : X86SchedWritePair; // Floating point single dot product (ZMM).
defm WriteFSign : X86SchedWritePair; // Floating point fabs/fchs.
defm WriteFRnd : X86SchedWritePair; // Floating point rounding.
defm WriteFRndY : X86SchedWritePair; // Floating point rounding (YMM/ZMM).
defm WriteFRndY : X86SchedWritePair; // Floating point rounding (YMM).
defm WriteFRndZ : X86SchedWritePair; // Floating point rounding (ZMM).
defm WriteFLogic : X86SchedWritePair; // Floating point and/or/xor logicals.
defm WriteFLogicY : X86SchedWritePair; // Floating point and/or/xor logicals (YMM/ZMM).
defm WriteFLogicY : X86SchedWritePair; // Floating point and/or/xor logicals (YMM).
defm WriteFLogicZ : X86SchedWritePair; // Floating point and/or/xor logicals (ZMM).
defm WriteFTest : X86SchedWritePair; // Floating point TEST instructions.
defm WriteFTestY : X86SchedWritePair; // Floating point TEST instructions (YMM/ZMM).
defm WriteFTestY : X86SchedWritePair; // Floating point TEST instructions (YMM).
defm WriteFTestZ : X86SchedWritePair; // Floating point TEST instructions (ZMM).
defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles.
defm WriteFShuffleY : X86SchedWritePair; // Floating point vector shuffles (YMM/ZMM).
defm WriteFShuffleY : X86SchedWritePair; // Floating point vector shuffles (YMM).
defm WriteFShuffleZ : X86SchedWritePair; // Floating point vector shuffles (ZMM).
defm WriteFVarShuffle : X86SchedWritePair; // Floating point vector variable shuffles.
defm WriteFVarShuffleY : X86SchedWritePair; // Floating point vector variable shuffles (YMM/ZMM).
defm WriteFVarShuffleY : X86SchedWritePair; // Floating point vector variable shuffles (YMM).
defm WriteFVarShuffleZ : X86SchedWritePair; // Floating point vector variable shuffles (ZMM).
defm WriteFBlend : X86SchedWritePair; // Floating point vector blends.
defm WriteFBlendY : X86SchedWritePair; // Floating point vector blends (YMM/ZMM).
defm WriteFBlendY : X86SchedWritePair; // Floating point vector blends (YMM).
defm WriteFBlendZ : X86SchedWritePair; // Floating point vector blends (ZMM).
defm WriteFVarBlend : X86SchedWritePair; // Fp vector variable blends.
defm WriteFVarBlendY : X86SchedWritePair; // Fp vector variable blends (YMM/ZMM).
defm WriteFVarBlendY : X86SchedWritePair; // Fp vector variable blends (YMM).
defm WriteFVarBlendZ : X86SchedWritePair; // Fp vector variable blends (ZMM).
// FMA Scheduling helper class.
class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
// Horizontal Add/Sub (float and integer)
defm WriteFHAdd : X86SchedWritePair;
defm WriteFHAddY : X86SchedWritePair; // YMM/ZMM.
defm WriteFHAddY : X86SchedWritePair; // YMM.
defm WriteFHAddZ : X86SchedWritePair; // ZMM.
defm WritePHAdd : X86SchedWritePair;
defm WritePHAddX : X86SchedWritePair; // XMM.
defm WritePHAddY : X86SchedWritePair; // YMM/ZMM.
defm WritePHAddX : X86SchedWritePair; // XMM.
defm WritePHAddY : X86SchedWritePair; // YMM.
defm WritePHAddZ : X86SchedWritePair; // ZMM.
// Vector integer operations.
def WriteVecLoad : SchedWrite;
@ -272,38 +290,51 @@ def WriteVecMoveFromGpr : SchedWrite;
defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals.
defm WriteVecALUX : X86SchedWritePair; // Vector integer ALU op, no logicals (XMM).
defm WriteVecALUY : X86SchedWritePair; // Vector integer ALU op, no logicals (YMM/ZMM).
defm WriteVecALUY : X86SchedWritePair; // Vector integer ALU op, no logicals (YMM).
defm WriteVecALUZ : X86SchedWritePair; // Vector integer ALU op, no logicals (ZMM).
defm WriteVecLogic : X86SchedWritePair; // Vector integer and/or/xor logicals.
defm WriteVecLogicX : X86SchedWritePair; // Vector integer and/or/xor logicals (XMM).
defm WriteVecLogicY : X86SchedWritePair; // Vector integer and/or/xor logicals (YMM/ZMM).
defm WriteVecLogicY : X86SchedWritePair; // Vector integer and/or/xor logicals (YMM).
defm WriteVecLogicZ : X86SchedWritePair; // Vector integer and/or/xor logicals (ZMM).
defm WriteVecTest : X86SchedWritePair; // Vector integer TEST instructions.
defm WriteVecTestY : X86SchedWritePair; // Vector integer TEST instructions (YMM/ZMM).
defm WriteVecTestY : X86SchedWritePair; // Vector integer TEST instructions (YMM).
defm WriteVecTestZ : X86SchedWritePair; // Vector integer TEST instructions (ZMM).
defm WriteVecShift : X86SchedWritePair; // Vector integer shifts (default).
defm WriteVecShiftX : X86SchedWritePair; // Vector integer shifts (XMM).
defm WriteVecShiftY : X86SchedWritePair; // Vector integer shifts (YMM/ZMM).
defm WriteVecShiftY : X86SchedWritePair; // Vector integer shifts (YMM).
defm WriteVecShiftZ : X86SchedWritePair; // Vector integer shifts (ZMM).
defm WriteVecShiftImm : X86SchedWritePair; // Vector integer immediate shifts (default).
defm WriteVecShiftImmX: X86SchedWritePair; // Vector integer immediate shifts (XMM).
defm WriteVecShiftImmY: X86SchedWritePair; // Vector integer immediate shifts (YMM/ZMM).
defm WriteVecShiftImmY: X86SchedWritePair; // Vector integer immediate shifts (YMM).
defm WriteVecShiftImmZ: X86SchedWritePair; // Vector integer immediate shifts (ZMM).
defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply (default).
defm WriteVecIMulX : X86SchedWritePair; // Vector integer multiply (XMM).
defm WriteVecIMulY : X86SchedWritePair; // Vector integer multiply (YMM/ZMM).
defm WriteVecIMulY : X86SchedWritePair; // Vector integer multiply (YMM).
defm WriteVecIMulZ : X86SchedWritePair; // Vector integer multiply (ZMM).
defm WritePMULLD : X86SchedWritePair; // Vector PMULLD.
defm WritePMULLDY : X86SchedWritePair; // Vector PMULLD (YMM/ZMM).
defm WritePMULLDY : X86SchedWritePair; // Vector PMULLD (YMM).
defm WritePMULLDZ : X86SchedWritePair; // Vector PMULLD (ZMM).
defm WriteShuffle : X86SchedWritePair; // Vector shuffles.
defm WriteShuffleX : X86SchedWritePair; // Vector shuffles (XMM).
defm WriteShuffleY : X86SchedWritePair; // Vector shuffles (YMM/ZMM).
defm WriteShuffleY : X86SchedWritePair; // Vector shuffles (YMM).
defm WriteShuffleZ : X86SchedWritePair; // Vector shuffles (ZMM).
defm WriteVarShuffle : X86SchedWritePair; // Vector variable shuffles.
defm WriteVarShuffleX : X86SchedWritePair; // Vector variable shuffles (XMM).
defm WriteVarShuffleY : X86SchedWritePair; // Vector variable shuffles (YMM/ZMM).
defm WriteVarShuffleY : X86SchedWritePair; // Vector variable shuffles (YMM).
defm WriteVarShuffleZ : X86SchedWritePair; // Vector variable shuffles (ZMM).
defm WriteBlend : X86SchedWritePair; // Vector blends.
defm WriteBlendY : X86SchedWritePair; // Vector blends (YMM/ZMM).
defm WriteBlendY : X86SchedWritePair; // Vector blends (YMM).
defm WriteBlendZ : X86SchedWritePair; // Vector blends (ZMM).
defm WriteVarBlend : X86SchedWritePair; // Vector variable blends.
defm WriteVarBlendY : X86SchedWritePair; // Vector variable blends (YMM/ZMM).
defm WriteVarBlendY : X86SchedWritePair; // Vector variable blends (YMM).
defm WriteVarBlendZ : X86SchedWritePair; // Vector variable blends (ZMM).
defm WritePSADBW : X86SchedWritePair; // Vector PSADBW.
defm WritePSADBWX : X86SchedWritePair; // Vector PSADBW (XMM).
defm WritePSADBWY : X86SchedWritePair; // Vector PSADBW (YMM/ZMM).
defm WritePSADBWY : X86SchedWritePair; // Vector PSADBW (YMM).
defm WritePSADBWZ : X86SchedWritePair; // Vector PSADBW (ZMM).
defm WriteMPSAD : X86SchedWritePair; // Vector MPSAD.
defm WriteMPSADY : X86SchedWritePair; // Vector MPSAD (YMM/ZMM).
defm WriteMPSADY : X86SchedWritePair; // Vector MPSAD (YMM).
defm WriteMPSADZ : X86SchedWritePair; // Vector MPSAD (ZMM).
defm WritePHMINPOS : X86SchedWritePair; // Vector PHMINPOS.
// Vector insert/extract operations.
@ -320,35 +351,44 @@ def WriteMMXMOVMSK : SchedWrite;
// Conversion between integer and float.
defm WriteCvtSD2I : X86SchedWritePair; // Double -> Integer.
defm WriteCvtPD2I : X86SchedWritePair; // Double -> Integer (XMM).
defm WriteCvtPD2IY : X86SchedWritePair; // Double -> Integer (YMM/ZMM).
defm WriteCvtPD2IY : X86SchedWritePair; // Double -> Integer (YMM).
defm WriteCvtPD2IZ : X86SchedWritePair; // Double -> Integer (ZMM).
defm WriteCvtSS2I : X86SchedWritePair; // Float -> Integer.
defm WriteCvtPS2I : X86SchedWritePair; // Float -> Integer (XMM).
defm WriteCvtPS2IY : X86SchedWritePair; // Float -> Integer (YMM/ZMM).
defm WriteCvtPS2IY : X86SchedWritePair; // Float -> Integer (YMM).
defm WriteCvtPS2IZ : X86SchedWritePair; // Float -> Integer (ZMM).
defm WriteCvtI2SD : X86SchedWritePair; // Integer -> Double.
defm WriteCvtI2PD : X86SchedWritePair; // Integer -> Double (XMM).
defm WriteCvtI2PDY : X86SchedWritePair; // Integer -> Double (YMM/ZMM).
defm WriteCvtI2PDY : X86SchedWritePair; // Integer -> Double (YMM).
defm WriteCvtI2PDZ : X86SchedWritePair; // Integer -> Double (ZMM).
defm WriteCvtI2SS : X86SchedWritePair; // Integer -> Float.
defm WriteCvtI2PS : X86SchedWritePair; // Integer -> Float (XMM).
defm WriteCvtI2PSY : X86SchedWritePair; // Integer -> Float (YMM/ZMM).
defm WriteCvtI2PSY : X86SchedWritePair; // Integer -> Float (YMM).
defm WriteCvtI2PSZ : X86SchedWritePair; // Integer -> Float (ZMM).
defm WriteCvtSS2SD : X86SchedWritePair; // Float -> Double size conversion.
defm WriteCvtPS2PD : X86SchedWritePair; // Float -> Double size conversion (XMM).
defm WriteCvtPS2PDY : X86SchedWritePair; // Float -> Double size conversion (YMM/ZMM).
defm WriteCvtPS2PDY : X86SchedWritePair; // Float -> Double size conversion (YMM).
defm WriteCvtPS2PDZ : X86SchedWritePair; // Float -> Double size conversion (ZMM).
defm WriteCvtSD2SS : X86SchedWritePair; // Double -> Float size conversion.
defm WriteCvtPD2PS : X86SchedWritePair; // Double -> Float size conversion (XMM).
defm WriteCvtPD2PSY : X86SchedWritePair; // Double -> Float size conversion (YMM/ZMM).
defm WriteCvtPD2PSY : X86SchedWritePair; // Double -> Float size conversion (YMM).
defm WriteCvtPD2PSZ : X86SchedWritePair; // Double -> Float size conversion (ZMM).
defm WriteCvtPH2PS : X86SchedWritePair; // Half -> Float size conversion.
defm WriteCvtPH2PSY : X86SchedWritePair; // Half -> Float size conversion (YMM/ZMM).
defm WriteCvtPH2PSY : X86SchedWritePair; // Half -> Float size conversion (YMM).
defm WriteCvtPH2PSZ : X86SchedWritePair; // Half -> Float size conversion (ZMM).
def WriteCvtPS2PH : SchedWrite; // Float -> Half size conversion.
def WriteCvtPS2PHY : SchedWrite; // Float -> Half size conversion (YMM/ZMM).
def WriteCvtPS2PHY : SchedWrite; // Float -> Half size conversion (YMM).
def WriteCvtPS2PHZ : SchedWrite; // Float -> Half size conversion (ZMM).
def WriteCvtPS2PHSt : SchedWrite; // Float -> Half + store size conversion.
def WriteCvtPS2PHYSt : SchedWrite; // Float -> Half + store size conversion (YMM/ZMM).
def WriteCvtPS2PHYSt : SchedWrite; // Float -> Half + store size conversion (YMM).
def WriteCvtPS2PHZSt : SchedWrite; // Float -> Half + store size conversion (ZMM).
// CRC32 instruction.
defm WriteCRC32 : X86SchedWritePair;
@ -387,7 +427,8 @@ defm WriteFVarShuffle256 : X86SchedWritePair; // Fp 256-bit width variable shuff
defm WriteShuffle256 : X86SchedWritePair; // 256-bit width vector shuffles.
defm WriteVarShuffle256 : X86SchedWritePair; // 256-bit width vector variable shuffles.
defm WriteVarVecShift : X86SchedWritePair; // Variable vector shifts.
defm WriteVarVecShiftY : X86SchedWritePair; // Variable vector shifts (YMM/ZMM).
defm WriteVarVecShiftY : X86SchedWritePair; // Variable vector shifts (YMM).
defm WriteVarVecShiftZ : X86SchedWritePair; // Variable vector shifts (ZMM).
// Old microcoded instructions that nobody use.
def WriteMicrocoded : SchedWrite;
@ -441,25 +482,25 @@ def SchedWriteVecMoveLSNT
// Vector width wrappers.
def SchedWriteFAdd
: X86SchedWriteWidths<WriteFAdd, WriteFAddX, WriteFAddY, WriteFAddY>;
: X86SchedWriteWidths<WriteFAdd, WriteFAddX, WriteFAddY, WriteFAddZ>;
def SchedWriteFAdd64
: X86SchedWriteWidths<WriteFAdd64, WriteFAdd64X, WriteFAdd64Y, WriteFAdd64Y>;
: X86SchedWriteWidths<WriteFAdd64, WriteFAdd64X, WriteFAdd64Y, WriteFAdd64Z>;
def SchedWriteFHAdd
: X86SchedWriteWidths<WriteFHAdd, WriteFHAdd, WriteFHAddY, WriteFHAddY>;
: X86SchedWriteWidths<WriteFHAdd, WriteFHAdd, WriteFHAddY, WriteFHAddZ>;
def SchedWriteFCmp
: X86SchedWriteWidths<WriteFCmp, WriteFCmpX, WriteFCmpY, WriteFCmpY>;
: X86SchedWriteWidths<WriteFCmp, WriteFCmpX, WriteFCmpY, WriteFCmpZ>;
def SchedWriteFCmp64
: X86SchedWriteWidths<WriteFCmp64, WriteFCmp64X, WriteFCmp64Y, WriteFCmp64Y>;
: X86SchedWriteWidths<WriteFCmp64, WriteFCmp64X, WriteFCmp64Y, WriteFCmp64Z>;
def SchedWriteFMul
: X86SchedWriteWidths<WriteFMul, WriteFMulX, WriteFMulY, WriteFMulY>;
: X86SchedWriteWidths<WriteFMul, WriteFMulX, WriteFMulY, WriteFMulZ>;
def SchedWriteFMul64
: X86SchedWriteWidths<WriteFMul64, WriteFMul64X, WriteFMul64Y, WriteFMul64Y>;
: X86SchedWriteWidths<WriteFMul64, WriteFMul64X, WriteFMul64Y, WriteFMul64Z>;
def SchedWriteFMA
: X86SchedWriteWidths<WriteFMA, WriteFMAX, WriteFMAY, WriteFMAY>;
: X86SchedWriteWidths<WriteFMA, WriteFMAX, WriteFMAY, WriteFMAZ>;
def SchedWriteDPPD
: X86SchedWriteWidths<WriteDPPD, WriteDPPD, WriteDPPD, WriteDPPD>;
def SchedWriteDPPS
: X86SchedWriteWidths<WriteDPPS, WriteDPPS, WriteDPPSY, WriteDPPSY>;
: X86SchedWriteWidths<WriteDPPS, WriteDPPS, WriteDPPSY, WriteDPPSZ>;
def SchedWriteFDiv
: X86SchedWriteWidths<WriteFDiv, WriteFDivX, WriteFDivY, WriteFDivZ>;
def SchedWriteFDiv64
@ -471,90 +512,90 @@ def SchedWriteFSqrt64
: X86SchedWriteWidths<WriteFSqrt64, WriteFSqrt64X,
WriteFSqrt64Y, WriteFSqrt64Z>;
def SchedWriteFRcp
: X86SchedWriteWidths<WriteFRcp, WriteFRcpX, WriteFRcpY, WriteFRcpY>;
: X86SchedWriteWidths<WriteFRcp, WriteFRcpX, WriteFRcpY, WriteFRcpZ>;
def SchedWriteFRsqrt
: X86SchedWriteWidths<WriteFRsqrt, WriteFRsqrtX, WriteFRsqrtY, WriteFRsqrtY>;
: X86SchedWriteWidths<WriteFRsqrt, WriteFRsqrtX, WriteFRsqrtY, WriteFRsqrtZ>;
def SchedWriteFRnd
: X86SchedWriteWidths<WriteFRnd, WriteFRnd, WriteFRndY, WriteFRndY>;
: X86SchedWriteWidths<WriteFRnd, WriteFRnd, WriteFRndY, WriteFRndZ>;
def SchedWriteFLogic
: X86SchedWriteWidths<WriteFLogic, WriteFLogic, WriteFLogicY, WriteFLogicY>;
: X86SchedWriteWidths<WriteFLogic, WriteFLogic, WriteFLogicY, WriteFLogicZ>;
def SchedWriteFTest
: X86SchedWriteWidths<WriteFTest, WriteFTest, WriteFTestY, WriteFTestY>;
: X86SchedWriteWidths<WriteFTest, WriteFTest, WriteFTestY, WriteFTestZ>;
def SchedWriteFShuffle
: X86SchedWriteWidths<WriteFShuffle, WriteFShuffle,
WriteFShuffleY, WriteFShuffleY>;
WriteFShuffleY, WriteFShuffleZ>;
def SchedWriteFVarShuffle
: X86SchedWriteWidths<WriteFVarShuffle, WriteFVarShuffle,
WriteFVarShuffleY, WriteFVarShuffleY>;
WriteFVarShuffleY, WriteFVarShuffleZ>;
def SchedWriteFBlend
: X86SchedWriteWidths<WriteFBlend, WriteFBlend, WriteFBlendY, WriteFBlendY>;
: X86SchedWriteWidths<WriteFBlend, WriteFBlend, WriteFBlendY, WriteFBlendZ>;
def SchedWriteFVarBlend
: X86SchedWriteWidths<WriteFVarBlend, WriteFVarBlend,
WriteFVarBlendY, WriteFVarBlendY>;
WriteFVarBlendY, WriteFVarBlendZ>;
def SchedWriteCvtDQ2PD
: X86SchedWriteWidths<WriteCvtI2SD, WriteCvtI2PD,
WriteCvtI2PDY, WriteCvtI2PDY>;
WriteCvtI2PDY, WriteCvtI2PDZ>;
def SchedWriteCvtDQ2PS
: X86SchedWriteWidths<WriteCvtI2SS, WriteCvtI2PS,
WriteCvtI2PSY, WriteCvtI2PSY>;
WriteCvtI2PSY, WriteCvtI2PSZ>;
def SchedWriteCvtPD2DQ
: X86SchedWriteWidths<WriteCvtSD2I, WriteCvtPD2I,
WriteCvtPD2IY, WriteCvtPD2IY>;
WriteCvtPD2IY, WriteCvtPD2IZ>;
def SchedWriteCvtPS2DQ
: X86SchedWriteWidths<WriteCvtSS2I, WriteCvtPS2I,
WriteCvtPS2IY, WriteCvtPS2IY>;
WriteCvtPS2IY, WriteCvtPS2IZ>;
def SchedWriteCvtPS2PD
: X86SchedWriteWidths<WriteCvtSS2SD, WriteCvtPS2PD,
WriteCvtPS2PDY, WriteCvtPS2PDY>;
WriteCvtPS2PDY, WriteCvtPS2PDZ>;
def SchedWriteCvtPD2PS
: X86SchedWriteWidths<WriteCvtSD2SS, WriteCvtPD2PS,
WriteCvtPD2PSY, WriteCvtPD2PSY>;
WriteCvtPD2PSY, WriteCvtPD2PSZ>;
def SchedWriteVecALU
: X86SchedWriteWidths<WriteVecALU, WriteVecALUX, WriteVecALUY, WriteVecALUY>;
: X86SchedWriteWidths<WriteVecALU, WriteVecALUX, WriteVecALUY, WriteVecALUZ>;
def SchedWritePHAdd
: X86SchedWriteWidths<WritePHAdd, WritePHAddX, WritePHAddY, WritePHAddY>;
: X86SchedWriteWidths<WritePHAdd, WritePHAddX, WritePHAddY, WritePHAddZ>;
def SchedWriteVecLogic
: X86SchedWriteWidths<WriteVecLogic, WriteVecLogicX,
WriteVecLogicY, WriteVecLogicY>;
WriteVecLogicY, WriteVecLogicZ>;
def SchedWriteVecTest
: X86SchedWriteWidths<WriteVecTest, WriteVecTest,
WriteVecTestY, WriteVecTestY>;
WriteVecTestY, WriteVecTestZ>;
def SchedWriteVecShift
: X86SchedWriteWidths<WriteVecShift, WriteVecShiftX,
WriteVecShiftY, WriteVecShiftY>;
WriteVecShiftY, WriteVecShiftZ>;
def SchedWriteVecShiftImm
: X86SchedWriteWidths<WriteVecShiftImm, WriteVecShiftImmX,
WriteVecShiftImmY, WriteVecShiftImmY>;
WriteVecShiftImmY, WriteVecShiftImmZ>;
def SchedWriteVarVecShift
: X86SchedWriteWidths<WriteVarVecShift, WriteVarVecShift,
WriteVarVecShiftY, WriteVarVecShiftY>;
WriteVarVecShiftY, WriteVarVecShiftZ>;
def SchedWriteVecIMul
: X86SchedWriteWidths<WriteVecIMul, WriteVecIMulX,
WriteVecIMulY, WriteVecIMulY>;
WriteVecIMulY, WriteVecIMulZ>;
def SchedWritePMULLD
: X86SchedWriteWidths<WritePMULLD, WritePMULLD,
WritePMULLDY, WritePMULLDY>;
WritePMULLDY, WritePMULLDZ>;
def SchedWriteMPSAD
: X86SchedWriteWidths<WriteMPSAD, WriteMPSAD,
WriteMPSADY, WriteMPSADY>;
WriteMPSADY, WriteMPSADZ>;
def SchedWritePSADBW
: X86SchedWriteWidths<WritePSADBW, WritePSADBWX,
WritePSADBWY, WritePSADBWY>;
WritePSADBWY, WritePSADBWZ>;
def SchedWriteShuffle
: X86SchedWriteWidths<WriteShuffle, WriteShuffleX,
WriteShuffleY, WriteShuffleY>;
WriteShuffleY, WriteShuffleZ>;
def SchedWriteVarShuffle
: X86SchedWriteWidths<WriteVarShuffle, WriteVarShuffleX,
WriteVarShuffleY, WriteVarShuffleY>;
WriteVarShuffleY, WriteVarShuffleZ>;
def SchedWriteBlend
: X86SchedWriteWidths<WriteBlend, WriteBlend, WriteBlendY, WriteBlendY>;
: X86SchedWriteWidths<WriteBlend, WriteBlend, WriteBlendY, WriteBlendZ>;
def SchedWriteVarBlend
: X86SchedWriteWidths<WriteVarBlend, WriteVarBlend,
WriteVarBlendY, WriteVarBlendY>;
WriteVarBlendY, WriteVarBlendZ>;
// Vector size wrappers.
def SchedWriteFAddSizes

View File

@ -210,28 +210,36 @@ defm : X86WriteRes<WriteEMMS, [AtomPort01], 5, [5], 1>;
defm : AtomWriteResPair<WriteFAdd, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFAddX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFAddY, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;
defm : AtomWriteResPair<WriteFAdd64, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFAdd64X, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteFAdd64Y, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
defm : AtomWriteResPair<WriteFCmp, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFCmpX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFCmpY, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WriteFCmpZ>;
defm : AtomWriteResPair<WriteFCmp64, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFCmp64X, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteFCmp64Y, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : AtomWriteResPair<WriteFCom, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFMul, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteFMulX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFMulY, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : X86WriteResPairUnsupported<WriteFMulZ>;
defm : AtomWriteResPair<WriteFMul64, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFMul64X, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
defm : AtomWriteResPair<WriteFMul64Y, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
defm : X86WriteResPairUnsupported<WriteFMul64Z>;
defm : AtomWriteResPair<WriteFRcp, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteFRcpX, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
defm : AtomWriteResPair<WriteFRcpY, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;
defm : AtomWriteResPair<WriteFRsqrt, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteFRsqrtX, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
defm : AtomWriteResPair<WriteFRsqrtY, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
defm : AtomWriteResPair<WriteFDiv, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
defm : AtomWriteResPair<WriteFDivX, [AtomPort01], [AtomPort01], 70, 70, [70], [70]>;
defm : AtomWriteResPair<WriteFDivY, [AtomPort01], [AtomPort01], 70, 70, [70], [70]>;
@ -252,24 +260,33 @@ defm : AtomWriteResPair<WriteFSqrt80, [AtomPort01], [AtomPort01], 71, 71,
defm : AtomWriteResPair<WriteFSign, [AtomPort1], [AtomPort1]>;
defm : AtomWriteResPair<WriteFRnd, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFRndY, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WriteFRndZ>;
defm : AtomWriteResPair<WriteFLogic, [AtomPort01], [AtomPort0]>;
defm : X86WriteResPairUnsupported<WriteFLogicY>;
defm : X86WriteResPairUnsupported<WriteFLogicZ>;
defm : AtomWriteResPair<WriteFTest, [AtomPort01], [AtomPort0]>;
defm : X86WriteResPairUnsupported<WriteFTestY>;
defm : X86WriteResPairUnsupported<WriteFTestZ>;
defm : AtomWriteResPair<WriteFShuffle, [AtomPort0], [AtomPort0]>;
defm : X86WriteResPairUnsupported<WriteFShuffleY>;
defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
defm : X86WriteResPairUnsupported<WriteFVarShuffle>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleY>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
defm : X86WriteResPairUnsupported<WriteFMA>;
defm : X86WriteResPairUnsupported<WriteFMAX>;
defm : X86WriteResPairUnsupported<WriteFMAY>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;
defm : X86WriteResPairUnsupported<WriteDPPD>;
defm : X86WriteResPairUnsupported<WriteDPPS>;
defm : X86WriteResPairUnsupported<WriteDPPSY>;
defm : X86WriteResPairUnsupported<WriteDPPSZ>;
defm : X86WriteResPairUnsupported<WriteFBlend>;
defm : X86WriteResPairUnsupported<WriteFBlendY>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;
defm : X86WriteResPairUnsupported<WriteFVarBlend>;
defm : X86WriteResPairUnsupported<WriteFVarBlendY>;
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
defm : X86WriteResPairUnsupported<WriteFShuffle256>;
defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;
@ -280,30 +297,39 @@ defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;
defm : AtomWriteResPair<WriteCvtSS2I, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
defm : AtomWriteResPair<WriteCvtPS2I, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteCvtPS2IY, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
defm : AtomWriteResPair<WriteCvtSD2I, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
defm : AtomWriteResPair<WriteCvtPD2I, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : AtomWriteResPair<WriteCvtPD2IY, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
defm : AtomWriteResPair<WriteCvtI2SS, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteCvtI2PS, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteCvtI2PSY, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
defm : AtomWriteResPair<WriteCvtI2SD, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteCvtI2PD, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : AtomWriteResPair<WriteCvtI2PDY, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
defm : AtomWriteResPair<WriteCvtSS2SD, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteCvtPS2PD, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : AtomWriteResPair<WriteCvtPS2PDY, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
defm : AtomWriteResPair<WriteCvtSD2SS, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteCvtPD2PS, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : AtomWriteResPair<WriteCvtPD2PSY, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PS>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSY>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>;
defm : X86WriteResUnsupported<WriteCvtPS2PH>;
defm : X86WriteResUnsupported<WriteCvtPS2PHSt>;
defm : X86WriteResUnsupported<WriteCvtPS2PHY>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
defm : X86WriteResUnsupported<WriteCvtPS2PHYSt>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
////////////////////////////////////////////////////////////////////////////////
// Vector integer operations.
@ -334,42 +360,56 @@ defm : X86WriteRes<WriteVecMoveFromGpr, [AtomPort0], 1, [1], 1>;
defm : AtomWriteResPair<WriteVecALU, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecALUX, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecALUY, [AtomPort01], [AtomPort0], 1, 1>;
defm : X86WriteResPairUnsupported<WriteVecALUZ>;
defm : AtomWriteResPair<WriteVecLogic, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecLogicX, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecLogicY, [AtomPort01], [AtomPort0], 1, 1>;
defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
defm : AtomWriteResPair<WriteVecTest, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecTestY, [AtomPort01], [AtomPort0], 1, 1>;
defm : X86WriteResPairUnsupported<WriteVecTestZ>;
defm : AtomWriteResPair<WriteVecShift, [AtomPort01], [AtomPort01], 2, 3, [2], [3]>;
defm : AtomWriteResPair<WriteVecShiftX, [AtomPort01], [AtomPort01], 2, 3, [2], [3]>;
defm : AtomWriteResPair<WriteVecShiftY, [AtomPort01], [AtomPort01], 2, 3, [2], [3]>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
defm : AtomWriteResPair<WriteVecShiftImm, [AtomPort01], [AtomPort01], 1, 1, [1], [1]>;
defm : AtomWriteResPair<WriteVecShiftImmX, [AtomPort01], [AtomPort01], 1, 1, [1], [1]>;
defm : AtomWriteResPair<WriteVecShiftImmY, [AtomPort01], [AtomPort01], 1, 1, [1], [1]>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
defm : AtomWriteResPair<WriteVecIMul, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteVecIMulX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteVecIMulY, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
defm : AtomWriteResPair<WritePMULLD, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WritePMULLDY, [AtomPort01], [AtomPort0], 1, 1>;
defm : X86WriteResPairUnsupported<WritePMULLDZ>;
defm : AtomWriteResPair<WritePHMINPOS, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteMPSAD, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteMPSADY, [AtomPort01], [AtomPort0], 1, 1>;
defm : X86WriteResPairUnsupported<WriteMPSADZ>;
defm : AtomWriteResPair<WritePSADBW, [AtomPort01], [AtomPort01], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WritePSADBWX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WritePSADBWY, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;
defm : AtomWriteResPair<WriteShuffle, [AtomPort0], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteShuffleX, [AtomPort0], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteShuffleY, [AtomPort0], [AtomPort0], 1, 1>;
defm : X86WriteResPairUnsupported<WriteShuffleZ>;
defm : AtomWriteResPair<WriteVarShuffle, [AtomPort0], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVarShuffleX, [AtomPort01], [AtomPort01], 4, 5, [4], [5]>;
defm : AtomWriteResPair<WriteVarShuffleY, [AtomPort01], [AtomPort01], 4, 5, [4], [5]>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : X86WriteResPairUnsupported<WriteBlend>;
defm : X86WriteResPairUnsupported<WriteBlendY>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;
defm : X86WriteResPairUnsupported<WriteVarBlend>;
defm : X86WriteResPairUnsupported<WriteVarBlendY>;
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
defm : X86WriteResPairUnsupported<WriteShuffle256>;
defm : X86WriteResPairUnsupported<WriteVarShuffle256>;
defm : X86WriteResPairUnsupported<WriteVarVecShift>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
////////////////////////////////////////////////////////////////////////////////
// Vector insert/extract operations.

View File

@ -298,34 +298,44 @@ defm : X86WriteRes<WriteEMMS, [JFPU01, JFPX], 2, [1, 1], 1>;
defm : JWriteResFpuPair<WriteFAdd, [JFPU0, JFPA], 3>;
defm : JWriteResFpuPair<WriteFAddX, [JFPU0, JFPA], 3>;
defm : JWriteResYMMPair<WriteFAddY, [JFPU0, JFPA], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;
defm : JWriteResFpuPair<WriteFAdd64, [JFPU0, JFPA], 3>;
defm : JWriteResFpuPair<WriteFAdd64X, [JFPU0, JFPA], 3>;
defm : JWriteResYMMPair<WriteFAdd64Y, [JFPU0, JFPA], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
defm : JWriteResFpuPair<WriteFCmp, [JFPU0, JFPA], 2>;
defm : JWriteResFpuPair<WriteFCmpX, [JFPU0, JFPA], 2>;
defm : JWriteResYMMPair<WriteFCmpY, [JFPU0, JFPA], 2, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFCmpZ>;
defm : JWriteResFpuPair<WriteFCmp64, [JFPU0, JFPA], 2>;
defm : JWriteResFpuPair<WriteFCmp64X, [JFPU0, JFPA], 2>;
defm : JWriteResYMMPair<WriteFCmp64Y, [JFPU0, JFPA], 2, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : JWriteResFpuPair<WriteFCom, [JFPU0, JFPA, JALU0], 3>;
defm : JWriteResFpuPair<WriteFMul, [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFMulX, [JFPU1, JFPM], 2>;
defm : JWriteResYMMPair<WriteFMulY, [JFPU1, JFPM], 2, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFMulZ>;
defm : JWriteResFpuPair<WriteFMul64, [JFPU1, JFPM], 4, [1,2]>;
defm : JWriteResFpuPair<WriteFMul64X, [JFPU1, JFPM], 4, [1,2]>;
defm : JWriteResYMMPair<WriteFMul64Y, [JFPU1, JFPM], 4, [2,4], 2>;
defm : X86WriteResPairUnsupported<WriteFMul64Z>;
defm : X86WriteResPairUnsupported<WriteFMA>;
defm : X86WriteResPairUnsupported<WriteFMAX>;
defm : X86WriteResPairUnsupported<WriteFMAY>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;
defm : JWriteResFpuPair<WriteDPPD, [JFPU1, JFPM, JFPA], 9, [1, 3, 3], 3>;
defm : JWriteResFpuPair<WriteDPPS, [JFPU1, JFPM, JFPA], 11, [1, 3, 3], 5>;
defm : JWriteResYMMPair<WriteDPPSY, [JFPU1, JFPM, JFPA], 12, [2, 6, 6], 10>;
defm : X86WriteResPairUnsupported<WriteDPPSZ>;
defm : JWriteResFpuPair<WriteFRcp, [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFRcpX, [JFPU1, JFPM], 2>;
defm : JWriteResYMMPair<WriteFRcpY, [JFPU1, JFPM], 2, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;
defm : JWriteResFpuPair<WriteFRsqrt, [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFRsqrtX, [JFPU1, JFPM], 2>;
defm : JWriteResYMMPair<WriteFRsqrtY, [JFPU1, JFPM], 2, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
defm : JWriteResFpuPair<WriteFDiv, [JFPU1, JFPM], 19, [1, 19]>;
defm : JWriteResFpuPair<WriteFDivX, [JFPU1, JFPM], 19, [1, 19]>;
defm : JWriteResYMMPair<WriteFDivY, [JFPU1, JFPM], 38, [2, 38], 2>;
@ -346,18 +356,25 @@ defm : JWriteResFpuPair<WriteFSqrt80, [JFPU1, JFPM], 35, [1, 35]>;
defm : JWriteResFpuPair<WriteFSign, [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFRnd, [JFPU1, JSTC], 3>;
defm : JWriteResYMMPair<WriteFRndY, [JFPU1, JSTC], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFRndZ>;
defm : JWriteResFpuPair<WriteFLogic, [JFPU01, JFPX], 1>;
defm : JWriteResYMMPair<WriteFLogicY, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : X86WriteResPairUnsupported<WriteFLogicZ>;
defm : JWriteResFpuPair<WriteFTest, [JFPU0, JFPA, JALU0], 3>;
defm : JWriteResYMMPair<WriteFTestY , [JFPU01, JFPX, JFPA, JALU0], 4, [2, 2, 2, 1], 3>;
defm : X86WriteResPairUnsupported<WriteFTestZ>;
defm : JWriteResFpuPair<WriteFShuffle, [JFPU01, JFPX], 1>;
defm : JWriteResYMMPair<WriteFShuffleY, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
defm : JWriteResFpuPair<WriteFVarShuffle, [JFPU01, JFPX], 2, [1, 4], 3>;
defm : JWriteResYMMPair<WriteFVarShuffleY,[JFPU01, JFPX], 3, [2, 6], 6>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
defm : JWriteResFpuPair<WriteFBlend, [JFPU01, JFPX], 1>;
defm : JWriteResYMMPair<WriteFBlendY, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;
defm : JWriteResFpuPair<WriteFVarBlend, [JFPU01, JFPX], 2, [1, 4], 3>;
defm : JWriteResYMMPair<WriteFVarBlendY, [JFPU01, JFPX], 3, [2, 6], 6>;
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
defm : JWriteResFpuPair<WriteFShuffle256, [JFPU01, JFPX], 1>;
defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;
@ -368,33 +385,42 @@ defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;
defm : JWriteResFpuPair<WriteCvtSS2I, [JFPU1, JSTC, JFPA, JALU0], 7, [1,1,1,1], 2>;
defm : JWriteResFpuPair<WriteCvtPS2I, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPS2IY, [JFPU1, JSTC], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
defm : JWriteResFpuPair<WriteCvtSD2I, [JFPU1, JSTC, JFPA, JALU0], 7, [1,1,1,1], 2>;
defm : JWriteResFpuPair<WriteCvtPD2I, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPD2IY, [JFPU1, JSTC, JFPX], 6, [2,2,4], 3>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
// FIXME: f+3 ST, LD+STC latency
defm : JWriteResFpuPair<WriteCvtI2SS, [JFPU1, JSTC], 9, [1,1], 2>;
defm : JWriteResFpuPair<WriteCvtI2PS, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtI2PSY, [JFPU1, JSTC], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
defm : JWriteResFpuPair<WriteCvtI2SD, [JFPU1, JSTC], 9, [1,1], 2>;
defm : JWriteResFpuPair<WriteCvtI2PD, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtI2PDY, [JFPU1, JSTC], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
defm : JWriteResFpuPair<WriteCvtSS2SD, [JFPU1, JSTC], 7, [1,2], 2>;
defm : JWriteResFpuPair<WriteCvtPS2PD, [JFPU1, JSTC], 2, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPS2PDY, [JFPU1, JSTC], 2, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
defm : JWriteResFpuPair<WriteCvtSD2SS, [JFPU1, JSTC], 7, [1,2], 2>;
defm : JWriteResFpuPair<WriteCvtPD2PS, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPD2PSY, [JFPU1, JSTC, JFPX], 6, [2,2,4], 3>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
defm : JWriteResFpuPair<WriteCvtPH2PS, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPH2PSY, [JFPU1, JSTC], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>;
defm : X86WriteRes<WriteCvtPS2PH, [JFPU1, JSTC], 3, [1,1], 1>;
defm : X86WriteRes<WriteCvtPS2PHY, [JFPU1, JSTC, JFPX], 6, [2,2,2], 3>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
defm : X86WriteRes<WriteCvtPS2PHSt, [JFPU1, JSTC, JSAGU], 4, [1,1,1], 1>;
defm : X86WriteRes<WriteCvtPS2PHYSt, [JFPU1, JSTC, JFPX, JSAGU], 7, [2,2,2,1], 3>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
////////////////////////////////////////////////////////////////////////////////
// Vector integer operations.
@ -425,42 +451,56 @@ defm : X86WriteRes<WriteVecMoveFromGpr, [JFPU01, JFPX], 8, [1, 1], 2>;
// Vector integer write classes for the J-prefixed resources. Each family
// lists its scalar/X/Y classes first and then marks the Z-suffixed (ZMM)
// class unsupported.
defm : JWriteResFpuPair<WriteVecALU, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecALUX, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecALUY, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteVecALUZ>;
defm : JWriteResFpuPair<WriteVecShift, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecShiftX, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecShiftY, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
defm : JWriteResFpuPair<WriteVecShiftImm, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecShiftImmX,[JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecShiftImmY,[JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
defm : JWriteResFpuPair<WriteVecIMul, [JFPU0, JVIMUL], 2>;
defm : JWriteResFpuPair<WriteVecIMulX, [JFPU0, JVIMUL], 2>;
defm : JWriteResFpuPair<WriteVecIMulY, [JFPU0, JVIMUL], 2>;
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
defm : JWriteResFpuPair<WritePMULLD, [JFPU0, JFPU01, JVIMUL, JVALU], 4, [2, 1, 2, 1], 3>;
defm : JWriteResFpuPair<WritePMULLDY, [JFPU0, JFPU01, JVIMUL, JVALU], 4, [2, 1, 2, 1], 3>;
defm : X86WriteResPairUnsupported<WritePMULLDZ>;
defm : JWriteResFpuPair<WriteMPSAD, [JFPU0, JVIMUL], 3, [1, 2]>;
defm : JWriteResFpuPair<WriteMPSADY, [JFPU0, JVIMUL], 3, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteMPSADZ>;
defm : JWriteResFpuPair<WritePSADBW, [JFPU01, JVALU], 2>;
defm : JWriteResFpuPair<WritePSADBWX, [JFPU01, JVALU], 2>;
defm : JWriteResFpuPair<WritePSADBWY, [JFPU01, JVALU], 2>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;
defm : JWriteResFpuPair<WritePHMINPOS, [JFPU0, JVALU], 2>;
defm : JWriteResFpuPair<WriteShuffle, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteShuffleX, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteShuffleY, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteShuffleZ>;
defm : JWriteResFpuPair<WriteVarShuffle, [JFPU01, JVALU], 2, [1, 4], 3>;
defm : JWriteResFpuPair<WriteVarShuffleX, [JFPU01, JVALU], 2, [1, 4], 3>;
defm : JWriteResFpuPair<WriteVarShuffleY, [JFPU01, JVALU], 2, [1, 4], 3>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : JWriteResFpuPair<WriteBlend, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteBlendY, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;
defm : JWriteResFpuPair<WriteVarBlend, [JFPU01, JVALU], 2, [1, 4], 3>;
defm : JWriteResFpuPair<WriteVarBlendY, [JFPU01, JVALU], 2, [1, 4], 3>;
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
defm : JWriteResFpuPair<WriteVecLogic, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecLogicX, [JFPU01, JVALU], 1>;
// Unlike the other families above, WriteVecLogicY is marked unsupported
// together with the Z class.
defm : X86WriteResPairUnsupported<WriteVecLogicY>;
defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
defm : JWriteResFpuPair<WriteVecTest, [JFPU0, JFPA, JALU0], 3>;
// NOTE(review): the next two WriteVecTestY lines differ only in whitespace.
// They look like the before/after sides of one changed diff line -- confirm
// that only one of them exists in the real file.
defm : JWriteResYMMPair<WriteVecTestY , [JFPU01, JFPX, JFPA, JALU0], 4, [2, 2, 2, 1], 3>;
defm : JWriteResYMMPair<WriteVecTestY, [JFPU01, JFPX, JFPA, JALU0], 4, [2, 2, 2, 1], 3>;
defm : X86WriteResPairUnsupported<WriteVecTestZ>;
defm : JWriteResFpuPair<WriteShuffle256, [JFPU01, JVALU], 1>;
// The variable 256-bit shuffle and all variable vector shift classes are
// marked unsupported.
defm : X86WriteResPairUnsupported<WriteVarShuffle256>;
defm : X86WriteResPairUnsupported<WriteVarVecShift>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
////////////////////////////////////////////////////////////////////////////////
// Vector insert/extract operations.
@ -555,7 +595,7 @@ def JWriteZeroLatency : SchedWriteRes<[]> {
let Latency = 0;
}
// Certain instructions that use the same register for both source
// Certain instructions that use the same register for both source
// operands do not have a real dependency on the previous contents of the
// register, and thus, do not have to wait before completing. They can be
// optimized out at register renaming stage.

View File

@ -159,22 +159,28 @@ defm : X86WriteRes<WriteEMMS, [SLM_FPC_RSV01], 10, [10], 9>;
// SLM model: floating-point add, compare, multiply and divide write classes.
// Each family maps its scalar/X/Y classes to SLM resources and marks the
// Z-suffixed (ZMM) class unsupported.
// NOTE(review): the number after the resource list is presumably the latency
// and the optional list the per-resource cycles -- confirm against the
// SLMWriteResPair multiclass definition.
defm : SLMWriteResPair<WriteFAdd, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFAddX, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFAddY, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;
defm : SLMWriteResPair<WriteFAdd64, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFAdd64X, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFAdd64Y, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
defm : SLMWriteResPair<WriteFCmp, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFCmpX, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFCmpY, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteFCmpZ>;
defm : SLMWriteResPair<WriteFCmp64, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFCmp64X, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFCmp64Y, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : SLMWriteResPair<WriteFCom, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFMul, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : SLMWriteResPair<WriteFMulX, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : SLMWriteResPair<WriteFMulY, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : X86WriteResPairUnsupported<WriteFMulZ>;
defm : SLMWriteResPair<WriteFMul64, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : SLMWriteResPair<WriteFMul64X, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : SLMWriteResPair<WriteFMul64Y, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : X86WriteResPairUnsupported<WriteFMul64Z>;
defm : SLMWriteResPair<WriteFDiv, [SLM_FPC_RSV0, SLMFPDivider], 19, [1,17]>;
defm : SLMWriteResPair<WriteFDivX, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
defm : SLMWriteResPair<WriteFDivY, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
@ -186,9 +192,11 @@ defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
// SLM model: reciprocal, reciprocal square root and square root write
// classes. The Z-suffixed (ZMM) reciprocal classes are marked unsupported.
defm : SLMWriteResPair<WriteFRcp, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFRcpX, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFRcpY, [SLM_FPC_RSV0], 5>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;
defm : SLMWriteResPair<WriteFRsqrt, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFRsqrtX, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFRsqrtY, [SLM_FPC_RSV0], 5>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
// The square-root entries also occupy SLMFPDivider.
defm : SLMWriteResPair<WriteFSqrt, [SLM_FPC_RSV0,SLMFPDivider], 20, [1,20], 1, 3>;
defm : SLMWriteResPair<WriteFSqrtX, [SLM_FPC_RSV0,SLMFPDivider], 41, [1,40], 1, 3>;
defm : SLMWriteResPair<WriteFSqrtY, [SLM_FPC_RSV0,SLMFPDivider], 41, [1,40], 1, 3>;
@ -201,40 +209,52 @@ defm : SLMWriteResPair<WriteFSqrt80, [SLM_FPC_RSV0,SLMFPDivider], 40, [1,40]>;
// SLM model: dot product, sign, round, logic, test, shuffle and blend write
// classes, followed by the conversion classes. Every Z-suffixed (ZMM) class
// is marked unsupported.
defm : SLMWriteResPair<WriteDPPD, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteDPPS, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteDPPSY, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteDPPSZ>;
defm : SLMWriteResPair<WriteFSign, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFRnd, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFRndY, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteFRndZ>;
defm : SLMWriteResPair<WriteFLogic, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFLogicY, [SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WriteFLogicZ>;
defm : SLMWriteResPair<WriteFTest, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFTestY, [SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WriteFTestZ>;
defm : SLMWriteResPair<WriteFShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFShuffleY, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
defm : SLMWriteResPair<WriteFVarShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFVarShuffleY,[SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
defm : SLMWriteResPair<WriteFBlend, [SLM_FPC_RSV0], 1>;
// Conversion between integer and float.
// All supported conversion classes below share the same entry
// ([SLM_FPC_RSV01], 4).
defm : SLMWriteResPair<WriteCvtSS2I, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPS2I, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPS2IY, [SLM_FPC_RSV01], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
defm : SLMWriteResPair<WriteCvtSD2I, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPD2I, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPD2IY, [SLM_FPC_RSV01], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
defm : SLMWriteResPair<WriteCvtI2SS, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2PS, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2PSY, [SLM_FPC_RSV01], 4>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
defm : SLMWriteResPair<WriteCvtI2SD, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2PD, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2PDY, [SLM_FPC_RSV01], 4>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
defm : SLMWriteResPair<WriteCvtSS2SD, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPS2PD, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPS2PDY, [SLM_FPC_RSV01], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
defm : SLMWriteResPair<WriteCvtSD2SS, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPD2PS, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPD2PSY, [SLM_FPC_RSV01], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
// Vector integer operations.
def : WriteRes<WriteVecLoad, [SLM_MEC_RSV]> { let Latency = 3; }
@ -260,37 +280,49 @@ def : WriteRes<WriteVecMoveFromGpr, [SLM_IEC_RSV01]>;
// SLM model: vector integer shift, logic, test, ALU, multiply, shuffle,
// blend and SAD write classes. Every Z-suffixed (ZMM) class is marked
// unsupported.
defm : SLMWriteResPair<WriteVecShift, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVecShiftX, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVecShiftY, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
defm : SLMWriteResPair<WriteVecShiftImm, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVecShiftImmX,[SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVecShiftImmY,[SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
defm : SLMWriteResPair<WriteVecLogic, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecLogicX,[SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecLogicY,[SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
defm : SLMWriteResPair<WriteVecTest, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecTestY, [SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WriteVecTestZ>;
defm : SLMWriteResPair<WriteVecALU, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecALUX, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecALUY, [SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WriteVecALUZ>;
defm : SLMWriteResPair<WriteVecIMul, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WriteVecIMulX, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WriteVecIMulY, [SLM_FPC_RSV0], 4>;
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
// FIXME: The below is closer to correct, but caused some perf regressions.
//defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 11, [11], 7>;
// Until that FIXME is resolved, WritePMULLD/WritePMULLDY use the same entry
// as the WriteVecIMul classes above.
defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WritePMULLDY, [SLM_FPC_RSV0], 4>;
defm : X86WriteResPairUnsupported<WritePMULLDZ>;
defm : SLMWriteResPair<WriteShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteShuffleY, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteShuffleZ>;
defm : SLMWriteResPair<WriteShuffleX, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVarShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVarShuffleX, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVarShuffleY, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : SLMWriteResPair<WriteBlend, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteBlendY, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;
defm : SLMWriteResPair<WriteMPSAD, [SLM_FPC_RSV0], 7>;
defm : SLMWriteResPair<WriteMPSADY, [SLM_FPC_RSV0], 7>;
defm : X86WriteResPairUnsupported<WriteMPSADZ>;
defm : SLMWriteResPair<WritePSADBW, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WritePSADBWX, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WritePSADBWY, [SLM_FPC_RSV0], 4>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;
defm : SLMWriteResPair<WritePHMINPOS, [SLM_FPC_RSV0], 4>;
// Vector insert/extract operations.
@ -309,9 +341,11 @@ def : WriteRes<WriteVecExtractSt, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
// SLM model: floating-point (WriteFHAdd*) and integer (WritePHAdd*)
// horizontal add write classes; the Z-suffixed (ZMM) classes are marked
// unsupported.
defm : SLMWriteResPair<WriteFHAdd, [SLM_FPC_RSV01], 3, [2]>;
defm : SLMWriteResPair<WriteFHAddY, [SLM_FPC_RSV01], 3, [2]>;
defm : X86WriteResPairUnsupported<WriteFHAddZ>;
defm : SLMWriteResPair<WritePHAdd, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WritePHAddX, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WritePHAddY, [SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WritePHAddZ>;
// String instructions.
// Packed Compare Implicit Length Strings, Return Mask
@ -407,25 +441,33 @@ def : WriteRes<WriteNop, []>;
// scheduling resources anyway.
def : WriteRes<WriteIMulH, [SLM_FPC_RSV0]>;
// SLM model: remaining write classes. Only WriteVarBlend, WriteFVarBlend and
// WriteVarVecShift get real SLM resources in this list; every other class
// here is marked unsupported.
defm : X86WriteResPairUnsupported<WriteFBlendY>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;
defm : SLMWriteResPair<WriteVarBlend, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteVarBlendY>;
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
defm : SLMWriteResPair<WriteFVarBlend, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteFVarBlendY>;
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
defm : X86WriteResPairUnsupported<WriteFShuffle256>;
defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;
defm : X86WriteResPairUnsupported<WriteShuffle256>;
defm : X86WriteResPairUnsupported<WriteVarShuffle256>;
defm : SLMWriteResPair<WriteVarVecShift, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
defm : X86WriteResPairUnsupported<WriteFMA>;
defm : X86WriteResPairUnsupported<WriteFMAX>;
defm : X86WriteResPairUnsupported<WriteFMAY>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PS>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSY>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>;
// The CVTPS2PH register and store (St) classes are separate, non-pair write
// classes, hence X86WriteResUnsupported rather than the Pair variant.
defm : X86WriteResUnsupported<WriteCvtPS2PH>;
defm : X86WriteResUnsupported<WriteCvtPS2PHY>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
defm : X86WriteResUnsupported<WriteCvtPS2PHSt>;
defm : X86WriteResUnsupported<WriteCvtPS2PHYSt>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
} // SchedModel

View File

@ -212,34 +212,45 @@ defm : X86WriteRes<WriteFMoveY, [ZnFPU], 1, [1], 1>;
// Zn model: floating-point add/compare, blend, conversion and divide write
// classes. Every Z-suffixed (ZMM) class is marked unsupported.
// NOTE(review): the number after the resource list is presumably the
// latency -- confirm against the ZnWriteResFpuPair multiclass definition.
defm : ZnWriteResFpuPair<WriteFAdd, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFAddX, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFAddY, [ZnFPU0], 3>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;
defm : ZnWriteResFpuPair<WriteFAdd64, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFAdd64X, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFAdd64Y, [ZnFPU0], 3>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
defm : ZnWriteResFpuPair<WriteFCmp, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFCmpX, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFCmpY, [ZnFPU0], 3>;
defm : X86WriteResPairUnsupported<WriteFCmpZ>;
defm : ZnWriteResFpuPair<WriteFCmp64, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFCmp64X, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFCmp64Y, [ZnFPU0], 3>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : ZnWriteResFpuPair<WriteFCom, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFBlend, [ZnFPU01], 1>;
defm : ZnWriteResFpuPair<WriteFBlendY, [ZnFPU01], 1>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;
defm : ZnWriteResFpuPair<WriteFVarBlend, [ZnFPU01], 1>;
defm : ZnWriteResFpuPair<WriteFVarBlendY,[ZnFPU01], 1>;
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
defm : ZnWriteResFpuPair<WriteVarBlend, [ZnFPU0], 1>;
defm : ZnWriteResFpuPair<WriteVarBlendY, [ZnFPU0], 1>;
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
// Conversion classes: all supported ones below share ([ZnFPU3], 5).
defm : ZnWriteResFpuPair<WriteCvtSS2I, [ZnFPU3], 5>;
defm : ZnWriteResFpuPair<WriteCvtPS2I, [ZnFPU3], 5>;
defm : ZnWriteResFpuPair<WriteCvtPS2IY, [ZnFPU3], 5>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
defm : ZnWriteResFpuPair<WriteCvtSD2I, [ZnFPU3], 5>;
defm : ZnWriteResFpuPair<WriteCvtPD2I, [ZnFPU3], 5>;
defm : ZnWriteResFpuPair<WriteCvtPD2IY, [ZnFPU3], 5>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
defm : ZnWriteResFpuPair<WriteCvtI2SS, [ZnFPU3], 5>;
defm : ZnWriteResFpuPair<WriteCvtI2PS, [ZnFPU3], 5>;
defm : ZnWriteResFpuPair<WriteCvtI2PSY, [ZnFPU3], 5>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
defm : ZnWriteResFpuPair<WriteCvtI2SD, [ZnFPU3], 5>;
defm : ZnWriteResFpuPair<WriteCvtI2PD, [ZnFPU3], 5>;
defm : ZnWriteResFpuPair<WriteCvtI2PDY, [ZnFPU3], 5>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
defm : ZnWriteResFpuPair<WriteFDiv, [ZnFPU3], 15>;
defm : ZnWriteResFpuPair<WriteFDivX, [ZnFPU3], 15>;
//defm : ZnWriteResFpuPair<WriteFDivY, [ZnFPU3], 15>;
@ -251,29 +262,39 @@ defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
// Zn model: sign, round, logic, test, shuffle, multiply, FMA, reciprocal and
// square-root write classes. Every Z-suffixed (ZMM) class is marked
// unsupported.
defm : ZnWriteResFpuPair<WriteFSign, [ZnFPU3], 2>;
defm : ZnWriteResFpuPair<WriteFRnd, [ZnFPU3], 4, [1], 1, 7, 1>; // FIXME: Should folds require 1 extra uop?
defm : ZnWriteResFpuPair<WriteFRndY, [ZnFPU3], 4, [1], 1, 7, 1>; // FIXME: Should folds require 1 extra uop?
defm : X86WriteResPairUnsupported<WriteFRndZ>;
defm : ZnWriteResFpuPair<WriteFLogic, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteFLogicY, [ZnFPU], 1>;
defm : X86WriteResPairUnsupported<WriteFLogicZ>;
defm : ZnWriteResFpuPair<WriteFTest, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteFTestY, [ZnFPU], 1>;
defm : X86WriteResPairUnsupported<WriteFTestZ>;
defm : ZnWriteResFpuPair<WriteFShuffle, [ZnFPU12], 1>;
defm : ZnWriteResFpuPair<WriteFShuffleY, [ZnFPU12], 1>;
defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
defm : ZnWriteResFpuPair<WriteFVarShuffle, [ZnFPU12], 1>;
defm : ZnWriteResFpuPair<WriteFVarShuffleY,[ZnFPU12], 1>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
defm : ZnWriteResFpuPair<WriteFMul, [ZnFPU01], 3, [1], 1, 7, 1>;
defm : ZnWriteResFpuPair<WriteFMulX, [ZnFPU01], 3, [1], 1, 7, 1>;
defm : ZnWriteResFpuPair<WriteFMulY, [ZnFPU01], 4, [1], 1, 7, 1>;
defm : X86WriteResPairUnsupported<WriteFMulZ>;
defm : ZnWriteResFpuPair<WriteFMul64, [ZnFPU01], 3, [1], 1, 7, 1>;
defm : ZnWriteResFpuPair<WriteFMul64X, [ZnFPU01], 3, [1], 1, 7, 1>;
defm : ZnWriteResFpuPair<WriteFMul64Y, [ZnFPU01], 4, [1], 1, 7, 1>;
defm : X86WriteResPairUnsupported<WriteFMul64Z>;
defm : ZnWriteResFpuPair<WriteFMA, [ZnFPU03], 5>;
defm : ZnWriteResFpuPair<WriteFMAX, [ZnFPU03], 5>;
defm : ZnWriteResFpuPair<WriteFMAY, [ZnFPU03], 5>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;
defm : ZnWriteResFpuPair<WriteFRcp, [ZnFPU01], 5>;
defm : ZnWriteResFpuPair<WriteFRcpX, [ZnFPU01], 5>;
defm : ZnWriteResFpuPair<WriteFRcpY, [ZnFPU01], 5, [1], 1, 7, 2>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;
// NOTE(review): the WriteFRsqrt and WriteFRsqrtY entries are commented out
// here; presumably those classes are defined elsewhere in the file -- confirm.
//defm : ZnWriteResFpuPair<WriteFRsqrt, [ZnFPU02], 5>;
defm : ZnWriteResFpuPair<WriteFRsqrtX, [ZnFPU01], 5, [1], 1, 7, 1>;
//defm : ZnWriteResFpuPair<WriteFRsqrtY, [ZnFPU01], 5, [2], 2>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
defm : ZnWriteResFpuPair<WriteFSqrt, [ZnFPU3], 20, [20]>;
defm : ZnWriteResFpuPair<WriteFSqrtX, [ZnFPU3], 20, [20]>;
defm : ZnWriteResFpuPair<WriteFSqrtY, [ZnFPU3], 28, [28], 1, 7, 1>;
@ -309,40 +330,52 @@ defm : X86WriteRes<WriteEMMS, [ZnFPU], 2, [1], 1>;
// Zn model: vector integer shift, logic, test, ALU, multiply, shuffle, blend
// and SAD write classes. Every Z-suffixed (ZMM) class is marked unsupported.
defm : ZnWriteResFpuPair<WriteVecShift, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecShiftX, [ZnFPU2], 1>;
defm : ZnWriteResFpuPair<WriteVecShiftY, [ZnFPU2], 2>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
defm : ZnWriteResFpuPair<WriteVecShiftImm, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecShiftImmX, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecShiftImmY, [ZnFPU], 1>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
defm : ZnWriteResFpuPair<WriteVecLogic, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecLogicX, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecLogicY, [ZnFPU], 1>;
defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
defm : ZnWriteResFpuPair<WriteVecTest, [ZnFPU12], 1, [2], 1, 7, 1>;
defm : ZnWriteResFpuPair<WriteVecTestY, [ZnFPU12], 1, [2], 1, 7, 1>;
defm : X86WriteResPairUnsupported<WriteVecTestZ>;
defm : ZnWriteResFpuPair<WriteVecALU, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecALUX, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecALUY, [ZnFPU], 1>;
defm : X86WriteResPairUnsupported<WriteVecALUZ>;
defm : ZnWriteResFpuPair<WriteVecIMul, [ZnFPU0], 4>;
defm : ZnWriteResFpuPair<WriteVecIMulX, [ZnFPU0], 4>;
defm : ZnWriteResFpuPair<WriteVecIMulY, [ZnFPU0], 4>;
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
defm : ZnWriteResFpuPair<WritePMULLD, [ZnFPU0], 4, [1], 1, 7, 1>; // FIXME
defm : ZnWriteResFpuPair<WritePMULLDY, [ZnFPU0], 5, [2], 1, 7, 1>; // FIXME
defm : X86WriteResPairUnsupported<WritePMULLDZ>;
defm : ZnWriteResFpuPair<WriteShuffle, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteShuffleX, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteShuffleY, [ZnFPU], 1>;
defm : X86WriteResPairUnsupported<WriteShuffleZ>;
defm : ZnWriteResFpuPair<WriteVarShuffle, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVarShuffleX,[ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVarShuffleY,[ZnFPU], 1>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : ZnWriteResFpuPair<WriteBlend, [ZnFPU01], 1>;
defm : ZnWriteResFpuPair<WriteBlendY, [ZnFPU01], 1>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;
defm : ZnWriteResFpuPair<WriteShuffle256, [ZnFPU], 2>;
defm : ZnWriteResFpuPair<WriteVarShuffle256, [ZnFPU], 2>;
defm : ZnWriteResFpuPair<WritePSADBW, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WritePSADBWX, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WritePSADBWY, [ZnFPU0], 3>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;
defm : ZnWriteResFpuPair<WritePHMINPOS, [ZnFPU0], 4>;
// Vector Shift Operations
defm : ZnWriteResFpuPair<WriteVarVecShift, [ZnFPU12], 1>;
defm : ZnWriteResFpuPair<WriteVarVecShiftY, [ZnFPU12], 1>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
// Vector insert/extract operations.
defm : ZnWriteResFpuPair<WriteVecInsert, [ZnFPU], 1>;
@ -1144,9 +1177,10 @@ def ZnWriteCVTPD2PSYr: SchedWriteRes<[ZnFPU3]> {
// CVTPD2PS.
// Register forms: the x and y classes alias to Zn-specific writes, while the
// z (ZMM) class is marked unsupported with the non-pair
// X86WriteResUnsupported.
// x,x.
def : SchedAlias<WriteCvtPD2PS, ZnWriteCVTPD2PSr>;
// y,y.
def : SchedAlias<WriteCvtPD2PSY, ZnWriteCVTPD2PSYr>;
// z,z.
defm : X86WriteResUnsupported<WriteCvtPD2PSZ>;
def ZnWriteCVTPD2PSLd: SchedWriteRes<[ZnAGU,ZnFPU03]> {
let Latency = 11;
@ -1161,6 +1195,8 @@ def ZnWriteCVTPD2PSYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
let Latency = 11;
}
// Memory-operand (Ld) classes: the Y class aliases to ZnWriteCVTPD2PSYLd and
// the Z (ZMM) class is marked unsupported.
def : SchedAlias<WriteCvtPD2PSYLd, ZnWriteCVTPD2PSYLd>;
// z,m512
defm : X86WriteResUnsupported<WriteCvtPD2PSZLd>;
// CVTSD2SS.
// x,x.
@ -1185,12 +1221,14 @@ def ZnWriteCVTPS2PDLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
}
// Both the X and Y load classes alias to the same ZnWriteCVTPS2PDLd write;
// the Z (ZMM) load class is marked unsupported.
def : SchedAlias<WriteCvtPS2PDLd, ZnWriteCVTPS2PDLd>;
def : SchedAlias<WriteCvtPS2PDYLd, ZnWriteCVTPS2PDLd>;
defm : X86WriteResUnsupported<WriteCvtPS2PDZLd>;
// y,x.
// Write with Latency 3 on ZnFPU3, used for the WriteCvtPS2PDY register class.
def ZnWriteVCVTPS2PDY : SchedWriteRes<[ZnFPU3]> {
let Latency = 3;
}
def : SchedAlias<WriteCvtPS2PDY, ZnWriteVCVTPS2PDY>;
// The Z (ZMM) register class is marked unsupported.
defm : X86WriteResUnsupported<WriteCvtPS2PDZ>;
// CVTSS2SD.
// x,x.
@ -1288,17 +1326,21 @@ def : InstRW<[ZnWriteCVSTSI2SILd], (instregex "(V?)CVT(T?)SD2SI(64)?rm")>;
// Half-precision conversions: every supported class below aliases to
// ZnWriteMicrocoded, and each Z (ZMM) class is marked unsupported.
// x,v,i.
def : SchedAlias<WriteCvtPS2PH, ZnWriteMicrocoded>;
def : SchedAlias<WriteCvtPS2PHY, ZnWriteMicrocoded>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
// m,v,i.
def : SchedAlias<WriteCvtPS2PHSt, ZnWriteMicrocoded>;
def : SchedAlias<WriteCvtPS2PHYSt, ZnWriteMicrocoded>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
// VCVTPH2PS.
// v,x.
def : SchedAlias<WriteCvtPH2PS, ZnWriteMicrocoded>;
def : SchedAlias<WriteCvtPH2PSY, ZnWriteMicrocoded>;
defm : X86WriteResUnsupported<WriteCvtPH2PSZ>;
// v,m.
def : SchedAlias<WriteCvtPH2PSLd, ZnWriteMicrocoded>;
def : SchedAlias<WriteCvtPH2PSYLd, ZnWriteMicrocoded>;
defm : X86WriteResUnsupported<WriteCvtPH2PSZLd>;
//-- SSE4A instructions --//
// EXTRQ

View File

@ -42,7 +42,7 @@ define <4 x double> @test_addpd(<4 x double> %a0, <4 x double> %a1, <4 x double>
;
; SKX-LABEL: test_addpd:
; SKX: # %bb.0:
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -96,7 +96,7 @@ define <8 x float> @test_addps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a
;
; SKX-LABEL: test_addps:
; SKX: # %bb.0:
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -150,7 +150,7 @@ define <4 x double> @test_addsubpd(<4 x double> %a0, <4 x double> %a1, <4 x doub
;
; SKX-LABEL: test_addsubpd:
; SKX: # %bb.0:
; SKX-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -205,7 +205,7 @@ define <8 x float> @test_addsubps(<8 x float> %a0, <8 x float> %a1, <8 x float>
;
; SKX-LABEL: test_addsubps:
; SKX: # %bb.0:
; SKX-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -267,7 +267,7 @@ define <4 x double> @test_andnotpd(<4 x double> %a0, <4 x double> %a1, <4 x doub
; SKX: # %bb.0:
; SKX-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_andnotpd:
@ -336,7 +336,7 @@ define <8 x float> @test_andnotps(<8 x float> %a0, <8 x float> %a1, <8 x float>
; SKX: # %bb.0:
; SKX-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_andnotps:
@ -405,7 +405,7 @@ define <4 x double> @test_andpd(<4 x double> %a0, <4 x double> %a1, <4 x double>
; SKX: # %bb.0:
; SKX-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_andpd:
@ -472,7 +472,7 @@ define <8 x float> @test_andps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a
; SKX: # %bb.0:
; SKX-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_andps:
@ -538,7 +538,7 @@ define <4 x double> @test_blendpd(<4 x double> %a0, <4 x double> %a1, <4 x doubl
; SKX-LABEL: test_blendpd:
; SKX: # %bb.0:
; SKX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33]
; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -602,7 +602,7 @@ define <8 x float> @test_blendps(<8 x float> %a0, <8 x float> %a1, <8 x float> *
; SKX: # %bb.0:
; SKX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.33]
; SKX-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [8:0.50]
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_blendps:
@ -956,7 +956,7 @@ define <4 x double> @test_cmppd(<4 x double> %a0, <4 x double> %a1, <4 x double>
;
; SKX-LABEL: test_cmppd:
; SKX: # %bb.0:
; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [4:0.33]
; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [4:0.50]
; SKX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
@ -1022,7 +1022,7 @@ define <8 x float> @test_cmpps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a
;
; SKX-LABEL: test_cmpps:
; SKX: # %bb.0:
; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [4:0.33]
; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [4:0.50]
; SKX-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
@ -1090,7 +1090,7 @@ define <4 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) {
; SKX: # %bb.0:
; SKX-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00]
; SKX-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [13:1.00]
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_cvtdq2pd:
@ -1153,9 +1153,9 @@ define <8 x float> @test_cvtdq2ps(<8 x i32> %a0, <8 x i32> *%a1) {
;
; SKX-LABEL: test_cvtdq2ps:
; SKX: # %bb.0:
; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [11:0.50]
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_cvtdq2ps:
@ -1217,7 +1217,7 @@ define <8 x i32> @test_cvtpd2dq(<4 x double> %a0, <4 x double> *%a1) {
; SKX-LABEL: test_cvtpd2dq:
; SKX: # %bb.0:
; SKX-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [7:1.00]
; SKX-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [8:1.00]
; SKX-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [8:0.50]
; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -1281,7 +1281,7 @@ define <8 x i32> @test_cvttpd2dq(<4 x double> %a0, <4 x double> *%a1) {
; SKX-LABEL: test_cvttpd2dq:
; SKX: # %bb.0:
; SKX-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [7:1.00]
; SKX-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [8:1.00]
; SKX-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [8:0.50]
; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -1406,7 +1406,7 @@ define <8 x i32> @test_cvtps2dq(<8 x float> %a0, <8 x float> *%a1) {
;
; SKX-LABEL: test_cvtps2dq:
; SKX: # %bb.0:
; SKX-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [11:0.50]
; SKX-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
@ -1470,7 +1470,7 @@ define <8 x i32> @test_cvttps2dq(<8 x float> %a0, <8 x float> *%a1) {
;
; SKX-LABEL: test_cvttps2dq:
; SKX: # %bb.0:
; SKX-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:0.50]
; SKX-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [11:0.50]
; SKX-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
@ -1979,7 +1979,7 @@ define <8 x float> @test_insertf128(<8 x float> %a0, <4 x float> %a1, <4 x float
; SKX: # %bb.0:
; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
; SKX-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_insertf128:
@ -2334,7 +2334,7 @@ define <4 x double> @test_maxpd(<4 x double> %a0, <4 x double> %a1, <4 x double>
;
; SKX-LABEL: test_maxpd:
; SKX: # %bb.0:
; SKX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -2389,7 +2389,7 @@ define <8 x float> @test_maxps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a
;
; SKX-LABEL: test_maxps:
; SKX: # %bb.0:
; SKX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -2444,7 +2444,7 @@ define <4 x double> @test_minpd(<4 x double> %a0, <4 x double> %a1, <4 x double>
;
; SKX-LABEL: test_minpd:
; SKX: # %bb.0:
; SKX-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -2499,7 +2499,7 @@ define <8 x float> @test_minps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a
;
; SKX-LABEL: test_minps:
; SKX: # %bb.0:
; SKX-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -2560,7 +2560,7 @@ define <4 x double> @test_movapd(<4 x double> *%a0, <4 x double> *%a1) {
; SKX-LABEL: test_movapd:
; SKX: # %bb.0:
; SKX-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50]
; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -2622,7 +2622,7 @@ define <8 x float> @test_movaps(<8 x float> *%a0, <8 x float> *%a1) {
; SKX-LABEL: test_movaps:
; SKX: # %bb.0:
; SKX-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50]
; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -2685,7 +2685,7 @@ define <4 x double> @test_movddup(<4 x double> %a0, <4 x double> *%a1) {
; SKX: # %bb.0:
; SKX-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00]
; SKX-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50]
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_movddup:
@ -2912,7 +2912,7 @@ define <4 x double> @test_movntpd(<4 x double> %a0, <4 x double> *%a1) {
;
; SKX-LABEL: test_movntpd:
; SKX: # %bb.0:
; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -2965,7 +2965,7 @@ define <8 x float> @test_movntps(<8 x float> %a0, <8 x float> *%a1) {
;
; SKX-LABEL: test_movntps:
; SKX: # %bb.0:
; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -3025,7 +3025,7 @@ define <8 x float> @test_movshdup(<8 x float> %a0, <8 x float> *%a1) {
; SKX: # %bb.0:
; SKX-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00]
; SKX-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50]
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_movshdup:
@ -3088,7 +3088,7 @@ define <8 x float> @test_movsldup(<8 x float> %a0, <8 x float> *%a1) {
; SKX: # %bb.0:
; SKX-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00]
; SKX-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50]
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_movsldup:
@ -3152,7 +3152,7 @@ define <4 x double> @test_movupd(<4 x double> *%a0, <4 x double> *%a1) {
; SKX-LABEL: test_movupd:
; SKX: # %bb.0:
; SKX-NEXT: vmovupd (%rdi), %ymm0 # sched: [7:0.50]
; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -3216,7 +3216,7 @@ define <8 x float> @test_movups(<8 x float> *%a0, <8 x float> *%a1) {
; SKX-LABEL: test_movups:
; SKX: # %bb.0:
; SKX-NEXT: vmovups (%rdi), %ymm0 # sched: [7:0.50]
; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -3272,7 +3272,7 @@ define <4 x double> @test_mulpd(<4 x double> %a0, <4 x double> %a1, <4 x double>
;
; SKX-LABEL: test_mulpd:
; SKX: # %bb.0:
; SKX-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -3326,7 +3326,7 @@ define <8 x float> @test_mulps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a
;
; SKX-LABEL: test_mulps:
; SKX: # %bb.0:
; SKX-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -3387,7 +3387,7 @@ define <4 x double> @orpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2)
; SKX: # %bb.0:
; SKX-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: orpd:
@ -3454,7 +3454,7 @@ define <8 x float> @test_orps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2
; SKX: # %bb.0:
; SKX-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_orps:
@ -3521,7 +3521,7 @@ define <4 x double> @test_perm2f128(<4 x double> %a0, <4 x double> %a1, <4 x dou
; SKX: # %bb.0:
; SKX-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_perm2f128:
@ -3584,7 +3584,7 @@ define <2 x double> @test_permilpd(<2 x double> %a0, <2 x double> *%a1) {
; SKX: # %bb.0:
; SKX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00]
; SKX-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_permilpd:
@ -3647,7 +3647,7 @@ define <4 x double> @test_permilpd_ymm(<4 x double> %a0, <4 x double> *%a1) {
; SKX: # %bb.0:
; SKX-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00]
; SKX-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:1.00]
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_permilpd_ymm:
@ -3710,7 +3710,7 @@ define <4 x float> @test_permilps(<4 x float> %a0, <4 x float> *%a1) {
; SKX: # %bb.0:
; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00]
; SKX-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_permilps:
@ -3773,7 +3773,7 @@ define <8 x float> @test_permilps_ymm(<8 x float> %a0, <8 x float> *%a1) {
; SKX: # %bb.0:
; SKX-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
; SKX-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:1.00]
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_permilps_ymm:
@ -4056,7 +4056,7 @@ define <8 x float> @test_rcpps(<8 x float> %a0, <8 x float> *%a1) {
; SKX: # %bb.0:
; SKX-NEXT: vrcpps %ymm0, %ymm0 # sched: [4:1.00]
; SKX-NEXT: vrcpps (%rdi), %ymm1 # sched: [11:1.00]
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_rcpps:
@ -4118,9 +4118,9 @@ define <4 x double> @test_roundpd(<4 x double> %a0, <4 x double> *%a1) {
;
; SKX-LABEL: test_roundpd:
; SKX: # %bb.0:
; SKX-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [8:0.67]
; SKX-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [15:0.67]
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [8:1.00]
; SKX-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [15:1.00]
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_roundpd:
@ -4182,9 +4182,9 @@ define <8 x float> @test_roundps(<8 x float> %a0, <8 x float> *%a1) {
;
; SKX-LABEL: test_roundps:
; SKX: # %bb.0:
; SKX-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [8:0.67]
; SKX-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [15:0.67]
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [8:1.00]
; SKX-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [15:1.00]
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_roundps:
@ -4248,7 +4248,7 @@ define <8 x float> @test_rsqrtps(<8 x float> %a0, <8 x float> *%a1) {
; SKX: # %bb.0:
; SKX-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [4:1.00]
; SKX-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [11:1.00]
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_rsqrtps:
@ -4312,7 +4312,7 @@ define <4 x double> @test_shufpd(<4 x double> %a0, <4 x double> %a1, <4 x double
; SKX: # %bb.0:
; SKX-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00]
; SKX-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00]
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_shufpd:
@ -4375,7 +4375,7 @@ define <8 x float> @test_shufps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%
; SKX: # %bb.0:
; SKX-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00]
; SKX-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [8:1.00]
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_shufps:
@ -4438,7 +4438,7 @@ define <4 x double> @test_sqrtpd(<4 x double> %a0, <4 x double> *%a1) {
; SKX: # %bb.0:
; SKX-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [18:12.00]
; SKX-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [25:12.00]
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_sqrtpd:
@ -4502,7 +4502,7 @@ define <8 x float> @test_sqrtps(<8 x float> %a0, <8 x float> *%a1) {
; SKX: # %bb.0:
; SKX-NEXT: vsqrtps %ymm0, %ymm0 # sched: [12:6.00]
; SKX-NEXT: vsqrtps (%rdi), %ymm1 # sched: [19:6.00]
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_sqrtps:
@ -4559,7 +4559,7 @@ define <4 x double> @test_subpd(<4 x double> %a0, <4 x double> %a1, <4 x double>
;
; SKX-LABEL: test_subpd:
; SKX: # %bb.0:
; SKX-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -4613,7 +4613,7 @@ define <8 x float> @test_subps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a
;
; SKX-LABEL: test_subps:
; SKX: # %bb.0:
; SKX-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -5008,7 +5008,7 @@ define <4 x double> @test_unpckhpd(<4 x double> %a0, <4 x double> %a1, <4 x doub
; SKX: # %bb.0:
; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
; SKX-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:1.00]
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_unpckhpd:
@ -5125,7 +5125,7 @@ define <4 x double> @test_unpcklpd(<4 x double> %a0, <4 x double> %a1, <4 x doub
; SKX: # %bb.0:
; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
; SKX-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00]
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_unpcklpd:
@ -5242,7 +5242,7 @@ define <4 x double> @test_xorpd(<4 x double> %a0, <4 x double> %a1, <4 x double>
; SKX: # %bb.0:
; SKX-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_xorpd:
@ -5309,7 +5309,7 @@ define <8 x float> @test_xorps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a
; SKX: # %bb.0:
; SKX-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_xorps:

View File

@ -76,7 +76,7 @@ define <4 x double> @test_broadcastsd_ymm(<2 x double> %a0) {
; SKX-LABEL: test_broadcastsd_ymm:
; SKX: # %bb.0:
; SKX-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00]
; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_broadcastsd_ymm:
@ -117,7 +117,7 @@ define <4 x float> @test_broadcastss(<4 x float> %a0) {
; SKX-LABEL: test_broadcastss:
; SKX: # %bb.0:
; SKX-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00]
; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_broadcastss:
@ -158,7 +158,7 @@ define <8 x float> @test_broadcastss_ymm(<4 x float> %a0) {
; SKX-LABEL: test_broadcastss_ymm:
; SKX: # %bb.0:
; SKX-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00]
; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_broadcastss_ymm:
@ -2634,7 +2634,7 @@ define <4 x double> @test_permpd(<4 x double> %a0, <4 x double> *%a1) {
; SKX: # %bb.0:
; SKX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
; SKX-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_permpd:
@ -2683,7 +2683,7 @@ define <8 x float> @test_permps(<8 x i32> %a0, <8 x float> %a1, <8 x float> *%a2
; SKX: # %bb.0:
; SKX-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_permps:
@ -3320,7 +3320,7 @@ define <16 x i16> @test_pmaddubsw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2)
;
; SKX-LABEL: test_pmaddubsw:
; SKX: # %bb.0:
; SKX-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -3364,7 +3364,7 @@ define <8 x i32> @test_pmaddwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2)
;
; SKX-LABEL: test_pmaddwd:
; SKX: # %bb.0:
; SKX-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -4761,7 +4761,7 @@ define <4 x i64> @test_pmuldq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
;
; SKX-LABEL: test_pmuldq:
; SKX: # %bb.0:
; SKX-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -4805,7 +4805,7 @@ define <16 x i16> @test_pmulhrsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2
;
; SKX-LABEL: test_pmulhrsw:
; SKX: # %bb.0:
; SKX-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -4848,7 +4848,7 @@ define <16 x i16> @test_pmulhuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2)
;
; SKX-LABEL: test_pmulhuw:
; SKX: # %bb.0:
; SKX-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -4891,7 +4891,7 @@ define <16 x i16> @test_pmulhw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2)
;
; SKX-LABEL: test_pmulhw:
; SKX: # %bb.0:
; SKX-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -4934,8 +4934,8 @@ define <8 x i32> @test_pmulld(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
;
; SKX-LABEL: test_pmulld:
; SKX: # %bb.0:
; SKX-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:0.67]
; SKX-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [17:0.67]
; SKX-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:1.00]
; SKX-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [17:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pmulld:
@ -4976,7 +4976,7 @@ define <16 x i16> @test_pmullw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2)
;
; SKX-LABEL: test_pmullw:
; SKX: # %bb.0:
; SKX-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -5018,7 +5018,7 @@ define <4 x i64> @test_pmuludq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
;
; SKX-LABEL: test_pmuludq:
; SKX: # %bb.0:
; SKX-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;

File diff suppressed because it is too large Load Diff

View File

@ -25,15 +25,15 @@ define void @test_vpopcntd(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> *%a2, i16
; ICELAKE: # %bb.0:
; ICELAKE-NEXT: kmovd %esi, %k1 # sched: [1:1.00]
; ICELAKE-NEXT: #APP
; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 # sched: [1:0.50]
; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 {%k1} # sched: [1:0.50]
; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50]
; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 # sched: [8:0.50]
; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 {%k1} # sched: [8:0.50]
; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 # sched: [8:0.50]
; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} # sched: [8:0.50]
; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} {z} # sched: [8:0.50]
; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 # sched: [1:1.00]
; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 {%k1} # sched: [1:1.00]
; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00]
; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 # sched: [8:1.00]
; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 {%k1} # sched: [8:1.00]
; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 # sched: [8:1.00]
; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} # sched: [8:1.00]
; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} {z} # sched: [8:1.00]
; ICELAKE-NEXT: #NO_APP
; ICELAKE-NEXT: vzeroupper # sched: [4:1.00]
; ICELAKE-NEXT: retq # sched: [7:1.00]
@ -63,15 +63,15 @@ define void @test_vpopcntq(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> *%a2, i8 %a3)
; ICELAKE: # %bb.0:
; ICELAKE-NEXT: kmovd %esi, %k1 # sched: [1:1.00]
; ICELAKE-NEXT: #APP
; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 # sched: [1:0.50]
; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 {%k1} # sched: [1:0.50]
; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50]
; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 # sched: [8:0.50]
; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 {%k1} # sched: [8:0.50]
; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 # sched: [8:0.50]
; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} # sched: [8:0.50]
; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} {z} # sched: [8:0.50]
; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 # sched: [1:1.00]
; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 {%k1} # sched: [1:1.00]
; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00]
; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 # sched: [8:1.00]
; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 {%k1} # sched: [8:1.00]
; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 # sched: [8:1.00]
; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} # sched: [8:1.00]
; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} {z} # sched: [8:1.00]
; ICELAKE-NEXT: #NO_APP
; ICELAKE-NEXT: vzeroupper # sched: [4:1.00]
; ICELAKE-NEXT: retq # sched: [7:1.00]

View File

@ -75,9 +75,9 @@ define void @test_vfmaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double>
; SKX-LABEL: test_vfmaddpd_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.33]
; SKX-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.33]
; SKX-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.33]
; SKX-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
; SKX-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
; SKX-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
; SKX-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
; SKX-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
; SKX-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
@ -167,9 +167,9 @@ define void @test_vfmaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double>
; SKX-LABEL: test_vfmaddpd_256:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.33]
; SKX-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.33]
; SKX-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.33]
; SKX-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.50]
; SKX-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.50]
; SKX-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.50]
; SKX-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50]
; SKX-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
; SKX-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
@ -257,9 +257,9 @@ define void @test_vfmaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2
; SKX-LABEL: test_vfmaddps_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.33]
; SKX-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.33]
; SKX-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.33]
; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
; SKX-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
; SKX-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
; SKX-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
; SKX-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
@ -349,9 +349,9 @@ define void @test_vfmaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2
; SKX-LABEL: test_vfmaddps_256:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.33]
; SKX-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.33]
; SKX-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.33]
; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.50]
; SKX-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.50]
; SKX-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.50]
; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50]
; SKX-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
; SKX-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
@ -439,9 +439,9 @@ define void @test_vfmaddsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double>
; SKX-LABEL: test_vfmaddsd_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.33]
; SKX-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.33]
; SKX-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.33]
; SKX-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
; SKX-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
; SKX-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
; SKX-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50]
; SKX-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50]
; SKX-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50]
@ -527,9 +527,9 @@ define void @test_vfmaddss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2
; SKX-LABEL: test_vfmaddss_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.33]
; SKX-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.33]
; SKX-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.33]
; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
; SKX-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
; SKX-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50]
; SKX-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50]
; SKX-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50]
@ -619,9 +619,9 @@ define void @test_vfmaddsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x doubl
; SKX-LABEL: test_vfmaddsubpd_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.33]
; SKX-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.33]
; SKX-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.33]
; SKX-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.50]
; SKX-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.50]
; SKX-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.50]
; SKX-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
; SKX-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
; SKX-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
@ -711,9 +711,9 @@ define void @test_vfmaddsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x doubl
; SKX-LABEL: test_vfmaddsubpd_256:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.33]
; SKX-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.33]
; SKX-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.33]
; SKX-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.50]
; SKX-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.50]
; SKX-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.50]
; SKX-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50]
; SKX-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50]
; SKX-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50]
@ -801,9 +801,9 @@ define void @test_vfmaddsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float>
; SKX-LABEL: test_vfmaddsubps_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.33]
; SKX-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.33]
; SKX-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.33]
; SKX-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.50]
; SKX-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.50]
; SKX-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.50]
; SKX-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
; SKX-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
; SKX-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
@ -893,9 +893,9 @@ define void @test_vfmaddsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float>
; SKX-LABEL: test_vfmaddsubps_256:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.33]
; SKX-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.33]
; SKX-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.33]
; SKX-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.50]
; SKX-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.50]
; SKX-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.50]
; SKX-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50]
; SKX-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50]
; SKX-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50]
@ -987,9 +987,9 @@ define void @test_vfmsubaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x doubl
; SKX-LABEL: test_vfmsubaddpd_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.33]
; SKX-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.33]
; SKX-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.33]
; SKX-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.50]
; SKX-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.50]
; SKX-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.50]
; SKX-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
; SKX-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
; SKX-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
@ -1079,9 +1079,9 @@ define void @test_vfmsubaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x doubl
; SKX-LABEL: test_vfmsubaddpd_256:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.33]
; SKX-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.33]
; SKX-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.33]
; SKX-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.50]
; SKX-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.50]
; SKX-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.50]
; SKX-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50]
; SKX-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50]
; SKX-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50]
@ -1169,9 +1169,9 @@ define void @test_vfmsubaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float>
; SKX-LABEL: test_vfmsubaddps_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.33]
; SKX-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.33]
; SKX-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.33]
; SKX-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.50]
; SKX-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.50]
; SKX-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.50]
; SKX-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
; SKX-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
; SKX-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
@ -1261,9 +1261,9 @@ define void @test_vfmsubaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float>
; SKX-LABEL: test_vfmsubaddps_256:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.33]
; SKX-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.33]
; SKX-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.33]
; SKX-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.50]
; SKX-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.50]
; SKX-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.50]
; SKX-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50]
; SKX-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50]
; SKX-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50]
@ -1355,9 +1355,9 @@ define void @test_vfmsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double>
; SKX-LABEL: test_vfmsubpd_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.33]
; SKX-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.33]
; SKX-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.33]
; SKX-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
; SKX-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
; SKX-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
; SKX-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
; SKX-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
; SKX-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
@ -1447,9 +1447,9 @@ define void @test_vfmsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double>
; SKX-LABEL: test_vfmsubpd_256:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.33]
; SKX-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.33]
; SKX-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.33]
; SKX-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.50]
; SKX-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.50]
; SKX-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.50]
; SKX-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50]
; SKX-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50]
; SKX-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50]
@ -1537,9 +1537,9 @@ define void @test_vfmsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2
; SKX-LABEL: test_vfmsubps_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.33]
; SKX-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.33]
; SKX-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.33]
; SKX-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
; SKX-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
; SKX-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
; SKX-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
; SKX-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
; SKX-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
@ -1629,9 +1629,9 @@ define void @test_vfmsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2
; SKX-LABEL: test_vfmsubps_256:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.33]
; SKX-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.33]
; SKX-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.33]
; SKX-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.50]
; SKX-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.50]
; SKX-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.50]
; SKX-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50]
; SKX-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50]
; SKX-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50]
@ -1719,9 +1719,9 @@ define void @test_vfmsubsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double>
; SKX-LABEL: test_vfmsubsd_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.33]
; SKX-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.33]
; SKX-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.33]
; SKX-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
; SKX-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
; SKX-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
; SKX-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50]
; SKX-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50]
; SKX-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50]
@ -1807,9 +1807,9 @@ define void @test_vfmsubss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2
; SKX-LABEL: test_vfmsubss_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.33]
; SKX-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.33]
; SKX-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.33]
; SKX-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
; SKX-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
; SKX-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
; SKX-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50]
; SKX-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50]
; SKX-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50]
@ -1899,9 +1899,9 @@ define void @test_vfnmaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double>
; SKX-LABEL: test_vfnmaddpd_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.33]
; SKX-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.33]
; SKX-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.33]
; SKX-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
; SKX-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
; SKX-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
; SKX-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
; SKX-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
; SKX-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
@ -1991,9 +1991,9 @@ define void @test_vfnmaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double>
; SKX-LABEL: test_vfnmaddpd_256:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.33]
; SKX-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.33]
; SKX-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.33]
; SKX-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.50]
; SKX-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.50]
; SKX-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.50]
; SKX-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50]
; SKX-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
; SKX-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50]
@ -2081,9 +2081,9 @@ define void @test_vfnmaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a
; SKX-LABEL: test_vfnmaddps_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.33]
; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.33]
; SKX-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.33]
; SKX-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
; SKX-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
; SKX-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
; SKX-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
@ -2173,9 +2173,9 @@ define void @test_vfnmaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a
; SKX-LABEL: test_vfnmaddps_256:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.33]
; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.33]
; SKX-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.33]
; SKX-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.50]
; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.50]
; SKX-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.50]
; SKX-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50]
; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
; SKX-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50]
@ -2263,9 +2263,9 @@ define void @test_vfnmaddsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double>
; SKX-LABEL: test_vfnmaddsd_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.33]
; SKX-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.33]
; SKX-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.33]
; SKX-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
; SKX-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
; SKX-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
; SKX-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50]
; SKX-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50]
; SKX-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50]
@ -2351,9 +2351,9 @@ define void @test_vfnmaddss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a
; SKX-LABEL: test_vfnmaddss_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.33]
; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.33]
; SKX-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.33]
; SKX-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
; SKX-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
; SKX-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50]
; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50]
; SKX-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50]
@ -2443,9 +2443,9 @@ define void @test_vfnmsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double>
; SKX-LABEL: test_vfnmsubpd_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.33]
; SKX-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.33]
; SKX-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.33]
; SKX-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
; SKX-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
; SKX-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
; SKX-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
; SKX-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
; SKX-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
@ -2535,9 +2535,9 @@ define void @test_vfnmsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double>
; SKX-LABEL: test_vfnmsubpd_256:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.33]
; SKX-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.33]
; SKX-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.33]
; SKX-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.50]
; SKX-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.50]
; SKX-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.50]
; SKX-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50]
; SKX-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50]
; SKX-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50]
@ -2625,9 +2625,9 @@ define void @test_vfnmsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a
; SKX-LABEL: test_vfnmsubps_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.33]
; SKX-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.33]
; SKX-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.33]
; SKX-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
; SKX-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
; SKX-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
; SKX-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
; SKX-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
; SKX-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
@ -2717,9 +2717,9 @@ define void @test_vfnmsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a
; SKX-LABEL: test_vfnmsubps_256:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.33]
; SKX-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.33]
; SKX-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.33]
; SKX-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.50]
; SKX-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.50]
; SKX-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.50]
; SKX-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50]
; SKX-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50]
; SKX-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50]
@ -2807,9 +2807,9 @@ define void @test_vfnmsubsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double>
; SKX-LABEL: test_vfnmsubsd_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.33]
; SKX-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.33]
; SKX-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.33]
; SKX-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
; SKX-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
; SKX-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
; SKX-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50]
; SKX-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50]
; SKX-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50]
@ -2895,9 +2895,9 @@ define void @test_vfnmsubss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a
; SKX-LABEL: test_vfnmsubss_128:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.33]
; SKX-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.33]
; SKX-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.33]
; SKX-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
; SKX-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
; SKX-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
; SKX-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50]
; SKX-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50]
; SKX-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50]

View File

@ -152,9 +152,9 @@ define <2 x double> @test_cvtpi2pd(x86_mmx %a0, x86_mmx* %a1) optsize {
;
; SKX-LABEL: test_cvtpi2pd:
; SKX: # %bb.0:
; SKX-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [5:1.00]
; SKX-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [10:1.00]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [9:0.50]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_cvtpi2pd:
@ -232,7 +232,7 @@ define <4 x float> @test_cvtpi2ps(x86_mmx %a0, x86_mmx* %a1, <4 x float> %a2, <4
; SKX: # %bb.0:
; SKX-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [6:2.00]
; SKX-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [9:1.00]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_cvtpi2ps:

View File

@ -153,7 +153,7 @@ define float @f32_one_step(float %x) #1 {
; SKX: # %bb.0:
; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50]
; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.33]
; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast float 1.0, %x
ret float %div
@ -271,10 +271,10 @@ define float @f32_two_step(float %x) #2 {
; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
; SKX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:0.33]
; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.33]
; SKX-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.33]
; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.33]
; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.33]
; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.50]
; SKX-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.50]
; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.50]
; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast float 1.0, %x
ret float %div
@ -418,7 +418,7 @@ define <4 x float> @v4f32_one_step(<4 x float> %x) #1 {
; SKX: # %bb.0:
; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00]
; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.33]
; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <4 x float> %div
@ -536,10 +536,10 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 {
; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00]
; SKX-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50]
; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:0.33]
; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.33]
; SKX-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.33]
; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.33]
; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.33]
; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.50]
; SKX-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.50]
; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.50]
; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <4 x float> %div
@ -693,7 +693,7 @@ define <8 x float> @v8f32_one_step(<8 x float> %x) #1 {
; SKX: # %bb.0:
; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00]
; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [4:0.33]
; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <8 x float> %div
@ -824,10 +824,10 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 {
; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00]
; SKX-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:0.33]
; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [4:0.33]
; SKX-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [4:0.33]
; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2 sched: [4:0.33]
; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3 sched: [4:0.33]
; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [4:0.50]
; SKX-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [4:0.50]
; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2 sched: [4:0.50]
; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3 sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <8 x float> %div
@ -1031,9 +1031,9 @@ define <16 x float> @v16f32_one_step(<16 x float> %x) #1 {
;
; SKX-LABEL: v16f32_one_step:
; SKX: # %bb.0:
; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [9:2.00]
; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [4:2.00]
; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [11:0.50]
; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [4:0.33]
; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <16 x float> %div
@ -1235,13 +1235,13 @@ define <16 x float> @v16f32_two_step(<16 x float> %x) #2 {
;
; SKX-LABEL: v16f32_two_step:
; SKX: # %bb.0:
; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [9:2.00]
; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [4:2.00]
; SKX-NEXT: vbroadcastss {{.*#+}} zmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] sched: [8:0.50]
; SKX-NEXT: vmovaps %zmm1, %zmm3 # sched: [1:0.33]
; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [4:0.33]
; SKX-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1 sched: [4:0.33]
; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2 sched: [4:0.33]
; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3 sched: [4:0.33]
; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [4:0.50]
; SKX-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1 sched: [4:0.50]
; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2 sched: [4:0.50]
; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3 sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <16 x float> %div

View File

@ -154,7 +154,7 @@ define float @f32_one_step_2(float %x) #1 {
; SKX: # %bb.0:
; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50]
; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.33]
; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50]
; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast float 3456.0, %x
@ -254,9 +254,9 @@ define float @f32_one_step_2_divs(float %x) #1 {
; SKX: # %bb.0:
; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50]
; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.33]
; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50]
; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
; SKX-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast float 3456.0, %x
%div2 = fdiv fast float %div, %x
@ -383,10 +383,10 @@ define float @f32_two_step_2(float %x) #2 {
; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
; SKX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:0.33]
; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.33]
; SKX-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.33]
; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.33]
; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.33]
; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.50]
; SKX-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.50]
; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.50]
; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.50]
; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast float 6789.0, %x
@ -480,7 +480,7 @@ define <4 x float> @v4f32_one_step2(<4 x float> %x) #1 {
; SKX: # %bb.0:
; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00]
; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.33]
; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50]
; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x
@ -582,9 +582,9 @@ define <4 x float> @v4f32_one_step_2_divs(<4 x float> %x) #1 {
; SKX: # %bb.0:
; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00]
; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.33]
; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50]
; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [10:0.50]
; SKX-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x
%div2 = fdiv fast <4 x float> %div, %x
@ -711,10 +711,10 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00]
; SKX-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50]
; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:0.33]
; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.33]
; SKX-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.33]
; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.33]
; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.33]
; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.50]
; SKX-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.50]
; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.50]
; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.50]
; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x
@ -816,7 +816,7 @@ define <8 x float> @v8f32_one_step2(<8 x float> %x) #1 {
; SKX: # %bb.0:
; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00]
; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [4:0.33]
; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [4:0.50]
; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x
@ -927,9 +927,9 @@ define <8 x float> @v8f32_one_step_2_divs(<8 x float> %x) #1 {
; SKX: # %bb.0:
; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00]
; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [4:0.33]
; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [4:0.50]
; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [11:0.50]
; SKX-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x
%div2 = fdiv fast <8 x float> %div, %x
@ -1070,10 +1070,10 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 {
; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00]
; SKX-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:0.33]
; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [4:0.33]
; SKX-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [4:0.33]
; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2 sched: [4:0.33]
; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3 sched: [4:0.33]
; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [4:0.50]
; SKX-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [4:0.50]
; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2 sched: [4:0.50]
; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3 sched: [4:0.50]
; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x
@ -1331,9 +1331,9 @@ define <16 x float> @v16f32_one_step2(<16 x float> %x) #1 {
;
; SKX-LABEL: v16f32_one_step2:
; SKX: # %bb.0:
; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [9:2.00]
; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [4:2.00]
; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [11:0.50]
; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [4:0.33]
; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [4:0.50]
; SKX-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, %x
@ -1498,11 +1498,11 @@ define <16 x float> @v16f32_one_step_2_divs(<16 x float> %x) #1 {
;
; SKX-LABEL: v16f32_one_step_2_divs:
; SKX: # %bb.0:
; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [9:2.00]
; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [4:2.00]
; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [11:0.50]
; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [4:0.33]
; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [4:0.50]
; SKX-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm1 # sched: [11:0.50]
; SKX-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.33]
; SKX-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, %x
%div2 = fdiv fast <16 x float> %div, %x
@ -1721,13 +1721,13 @@ define <16 x float> @v16f32_two_step2(<16 x float> %x) #2 {
;
; SKX-LABEL: v16f32_two_step2:
; SKX: # %bb.0:
; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [9:2.00]
; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [4:2.00]
; SKX-NEXT: vbroadcastss {{.*#+}} zmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] sched: [8:0.50]
; SKX-NEXT: vmovaps %zmm1, %zmm3 # sched: [1:0.33]
; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [4:0.33]
; SKX-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1 sched: [4:0.33]
; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2 sched: [4:0.33]
; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3 sched: [4:0.33]
; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [4:0.50]
; SKX-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1 sched: [4:0.50]
; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2 sched: [4:0.50]
; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3 sched: [4:0.50]
; SKX-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, %x
@ -1786,7 +1786,7 @@ define <16 x float> @v16f32_no_step(<16 x float> %x) #3 {
;
; SKX-LABEL: v16f32_no_step:
; SKX: # %bb.0:
; SKX-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [9:2.00]
; SKX-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [4:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <16 x float> %div
@ -1861,7 +1861,7 @@ define <16 x float> @v16f32_no_step2(<16 x float> %x) #3 {
;
; SKX-LABEL: v16f32_no_step2:
; SKX: # %bb.0:
; SKX-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [9:2.00]
; SKX-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [4:2.00]
; SKX-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, %x

View File

@ -8376,13 +8376,13 @@ define void @test_nop(i16 %a0, i32 %a1, i64 %a2, i16 *%p0, i32 *%p1, i64 *%p2) o
; SKX-LABEL: test_nop:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: nop # sched: [1:0.25]
; SKX-NEXT: nopw %di # sched: [1:0.25]
; SKX-NEXT: nopw (%rcx) # sched: [1:0.25]
; SKX-NEXT: nopl %esi # sched: [1:0.25]
; SKX-NEXT: nopl (%r8) # sched: [1:0.25]
; SKX-NEXT: nopq %rdx # sched: [1:0.25]
; SKX-NEXT: nopq (%r9) # sched: [1:0.25]
; SKX-NEXT: nop # sched: [1:0.17]
; SKX-NEXT: nopw %di # sched: [1:0.17]
; SKX-NEXT: nopw (%rcx) # sched: [1:0.17]
; SKX-NEXT: nopl %esi # sched: [1:0.17]
; SKX-NEXT: nopl (%r8) # sched: [1:0.17]
; SKX-NEXT: nopq %rdx # sched: [1:0.17]
; SKX-NEXT: nopq (%r9) # sched: [1:0.17]
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;

View File

@ -23,7 +23,7 @@ define <4 x i32> @test_sha1msg1(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
;
; CANNONLAKE-LABEL: test_sha1msg1:
; CANNONLAKE: # %bb.0:
; CANNONLAKE-NEXT: sha1msg1 %xmm1, %xmm0 # sched: [4:0.33]
; CANNONLAKE-NEXT: sha1msg1 %xmm1, %xmm0 # sched: [4:0.50]
; CANNONLAKE-NEXT: sha1msg1 (%rdi), %xmm0 # sched: [10:0.50]
; CANNONLAKE-NEXT: retq # sched: [7:1.00]
;
@ -54,7 +54,7 @@ define <4 x i32> @test_sha1msg2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
;
; CANNONLAKE-LABEL: test_sha1msg2:
; CANNONLAKE: # %bb.0:
; CANNONLAKE-NEXT: sha1msg2 %xmm1, %xmm0 # sched: [4:0.33]
; CANNONLAKE-NEXT: sha1msg2 %xmm1, %xmm0 # sched: [4:0.50]
; CANNONLAKE-NEXT: sha1msg2 (%rdi), %xmm0 # sched: [10:0.50]
; CANNONLAKE-NEXT: retq # sched: [7:1.00]
;
@ -85,7 +85,7 @@ define <4 x i32> @test_sha1nexte(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
;
; CANNONLAKE-LABEL: test_sha1nexte:
; CANNONLAKE: # %bb.0:
; CANNONLAKE-NEXT: sha1nexte %xmm1, %xmm0 # sched: [4:0.33]
; CANNONLAKE-NEXT: sha1nexte %xmm1, %xmm0 # sched: [4:0.50]
; CANNONLAKE-NEXT: sha1nexte (%rdi), %xmm0 # sched: [10:0.50]
; CANNONLAKE-NEXT: retq # sched: [7:1.00]
;
@ -116,7 +116,7 @@ define <4 x i32> @test_sha1rnds4(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
;
; CANNONLAKE-LABEL: test_sha1rnds4:
; CANNONLAKE: # %bb.0:
; CANNONLAKE-NEXT: sha1rnds4 $3, %xmm1, %xmm0 # sched: [4:0.33]
; CANNONLAKE-NEXT: sha1rnds4 $3, %xmm1, %xmm0 # sched: [4:0.50]
; CANNONLAKE-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # sched: [10:0.50]
; CANNONLAKE-NEXT: retq # sched: [7:1.00]
;
@ -151,7 +151,7 @@ define <4 x i32> @test_sha256msg1(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2)
;
; CANNONLAKE-LABEL: test_sha256msg1:
; CANNONLAKE: # %bb.0:
; CANNONLAKE-NEXT: sha256msg1 %xmm1, %xmm0 # sched: [4:0.33]
; CANNONLAKE-NEXT: sha256msg1 %xmm1, %xmm0 # sched: [4:0.50]
; CANNONLAKE-NEXT: sha256msg1 (%rdi), %xmm0 # sched: [10:0.50]
; CANNONLAKE-NEXT: retq # sched: [7:1.00]
;
@ -182,7 +182,7 @@ define <4 x i32> @test_sha256msg2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2)
;
; CANNONLAKE-LABEL: test_sha256msg2:
; CANNONLAKE: # %bb.0:
; CANNONLAKE-NEXT: sha256msg2 %xmm1, %xmm0 # sched: [4:0.33]
; CANNONLAKE-NEXT: sha256msg2 %xmm1, %xmm0 # sched: [4:0.50]
; CANNONLAKE-NEXT: sha256msg2 (%rdi), %xmm0 # sched: [10:0.50]
; CANNONLAKE-NEXT: retq # sched: [7:1.00]
;
@ -221,7 +221,7 @@ define <4 x i32> @test_sha256rnds2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2,
; CANNONLAKE: # %bb.0:
; CANNONLAKE-NEXT: vmovaps %xmm0, %xmm3 # sched: [1:0.33]
; CANNONLAKE-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33]
; CANNONLAKE-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [4:0.33]
; CANNONLAKE-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [4:0.50]
; CANNONLAKE-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [10:0.50]
; CANNONLAKE-NEXT: vmovaps %xmm3, %xmm0 # sched: [1:0.33]
; CANNONLAKE-NEXT: retq # sched: [7:1.00]

View File

@ -90,13 +90,13 @@ define <4 x float> @test_addps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
;
; SKX-SSE-LABEL: test_addps:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: addps (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_addps:
; SKX: # %bb.0:
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -198,13 +198,13 @@ define float @test_addss(float %a0, float %a1, float *%a2) {
;
; SKX-SSE-LABEL: test_addss:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: addss (%rdi), %xmm0 # sched: [9:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_addss:
; SKX: # %bb.0:
; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -551,14 +551,14 @@ define <4 x float> @test_cmpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
;
; SKX-SSE-LABEL: test_cmpps:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [4:0.33]
; SKX-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [4:0.50]
; SKX-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cmpps:
; SKX: # %bb.0:
; SKX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [4:0.33]
; SKX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [4:0.50]
; SKX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
@ -669,13 +669,13 @@ define float @test_cmpss(float %a0, float %a1, float *%a2) {
;
; SKX-SSE-LABEL: test_cmpss:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [9:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cmpss:
; SKX: # %bb.0:
; SKX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -1041,14 +1041,14 @@ define float @test_cvtsi2ss(i32 %a0, i32 *%a1) {
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [5:1.00]
; SKX-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [9:1.00]
; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtsi2ss:
; SKX: # %bb.0:
; SKX-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00]
; SKX-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_cvtsi2ss:
@ -1167,14 +1167,14 @@ define float @test_cvtsi2ssq(i64 %a0, i64 *%a1) {
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [6:2.00]
; SKX-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [9:1.00]
; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtsi2ssq:
; SKX: # %bb.0:
; SKX-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [6:2.00]
; SKX-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_cvtsi2ssq:
@ -1420,14 +1420,14 @@ define i64 @test_cvtss2siq(float %a0, float *%a1) {
;
; SKX-SSE-LABEL: test_cvtss2siq:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [6:1.00]
; SKX-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [7:1.00]
; SKX-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [11:1.00]
; SKX-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtss2siq:
; SKX: # %bb.0:
; SKX-NEXT: vcvtss2si %xmm0, %rcx # sched: [6:1.00]
; SKX-NEXT: vcvtss2si %xmm0, %rcx # sched: [7:1.00]
; SKX-NEXT: vcvtss2si (%rdi), %rax # sched: [11:1.00]
; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
@ -1549,14 +1549,14 @@ define i32 @test_cvttss2si(float %a0, float *%a1) {
;
; SKX-SSE-LABEL: test_cvttss2si:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [7:1.00]
; SKX-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [6:1.00]
; SKX-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [11:1.00]
; SKX-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvttss2si:
; SKX: # %bb.0:
; SKX-NEXT: vcvttss2si %xmm0, %ecx # sched: [7:1.00]
; SKX-NEXT: vcvttss2si %xmm0, %ecx # sched: [6:1.00]
; SKX-NEXT: vcvttss2si (%rdi), %eax # sched: [11:1.00]
; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
@ -2116,13 +2116,13 @@ define <4 x float> @test_maxps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
;
; SKX-SSE-LABEL: test_maxps:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_maxps:
; SKX: # %bb.0:
; SKX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -2225,13 +2225,13 @@ define <4 x float> @test_maxss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
;
; SKX-SSE-LABEL: test_maxss:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [9:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_maxss:
; SKX: # %bb.0:
; SKX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -2334,13 +2334,13 @@ define <4 x float> @test_minps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
;
; SKX-SSE-LABEL: test_minps:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: minps %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: minps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: minps (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_minps:
; SKX: # %bb.0:
; SKX-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -2443,13 +2443,13 @@ define <4 x float> @test_minss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
;
; SKX-SSE-LABEL: test_minss:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: minss %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: minss %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: minss (%rdi), %xmm0 # sched: [9:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_minss:
; SKX: # %bb.0:
; SKX-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -2564,14 +2564,14 @@ define void @test_movaps(<4 x float> *%a0, <4 x float> *%a1) {
; SKX-SSE-LABEL: test_movaps:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50]
; SKX-SSE-NEXT: addps %xmm0, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addps %xmm0, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movaps:
; SKX: # %bb.0:
; SKX-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50]
; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -2795,7 +2795,7 @@ define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) {
; SKX-SSE-LABEL: test_movhps:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; SKX-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.33]
; SKX-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.50]
; SKX-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
; SKX-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
@ -2803,7 +2803,7 @@ define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) {
; SKX-LABEL: test_movhps:
; SKX: # %bb.0:
; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -2918,13 +2918,13 @@ define <4 x float> @test_movlhps(<4 x float> %a0, <4 x float> %a1) {
; SKX-SSE-LABEL: test_movlhps:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movlhps:
; SKX: # %bb.0:
; SKX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_movlhps:
@ -3036,14 +3036,14 @@ define void @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) {
; SKX-SSE-LABEL: test_movlps:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
; SKX-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.33]
; SKX-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.50]
; SKX-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movlps:
; SKX: # %bb.0:
; SKX-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -3352,14 +3352,14 @@ define void @test_movss_mem(float* %a0, float* %a1) {
; SKX-SSE-LABEL: test_movss_mem:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
; SKX-SSE-NEXT: addss %xmm0, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addss %xmm0, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movss_mem:
; SKX: # %bb.0:
; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
; SKX-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -3572,14 +3572,14 @@ define void @test_movups(<4 x float> *%a0, <4 x float> *%a1) {
; SKX-SSE-LABEL: test_movups:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50]
; SKX-SSE-NEXT: addps %xmm0, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addps %xmm0, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movups:
; SKX: # %bb.0:
; SKX-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -3685,13 +3685,13 @@ define <4 x float> @test_mulps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
;
; SKX-SSE-LABEL: test_mulps:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_mulps:
; SKX: # %bb.0:
; SKX-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -3793,13 +3793,13 @@ define float @test_mulss(float %a0, float %a1, float *%a2) {
;
; SKX-SSE-LABEL: test_mulss:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [9:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_mulss:
; SKX: # %bb.0:
; SKX-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -4206,14 +4206,14 @@ define <4 x float> @test_rcpps(<4 x float> %a0, <4 x float> *%a1) {
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [4:1.00]
; SKX-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [10:1.00]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_rcpps:
; SKX: # %bb.0:
; SKX-NEXT: vrcpps %xmm0, %xmm0 # sched: [4:1.00]
; SKX-NEXT: vrcpps (%rdi), %xmm1 # sched: [10:1.00]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_rcpps:
@ -4347,7 +4347,7 @@ define <4 x float> @test_rcpss(float %a0, float *%a1) {
; SKX-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [4:1.00]
; SKX-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
; SKX-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [4:1.00]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_rcpss:
@ -4355,7 +4355,7 @@ define <4 x float> @test_rcpss(float %a0, float *%a1) {
; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
; SKX-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_rcpss:
@ -4483,14 +4483,14 @@ define <4 x float> @test_rsqrtps(<4 x float> %a0, <4 x float> *%a1) {
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [4:1.00]
; SKX-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [10:1.00]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_rsqrtps:
; SKX: # %bb.0:
; SKX-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [4:1.00]
; SKX-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [10:1.00]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_rsqrtps:
@ -4624,7 +4624,7 @@ define <4 x float> @test_rsqrtss(float %a0, float *%a1) {
; SKX-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [4:1.00]
; SKX-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
; SKX-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [4:1.00]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_rsqrtss:
@ -4632,7 +4632,7 @@ define <4 x float> @test_rsqrtss(float %a0, float *%a1) {
; SKX-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
; SKX-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_rsqrtss:
@ -4854,14 +4854,14 @@ define <4 x float> @test_shufps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
; SKX-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_shufps:
; SKX: # %bb.0:
; SKX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
; SKX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_shufps:
@ -4981,14 +4981,14 @@ define <4 x float> @test_sqrtps(<4 x float> %a0, <4 x float> *%a1) {
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [12:3.00]
; SKX-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [18:3.00]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_sqrtps:
; SKX: # %bb.0:
; SKX-NEXT: vsqrtps %xmm0, %xmm0 # sched: [12:3.00]
; SKX-NEXT: vsqrtps (%rdi), %xmm1 # sched: [18:3.00]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_sqrtps:
@ -5122,7 +5122,7 @@ define <4 x float> @test_sqrtss(<4 x float> %a0, <4 x float> *%a1) {
; SKX-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [12:3.00]
; SKX-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50]
; SKX-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [12:3.00]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_sqrtss:
@ -5130,7 +5130,7 @@ define <4 x float> @test_sqrtss(<4 x float> %a0, <4 x float> *%a1) {
; SKX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:3.00]
; SKX-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50]
; SKX-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [12:3.00]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_sqrtss:
@ -5351,13 +5351,13 @@ define <4 x float> @test_subps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
;
; SKX-SSE-LABEL: test_subps:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: subps %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: subps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: subps (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_subps:
; SKX: # %bb.0:
; SKX-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -5459,13 +5459,13 @@ define float @test_subss(float %a0, float %a1, float *%a2) {
;
; SKX-SSE-LABEL: test_subss:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: subss %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: subss %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: subss (%rdi), %xmm0 # sched: [9:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_subss:
; SKX: # %bb.0:
; SKX-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -5826,14 +5826,14 @@ define <4 x float> @test_unpckhps(<4 x float> %a0, <4 x float> %a1, <4 x float>
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; SKX-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_unpckhps:
; SKX: # %bb.0:
; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; SKX-NEXT: vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_unpckhps:
@ -5952,14 +5952,14 @@ define <4 x float> @test_unpcklps(<4 x float> %a0, <4 x float> %a1, <4 x float>
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
; SKX-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_unpcklps:
; SKX: # %bb.0:
; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
; SKX-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_unpcklps:
@ -6210,7 +6210,7 @@ define <4 x float> @test_fnop() nounwind {
; SKX-SSE-LABEL: test_fnop:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: #APP
; SKX-SSE-NEXT: nop # sched: [1:0.25]
; SKX-SSE-NEXT: nop # sched: [1:0.17]
; SKX-SSE-NEXT: #NO_APP
; SKX-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
@ -6218,7 +6218,7 @@ define <4 x float> @test_fnop() nounwind {
; SKX-LABEL: test_fnop:
; SKX: # %bb.0:
; SKX-NEXT: #APP
; SKX-NEXT: nop # sched: [1:0.25]
; SKX-NEXT: nop # sched: [1:0.17]
; SKX-NEXT: #NO_APP
; SKX-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]

View File

@ -88,13 +88,13 @@ define <2 x double> @test_addpd(<2 x double> %a0, <2 x double> %a1, <2 x double>
;
; SKX-SSE-LABEL: test_addpd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_addpd:
; SKX: # %bb.0:
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -196,13 +196,13 @@ define double @test_addsd(double %a0, double %a1, double *%a2) {
;
; SKX-SSE-LABEL: test_addsd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [9:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_addsd:
; SKX: # %bb.0:
; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -317,14 +317,14 @@ define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:0.50]
; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_andpd:
; SKX: # %bb.0:
; SKX-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_andpd:
@ -447,14 +447,14 @@ define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:0.50]
; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_andnotpd:
; SKX: # %bb.0:
; SKX-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_andnotpd:
@ -673,14 +673,14 @@ define <2 x double> @test_cmppd(<2 x double> %a0, <2 x double> %a1, <2 x double>
;
; SKX-SSE-LABEL: test_cmppd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [4:0.33]
; SKX-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [4:0.50]
; SKX-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cmppd:
; SKX: # %bb.0:
; SKX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [4:0.33]
; SKX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [4:0.50]
; SKX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
@ -790,13 +790,13 @@ define double @test_cmpsd(double %a0, double %a1, double *%a2) {
;
; SKX-SSE-LABEL: test_cmpsd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [9:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cmpsd:
; SKX: # %bb.0:
; SKX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -1162,16 +1162,16 @@ define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) {
;
; SKX-SSE-LABEL: test_cvtdq2pd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [5:1.00]
; SKX-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:0.50]
; SKX-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [11:1.00]
; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtdq2pd:
; SKX: # %bb.0:
; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00]
; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [11:1.00]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_cvtdq2pd:
@ -1291,16 +1291,16 @@ define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) {
;
; SKX-SSE-LABEL: test_cvtdq2ps:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [4:0.33]
; SKX-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [4:0.50]
; SKX-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtdq2ps:
; SKX: # %bb.0:
; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [10:0.50]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_cvtdq2ps:
@ -1427,7 +1427,7 @@ define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) {
; SKX-LABEL: test_cvtpd2dq:
; SKX: # %bb.0:
; SKX-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00]
; SKX-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
; SKX-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:0.50]
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -1550,14 +1550,14 @@ define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) {
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [5:1.00]
; SKX-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtpd2ps:
; SKX: # %bb.0:
; SKX-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [5:1.00]
; SKX-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_cvtpd2ps:
@ -1676,14 +1676,14 @@ define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) {
;
; SKX-SSE-LABEL: test_cvtps2dq:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [4:0.33]
; SKX-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [4:0.50]
; SKX-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtps2dq:
; SKX: # %bb.0:
; SKX-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [10:0.50]
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
@ -1806,14 +1806,14 @@ define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) {
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [5:1.00]
; SKX-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [9:0.50]
; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtps2pd:
; SKX: # %bb.0:
; SKX-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [5:1.00]
; SKX-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [9:0.50]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_cvtps2pd:
@ -2205,7 +2205,7 @@ define float @test_cvtsd2ss(double %a0, double *%a1) {
; SKX-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [5:1.00]
; SKX-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
; SKX-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [5:1.00]
; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtsd2ss:
@ -2213,7 +2213,7 @@ define float @test_cvtsd2ss(double %a0, double *%a1) {
; SKX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
; SKX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
; SKX-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_cvtsd2ss:
@ -2336,14 +2336,14 @@ define double @test_cvtsi2sd(i32 %a0, i32 *%a1) {
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [5:1.00]
; SKX-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtsi2sd:
; SKX: # %bb.0:
; SKX-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00]
; SKX-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_cvtsi2sd:
@ -2462,14 +2462,14 @@ define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) {
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00]
; SKX-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtsi2sdq:
; SKX: # %bb.0:
; SKX-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00]
; SKX-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_cvtsi2sdq:
@ -2603,7 +2603,7 @@ define double @test_cvtss2sd(float %a0, float *%a1) {
; SKX-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [5:1.00]
; SKX-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
; SKX-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [5:1.00]
; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtss2sd:
@ -2611,7 +2611,7 @@ define double @test_cvtss2sd(float %a0, float *%a1) {
; SKX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
; SKX-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_cvtss2sd:
@ -2742,7 +2742,7 @@ define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) {
; SKX-LABEL: test_cvttpd2dq:
; SKX: # %bb.0:
; SKX-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00]
; SKX-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
; SKX-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:0.50]
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -2863,14 +2863,14 @@ define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) {
;
; SKX-SSE-LABEL: test_cvttps2dq:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [4:0.33]
; SKX-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [4:0.50]
; SKX-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvttps2dq:
; SKX: # %bb.0:
; SKX-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [10:0.50]
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
@ -3732,13 +3732,13 @@ define <2 x double> @test_maxpd(<2 x double> %a0, <2 x double> %a1, <2 x double>
;
; SKX-SSE-LABEL: test_maxpd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_maxpd:
; SKX: # %bb.0:
; SKX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -3841,13 +3841,13 @@ define <2 x double> @test_maxsd(<2 x double> %a0, <2 x double> %a1, <2 x double>
;
; SKX-SSE-LABEL: test_maxsd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [9:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_maxsd:
; SKX: # %bb.0:
; SKX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -3950,13 +3950,13 @@ define <2 x double> @test_minpd(<2 x double> %a0, <2 x double> %a1, <2 x double>
;
; SKX-SSE-LABEL: test_minpd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_minpd:
; SKX: # %bb.0:
; SKX-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -4059,13 +4059,13 @@ define <2 x double> @test_minsd(<2 x double> %a0, <2 x double> %a1, <2 x double>
;
; SKX-SSE-LABEL: test_minsd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [9:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_minsd:
; SKX: # %bb.0:
; SKX-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -4180,14 +4180,14 @@ define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) {
; SKX-SSE-LABEL: test_movapd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50]
; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movapd:
; SKX: # %bb.0:
; SKX-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50]
; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -4917,14 +4917,14 @@ define void @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) {
; SKX-SSE-LABEL: test_movhpd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.33]
; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50]
; SKX-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movhpd:
; SKX: # %bb.0:
; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -5045,14 +5045,14 @@ define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) {
; SKX-SSE-LABEL: test_movlpd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.33]
; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50]
; SKX-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movlpd:
; SKX: # %bb.0:
; SKX-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -5362,13 +5362,13 @@ define void @test_movntpd(<2 x double> %a0, <2 x double> *%a1) {
;
; SKX-SSE-LABEL: test_movntpd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movntpd:
; SKX: # %bb.0:
; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -5717,14 +5717,14 @@ define void @test_movsd_mem(double* %a0, double* %a1) {
; SKX-SSE-LABEL: test_movsd_mem:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
; SKX-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movsd_mem:
; SKX: # %bb.0:
; SKX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
; SKX-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -5945,14 +5945,14 @@ define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) {
; SKX-SSE-LABEL: test_movupd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50]
; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movupd:
; SKX: # %bb.0:
; SKX-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50]
; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -6058,13 +6058,13 @@ define <2 x double> @test_mulpd(<2 x double> %a0, <2 x double> %a1, <2 x double>
;
; SKX-SSE-LABEL: test_mulpd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_mulpd:
; SKX: # %bb.0:
; SKX-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -6166,13 +6166,13 @@ define double @test_mulsd(double %a0, double %a1, double *%a2) {
;
; SKX-SSE-LABEL: test_mulsd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [9:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_mulsd:
; SKX: # %bb.0:
; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -6287,14 +6287,14 @@ define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:0.50]
; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_orpd:
; SKX: # %bb.0:
; SKX-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_orpd:
@ -9176,13 +9176,13 @@ define <4 x i32> @test_pmaddwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
;
; SKX-SSE-LABEL: test_pmaddwd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pmaddwd:
; SKX: # %bb.0:
; SKX-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -9830,13 +9830,13 @@ define <8 x i16> @test_pmulhuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
;
; SKX-SSE-LABEL: test_pmulhuw:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pmulhuw:
; SKX: # %bb.0:
; SKX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -9939,13 +9939,13 @@ define <8 x i16> @test_pmulhw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
;
; SKX-SSE-LABEL: test_pmulhw:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pmulhw:
; SKX: # %bb.0:
; SKX-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -10048,13 +10048,13 @@ define <8 x i16> @test_pmullw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
;
; SKX-SSE-LABEL: test_pmullw:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pmullw:
; SKX: # %bb.0:
; SKX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -10156,13 +10156,13 @@ define <2 x i64> @test_pmuludq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
;
; SKX-SSE-LABEL: test_pmuludq:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pmuludq:
; SKX: # %bb.0:
; SKX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -14094,14 +14094,14 @@ define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
; SKX-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_shufpd:
; SKX: # %bb.0:
; SKX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
; SKX-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_shufpd:
@ -14221,14 +14221,14 @@ define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) {
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [18:6.00]
; SKX-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [24:6.00]
; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_sqrtpd:
; SKX: # %bb.0:
; SKX-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [18:6.00]
; SKX-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [24:6.00]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_sqrtpd:
@ -14362,7 +14362,7 @@ define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) {
; SKX-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [18:6.00]
; SKX-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50]
; SKX-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [18:6.00]
; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_sqrtsd:
@ -14370,7 +14370,7 @@ define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) {
; SKX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:6.00]
; SKX-NEXT: vmovapd (%rdi), %xmm1 # sched: [6:0.50]
; SKX-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [18:6.00]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_sqrtsd:
@ -14481,13 +14481,13 @@ define <2 x double> @test_subpd(<2 x double> %a0, <2 x double> %a1, <2 x double>
;
; SKX-SSE-LABEL: test_subpd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_subpd:
; SKX: # %bb.0:
; SKX-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -14589,13 +14589,13 @@ define double @test_subsd(double %a0, double %a1, double *%a2) {
;
; SKX-SSE-LABEL: test_subsd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [9:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_subsd:
; SKX: # %bb.0:
; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -14956,14 +14956,14 @@ define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
; SKX-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_unpckhpd:
; SKX: # %bb.0:
; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
; SKX-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_unpckhpd:
@ -15097,7 +15097,7 @@ define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
; SKX-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
; SKX-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:0.33]
; SKX-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.33]
; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50]
; SKX-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
@ -15105,7 +15105,7 @@ define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
; SKX: # %bb.0:
; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
; SKX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [7:1.00]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_unpcklpd:
@ -15228,14 +15228,14 @@ define <2 x double> @test_xorpd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:0.50]
; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_xorpd:
; SKX: # %bb.0:
; SKX-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_xorpd:

View File

@ -88,13 +88,13 @@ define <2 x double> @test_addsubpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
;
; SKX-SSE-LABEL: test_addsubpd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_addsubpd:
; SKX: # %bb.0:
; SKX-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -197,13 +197,13 @@ define <4 x float> @test_addsubps(<4 x float> %a0, <4 x float> %a1, <4 x float>
;
; SKX-SSE-LABEL: test_addsubps:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_addsubps:
; SKX: # %bb.0:
; SKX-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -972,14 +972,14 @@ define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) {
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
; SKX-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
; SKX-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movddup:
; SKX: # %bb.0:
; SKX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
; SKX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50]
; SKX-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_movddup:
@ -1099,14 +1099,14 @@ define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) {
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
; SKX-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movshdup:
; SKX: # %bb.0:
; SKX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
; SKX-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_movshdup:
@ -1226,14 +1226,14 @@ define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) {
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
; SKX-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movsldup:
; SKX: # %bb.0:
; SKX-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
; SKX-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_movsldup:

View File

@ -92,14 +92,14 @@ define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x doubl
; SKX-SSE-LABEL: test_blendpd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_blendpd:
; SKX: # %bb.0:
; SKX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -212,14 +212,14 @@ define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> *
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
; SKX-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_blendps:
; SKX: # %bb.0:
; SKX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
; SKX-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_blendps:
@ -2065,14 +2065,14 @@ define <8 x i16> @test_phminposuw(<8 x i16> *%a0) {
;
; SKX-SSE-LABEL: test_phminposuw:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [10:1.00]
; SKX-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:1.00]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_phminposuw:
; SKX: # %bb.0:
; SKX-NEXT: vphminposuw (%rdi), %xmm0 # sched: [10:0.50]
; SKX-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vphminposuw (%rdi), %xmm0 # sched: [10:1.00]
; SKX-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_phminposuw:
@ -4767,13 +4767,13 @@ define <2 x i64> @test_pmuldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
;
; SKX-SSE-LABEL: test_pmuldq:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pmuldq:
; SKX: # %bb.0:
; SKX-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -4871,14 +4871,14 @@ define <4 x i32> @test_pmulld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
;
; SKX-SSE-LABEL: test_pmulld:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [10:0.67]
; SKX-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [16:0.67]
; SKX-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [10:1.00]
; SKX-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [16:1.00]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pmulld:
; SKX: # %bb.0:
; SKX-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:0.67]
; SKX-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:0.67]
; SKX-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:1.00]
; SKX-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_pmulld:
@ -5153,16 +5153,16 @@ define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) {
;
; SKX-SSE-LABEL: test_roundpd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [8:0.67]
; SKX-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [14:0.67]
; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [8:1.00]
; SKX-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [14:1.00]
; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_roundpd:
; SKX: # %bb.0:
; SKX-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [8:0.67]
; SKX-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [14:0.67]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [8:1.00]
; SKX-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [14:1.00]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_roundpd:
@ -5275,16 +5275,16 @@ define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) {
;
; SKX-SSE-LABEL: test_roundps:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [8:0.67]
; SKX-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [14:0.67]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [8:1.00]
; SKX-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [14:1.00]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_roundps:
; SKX: # %bb.0:
; SKX-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [8:0.67]
; SKX-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [14:0.67]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [8:1.00]
; SKX-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [14:1.00]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_roundps:
@ -5402,16 +5402,16 @@ define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x doubl
; SKX-SSE-LABEL: test_roundsd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.33]
; SKX-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [8:0.67]
; SKX-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [14:0.67]
; SKX-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [8:1.00]
; SKX-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [14:1.00]
; SKX-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_roundsd:
; SKX: # %bb.0:
; SKX-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [8:0.67]
; SKX-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [14:0.67]
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [8:1.00]
; SKX-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [14:1.00]
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_roundsd:
@ -5531,16 +5531,16 @@ define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> *
; SKX-SSE-LABEL: test_roundss:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.33]
; SKX-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [8:0.67]
; SKX-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [14:0.67]
; SKX-SSE-NEXT: addps %xmm2, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [8:1.00]
; SKX-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [14:1.00]
; SKX-SSE-NEXT: addps %xmm2, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_roundss:
; SKX: # %bb.0:
; SKX-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [8:0.67]
; SKX-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [14:0.67]
; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [8:1.00]
; SKX-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [14:1.00]
; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_roundss:

View File

@ -1249,13 +1249,13 @@ define <8 x i16> @test_pmaddubsw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
;
; SKX-SSE-LABEL: test_pmaddubsw:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pmaddubsw:
; SKX: # %bb.0:
; SKX-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
@ -1359,13 +1359,13 @@ define <8 x i16> @test_pmulhrsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
;
; SKX-SSE-LABEL: test_pmulhrsw:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [4:0.33]
; SKX-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pmulhrsw:
; SKX: # %bb.0:
; SKX-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;

View File

@ -1018,25 +1018,25 @@ vzeroupper
# CHECK-NEXT: [6]: HasSideEffects
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 4 0.33 vaddpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vaddpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vaddpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vaddpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vaddpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vaddpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vaddps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vaddps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vaddps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vaddps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vaddsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vaddsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vaddsd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vaddss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vaddss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vaddss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vaddsubpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vaddsubpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vaddsubpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vaddsubpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vaddsubpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vaddsubpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vaddsubps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vaddsubps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vaddsubps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vaddsubps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vaddsubps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vaddsubps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 1.00 vaesdec %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 1.00 * vaesdec (%rax), %xmm1, %xmm2
@ -1086,41 +1086,41 @@ vzeroupper
# CHECK-NEXT: 1 7 0.50 * vbroadcastsd (%rax), %ymm2
# CHECK-NEXT: 1 6 0.50 * vbroadcastss (%rax), %xmm2
# CHECK-NEXT: 1 7 0.50 * vbroadcastss (%rax), %ymm2
# CHECK-NEXT: 1 4 0.33 vcmppd $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vcmppd $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vcmppd $0, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vcmppd $0, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vcmppd $0, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vcmppd $0, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vcmpps $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vcmpps $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vcmpps $0, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vcmpps $0, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vcmpps $0, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vcmpps $0, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vcmpsd $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vcmpsd $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vcmpsd $0, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vcmpss $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vcmpss $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vcmpss $0, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 vcomisd %xmm0, %xmm1
# CHECK-NEXT: 2 7 1.00 * vcomisd (%rax), %xmm1
# CHECK-NEXT: 1 2 1.00 vcomiss %xmm0, %xmm1
# CHECK-NEXT: 2 7 1.00 * vcomiss (%rax), %xmm1
# CHECK-NEXT: 2 5 1.00 vcvtdq2pd %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 vcvtdq2pd %xmm0, %xmm2
# CHECK-NEXT: 3 11 1.00 * vcvtdq2pd (%rax), %xmm2
# CHECK-NEXT: 2 7 1.00 vcvtdq2pd %xmm0, %ymm2
# CHECK-NEXT: 3 13 1.00 * vcvtdq2pd (%rax), %ymm2
# CHECK-NEXT: 1 4 0.33 vcvtdq2ps %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 vcvtdq2ps %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * vcvtdq2ps (%rax), %xmm2
# CHECK-NEXT: 1 4 0.33 vcvtdq2ps %ymm0, %ymm2
# CHECK-NEXT: 1 4 0.50 vcvtdq2ps %ymm0, %ymm2
# CHECK-NEXT: 2 11 0.50 * vcvtdq2ps (%rax), %ymm2
# CHECK-NEXT: 2 5 1.00 vcvtpd2dq %xmm0, %xmm2
# CHECK-NEXT: 2 8 1.00 * vcvtpd2dqx (%rax), %xmm2
# CHECK-NEXT: 2 8 0.50 * vcvtpd2dqx (%rax), %xmm2
# CHECK-NEXT: 2 7 1.00 vcvtpd2dq %ymm0, %xmm2
# CHECK-NEXT: 2 8 1.00 * vcvtpd2dqy (%rax), %xmm2
# CHECK-NEXT: 2 8 0.50 * vcvtpd2dqy (%rax), %xmm2
# CHECK-NEXT: 2 5 1.00 vcvtpd2ps %xmm0, %xmm2
# CHECK-NEXT: 2 8 1.00 * vcvtpd2psx (%rax), %xmm2
# CHECK-NEXT: 2 7 1.00 vcvtpd2ps %ymm0, %xmm2
# CHECK-NEXT: 2 8 1.00 * vcvtpd2psy (%rax), %xmm2
# CHECK-NEXT: 1 4 0.33 vcvtps2dq %xmm0, %xmm2
# CHECK-NEXT: 3 8 1.00 * vcvtpd2psy (%rax), %xmm2
# CHECK-NEXT: 1 4 0.50 vcvtps2dq %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * vcvtps2dq (%rax), %xmm2
# CHECK-NEXT: 1 4 0.33 vcvtps2dq %ymm0, %ymm2
# CHECK-NEXT: 1 4 0.50 vcvtps2dq %ymm0, %ymm2
# CHECK-NEXT: 2 11 0.50 * vcvtps2dq (%rax), %ymm2
# CHECK-NEXT: 2 5 1.00 vcvtps2pd %xmm0, %xmm2
# CHECK-NEXT: 2 9 0.50 * vcvtps2pd (%rax), %xmm2
@ -1143,22 +1143,22 @@ vzeroupper
# CHECK-NEXT: 2 5 1.00 vcvtss2sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vcvtss2sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 6 1.00 vcvtss2si %xmm0, %ecx
# CHECK-NEXT: 2 6 1.00 vcvtss2si %xmm0, %rcx
# CHECK-NEXT: 3 7 1.00 vcvtss2si %xmm0, %rcx
# CHECK-NEXT: 3 11 1.00 * vcvtss2si (%rax), %ecx
# CHECK-NEXT: 3 11 1.00 * vcvtss2si (%rax), %rcx
# CHECK-NEXT: 2 5 1.00 vcvttpd2dq %xmm0, %xmm2
# CHECK-NEXT: 2 8 1.00 * vcvttpd2dqx (%rax), %xmm2
# CHECK-NEXT: 2 8 0.50 * vcvttpd2dqx (%rax), %xmm2
# CHECK-NEXT: 2 7 1.00 vcvttpd2dq %ymm0, %xmm2
# CHECK-NEXT: 2 8 1.00 * vcvttpd2dqy (%rax), %xmm2
# CHECK-NEXT: 1 4 0.33 vcvttps2dq %xmm0, %xmm2
# CHECK-NEXT: 2 8 0.50 * vcvttpd2dqy (%rax), %xmm2
# CHECK-NEXT: 1 4 0.50 vcvttps2dq %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * vcvttps2dq (%rax), %xmm2
# CHECK-NEXT: 1 4 0.33 vcvttps2dq %ymm0, %ymm2
# CHECK-NEXT: 1 3 0.50 vcvttps2dq %ymm0, %ymm2
# CHECK-NEXT: 2 11 0.50 * vcvttps2dq (%rax), %ymm2
# CHECK-NEXT: 2 6 1.00 vcvttsd2si %xmm0, %ecx
# CHECK-NEXT: 2 6 1.00 vcvttsd2si %xmm0, %rcx
# CHECK-NEXT: 3 11 1.00 * vcvttsd2si (%rax), %ecx
# CHECK-NEXT: 3 11 1.00 * vcvttsd2si (%rax), %rcx
# CHECK-NEXT: 3 7 1.00 vcvttss2si %xmm0, %ecx
# CHECK-NEXT: 2 6 1.00 vcvttss2si %xmm0, %ecx
# CHECK-NEXT: 3 7 1.00 vcvttss2si %xmm0, %rcx
# CHECK-NEXT: 3 11 1.00 * vcvttss2si (%rax), %ecx
# CHECK-NEXT: 3 11 1.00 * vcvttss2si (%rax), %rcx
@ -1216,29 +1216,29 @@ vzeroupper
# CHECK-NEXT: 2 8 0.50 * vmaskmovps (%rax), %ymm0, %ymm2
# CHECK-NEXT: 2 2 1.00 * * vmaskmovps %xmm0, %xmm1, (%rax)
# CHECK-NEXT: 2 2 1.00 * * vmaskmovps %ymm0, %ymm1, (%rax)
# CHECK-NEXT: 1 4 0.33 vmaxpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vmaxpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vmaxpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vmaxpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vmaxpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vmaxpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vmaxps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vmaxps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vmaxps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vmaxps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vmaxps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vmaxps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vmaxsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vmaxsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vmaxsd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vmaxss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vmaxss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vmaxss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vminpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vminpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vminpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vminpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vminpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vminpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vminps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vminps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vminps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vminps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vminps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vminps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vminsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vminsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vminsd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vminss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vminss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vminss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.33 vmovapd %xmm0, %xmm2
# CHECK-NEXT: 2 1 1.00 * vmovapd %xmm0, (%rax)
@ -1327,17 +1327,17 @@ vzeroupper
# CHECK-NEXT: 1 7 0.50 * vmovups (%rax), %ymm2
# CHECK-NEXT: 2 4 2.00 vmpsadbw $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 3 10 2.00 * vmpsadbw $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vmulpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vmulpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vmulpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vmulpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vmulpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vmulpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vmulps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vmulps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vmulps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vmulps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vmulps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vmulps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vmulsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vmulsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vmulsd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vmulss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vmulss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vmulss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.33 vorpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 7 0.50 * vorpd (%rax), %xmm1, %xmm2
@ -1441,8 +1441,8 @@ vzeroupper
# CHECK-NEXT: 4 9 2.00 * vphaddsw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 3 3 2.00 vphaddw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 4 9 2.00 * vphaddw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vphminposuw %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * vphminposuw (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 vphminposuw %xmm0, %xmm2
# CHECK-NEXT: 2 10 1.00 * vphminposuw (%rax), %xmm2
# CHECK-NEXT: 3 3 2.00 vphsubd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 4 9 2.00 * vphsubd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 3 3 2.00 vphsubsw %xmm0, %xmm1, %xmm2
@ -1457,9 +1457,9 @@ vzeroupper
# CHECK-NEXT: 2 6 1.00 * vpinsrq $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 2 2.00 vpinsrw $1, %eax, %xmm1, %xmm2
# CHECK-NEXT: 2 6 1.00 * vpinsrw $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vpmaddubsw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vpmaddubsw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vpmaddubsw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vpmaddwd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vpmaddwd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vpmaddwd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpmaxsb %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 7 0.50 * vpmaxsb (%rax), %xmm1, %xmm2
@ -1510,19 +1510,19 @@ vzeroupper
# CHECK-NEXT: 2 6 1.00 * vpmovzxwd (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 vpmovzxwq %xmm0, %xmm2
# CHECK-NEXT: 2 6 1.00 * vpmovzxwq (%rax), %xmm2
# CHECK-NEXT: 1 4 0.33 vpmuldq %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vpmuldq %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vpmuldq (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vpmulhrsw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vpmulhrsw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vpmulhrsw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vpmulhuw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vpmulhuw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vpmulhuw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vpmulhw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vpmulhw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vpmulhw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.67 vpmulld %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 3 16 0.67 * vpmulld (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vpmullw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 1.00 vpmulld %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 3 16 1.00 * vpmulld (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vpmullw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vpmullw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vpmuludq %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vpmuludq %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vpmuludq (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.33 vpor %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 7 0.50 * vpor (%rax), %xmm1, %xmm2
@ -1612,18 +1612,18 @@ vzeroupper
# CHECK-NEXT: 2 11 1.00 * vrcpps (%rax), %ymm2
# CHECK-NEXT: 1 4 1.00 vrcpss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 1.00 * vrcpss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 8 0.67 vroundpd $1, %xmm0, %xmm2
# CHECK-NEXT: 3 14 0.67 * vroundpd $1, (%rax), %xmm2
# CHECK-NEXT: 2 8 0.67 vroundpd $1, %ymm0, %ymm2
# CHECK-NEXT: 3 15 0.67 * vroundpd $1, (%rax), %ymm2
# CHECK-NEXT: 2 8 0.67 vroundps $1, %xmm0, %xmm2
# CHECK-NEXT: 3 14 0.67 * vroundps $1, (%rax), %xmm2
# CHECK-NEXT: 2 8 0.67 vroundps $1, %ymm0, %ymm2
# CHECK-NEXT: 3 15 0.67 * vroundps $1, (%rax), %ymm2
# CHECK-NEXT: 2 8 0.67 vroundsd $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 3 14 0.67 * vroundsd $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 8 0.67 vroundss $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 3 14 0.67 * vroundss $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 8 1.00 vroundpd $1, %xmm0, %xmm2
# CHECK-NEXT: 3 14 1.00 * vroundpd $1, (%rax), %xmm2
# CHECK-NEXT: 2 8 1.00 vroundpd $1, %ymm0, %ymm2
# CHECK-NEXT: 3 15 1.00 * vroundpd $1, (%rax), %ymm2
# CHECK-NEXT: 2 8 1.00 vroundps $1, %xmm0, %xmm2
# CHECK-NEXT: 3 14 1.00 * vroundps $1, (%rax), %xmm2
# CHECK-NEXT: 2 8 1.00 vroundps $1, %ymm0, %ymm2
# CHECK-NEXT: 3 15 1.00 * vroundps $1, (%rax), %ymm2
# CHECK-NEXT: 2 8 1.00 vroundsd $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 3 14 1.00 * vroundsd $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 8 1.00 vroundss $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 3 14 1.00 * vroundss $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 1.00 vrsqrtps %xmm0, %xmm2
# CHECK-NEXT: 2 10 1.00 * vrsqrtps (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 vrsqrtps %ymm0, %ymm2
@ -1651,17 +1651,17 @@ vzeroupper
# CHECK-NEXT: 1 12 3.00 vsqrtss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 17 3.00 * vsqrtss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 3 2 1.00 * * * vstmxcsr (%rax)
# CHECK-NEXT: 1 4 0.33 vsubpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vsubpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vsubpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vsubpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vsubpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vsubpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vsubps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vsubps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vsubps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vsubps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vsubps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vsubps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vsubsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vsubsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vsubsd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vsubss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vsubss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vsubss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 vtestpd %xmm0, %xmm1
# CHECK-NEXT: 2 8 1.00 * vtestpd (%rax), %xmm1
@ -1716,30 +1716,30 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
# CHECK-NEXT: - 123.00 271.00 170.00 171.17 171.17 34.00 376.00 5.00 12.67
# CHECK-NEXT: - 123.00 290.83 198.83 171.17 171.17 34.00 327.33 5.00 12.67
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddsd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddsubpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddsubpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddsubpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddsubpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddsubps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddsubps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddsubps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddsubps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddsd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddsubpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddsubpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddsubpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddsubpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddsubps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddsubps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddsubps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddsubps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 1.00 - - - - - - - vaesdec %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vaesdec (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - vaesdeclast %xmm0, %xmm1, %xmm2
@ -1788,50 +1788,50 @@ vzeroupper
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vbroadcastsd (%rax), %ymm2
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vbroadcastss (%rax), %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vbroadcastss (%rax), %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcmppd $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcmppd $0, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcmppd $0, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcmppd $0, (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcmpps $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcmpps $0, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcmpps $0, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcmpps $0, (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcmpsd $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcmpsd $0, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcmpss $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcmpss $0, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcmppd $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcmppd $0, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcmppd $0, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcmppd $0, (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcmpps $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcmpps $0, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcmpps $0, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcmpps $0, (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcmpsd $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcmpsd $0, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcmpss $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcmpss $0, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - vcomisd %xmm0, %xmm1
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcomisd (%rax), %xmm1
# CHECK-NEXT: - - 1.00 - - - - - - - vcomiss %xmm0, %xmm1
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcomiss (%rax), %xmm1
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtdq2pd %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtdq2pd %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vcvtdq2pd (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtdq2pd %xmm0, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtdq2pd %xmm0, %ymm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vcvtdq2pd (%rax), %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcvtdq2ps %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtdq2ps %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtdq2ps (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcvtdq2ps %ymm0, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtdq2ps %ymm0, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtdq2ps (%rax), %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2dq %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2dqx (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2dqx (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2dq %ymm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2dqy (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2dqy (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2ps %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2psx (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2ps %ymm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2psy (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcvtps2dq %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2psy (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2dq %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcvtps2dq %ymm0, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2dq %ymm0, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax), %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtps2pd %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtps2pd %xmm0, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax), %ymm2
# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - vcvtsd2si %xmm0, %ecx
# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - vcvtsd2si %xmm0, %rcx
# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - vcvtsd2si (%rax), %ecx
# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - vcvtsd2si (%rax), %rcx
# CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvtsd2si %xmm0, %ecx
# CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvtsd2si %xmm0, %rcx
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtsd2si (%rax), %ecx
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtsd2si (%rax), %rcx
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtsd2ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvtsd2ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtsi2sdl %ecx, %xmm0, %xmm2
@ -1844,26 +1844,26 @@ vzeroupper
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtsi2ssq (%rax), %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtss2sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtss2sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - vcvtss2si %xmm0, %ecx
# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - vcvtss2si %xmm0, %rcx
# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - vcvtss2si (%rax), %ecx
# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - vcvtss2si (%rax), %rcx
# CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvtss2si %xmm0, %ecx
# CHECK-NEXT: - - 1.33 0.33 - - - 1.33 - - vcvtss2si %xmm0, %rcx
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtss2si (%rax), %ecx
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtss2si (%rax), %rcx
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2dq %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvttpd2dqx (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2dqx (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2dq %ymm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvttpd2dqy (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcvttps2dq %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2dqy (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvttps2dq %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2dq (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcvttps2dq %ymm0, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvttps2dq %ymm0, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2dq (%rax), %ymm2
# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - vcvttsd2si %xmm0, %ecx
# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - vcvttsd2si %xmm0, %rcx
# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - vcvttsd2si (%rax), %ecx
# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - vcvttsd2si (%rax), %rcx
# CHECK-NEXT: - - 1.33 0.33 - - - 1.33 - - vcvttss2si %xmm0, %ecx
# CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvttsd2si %xmm0, %ecx
# CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvttsd2si %xmm0, %rcx
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvttsd2si (%rax), %ecx
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvttsd2si (%rax), %rcx
# CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvttss2si %xmm0, %ecx
# CHECK-NEXT: - - 1.33 0.33 - - - 1.33 - - vcvttss2si %xmm0, %rcx
# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - vcvttss2si (%rax), %ecx
# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - vcvttss2si (%rax), %rcx
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvttss2si (%rax), %ecx
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvttss2si (%rax), %rcx
# CHECK-NEXT: - 3.00 1.00 - - - - - - - vdivpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - 4.00 1.00 - 0.50 0.50 - - - - vdivpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - 5.00 1.00 - - - - - - - vdivpd %ymm0, %ymm1, %ymm2
@ -1918,30 +1918,30 @@ vzeroupper
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmaskmovps (%rax), %ymm0, %ymm2
# CHECK-NEXT: - - 1.00 - 0.33 0.33 - - - 0.33 vmaskmovps %xmm0, %xmm1, (%rax)
# CHECK-NEXT: - - 1.00 - 0.33 0.33 - - - 0.33 vmaskmovps %ymm0, %ymm1, (%rax)
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmaxpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmaxpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmaxpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmaxpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmaxps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmaxps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmaxps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmaxps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmaxsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmaxsd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmaxss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmaxss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vminpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vminpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vminpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vminpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vminps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vminps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vminps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vminps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vminsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vminsd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vminss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vminss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmaxpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmaxpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmaxpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmaxpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmaxps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmaxps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmaxps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmaxps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmaxsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmaxsd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmaxss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmaxss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vminpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vminpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vminpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vminpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vminps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vminps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vminps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vminps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vminsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vminsd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vminss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vminss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovapd %xmm0, %xmm2
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovapd %xmm0, (%rax)
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vmovapd (%rax), %xmm2
@ -2029,18 +2029,18 @@ vzeroupper
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vmovups (%rax), %ymm2
# CHECK-NEXT: - - - - - - - 2.00 - - vmpsadbw $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - vmpsadbw $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmulpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmulpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmulpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmulpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmulps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmulps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmulps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmulps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmulsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmulsd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmulss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmulss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmulpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmulpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmulpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmulpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmulps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmulps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmulps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmulps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmulsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmulsd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmulss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmulss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vorpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vorpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vorpd %ymm0, %ymm1, %ymm2
@ -2143,8 +2143,8 @@ vzeroupper
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - vphaddsw (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - vphaddw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - vphaddw (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vphminposuw %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vphminposuw (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - vphminposuw %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vphminposuw (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - vphsubd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - vphsubd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - vphsubsw %xmm0, %xmm1, %xmm2
@ -2159,10 +2159,10 @@ vzeroupper
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpinsrq $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - 2.00 - - vpinsrw $1, %eax, %xmm1, %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpinsrw $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmaddubsw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmaddubsw (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmaddwd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmaddwd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmaddubsw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmaddubsw (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmaddwd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmaddwd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmaxsb %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmaxsb (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmaxsd %xmm0, %xmm1, %xmm2
@ -2212,20 +2212,20 @@ vzeroupper
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpmovzxwd (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 1.00 - - vpmovzxwq %xmm0, %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpmovzxwq (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmuldq %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmuldq (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmulhrsw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmulhrsw (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmulhuw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmulhuw (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmulhw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmulhw (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - vpmulld %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - vpmulld (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmullw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmullw (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmuludq %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmuludq (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmuldq %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmuldq (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmulhrsw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmulhrsw (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmulhuw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmulhuw (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmulhw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmulhw (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 1.00 - - - - - - vpmulld %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vpmulld (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmullw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmullw (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmuludq %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmuludq (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpor %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpor (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - 1.00 - - vpsadbw %xmm0, %xmm1, %xmm2
@ -2314,18 +2314,18 @@ vzeroupper
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vrcpps (%rax), %ymm2
# CHECK-NEXT: - - 1.00 - - - - - - - vrcpss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vrcpss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - vroundpd $1, %xmm0, %xmm2
# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - vroundpd $1, (%rax), %xmm2
# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - vroundpd $1, %ymm0, %ymm2
# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - vroundpd $1, (%rax), %ymm2
# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - vroundps $1, %xmm0, %xmm2
# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - vroundps $1, (%rax), %xmm2
# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - vroundps $1, %ymm0, %ymm2
# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - vroundps $1, (%rax), %ymm2
# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - vroundsd $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - vroundsd $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - vroundss $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - vroundss $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 1.00 - - - - - - vroundpd $1, %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vroundpd $1, (%rax), %xmm2
# CHECK-NEXT: - - 1.00 1.00 - - - - - - vroundpd $1, %ymm0, %ymm2
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vroundpd $1, (%rax), %ymm2
# CHECK-NEXT: - - 1.00 1.00 - - - - - - vroundps $1, %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vroundps $1, (%rax), %xmm2
# CHECK-NEXT: - - 1.00 1.00 - - - - - - vroundps $1, %ymm0, %ymm2
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vroundps $1, (%rax), %ymm2
# CHECK-NEXT: - - 1.00 1.00 - - - - - - vroundsd $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vroundsd $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 1.00 - - - - - - vroundss $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vroundss $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - vrsqrtps %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vrsqrtps (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - vrsqrtps %ymm0, %ymm2
@ -2353,18 +2353,18 @@ vzeroupper
# CHECK-NEXT: - 3.00 1.00 - - - - - - - vsqrtss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - 3.00 1.00 - 0.50 0.50 - - - - vsqrtss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - 0.33 0.33 1.00 1.00 - 0.33 vstmxcsr (%rax)
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vsubpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vsubpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vsubpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vsubpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vsubps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vsubps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vsubps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vsubps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vsubsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vsubsd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vsubss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vsubss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vsubpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vsubpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vsubpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vsubpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vsubps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vsubps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vsubps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vsubps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vsubsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vsubsd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vsubss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vsubss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - vtestpd %xmm0, %xmm1
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vtestpd (%rax), %xmm1
# CHECK-NEXT: - - 1.00 - - - - - - - vtestpd %ymm0, %ymm1

View File

@ -583,9 +583,9 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 4 10 2.00 * vphsubsw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 3 3 2.00 vphsubw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 4 10 2.00 * vphsubw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vpmaddubsw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vpmaddubsw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vpmaddubsw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vpmaddwd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vpmaddwd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vpmaddwd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 2 7 0.50 * vpmaskmovd (%rax), %xmm0, %xmm2
# CHECK-NEXT: 2 8 0.50 * vpmaskmovd (%rax), %ymm0, %ymm2
@ -644,19 +644,19 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 2 9 1.00 * vpmovzxwd (%rax), %ymm2
# CHECK-NEXT: 1 3 1.00 vpmovzxwq %xmm0, %ymm2
# CHECK-NEXT: 2 10 1.00 * vpmovzxwq (%rax), %ymm2
# CHECK-NEXT: 1 4 0.33 vpmuldq %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vpmuldq %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vpmuldq (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vpmulhrsw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vpmulhrsw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vpmulhrsw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vpmulhuw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vpmulhuw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vpmulhuw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vpmulhw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vpmulhw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vpmulhw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.67 vpmulld %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 3 17 0.67 * vpmulld (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vpmullw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 1.00 vpmulld %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 3 17 1.00 * vpmulld (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vpmullw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vpmullw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vpmuludq %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vpmuludq %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vpmuludq (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.33 vpor %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 8 0.50 * vpor (%rax), %ymm1, %ymm2
@ -771,7 +771,7 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
# CHECK-NEXT: - - 107.00 86.00 85.17 85.17 1.00 169.00 - 1.67
# CHECK-NEXT: - - 110.33 89.33 85.17 85.17 1.00 162.33 - 1.67
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@ -901,10 +901,10 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - vphsubsw (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - vphsubw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - vphsubw (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmaddubsw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmaddubsw (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmaddwd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmaddwd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmaddubsw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmaddubsw (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmaddwd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmaddwd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmaskmovd (%rax), %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmaskmovd (%rax), %ymm0, %ymm2
# CHECK-NEXT: - - 1.00 - 0.33 0.33 - - - 0.33 vpmaskmovd %xmm0, %xmm1, (%rax)
@ -962,20 +962,20 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpmovzxwd (%rax), %ymm2
# CHECK-NEXT: - - - - - - - 1.00 - - vpmovzxwq %xmm0, %ymm2
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpmovzxwq (%rax), %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmuldq %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmuldq (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmulhrsw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmulhrsw (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmulhuw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmulhuw (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmulhw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmulhw (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - vpmulld %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - vpmulld (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmullw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmullw (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmuludq %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmuludq (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmuldq %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmuldq (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmulhrsw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmulhrsw (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmulhuw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmulhuw (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmulhw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmulhw (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 1.00 1.00 - - - - - - vpmulld %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vpmulld (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmullw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmullw (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmuludq %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmuludq (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpor %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpor (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - 1.00 - - vpsadbw %ymm0, %ymm1, %ymm2

View File

@ -45,15 +45,15 @@ vcvtps2ph $0, %ymm0, (%rax)
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
# CHECK-NEXT: - - 2.67 2.67 1.67 1.67 2.00 8.67 - 0.67
# CHECK-NEXT: - - 4.00 4.00 1.67 1.67 2.00 6.00 - 0.67
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtph2ps %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtph2ps (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtph2ps %xmm0, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtph2ps (%rax), %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtps2ph $0, %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.33 0.33 1.00 1.33 - 0.33 vcvtps2ph $0, %xmm0, (%rax)
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtps2ph $0, %ymm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.33 0.33 1.00 1.33 - 0.33 vcvtps2ph $0, %ymm0, (%rax)
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtph2ps %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtph2ps (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtph2ps %xmm0, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtph2ps (%rax), %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtps2ph $0, %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.33 0.33 1.00 1.00 - 0.33 vcvtps2ph $0, %xmm0, (%rax)
# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtps2ph $0, %ymm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.33 0.33 1.00 1.00 - 0.33 vcvtps2ph $0, %ymm0, (%rax)

View File

@ -298,197 +298,197 @@ vfnmsub231ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: [6]: HasSideEffects
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 4 0.33 vfmadd132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfmadd132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmadd132pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfmadd132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vfmadd132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vfmadd213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfmadd213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmadd213pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfmadd213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vfmadd213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vfmadd231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfmadd231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmadd231pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfmadd231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vfmadd231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vfmadd132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfmadd132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmadd132ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfmadd132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vfmadd132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vfmadd213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmadd213ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfmadd213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vfmadd213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vfmadd231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfmadd231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmadd231ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfmadd231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vfmadd231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vfmadd132sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfmadd132sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfmadd132sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfmadd213sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfmadd213sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfmadd213sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfmadd231sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfmadd231sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfmadd231sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfmadd132ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfmadd132ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfmadd132ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfmadd213ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfmadd213ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfmadd213ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfmadd231ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfmadd231ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfmadd231ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfmaddsub132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfmaddsub132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmaddsub132pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfmaddsub132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vfmaddsub132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmaddsub132pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vfmaddsub213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfmaddsub213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmaddsub213pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfmaddsub213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vfmaddsub213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmaddsub213pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vfmaddsub231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfmaddsub231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmaddsub231pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfmaddsub231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vfmaddsub231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmaddsub231pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vfmaddsub132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfmaddsub132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmaddsub132ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfmaddsub132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vfmaddsub132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmaddsub132ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vfmaddsub213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfmaddsub213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmaddsub213ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfmaddsub213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vfmaddsub213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmaddsub213ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vfmaddsub231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfmaddsub231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmaddsub231ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfmaddsub231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vfmaddsub231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmaddsub231ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vfmsub132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfmsub132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsub132pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfmsub132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vfmsub132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmsub132pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vfmsub213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfmsub213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsub213pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfmsub213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vfmsub213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmsub213pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vfmsub231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfmsub231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsub231pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfmsub231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vfmsub231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmsub231pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vfmsub132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfmsub132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsub132ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfmsub132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vfmsub132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmsub132ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vfmsub213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfmsub213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsub213ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfmsub213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vfmsub213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmsub213ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vfmsub231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfmsub231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsub231ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfmsub231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vfmsub231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmsub231ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vfmsub132sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfmsub132sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfmsub132sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfmsub213sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfmsub213sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfmsub213sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfmsub231sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfmsub231sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfmsub231sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfmsub132ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfmsub132ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfmsub132ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfmsub213ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfmsub213ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfmsub213ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfmsub231ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfmsub231ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfmsub231ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfmsubadd132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfmsubadd132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsubadd132pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfmsubadd132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vfmsubadd132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmsubadd132pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vfmsubadd213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfmsubadd213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsubadd213pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfmsubadd213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vfmsubadd213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmsubadd213pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vfmsubadd231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfmsubadd231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsubadd231pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfmsubadd231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vfmsubadd231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmsubadd231pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vfmsubadd132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfmsubadd132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsubadd132ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfmsubadd132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vfmsubadd132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmsubadd132ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vfmsubadd213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfmsubadd213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsubadd213ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfmsubadd213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vfmsubadd213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmsubadd213ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vfmsubadd231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfmsubadd231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsubadd231ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfmsubadd231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vfmsubadd231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfmsubadd231ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vfnmadd132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfnmadd132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmadd132pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfnmadd132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vfnmadd132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfnmadd132pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vfnmadd213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfnmadd213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmadd213pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfnmadd213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vfnmadd213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfnmadd213pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vfnmadd231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfnmadd231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmadd231pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfnmadd231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vfnmadd231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfnmadd231pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vfnmadd132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfnmadd132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmadd132ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfnmadd132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vfnmadd132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfnmadd132ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vfnmadd213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfnmadd213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmadd213ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfnmadd213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vfnmadd213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfnmadd213ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vfnmadd231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfnmadd231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmadd231ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfnmadd231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vfnmadd231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfnmadd231ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vfnmadd132sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfnmadd132sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfnmadd132sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfnmadd213sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfnmadd213sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfnmadd213sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfnmadd231sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfnmadd231sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfnmadd231sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfnmadd132ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfnmadd132ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfnmadd132ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfnmadd213ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfnmadd213ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfnmadd213ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfnmadd231ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfnmadd231ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfnmadd231ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfnmsub132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfnmsub132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmsub132pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfnmsub132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vfnmsub132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfnmsub132pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vfnmsub213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfnmsub213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmsub213pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfnmsub213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vfnmsub213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfnmsub213pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vfnmsub231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfnmsub231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmsub231pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfnmsub231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vfnmsub231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfnmsub231pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vfnmsub132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfnmsub132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmsub132ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfnmsub132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vfnmsub132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfnmsub132ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vfnmsub213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfnmsub213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmsub213ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfnmsub213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vfnmsub213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfnmsub213ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vfnmsub231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfnmsub231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmsub231ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfnmsub231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.50 vfnmsub231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vfnmsub231ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 0.33 vfnmsub132sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfnmsub132sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfnmsub132sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfnmsub213sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfnmsub213sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfnmsub213sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfnmsub231sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfnmsub231sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfnmsub231sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfnmsub132ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfnmsub132ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfnmsub132ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfnmsub213ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfnmsub213ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfnmsub213ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.33 vfnmsub231ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vfnmsub231ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vfnmsub231ss (%rax), %xmm1, %xmm2
# CHECK: Resources:
@ -505,199 +505,199 @@ vfnmsub231ss (%rax), %xmm1, %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
# CHECK-NEXT: - - 64.00 64.00 48.00 48.00 - 64.00 - -
# CHECK-NEXT: - - 96.00 96.00 48.00 48.00 - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd132pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd132pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd213pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd213pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd231pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd231pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd132ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd132ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd213ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd213ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd231ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd231ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd132sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd132sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd213sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd213sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd231sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd231sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd132ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd132ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd213ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd213ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd231ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd231ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub132pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub132pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub213pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub213pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub231pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub231pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub132ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub132ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub213ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub213ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub231ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub231ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub132pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub132pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub213pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub213pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub231pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub231pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub132ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub132ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub213ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub213ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub231ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub231ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub132sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub132sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub213sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub213sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub231sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub231sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub132ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub132ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub213ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub213ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub231ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub231ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd132pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd132pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd213pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd213pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd231pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd231pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd132ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd132ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd213ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd213ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd231ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd231ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd132pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd132pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd213pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd213pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd231pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd231pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd132ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd132ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd213ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd213ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd231ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd231ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd132sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd132sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd213sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd213sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd231sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd231sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd132ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd132ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd213ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd213ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd231ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd231ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub132pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub132pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub213pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub213pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub231pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub231pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub132ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub132ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub213ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub213ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub231ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub231ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub132sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub132sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub213sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub213sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub231sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub231sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub132ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub132ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub213ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub213ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub231ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub231ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd132sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd231sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd132ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd231ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub132pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub132pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub213pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub213pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub231pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub231pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub132ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub132ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub213ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub213ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub231ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub231ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub132pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub132pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub213pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub213pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub231pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub231pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub132ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub132ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub213ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub213ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub231ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub231ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub132sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub132sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub213sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub213sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub231sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub231sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub132ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub132ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub213ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub213ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub231ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub231ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd132pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd132pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd213pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd213pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd231pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd231pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd132ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd132ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd213ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd213ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd231ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd231ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd132pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd132pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd213pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd213pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd231pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd231pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd132ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd132ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd213ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd213ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd231ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd231ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd132sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd132sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd213sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd213sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd231sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd231sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd132ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd132ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd213ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd213ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd231ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd231ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub132pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub132pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub213pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub213pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub231pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub231pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub132ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub132ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub213ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub213ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub231ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub231ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub132sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub132sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub213sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub213sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub231sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub231sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub132ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub132ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub213ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub213ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub231ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub231ss (%rax), %xmm1, %xmm2

View File

@ -194,17 +194,17 @@ xorps (%rax), %xmm2
# CHECK-NEXT: [6]: HasSideEffects
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 4 0.33 addps %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 addps %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * addps (%rax), %xmm2
# CHECK-NEXT: 1 4 0.33 addss %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 addss %xmm0, %xmm2
# CHECK-NEXT: 2 9 0.50 * addss (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 andnps %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * andnps (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 andps %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * andps (%rax), %xmm2
# CHECK-NEXT: 1 4 0.33 cmpps $0, %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 cmpps $0, %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * cmpps $0, (%rax), %xmm2
# CHECK-NEXT: 1 4 0.33 cmpss $0, %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 cmpss $0, %xmm0, %xmm2
# CHECK-NEXT: 2 9 0.50 * cmpss $0, (%rax), %xmm2
# CHECK-NEXT: 1 2 1.00 comiss %xmm0, %xmm1
# CHECK-NEXT: 2 7 1.00 * comiss (%rax), %xmm1
@ -217,12 +217,12 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 2 9 1.00 * cvtsi2ssl (%rax), %xmm2
# CHECK-NEXT: 2 9 1.00 * cvtsi2ssl (%rax), %xmm2
# CHECK-NEXT: 2 6 1.00 cvtss2si %xmm0, %ecx
# CHECK-NEXT: 2 6 1.00 cvtss2si %xmm0, %rcx
# CHECK-NEXT: 3 7 1.00 cvtss2si %xmm0, %rcx
# CHECK-NEXT: 3 11 1.00 * cvtss2si (%rax), %ecx
# CHECK-NEXT: 3 11 1.00 * cvtss2si (%rax), %rcx
# CHECK-NEXT: 2 5 1.00 cvttps2pi %xmm0, %mm2
# CHECK-NEXT: 2 9 0.50 * cvttps2pi (%rax), %mm2
# CHECK-NEXT: 3 7 1.00 cvttss2si %xmm0, %ecx
# CHECK-NEXT: 2 6 1.00 cvttss2si %xmm0, %ecx
# CHECK-NEXT: 3 7 1.00 cvttss2si %xmm0, %rcx
# CHECK-NEXT: 3 11 1.00 * cvttss2si (%rax), %ecx
# CHECK-NEXT: 4 12 1.00 * cvttss2si (%rax), %rcx
@ -232,13 +232,13 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 2 16 3.00 * divss (%rax), %xmm2
# CHECK-NEXT: 3 7 1.00 * * * ldmxcsr (%rax)
# CHECK-NEXT: 1 1 1.00 * * * maskmovq %mm0, %mm1
# CHECK-NEXT: 1 4 0.33 maxps %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 maxps %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * maxps (%rax), %xmm2
# CHECK-NEXT: 1 4 0.33 maxss %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 maxss %xmm0, %xmm2
# CHECK-NEXT: 2 9 0.50 * maxss (%rax), %xmm2
# CHECK-NEXT: 1 4 0.33 minps %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 minps %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * minps (%rax), %xmm2
# CHECK-NEXT: 1 4 0.33 minss %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 minss %xmm0, %xmm2
# CHECK-NEXT: 2 9 0.50 * minss (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 movaps %xmm0, %xmm2
# CHECK-NEXT: 2 1 1.00 * movaps %xmm0, (%rax)
@ -258,9 +258,9 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 movups %xmm0, %xmm2
# CHECK-NEXT: 2 1 1.00 * movups %xmm0, (%rax)
# CHECK-NEXT: 1 6 0.50 * movups (%rax), %xmm2
# CHECK-NEXT: 1 4 0.33 mulps %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 mulps %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * mulps (%rax), %xmm2
# CHECK-NEXT: 1 4 0.33 mulss %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 mulss %xmm0, %xmm2
# CHECK-NEXT: 2 9 0.50 * mulss (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 orps %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * orps (%rax), %xmm2
@ -306,9 +306,9 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 1 12 3.00 sqrtss %xmm0, %xmm2
# CHECK-NEXT: 2 17 3.00 * sqrtss (%rax), %xmm2
# CHECK-NEXT: 3 2 1.00 * * * stmxcsr (%rax)
# CHECK-NEXT: 1 4 0.33 subps %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 subps %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * subps (%rax), %xmm2
# CHECK-NEXT: 1 4 0.33 subss %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 subss %xmm0, %xmm2
# CHECK-NEXT: 2 9 0.50 * subss (%rax), %xmm2
# CHECK-NEXT: 1 2 1.00 ucomiss %xmm0, %xmm1
# CHECK-NEXT: 2 7 1.00 * ucomiss (%rax), %xmm1
@ -333,25 +333,25 @@ xorps (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
# CHECK-NEXT: - 26.00 65.50 18.50 32.00 32.00 8.00 45.50 0.50 3.00
# CHECK-NEXT: - 26.00 65.83 25.83 32.00 32.00 8.00 37.83 0.50 3.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - addps %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - addps (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - addss %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - addss (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - addps %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - addps (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - addss %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - addss (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - andnps %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - andnps (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - andps %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - andps (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - cmpps $0, %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cmpps $0, (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - cmpss $0, %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cmpss $0, (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - cmpps $0, %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cmpps $0, (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - cmpss $0, %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cmpss $0, (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - comiss %xmm0, %xmm1
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - comiss (%rax), %xmm1
# CHECK-NEXT: - - 2.00 - - - - - - - cvtpi2ps %mm0, %xmm2
# CHECK-NEXT: - - - - - - - 2.00 - - cvtpi2ps %mm0, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - cvtpi2ps (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtps2pi %xmm0, %mm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cvtps2pi (%rax), %mm2
@ -359,15 +359,15 @@ xorps (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - cvtsi2ssq %rcx, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtsi2ssl (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtsi2ssl (%rax), %xmm2
# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - cvtss2si %xmm0, %ecx
# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - cvtss2si %xmm0, %rcx
# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - cvtss2si (%rax), %ecx
# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - cvtss2si (%rax), %rcx
# CHECK-NEXT: - - 1.00 1.00 - - - - - - cvtss2si %xmm0, %ecx
# CHECK-NEXT: - - 1.33 0.33 - - - 1.33 - - cvtss2si %xmm0, %rcx
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - cvtss2si (%rax), %ecx
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - cvtss2si (%rax), %rcx
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvttps2pi %xmm0, %mm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cvttps2pi (%rax), %mm2
# CHECK-NEXT: - - 1.33 0.33 - - - 1.33 - - cvttss2si %xmm0, %ecx
# CHECK-NEXT: - - 1.00 1.00 - - - - - - cvttss2si %xmm0, %ecx
# CHECK-NEXT: - - 1.33 0.33 - - - 1.33 - - cvttss2si %xmm0, %rcx
# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - cvttss2si (%rax), %ecx
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - cvttss2si (%rax), %ecx
# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 1.33 - - cvttss2si (%rax), %rcx
# CHECK-NEXT: - 3.00 1.00 - - - - - - - divps %xmm0, %xmm2
# CHECK-NEXT: - 5.00 1.00 - 0.50 0.50 - - - - divps (%rax), %xmm2
@ -375,14 +375,14 @@ xorps (%rax), %xmm2
# CHECK-NEXT: - 3.00 1.00 - 0.50 0.50 - - - - divss (%rax), %xmm2
# CHECK-NEXT: - - 1.25 0.25 0.50 0.50 - 0.25 0.25 - ldmxcsr (%rax)
# CHECK-NEXT: - - - - - - - 1.00 - - maskmovq %mm0, %mm1
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - maxps %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - maxps (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - maxss %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - maxss (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - minps %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - minps (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - minss %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - minss (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - maxps %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - maxps (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - maxss %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - maxss (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - minps %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - minps (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - minss %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - minss (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - movaps %xmm0, %xmm2
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 movaps %xmm0, (%rax)
# CHECK-NEXT: - - - - 0.50 0.50 - - - - movaps (%rax), %xmm2
@ -401,10 +401,10 @@ xorps (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - movups %xmm0, %xmm2
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 movups %xmm0, (%rax)
# CHECK-NEXT: - - - - 0.50 0.50 - - - - movups (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - mulps %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - mulps (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - mulss %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - mulss (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - mulps %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - mulps (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - mulss %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - mulss (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - orps %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - orps (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - pavgb %mm0, %mm2
@ -449,10 +449,10 @@ xorps (%rax), %xmm2
# CHECK-NEXT: - 3.00 1.00 - - - - - - - sqrtss %xmm0, %xmm2
# CHECK-NEXT: - 3.00 1.00 - 0.50 0.50 - - - - sqrtss (%rax), %xmm2
# CHECK-NEXT: - - - - 0.33 0.33 1.00 1.00 - 0.33 stmxcsr (%rax)
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - subps %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - subps (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - subss %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - subss (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - subps %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - subps (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - subss %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - subss (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - ucomiss %xmm0, %xmm1
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - ucomiss (%rax), %xmm1
# CHECK-NEXT: - - - - - - - 1.00 - - unpckhps %xmm0, %xmm2

View File

@ -402,24 +402,24 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: [6]: HasSideEffects
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 4 0.33 addpd %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 addpd %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * addpd (%rax), %xmm2
# CHECK-NEXT: 1 4 0.33 addsd %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 addsd %xmm0, %xmm2
# CHECK-NEXT: 2 9 0.50 * addsd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 andnpd %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * andnpd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 andpd %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * andpd (%rax), %xmm2
# CHECK-NEXT: 2 2 1.00 * * * clflush (%rax)
# CHECK-NEXT: 1 4 0.33 cmppd $0, %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 cmppd $0, %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * cmppd $0, (%rax), %xmm2
# CHECK-NEXT: 1 4 0.33 cmpsd $0, %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 cmpsd $0, %xmm0, %xmm2
# CHECK-NEXT: 2 9 0.50 * cmpsd $0, (%rax), %xmm2
# CHECK-NEXT: 1 2 1.00 comisd %xmm0, %xmm1
# CHECK-NEXT: 2 7 1.00 * comisd (%rax), %xmm1
# CHECK-NEXT: 2 5 1.00 cvtdq2pd %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 cvtdq2pd %xmm0, %xmm2
# CHECK-NEXT: 3 11 1.00 * cvtdq2pd (%rax), %xmm2
# CHECK-NEXT: 1 4 0.33 cvtdq2ps %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 cvtdq2ps %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * cvtdq2ps (%rax), %xmm2
# CHECK-NEXT: 2 5 1.00 cvtpd2dq %xmm0, %xmm2
# CHECK-NEXT: 3 11 1.00 * cvtpd2dq (%rax), %xmm2
@ -427,9 +427,9 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 3 11 1.00 * cvtpd2pi (%rax), %mm2
# CHECK-NEXT: 2 5 1.00 cvtpd2ps %xmm0, %xmm2
# CHECK-NEXT: 3 11 1.00 * cvtpd2ps (%rax), %xmm2
# CHECK-NEXT: 2 5 1.00 cvtpi2pd %mm0, %xmm2
# CHECK-NEXT: 3 10 1.00 * cvtpi2pd (%rax), %xmm2
# CHECK-NEXT: 1 4 0.33 cvtps2dq %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 cvtpi2pd %mm0, %xmm2
# CHECK-NEXT: 2 9 0.50 * cvtpi2pd (%rax), %xmm2
# CHECK-NEXT: 1 4 0.50 cvtps2dq %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * cvtps2dq (%rax), %xmm2
# CHECK-NEXT: 2 5 1.00 cvtps2pd %xmm0, %xmm2
# CHECK-NEXT: 2 9 0.50 * cvtps2pd (%rax), %xmm2
@ -449,7 +449,7 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 3 11 1.00 * cvttpd2dq (%rax), %xmm2
# CHECK-NEXT: 2 5 1.00 cvttpd2pi %xmm0, %mm2
# CHECK-NEXT: 3 11 1.00 * cvttpd2pi (%rax), %mm2
# CHECK-NEXT: 1 4 0.33 cvttps2dq %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 cvttps2dq %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * cvttps2dq (%rax), %xmm2
# CHECK-NEXT: 2 6 1.00 cvttsd2si %xmm0, %ecx
# CHECK-NEXT: 2 6 1.00 cvttsd2si %xmm0, %rcx
@ -461,13 +461,13 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 2 19 4.00 * divsd (%rax), %xmm2
# CHECK-NEXT: 2 2 0.50 * * * lfence
# CHECK-NEXT: 2 1 1.00 * * * maskmovdqu %xmm0, %xmm1
# CHECK-NEXT: 1 4 0.33 maxpd %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 maxpd %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * maxpd (%rax), %xmm2
# CHECK-NEXT: 1 4 0.33 maxsd %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 maxsd %xmm0, %xmm2
# CHECK-NEXT: 2 9 0.50 * maxsd (%rax), %xmm2
# CHECK-NEXT: 1 4 0.33 minpd %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 minpd %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * minpd (%rax), %xmm2
# CHECK-NEXT: 1 4 0.33 minsd %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 minsd %xmm0, %xmm2
# CHECK-NEXT: 2 9 0.50 * minsd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 movapd %xmm0, %xmm2
# CHECK-NEXT: 2 1 1.00 * movapd %xmm0, (%rax)
@ -504,9 +504,9 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 movupd %xmm0, %xmm2
# CHECK-NEXT: 2 1 1.00 * movupd %xmm0, (%rax)
# CHECK-NEXT: 1 6 0.50 * movupd (%rax), %xmm2
# CHECK-NEXT: 1 4 0.33 mulpd %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 mulpd %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * mulpd (%rax), %xmm2
# CHECK-NEXT: 1 4 0.33 mulsd %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 mulsd %xmm0, %xmm2
# CHECK-NEXT: 2 9 0.50 * mulsd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 orpd %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * orpd (%rax), %xmm2
@ -555,7 +555,7 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pcmpgtw %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pcmpgtw (%rax), %xmm2
# CHECK-NEXT: 2 3 1.00 pextrw $1, %xmm0, %ecx
# CHECK-NEXT: 1 4 0.33 pmaddwd %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 pmaddwd %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * pmaddwd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pmaxsw %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pmaxsw (%rax), %xmm2
@ -566,15 +566,15 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pminub %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pminub (%rax), %xmm2
# CHECK-NEXT: 1 2 1.00 pmovmskb %xmm0, %ecx
# CHECK-NEXT: 1 4 0.33 pmulhuw %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 pmulhuw %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * pmulhuw (%rax), %xmm2
# CHECK-NEXT: 1 4 0.33 pmulhw %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 pmulhw %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * pmulhw (%rax), %xmm2
# CHECK-NEXT: 1 4 0.33 pmullw %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 pmullw %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * pmullw (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 pmuludq %mm0, %mm2
# CHECK-NEXT: 2 9 1.00 * pmuludq (%rax), %mm2
# CHECK-NEXT: 1 4 0.33 pmuludq %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 pmuludq %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * pmuludq (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 por %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * por (%rax), %xmm2
@ -654,9 +654,9 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 2 24 6.00 * sqrtpd (%rax), %xmm2
# CHECK-NEXT: 1 18 6.00 sqrtsd %xmm0, %xmm2
# CHECK-NEXT: 2 23 6.00 * sqrtsd (%rax), %xmm2
# CHECK-NEXT: 1 4 0.33 subpd %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 subpd %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * subpd (%rax), %xmm2
# CHECK-NEXT: 1 4 0.33 subsd %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 subsd %xmm0, %xmm2
# CHECK-NEXT: 2 9 0.50 * subsd (%rax), %xmm2
# CHECK-NEXT: 1 2 1.00 ucomisd %xmm0, %xmm1
# CHECK-NEXT: 2 7 1.00 * ucomisd (%rax), %xmm1
@ -681,28 +681,28 @@ xorpd (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
# CHECK-NEXT: - 38.00 101.08 69.08 62.67 62.67 14.00 112.08 1.75 4.67
# CHECK-NEXT: - 38.00 103.08 82.08 62.67 62.67 14.00 94.08 1.75 4.67
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - addpd %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - addpd (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - addsd %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - addsd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - addpd %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - addpd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - addsd %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - addsd (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - andnpd %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - andnpd (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - andpd %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - andpd (%rax), %xmm2
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 1.25 - clflush (%rax)
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - cmppd $0, %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cmppd $0, (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - cmpsd $0, %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cmpsd $0, (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - cmppd $0, %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cmppd $0, (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - cmpsd $0, %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cmpsd $0, (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - comisd %xmm0, %xmm1
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - comisd (%rax), %xmm1
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - cvtdq2pd %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - cvtdq2pd %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - cvtdq2pd (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - cvtdq2ps %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - cvtdq2ps %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cvtdq2ps (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtpd2dq %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - cvtpd2dq (%rax), %xmm2
@ -710,16 +710,16 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - cvtpd2pi (%rax), %mm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtpd2ps %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - cvtpd2ps (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - cvtpi2pd %mm0, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - cvtpi2pd (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - cvtps2dq %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - cvtpi2pd %mm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtpi2pd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - cvtps2dq %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cvtps2dq (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtps2pd %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cvtps2pd (%rax), %xmm2
# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - cvtsd2si %xmm0, %ecx
# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - cvtsd2si %xmm0, %rcx
# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - cvtsd2si (%rax), %ecx
# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - cvtsd2si (%rax), %rcx
# CHECK-NEXT: - - 1.00 1.00 - - - - - - cvtsd2si %xmm0, %ecx
# CHECK-NEXT: - - 1.00 1.00 - - - - - - cvtsd2si %xmm0, %rcx
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - cvtsd2si (%rax), %ecx
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - cvtsd2si (%rax), %rcx
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtsd2ss %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - cvtsd2ss (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtsi2sdl %ecx, %xmm2
@ -732,26 +732,26 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - cvttpd2dq (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvttpd2pi %xmm0, %mm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - cvttpd2pi (%rax), %mm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - cvttps2dq %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - cvttps2dq %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cvttps2dq (%rax), %xmm2
# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - cvttsd2si %xmm0, %ecx
# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - cvttsd2si %xmm0, %rcx
# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - cvttsd2si (%rax), %ecx
# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - cvttsd2si (%rax), %rcx
# CHECK-NEXT: - - 1.00 1.00 - - - - - - cvttsd2si %xmm0, %ecx
# CHECK-NEXT: - - 1.00 1.00 - - - - - - cvttsd2si %xmm0, %rcx
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - cvttsd2si (%rax), %ecx
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - cvttsd2si (%rax), %rcx
# CHECK-NEXT: - 3.00 1.00 - - - - - - - divpd %xmm0, %xmm2
# CHECK-NEXT: - 4.00 1.00 - 0.50 0.50 - - - - divpd (%rax), %xmm2
# CHECK-NEXT: - 3.00 1.00 - - - - - - - divsd %xmm0, %xmm2
# CHECK-NEXT: - 4.00 1.00 - 0.50 0.50 - - - - divsd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - 0.50 0.50 - lfence
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 maskmovdqu %xmm0, %xmm1
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - maxpd %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - maxpd (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - maxsd %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - maxsd (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - minpd %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - minpd (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - minsd %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - minsd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - maxpd %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - maxpd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - maxsd %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - maxsd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - minpd %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - minpd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - minsd %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - minsd (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - movapd %xmm0, %xmm2
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 movapd %xmm0, (%rax)
# CHECK-NEXT: - - - - 0.50 0.50 - - - - movapd (%rax), %xmm2
@ -787,10 +787,10 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - movupd %xmm0, %xmm2
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 movupd %xmm0, (%rax)
# CHECK-NEXT: - - - - 0.50 0.50 - - - - movupd (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - mulpd %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - mulpd (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - mulsd %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - mulsd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - mulpd %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - mulpd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - mulsd %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - mulsd (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - orpd %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - orpd (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 1.00 - - packssdw %xmm0, %xmm2
@ -838,8 +838,8 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - pcmpgtw %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pcmpgtw (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - pextrw $1, %xmm0, %ecx
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - pmaddwd %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - pmaddwd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - pmaddwd %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pmaddwd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - pmaxsw %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pmaxsw (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - pmaxub %xmm0, %xmm2
@ -849,16 +849,16 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - pminub %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pminub (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - pmovmskb %xmm0, %ecx
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - pmulhuw %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - pmulhuw (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - pmulhw %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - pmulhw (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - pmullw %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - pmullw (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - pmulhuw %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pmulhuw (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - pmulhw %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pmulhw (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - pmullw %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pmullw (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - pmuludq %mm0, %mm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - pmuludq (%rax), %mm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - pmuludq %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - pmuludq (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - pmuludq %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pmuludq (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - por %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - por (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 1.00 - - psadbw %xmm0, %xmm2
@ -937,10 +937,10 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: - 6.00 1.00 - 0.50 0.50 - - - - sqrtpd (%rax), %xmm2
# CHECK-NEXT: - 6.00 1.00 - - - - - - - sqrtsd %xmm0, %xmm2
# CHECK-NEXT: - 6.00 1.00 - 0.50 0.50 - - - - sqrtsd (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - subpd %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - subpd (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - subsd %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - subsd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - subpd %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - subpd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - subsd %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - subsd (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - ucomisd %xmm0, %xmm1
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - ucomisd (%rax), %xmm1
# CHECK-NEXT: - - - - - - - 1.00 - - unpckhpd %xmm0, %xmm2

View File

@ -39,9 +39,9 @@ movsldup (%rax), %xmm2
# CHECK-NEXT: [6]: HasSideEffects
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 4 0.33 addsubpd %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 addsubpd %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * addsubpd (%rax), %xmm2
# CHECK-NEXT: 1 4 0.33 addsubps %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 addsubps %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * addsubps (%rax), %xmm2
# CHECK-NEXT: 3 6 2.00 haddpd %xmm0, %xmm2
# CHECK-NEXT: 4 12 2.00 * haddpd (%rax), %xmm2
@ -73,14 +73,14 @@ movsldup (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
# CHECK-NEXT: - - 4.00 4.00 5.00 5.00 - 23.00 - -
# CHECK-NEXT: - - 4.67 4.67 5.00 5.00 - 21.67 - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - addsubpd %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - addsubpd (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - addsubps %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - addsubps (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - addsubpd %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - addsubpd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - addsubps %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - addsubps (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - haddpd %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - haddpd (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - haddps %xmm0, %xmm2

View File

@ -189,8 +189,8 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: 2 3 1.00 pextrq $1, %xmm0, %rcx
# CHECK-NEXT: 3 2 1.00 * pextrq $1, %xmm0, (%rax)
# CHECK-NEXT: 3 2 1.00 * pextrw $1, %xmm0, (%rax)
# CHECK-NEXT: 1 4 0.33 phminposuw %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * phminposuw (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 phminposuw %xmm0, %xmm2
# CHECK-NEXT: 2 10 1.00 * phminposuw (%rax), %xmm2
# CHECK-NEXT: 2 2 2.00 pinsrb $1, %eax, %xmm1
# CHECK-NEXT: 2 6 1.00 * pinsrb $1, (%rax), %xmm1
# CHECK-NEXT: 2 2 2.00 pinsrd $1, %eax, %xmm1
@ -237,20 +237,20 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: 2 6 1.00 * pmovzxwd (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 pmovzxwq %xmm0, %xmm2
# CHECK-NEXT: 2 6 1.00 * pmovzxwq (%rax), %xmm2
# CHECK-NEXT: 1 4 0.33 pmuldq %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 pmuldq %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * pmuldq (%rax), %xmm2
# CHECK-NEXT: 2 10 0.67 pmulld %xmm0, %xmm2
# CHECK-NEXT: 3 16 0.67 * pmulld (%rax), %xmm2
# CHECK-NEXT: 2 10 1.00 pmulld %xmm0, %xmm2
# CHECK-NEXT: 3 16 1.00 * pmulld (%rax), %xmm2
# CHECK-NEXT: 2 3 1.00 ptest %xmm0, %xmm1
# CHECK-NEXT: 3 9 1.00 * ptest (%rax), %xmm1
# CHECK-NEXT: 2 8 0.67 roundpd $1, %xmm0, %xmm2
# CHECK-NEXT: 3 14 0.67 * roundpd $1, (%rax), %xmm2
# CHECK-NEXT: 2 8 0.67 roundps $1, %xmm0, %xmm2
# CHECK-NEXT: 3 14 0.67 * roundps $1, (%rax), %xmm2
# CHECK-NEXT: 2 8 0.67 roundsd $1, %xmm0, %xmm2
# CHECK-NEXT: 3 14 0.67 * roundsd $1, (%rax), %xmm2
# CHECK-NEXT: 2 8 0.67 roundss $1, %xmm0, %xmm2
# CHECK-NEXT: 3 14 0.67 * roundss $1, (%rax), %xmm2
# CHECK-NEXT: 2 8 1.00 roundpd $1, %xmm0, %xmm2
# CHECK-NEXT: 3 14 1.00 * roundpd $1, (%rax), %xmm2
# CHECK-NEXT: 2 8 1.00 roundps $1, %xmm0, %xmm2
# CHECK-NEXT: 3 14 1.00 * roundps $1, (%rax), %xmm2
# CHECK-NEXT: 2 8 1.00 roundsd $1, %xmm0, %xmm2
# CHECK-NEXT: 3 14 1.00 * roundsd $1, (%rax), %xmm2
# CHECK-NEXT: 2 8 1.00 roundss $1, %xmm0, %xmm2
# CHECK-NEXT: 3 14 1.00 * roundss $1, (%rax), %xmm2
# CHECK: Resources:
# CHECK-NEXT: [0] - SKXDivider
@ -266,7 +266,7 @@ roundss $1, (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
# CHECK-NEXT: - - 31.67 25.67 23.67 23.67 5.00 74.67 - 1.67
# CHECK-NEXT: - - 36.67 28.67 23.67 23.67 5.00 66.67 - 1.67
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@ -304,8 +304,8 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - pextrq $1, %xmm0, %rcx
# CHECK-NEXT: - - - - 0.33 0.33 1.00 1.00 - 0.33 pextrq $1, %xmm0, (%rax)
# CHECK-NEXT: - - - - 0.33 0.33 1.00 1.00 - 0.33 pextrw $1, %xmm0, (%rax)
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - phminposuw %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - phminposuw (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - phminposuw %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - phminposuw (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 2.00 - - pinsrb $1, %eax, %xmm1
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - pinsrb $1, (%rax), %xmm1
# CHECK-NEXT: - - - - - - - 2.00 - - pinsrd $1, %eax, %xmm1
@ -352,17 +352,17 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - pmovzxwd (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 1.00 - - pmovzxwq %xmm0, %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - pmovzxwq (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - pmuldq %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - pmuldq (%rax), %xmm2
# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - pmulld %xmm0, %xmm2
# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - pmulld (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - pmuldq %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pmuldq (%rax), %xmm2
# CHECK-NEXT: - - 1.00 1.00 - - - - - - pmulld %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - pmulld (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - ptest %xmm0, %xmm1
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - ptest (%rax), %xmm1
# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - roundpd $1, %xmm0, %xmm2
# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - roundpd $1, (%rax), %xmm2
# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - roundps $1, %xmm0, %xmm2
# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - roundps $1, (%rax), %xmm2
# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - roundsd $1, %xmm0, %xmm2
# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - roundsd $1, (%rax), %xmm2
# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - roundss $1, %xmm0, %xmm2
# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - roundss $1, (%rax), %xmm2
# CHECK-NEXT: - - 1.00 1.00 - - - - - - roundpd $1, %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - roundpd $1, (%rax), %xmm2
# CHECK-NEXT: - - 1.00 1.00 - - - - - - roundps $1, %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - roundps $1, (%rax), %xmm2
# CHECK-NEXT: - - 1.00 1.00 - - - - - - roundsd $1, %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - roundsd $1, (%rax), %xmm2
# CHECK-NEXT: - - 1.00 1.00 - - - - - - roundss $1, %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - roundss $1, (%rax), %xmm2

View File

@ -148,11 +148,11 @@ psignw (%rax), %xmm2
# CHECK-NEXT: 4 9 2.00 * phsubw (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 pmaddubsw %mm0, %mm2
# CHECK-NEXT: 2 9 1.00 * pmaddubsw (%rax), %mm2
# CHECK-NEXT: 1 4 0.33 pmaddubsw %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 pmaddubsw %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * pmaddubsw (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 pmulhrsw %mm0, %mm2
# CHECK-NEXT: 2 9 1.00 * pmulhrsw (%rax), %mm2
# CHECK-NEXT: 1 4 0.33 pmulhrsw %xmm0, %xmm2
# CHECK-NEXT: 1 4 0.50 pmulhrsw %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * pmulhrsw (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 pshufb %mm0, %mm2
# CHECK-NEXT: 2 6 1.00 * pshufb (%rax), %mm2
@ -185,7 +185,7 @@ psignw (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
# CHECK-NEXT: - - 30.00 12.00 16.00 16.00 - 70.00 - -
# CHECK-NEXT: - - 30.67 12.67 16.00 16.00 - 68.67 - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@ -231,12 +231,12 @@ psignw (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - phsubw (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - pmaddubsw %mm0, %mm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - pmaddubsw (%rax), %mm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - pmaddubsw %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - pmaddubsw (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - pmaddubsw %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pmaddubsw (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - - - pmulhrsw %mm0, %mm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - pmulhrsw (%rax), %mm2
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - pmulhrsw %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - pmulhrsw (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - - - pmulhrsw %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pmulhrsw (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 1.00 - - pshufb %mm0, %mm2
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - pshufb (%rax), %mm2
# CHECK-NEXT: - - - - - - - 1.00 - - pshufb %xmm0, %xmm2

View File

@ -822,13 +822,13 @@ xorq (%rax), %rdi
# CHECK-NEXT: 3 7 1.00 * * negl (%rax)
# CHECK-NEXT: 1 1 0.25 negq %rcx
# CHECK-NEXT: 3 7 1.00 * * negq (%r10)
# CHECK-NEXT: 1 1 0.25 nop
# CHECK-NEXT: 1 1 0.25 nopw %di
# CHECK-NEXT: 1 1 0.25 nopw (%rcx)
# CHECK-NEXT: 1 1 0.25 nopl %esi
# CHECK-NEXT: 1 1 0.25 nopl (%r8)
# CHECK-NEXT: 1 1 0.25 nopq %rdx
# CHECK-NEXT: 1 1 0.25 nopq (%r9)
# CHECK-NEXT: 1 1 0.17 nop
# CHECK-NEXT: 1 1 0.17 nopw %di
# CHECK-NEXT: 1 1 0.17 nopw (%rcx)
# CHECK-NEXT: 1 1 0.17 nopl %esi
# CHECK-NEXT: 1 1 0.17 nopl (%r8)
# CHECK-NEXT: 1 1 0.17 nopq %rdx
# CHECK-NEXT: 1 1 0.17 nopq (%r9)
# CHECK-NEXT: 1 1 0.25 notb %dil
# CHECK-NEXT: 3 7 1.00 * * notb (%r8)
# CHECK-NEXT: 1 1 0.25 notw %si
@ -1164,7 +1164,7 @@ xorq (%rax), %rdi
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
# CHECK-NEXT: 60.00 - 431.50 225.50 202.00 202.00 167.00 186.00 416.00 69.00
# CHECK-NEXT: 60.00 - 429.75 223.75 202.00 202.00 167.00 184.25 414.25 69.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@ -1381,13 +1381,13 @@ xorq (%rax), %rdi
# CHECK-NEXT: - - 0.25 0.25 0.83 0.83 1.00 0.25 0.25 0.33 negl (%rax)
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - negq %rcx
# CHECK-NEXT: - - 0.25 0.25 0.83 0.83 1.00 0.25 0.25 0.33 negq (%r10)
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - nop
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - nopw %di
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - nopw (%rcx)
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - nopl %esi
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - nopl (%r8)
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - nopq %rdx
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - nopq (%r9)
# CHECK-NEXT: - - - - - - - - - - nop
# CHECK-NEXT: - - - - - - - - - - nopw %di
# CHECK-NEXT: - - - - - - - - - - nopw (%rcx)
# CHECK-NEXT: - - - - - - - - - - nopl %esi
# CHECK-NEXT: - - - - - - - - - - nopl (%r8)
# CHECK-NEXT: - - - - - - - - - - nopq %rdx
# CHECK-NEXT: - - - - - - - - - - nopq (%r9)
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - notb %dil
# CHECK-NEXT: - - 0.25 0.25 0.83 0.83 1.00 0.25 0.25 0.33 notb (%r8)
# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - notw %si