1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00

[X86][Sched] A bunch of fixes to the Zen2 sched model latencies.

Summary:
As determined with `llvm-exegesis`.

Some of these look like typos/misunderstandings of the sched model td
spec:
  - latency defaults to `1` when not set => maybe we could avoid
    having a default?
  - problems with regexps not being anchored by default (XCHG matching
    CMPXCHG)

Note that this is not complete: it fixes only the most obvious mistakes,
and only for latency (not uops).

Reviewers: RKSimon, GGanesh

Subscribers: hiraditya, jfb, mstojanovic, hfinkel, craig.topper, andreadb, lebedev.ri, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D73172
This commit is contained in:
Clement Courbet 2020-01-22 11:44:12 +01:00
parent 3e20bf5676
commit 56c810e79e
10 changed files with 341 additions and 344 deletions

View File

@ -187,7 +187,7 @@ defm : Zn2WriteResPair<WriteIMul8, [Zn2ALU1, Zn2Multiplier], 4>;
defm : X86WriteRes<WriteBSWAP32, [Zn2ALU], 1, [4], 1>;
defm : X86WriteRes<WriteBSWAP64, [Zn2ALU], 1, [4], 1>;
defm : X86WriteRes<WriteCMPXCHG, [Zn2ALU], 1, [1], 1>;
defm : X86WriteRes<WriteCMPXCHG, [Zn2ALU], 3, [1], 1>;
defm : X86WriteRes<WriteCMPXCHGRMW,[Zn2ALU,Zn2AGU], 8, [1,1], 5>;
defm : X86WriteRes<WriteXCHG, [Zn2ALU], 1, [2], 2>;
@ -216,7 +216,7 @@ defm : X86WriteRes<WriteBitTestSet, [Zn2ALU], 2, [1], 2>;
// Bit counts.
defm : Zn2WriteResPair<WriteBSF, [Zn2ALU], 3>;
defm : Zn2WriteResPair<WriteBSR, [Zn2ALU], 3>;
defm : Zn2WriteResPair<WriteBSR, [Zn2ALU], 4>;
defm : Zn2WriteResPair<WriteLZCNT, [Zn2ALU], 1>;
defm : Zn2WriteResPair<WriteTZCNT, [Zn2ALU], 2>;
defm : Zn2WriteResPair<WritePOPCNT, [Zn2ALU], 1>;
@ -272,13 +272,13 @@ defm : Zn2WriteResFpuPair<WriteFAdd64, [Zn2FPU0], 3>;
defm : Zn2WriteResFpuPair<WriteFAdd64X, [Zn2FPU0], 3>;
defm : Zn2WriteResFpuPair<WriteFAdd64Y, [Zn2FPU0], 3>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
defm : Zn2WriteResFpuPair<WriteFCmp, [Zn2FPU0], 3>;
defm : Zn2WriteResFpuPair<WriteFCmpX, [Zn2FPU0], 3>;
defm : Zn2WriteResFpuPair<WriteFCmpY, [Zn2FPU0], 3>;
defm : Zn2WriteResFpuPair<WriteFCmp, [Zn2FPU0], 1>;
defm : Zn2WriteResFpuPair<WriteFCmpX, [Zn2FPU0], 1>;
defm : Zn2WriteResFpuPair<WriteFCmpY, [Zn2FPU0], 1>;
defm : X86WriteResPairUnsupported<WriteFCmpZ>;
defm : Zn2WriteResFpuPair<WriteFCmp64, [Zn2FPU0], 3>;
defm : Zn2WriteResFpuPair<WriteFCmp64X, [Zn2FPU0], 3>;
defm : Zn2WriteResFpuPair<WriteFCmp64Y, [Zn2FPU0], 3>;
defm : Zn2WriteResFpuPair<WriteFCmp64, [Zn2FPU0], 1>;
defm : Zn2WriteResFpuPair<WriteFCmp64X, [Zn2FPU0], 1>;
defm : Zn2WriteResFpuPair<WriteFCmp64Y, [Zn2FPU0], 1>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : Zn2WriteResFpuPair<WriteFCom, [Zn2FPU0], 3>;
defm : Zn2WriteResFpuPair<WriteFComX, [Zn2FPU0], 3>;
@ -314,8 +314,8 @@ defm : Zn2WriteResFpuPair<WriteFDiv64, [Zn2FPU3], 15>;
defm : Zn2WriteResFpuPair<WriteFDiv64X, [Zn2FPU3], 15>;
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
defm : Zn2WriteResFpuPair<WriteFSign, [Zn2FPU3], 2>;
defm : Zn2WriteResFpuPair<WriteFRnd, [Zn2FPU3], 4, [1], 1, 7, 0>;
defm : Zn2WriteResFpuPair<WriteFRndY, [Zn2FPU3], 4, [1], 1, 7, 0>;
defm : Zn2WriteResFpuPair<WriteFRnd, [Zn2FPU3], 3, [1], 1, 7, 0>;
defm : Zn2WriteResFpuPair<WriteFRndY, [Zn2FPU3], 3, [1], 1, 7, 0>;
defm : X86WriteResPairUnsupported<WriteFRndZ>;
defm : Zn2WriteResFpuPair<WriteFLogic, [Zn2FPU], 1>;
defm : Zn2WriteResFpuPair<WriteFLogicY, [Zn2FPU], 1>;
@ -326,16 +326,16 @@ defm : X86WriteResPairUnsupported<WriteFTestZ>;
defm : Zn2WriteResFpuPair<WriteFShuffle, [Zn2FPU12], 1>;
defm : Zn2WriteResFpuPair<WriteFShuffleY, [Zn2FPU12], 1>;
defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
defm : Zn2WriteResFpuPair<WriteFVarShuffle, [Zn2FPU12], 1>;
defm : Zn2WriteResFpuPair<WriteFVarShuffleY,[Zn2FPU12], 1>;
defm : Zn2WriteResFpuPair<WriteFVarShuffle, [Zn2FPU12], 3>;
defm : Zn2WriteResFpuPair<WriteFVarShuffleY,[Zn2FPU12], 3>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
defm : Zn2WriteResFpuPair<WriteFMul, [Zn2FPU01], 3, [1], 1, 7, 1>;
defm : Zn2WriteResFpuPair<WriteFMulX, [Zn2FPU01], 3, [1], 1, 7, 1>;
defm : Zn2WriteResFpuPair<WriteFMulY, [Zn2FPU01], 4, [1], 1, 7, 1>;
defm : Zn2WriteResFpuPair<WriteFMulY, [Zn2FPU01], 3, [1], 1, 7, 1>;
defm : X86WriteResPairUnsupported<WriteFMulZ>;
defm : Zn2WriteResFpuPair<WriteFMul64, [Zn2FPU01], 3, [1], 1, 7, 1>;
defm : Zn2WriteResFpuPair<WriteFMul64X, [Zn2FPU01], 3, [1], 1, 7, 1>;
defm : Zn2WriteResFpuPair<WriteFMul64Y, [Zn2FPU01], 4, [1], 1, 7, 1>;
defm : Zn2WriteResFpuPair<WriteFMul64Y, [Zn2FPU01], 3, [1], 1, 7, 1>;
defm : X86WriteResPairUnsupported<WriteFMul64Z>;
defm : Zn2WriteResFpuPair<WriteFMA, [Zn2FPU03], 5>;
defm : Zn2WriteResFpuPair<WriteFMAX, [Zn2FPU03], 5>;
@ -381,7 +381,7 @@ defm : X86WriteRes<WriteEMMS, [Zn2FPU], 2, [1], 1>;
defm : Zn2WriteResFpuPair<WriteVecShift, [Zn2FPU], 1>;
defm : Zn2WriteResFpuPair<WriteVecShiftX, [Zn2FPU2], 1>;
defm : Zn2WriteResFpuPair<WriteVecShiftY, [Zn2FPU2], 2>;
defm : Zn2WriteResFpuPair<WriteVecShiftY, [Zn2FPU2], 1>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
defm : Zn2WriteResFpuPair<WriteVecShiftImm, [Zn2FPU], 1>;
defm : Zn2WriteResFpuPair<WriteVecShiftImmX, [Zn2FPU], 1>;
@ -403,7 +403,7 @@ defm : Zn2WriteResFpuPair<WriteVecIMulX, [Zn2FPU0], 4>;
defm : Zn2WriteResFpuPair<WriteVecIMulY, [Zn2FPU0], 4>;
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
defm : Zn2WriteResFpuPair<WritePMULLD, [Zn2FPU0], 4, [1], 1, 7, 1>;
defm : Zn2WriteResFpuPair<WritePMULLDY, [Zn2FPU0], 3, [1], 1, 7, 1>;
defm : Zn2WriteResFpuPair<WritePMULLDY, [Zn2FPU0], 4, [1], 1, 7, 1>;
defm : X86WriteResPairUnsupported<WritePMULLDZ>;
defm : Zn2WriteResFpuPair<WriteShuffle, [Zn2FPU], 1>;
defm : Zn2WriteResFpuPair<WriteShuffleX, [Zn2FPU], 1>;
@ -425,8 +425,8 @@ defm : X86WriteResPairUnsupported<WritePSADBWZ>;
defm : Zn2WriteResFpuPair<WritePHMINPOS, [Zn2FPU0], 4>;
// Vector Shift Operations
defm : Zn2WriteResFpuPair<WriteVarVecShift, [Zn2FPU12], 1>;
defm : Zn2WriteResFpuPair<WriteVarVecShiftY, [Zn2FPU12], 1>;
defm : Zn2WriteResFpuPair<WriteVarVecShift, [Zn2FPU12], 3>;
defm : Zn2WriteResFpuPair<WriteVarVecShiftY, [Zn2FPU12], 3>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
// Vector insert/extract operations.
@ -470,6 +470,12 @@ defm : Zn2WriteResFpuPair<WriteFVarShuffle256, [Zn2FPU], 100>;
def Zn2WriteMicrocoded : SchedWriteRes<[]> {
let Latency = 100;
}
defm : Zn2WriteResPair<WriteDPPS, [], 15>;
defm : Zn2WriteResPair<WriteFHAdd, [], 7>;
defm : Zn2WriteResPair<WriteFHAddY, [], 7>;
defm : Zn2WriteResPair<WritePHAdd, [], 3>;
defm : Zn2WriteResPair<WritePHAddX, [], 3>;
defm : Zn2WriteResPair<WritePHAddY, [], 3>;
def : SchedAlias<WriteMicrocoded, Zn2WriteMicrocoded>;
def : SchedAlias<WriteFCMOV, Zn2WriteMicrocoded>;
@ -518,14 +524,14 @@ def Zn2WriteXCHG : SchedWriteRes<[Zn2ALU]> {
let NumMicroOps = 2;
}
def : InstRW<[Zn2WriteXCHG], (instregex "XCHG(8|16|32|64)rr", "XCHG(16|32|64)ar")>;
def : InstRW<[Zn2WriteXCHG], (instregex "^XCHG(8|16|32|64)rr", "^XCHG(16|32|64)ar")>;
// r,m.
def Zn2WriteXCHGrm : SchedWriteRes<[Zn2AGU, Zn2ALU]> {
let Latency = 5;
let NumMicroOps = 2;
}
def : InstRW<[Zn2WriteXCHGrm, ReadAfterLd], (instregex "XCHG(8|16|32|64)rm")>;
def : InstRW<[Zn2WriteXCHGrm, ReadAfterLd], (instregex "^XCHG(8|16|32|64)rm")>;
def : InstRW<[WriteMicrocoded], (instrs XLAT)>;
@ -595,8 +601,11 @@ def : InstRW<[WriteALULd],
def Zn2WriteMul16 : SchedWriteRes<[Zn2ALU1, Zn2Multiplier]> {
let Latency = 3;
}
def Zn2WriteMul16Imm : SchedWriteRes<[Zn2ALU1, Zn2Multiplier]> {
let Latency = 4;
}
def : SchedAlias<WriteIMul16, Zn2WriteMul16>;
def : SchedAlias<WriteIMul16Imm, Zn2WriteMul16>;
def : SchedAlias<WriteIMul16Imm, Zn2WriteMul16Imm>;
def : SchedAlias<WriteIMul16Reg, Zn2WriteMul16>;
// m16.
@ -1002,6 +1011,7 @@ def : InstRW<[WriteMicrocoded], (instrs FNINIT)>;
// mm <- mm.
def Zn2WriteFPU12 : SchedWriteRes<[Zn2FPU12]> ;
def Zn2WriteFPU12Y : SchedWriteRes<[Zn2FPU12]> {
let Latency = 4;
let NumMicroOps = 2;
}
def Zn2WriteFPU12m : SchedWriteRes<[Zn2AGU, Zn2FPU12]> ;
@ -1110,15 +1120,6 @@ def : InstRW<[WriteMicrocoded], (instregex "VPGATHER(Q|D)(Q|D)(Y?)rm")>;
//-- Arithmetic instructions --//
// HADD, HSUB PS/PD
// PHADD|PHSUB (S) W/D.
def : SchedAlias<WritePHAdd, Zn2WriteMicrocoded>;
def : SchedAlias<WritePHAddLd, Zn2WriteMicrocoded>;
def : SchedAlias<WritePHAddX, Zn2WriteMicrocoded>;
def : SchedAlias<WritePHAddXLd, Zn2WriteMicrocoded>;
def : SchedAlias<WritePHAddY, Zn2WriteMicrocoded>;
def : SchedAlias<WritePHAddYLd, Zn2WriteMicrocoded>;
// PCMPGTQ.
def Zn2WritePCMPGTQr : SchedWriteRes<[Zn2FPU03]>;
def : InstRW<[Zn2WritePCMPGTQr], (instregex "(V?)PCMPGTQ(Y?)rr")>;
@ -1138,8 +1139,12 @@ def : InstRW<[Zn2WritePCMPGTQYm], (instrs VPCMPGTQYrm)>;
// PSLL,PSRL,PSRA W/D/Q.
// x,x / v,v,x.
def Zn2WritePShift : SchedWriteRes<[Zn2FPU2]> ;
def Zn2WritePShiftY : SchedWriteRes<[Zn2FPU2]> ;
def Zn2WritePShift : SchedWriteRes<[Zn2FPU2]> {
let Latency = 3;
}
def Zn2WritePShiftY : SchedWriteRes<[Zn2FPU2]> {
let Latency = 3;
}
// PSLL,PSRL DQ.
def : InstRW<[Zn2WritePShift], (instregex "(V?)PS(R|L)LDQri")>;
@ -1281,7 +1286,7 @@ def Zn2WriteCVTDQ2PDr: SchedWriteRes<[Zn2FPU12,Zn2FPU3]> {
}
// CVTDQ2PD.
// x,x.
def : InstRW<[Zn2WriteCVTDQ2PDr], (instregex "(V)?CVTDQ2PDrr")>;
def : InstRW<[Zn2WriteCVTDQ2PDr], (instregex "(V)?CVTDQ2P(D|S)rr")>;
// Same as xmm
// y,x.
@ -1291,9 +1296,9 @@ def : InstRW<[Zn2WriteCVTDQ2PDr], (instrs VCVTDQ2PSYrr)>;
def Zn2WriteCVTPD2DQr: SchedWriteRes<[Zn2FPU12, Zn2FPU3]> {
let Latency = 3;
}
// CVT(T)PD2DQ.
// CVT(T)P(D|S)2DQ.
// x,x.
def : InstRW<[Zn2WriteCVTPD2DQr], (instregex "(V?)CVT(T?)PD2DQrr")>;
def : InstRW<[Zn2WriteCVTPD2DQr], (instregex "(V?)CVT(T?)P(D|S)2DQrr")>;
def Zn2WriteCVTPD2DQLd: SchedWriteRes<[Zn2AGU,Zn2FPU12,Zn2FPU3]> {
let Latency = 10;
@ -1323,7 +1328,7 @@ def : InstRW<[Zn2WriteCVTPS2PDr], (instrs MMX_CVTPI2PDirr)>;
def : InstRW<[Zn2WriteCVTPS2PIr], (instregex "MMX_CVT(T?)PD2PIirr")>;
def Zn2WriteCVSTSI2SSr: SchedWriteRes<[Zn2FPU3]> {
let Latency = 4;
let Latency = 3;
}
// same as CVTPD2DQr
@ -1335,7 +1340,7 @@ def : InstRW<[Zn2WriteCVTPD2DQr], (instregex "(V?)CVT(T?)SS2SI(64)?rr")>;
def : InstRW<[Zn2WriteCVTPD2DQLd], (instregex "(V?)CVT(T?)SS2SI(64)?rm")>;
def Zn2WriteCVSTSI2SDr: SchedWriteRes<[Zn2FPU013, Zn2FPU3]> {
let Latency = 4;
let Latency = 3;
}
// CVTSI2SD.
// x,r32/64.
@ -1377,7 +1382,7 @@ defm : X86WriteResUnsupported<WriteCvtPH2PSZLd>;
//-- SSE4A instructions --//
// EXTRQ
def Zn2WriteEXTRQ: SchedWriteRes<[Zn2FPU12, Zn2FPU2]> {
let Latency = 2;
let Latency = 3;
}
def : InstRW<[Zn2WriteEXTRQ], (instregex "EXTRQ")>;
@ -1449,12 +1454,6 @@ def : InstRW<[Zn2WriteSHA256RNDS2Ld], (instregex "SHA256RNDS2rm")>;
//-- Arithmetic instructions --//
// HADD, HSUB PS/PD
def : SchedAlias<WriteFHAdd, Zn2WriteMicrocoded>;
def : SchedAlias<WriteFHAddLd, Zn2WriteMicrocoded>;
def : SchedAlias<WriteFHAddY, Zn2WriteMicrocoded>;
def : SchedAlias<WriteFHAddYLd, Zn2WriteMicrocoded>;
// VDIVPS.
// TODO - convert to Zn2WriteResFpuPair
// y,y,y.
@ -1491,11 +1490,9 @@ def : SchedAlias<WriteFDiv64YLd, Zn2WriteVDIVPDYLd>;
// DPPS.
// x,x,i / v,v,v,i.
def : SchedAlias<WriteDPPS, Zn2WriteMicrocoded>;
def : SchedAlias<WriteDPPSY, Zn2WriteMicrocoded>;
// x,m,i / v,v,m,i.
def : SchedAlias<WriteDPPSLd, Zn2WriteMicrocoded>;
def : SchedAlias<WriteDPPSYLd,Zn2WriteMicrocoded>;
// DPPD.

View File

@ -1098,18 +1098,18 @@ vzeroupper
# CHECK-NEXT: 1 8 0.50 * vbroadcastsd (%rax), %ymm2
# CHECK-NEXT: 1 8 0.50 * vbroadcastss (%rax), %xmm2
# CHECK-NEXT: 1 8 0.50 * vbroadcastss (%rax), %ymm2
# CHECK-NEXT: 1 3 1.00 vcmpeqpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 10 1.00 * vcmpeqpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vcmpeqpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 10 1.00 * vcmpeqpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 1.00 vcmpeqps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 10 1.00 * vcmpeqps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vcmpeqps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 10 1.00 * vcmpeqps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 1.00 vcmpeqsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 10 1.00 * vcmpeqsd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vcmpeqss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 10 1.00 * vcmpeqss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 vcmpeqpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vcmpeqpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 vcmpeqpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 8 1.00 * vcmpeqpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 1.00 vcmpeqps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vcmpeqps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 vcmpeqps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 8 1.00 * vcmpeqps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 1.00 vcmpeqsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vcmpeqsd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 vcmpeqss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vcmpeqss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vcomisd %xmm0, %xmm1
# CHECK-NEXT: 1 10 1.00 * vcomisd (%rax), %xmm1
# CHECK-NEXT: 1 3 1.00 vcomiss %xmm0, %xmm1
@ -1118,7 +1118,7 @@ vzeroupper
# CHECK-NEXT: 1 12 1.00 * vcvtdq2pd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 vcvtdq2pd %xmm0, %ymm2
# CHECK-NEXT: 1 12 1.00 * vcvtdq2pd (%rax), %ymm2
# CHECK-NEXT: 1 5 1.00 vcvtdq2ps %xmm0, %xmm2
# CHECK-NEXT: 1 3 1.00 vcvtdq2ps %xmm0, %xmm2
# CHECK-NEXT: 1 12 1.00 * vcvtdq2ps (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 vcvtdq2ps %ymm0, %ymm2
# CHECK-NEXT: 1 12 1.00 * vcvtdq2ps (%rax), %ymm2
@ -1130,7 +1130,7 @@ vzeroupper
# CHECK-NEXT: 2 10 0.50 * vcvtpd2psx (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 vcvtpd2ps %ymm0, %xmm2
# CHECK-NEXT: 1 10 1.00 * vcvtpd2psy (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 vcvtps2dq %xmm0, %xmm2
# CHECK-NEXT: 1 3 1.00 vcvtps2dq %xmm0, %xmm2
# CHECK-NEXT: 1 12 1.00 * vcvtps2dq (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 vcvtps2dq %ymm0, %ymm2
# CHECK-NEXT: 1 12 1.00 * vcvtps2dq (%rax), %ymm2
@ -1144,8 +1144,8 @@ vzeroupper
# CHECK-NEXT: 1 11 1.00 * vcvtsd2si (%rax), %rcx
# CHECK-NEXT: 1 3 1.00 vcvtsd2ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vcvtsd2ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 1.00 vcvtsi2sd %ecx, %xmm0, %xmm2
# CHECK-NEXT: 1 4 1.00 vcvtsi2sd %rcx, %xmm0, %xmm2
# CHECK-NEXT: 1 3 1.00 vcvtsi2sd %ecx, %xmm0, %xmm2
# CHECK-NEXT: 1 3 1.00 vcvtsi2sd %rcx, %xmm0, %xmm2
# CHECK-NEXT: 1 12 1.00 * vcvtsi2sdl (%rax), %xmm0, %xmm2
# CHECK-NEXT: 1 12 1.00 * vcvtsi2sdq (%rax), %xmm0, %xmm2
# CHECK-NEXT: 1 5 1.00 vcvtsi2ss %ecx, %xmm0, %xmm2
@ -1162,7 +1162,7 @@ vzeroupper
# CHECK-NEXT: 2 10 1.00 * vcvttpd2dqx (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 vcvttpd2dq %ymm0, %xmm2
# CHECK-NEXT: 2 10 1.00 * vcvttpd2dqy (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 vcvttps2dq %xmm0, %xmm2
# CHECK-NEXT: 1 3 1.00 vcvttps2dq %xmm0, %xmm2
# CHECK-NEXT: 1 12 1.00 * vcvttps2dq (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 vcvttps2dq %ymm0, %ymm2
# CHECK-NEXT: 1 12 1.00 * vcvttps2dq (%rax), %ymm2
@ -1188,30 +1188,30 @@ vzeroupper
# CHECK-NEXT: 1 22 1.00 * vdivss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 100 0.25 vdppd $22, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 100 0.25 * vdppd $22, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 100 0.25 vdpps $22, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 100 0.25 * vdpps $22, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 15 0.25 vdpps $22, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 19 0.33 * vdpps $22, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 100 0.25 vdpps $22, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 100 0.25 * vdpps $22, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.33 vextractf128 $1, %ymm0, %xmm2
# CHECK-NEXT: 2 8 0.33 * vextractf128 $1, %ymm0, (%rax)
# CHECK-NEXT: 1 2 2.00 vextractps $1, %xmm0, %ecx
# CHECK-NEXT: 2 5 2.00 * vextractps $1, %xmm0, (%rax)
# CHECK-NEXT: 1 100 0.25 vhaddpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 100 0.25 * vhaddpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 100 0.25 vhaddpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 100 0.25 * vhaddpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 100 0.25 vhaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 100 0.25 * vhaddps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 100 0.25 vhaddps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 100 0.25 * vhaddps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 100 0.25 vhsubpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 100 0.25 * vhsubpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 100 0.25 vhsubpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 100 0.25 * vhsubpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 100 0.25 vhsubps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 100 0.25 * vhsubps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 100 0.25 vhsubps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 100 0.25 * vhsubps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 7 0.25 vhaddpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 11 0.33 * vhaddpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 7 0.25 vhaddpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.33 * vhaddpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 7 0.25 vhaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 11 0.33 * vhaddps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 7 0.25 vhaddps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.33 * vhaddps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 7 0.25 vhsubpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 11 0.33 * vhsubpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 7 0.25 vhsubpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.33 * vhsubpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 7 0.25 vhsubps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 11 0.33 * vhsubps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 7 0.25 vhsubps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.33 * vhsubps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 2 0.33 vinsertf128 $1, %xmm0, %ymm1, %ymm2
# CHECK-NEXT: 2 9 0.33 * vinsertf128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vinsertps $1, %xmm0, %xmm1, %xmm2
@ -1228,30 +1228,30 @@ vzeroupper
# CHECK-NEXT: 2 8 0.50 * vmaskmovps (%rax), %ymm0, %ymm2
# CHECK-NEXT: 1 4 0.50 * * vmaskmovps %xmm0, %xmm1, (%rax)
# CHECK-NEXT: 2 5 1.00 * * vmaskmovps %ymm0, %ymm1, (%rax)
# CHECK-NEXT: 1 3 1.00 vmaxpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 10 1.00 * vmaxpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vmaxpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 10 1.00 * vmaxpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 1.00 vmaxps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 10 1.00 * vmaxps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vmaxps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 10 1.00 * vmaxps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 1.00 vmaxsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 10 1.00 * vmaxsd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vmaxss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 10 1.00 * vmaxss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vminpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 10 1.00 * vminpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vminpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 10 1.00 * vminpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 1.00 vminps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 10 1.00 * vminps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vminps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 10 1.00 * vminps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 1.00 vminsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 10 1.00 * vminsd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vminss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 10 1.00 * vminss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 vmaxpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vmaxpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 vmaxpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 8 1.00 * vmaxpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 1.00 vmaxps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vmaxps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 vmaxps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 8 1.00 * vmaxps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 1.00 vmaxsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vmaxsd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 vmaxss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vmaxss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 vminpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vminpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 vminpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 8 1.00 * vminpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 1.00 vminps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vminps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 vminps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 8 1.00 * vminps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 1.00 vminsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vminsd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 vminss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vminss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.25 vmovapd %xmm0, %xmm2
# CHECK-NEXT: 1 1 0.33 * vmovapd %xmm0, (%rax)
# CHECK-NEXT: 1 8 0.33 * vmovapd (%rax), %xmm2
@ -1341,12 +1341,12 @@ vzeroupper
# CHECK-NEXT: 1 100 0.25 * vmpsadbw $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 0.50 vmulpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vmulpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vmulpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vmulpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 0.50 vmulpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.50 * vmulpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 0.50 vmulps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vmulps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 0.50 vmulps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 0.50 * vmulps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 0.50 vmulps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.50 * vmulps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 0.50 vmulsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vmulsd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 0.50 vmulss %xmm0, %xmm1, %xmm2
@ -1433,20 +1433,20 @@ vzeroupper
# CHECK-NEXT: 1 100 0.25 * vperm2f128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vpermilpd $1, %xmm0, %xmm2
# CHECK-NEXT: 1 8 0.50 * vpermilpd $1, (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 vpermilpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 0.50 * vpermilpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 0.50 vpermilpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 10 0.50 * vpermilpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpermilpd $1, %ymm0, %ymm2
# CHECK-NEXT: 1 8 0.50 * vpermilpd $1, (%rax), %ymm2
# CHECK-NEXT: 1 1 0.50 vpermilpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 8 0.50 * vpermilpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 0.50 vpermilpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 10 0.50 * vpermilpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vpermilps $1, %xmm0, %xmm2
# CHECK-NEXT: 1 8 0.50 * vpermilps $1, (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 vpermilps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 0.50 * vpermilps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 0.50 vpermilps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 10 0.50 * vpermilps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpermilps $1, %ymm0, %ymm2
# CHECK-NEXT: 1 8 0.50 * vpermilps $1, (%rax), %ymm2
# CHECK-NEXT: 1 1 0.50 vpermilps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 8 0.50 * vpermilps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 0.50 vpermilps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 10 0.50 * vpermilps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 2 2.00 vpextrb $1, %xmm0, %ecx
# CHECK-NEXT: 2 5 3.00 * vpextrb $1, %xmm0, (%rax)
# CHECK-NEXT: 1 2 2.00 vpextrd $1, %xmm0, %ecx
@ -1455,20 +1455,20 @@ vzeroupper
# CHECK-NEXT: 2 5 3.00 * vpextrq $1, %xmm0, (%rax)
# CHECK-NEXT: 1 2 2.00 vpextrw $1, %xmm0, %ecx
# CHECK-NEXT: 2 5 3.00 * vpextrw $1, %xmm0, (%rax)
# CHECK-NEXT: 1 100 0.25 vphaddd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 100 0.25 * vphaddd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 100 0.25 vphaddsw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 100 0.25 * vphaddsw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 100 0.25 vphaddw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 100 0.25 * vphaddw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 0.25 vphaddd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 7 0.33 * vphaddd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 0.25 vphaddsw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 7 0.33 * vphaddsw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 0.25 vphaddw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 7 0.33 * vphaddw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 1.00 vphminposuw %xmm0, %xmm2
# CHECK-NEXT: 1 11 1.00 * vphminposuw (%rax), %xmm2
# CHECK-NEXT: 1 100 0.25 vphsubd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 100 0.25 * vphsubd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 100 0.25 vphsubsw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 100 0.25 * vphsubsw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 100 0.25 vphsubw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 100 0.25 * vphsubw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 0.25 vphsubd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 7 0.33 * vphsubd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 0.25 vphsubsw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 7 0.33 * vphsubsw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 0.25 vphsubw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 7 0.33 * vphsubw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.25 vpinsrb $1, %eax, %xmm1, %xmm2
# CHECK-NEXT: 1 8 0.33 * vpinsrb $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.25 vpinsrd $1, %eax, %xmm1, %xmm2
@ -1565,7 +1565,7 @@ vzeroupper
# CHECK-NEXT: 1 1 0.25 vpslld $1, %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 vpslld %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vpslld (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 vpslldq $1, %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vpslldq $1, %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.25 vpsllq $1, %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 vpsllq %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vpsllq (%rax), %xmm1, %xmm2
@ -1581,7 +1581,7 @@ vzeroupper
# CHECK-NEXT: 1 1 0.25 vpsrld $1, %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 vpsrld %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vpsrld (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 vpsrldq $1, %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vpsrldq $1, %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.25 vpsrlq $1, %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 vpsrlq %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vpsrlq (%rax), %xmm1, %xmm2
@ -1632,18 +1632,18 @@ vzeroupper
# CHECK-NEXT: 3 12 0.50 * vrcpps (%rax), %ymm2
# CHECK-NEXT: 1 5 0.50 vrcpss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 12 0.50 * vrcpss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 1.00 vroundpd $1, %xmm0, %xmm2
# CHECK-NEXT: 1 11 1.00 * vroundpd $1, (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 vroundpd $1, %ymm0, %ymm2
# CHECK-NEXT: 1 11 1.00 * vroundpd $1, (%rax), %ymm2
# CHECK-NEXT: 1 4 1.00 vroundps $1, %xmm0, %xmm2
# CHECK-NEXT: 1 11 1.00 * vroundps $1, (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 vroundps $1, %ymm0, %ymm2
# CHECK-NEXT: 1 11 1.00 * vroundps $1, (%rax), %ymm2
# CHECK-NEXT: 1 4 1.00 vroundsd $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 11 1.00 * vroundsd $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 1.00 vroundss $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 11 1.00 * vroundss $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vroundpd $1, %xmm0, %xmm2
# CHECK-NEXT: 1 10 1.00 * vroundpd $1, (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 vroundpd $1, %ymm0, %ymm2
# CHECK-NEXT: 1 10 1.00 * vroundpd $1, (%rax), %ymm2
# CHECK-NEXT: 1 3 1.00 vroundps $1, %xmm0, %xmm2
# CHECK-NEXT: 1 10 1.00 * vroundps $1, (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 vroundps $1, %ymm0, %ymm2
# CHECK-NEXT: 1 10 1.00 * vroundps $1, (%rax), %ymm2
# CHECK-NEXT: 1 3 1.00 vroundsd $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 10 1.00 * vroundsd $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vroundss $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 10 1.00 * vroundss $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vrsqrtps %xmm0, %xmm2
# CHECK-NEXT: 2 12 0.50 * vrsqrtps (%rax), %xmm2
# CHECK-NEXT: 2 5 1.00 vrsqrtps %ymm0, %ymm2
@ -1739,7 +1739,7 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12]
# CHECK-NEXT: 112.00 112.00 112.00 0.25 0.25 0.25 0.25 - 191.92 141.92 168.75 455.42 -
# CHECK-NEXT: 117.00 117.00 117.00 0.25 0.25 0.25 0.25 - 191.92 143.42 170.25 455.42 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions:
@ -1831,7 +1831,7 @@ vzeroupper
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - vcvtdq2pd (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 - vcvtdq2pd %xmm0, %ymm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - vcvtdq2pd (%rax), %ymm2
# CHECK-NEXT: - - - - - - - - - - - 1.00 - vcvtdq2ps %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 - vcvtdq2ps %xmm0, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - vcvtdq2ps (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 - vcvtdq2ps %ymm0, %ymm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - vcvtdq2ps (%rax), %ymm2
@ -1843,7 +1843,7 @@ vzeroupper
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.50 - - 0.50 - vcvtpd2psx (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - - 1.00 - vcvtpd2ps %ymm0, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - vcvtpd2psy (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - - 1.00 - vcvtps2dq %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 - vcvtps2dq %xmm0, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - vcvtps2dq (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - - 1.00 - vcvtps2dq %ymm0, %ymm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - vcvtps2dq (%rax), %ymm2
@ -1875,7 +1875,7 @@ vzeroupper
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 0.50 0.50 1.00 - vcvttpd2dqx (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 - vcvttpd2dq %ymm0, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 0.50 0.50 1.00 - vcvttpd2dqy (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - - 1.00 - vcvttps2dq %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 - vcvttps2dq %xmm0, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - vcvttps2dq (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - - 1.00 - vcvttps2dq %ymm0, %ymm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - vcvttps2dq (%rax), %ymm2
@ -1902,7 +1902,7 @@ vzeroupper
# CHECK-NEXT: - - - - - - - - - - - - - vdppd $22, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - vdppd $22, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - vdpps $22, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - vdpps $22, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vdpps $22, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - vdpps $22, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - - - - - vdpps $22, (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - 0.33 0.33 - 0.33 - vextractf128 $1, %ymm0, %xmm2
@ -1910,21 +1910,21 @@ vzeroupper
# CHECK-NEXT: - - - - - - - - - 0.50 2.50 - - vextractps $1, %xmm0, %ecx
# CHECK-NEXT: 1.67 1.67 1.67 - - - - - - 0.50 2.50 - - vextractps $1, %xmm0, (%rax)
# CHECK-NEXT: - - - - - - - - - - - - - vhaddpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - vhaddpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vhaddpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - vhaddpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - - - - - vhaddpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vhaddpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - - - - - vhaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - vhaddps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vhaddps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - vhaddps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - - - - - vhaddps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vhaddps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - - - - - vhsubpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - vhsubpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vhsubpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - vhsubpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - - - - - vhsubpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vhsubpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - - - - - vhsubps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - vhsubps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vhsubps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - vhsubps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - - - - - vhsubps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vhsubps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - 0.33 0.33 - 0.33 - vinsertf128 $1, %xmm0, %ymm1, %ymm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.33 0.33 - 0.33 - vinsertf128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - vinsertps $1, %xmm0, %xmm1, %xmm2
@ -2169,19 +2169,19 @@ vzeroupper
# CHECK-NEXT: - - - - - - - - - 0.50 2.50 - - vpextrw $1, %xmm0, %ecx
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 1.00 4.00 - - vpextrw $1, %xmm0, (%rax)
# CHECK-NEXT: - - - - - - - - - - - - - vphaddd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - vphaddd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vphaddd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - vphaddsw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - vphaddsw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vphaddsw (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - vphaddw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - vphaddw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vphaddw (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - 1.00 - - - - vphminposuw %xmm0, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 1.00 - - - - vphminposuw (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - vphsubd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - vphsubd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vphsubd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - vphsubsw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - vphsubsw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vphsubsw (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - vphsubw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - vphsubw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vphsubw (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - vpinsrb $1, %eax, %xmm1, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.25 0.25 0.25 0.25 - vpinsrb $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - vpinsrd $1, %eax, %xmm1, %xmm2

View File

@ -576,18 +576,18 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 100 0.25 * vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2
# CHECK-NEXT: 1 100 0.25 * vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 1 100 0.25 * vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: 1 100 0.25 vphaddd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 100 0.25 * vphaddd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 100 0.25 vphaddsw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 100 0.25 * vphaddsw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 100 0.25 vphaddw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 100 0.25 * vphaddw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 100 0.25 vphsubd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 100 0.25 * vphsubd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 100 0.25 vphsubsw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 100 0.25 * vphsubsw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 100 0.25 vphsubw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 100 0.25 * vphsubw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 0.25 vphaddd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 7 0.33 * vphaddd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 0.25 vphaddsw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 7 0.33 * vphaddsw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 0.25 vphaddw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 7 0.33 * vphaddw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 0.25 vphsubd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 7 0.33 * vphsubd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 0.25 vphsubsw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 7 0.33 * vphsubsw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 0.25 vphsubw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 7 0.33 * vphsubw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 1.00 vpmaddubsw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 11 1.00 * vpmaddubsw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 1.00 vpmaddwd %ymm0, %ymm1, %ymm2
@ -625,29 +625,29 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.25 vpminuw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 8 0.33 * vpminuw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 2 2 2.00 vpmovmskb %ymm0, %ecx
# CHECK-NEXT: 2 1 0.50 vpmovsxbd %xmm0, %ymm2
# CHECK-NEXT: 2 4 0.50 vpmovsxbd %xmm0, %ymm2
# CHECK-NEXT: 2 8 0.50 * vpmovsxbd (%rax), %ymm2
# CHECK-NEXT: 2 1 0.50 vpmovsxbq %xmm0, %ymm2
# CHECK-NEXT: 2 4 0.50 vpmovsxbq %xmm0, %ymm2
# CHECK-NEXT: 2 8 0.50 * vpmovsxbq (%rax), %ymm2
# CHECK-NEXT: 2 1 0.50 vpmovsxbw %xmm0, %ymm2
# CHECK-NEXT: 2 4 0.50 vpmovsxbw %xmm0, %ymm2
# CHECK-NEXT: 2 8 0.50 * vpmovsxbw (%rax), %ymm2
# CHECK-NEXT: 2 1 0.50 vpmovsxdq %xmm0, %ymm2
# CHECK-NEXT: 2 4 0.50 vpmovsxdq %xmm0, %ymm2
# CHECK-NEXT: 2 8 0.50 * vpmovsxdq (%rax), %ymm2
# CHECK-NEXT: 2 1 0.50 vpmovsxwd %xmm0, %ymm2
# CHECK-NEXT: 2 4 0.50 vpmovsxwd %xmm0, %ymm2
# CHECK-NEXT: 2 8 0.50 * vpmovsxwd (%rax), %ymm2
# CHECK-NEXT: 2 1 0.50 vpmovsxwq %xmm0, %ymm2
# CHECK-NEXT: 2 4 0.50 vpmovsxwq %xmm0, %ymm2
# CHECK-NEXT: 2 8 0.50 * vpmovsxwq (%rax), %ymm2
# CHECK-NEXT: 2 1 0.50 vpmovzxbd %xmm0, %ymm2
# CHECK-NEXT: 2 4 0.50 vpmovzxbd %xmm0, %ymm2
# CHECK-NEXT: 2 8 0.50 * vpmovzxbd (%rax), %ymm2
# CHECK-NEXT: 2 1 0.50 vpmovzxbq %xmm0, %ymm2
# CHECK-NEXT: 2 4 0.50 vpmovzxbq %xmm0, %ymm2
# CHECK-NEXT: 2 8 0.50 * vpmovzxbq (%rax), %ymm2
# CHECK-NEXT: 2 1 0.50 vpmovzxbw %xmm0, %ymm2
# CHECK-NEXT: 2 4 0.50 vpmovzxbw %xmm0, %ymm2
# CHECK-NEXT: 2 8 0.50 * vpmovzxbw (%rax), %ymm2
# CHECK-NEXT: 2 1 0.50 vpmovzxdq %xmm0, %ymm2
# CHECK-NEXT: 2 4 0.50 vpmovzxdq %xmm0, %ymm2
# CHECK-NEXT: 2 8 0.50 * vpmovzxdq (%rax), %ymm2
# CHECK-NEXT: 2 1 0.50 vpmovzxwd %xmm0, %ymm2
# CHECK-NEXT: 2 4 0.50 vpmovzxwd %xmm0, %ymm2
# CHECK-NEXT: 2 8 0.50 * vpmovzxwd (%rax), %ymm2
# CHECK-NEXT: 2 1 0.50 vpmovzxwq %xmm0, %ymm2
# CHECK-NEXT: 2 4 0.50 vpmovzxwq %xmm0, %ymm2
# CHECK-NEXT: 2 8 0.50 * vpmovzxwq (%rax), %ymm2
# CHECK-NEXT: 1 4 1.00 vpmuldq %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 11 1.00 * vpmuldq (%rax), %ymm1, %ymm2
@ -657,8 +657,8 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 11 1.00 * vpmulhuw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 1.00 vpmulhw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 11 1.00 * vpmulhw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 1.00 vpmulld %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 1.00 * vpmulld (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 1.00 vpmulld %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 11 1.00 * vpmulld (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 1.00 vpmullw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 11 1.00 * vpmullw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 1.00 vpmuludq %ymm0, %ymm1, %ymm2
@ -682,51 +682,51 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.25 vpsignw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 8 0.33 * vpsignw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.25 vpslld $1, %ymm0, %ymm2
# CHECK-NEXT: 1 2 1.00 vpslld %xmm0, %ymm1, %ymm2
# CHECK-NEXT: 1 9 1.00 * vpslld (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 1.00 vpslldq $1, %ymm1, %ymm2
# CHECK-NEXT: 1 1 1.00 vpslld %xmm0, %ymm1, %ymm2
# CHECK-NEXT: 1 8 1.00 * vpslld (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 1.00 vpslldq $1, %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.25 vpsllq $1, %ymm0, %ymm2
# CHECK-NEXT: 1 2 1.00 vpsllq %xmm0, %ymm1, %ymm2
# CHECK-NEXT: 1 9 1.00 * vpsllq (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vpsllvd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 0.50 * vpsllvd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpsllvd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 8 0.50 * vpsllvd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vpsllvq %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 0.50 * vpsllvq (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpsllvq %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 8 0.50 * vpsllvq (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 1.00 vpsllq %xmm0, %ymm1, %ymm2
# CHECK-NEXT: 1 8 1.00 * vpsllq (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 0.50 vpsllvd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 10 0.50 * vpsllvd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 0.50 vpsllvd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 10 0.50 * vpsllvd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 0.50 vpsllvq %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 10 0.50 * vpsllvq (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 0.50 vpsllvq %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 10 0.50 * vpsllvq (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.25 vpsllw $1, %ymm0, %ymm2
# CHECK-NEXT: 1 2 1.00 vpsllw %xmm0, %ymm1, %ymm2
# CHECK-NEXT: 1 9 1.00 * vpsllw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 1.00 vpsllw %xmm0, %ymm1, %ymm2
# CHECK-NEXT: 1 8 1.00 * vpsllw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.25 vpsrad $1, %ymm0, %ymm2
# CHECK-NEXT: 1 2 1.00 vpsrad %xmm0, %ymm1, %ymm2
# CHECK-NEXT: 1 9 1.00 * vpsrad (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vpsravd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 0.50 * vpsravd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpsravd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 8 0.50 * vpsravd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 1.00 vpsrad %xmm0, %ymm1, %ymm2
# CHECK-NEXT: 1 8 1.00 * vpsrad (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 0.50 vpsravd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 10 0.50 * vpsravd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 0.50 vpsravd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 10 0.50 * vpsravd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.25 vpsraw $1, %ymm0, %ymm2
# CHECK-NEXT: 1 2 1.00 vpsraw %xmm0, %ymm1, %ymm2
# CHECK-NEXT: 1 9 1.00 * vpsraw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 1.00 vpsraw %xmm0, %ymm1, %ymm2
# CHECK-NEXT: 1 8 1.00 * vpsraw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.25 vpsrld $1, %ymm0, %ymm2
# CHECK-NEXT: 1 2 1.00 vpsrld %xmm0, %ymm1, %ymm2
# CHECK-NEXT: 1 9 1.00 * vpsrld (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 1.00 vpsrldq $1, %ymm1, %ymm2
# CHECK-NEXT: 1 1 1.00 vpsrld %xmm0, %ymm1, %ymm2
# CHECK-NEXT: 1 8 1.00 * vpsrld (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 1.00 vpsrldq $1, %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.25 vpsrlq $1, %ymm0, %ymm2
# CHECK-NEXT: 1 2 1.00 vpsrlq %xmm0, %ymm1, %ymm2
# CHECK-NEXT: 1 9 1.00 * vpsrlq (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vpsrlvd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 0.50 * vpsrlvd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpsrlvd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 8 0.50 * vpsrlvd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vpsrlvq %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 0.50 * vpsrlvq (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpsrlvq %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 8 0.50 * vpsrlvq (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 1.00 vpsrlq %xmm0, %ymm1, %ymm2
# CHECK-NEXT: 1 8 1.00 * vpsrlq (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 0.50 vpsrlvd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 10 0.50 * vpsrlvd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 0.50 vpsrlvd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 10 0.50 * vpsrlvd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 0.50 vpsrlvq %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 10 0.50 * vpsrlvq (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 0.50 vpsrlvq %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 10 0.50 * vpsrlvq (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.25 vpsrlw $1, %ymm0, %ymm2
# CHECK-NEXT: 1 2 1.00 vpsrlw %xmm0, %ymm1, %ymm2
# CHECK-NEXT: 1 9 1.00 * vpsrlw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 1.00 vpsrlw %xmm0, %ymm1, %ymm2
# CHECK-NEXT: 1 8 1.00 * vpsrlw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.25 vpsubb %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 8 0.33 * vpsubb (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.25 vpsubd %ymm0, %ymm1, %ymm2
@ -779,7 +779,7 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12]
# CHECK-NEXT: 42.67 42.67 42.67 - - - - - 70.17 75.17 85.00 42.67 -
# CHECK-NEXT: 44.67 44.67 44.67 - - - - - 70.17 75.17 85.00 42.67 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions:
@ -900,17 +900,17 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - - - - - vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: - - - - - - - - - - - - - vphaddd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - - - - - vphaddd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vphaddd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - - - - - vphaddsw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - - - - - vphaddsw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vphaddsw (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - - - - - vphaddw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - - - - - vphaddw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vphaddw (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - - - - - vphsubd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - - - - - vphsubd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vphsubd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - - - - - vphsubsw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - - - - - vphsubsw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vphsubsw (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - - - - - vphsubw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - - - - - vphsubw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vphsubw (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - 1.00 - - - - vpmaddubsw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 1.00 - - - - vpmaddubsw (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - 1.00 - - - - vpmaddwd %ymm0, %ymm1, %ymm2

View File

@ -202,10 +202,10 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 1 8 0.33 * andnps (%rax), %xmm2
# CHECK-NEXT: 1 1 0.25 andps %xmm0, %xmm2
# CHECK-NEXT: 1 8 0.33 * andps (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 cmpeqps %xmm0, %xmm2
# CHECK-NEXT: 1 10 1.00 * cmpeqps (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 cmpeqss %xmm0, %xmm2
# CHECK-NEXT: 1 10 1.00 * cmpeqss (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 cmpeqps %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * cmpeqps (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 cmpeqss %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * cmpeqss (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 comiss %xmm0, %xmm1
# CHECK-NEXT: 1 10 1.00 * comiss (%rax), %xmm1
# CHECK-NEXT: 1 5 1.00 cvtpi2ps %mm0, %xmm2
@ -232,14 +232,14 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 1 22 1.00 * divss (%rax), %xmm2
# CHECK-NEXT: 1 100 0.25 * U ldmxcsr (%rax)
# CHECK-NEXT: 1 100 0.25 * * U maskmovq %mm0, %mm1
# CHECK-NEXT: 1 3 1.00 maxps %xmm0, %xmm2
# CHECK-NEXT: 1 10 1.00 * maxps (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 maxss %xmm0, %xmm2
# CHECK-NEXT: 1 10 1.00 * maxss (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 minps %xmm0, %xmm2
# CHECK-NEXT: 1 10 1.00 * minps (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 minss %xmm0, %xmm2
# CHECK-NEXT: 1 10 1.00 * minss (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 maxps %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * maxps (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 maxss %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * maxss (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 minps %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * minps (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 minss %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * minss (%rax), %xmm2
# CHECK-NEXT: 1 1 0.25 movaps %xmm0, %xmm2
# CHECK-NEXT: 1 1 0.33 * movaps %xmm0, (%rax)
# CHECK-NEXT: 1 8 0.33 * movaps (%rax), %xmm2

View File

@ -416,15 +416,15 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.25 andpd %xmm0, %xmm2
# CHECK-NEXT: 1 8 0.33 * andpd (%rax), %xmm2
# CHECK-NEXT: 1 8 0.33 * * U clflush (%rax)
# CHECK-NEXT: 1 3 1.00 cmpeqpd %xmm0, %xmm2
# CHECK-NEXT: 1 10 1.00 * cmpeqpd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 cmpeqsd %xmm0, %xmm2
# CHECK-NEXT: 1 10 1.00 * cmpeqsd (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 cmpeqpd %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * cmpeqpd (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 cmpeqsd %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * cmpeqsd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 comisd %xmm0, %xmm1
# CHECK-NEXT: 1 10 1.00 * comisd (%rax), %xmm1
# CHECK-NEXT: 1 3 1.00 cvtdq2pd %xmm0, %xmm2
# CHECK-NEXT: 1 12 1.00 * cvtdq2pd (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 cvtdq2ps %xmm0, %xmm2
# CHECK-NEXT: 1 3 1.00 cvtdq2ps %xmm0, %xmm2
# CHECK-NEXT: 1 12 1.00 * cvtdq2ps (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 cvtpd2dq %xmm0, %xmm2
# CHECK-NEXT: 2 10 1.00 * cvtpd2dq (%rax), %xmm2
@ -434,7 +434,7 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 2 10 0.50 * cvtpd2ps (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 cvtpi2pd %mm0, %xmm2
# CHECK-NEXT: 1 12 1.00 * cvtpi2pd (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 cvtps2dq %xmm0, %xmm2
# CHECK-NEXT: 1 3 1.00 cvtps2dq %xmm0, %xmm2
# CHECK-NEXT: 1 12 1.00 * cvtps2dq (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 cvtps2pd %xmm0, %xmm2
# CHECK-NEXT: 2 10 1.00 * cvtps2pd (%rax), %xmm2
@ -444,8 +444,8 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 11 1.00 * cvtsd2si (%rax), %rcx
# CHECK-NEXT: 1 3 1.00 cvtsd2ss %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * cvtsd2ss (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 cvtsi2sd %ecx, %xmm2
# CHECK-NEXT: 1 4 1.00 cvtsi2sd %rcx, %xmm2
# CHECK-NEXT: 1 3 1.00 cvtsi2sd %ecx, %xmm2
# CHECK-NEXT: 1 3 1.00 cvtsi2sd %rcx, %xmm2
# CHECK-NEXT: 1 12 1.00 * cvtsi2sdl (%rax), %xmm2
# CHECK-NEXT: 1 12 1.00 * cvtsi2sdl (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 cvtss2sd %xmm0, %xmm2
@ -454,7 +454,7 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 2 10 1.00 * cvttpd2dq (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 cvttpd2pi %xmm0, %mm2
# CHECK-NEXT: 1 12 1.00 * cvttpd2pi (%rax), %mm2
# CHECK-NEXT: 1 5 1.00 cvttps2dq %xmm0, %xmm2
# CHECK-NEXT: 1 3 1.00 cvttps2dq %xmm0, %xmm2
# CHECK-NEXT: 1 12 1.00 * cvttps2dq (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 cvttsd2si %xmm0, %ecx
# CHECK-NEXT: 1 4 1.00 cvttsd2si %xmm0, %rcx
@ -466,15 +466,15 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 22 1.00 * divsd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 * * U lfence
# CHECK-NEXT: 1 100 0.25 * * U maskmovdqu %xmm0, %xmm1
# CHECK-NEXT: 1 3 1.00 maxpd %xmm0, %xmm2
# CHECK-NEXT: 1 10 1.00 * maxpd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 maxsd %xmm0, %xmm2
# CHECK-NEXT: 1 10 1.00 * maxsd (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 maxpd %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * maxpd (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 maxsd %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * maxsd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 * * U mfence
# CHECK-NEXT: 1 3 1.00 minpd %xmm0, %xmm2
# CHECK-NEXT: 1 10 1.00 * minpd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 minsd %xmm0, %xmm2
# CHECK-NEXT: 1 10 1.00 * minsd (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 minpd %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * minpd (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 minsd %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * minsd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.25 movapd %xmm0, %xmm2
# CHECK-NEXT: 1 1 0.33 * movapd %xmm0, (%rax)
# CHECK-NEXT: 1 8 0.33 * movapd (%rax), %xmm2
@ -597,7 +597,7 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.25 pslld $1, %xmm2
# CHECK-NEXT: 1 1 1.00 pslld %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * pslld (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 pslldq $1, %xmm2
# CHECK-NEXT: 1 3 1.00 pslldq $1, %xmm2
# CHECK-NEXT: 1 1 0.25 psllq $1, %xmm2
# CHECK-NEXT: 1 1 1.00 psllq %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * psllq (%rax), %xmm2
@ -613,7 +613,7 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.25 psrld $1, %xmm2
# CHECK-NEXT: 1 1 1.00 psrld %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * psrld (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 psrldq $1, %xmm2
# CHECK-NEXT: 1 3 1.00 psrldq $1, %xmm2
# CHECK-NEXT: 1 1 0.25 psrlq $1, %xmm2
# CHECK-NEXT: 1 1 1.00 psrlq %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * psrlq (%rax), %xmm2
@ -692,7 +692,7 @@ xorpd (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12]
# CHECK-NEXT: 44.33 44.33 44.33 - - - - - 71.92 40.42 71.75 152.92 -
# CHECK-NEXT: 44.33 44.33 44.33 - - - - - 71.92 41.92 73.25 152.92 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions:
@ -713,7 +713,7 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 1.00 - - - - comisd (%rax), %xmm1
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 - cvtdq2pd %xmm0, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - cvtdq2pd (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - - 1.00 - cvtdq2ps %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 - cvtdq2ps %xmm0, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - cvtdq2ps (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 - cvtpd2dq %xmm0, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 0.50 0.50 1.00 - cvtpd2dq (%rax), %xmm2
@ -723,7 +723,7 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.50 - - 0.50 - cvtpd2ps (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - - 1.00 - cvtpi2pd %mm0, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - cvtpi2pd (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - - 1.00 - cvtps2dq %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 - cvtps2dq %xmm0, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - cvtps2dq (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - - 1.00 - cvtps2pd %xmm0, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - cvtps2pd (%rax), %xmm2
@ -743,7 +743,7 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 0.50 0.50 1.00 - cvttpd2dq (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - - 1.00 - cvttpd2pi %xmm0, %mm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - cvttpd2pi (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - - 1.00 - cvttps2dq %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 - cvttps2dq %xmm0, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - cvttps2dq (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - cvttsd2si %xmm0, %ecx
# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - cvttsd2si %xmm0, %rcx

View File

@ -47,14 +47,14 @@ mwait
# CHECK-NEXT: 1 10 1.00 * addsubpd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 addsubps %xmm0, %xmm2
# CHECK-NEXT: 1 10 1.00 * addsubps (%rax), %xmm2
# CHECK-NEXT: 1 100 0.25 haddpd %xmm0, %xmm2
# CHECK-NEXT: 1 100 0.25 * haddpd (%rax), %xmm2
# CHECK-NEXT: 1 100 0.25 haddps %xmm0, %xmm2
# CHECK-NEXT: 1 100 0.25 * haddps (%rax), %xmm2
# CHECK-NEXT: 1 100 0.25 hsubpd %xmm0, %xmm2
# CHECK-NEXT: 1 100 0.25 * hsubpd (%rax), %xmm2
# CHECK-NEXT: 1 100 0.25 hsubps %xmm0, %xmm2
# CHECK-NEXT: 1 100 0.25 * hsubps (%rax), %xmm2
# CHECK-NEXT: 1 7 0.25 haddpd %xmm0, %xmm2
# CHECK-NEXT: 2 11 0.33 * haddpd (%rax), %xmm2
# CHECK-NEXT: 1 7 0.25 haddps %xmm0, %xmm2
# CHECK-NEXT: 2 11 0.33 * haddps (%rax), %xmm2
# CHECK-NEXT: 1 7 0.25 hsubpd %xmm0, %xmm2
# CHECK-NEXT: 2 11 0.33 * hsubpd (%rax), %xmm2
# CHECK-NEXT: 1 7 0.25 hsubps %xmm0, %xmm2
# CHECK-NEXT: 2 11 0.33 * hsubps (%rax), %xmm2
# CHECK-NEXT: 1 8 0.33 * lddqu (%rax), %xmm2
# CHECK-NEXT: 1 100 0.25 U monitor
# CHECK-NEXT: 1 1 0.50 movddup %xmm0, %xmm2
@ -82,7 +82,7 @@ mwait
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12]
# CHECK-NEXT: 1.67 1.67 1.67 - - - - - 4.00 2.00 2.00 - -
# CHECK-NEXT: 3.00 3.00 3.00 - - - - - 4.00 2.00 2.00 - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions:
@ -91,13 +91,13 @@ mwait
# CHECK-NEXT: - - - - - - - - 1.00 - - - - addsubps %xmm0, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 1.00 - - - - addsubps (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - haddpd %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - haddpd (%rax), %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - haddpd (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - haddps %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - haddps (%rax), %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - haddps (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - hsubpd %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - hsubpd (%rax), %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - hsubpd (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - hsubps %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - hsubps (%rax), %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - hsubps (%rax), %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - lddqu (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - monitor
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - movddup %xmm0, %xmm2

View File

@ -165,8 +165,8 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: 1 8 0.50 * blendvps %xmm0, (%rax), %xmm2
# CHECK-NEXT: 1 100 0.25 dppd $22, %xmm0, %xmm2
# CHECK-NEXT: 1 100 0.25 * dppd $22, (%rax), %xmm2
# CHECK-NEXT: 1 100 0.25 dpps $22, %xmm0, %xmm2
# CHECK-NEXT: 1 100 0.25 * dpps $22, (%rax), %xmm2
# CHECK-NEXT: 1 15 0.25 dpps $22, %xmm0, %xmm2
# CHECK-NEXT: 2 19 0.33 * dpps $22, (%rax), %xmm2
# CHECK-NEXT: 1 2 2.00 extractps $1, %xmm0, %ecx
# CHECK-NEXT: 2 5 2.00 * extractps $1, %xmm0, (%rax)
# CHECK-NEXT: 1 1 0.50 insertps $1, %xmm0, %xmm2
@ -243,14 +243,14 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: 2 11 1.00 * pmulld (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 ptest %xmm0, %xmm1
# CHECK-NEXT: 2 8 1.00 * ptest (%rax), %xmm1
# CHECK-NEXT: 1 4 1.00 roundpd $1, %xmm0, %xmm2
# CHECK-NEXT: 1 11 1.00 * roundpd $1, (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 roundps $1, %xmm0, %xmm2
# CHECK-NEXT: 1 11 1.00 * roundps $1, (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 roundsd $1, %xmm0, %xmm2
# CHECK-NEXT: 1 11 1.00 * roundsd $1, (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 roundss $1, %xmm0, %xmm2
# CHECK-NEXT: 1 11 1.00 * roundss $1, (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 roundpd $1, %xmm0, %xmm2
# CHECK-NEXT: 1 10 1.00 * roundpd $1, (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 roundps $1, %xmm0, %xmm2
# CHECK-NEXT: 1 10 1.00 * roundps $1, (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 roundsd $1, %xmm0, %xmm2
# CHECK-NEXT: 1 10 1.00 * roundsd $1, (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 roundss $1, %xmm0, %xmm2
# CHECK-NEXT: 1 10 1.00 * roundss $1, (%rax), %xmm2
# CHECK: Resources:
# CHECK-NEXT: [0] - Zn2AGU0
@ -269,7 +269,7 @@ roundss $1, (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12]
# CHECK-NEXT: 16.67 16.67 16.67 - - - - - 25.17 26.67 44.00 21.17 -
# CHECK-NEXT: 17.00 17.00 17.00 - - - - - 25.17 26.67 44.00 21.17 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions:
@ -284,7 +284,7 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - dppd $22, %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - dppd $22, (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - dpps $22, %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - dpps $22, (%rax), %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - dpps $22, (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - 0.50 2.50 - - extractps $1, %xmm0, %ecx
# CHECK-NEXT: 1.67 1.67 1.67 - - - - - - 0.50 2.50 - - extractps $1, %xmm0, (%rax)
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - insertps $1, %xmm0, %xmm2

View File

@ -19,8 +19,8 @@ movntss %xmm0, (%rax)
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 2 1.00 extrq %xmm0, %xmm2
# CHECK-NEXT: 1 2 1.00 extrq $22, $2, %xmm2
# CHECK-NEXT: 1 3 1.00 extrq %xmm0, %xmm2
# CHECK-NEXT: 1 3 1.00 extrq $22, $2, %xmm2
# CHECK-NEXT: 1 4 1.00 insertq %xmm0, %xmm2
# CHECK-NEXT: 1 4 1.00 insertq $22, $22, %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * movntsd %xmm0, (%rax)

View File

@ -122,30 +122,30 @@ psignw (%rax), %xmm2
# CHECK-NEXT: 1 8 0.33 * palignr $1, (%rax), %mm2
# CHECK-NEXT: 1 1 0.25 palignr $1, %xmm0, %xmm2
# CHECK-NEXT: 1 8 0.33 * palignr $1, (%rax), %xmm2
# CHECK-NEXT: 1 100 0.25 phaddd %mm0, %mm2
# CHECK-NEXT: 1 100 0.25 * phaddd (%rax), %mm2
# CHECK-NEXT: 1 100 0.25 phaddd %xmm0, %xmm2
# CHECK-NEXT: 1 100 0.25 * phaddd (%rax), %xmm2
# CHECK-NEXT: 1 100 0.25 phaddsw %mm0, %mm2
# CHECK-NEXT: 1 100 0.25 * phaddsw (%rax), %mm2
# CHECK-NEXT: 1 100 0.25 phaddsw %xmm0, %xmm2
# CHECK-NEXT: 1 100 0.25 * phaddsw (%rax), %xmm2
# CHECK-NEXT: 1 100 0.25 phaddw %mm0, %mm2
# CHECK-NEXT: 1 100 0.25 * phaddw (%rax), %mm2
# CHECK-NEXT: 1 100 0.25 phaddw %xmm0, %xmm2
# CHECK-NEXT: 1 100 0.25 * phaddw (%rax), %xmm2
# CHECK-NEXT: 1 100 0.25 phsubd %mm0, %mm2
# CHECK-NEXT: 1 100 0.25 * phsubd (%rax), %mm2
# CHECK-NEXT: 1 100 0.25 phsubd %xmm0, %xmm2
# CHECK-NEXT: 1 100 0.25 * phsubd (%rax), %xmm2
# CHECK-NEXT: 1 100 0.25 phsubsw %mm0, %mm2
# CHECK-NEXT: 1 100 0.25 * phsubsw (%rax), %mm2
# CHECK-NEXT: 1 100 0.25 phsubsw %xmm0, %xmm2
# CHECK-NEXT: 1 100 0.25 * phsubsw (%rax), %xmm2
# CHECK-NEXT: 1 100 0.25 phsubw %mm0, %mm2
# CHECK-NEXT: 1 100 0.25 * phsubw (%rax), %mm2
# CHECK-NEXT: 1 100 0.25 phsubw %xmm0, %xmm2
# CHECK-NEXT: 1 100 0.25 * phsubw (%rax), %xmm2
# CHECK-NEXT: 1 3 0.25 phaddd %mm0, %mm2
# CHECK-NEXT: 2 7 0.33 * phaddd (%rax), %mm2
# CHECK-NEXT: 1 3 0.25 phaddd %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.33 * phaddd (%rax), %xmm2
# CHECK-NEXT: 1 3 0.25 phaddsw %mm0, %mm2
# CHECK-NEXT: 2 7 0.33 * phaddsw (%rax), %mm2
# CHECK-NEXT: 1 3 0.25 phaddsw %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.33 * phaddsw (%rax), %xmm2
# CHECK-NEXT: 1 3 0.25 phaddw %mm0, %mm2
# CHECK-NEXT: 2 7 0.33 * phaddw (%rax), %mm2
# CHECK-NEXT: 1 3 0.25 phaddw %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.33 * phaddw (%rax), %xmm2
# CHECK-NEXT: 1 3 0.25 phsubd %mm0, %mm2
# CHECK-NEXT: 2 7 0.33 * phsubd (%rax), %mm2
# CHECK-NEXT: 1 3 0.25 phsubd %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.33 * phsubd (%rax), %xmm2
# CHECK-NEXT: 1 3 0.25 phsubsw %mm0, %mm2
# CHECK-NEXT: 2 7 0.33 * phsubsw (%rax), %mm2
# CHECK-NEXT: 1 3 0.25 phsubsw %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.33 * phsubsw (%rax), %xmm2
# CHECK-NEXT: 1 3 0.25 phsubw %mm0, %mm2
# CHECK-NEXT: 2 7 0.33 * phsubw (%rax), %mm2
# CHECK-NEXT: 1 3 0.25 phsubw %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.33 * phsubw (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 pmaddubsw %mm0, %mm2
# CHECK-NEXT: 1 11 1.00 * pmaddubsw (%rax), %mm2
# CHECK-NEXT: 1 4 1.00 pmaddubsw %xmm0, %xmm2
@ -188,7 +188,7 @@ psignw (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12]
# CHECK-NEXT: 6.67 6.67 6.67 - - - - - 16.00 8.00 8.00 8.00 -
# CHECK-NEXT: 10.67 10.67 10.67 - - - - - 16.00 8.00 8.00 8.00 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions:
@ -209,29 +209,29 @@ psignw (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - palignr $1, %xmm0, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.25 0.25 0.25 0.25 - palignr $1, (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - phaddd %mm0, %mm2
# CHECK-NEXT: - - - - - - - - - - - - - phaddd (%rax), %mm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - phaddd (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - - - - phaddd %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - phaddd (%rax), %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - phaddd (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - phaddsw %mm0, %mm2
# CHECK-NEXT: - - - - - - - - - - - - - phaddsw (%rax), %mm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - phaddsw (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - - - - phaddsw %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - phaddsw (%rax), %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - phaddsw (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - phaddw %mm0, %mm2
# CHECK-NEXT: - - - - - - - - - - - - - phaddw (%rax), %mm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - phaddw (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - - - - phaddw %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - phaddw (%rax), %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - phaddw (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - phsubd %mm0, %mm2
# CHECK-NEXT: - - - - - - - - - - - - - phsubd (%rax), %mm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - phsubd (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - - - - phsubd %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - phsubd (%rax), %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - phsubd (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - phsubsw %mm0, %mm2
# CHECK-NEXT: - - - - - - - - - - - - - phsubsw (%rax), %mm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - phsubsw (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - - - - phsubsw %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - phsubsw (%rax), %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - phsubsw (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - phsubw %mm0, %mm2
# CHECK-NEXT: - - - - - - - - - - - - - phsubw (%rax), %mm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - phsubw (%rax), %mm2
# CHECK-NEXT: - - - - - - - - - - - - - phsubw %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - phsubw (%rax), %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - phsubw (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - 1.00 - - - - pmaddubsw %mm0, %mm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 1.00 - - - - pmaddubsw (%rax), %mm2
# CHECK-NEXT: - - - - - - - - 1.00 - - - - pmaddubsw %xmm0, %xmm2

View File

@ -1002,17 +1002,17 @@ xorq (%rax), %rdi
# CHECK-NEXT: 2 5 0.33 * * andq %rsi, (%rax)
# CHECK-NEXT: 2 5 0.33 * andq (%rax), %rdi
# CHECK-NEXT: 1 3 0.25 bsfw %si, %di
# CHECK-NEXT: 1 3 0.25 bsrw %si, %di
# CHECK-NEXT: 1 4 0.25 bsrw %si, %di
# CHECK-NEXT: 2 7 0.33 * bsfw (%rax), %di
# CHECK-NEXT: 2 7 0.33 * bsrw (%rax), %di
# CHECK-NEXT: 2 8 0.33 * bsrw (%rax), %di
# CHECK-NEXT: 1 3 0.25 bsfl %esi, %edi
# CHECK-NEXT: 1 3 0.25 bsrl %esi, %edi
# CHECK-NEXT: 1 4 0.25 bsrl %esi, %edi
# CHECK-NEXT: 2 7 0.33 * bsfl (%rax), %edi
# CHECK-NEXT: 2 7 0.33 * bsrl (%rax), %edi
# CHECK-NEXT: 2 8 0.33 * bsrl (%rax), %edi
# CHECK-NEXT: 1 3 0.25 bsfq %rsi, %rdi
# CHECK-NEXT: 1 3 0.25 bsrq %rsi, %rdi
# CHECK-NEXT: 1 4 0.25 bsrq %rsi, %rdi
# CHECK-NEXT: 2 7 0.33 * bsfq (%rax), %rdi
# CHECK-NEXT: 2 7 0.33 * bsrq (%rax), %rdi
# CHECK-NEXT: 2 8 0.33 * bsrq (%rax), %rdi
# CHECK-NEXT: 1 1 1.00 bswapl %eax
# CHECK-NEXT: 1 1 1.00 bswapq %rax
# CHECK-NEXT: 1 1 0.25 btw %si, %di
@ -1106,13 +1106,13 @@ xorq (%rax), %rdi
# CHECK-NEXT: 1 100 0.25 U cmpsw %es:(%rdi), (%rsi)
# CHECK-NEXT: 1 100 0.25 U cmpsl %es:(%rdi), (%rsi)
# CHECK-NEXT: 1 100 0.25 U cmpsq %es:(%rdi), (%rsi)
# CHECK-NEXT: 1 1 0.25 cmpxchgb %cl, %bl
# CHECK-NEXT: 1 3 0.25 cmpxchgb %cl, %bl
# CHECK-NEXT: 5 8 0.33 * * cmpxchgb %cl, (%rbx)
# CHECK-NEXT: 1 1 0.25 cmpxchgw %cx, %bx
# CHECK-NEXT: 1 3 0.25 cmpxchgw %cx, %bx
# CHECK-NEXT: 5 8 0.33 * * cmpxchgw %cx, (%rbx)
# CHECK-NEXT: 1 1 0.25 cmpxchgl %ecx, %ebx
# CHECK-NEXT: 1 3 0.25 cmpxchgl %ecx, %ebx
# CHECK-NEXT: 5 8 0.33 * * cmpxchgl %ecx, (%rbx)
# CHECK-NEXT: 1 1 0.25 cmpxchgq %rcx, %rbx
# CHECK-NEXT: 1 3 0.25 cmpxchgq %rcx, %rbx
# CHECK-NEXT: 5 8 0.33 * * cmpxchgq %rcx, (%rbx)
# CHECK-NEXT: 1 100 0.25 U cpuid
# CHECK-NEXT: 1 1 0.25 decb %dil
@ -1146,9 +1146,9 @@ xorq (%rax), %rdi
# CHECK-NEXT: 1 7 1.00 * imulw (%rax)
# CHECK-NEXT: 1 3 1.00 imulw %si, %di
# CHECK-NEXT: 1 7 1.00 * imulw (%rax), %di
# CHECK-NEXT: 1 3 1.00 imulw $511, %si, %di
# CHECK-NEXT: 1 4 1.00 imulw $511, %si, %di
# CHECK-NEXT: 1 7 1.00 * imulw $511, (%rax), %di
# CHECK-NEXT: 1 3 1.00 imulw $7, %si, %di
# CHECK-NEXT: 1 4 1.00 imulw $7, %si, %di
# CHECK-NEXT: 1 7 1.00 * imulw $7, (%rax), %di
# CHECK-NEXT: 1 3 1.00 imull %edi
# CHECK-NEXT: 1 7 1.00 * imull (%rax)