1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

[X86][BtVer2] Fix the number of micro opcodes for a bunch of YMM instructions.

The Jaguar backend natively supports 128-bit data types. Operations on YMM
registers are split into two COPs (complex operations). Each COP consumes a slot
in the dispatch group, and in the reorder buffer.

The scheduling model for Jaguar should mark those instructions as `let
NumMicroOps = 2`.

This was found when testing AVX code for BtVer2 using llvm-mca.

llvm-svn: 328694
This commit is contained in:
Andrea Di Biagio 2018-03-28 10:49:33 +00:00
parent 9905496b29
commit 7f3dddc539
3 changed files with 721 additions and 14 deletions

View File

@ -547,6 +547,7 @@ def : InstRW<[JWriteVDPPSYLd, ReadAfterLd], (instrs VDPPSYrmi)>;
// Scheduling write for 256-bit (YMM) FP add/sub with register operands; it is
// mapped below to VADDPDYrr, VADDPSYrr, VSUBPDYrr, VSUBPSYrr (the instruction
// list continues past this hunk).
// Latency is 3 cycles; JFPU0 and JFPA are each occupied for 2 cycles.
// Counted as 2 micro-ops because Jaguar natively handles 128-bit data: a YMM
// operation is split into two COPs, each consuming a dispatch-group slot and a
// reorder-buffer entry.
def JWriteFAddY: SchedWriteRes<[JFPU0, JFPA]> {
let Latency = 3;
let ResourceCycles = [2, 2];
let NumMicroOps = 2;
}
def : InstRW<[JWriteFAddY], (instrs VADDPDYrr, VADDPSYrr,
VSUBPDYrr, VSUBPSYrr,
@ -555,6 +556,7 @@ def : InstRW<[JWriteFAddY], (instrs VADDPDYrr, VADDPSYrr,
// Scheduling write for 256-bit (YMM) FP add/sub with a memory operand; it is
// mapped below (together with ReadAfterLd) to VADDPDYrm, VADDPSYrm, VSUBPDYrm,
// VSUBPSYrm (the instruction list continues past this hunk).
// Latency is 8 cycles, i.e. 5 more than the register form JWriteFAddY.
// JLAGU, JFPU0 and JFPA are each occupied for 2 cycles.
// Counted as 2 micro-ops because Jaguar splits a YMM operation into two
// 128-bit COPs, each consuming a dispatch-group slot and a reorder-buffer entry.
def JWriteFAddYLd: SchedWriteRes<[JLAGU, JFPU0, JFPA]> {
let Latency = 8;
let ResourceCycles = [2, 2, 2];
let NumMicroOps = 2;
}
def : InstRW<[JWriteFAddYLd, ReadAfterLd], (instrs VADDPDYrm, VADDPSYrm,
VSUBPDYrm, VSUBPSYrm,
@ -563,36 +565,42 @@ def : InstRW<[JWriteFAddYLd, ReadAfterLd], (instrs VADDPDYrm, VADDPSYrm,
// Scheduling write for 256-bit (YMM) FP divide, register form.
// Latency is 38 cycles; JFPU1 is occupied for 2 cycles and JFPM for 38 cycles.
// Counted as 2 micro-ops because Jaguar splits a YMM operation into two
// 128-bit COPs, each consuming a dispatch-group slot and a reorder-buffer entry.
def JWriteFDivY: SchedWriteRes<[JFPU1, JFPM]> {
let Latency = 38;
let ResourceCycles = [2, 38];
let NumMicroOps = 2;
}
// Apply JWriteFDivY to the register forms of the YMM packed divides.
def : InstRW<[JWriteFDivY], (instrs VDIVPDYrr, VDIVPSYrr)>;
// Scheduling write for 256-bit (YMM) FP divide with a memory operand.
// Latency is 43 cycles (38 for the register form plus 5); JLAGU and JFPU1 are
// occupied for 2 cycles each and JFPM for 38 cycles.
// Counted as 2 micro-ops because Jaguar splits a YMM operation into two
// 128-bit COPs, each consuming a dispatch-group slot and a reorder-buffer entry.
def JWriteFDivYLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> {
let Latency = 43;
let ResourceCycles = [2, 2, 38];
let NumMicroOps = 2;
}
// Apply JWriteFDivYLd (with ReadAfterLd) to the memory forms of the YMM
// packed divides.
def : InstRW<[JWriteFDivYLd, ReadAfterLd], (instrs VDIVPDYrm, VDIVPSYrm)>;
// Scheduling write for 256-bit (YMM) packed-double multiply, register form.
// Latency is 4 cycles; JFPU1 is occupied for 2 cycles and JFPM for 4 cycles.
// Counted as 2 micro-ops because Jaguar splits a YMM operation into two
// 128-bit COPs, each consuming a dispatch-group slot and a reorder-buffer entry.
def JWriteVMULYPD: SchedWriteRes<[JFPU1, JFPM]> {
let Latency = 4;
let ResourceCycles = [2, 4];
let NumMicroOps = 2;
}
// Apply JWriteVMULYPD to VMULPDYrr.
def : InstRW<[JWriteVMULYPD], (instrs VMULPDYrr)>;
// Scheduling write for 256-bit (YMM) packed-double multiply with a memory
// operand.
// Latency is 9 cycles (4 for the register form plus 5); JLAGU and JFPU1 are
// occupied for 2 cycles each and JFPM for 4 cycles.
// Counted as 2 micro-ops because Jaguar splits a YMM operation into two
// 128-bit COPs, each consuming a dispatch-group slot and a reorder-buffer entry.
def JWriteVMULYPDLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> {
let Latency = 9;
let ResourceCycles = [2, 2, 4];
let NumMicroOps = 2;
}
// Apply JWriteVMULYPDLd (with ReadAfterLd) to VMULPDYrm.
def : InstRW<[JWriteVMULYPDLd, ReadAfterLd], (instrs VMULPDYrm)>;
// Scheduling write shared by the 256-bit (YMM) packed-single multiply,
// reciprocal and reciprocal-square-root register forms.
// Latency is 2 cycles; JFPU1 and JFPM are each occupied for 2 cycles.
// Counted as 2 micro-ops because Jaguar splits a YMM operation into two
// 128-bit COPs, each consuming a dispatch-group slot and a reorder-buffer entry.
def JWriteVMULYPS: SchedWriteRes<[JFPU1, JFPM]> {
let Latency = 2;
let ResourceCycles = [2, 2];
let NumMicroOps = 2;
}
// Apply JWriteVMULYPS to VMULPSYrr, VRCPPSYr and VRSQRTPSYr.
def : InstRW<[JWriteVMULYPS], (instrs VMULPSYrr, VRCPPSYr, VRSQRTPSYr)>;
// Scheduling write shared by the 256-bit (YMM) packed-single multiply,
// reciprocal and reciprocal-square-root memory forms.
// Latency is 7 cycles (2 for the register form plus 5); JLAGU, JFPU1 and JFPM
// are each occupied for 2 cycles.
// Counted as 2 micro-ops because Jaguar splits a YMM operation into two
// 128-bit COPs, each consuming a dispatch-group slot and a reorder-buffer entry.
def JWriteVMULYPSLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> {
let Latency = 7;
let ResourceCycles = [2, 2, 2];
let NumMicroOps = 2;
}
// Apply JWriteVMULYPSLd (with ReadAfterLd) to VMULPSYrm, VRCPPSYm and
// VRSQRTPSYm.
def : InstRW<[JWriteVMULYPSLd, ReadAfterLd], (instrs VMULPSYrm, VRCPPSYm, VRSQRTPSYm)>;
@ -611,6 +619,7 @@ def : InstRW<[JWriteVMULPDLd], (instrs MULPDrm, MULSDrm, VMULPDrm, VMULSDrm)>;
// Scheduling write for 256-bit (YMM) vector conversions, register form; it is
// mapped below to VCVTDQ2PDYrr, VCVTDQ2PSYrr, VCVTPS2DQYrr, VCVTTPS2DQYrr (the
// instruction list continues past this hunk).
// Latency is 3 cycles; JFPU1 and JSTC are each occupied for 2 cycles.
// Counted as 2 micro-ops because Jaguar splits a YMM operation into two
// 128-bit COPs, each consuming a dispatch-group slot and a reorder-buffer entry.
def JWriteVCVTY: SchedWriteRes<[JFPU1, JSTC]> {
let Latency = 3;
let ResourceCycles = [2, 2];
let NumMicroOps = 2;
}
def : InstRW<[JWriteVCVTY], (instrs VCVTDQ2PDYrr, VCVTDQ2PSYrr,
VCVTPS2DQYrr, VCVTTPS2DQYrr,
@ -619,6 +628,7 @@ def : InstRW<[JWriteVCVTY], (instrs VCVTDQ2PDYrr, VCVTDQ2PSYrr,
// Scheduling write for 256-bit (YMM) vector conversions with a memory operand;
// it is mapped below (together with ReadAfterLd) to VCVTDQ2PDYrm, VCVTDQ2PSYrm,
// VCVTPS2DQYrm, VCVTTPS2DQYrm (the instruction list continues past this hunk).
// Latency is 8 cycles, i.e. 5 more than the register form JWriteVCVTY.
// JLAGU, JFPU1 and JSTC are each occupied for 2 cycles.
// Counted as 2 micro-ops because Jaguar splits a YMM operation into two
// 128-bit COPs, each consuming a dispatch-group slot and a reorder-buffer entry.
def JWriteVCVTYLd: SchedWriteRes<[JLAGU, JFPU1, JSTC]> {
let Latency = 8;
let ResourceCycles = [2, 2, 2];
let NumMicroOps = 2;
}
def : InstRW<[JWriteVCVTYLd, ReadAfterLd], (instrs VCVTDQ2PDYrm, VCVTDQ2PSYrm,
VCVTPS2DQYrm, VCVTTPS2DQYrm,
@ -834,12 +844,14 @@ def : InstRW<[JWriteVSQRTYPDLd], (instrs VSQRTPDYm)>;
// Scheduling write for 256-bit (YMM) packed-single square root, register form.
// Latency is 42 cycles; JFPU1 is occupied for 2 cycles and JFPM for 42 cycles.
// Counted as 2 micro-ops because Jaguar splits a YMM operation into two
// 128-bit COPs, each consuming a dispatch-group slot and a reorder-buffer entry.
def JWriteVSQRTYPS: SchedWriteRes<[JFPU1, JFPM]> {
let Latency = 42;
let ResourceCycles = [2, 42];
let NumMicroOps = 2;
}
// Apply JWriteVSQRTYPS to VSQRTPSYr.
def : InstRW<[JWriteVSQRTYPS], (instrs VSQRTPSYr)>;
// Scheduling write for 256-bit (YMM) packed-single square root with a memory
// operand.
// Latency is 47 cycles (42 for the register form plus 5); JLAGU and JFPU1 are
// occupied for 2 cycles each and JFPM for 42 cycles.
// Counted as 2 micro-ops because Jaguar splits a YMM operation into two
// 128-bit COPs, each consuming a dispatch-group slot and a reorder-buffer entry.
def JWriteVSQRTYPSLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> {
let Latency = 47;
let ResourceCycles = [2, 2, 42];
let NumMicroOps = 2;
}
// Apply JWriteVSQRTYPSLd to VSQRTPSYm.
// NOTE(review): unlike the divide/multiply/convert load forms, no ReadAfterLd
// is listed here (same as the VSQRTPDYm mapping) -- presumably because the
// memory operand is the only source; confirm.
def : InstRW<[JWriteVSQRTYPSLd], (instrs VSQRTPSYm)>;

View File

@ -39,8 +39,8 @@ vsqrtps %ymm0, %ymm2
# CHECK-NEXT: 1 2 1.00 vpclmulqdq $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 21 21.00 vsqrtps %xmm0, %xmm2
# CHECK-NEXT: 1 3 2.00 vaddps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 42 42.00 vsqrtps %ymm0, %ymm2
# CHECK-NEXT: 2 3 2.00 vaddps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 42 42.00 vsqrtps %ymm0, %ymm2
# CHECK: Resources:
@ -87,13 +87,13 @@ vsqrtps %ymm0, %ymm2
# CHECK-NEXT: [0,4] . DeeeER . . . . . . . . . . . . . vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: [0,5] . DeeeeeeeeeeeeeeeeeeeeeER . . . . . . . . . vsqrtps %xmm0, %xmm2
# CHECK-NEXT: [0,6] . DeeeE-----------------R . . . . . . . . . vaddps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: [0,7] . D====================eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER vsqrtps %ymm0, %ymm2
# CHECK-NEXT: [0,7] . D===================eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER vsqrtps %ymm0, %ymm2
# CHECK: [1,0] . D=eeE----------------------------------------------------------R vpmulld %xmm0, %xmm1, %xmm2
# CHECK-NEXT: [1,1] . DeE------------------------------------------------------------R vpand %xmm0, %xmm1, %xmm2
# CHECK-NEXT: [1,2] . DeeeE---------------------------------------------------------R vcvttps2dq %xmm0, %xmm2
# CHECK-NEXT: [1,3] . D=eeE---------------------------------------------------------R vpclmulqdq $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: [1,4] . .D=eeeE-------------------------------------------------------R vaddps %xmm0, %xmm1, %xmm2
# CHECK: [1,0] . DeeE----------------------------------------------------------R vpmulld %xmm0, %xmm1, %xmm2
# CHECK-NEXT: [1,1] . DeE-----------------------------------------------------------R vpand %xmm0, %xmm1, %xmm2
# CHECK-NEXT: [1,2] . .DeeeE--------------------------------------------------------R vcvttps2dq %xmm0, %xmm2
# CHECK-NEXT: [1,3] . .DeeE---------------------------------------------------------R vpclmulqdq $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: [1,4] . . DeeeE-------------------------------------------------------R vaddps %xmm0, %xmm1, %xmm2
# CHECK: Average Wait times (based on the timeline view):
@ -103,11 +103,11 @@ vsqrtps %ymm0, %ymm2
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 1.5 1.5 29.0 vpmulld %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1. 2 1.0 1.0 30.5 vpand %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2. 2 1.0 1.0 28.5 vcvttps2dq %xmm0, %xmm2
# CHECK-NEXT: 3. 2 1.5 1.5 29.0 vpclmulqdq $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 4. 2 1.5 1.5 27.5 vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 0. 2 1.0 1.0 29.0 vpmulld %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1. 2 1.0 1.0 30.0 vpand %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2. 2 1.0 1.0 28.0 vcvttps2dq %xmm0, %xmm2
# CHECK-NEXT: 3. 2 1.0 1.0 29.0 vpclmulqdq $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 4. 2 1.0 1.0 27.5 vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 5. 1 1.0 1.0 0.0 vsqrtps %xmm0, %xmm2
# CHECK-NEXT: 6. 1 1.0 1.0 17.0 vaddps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 7. 1 21.0 21.0 0.0 vsqrtps %ymm0, %ymm2
# CHECK-NEXT: 7. 1 20.0 20.0 0.0 vsqrtps %ymm0, %ymm2

View File

@ -1008,6 +1008,701 @@ vxorps (%rax), %ymm1, %ymm2
vzeroall
vzeroupper
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 3 1.00 vaddpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vaddpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 3 2.00 vaddpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 8 2.00 * vaddpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vaddps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 3 2.00 vaddps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 8 2.00 * vaddps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 1.00 vaddsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vaddsd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vaddss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vaddss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vaddsubpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vaddsubpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 3 2.00 vaddsubpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 8 2.00 * vaddsubpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 1.00 vaddsubps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vaddsubps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 3 2.00 vaddsubps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 8 2.00 * vaddsubps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 1.00 vaesdec %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vaesdec (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vaesdeclast %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vaesdeclast (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vaesenc %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vaesenc (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vaesenclast %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vaesenclast (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 vaesimc %xmm0, %xmm2
# CHECK-NEXT: 1 7 1.00 * vaesimc (%rax), %xmm2
# CHECK-NEXT: 1 2 1.00 vaeskeygenassist $22, %xmm0, %xmm2
# CHECK-NEXT: 1 7 1.00 * vaeskeygenassist $22, (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 vandnpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vandnpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 1 1.00 vandnpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 6 2.00 * vandnpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vandnps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vandnps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 1 1.00 vandnps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 6 2.00 * vandnps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vandpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vandpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 1 1.00 vandpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 6 2.00 * vandpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vandps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vandps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 1 1.00 vandps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 6 2.00 * vandps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vblendpd $11, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vblendpd $11, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 1 1.00 vblendpd $11, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 6 2.00 * vblendpd $11, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vblendps $11, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vblendps $11, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 1 1.00 vblendps $11, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 6 2.00 * vblendps $11, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 3 2 2.00 vblendvpd %xmm3, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 3 7 2.00 * vblendvpd %xmm3, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 6 3 3.00 vblendvpd %ymm3, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 6 8 3.00 * vblendvpd %ymm3, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 3 2 2.00 vblendvps %xmm3, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 3 7 2.00 * vblendvps %xmm3, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 6 3 3.00 vblendvps %ymm3, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 6 8 3.00 * vblendvps %ymm3, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 6 1.00 * vbroadcastf128 (%rax), %ymm2
# CHECK-NEXT: 1 6 2.00 * vbroadcastsd (%rax), %ymm2
# CHECK-NEXT: 1 6 1.00 * vbroadcastss (%rax), %xmm2
# CHECK-NEXT: 1 6 2.00 * vbroadcastss (%rax), %ymm2
# CHECK-NEXT: 1 2 1.00 vcmppd $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vcmppd $0, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 2.00 vcmppd $0, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 7 2.00 * vcmppd $0, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 2 1.00 vcmpps $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vcmpps $0, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 2.00 vcmpps $0, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 7 2.00 * vcmpps $0, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 2 1.00 vcmpsd $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vcmpsd $0, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 vcmpss $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vcmpss $0, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vcomisd %xmm0, %xmm1
# CHECK-NEXT: 1 8 1.00 * vcomisd (%rax), %xmm1
# CHECK-NEXT: 1 3 1.00 vcomiss %xmm0, %xmm1
# CHECK-NEXT: 1 8 1.00 * vcomiss (%rax), %xmm1
# CHECK-NEXT: 1 3 1.00 vcvtdq2pd %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * vcvtdq2pd (%rax), %xmm2
# CHECK-NEXT: 2 3 2.00 vcvtdq2pd %xmm0, %ymm2
# CHECK-NEXT: 2 8 2.00 * vcvtdq2pd (%rax), %ymm2
# CHECK-NEXT: 1 3 1.00 vcvtdq2ps %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * vcvtdq2ps (%rax), %xmm2
# CHECK-NEXT: 2 3 2.00 vcvtdq2ps %ymm0, %ymm2
# CHECK-NEXT: 2 8 2.00 * vcvtdq2ps (%rax), %ymm2
# CHECK-NEXT: 1 3 1.00 vcvtpd2dq %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * vcvtpd2dqx (%rax), %xmm2
# CHECK-NEXT: 1 6 2.00 vcvtpd2dq %ymm0, %xmm2
# CHECK-NEXT: 1 11 2.00 * vcvtpd2dqy (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 vcvtpd2ps %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * vcvtpd2psx (%rax), %xmm2
# CHECK-NEXT: 1 6 2.00 vcvtpd2ps %ymm0, %xmm2
# CHECK-NEXT: 1 11 2.00 * vcvtpd2psy (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 vcvtps2dq %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * vcvtps2dq (%rax), %xmm2
# CHECK-NEXT: 2 3 2.00 vcvtps2dq %ymm0, %ymm2
# CHECK-NEXT: 2 8 2.00 * vcvtps2dq (%rax), %ymm2
# CHECK-NEXT: 1 3 1.00 vcvtps2pd %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * vcvtps2pd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 vcvtps2pd %xmm0, %ymm2
# CHECK-NEXT: 1 8 1.00 * vcvtps2pd (%rax), %ymm2
# CHECK-NEXT: 2 7 1.00 vcvtsd2si %xmm0, %ecx
# CHECK-NEXT: 2 7 1.00 vcvtsd2si %xmm0, %rcx
# CHECK-NEXT: 2 12 1.00 * vcvtsd2si (%rax), %ecx
# CHECK-NEXT: 2 12 1.00 * vcvtsd2si (%rax), %rcx
# CHECK-NEXT: 2 7 2.00 vcvtsd2ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 12 2.00 * vcvtsd2ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 9 1.00 vcvtsi2sdl %ecx, %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 vcvtsi2sdq %rcx, %xmm0, %xmm2
# CHECK-NEXT: 2 14 1.00 * vcvtsi2sdl (%rax), %xmm0, %xmm2
# CHECK-NEXT: 2 14 1.00 * vcvtsi2sdq (%rax), %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 vcvtsi2ssl %ecx, %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 vcvtsi2ssq %rcx, %xmm0, %xmm2
# CHECK-NEXT: 2 14 1.00 * vcvtsi2ssl (%rax), %xmm0, %xmm2
# CHECK-NEXT: 2 14 1.00 * vcvtsi2ssq (%rax), %xmm0, %xmm2
# CHECK-NEXT: 2 7 2.00 vcvtss2sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 12 2.00 * vcvtss2sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 7 1.00 vcvtss2si %xmm0, %ecx
# CHECK-NEXT: 2 7 1.00 vcvtss2si %xmm0, %rcx
# CHECK-NEXT: 2 12 1.00 * vcvtss2si (%rax), %ecx
# CHECK-NEXT: 2 12 1.00 * vcvtss2si (%rax), %rcx
# CHECK-NEXT: 1 3 1.00 vcvttpd2dq %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * vcvttpd2dqx (%rax), %xmm2
# CHECK-NEXT: 1 6 2.00 vcvttpd2dq %ymm0, %xmm2
# CHECK-NEXT: 1 11 2.00 * vcvttpd2dqy (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 vcvttps2dq %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * vcvttps2dq (%rax), %xmm2
# CHECK-NEXT: 2 3 2.00 vcvttps2dq %ymm0, %ymm2
# CHECK-NEXT: 2 8 2.00 * vcvttps2dq (%rax), %ymm2
# CHECK-NEXT: 2 7 1.00 vcvttsd2si %xmm0, %ecx
# CHECK-NEXT: 2 7 1.00 vcvttsd2si %xmm0, %rcx
# CHECK-NEXT: 2 12 1.00 * vcvttsd2si (%rax), %ecx
# CHECK-NEXT: 2 12 1.00 * vcvttsd2si (%rax), %rcx
# CHECK-NEXT: 2 7 1.00 vcvttss2si %xmm0, %ecx
# CHECK-NEXT: 2 7 1.00 vcvttss2si %xmm0, %rcx
# CHECK-NEXT: 2 12 1.00 * vcvttss2si (%rax), %ecx
# CHECK-NEXT: 2 12 1.00 * vcvttss2si (%rax), %rcx
# CHECK-NEXT: 1 19 19.00 vdivpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 24 19.00 * vdivpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 38 38.00 vdivpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 43 38.00 * vdivpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 19 19.00 vdivps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 24 19.00 * vdivps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 38 38.00 vdivps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 43 38.00 * vdivps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 19 19.00 vdivsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 24 19.00 * vdivsd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 19 19.00 vdivss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 24 19.00 * vdivss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 3 9 3.00 vdppd $22, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 3 14 3.00 * vdppd $22, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 5 11 3.00 vdpps $22, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 5 16 3.00 * vdpps $22, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 10 12 6.00 vdpps $22, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 10 17 6.00 * vdpps $22, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vextractf128 $1, %ymm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * vextractf128 $1, %ymm0, (%rax)
# CHECK-NEXT: 1 1 0.50 vextractps $1, %xmm0, %ecx
# CHECK-NEXT: 2 6 1.00 * vextractps $1, %xmm0, (%rax)
# CHECK-NEXT: 1 3 1.00 vhaddpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vhaddpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 2.00 vhaddpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 8 2.00 * vhaddpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 1.00 vhaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vhaddps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 2.00 vhaddps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 8 2.00 * vhaddps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 1.00 vhsubpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vhsubpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 2.00 vhsubpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 8 2.00 * vhsubpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 1.00 vhsubps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vhsubps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 2.00 vhsubps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 8 2.00 * vhsubps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vinsertf128 $1, %xmm0, %ymm1, %ymm2
# CHECK-NEXT: 1 6 1.00 * vinsertf128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vinsertps $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vinsertps $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 1.00 * vlddqu (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 * vlddqu (%rax), %ymm2
# CHECK-NEXT: 1 5 1.00 * * * vldmxcsr (%rax)
# CHECK-NEXT: 1 1 1.00 * * * vmaskmovdqu %xmm0, %xmm1
# CHECK-NEXT: 1 6 1.00 * vmaskmovpd (%rax), %xmm0, %xmm2
# CHECK-NEXT: 2 6 2.00 * vmaskmovpd (%rax), %ymm0, %ymm2
# CHECK-NEXT: 1 6 2.00 * * vmaskmovpd %xmm0, %xmm1, (%rax)
# CHECK-NEXT: 2 6 2.00 * * vmaskmovpd %ymm0, %ymm1, (%rax)
# CHECK-NEXT: 1 6 1.00 * vmaskmovps (%rax), %xmm0, %xmm2
# CHECK-NEXT: 2 6 2.00 * vmaskmovps (%rax), %ymm0, %ymm2
# CHECK-NEXT: 1 6 2.00 * * vmaskmovps %xmm0, %xmm1, (%rax)
# CHECK-NEXT: 2 6 2.00 * * vmaskmovps %ymm0, %ymm1, (%rax)
# CHECK-NEXT: 1 2 1.00 vmaxpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vmaxpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 2.00 vmaxpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 7 2.00 * vmaxpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 2 1.00 vmaxps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vmaxps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 2.00 vmaxps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 7 2.00 * vmaxps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 2 1.00 vmaxsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vmaxsd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 vmaxss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vmaxss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 vminpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vminpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 2.00 vminpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 7 2.00 * vminpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 2 1.00 vminps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vminps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 2.00 vminps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 7 2.00 * vminps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 2 1.00 vminsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vminsd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 vminss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vminss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vmovapd %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * vmovapd %xmm0, (%rax)
# CHECK-NEXT: 1 5 1.00 * vmovapd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 vmovapd %ymm0, %ymm2
# CHECK-NEXT: 1 1 1.00 * vmovapd %ymm0, (%rax)
# CHECK-NEXT: 1 5 1.00 * vmovapd (%rax), %ymm2
# CHECK-NEXT: 1 1 0.50 vmovaps %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, (%rax)
# CHECK-NEXT: 1 5 1.00 * vmovaps (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 vmovaps %ymm0, %ymm2
# CHECK-NEXT: 1 1 1.00 * vmovaps %ymm0, (%rax)
# CHECK-NEXT: 1 5 1.00 * vmovaps (%rax), %ymm2
# CHECK-NEXT: 1 1 0.50 vmovd %eax, %xmm2
# CHECK-NEXT: 1 5 1.00 * vmovd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 vmovd %xmm0, %ecx
# CHECK-NEXT: 1 1 1.00 * vmovd %xmm0, (%rax)
# CHECK-NEXT: 1 1 0.50 vmovddup %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * vmovddup (%rax), %xmm2
# CHECK-NEXT: 2 1 1.00 vmovddup %ymm0, %ymm2
# CHECK-NEXT: 2 6 2.00 * vmovddup (%rax), %ymm2
# CHECK-NEXT: 1 1 0.50 vmovdqa %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * vmovdqa %xmm0, (%rax)
# CHECK-NEXT: 1 5 1.00 * vmovdqa (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 vmovdqa %ymm0, %ymm2
# CHECK-NEXT: 1 1 1.00 * vmovdqa %ymm0, (%rax)
# CHECK-NEXT: 1 5 1.00 * vmovdqa (%rax), %ymm2
# CHECK-NEXT: 1 1 0.50 vmovdqu %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * vmovdqu %xmm0, (%rax)
# CHECK-NEXT: 1 5 1.00 * vmovdqu (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 vmovdqu %ymm0, %ymm2
# CHECK-NEXT: 1 1 1.00 * vmovdqu %ymm0, (%rax)
# CHECK-NEXT: 1 5 1.00 * vmovdqu (%rax), %ymm2
# CHECK-NEXT: 1 1 0.50 vmovhlps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vmovlhps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 * vmovhpd %xmm0, (%rax)
# CHECK-NEXT: 1 6 1.00 * vmovhpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 * vmovhps %xmm0, (%rax)
# CHECK-NEXT: 1 6 1.00 * vmovhps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 * vmovlpd %xmm0, (%rax)
# CHECK-NEXT: 1 6 1.00 * vmovlpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 * vmovlps %xmm0, (%rax)
# CHECK-NEXT: 1 6 1.00 * vmovlps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vmovmskpd %xmm0, %ecx
# CHECK-NEXT: 1 3 1.00 vmovmskpd %ymm0, %ecx
# CHECK-NEXT: 1 3 1.00 vmovmskps %xmm0, %ecx
# CHECK-NEXT: 1 3 1.00 vmovmskps %ymm0, %ecx
# CHECK-NEXT: 1 2 1.00 * vmovntdq %xmm0, (%rax)
# CHECK-NEXT: 1 3 2.00 * vmovntdq %ymm0, (%rax)
# CHECK-NEXT: 1 5 1.00 * vmovntdqa (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 * vmovntdqa (%rax), %ymm2
# CHECK-NEXT: 1 3 1.00 * vmovntpd %xmm0, (%rax)
# CHECK-NEXT: 1 3 2.00 * vmovntpd %ymm0, (%rax)
# CHECK-NEXT: 1 3 1.00 * vmovntps %xmm0, (%rax)
# CHECK-NEXT: 1 3 2.00 * vmovntps %ymm0, (%rax)
# CHECK-NEXT: 1 1 0.50 vmovq %xmm0, %xmm2
# CHECK-NEXT: 1 1 0.50 vmovq %rax, %xmm2
# CHECK-NEXT: 1 5 1.00 * vmovq (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 vmovq %xmm0, %rcx
# CHECK-NEXT: 1 1 1.00 * vmovq %xmm0, (%rax)
# CHECK-NEXT: 1 1 0.50 vmovsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 * vmovsd %xmm0, (%rax)
# CHECK-NEXT: 1 5 1.00 * vmovsd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 vmovshdup %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * vmovshdup (%rax), %xmm2
# CHECK-NEXT: 2 1 1.00 vmovshdup %ymm0, %ymm2
# CHECK-NEXT: 2 6 2.00 * vmovshdup (%rax), %ymm2
# CHECK-NEXT: 1 1 0.50 vmovsldup %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * vmovsldup (%rax), %xmm2
# CHECK-NEXT: 2 1 1.00 vmovsldup %ymm0, %ymm2
# CHECK-NEXT: 2 6 2.00 * vmovsldup (%rax), %ymm2
# CHECK-NEXT: 1 1 0.50 vmovss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 * vmovss %xmm0, (%rax)
# CHECK-NEXT: 1 5 1.00 * vmovss (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 vmovupd %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * vmovupd %xmm0, (%rax)
# CHECK-NEXT: 1 5 1.00 * vmovupd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 vmovupd %ymm0, %ymm2
# CHECK-NEXT: 1 1 1.00 * vmovupd %ymm0, (%rax)
# CHECK-NEXT: 1 5 1.00 * vmovupd (%rax), %ymm2
# CHECK-NEXT: 1 1 0.50 vmovups %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * vmovups %xmm0, (%rax)
# CHECK-NEXT: 1 5 1.00 * vmovups (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 vmovups %ymm0, %ymm2
# CHECK-NEXT: 1 1 1.00 * vmovups %ymm0, (%rax)
# CHECK-NEXT: 1 5 1.00 * vmovups (%rax), %ymm2
# CHECK-NEXT: 1 3 2.00 vmpsadbw $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 2.00 * vmpsadbw $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 2.00 vmulpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 9 2.00 * vmulpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 4 4.00 vmulpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 9 4.00 * vmulpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 2 1.00 vmulps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vmulps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 2 2.00 vmulps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 7 2.00 * vmulps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 2.00 vmulsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 9 2.00 * vmulsd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 vmulss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vmulss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vorpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vorpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 1 1.00 vorpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 6 2.00 * vorpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vorps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vorps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 1 1.00 vorps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 6 2.00 * vorps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vpabsb %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpabsb (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 vpabsd %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpabsd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 vpabsw %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpabsw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 vpackssdw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpackssdw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpacksswb %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpacksswb (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpackusdw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpackusdw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpackuswb %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpackuswb (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpaddb %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpaddb (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpaddd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpaddd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpaddq %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpaddq (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpaddsb %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpaddsb (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpaddsw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpaddsw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpaddusb %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpaddusb (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpaddusw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpaddusw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpaddw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpaddw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpalignr $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpalignr $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpand %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpand (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpandn %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpandn (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpavgb %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpavgb (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpavgw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpavgw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 3 2 2.00 vpblendvb %xmm3, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 3 7 2.00 * vpblendvb %xmm3, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpblendw $11, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpblendw $11, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 vpclmulqdq $11, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vpclmulqdq $11, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpcmpeqb %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpcmpeqb (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpcmpeqd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpcmpeqd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpcmpeqq %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpcmpeqq (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpcmpeqw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpcmpeqw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpcmpgtb %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpcmpgtb (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpcmpgtd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpcmpgtd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpcmpgtq %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpcmpgtq (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpcmpgtw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpcmpgtw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vperm2f128 $1, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 6 1.00 * vperm2f128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vpermilpd $1, %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpermilpd $1, (%rax), %xmm2
# CHECK-NEXT: 3 2 2.00 vpermilpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 3 7 2.00 * vpermilpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 1 1.00 vpermilpd $1, %ymm0, %ymm2
# CHECK-NEXT: 2 6 2.00 * vpermilpd $1, (%rax), %ymm2
# CHECK-NEXT: 6 3 3.00 vpermilpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 6 8 3.00 * vpermilpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vpermilps $1, %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpermilps $1, (%rax), %xmm2
# CHECK-NEXT: 3 2 2.00 vpermilps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 3 7 2.00 * vpermilps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 1 1.00 vpermilps $1, %ymm0, %ymm2
# CHECK-NEXT: 2 6 2.00 * vpermilps $1, (%rax), %ymm2
# CHECK-NEXT: 6 3 3.00 vpermilps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 6 8 3.00 * vpermilps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vpextrb $1, %xmm0, %ecx
# CHECK-NEXT: 2 6 1.00 * vpextrb $1, %xmm0, (%rax)
# CHECK-NEXT: 1 1 0.50 vpextrd $1, %xmm0, %ecx
# CHECK-NEXT: 2 6 1.00 * vpextrd $1, %xmm0, (%rax)
# CHECK-NEXT: 1 1 0.50 vpextrq $1, %xmm0, %rcx
# CHECK-NEXT: 2 6 1.00 * vpextrq $1, %xmm0, (%rax)
# CHECK-NEXT: 1 1 0.50 vpextrw $1, %xmm0, %ecx
# CHECK-NEXT: 2 6 1.00 * vpextrw $1, %xmm0, (%rax)
# CHECK-NEXT: 1 1 0.50 vphaddd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vphaddd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vphaddsw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vphaddsw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vphaddw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vphaddw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 vphminposuw %xmm0, %xmm2
# CHECK-NEXT: 1 7 1.00 * vphminposuw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 vphsubd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vphsubd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vphsubsw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vphsubsw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vphsubw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vphsubw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpinsrb $1, %eax, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpinsrb $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpinsrd $1, %eax, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpinsrd $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpinsrq $1, %rax, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpinsrq $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpinsrw $1, %eax, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpinsrw $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 vpmaddubsw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vpmaddubsw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 vpmaddwd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vpmaddwd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpmaxsb %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpmaxsb (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpmaxsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpmaxsd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpmaxsw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpmaxsw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpmaxub %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpmaxub (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpmaxud %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpmaxud (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpmaxuw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpmaxuw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpminsb %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpminsb (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpminsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpminsd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpminsw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpminsw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpminub %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpminub (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpminud %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpminud (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpminuw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpminuw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vpmovmskb %xmm0, %ecx
# CHECK-NEXT: 1 1 0.50 vpmovsxbd %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpmovsxbd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 vpmovsxbq %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpmovsxbq (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 vpmovsxbw %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpmovsxbw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 vpmovsxdq %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpmovsxdq (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 vpmovsxwd %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpmovsxwd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 vpmovsxwq %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpmovsxwq (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 vpmovzxbd %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpmovzxbd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 vpmovzxbq %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpmovzxbq (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 vpmovzxbw %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpmovzxbw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 vpmovzxdq %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpmovzxdq (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 vpmovzxwd %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpmovzxwd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 vpmovzxwq %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpmovzxwq (%rax), %xmm2
# CHECK-NEXT: 1 2 1.00 vpmuldq %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vpmuldq (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 vpmulhrsw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vpmulhrsw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 vpmulhuw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vpmulhuw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 vpmulhw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vpmulhw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 vpmulld %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vpmulld (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 vpmullw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vpmullw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 vpmuludq %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vpmuludq (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpor %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpor (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpsadbw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpsadbw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 3 2 2.00 vpshufb %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 3 7 2.00 * vpshufb (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpshufd $1, %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpshufd $1, (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 vpshufhw $1, %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpshufhw $1, (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 vpshuflw $1, %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpshuflw $1, (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 vpsignb %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpsignb (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpsignd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpsignd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpsignw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpsignw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpslld $1, %xmm0, %xmm2
# CHECK-NEXT: 1 1 0.50 vpslld %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpslld (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpslldq $1, %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpsllq $1, %xmm0, %xmm2
# CHECK-NEXT: 1 1 0.50 vpsllq %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpsllq (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpsllw $1, %xmm0, %xmm2
# CHECK-NEXT: 1 1 0.50 vpsllw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpsllw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpsrad $1, %xmm0, %xmm2
# CHECK-NEXT: 1 1 0.50 vpsrad %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpsrad (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpsraw $1, %xmm0, %xmm2
# CHECK-NEXT: 1 1 0.50 vpsraw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpsraw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpsrld $1, %xmm0, %xmm2
# CHECK-NEXT: 1 1 0.50 vpsrld %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpsrld (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpsrldq $1, %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpsrlq $1, %xmm0, %xmm2
# CHECK-NEXT: 1 1 0.50 vpsrlq %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpsrlq (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpsrlw $1, %xmm0, %xmm2
# CHECK-NEXT: 1 1 0.50 vpsrlw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpsrlw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpsubb %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpsubb (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpsubd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpsubd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpsubq %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpsubq (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpsubsb %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpsubsb (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpsubsw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpsubsw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpsubusb %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpsubusb (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpsubusw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpsubusw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpsubw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpsubw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vptest %xmm0, %xmm1
# CHECK-NEXT: 1 8 1.00 * vptest (%rax), %xmm1
# CHECK-NEXT: 3 4 2.00 vptest %ymm0, %ymm1
# CHECK-NEXT: 3 9 2.00 * vptest (%rax), %ymm1
# CHECK-NEXT: 1 1 0.50 vpunpckhbw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpunpckhbw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpunpckhdq %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpunpckhdq (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpunpckhqdq %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpunpckhqdq (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpunpckhwd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpunpckhwd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpunpcklbw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpunpcklbw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpunpckldq %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpunpckldq (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpunpcklqdq %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpunpcklqdq (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpunpcklwd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpunpcklwd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpxor %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpxor (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 vrcpps %xmm0, %xmm2
# CHECK-NEXT: 1 7 1.00 * vrcpps (%rax), %xmm2
# CHECK-NEXT: 2 2 2.00 vrcpps %ymm0, %ymm2
# CHECK-NEXT: 2 7 2.00 * vrcpps (%rax), %ymm2
# CHECK-NEXT: 1 2 1.00 vrcpss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vrcpss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vroundpd $1, %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * vroundpd $1, (%rax), %xmm2
# CHECK-NEXT: 2 3 2.00 vroundpd $1, %ymm0, %ymm2
# CHECK-NEXT: 2 8 2.00 * vroundpd $1, (%rax), %ymm2
# CHECK-NEXT: 1 3 1.00 vroundps $1, %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * vroundps $1, (%rax), %xmm2
# CHECK-NEXT: 2 3 2.00 vroundps $1, %ymm0, %ymm2
# CHECK-NEXT: 2 8 2.00 * vroundps $1, (%rax), %ymm2
# CHECK-NEXT: 1 3 1.00 vroundsd $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vroundsd $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vroundss $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vroundss $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 vrsqrtps %xmm0, %xmm2
# CHECK-NEXT: 1 7 1.00 * vrsqrtps (%rax), %xmm2
# CHECK-NEXT: 2 2 2.00 vrsqrtps %ymm0, %ymm2
# CHECK-NEXT: 2 7 2.00 * vrsqrtps (%rax), %ymm2
# CHECK-NEXT: 1 2 1.00 vrsqrtss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vrsqrtss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vshufpd $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vshufpd $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 1 1.00 vshufpd $1, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 6 2.00 * vshufpd $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vshufps $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vshufps $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 1 1.00 vshufps $1, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 6 2.00 * vshufps $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 27 27.00 vsqrtpd %xmm0, %xmm2
# CHECK-NEXT: 1 32 27.00 * vsqrtpd (%rax), %xmm2
# CHECK-NEXT: 1 54 54.00 vsqrtpd %ymm0, %ymm2
# CHECK-NEXT: 1 59 54.00 * vsqrtpd (%rax), %ymm2
# CHECK-NEXT: 1 21 21.00 vsqrtps %xmm0, %xmm2
# CHECK-NEXT: 1 26 21.00 * vsqrtps (%rax), %xmm2
# CHECK-NEXT: 2 42 42.00 vsqrtps %ymm0, %ymm2
# CHECK-NEXT: 2 47 42.00 * vsqrtps (%rax), %ymm2
# CHECK-NEXT: 1 27 27.00 vsqrtsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 32 27.00 * vsqrtsd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 21 21.00 vsqrtss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 26 21.00 * vsqrtss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 * * * vstmxcsr (%rax)
# CHECK-NEXT: 1 3 1.00 vsubpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vsubpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 3 2.00 vsubpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 8 2.00 * vsubpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 1.00 vsubps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vsubps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 3 2.00 vsubps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 8 2.00 * vsubps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 1.00 vsubsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vsubsd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vsubss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 1.00 * vsubss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vtestpd %xmm0, %xmm1
# CHECK-NEXT: 1 8 1.00 * vtestpd (%rax), %xmm1
# CHECK-NEXT: 3 4 2.00 vtestpd %ymm0, %ymm1
# CHECK-NEXT: 3 9 2.00 * vtestpd (%rax), %ymm1
# CHECK-NEXT: 1 3 1.00 vtestps %xmm0, %xmm1
# CHECK-NEXT: 1 8 1.00 * vtestps (%rax), %xmm1
# CHECK-NEXT: 3 4 2.00 vtestps %ymm0, %ymm1
# CHECK-NEXT: 3 9 2.00 * vtestps (%rax), %ymm1
# CHECK-NEXT: 1 3 1.00 vucomisd %xmm0, %xmm1
# CHECK-NEXT: 1 8 1.00 * vucomisd (%rax), %xmm1
# CHECK-NEXT: 1 3 1.00 vucomiss %xmm0, %xmm1
# CHECK-NEXT: 1 8 1.00 * vucomiss (%rax), %xmm1
# CHECK-NEXT: 1 1 0.50 vunpckhpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vunpckhpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 1 1.00 vunpckhpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 6 2.00 * vunpckhpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vunpckhps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vunpckhps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 1 1.00 vunpckhps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 6 2.00 * vunpckhps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vunpcklpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vunpcklpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 1 1.00 vunpcklpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 6 2.00 * vunpcklpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vunpcklps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vunpcklps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 1 1.00 vunpcklps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 6 2.00 * vunpcklps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vxorpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vxorpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 1 1.00 vxorpd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 6 2.00 * vxorpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vxorps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vxorps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 1 1.00 vxorps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 6 2.00 * vxorps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 73 90 - * * * vzeroall
# CHECK-NEXT: 37 46 - * * * vzeroupper
# CHECK: Resources:
# CHECK-NEXT: [0] - JALU0
# CHECK-NEXT: [1] - JALU1