1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00

[X86] Update the haswell and broadwell scheduler information for gather instructions

Broadwell was missing half of the gather instructions. Both models
had some mix-ups in the resource costs and the number of uops.

I've updated them here based on what I think the original IACA source
says, with some cross-checking against the microcode.

I'm not sure about the latencies, as the IACA source I have doesn't have
that information, so I'm using the latencies from uops.info.

I plan to update the Skylake models as well, but I'll do that in a
separate patch.

Differential Revision: https://reviews.llvm.org/D73844
This commit is contained in:
Craig Topper 2020-02-03 17:57:12 -08:00
parent 8dcc593a5a
commit 3c613b53f1
4 changed files with 95 additions and 127 deletions

View File

@ -1480,54 +1480,42 @@ def BWWriteResGroup182 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> {
def: InstRW<[BWWriteResGroup182], (instregex "DIVR_FI(16|32)m")>;
def BWWriteResGroup183_1 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
let Latency = 22;
let Latency = 17;
let NumMicroOps = 7;
let ResourceCycles = [1,3,2,1];
}
def: InstRW<[BWWriteResGroup183_1], (instrs VGATHERQPDrm)>;
def: InstRW<[BWWriteResGroup183_1], (instrs VGATHERDPDrm, VPGATHERDQrm,
VGATHERQPDrm, VPGATHERQQrm)>;
def BWWriteResGroup183_2 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
let Latency = 23;
let Latency = 18;
let NumMicroOps = 9;
let ResourceCycles = [1,3,4,1];
}
def: InstRW<[BWWriteResGroup183_2], (instrs VGATHERQPDYrm)>;
def: InstRW<[BWWriteResGroup183_2], (instrs VGATHERDPDYrm, VPGATHERDQYrm,
VGATHERQPDYrm, VPGATHERQQYrm)>;
def BWWriteResGroup183_3 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
let Latency = 24;
let Latency = 19;
let NumMicroOps = 9;
let ResourceCycles = [1,5,2,1];
}
def: InstRW<[BWWriteResGroup183_3], (instrs VGATHERQPSYrm)>;
def: InstRW<[BWWriteResGroup183_3], (instrs VGATHERQPSrm, VPGATHERQDrm)>;
def BWWriteResGroup183_4 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
let Latency = 25;
let NumMicroOps = 7;
let ResourceCycles = [1,3,2,1];
let Latency = 19;
let NumMicroOps = 10;
let ResourceCycles = [1,4,4,1];
}
def: InstRW<[BWWriteResGroup183_4], (instrs VGATHERDPDrm,
VGATHERDPSrm)>;
def: InstRW<[BWWriteResGroup183_4], (instrs VGATHERDPSrm, VPGATHERDDrm,
VGATHERQPSYrm, VPGATHERQDYrm)>;
def BWWriteResGroup183_5 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
let Latency = 26;
let NumMicroOps = 9;
let ResourceCycles = [1,5,2,1];
}
def: InstRW<[BWWriteResGroup183_5], (instrs VGATHERDPDYrm)>;
def BWWriteResGroup183_6 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
let Latency = 26;
let Latency = 21;
let NumMicroOps = 14;
let ResourceCycles = [1,4,8,1];
}
def: InstRW<[BWWriteResGroup183_6], (instrs VGATHERDPSYrm)>;
def BWWriteResGroup183_7 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
let Latency = 27;
let NumMicroOps = 9;
let ResourceCycles = [1,5,2,1];
}
def: InstRW<[BWWriteResGroup183_7], (instrs VGATHERQPSrm)>;
def: InstRW<[BWWriteResGroup183_5], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>;
def BWWriteResGroup185 : SchedWriteRes<[BWPort4,BWPort6,BWPort23,BWPort237,BWPort0156]> {
let Latency = 29;

View File

@ -1785,75 +1785,55 @@ def HWWriteResGroup183 : SchedWriteRes<[HWPort0,HWPort1,HWPort4,HWPort5,HWPort6,
}
def: InstRW<[HWWriteResGroup183], (instrs FSTENVm)>;
def HWWriteResGroup184 : SchedWriteRes<[HWPort0, HWPort5, HWPort15, HWPort015, HWPort06, HWPort23]> {
let Latency = 26;
def HWWriteResGroup184 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
let Latency = 14;
let NumMicroOps = 12;
let ResourceCycles = [2,2,1,3,2,2];
let ResourceCycles = [2,2,2,1,3,2];
}
def: InstRW<[HWWriteResGroup184], (instrs VGATHERDPDrm,
VPGATHERDQrm,
VPGATHERDDrm)>;
def: InstRW<[HWWriteResGroup184], (instrs VGATHERDPDrm, VPGATHERDQrm)>;
def HWWriteResGroup185 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
let Latency = 24;
let NumMicroOps = 22;
let ResourceCycles = [5,3,4,1,5,4];
}
def: InstRW<[HWWriteResGroup185], (instrs VGATHERQPDYrm,
VPGATHERQQYrm)>;
def HWWriteResGroup186 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
let Latency = 28;
let NumMicroOps = 22;
let ResourceCycles = [5,3,4,1,5,4];
}
def: InstRW<[HWWriteResGroup186], (instrs VPGATHERQDYrm)>;
def HWWriteResGroup187 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
let Latency = 25;
let NumMicroOps = 22;
let ResourceCycles = [5,3,4,1,5,4];
}
def: InstRW<[HWWriteResGroup187], (instrs VPGATHERQDrm)>;
def HWWriteResGroup188 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
let Latency = 27;
def HWWriteResGroup185 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
let Latency = 17;
let NumMicroOps = 20;
let ResourceCycles = [3,3,4,1,5,4];
}
def: InstRW<[HWWriteResGroup188], (instrs VGATHERDPDYrm,
VPGATHERDQYrm)>;
def: InstRW<[HWWriteResGroup185], (instrs VGATHERDPDYrm, VPGATHERDQYrm)>;
def HWWriteResGroup189 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
let Latency = 27;
def HWWriteResGroup186 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
let Latency = 16;
let NumMicroOps = 20;
let ResourceCycles = [3,3,4,1,5,4];
}
def: InstRW<[HWWriteResGroup186], (instrs VGATHERDPSrm, VPGATHERDDrm)>;
def HWWriteResGroup187 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
let Latency = 22;
let NumMicroOps = 34;
let ResourceCycles = [5,3,8,1,9,8];
}
def: InstRW<[HWWriteResGroup189], (instrs VGATHERDPSYrm,
VPGATHERDDYrm)>;
def: InstRW<[HWWriteResGroup187], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>;
def HWWriteResGroup190 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
let Latency = 23;
def HWWriteResGroup188 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
let Latency = 15;
let NumMicroOps = 14;
let ResourceCycles = [3,3,2,1,3,2];
}
def: InstRW<[HWWriteResGroup190], (instrs VGATHERQPDrm,
VPGATHERQQrm)>;
def: InstRW<[HWWriteResGroup188], (instrs VGATHERQPDrm, VPGATHERQQrm)>;
def HWWriteResGroup191 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
let Latency = 28;
def HWWriteResGroup189 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
let Latency = 17;
let NumMicroOps = 22;
let ResourceCycles = [5,3,4,1,5,4];
}
def: InstRW<[HWWriteResGroup189], (instrs VGATHERQPDYrm, VPGATHERQQYrm,
VGATHERQPSYrm, VPGATHERQDYrm)>;
def HWWriteResGroup190 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
let Latency = 16;
let NumMicroOps = 15;
let ResourceCycles = [3,3,2,1,4,2];
}
def: InstRW<[HWWriteResGroup191], (instrs VGATHERQPSYrm)>;
def HWWriteResGroup192 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
let Latency = 25;
let NumMicroOps = 15;
let ResourceCycles = [3,3,2,1,4,2];
}
def: InstRW<[HWWriteResGroup192], (instrs VGATHERQPSrm,
VGATHERDPSrm)>;
def: InstRW<[HWWriteResGroup190], (instrs VGATHERQPSrm, VPGATHERQDrm)>;
def: InstRW<[WriteZero], (instrs CLC)>;

View File

@ -465,14 +465,14 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 1.00 vbroadcastss %xmm0, %ymm0
# CHECK-NEXT: 1 3 1.00 vextracti128 $1, %ymm0, %xmm2
# CHECK-NEXT: 2 1 1.00 * vextracti128 $1, %ymm0, (%rax)
# CHECK-NEXT: 7 25 3.00 * vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 9 26 5.00 * vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
# CHECK-NEXT: 7 25 3.00 * vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 14 26 4.00 * vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: 7 22 3.00 * vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 9 23 3.00 * vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: 9 27 5.00 * vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 9 24 5.00 * vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2
# CHECK-NEXT: 7 17 3.00 * vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 9 18 3.00 * vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
# CHECK-NEXT: 10 19 4.00 * vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 14 21 4.00 * vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: 7 17 3.00 * vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 9 18 3.00 * vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: 9 19 5.00 * vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 10 19 4.00 * vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2
# CHECK-NEXT: 1 3 1.00 vinserti128 $1, %xmm0, %ymm1, %ymm2
# CHECK-NEXT: 2 6 0.50 * vinserti128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 6 0.50 * vmovntdqa (%rax), %ymm0
@ -568,14 +568,14 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 2 9 1.00 * vpermps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 1.00 vpermq $1, %ymm0, %ymm2
# CHECK-NEXT: 2 9 1.00 * vpermq $1, (%rax), %ymm2
# CHECK-NEXT: 1 5 0.50 * vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 1 5 0.50 * vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: 1 5 0.50 * vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 1 5 0.50 * vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2
# CHECK-NEXT: 1 5 0.50 * vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 1 5 0.50 * vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2
# CHECK-NEXT: 1 5 0.50 * vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 1 5 0.50 * vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: 10 19 4.00 * vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 14 21 4.00 * vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: 7 17 3.00 * vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 9 18 3.00 * vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2
# CHECK-NEXT: 9 19 5.00 * vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 10 19 4.00 * vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2
# CHECK-NEXT: 7 17 3.00 * vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 9 18 3.00 * vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: 3 3 2.00 vphaddd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 4 9 2.00 * vphaddd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 3 3 2.00 vphaddsw %ymm0, %ymm1, %ymm2
@ -776,7 +776,7 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
# CHECK-NEXT: - - 94.67 58.67 85.67 85.67 13.00 237.67 2.00 1.67
# CHECK-NEXT: - - 96.67 60.67 99.67 99.67 21.00 266.67 4.00 1.67
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@ -786,13 +786,13 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - 1.00 - - vextracti128 $1, %ymm0, %xmm2
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vextracti128 $1, %ymm0, (%rax)
# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 3.25 0.25 - vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 5.25 0.25 - vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 3.25 0.25 - vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 3.25 0.25 - vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 4.25 0.25 - vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - 0.25 0.25 4.00 4.00 1.00 4.25 0.25 - vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 3.25 0.25 - vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 3.25 0.25 - vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 5.25 0.25 - vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 5.25 0.25 - vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2
# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 4.25 0.25 - vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2
# CHECK-NEXT: - - - - - - - 1.00 - - vinserti128 $1, %xmm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vinserti128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vmovntdqa (%rax), %ymm0
@ -888,14 +888,14 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpermps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - 1.00 - - vpermq $1, %ymm0, %ymm2
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpermq $1, (%rax), %ymm2
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 4.25 0.25 - vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - 0.25 0.25 4.00 4.00 1.00 4.25 0.25 - vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 3.25 0.25 - vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 3.25 0.25 - vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2
# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 5.25 0.25 - vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 4.25 0.25 - vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2
# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 3.25 0.25 - vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 3.25 0.25 - vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: - - - 0.50 - - - 2.50 - - vphaddd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 - 2.50 - - vphaddd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - 0.50 - - - 2.50 - - vphaddsw %ymm0, %ymm1, %ymm2

View File

@ -465,14 +465,14 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 1.00 vbroadcastss %xmm0, %ymm0
# CHECK-NEXT: 1 3 1.00 vextracti128 $1, %ymm0, %xmm2
# CHECK-NEXT: 2 1 1.00 * vextracti128 $1, %ymm0, (%rax)
# CHECK-NEXT: 12 26 2.67 * vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 20 27 4.00 * vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
# CHECK-NEXT: 15 25 3.67 * vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 34 27 6.50 * vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: 14 23 3.33 * vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 22 24 5.00 * vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: 15 25 3.67 * vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 15 28 3.67 * vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2
# CHECK-NEXT: 12 14 2.67 * vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 20 17 4.00 * vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
# CHECK-NEXT: 20 16 4.00 * vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 34 22 6.50 * vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: 14 15 3.33 * vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 22 17 5.00 * vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: 15 16 3.67 * vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 22 17 5.00 * vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2
# CHECK-NEXT: 1 3 1.00 vinserti128 $1, %xmm0, %ymm1, %ymm2
# CHECK-NEXT: 2 7 0.50 * vinserti128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 7 0.50 * vmovntdqa (%rax), %ymm0
@ -568,14 +568,14 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 2 10 1.00 * vpermps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 1.00 vpermq $1, %ymm0, %ymm2
# CHECK-NEXT: 2 10 1.00 * vpermq $1, (%rax), %ymm2
# CHECK-NEXT: 12 26 2.67 * vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 34 27 6.50 * vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: 12 26 2.67 * vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 20 27 4.00 * vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2
# CHECK-NEXT: 22 25 5.00 * vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 22 28 5.00 * vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2
# CHECK-NEXT: 14 23 3.33 * vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 22 24 5.00 * vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: 20 16 4.00 * vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 34 22 6.50 * vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: 12 14 2.67 * vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 20 17 4.00 * vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2
# CHECK-NEXT: 15 16 3.67 * vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 22 17 5.00 * vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2
# CHECK-NEXT: 14 15 3.33 * vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 22 17 5.00 * vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: 3 3 2.00 vphaddd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 4 10 2.00 * vphaddd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 3 3 2.00 vphaddsw %ymm0, %ymm1, %ymm2
@ -776,7 +776,7 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
# CHECK-NEXT: - - 202.67 89.67 97.67 97.67 5.00 282.67 28.00 1.67
# CHECK-NEXT: - - 206.67 90.67 99.67 99.67 5.00 284.67 30.00 1.67
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@ -787,12 +787,12 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vextracti128 $1, %ymm0, (%rax)
# CHECK-NEXT: - - 4.00 1.50 1.00 1.00 - 3.50 1.00 - vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - 6.67 2.17 2.00 2.00 - 5.17 2.00 - vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
# CHECK-NEXT: - - 5.33 1.83 1.00 1.00 - 4.83 1.00 - vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - 6.67 2.17 2.00 2.00 - 5.17 2.00 - vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - 12.00 3.50 4.00 4.00 - 6.50 4.00 - vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: - - 5.00 1.50 1.00 1.00 - 4.50 1.00 - vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - 8.67 2.17 2.00 2.00 - 5.17 2.00 - vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: - - 5.33 1.83 1.00 1.00 - 4.83 1.00 - vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - 5.33 1.83 1.00 1.00 - 4.83 1.00 - vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2
# CHECK-NEXT: - - 8.67 2.17 2.00 2.00 - 5.17 2.00 - vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2
# CHECK-NEXT: - - - - - - - 1.00 - - vinserti128 $1, %xmm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vinserti128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vmovntdqa (%rax), %ymm0
@ -888,11 +888,11 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpermps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - 1.00 - - vpermq $1, %ymm0, %ymm2
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpermq $1, (%rax), %ymm2
# CHECK-NEXT: - - 4.00 1.50 1.00 1.00 - 3.50 1.00 - vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - 6.67 2.17 2.00 2.00 - 5.17 2.00 - vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - 12.00 3.50 4.00 4.00 - 6.50 4.00 - vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: - - 4.00 1.50 1.00 1.00 - 3.50 1.00 - vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - 6.67 2.17 2.00 2.00 - 5.17 2.00 - vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2
# CHECK-NEXT: - - 8.67 2.17 2.00 2.00 - 5.17 2.00 - vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - 5.33 1.83 1.00 1.00 - 4.83 1.00 - vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - 8.67 2.17 2.00 2.00 - 5.17 2.00 - vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2
# CHECK-NEXT: - - 5.00 1.50 1.00 1.00 - 4.50 1.00 - vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - 8.67 2.17 2.00 2.00 - 5.17 2.00 - vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2