mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
[X86] Update the Haswell and Broadwell scheduler information for gather instructions
Broadwell was missing half the gather instructions. Both models had some mix-ups in the resource costs and number of uops. I've updated them here based on what I think the original IACA source says, with some cross-checking against the microcode. I'm not sure about latency, as the IACA source I have doesn't have that information, so I'm using the latency from uops.info. I plan to update the Skylake models as well, but I'll do that in a separate patch. Differential Revision: https://reviews.llvm.org/D73844
This commit is contained in:
parent
8dcc593a5a
commit
3c613b53f1
@ -1480,54 +1480,42 @@ def BWWriteResGroup182 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> {
|
||||
def: InstRW<[BWWriteResGroup182], (instregex "DIVR_FI(16|32)m")>;
|
||||
|
||||
def BWWriteResGroup183_1 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
|
||||
let Latency = 22;
|
||||
let Latency = 17;
|
||||
let NumMicroOps = 7;
|
||||
let ResourceCycles = [1,3,2,1];
|
||||
}
|
||||
def: InstRW<[BWWriteResGroup183_1], (instrs VGATHERQPDrm)>;
|
||||
def: InstRW<[BWWriteResGroup183_1], (instrs VGATHERDPDrm, VPGATHERDQrm,
|
||||
VGATHERQPDrm, VPGATHERQQrm)>;
|
||||
|
||||
def BWWriteResGroup183_2 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
|
||||
let Latency = 23;
|
||||
let Latency = 18;
|
||||
let NumMicroOps = 9;
|
||||
let ResourceCycles = [1,3,4,1];
|
||||
}
|
||||
def: InstRW<[BWWriteResGroup183_2], (instrs VGATHERQPDYrm)>;
|
||||
def: InstRW<[BWWriteResGroup183_2], (instrs VGATHERDPDYrm, VPGATHERDQYrm,
|
||||
VGATHERQPDYrm, VPGATHERQQYrm)>;
|
||||
|
||||
def BWWriteResGroup183_3 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
|
||||
let Latency = 24;
|
||||
let Latency = 19;
|
||||
let NumMicroOps = 9;
|
||||
let ResourceCycles = [1,5,2,1];
|
||||
}
|
||||
def: InstRW<[BWWriteResGroup183_3], (instrs VGATHERQPSYrm)>;
|
||||
def: InstRW<[BWWriteResGroup183_3], (instrs VGATHERQPSrm, VPGATHERQDrm)>;
|
||||
|
||||
def BWWriteResGroup183_4 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
|
||||
let Latency = 25;
|
||||
let NumMicroOps = 7;
|
||||
let ResourceCycles = [1,3,2,1];
|
||||
let Latency = 19;
|
||||
let NumMicroOps = 10;
|
||||
let ResourceCycles = [1,4,4,1];
|
||||
}
|
||||
def: InstRW<[BWWriteResGroup183_4], (instrs VGATHERDPDrm,
|
||||
VGATHERDPSrm)>;
|
||||
def: InstRW<[BWWriteResGroup183_4], (instrs VGATHERDPSrm, VPGATHERDDrm,
|
||||
VGATHERQPSYrm, VPGATHERQDYrm)>;
|
||||
|
||||
def BWWriteResGroup183_5 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
|
||||
let Latency = 26;
|
||||
let NumMicroOps = 9;
|
||||
let ResourceCycles = [1,5,2,1];
|
||||
}
|
||||
def: InstRW<[BWWriteResGroup183_5], (instrs VGATHERDPDYrm)>;
|
||||
|
||||
def BWWriteResGroup183_6 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
|
||||
let Latency = 26;
|
||||
let Latency = 21;
|
||||
let NumMicroOps = 14;
|
||||
let ResourceCycles = [1,4,8,1];
|
||||
}
|
||||
def: InstRW<[BWWriteResGroup183_6], (instrs VGATHERDPSYrm)>;
|
||||
|
||||
def BWWriteResGroup183_7 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
|
||||
let Latency = 27;
|
||||
let NumMicroOps = 9;
|
||||
let ResourceCycles = [1,5,2,1];
|
||||
}
|
||||
def: InstRW<[BWWriteResGroup183_7], (instrs VGATHERQPSrm)>;
|
||||
def: InstRW<[BWWriteResGroup183_5], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>;
|
||||
|
||||
def BWWriteResGroup185 : SchedWriteRes<[BWPort4,BWPort6,BWPort23,BWPort237,BWPort0156]> {
|
||||
let Latency = 29;
|
||||
|
@ -1785,75 +1785,55 @@ def HWWriteResGroup183 : SchedWriteRes<[HWPort0,HWPort1,HWPort4,HWPort5,HWPort6,
|
||||
}
|
||||
def: InstRW<[HWWriteResGroup183], (instrs FSTENVm)>;
|
||||
|
||||
def HWWriteResGroup184 : SchedWriteRes<[HWPort0, HWPort5, HWPort15, HWPort015, HWPort06, HWPort23]> {
|
||||
let Latency = 26;
|
||||
def HWWriteResGroup184 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
|
||||
let Latency = 14;
|
||||
let NumMicroOps = 12;
|
||||
let ResourceCycles = [2,2,1,3,2,2];
|
||||
let ResourceCycles = [2,2,2,1,3,2];
|
||||
}
|
||||
def: InstRW<[HWWriteResGroup184], (instrs VGATHERDPDrm,
|
||||
VPGATHERDQrm,
|
||||
VPGATHERDDrm)>;
|
||||
def: InstRW<[HWWriteResGroup184], (instrs VGATHERDPDrm, VPGATHERDQrm)>;
|
||||
|
||||
def HWWriteResGroup185 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
|
||||
let Latency = 24;
|
||||
let NumMicroOps = 22;
|
||||
let ResourceCycles = [5,3,4,1,5,4];
|
||||
}
|
||||
def: InstRW<[HWWriteResGroup185], (instrs VGATHERQPDYrm,
|
||||
VPGATHERQQYrm)>;
|
||||
|
||||
def HWWriteResGroup186 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
|
||||
let Latency = 28;
|
||||
let NumMicroOps = 22;
|
||||
let ResourceCycles = [5,3,4,1,5,4];
|
||||
}
|
||||
def: InstRW<[HWWriteResGroup186], (instrs VPGATHERQDYrm)>;
|
||||
|
||||
def HWWriteResGroup187 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
|
||||
let Latency = 25;
|
||||
let NumMicroOps = 22;
|
||||
let ResourceCycles = [5,3,4,1,5,4];
|
||||
}
|
||||
def: InstRW<[HWWriteResGroup187], (instrs VPGATHERQDrm)>;
|
||||
|
||||
def HWWriteResGroup188 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
|
||||
let Latency = 27;
|
||||
def HWWriteResGroup185 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
|
||||
let Latency = 17;
|
||||
let NumMicroOps = 20;
|
||||
let ResourceCycles = [3,3,4,1,5,4];
|
||||
}
|
||||
def: InstRW<[HWWriteResGroup188], (instrs VGATHERDPDYrm,
|
||||
VPGATHERDQYrm)>;
|
||||
def: InstRW<[HWWriteResGroup185], (instrs VGATHERDPDYrm, VPGATHERDQYrm)>;
|
||||
|
||||
def HWWriteResGroup189 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
|
||||
let Latency = 27;
|
||||
def HWWriteResGroup186 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
|
||||
let Latency = 16;
|
||||
let NumMicroOps = 20;
|
||||
let ResourceCycles = [3,3,4,1,5,4];
|
||||
}
|
||||
def: InstRW<[HWWriteResGroup186], (instrs VGATHERDPSrm, VPGATHERDDrm)>;
|
||||
|
||||
def HWWriteResGroup187 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
|
||||
let Latency = 22;
|
||||
let NumMicroOps = 34;
|
||||
let ResourceCycles = [5,3,8,1,9,8];
|
||||
}
|
||||
def: InstRW<[HWWriteResGroup189], (instrs VGATHERDPSYrm,
|
||||
VPGATHERDDYrm)>;
|
||||
def: InstRW<[HWWriteResGroup187], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>;
|
||||
|
||||
def HWWriteResGroup190 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
|
||||
let Latency = 23;
|
||||
def HWWriteResGroup188 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
|
||||
let Latency = 15;
|
||||
let NumMicroOps = 14;
|
||||
let ResourceCycles = [3,3,2,1,3,2];
|
||||
}
|
||||
def: InstRW<[HWWriteResGroup190], (instrs VGATHERQPDrm,
|
||||
VPGATHERQQrm)>;
|
||||
def: InstRW<[HWWriteResGroup188], (instrs VGATHERQPDrm, VPGATHERQQrm)>;
|
||||
|
||||
def HWWriteResGroup191 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
|
||||
let Latency = 28;
|
||||
def HWWriteResGroup189 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
|
||||
let Latency = 17;
|
||||
let NumMicroOps = 22;
|
||||
let ResourceCycles = [5,3,4,1,5,4];
|
||||
}
|
||||
def: InstRW<[HWWriteResGroup189], (instrs VGATHERQPDYrm, VPGATHERQQYrm,
|
||||
VGATHERQPSYrm, VPGATHERQDYrm)>;
|
||||
|
||||
def HWWriteResGroup190 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
|
||||
let Latency = 16;
|
||||
let NumMicroOps = 15;
|
||||
let ResourceCycles = [3,3,2,1,4,2];
|
||||
}
|
||||
def: InstRW<[HWWriteResGroup191], (instrs VGATHERQPSYrm)>;
|
||||
|
||||
def HWWriteResGroup192 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
|
||||
let Latency = 25;
|
||||
let NumMicroOps = 15;
|
||||
let ResourceCycles = [3,3,2,1,4,2];
|
||||
}
|
||||
def: InstRW<[HWWriteResGroup192], (instrs VGATHERQPSrm,
|
||||
VGATHERDPSrm)>;
|
||||
def: InstRW<[HWWriteResGroup190], (instrs VGATHERQPSrm, VPGATHERQDrm)>;
|
||||
|
||||
def: InstRW<[WriteZero], (instrs CLC)>;
|
||||
|
||||
|
@ -465,14 +465,14 @@ vpxor (%rax), %ymm1, %ymm2
|
||||
# CHECK-NEXT: 1 3 1.00 vbroadcastss %xmm0, %ymm0
|
||||
# CHECK-NEXT: 1 3 1.00 vextracti128 $1, %ymm0, %xmm2
|
||||
# CHECK-NEXT: 2 1 1.00 * vextracti128 $1, %ymm0, (%rax)
|
||||
# CHECK-NEXT: 7 25 3.00 * vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: 9 26 5.00 * vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
|
||||
# CHECK-NEXT: 7 25 3.00 * vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: 14 26 4.00 * vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2
|
||||
# CHECK-NEXT: 7 22 3.00 * vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: 9 23 3.00 * vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2
|
||||
# CHECK-NEXT: 9 27 5.00 * vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: 9 24 5.00 * vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2
|
||||
# CHECK-NEXT: 7 17 3.00 * vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: 9 18 3.00 * vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
|
||||
# CHECK-NEXT: 10 19 4.00 * vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: 14 21 4.00 * vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2
|
||||
# CHECK-NEXT: 7 17 3.00 * vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: 9 18 3.00 * vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2
|
||||
# CHECK-NEXT: 9 19 5.00 * vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: 10 19 4.00 * vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 vinserti128 $1, %xmm0, %ymm1, %ymm2
|
||||
# CHECK-NEXT: 2 6 0.50 * vinserti128 $1, (%rax), %ymm1, %ymm2
|
||||
# CHECK-NEXT: 1 6 0.50 * vmovntdqa (%rax), %ymm0
|
||||
@ -568,14 +568,14 @@ vpxor (%rax), %ymm1, %ymm2
|
||||
# CHECK-NEXT: 2 9 1.00 * vpermps (%rax), %ymm1, %ymm2
|
||||
# CHECK-NEXT: 1 3 1.00 vpermq $1, %ymm0, %ymm2
|
||||
# CHECK-NEXT: 2 9 1.00 * vpermq $1, (%rax), %ymm2
|
||||
# CHECK-NEXT: 1 5 0.50 * vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: 1 5 0.50 * vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2
|
||||
# CHECK-NEXT: 1 5 0.50 * vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: 1 5 0.50 * vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2
|
||||
# CHECK-NEXT: 1 5 0.50 * vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: 1 5 0.50 * vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2
|
||||
# CHECK-NEXT: 1 5 0.50 * vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: 1 5 0.50 * vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2
|
||||
# CHECK-NEXT: 10 19 4.00 * vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: 14 21 4.00 * vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2
|
||||
# CHECK-NEXT: 7 17 3.00 * vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: 9 18 3.00 * vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2
|
||||
# CHECK-NEXT: 9 19 5.00 * vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: 10 19 4.00 * vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2
|
||||
# CHECK-NEXT: 7 17 3.00 * vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: 9 18 3.00 * vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2
|
||||
# CHECK-NEXT: 3 3 2.00 vphaddd %ymm0, %ymm1, %ymm2
|
||||
# CHECK-NEXT: 4 9 2.00 * vphaddd (%rax), %ymm1, %ymm2
|
||||
# CHECK-NEXT: 3 3 2.00 vphaddsw %ymm0, %ymm1, %ymm2
|
||||
@ -776,7 +776,7 @@ vpxor (%rax), %ymm1, %ymm2
|
||||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
|
||||
# CHECK-NEXT: - - 94.67 58.67 85.67 85.67 13.00 237.67 2.00 1.67
|
||||
# CHECK-NEXT: - - 96.67 60.67 99.67 99.67 21.00 266.67 4.00 1.67
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
|
||||
@ -786,13 +786,13 @@ vpxor (%rax), %ymm1, %ymm2
|
||||
# CHECK-NEXT: - - - - - - - 1.00 - - vextracti128 $1, %ymm0, %xmm2
|
||||
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vextracti128 $1, %ymm0, (%rax)
|
||||
# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 3.25 0.25 - vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 5.25 0.25 - vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
|
||||
# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 3.25 0.25 - vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 3.25 0.25 - vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
|
||||
# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 4.25 0.25 - vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: - - 0.25 0.25 4.00 4.00 1.00 4.25 0.25 - vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2
|
||||
# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 3.25 0.25 - vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 3.25 0.25 - vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2
|
||||
# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 5.25 0.25 - vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 5.25 0.25 - vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2
|
||||
# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 4.25 0.25 - vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2
|
||||
# CHECK-NEXT: - - - - - - - 1.00 - - vinserti128 $1, %xmm0, %ymm1, %ymm2
|
||||
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vinserti128 $1, (%rax), %ymm1, %ymm2
|
||||
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vmovntdqa (%rax), %ymm0
|
||||
@ -888,14 +888,14 @@ vpxor (%rax), %ymm1, %ymm2
|
||||
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpermps (%rax), %ymm1, %ymm2
|
||||
# CHECK-NEXT: - - - - - - - 1.00 - - vpermq $1, %ymm0, %ymm2
|
||||
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpermq $1, (%rax), %ymm2
|
||||
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2
|
||||
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2
|
||||
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2
|
||||
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2
|
||||
# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 4.25 0.25 - vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: - - 0.25 0.25 4.00 4.00 1.00 4.25 0.25 - vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2
|
||||
# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 3.25 0.25 - vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 3.25 0.25 - vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2
|
||||
# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 5.25 0.25 - vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 4.25 0.25 - vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2
|
||||
# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 3.25 0.25 - vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 3.25 0.25 - vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2
|
||||
# CHECK-NEXT: - - - 0.50 - - - 2.50 - - vphaddd %ymm0, %ymm1, %ymm2
|
||||
# CHECK-NEXT: - - - 0.50 0.50 0.50 - 2.50 - - vphaddd (%rax), %ymm1, %ymm2
|
||||
# CHECK-NEXT: - - - 0.50 - - - 2.50 - - vphaddsw %ymm0, %ymm1, %ymm2
|
||||
|
@ -465,14 +465,14 @@ vpxor (%rax), %ymm1, %ymm2
|
||||
# CHECK-NEXT: 1 3 1.00 vbroadcastss %xmm0, %ymm0
|
||||
# CHECK-NEXT: 1 3 1.00 vextracti128 $1, %ymm0, %xmm2
|
||||
# CHECK-NEXT: 2 1 1.00 * vextracti128 $1, %ymm0, (%rax)
|
||||
# CHECK-NEXT: 12 26 2.67 * vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: 20 27 4.00 * vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
|
||||
# CHECK-NEXT: 15 25 3.67 * vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: 34 27 6.50 * vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2
|
||||
# CHECK-NEXT: 14 23 3.33 * vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: 22 24 5.00 * vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2
|
||||
# CHECK-NEXT: 15 25 3.67 * vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: 15 28 3.67 * vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2
|
||||
# CHECK-NEXT: 12 14 2.67 * vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: 20 17 4.00 * vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
|
||||
# CHECK-NEXT: 20 16 4.00 * vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: 34 22 6.50 * vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2
|
||||
# CHECK-NEXT: 14 15 3.33 * vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: 22 17 5.00 * vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2
|
||||
# CHECK-NEXT: 15 16 3.67 * vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: 22 17 5.00 * vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 vinserti128 $1, %xmm0, %ymm1, %ymm2
|
||||
# CHECK-NEXT: 2 7 0.50 * vinserti128 $1, (%rax), %ymm1, %ymm2
|
||||
# CHECK-NEXT: 1 7 0.50 * vmovntdqa (%rax), %ymm0
|
||||
@ -568,14 +568,14 @@ vpxor (%rax), %ymm1, %ymm2
|
||||
# CHECK-NEXT: 2 10 1.00 * vpermps (%rax), %ymm1, %ymm2
|
||||
# CHECK-NEXT: 1 3 1.00 vpermq $1, %ymm0, %ymm2
|
||||
# CHECK-NEXT: 2 10 1.00 * vpermq $1, (%rax), %ymm2
|
||||
# CHECK-NEXT: 12 26 2.67 * vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: 34 27 6.50 * vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2
|
||||
# CHECK-NEXT: 12 26 2.67 * vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: 20 27 4.00 * vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2
|
||||
# CHECK-NEXT: 22 25 5.00 * vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: 22 28 5.00 * vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2
|
||||
# CHECK-NEXT: 14 23 3.33 * vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: 22 24 5.00 * vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2
|
||||
# CHECK-NEXT: 20 16 4.00 * vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: 34 22 6.50 * vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2
|
||||
# CHECK-NEXT: 12 14 2.67 * vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: 20 17 4.00 * vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2
|
||||
# CHECK-NEXT: 15 16 3.67 * vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: 22 17 5.00 * vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2
|
||||
# CHECK-NEXT: 14 15 3.33 * vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: 22 17 5.00 * vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2
|
||||
# CHECK-NEXT: 3 3 2.00 vphaddd %ymm0, %ymm1, %ymm2
|
||||
# CHECK-NEXT: 4 10 2.00 * vphaddd (%rax), %ymm1, %ymm2
|
||||
# CHECK-NEXT: 3 3 2.00 vphaddsw %ymm0, %ymm1, %ymm2
|
||||
@ -776,7 +776,7 @@ vpxor (%rax), %ymm1, %ymm2
|
||||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
|
||||
# CHECK-NEXT: - - 202.67 89.67 97.67 97.67 5.00 282.67 28.00 1.67
|
||||
# CHECK-NEXT: - - 206.67 90.67 99.67 99.67 5.00 284.67 30.00 1.67
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
|
||||
@ -787,12 +787,12 @@ vpxor (%rax), %ymm1, %ymm2
|
||||
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vextracti128 $1, %ymm0, (%rax)
|
||||
# CHECK-NEXT: - - 4.00 1.50 1.00 1.00 - 3.50 1.00 - vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: - - 6.67 2.17 2.00 2.00 - 5.17 2.00 - vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
|
||||
# CHECK-NEXT: - - 5.33 1.83 1.00 1.00 - 4.83 1.00 - vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: - - 6.67 2.17 2.00 2.00 - 5.17 2.00 - vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: - - 12.00 3.50 4.00 4.00 - 6.50 4.00 - vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2
|
||||
# CHECK-NEXT: - - 5.00 1.50 1.00 1.00 - 4.50 1.00 - vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: - - 8.67 2.17 2.00 2.00 - 5.17 2.00 - vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2
|
||||
# CHECK-NEXT: - - 5.33 1.83 1.00 1.00 - 4.83 1.00 - vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: - - 5.33 1.83 1.00 1.00 - 4.83 1.00 - vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2
|
||||
# CHECK-NEXT: - - 8.67 2.17 2.00 2.00 - 5.17 2.00 - vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2
|
||||
# CHECK-NEXT: - - - - - - - 1.00 - - vinserti128 $1, %xmm0, %ymm1, %ymm2
|
||||
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vinserti128 $1, (%rax), %ymm1, %ymm2
|
||||
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vmovntdqa (%rax), %ymm0
|
||||
@ -888,11 +888,11 @@ vpxor (%rax), %ymm1, %ymm2
|
||||
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpermps (%rax), %ymm1, %ymm2
|
||||
# CHECK-NEXT: - - - - - - - 1.00 - - vpermq $1, %ymm0, %ymm2
|
||||
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpermq $1, (%rax), %ymm2
|
||||
# CHECK-NEXT: - - 4.00 1.50 1.00 1.00 - 3.50 1.00 - vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: - - 6.67 2.17 2.00 2.00 - 5.17 2.00 - vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: - - 12.00 3.50 4.00 4.00 - 6.50 4.00 - vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2
|
||||
# CHECK-NEXT: - - 4.00 1.50 1.00 1.00 - 3.50 1.00 - vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: - - 6.67 2.17 2.00 2.00 - 5.17 2.00 - vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2
|
||||
# CHECK-NEXT: - - 8.67 2.17 2.00 2.00 - 5.17 2.00 - vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: - - 5.33 1.83 1.00 1.00 - 4.83 1.00 - vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: - - 8.67 2.17 2.00 2.00 - 5.17 2.00 - vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2
|
||||
# CHECK-NEXT: - - 5.00 1.50 1.00 1.00 - 4.50 1.00 - vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2
|
||||
# CHECK-NEXT: - - 8.67 2.17 2.00 2.00 - 5.17 2.00 - vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2
|
||||
|
Loading…
Reference in New Issue
Block a user