1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 02:52:53 +02:00

[X86][Atom] Fix vector fadd/fcmp/fmul resource/throughputs

Match what's documented in the Intel AOM - fadd/fcmp all use Port1 and fmul uses Port0, but in many cases BOTH ports are required - this was being incorrectly modelled as EITHER port.

Discovered while investigating the correct fptoui costs to fix the regressions in D101555.

Now that we can use in-order models in llvm-mca, the atom model is a good "worst case scenario" analysis for x86.
This commit is contained in:
Simon Pilgrim 2021-05-20 18:43:30 +01:00
parent 5487f701b4
commit e8c62e4bcc
5 changed files with 185 additions and 184 deletions

View File

@ -37,6 +37,7 @@ def AtomPort0 : ProcResource<1>; // ALU: ALU0, shift/rotate, load/store
def AtomPort1 : ProcResource<1>; // ALU: ALU1, bit processing, jump, and LEA
// SIMD/FP: SIMD ALU, FP Adder
// NOTE: This is for ops that can use EITHER port, not for ops that require BOTH ports.
def AtomPort01 : ProcResGroup<[AtomPort0, AtomPort1]>;
// Loads are 3 cycles, so ReadAfterLd registers needn't be available until 3
@ -223,30 +224,30 @@ defm : X86WriteResUnsupported<WriteFMoveY>;
defm : X86WriteRes<WriteEMMS, [AtomPort01], 5, [5], 1>;
defm : AtomWriteResPair<WriteFAdd, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFAddX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFAdd, [AtomPort1], [AtomPort0,AtomPort1], 5, 5, [1], [1,1]>;
defm : AtomWriteResPair<WriteFAddX, [AtomPort1], [AtomPort0,AtomPort1], 5, 5, [1], [1,1]>;
defm : X86WriteResPairUnsupported<WriteFAddY>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;
defm : AtomWriteResPair<WriteFAdd64, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFAdd64X, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteFAdd64, [AtomPort1], [AtomPort0,AtomPort1], 5, 5, [1], [1,1]>;
defm : AtomWriteResPair<WriteFAdd64X, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [5,5], [6,6]>;
defm : X86WriteResPairUnsupported<WriteFAdd64Y>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
defm : AtomWriteResPair<WriteFCmp, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFCmpX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFCmp, [AtomPort1], [AtomPort0,AtomPort1], 5, 5, [1], [1,1]>;
defm : AtomWriteResPair<WriteFCmpX, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [5,5], [6,6]>;
defm : X86WriteResPairUnsupported<WriteFCmpY>;
defm : X86WriteResPairUnsupported<WriteFCmpZ>;
defm : AtomWriteResPair<WriteFCmp64, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFCmp64X, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteFCmp64, [AtomPort1], [AtomPort0,AtomPort1], 5, 5, [1], [1,1]>;
defm : AtomWriteResPair<WriteFCmp64X, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [5,5], [6,6]>;
defm : X86WriteResPairUnsupported<WriteFCmp64Y>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : AtomWriteResPair<WriteFCom, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFComX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFMul, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteFMulX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFMul, [AtomPort0], [AtomPort0], 4, 4, [2], [2]>;
defm : AtomWriteResPair<WriteFMulX, [AtomPort0], [AtomPort0], 5, 5, [2], [2]>;
defm : X86WriteResPairUnsupported<WriteFMulY>;
defm : X86WriteResPairUnsupported<WriteFMulZ>;
defm : AtomWriteResPair<WriteFMul64, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFMul64X, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
defm : AtomWriteResPair<WriteFMul64, [AtomPort0], [AtomPort0], 5, 5, [2], [2]>;
defm : AtomWriteResPair<WriteFMul64X, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 9, 10, [9,9], [10,10]>;
defm : X86WriteResPairUnsupported<WriteFMul64Y>;
defm : X86WriteResPairUnsupported<WriteFMul64Z>;
defm : AtomWriteResPair<WriteFRcp, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;

View File

@ -194,18 +194,18 @@ xorps (%rax), %xmm2
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 5 5.00 addps %xmm0, %xmm2
# CHECK-NEXT: 1 5 5.00 * addps (%rax), %xmm2
# CHECK-NEXT: 1 5 5.00 addss %xmm0, %xmm2
# CHECK-NEXT: 1 5 5.00 * addss (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 addps %xmm0, %xmm2
# CHECK-NEXT: 1 5 1.00 * addps (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 addss %xmm0, %xmm2
# CHECK-NEXT: 1 5 1.00 * addss (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 andnps %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * andnps (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 andps %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * andps (%rax), %xmm2
# CHECK-NEXT: 1 5 5.00 cmpeqps %xmm0, %xmm2
# CHECK-NEXT: 1 5 5.00 * cmpeqps (%rax), %xmm2
# CHECK-NEXT: 1 5 5.00 cmpeqss %xmm0, %xmm2
# CHECK-NEXT: 1 5 5.00 * cmpeqss (%rax), %xmm2
# CHECK-NEXT: 1 6 5.00 cmpeqps %xmm0, %xmm2
# CHECK-NEXT: 1 7 6.00 * cmpeqps (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 cmpeqss %xmm0, %xmm2
# CHECK-NEXT: 1 5 1.00 * cmpeqss (%rax), %xmm2
# CHECK-NEXT: 1 9 4.50 comiss %xmm0, %xmm1
# CHECK-NEXT: 1 10 5.00 * comiss (%rax), %xmm1
# CHECK-NEXT: 1 5 5.00 cvtpi2ps %mm0, %xmm2
@ -232,14 +232,14 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 1 34 17.00 * divss (%rax), %xmm2
# CHECK-NEXT: 1 5 2.50 * * U ldmxcsr (%rax)
# CHECK-NEXT: 1 1 1.00 * * U maskmovq %mm0, %mm1
# CHECK-NEXT: 1 5 5.00 maxps %xmm0, %xmm2
# CHECK-NEXT: 1 5 5.00 * maxps (%rax), %xmm2
# CHECK-NEXT: 1 5 5.00 maxss %xmm0, %xmm2
# CHECK-NEXT: 1 5 5.00 * maxss (%rax), %xmm2
# CHECK-NEXT: 1 5 5.00 minps %xmm0, %xmm2
# CHECK-NEXT: 1 5 5.00 * minps (%rax), %xmm2
# CHECK-NEXT: 1 5 5.00 minss %xmm0, %xmm2
# CHECK-NEXT: 1 5 5.00 * minss (%rax), %xmm2
# CHECK-NEXT: 1 6 5.00 maxps %xmm0, %xmm2
# CHECK-NEXT: 1 7 6.00 * maxps (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 maxss %xmm0, %xmm2
# CHECK-NEXT: 1 5 1.00 * maxss (%rax), %xmm2
# CHECK-NEXT: 1 6 5.00 minps %xmm0, %xmm2
# CHECK-NEXT: 1 7 6.00 * minps (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 minss %xmm0, %xmm2
# CHECK-NEXT: 1 5 1.00 * minss (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 movaps %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * movaps %xmm0, (%rax)
# CHECK-NEXT: 1 1 1.00 * movaps (%rax), %xmm2
@ -258,10 +258,10 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 movups %xmm0, %xmm2
# CHECK-NEXT: 1 2 1.00 * movups %xmm0, (%rax)
# CHECK-NEXT: 1 3 1.50 * movups (%rax), %xmm2
# CHECK-NEXT: 1 5 5.00 mulps %xmm0, %xmm2
# CHECK-NEXT: 1 5 5.00 * mulps (%rax), %xmm2
# CHECK-NEXT: 1 4 4.00 mulss %xmm0, %xmm2
# CHECK-NEXT: 1 4 4.00 * mulss (%rax), %xmm2
# CHECK-NEXT: 1 5 2.00 mulps %xmm0, %xmm2
# CHECK-NEXT: 1 5 2.00 * mulps (%rax), %xmm2
# CHECK-NEXT: 1 4 2.00 mulss %xmm0, %xmm2
# CHECK-NEXT: 1 4 2.00 * mulss (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 orps %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * orps (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pavgb %mm0, %mm2
@ -306,10 +306,10 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 1 34 17.00 sqrtss %xmm0, %xmm2
# CHECK-NEXT: 1 34 17.00 * sqrtss (%rax), %xmm2
# CHECK-NEXT: 1 15 7.50 * U stmxcsr (%rax)
# CHECK-NEXT: 1 5 5.00 subps %xmm0, %xmm2
# CHECK-NEXT: 1 5 5.00 * subps (%rax), %xmm2
# CHECK-NEXT: 1 5 5.00 subss %xmm0, %xmm2
# CHECK-NEXT: 1 5 5.00 * subss (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 subps %xmm0, %xmm2
# CHECK-NEXT: 1 5 1.00 * subps (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 subss %xmm0, %xmm2
# CHECK-NEXT: 1 5 1.00 * subss (%rax), %xmm2
# CHECK-NEXT: 1 9 4.50 ucomiss %xmm0, %xmm1
# CHECK-NEXT: 1 10 5.00 * ucomiss (%rax), %xmm1
# CHECK-NEXT: 1 1 1.00 unpckhps %xmm0, %xmm2
@ -325,22 +325,22 @@ xorps (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1]
# CHECK-NEXT: 508.00 346.00
# CHECK-NEXT: 438.00 393.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] Instructions:
# CHECK-NEXT: 5.00 - addps %xmm0, %xmm2
# CHECK-NEXT: 5.00 - addps (%rax), %xmm2
# CHECK-NEXT: 5.00 - addss %xmm0, %xmm2
# CHECK-NEXT: 5.00 - addss (%rax), %xmm2
# CHECK-NEXT: - 1.00 addps %xmm0, %xmm2
# CHECK-NEXT: 1.00 1.00 addps (%rax), %xmm2
# CHECK-NEXT: - 1.00 addss %xmm0, %xmm2
# CHECK-NEXT: 1.00 1.00 addss (%rax), %xmm2
# CHECK-NEXT: 0.50 0.50 andnps %xmm0, %xmm2
# CHECK-NEXT: 1.00 - andnps (%rax), %xmm2
# CHECK-NEXT: 0.50 0.50 andps %xmm0, %xmm2
# CHECK-NEXT: 1.00 - andps (%rax), %xmm2
# CHECK-NEXT: 5.00 - cmpeqps %xmm0, %xmm2
# CHECK-NEXT: 5.00 - cmpeqps (%rax), %xmm2
# CHECK-NEXT: 5.00 - cmpeqss %xmm0, %xmm2
# CHECK-NEXT: 5.00 - cmpeqss (%rax), %xmm2
# CHECK-NEXT: 5.00 5.00 cmpeqps %xmm0, %xmm2
# CHECK-NEXT: 6.00 6.00 cmpeqps (%rax), %xmm2
# CHECK-NEXT: - 1.00 cmpeqss %xmm0, %xmm2
# CHECK-NEXT: 1.00 1.00 cmpeqss (%rax), %xmm2
# CHECK-NEXT: 4.50 4.50 comiss %xmm0, %xmm1
# CHECK-NEXT: 5.00 5.00 comiss (%rax), %xmm1
# CHECK-NEXT: - 5.00 cvtpi2ps %mm0, %xmm2
@ -367,14 +367,14 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 17.00 17.00 divss (%rax), %xmm2
# CHECK-NEXT: 2.50 2.50 ldmxcsr (%rax)
# CHECK-NEXT: 1.00 - maskmovq %mm0, %mm1
# CHECK-NEXT: 5.00 - maxps %xmm0, %xmm2
# CHECK-NEXT: 5.00 - maxps (%rax), %xmm2
# CHECK-NEXT: 5.00 - maxss %xmm0, %xmm2
# CHECK-NEXT: 5.00 - maxss (%rax), %xmm2
# CHECK-NEXT: 5.00 - minps %xmm0, %xmm2
# CHECK-NEXT: 5.00 - minps (%rax), %xmm2
# CHECK-NEXT: 5.00 - minss %xmm0, %xmm2
# CHECK-NEXT: 5.00 - minss (%rax), %xmm2
# CHECK-NEXT: 5.00 5.00 maxps %xmm0, %xmm2
# CHECK-NEXT: 6.00 6.00 maxps (%rax), %xmm2
# CHECK-NEXT: - 1.00 maxss %xmm0, %xmm2
# CHECK-NEXT: 1.00 1.00 maxss (%rax), %xmm2
# CHECK-NEXT: 5.00 5.00 minps %xmm0, %xmm2
# CHECK-NEXT: 6.00 6.00 minps (%rax), %xmm2
# CHECK-NEXT: - 1.00 minss %xmm0, %xmm2
# CHECK-NEXT: 1.00 1.00 minss (%rax), %xmm2
# CHECK-NEXT: 0.50 0.50 movaps %xmm0, %xmm2
# CHECK-NEXT: 1.00 - movaps %xmm0, (%rax)
# CHECK-NEXT: 1.00 - movaps (%rax), %xmm2
@ -393,10 +393,10 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 0.50 0.50 movups %xmm0, %xmm2
# CHECK-NEXT: 1.00 1.00 movups %xmm0, (%rax)
# CHECK-NEXT: 1.50 1.50 movups (%rax), %xmm2
# CHECK-NEXT: 5.00 - mulps %xmm0, %xmm2
# CHECK-NEXT: 5.00 - mulps (%rax), %xmm2
# CHECK-NEXT: 4.00 - mulss %xmm0, %xmm2
# CHECK-NEXT: 4.00 - mulss (%rax), %xmm2
# CHECK-NEXT: 2.00 - mulps %xmm0, %xmm2
# CHECK-NEXT: 2.00 - mulps (%rax), %xmm2
# CHECK-NEXT: 2.00 - mulss %xmm0, %xmm2
# CHECK-NEXT: 2.00 - mulss (%rax), %xmm2
# CHECK-NEXT: 0.50 0.50 orps %xmm0, %xmm2
# CHECK-NEXT: 1.00 - orps (%rax), %xmm2
# CHECK-NEXT: 0.50 0.50 pavgb %mm0, %mm2
@ -441,10 +441,10 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 17.00 17.00 sqrtss %xmm0, %xmm2
# CHECK-NEXT: 17.00 17.00 sqrtss (%rax), %xmm2
# CHECK-NEXT: 7.50 7.50 stmxcsr (%rax)
# CHECK-NEXT: 5.00 - subps %xmm0, %xmm2
# CHECK-NEXT: 5.00 - subps (%rax), %xmm2
# CHECK-NEXT: 5.00 - subss %xmm0, %xmm2
# CHECK-NEXT: 5.00 - subss (%rax), %xmm2
# CHECK-NEXT: - 1.00 subps %xmm0, %xmm2
# CHECK-NEXT: 1.00 1.00 subps (%rax), %xmm2
# CHECK-NEXT: - 1.00 subss %xmm0, %xmm2
# CHECK-NEXT: 1.00 1.00 subss (%rax), %xmm2
# CHECK-NEXT: 4.50 4.50 ucomiss %xmm0, %xmm1
# CHECK-NEXT: 5.00 5.00 ucomiss (%rax), %xmm1
# CHECK-NEXT: 1.00 - unpckhps %xmm0, %xmm2

View File

@ -407,19 +407,19 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 6 3.00 addpd %xmm0, %xmm2
# CHECK-NEXT: 1 7 3.50 * addpd (%rax), %xmm2
# CHECK-NEXT: 1 5 5.00 addsd %xmm0, %xmm2
# CHECK-NEXT: 1 5 5.00 * addsd (%rax), %xmm2
# CHECK-NEXT: 1 6 5.00 addpd %xmm0, %xmm2
# CHECK-NEXT: 1 7 6.00 * addpd (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 addsd %xmm0, %xmm2
# CHECK-NEXT: 1 5 1.00 * addsd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 andnpd %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * andnpd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 andpd %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * andpd (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 * * U clflush (%rax)
# CHECK-NEXT: 1 6 3.00 cmpeqpd %xmm0, %xmm2
# CHECK-NEXT: 1 7 3.50 * cmpeqpd (%rax), %xmm2
# CHECK-NEXT: 1 5 5.00 cmpeqsd %xmm0, %xmm2
# CHECK-NEXT: 1 5 5.00 * cmpeqsd (%rax), %xmm2
# CHECK-NEXT: 1 6 5.00 cmpeqpd %xmm0, %xmm2
# CHECK-NEXT: 1 7 6.00 * cmpeqpd (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 cmpeqsd %xmm0, %xmm2
# CHECK-NEXT: 1 5 1.00 * cmpeqsd (%rax), %xmm2
# CHECK-NEXT: 1 9 4.50 comisd %xmm0, %xmm1
# CHECK-NEXT: 1 10 5.00 * comisd (%rax), %xmm1
# CHECK-NEXT: 1 7 3.50 cvtdq2pd %xmm0, %xmm2
@ -466,15 +466,15 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 62 31.00 * divsd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 * * U lfence
# CHECK-NEXT: 1 2 1.00 * * U maskmovdqu %xmm0, %xmm1
# CHECK-NEXT: 1 6 3.00 maxpd %xmm0, %xmm2
# CHECK-NEXT: 1 7 3.50 * maxpd (%rax), %xmm2
# CHECK-NEXT: 1 5 5.00 maxsd %xmm0, %xmm2
# CHECK-NEXT: 1 5 5.00 * maxsd (%rax), %xmm2
# CHECK-NEXT: 1 6 5.00 maxpd %xmm0, %xmm2
# CHECK-NEXT: 1 7 6.00 * maxpd (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 maxsd %xmm0, %xmm2
# CHECK-NEXT: 1 5 1.00 * maxsd (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 * * U mfence
# CHECK-NEXT: 1 6 3.00 minpd %xmm0, %xmm2
# CHECK-NEXT: 1 7 3.50 * minpd (%rax), %xmm2
# CHECK-NEXT: 1 5 5.00 minsd %xmm0, %xmm2
# CHECK-NEXT: 1 5 5.00 * minsd (%rax), %xmm2
# CHECK-NEXT: 1 6 5.00 minpd %xmm0, %xmm2
# CHECK-NEXT: 1 7 6.00 * minpd (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 minsd %xmm0, %xmm2
# CHECK-NEXT: 1 5 1.00 * minsd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 movapd %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * movapd %xmm0, (%rax)
# CHECK-NEXT: 1 1 1.00 * movapd (%rax), %xmm2
@ -510,10 +510,10 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 movupd %xmm0, %xmm2
# CHECK-NEXT: 1 2 1.00 * movupd %xmm0, (%rax)
# CHECK-NEXT: 1 3 1.50 * movupd (%rax), %xmm2
# CHECK-NEXT: 1 9 4.50 mulpd %xmm0, %xmm2
# CHECK-NEXT: 1 10 5.00 * mulpd (%rax), %xmm2
# CHECK-NEXT: 1 5 5.00 mulsd %xmm0, %xmm2
# CHECK-NEXT: 1 5 5.00 * mulsd (%rax), %xmm2
# CHECK-NEXT: 1 9 9.00 mulpd %xmm0, %xmm2
# CHECK-NEXT: 1 10 10.00 * mulpd (%rax), %xmm2
# CHECK-NEXT: 1 5 2.00 mulsd %xmm0, %xmm2
# CHECK-NEXT: 1 5 2.00 * mulsd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 orpd %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * orpd (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 packssdw %xmm0, %xmm2
@ -662,10 +662,10 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 125 62.50 * sqrtpd (%rax), %xmm2
# CHECK-NEXT: 1 62 31.00 sqrtsd %xmm0, %xmm2
# CHECK-NEXT: 1 62 31.00 * sqrtsd (%rax), %xmm2
# CHECK-NEXT: 1 6 3.00 subpd %xmm0, %xmm2
# CHECK-NEXT: 1 7 3.50 * subpd (%rax), %xmm2
# CHECK-NEXT: 1 5 5.00 subsd %xmm0, %xmm2
# CHECK-NEXT: 1 5 5.00 * subsd (%rax), %xmm2
# CHECK-NEXT: 1 6 5.00 subpd %xmm0, %xmm2
# CHECK-NEXT: 1 7 6.00 * subpd (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 subsd %xmm0, %xmm2
# CHECK-NEXT: 1 5 1.00 * subsd (%rax), %xmm2
# CHECK-NEXT: 1 9 4.50 ucomisd %xmm0, %xmm1
# CHECK-NEXT: 1 10 5.00 * ucomisd (%rax), %xmm1
# CHECK-NEXT: 1 1 1.00 unpckhpd %xmm0, %xmm2
@ -681,23 +681,23 @@ xorpd (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1]
# CHECK-NEXT: 833.50 634.50
# CHECK-NEXT: 814.50 676.50
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] Instructions:
# CHECK-NEXT: 3.00 3.00 addpd %xmm0, %xmm2
# CHECK-NEXT: 3.50 3.50 addpd (%rax), %xmm2
# CHECK-NEXT: 5.00 - addsd %xmm0, %xmm2
# CHECK-NEXT: 5.00 - addsd (%rax), %xmm2
# CHECK-NEXT: 5.00 5.00 addpd %xmm0, %xmm2
# CHECK-NEXT: 6.00 6.00 addpd (%rax), %xmm2
# CHECK-NEXT: - 1.00 addsd %xmm0, %xmm2
# CHECK-NEXT: 1.00 1.00 addsd (%rax), %xmm2
# CHECK-NEXT: 0.50 0.50 andnpd %xmm0, %xmm2
# CHECK-NEXT: 1.00 - andnpd (%rax), %xmm2
# CHECK-NEXT: 0.50 0.50 andpd %xmm0, %xmm2
# CHECK-NEXT: 1.00 - andpd (%rax), %xmm2
# CHECK-NEXT: 1.00 - clflush (%rax)
# CHECK-NEXT: 3.00 3.00 cmpeqpd %xmm0, %xmm2
# CHECK-NEXT: 3.50 3.50 cmpeqpd (%rax), %xmm2
# CHECK-NEXT: 5.00 - cmpeqsd %xmm0, %xmm2
# CHECK-NEXT: 5.00 - cmpeqsd (%rax), %xmm2
# CHECK-NEXT: 5.00 5.00 cmpeqpd %xmm0, %xmm2
# CHECK-NEXT: 6.00 6.00 cmpeqpd (%rax), %xmm2
# CHECK-NEXT: - 1.00 cmpeqsd %xmm0, %xmm2
# CHECK-NEXT: 1.00 1.00 cmpeqsd (%rax), %xmm2
# CHECK-NEXT: 4.50 4.50 comisd %xmm0, %xmm1
# CHECK-NEXT: 5.00 5.00 comisd (%rax), %xmm1
# CHECK-NEXT: 3.50 3.50 cvtdq2pd %xmm0, %xmm2
@ -744,15 +744,15 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 31.00 31.00 divsd (%rax), %xmm2
# CHECK-NEXT: 0.50 0.50 lfence
# CHECK-NEXT: 1.00 1.00 maskmovdqu %xmm0, %xmm1
# CHECK-NEXT: 3.00 3.00 maxpd %xmm0, %xmm2
# CHECK-NEXT: 3.50 3.50 maxpd (%rax), %xmm2
# CHECK-NEXT: 5.00 - maxsd %xmm0, %xmm2
# CHECK-NEXT: 5.00 - maxsd (%rax), %xmm2
# CHECK-NEXT: 5.00 5.00 maxpd %xmm0, %xmm2
# CHECK-NEXT: 6.00 6.00 maxpd (%rax), %xmm2
# CHECK-NEXT: - 1.00 maxsd %xmm0, %xmm2
# CHECK-NEXT: 1.00 1.00 maxsd (%rax), %xmm2
# CHECK-NEXT: 1.00 - mfence
# CHECK-NEXT: 3.00 3.00 minpd %xmm0, %xmm2
# CHECK-NEXT: 3.50 3.50 minpd (%rax), %xmm2
# CHECK-NEXT: 5.00 - minsd %xmm0, %xmm2
# CHECK-NEXT: 5.00 - minsd (%rax), %xmm2
# CHECK-NEXT: 5.00 5.00 minpd %xmm0, %xmm2
# CHECK-NEXT: 6.00 6.00 minpd (%rax), %xmm2
# CHECK-NEXT: - 1.00 minsd %xmm0, %xmm2
# CHECK-NEXT: 1.00 1.00 minsd (%rax), %xmm2
# CHECK-NEXT: 0.50 0.50 movapd %xmm0, %xmm2
# CHECK-NEXT: 1.00 - movapd %xmm0, (%rax)
# CHECK-NEXT: 1.00 - movapd (%rax), %xmm2
@ -788,10 +788,10 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 0.50 0.50 movupd %xmm0, %xmm2
# CHECK-NEXT: 1.00 1.00 movupd %xmm0, (%rax)
# CHECK-NEXT: 1.50 1.50 movupd (%rax), %xmm2
# CHECK-NEXT: 4.50 4.50 mulpd %xmm0, %xmm2
# CHECK-NEXT: 5.00 5.00 mulpd (%rax), %xmm2
# CHECK-NEXT: 5.00 - mulsd %xmm0, %xmm2
# CHECK-NEXT: 5.00 - mulsd (%rax), %xmm2
# CHECK-NEXT: 9.00 9.00 mulpd %xmm0, %xmm2
# CHECK-NEXT: 10.00 10.00 mulpd (%rax), %xmm2
# CHECK-NEXT: 2.00 - mulsd %xmm0, %xmm2
# CHECK-NEXT: 2.00 - mulsd (%rax), %xmm2
# CHECK-NEXT: 0.50 0.50 orpd %xmm0, %xmm2
# CHECK-NEXT: 1.00 - orpd (%rax), %xmm2
# CHECK-NEXT: 1.00 - packssdw %xmm0, %xmm2
@ -940,10 +940,10 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 62.50 62.50 sqrtpd (%rax), %xmm2
# CHECK-NEXT: 31.00 31.00 sqrtsd %xmm0, %xmm2
# CHECK-NEXT: 31.00 31.00 sqrtsd (%rax), %xmm2
# CHECK-NEXT: 3.00 3.00 subpd %xmm0, %xmm2
# CHECK-NEXT: 3.50 3.50 subpd (%rax), %xmm2
# CHECK-NEXT: 5.00 - subsd %xmm0, %xmm2
# CHECK-NEXT: 5.00 - subsd (%rax), %xmm2
# CHECK-NEXT: 5.00 5.00 subpd %xmm0, %xmm2
# CHECK-NEXT: 6.00 6.00 subpd (%rax), %xmm2
# CHECK-NEXT: - 1.00 subsd %xmm0, %xmm2
# CHECK-NEXT: 1.00 1.00 subsd (%rax), %xmm2
# CHECK-NEXT: 4.50 4.50 ucomisd %xmm0, %xmm1
# CHECK-NEXT: 5.00 5.00 ucomisd (%rax), %xmm1
# CHECK-NEXT: 1.00 - unpckhpd %xmm0, %xmm2

View File

@ -43,10 +43,10 @@ mwait
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 6 3.00 addsubpd %xmm0, %xmm2
# CHECK-NEXT: 1 7 3.50 * addsubpd (%rax), %xmm2
# CHECK-NEXT: 1 5 5.00 addsubps %xmm0, %xmm2
# CHECK-NEXT: 1 5 5.00 * addsubps (%rax), %xmm2
# CHECK-NEXT: 1 6 5.00 addsubpd %xmm0, %xmm2
# CHECK-NEXT: 1 7 6.00 * addsubpd (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 addsubps %xmm0, %xmm2
# CHECK-NEXT: 1 5 1.00 * addsubps (%rax), %xmm2
# CHECK-NEXT: 1 8 4.00 haddpd %xmm0, %xmm2
# CHECK-NEXT: 1 9 4.50 * haddpd (%rax), %xmm2
# CHECK-NEXT: 1 8 4.00 haddps %xmm0, %xmm2
@ -71,14 +71,14 @@ mwait
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1]
# CHECK-NEXT: 103.50 87.50
# CHECK-NEXT: 99.00 94.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] Instructions:
# CHECK-NEXT: 3.00 3.00 addsubpd %xmm0, %xmm2
# CHECK-NEXT: 3.50 3.50 addsubpd (%rax), %xmm2
# CHECK-NEXT: 5.00 - addsubps %xmm0, %xmm2
# CHECK-NEXT: 5.00 - addsubps (%rax), %xmm2
# CHECK-NEXT: 5.00 5.00 addsubpd %xmm0, %xmm2
# CHECK-NEXT: 6.00 6.00 addsubpd (%rax), %xmm2
# CHECK-NEXT: - 1.00 addsubps %xmm0, %xmm2
# CHECK-NEXT: 1.00 1.00 addsubps (%rax), %xmm2
# CHECK-NEXT: 4.00 4.00 haddpd %xmm0, %xmm2
# CHECK-NEXT: 4.50 4.50 haddpd (%rax), %xmm2
# CHECK-NEXT: 4.00 4.00 haddps %xmm0, %xmm2

View File

@ -208,14 +208,14 @@ fyl2xp1
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 99 49.50 U f2xm1
# CHECK-NEXT: 1 1 1.00 U fabs
# CHECK-NEXT: 1 5 5.00 U fadd %st, %st(1)
# CHECK-NEXT: 1 5 5.00 U fadd %st(2), %st
# CHECK-NEXT: 1 5 5.00 * U fadds (%ecx)
# CHECK-NEXT: 1 5 5.00 * U faddl (%ecx)
# CHECK-NEXT: 1 5 5.00 U faddp %st, %st(1)
# CHECK-NEXT: 1 5 5.00 U faddp %st, %st(2)
# CHECK-NEXT: 1 5 5.00 * U fiadds (%ecx)
# CHECK-NEXT: 1 5 5.00 * U fiaddl (%ecx)
# CHECK-NEXT: 1 5 1.00 U fadd %st, %st(1)
# CHECK-NEXT: 1 5 1.00 U fadd %st(2), %st
# CHECK-NEXT: 1 5 1.00 * U fadds (%ecx)
# CHECK-NEXT: 1 5 1.00 * U faddl (%ecx)
# CHECK-NEXT: 1 5 1.00 U faddp %st, %st(1)
# CHECK-NEXT: 1 5 1.00 U faddp %st, %st(2)
# CHECK-NEXT: 1 5 1.00 * U fiadds (%ecx)
# CHECK-NEXT: 1 5 1.00 * U fiaddl (%ecx)
# CHECK-NEXT: 1 100 0.50 * U fbld (%ecx)
# CHECK-NEXT: 1 100 0.50 * U fbstp (%eax)
# CHECK-NEXT: 1 1 1.00 U fchs
@ -288,14 +288,14 @@ fyl2xp1
# CHECK-NEXT: 1 10 5.00 U fldln2
# CHECK-NEXT: 1 10 5.00 U fldpi
# CHECK-NEXT: 1 1 0.50 U fldz
# CHECK-NEXT: 1 4 4.00 U fmul %st, %st(1)
# CHECK-NEXT: 1 4 4.00 U fmul %st(2), %st
# CHECK-NEXT: 1 4 4.00 * U fmuls (%ecx)
# CHECK-NEXT: 1 4 4.00 * U fmull (%eax)
# CHECK-NEXT: 1 4 4.00 U fmulp %st, %st(1)
# CHECK-NEXT: 1 4 4.00 U fmulp %st, %st(2)
# CHECK-NEXT: 1 4 4.00 * U fimuls (%ecx)
# CHECK-NEXT: 1 4 4.00 * U fimull (%eax)
# CHECK-NEXT: 1 4 2.00 U fmul %st, %st(1)
# CHECK-NEXT: 1 4 2.00 U fmul %st(2), %st
# CHECK-NEXT: 1 4 2.00 * U fmuls (%ecx)
# CHECK-NEXT: 1 4 2.00 * U fmull (%eax)
# CHECK-NEXT: 1 4 2.00 U fmulp %st, %st(1)
# CHECK-NEXT: 1 4 2.00 U fmulp %st, %st(2)
# CHECK-NEXT: 1 4 2.00 * U fimuls (%ecx)
# CHECK-NEXT: 1 4 2.00 * U fimull (%eax)
# CHECK-NEXT: 1 1 0.50 U fnop
# CHECK-NEXT: 1 183 91.50 U fpatan
# CHECK-NEXT: 1 55 27.50 U fprem
@ -321,22 +321,22 @@ fyl2xp1
# CHECK-NEXT: 1 100 0.50 * U frstor (%eax)
# CHECK-NEXT: 1 1 0.50 U wait
# CHECK-NEXT: 1 100 0.50 * U fnsave (%eax)
# CHECK-NEXT: 1 5 5.00 U fsub %st, %st(1)
# CHECK-NEXT: 1 5 5.00 U fsub %st(2), %st
# CHECK-NEXT: 1 5 5.00 * U fsubs (%ecx)
# CHECK-NEXT: 1 5 5.00 * U fsubl (%eax)
# CHECK-NEXT: 1 5 5.00 U fsubp %st, %st(1)
# CHECK-NEXT: 1 5 5.00 U fsubp %st, %st(2)
# CHECK-NEXT: 1 5 5.00 * U fisubs (%ecx)
# CHECK-NEXT: 1 5 5.00 * U fisubl (%eax)
# CHECK-NEXT: 1 5 5.00 U fsubr %st, %st(1)
# CHECK-NEXT: 1 5 5.00 U fsubr %st(2), %st
# CHECK-NEXT: 1 5 5.00 * U fsubrs (%ecx)
# CHECK-NEXT: 1 5 5.00 * U fsubrl (%eax)
# CHECK-NEXT: 1 5 5.00 U fsubrp %st, %st(1)
# CHECK-NEXT: 1 5 5.00 U fsubrp %st, %st(2)
# CHECK-NEXT: 1 5 5.00 * U fisubrs (%ecx)
# CHECK-NEXT: 1 5 5.00 * U fisubrl (%eax)
# CHECK-NEXT: 1 5 1.00 U fsub %st, %st(1)
# CHECK-NEXT: 1 5 1.00 U fsub %st(2), %st
# CHECK-NEXT: 1 5 1.00 * U fsubs (%ecx)
# CHECK-NEXT: 1 5 1.00 * U fsubl (%eax)
# CHECK-NEXT: 1 5 1.00 U fsubp %st, %st(1)
# CHECK-NEXT: 1 5 1.00 U fsubp %st, %st(2)
# CHECK-NEXT: 1 5 1.00 * U fisubs (%ecx)
# CHECK-NEXT: 1 5 1.00 * U fisubl (%eax)
# CHECK-NEXT: 1 5 1.00 U fsubr %st, %st(1)
# CHECK-NEXT: 1 5 1.00 U fsubr %st(2), %st
# CHECK-NEXT: 1 5 1.00 * U fsubrs (%ecx)
# CHECK-NEXT: 1 5 1.00 * U fsubrl (%eax)
# CHECK-NEXT: 1 5 1.00 U fsubrp %st, %st(1)
# CHECK-NEXT: 1 5 1.00 U fsubrp %st, %st(2)
# CHECK-NEXT: 1 5 1.00 * U fisubrs (%ecx)
# CHECK-NEXT: 1 5 1.00 * U fisubrl (%eax)
# CHECK-NEXT: 1 9 4.50 U ftst
# CHECK-NEXT: 1 1 1.00 U fucom %st(1)
# CHECK-NEXT: 1 1 1.00 U fucom %st(3)
@ -361,20 +361,20 @@ fyl2xp1
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1]
# CHECK-NEXT: 1624.00 1416.00
# CHECK-NEXT: 1500.00 1440.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] Instructions:
# CHECK-NEXT: 49.50 49.50 f2xm1
# CHECK-NEXT: - 1.00 fabs
# CHECK-NEXT: 5.00 - fadd %st, %st(1)
# CHECK-NEXT: 5.00 - fadd %st(2), %st
# CHECK-NEXT: 5.00 - fadds (%ecx)
# CHECK-NEXT: 5.00 - faddl (%ecx)
# CHECK-NEXT: 5.00 - faddp %st, %st(1)
# CHECK-NEXT: 5.00 - faddp %st, %st(2)
# CHECK-NEXT: 5.00 - fiadds (%ecx)
# CHECK-NEXT: 5.00 - fiaddl (%ecx)
# CHECK-NEXT: - 1.00 fadd %st, %st(1)
# CHECK-NEXT: - 1.00 fadd %st(2), %st
# CHECK-NEXT: 1.00 1.00 fadds (%ecx)
# CHECK-NEXT: 1.00 1.00 faddl (%ecx)
# CHECK-NEXT: - 1.00 faddp %st, %st(1)
# CHECK-NEXT: - 1.00 faddp %st, %st(2)
# CHECK-NEXT: 1.00 1.00 fiadds (%ecx)
# CHECK-NEXT: 1.00 1.00 fiaddl (%ecx)
# CHECK-NEXT: 0.50 0.50 fbld (%ecx)
# CHECK-NEXT: 0.50 0.50 fbstp (%eax)
# CHECK-NEXT: - 1.00 fchs
@ -447,14 +447,14 @@ fyl2xp1
# CHECK-NEXT: 5.00 5.00 fldln2
# CHECK-NEXT: 5.00 5.00 fldpi
# CHECK-NEXT: 0.50 0.50 fldz
# CHECK-NEXT: 4.00 - fmul %st, %st(1)
# CHECK-NEXT: 4.00 - fmul %st(2), %st
# CHECK-NEXT: 4.00 - fmuls (%ecx)
# CHECK-NEXT: 4.00 - fmull (%eax)
# CHECK-NEXT: 4.00 - fmulp %st, %st(1)
# CHECK-NEXT: 4.00 - fmulp %st, %st(2)
# CHECK-NEXT: 4.00 - fimuls (%ecx)
# CHECK-NEXT: 4.00 - fimull (%eax)
# CHECK-NEXT: 2.00 - fmul %st, %st(1)
# CHECK-NEXT: 2.00 - fmul %st(2), %st
# CHECK-NEXT: 2.00 - fmuls (%ecx)
# CHECK-NEXT: 2.00 - fmull (%eax)
# CHECK-NEXT: 2.00 - fmulp %st, %st(1)
# CHECK-NEXT: 2.00 - fmulp %st, %st(2)
# CHECK-NEXT: 2.00 - fimuls (%ecx)
# CHECK-NEXT: 2.00 - fimull (%eax)
# CHECK-NEXT: 0.50 0.50 fnop
# CHECK-NEXT: 91.50 91.50 fpatan
# CHECK-NEXT: 27.50 27.50 fprem
@ -480,22 +480,22 @@ fyl2xp1
# CHECK-NEXT: 0.50 0.50 frstor (%eax)
# CHECK-NEXT: 0.50 0.50 wait
# CHECK-NEXT: 0.50 0.50 fnsave (%eax)
# CHECK-NEXT: 5.00 - fsub %st, %st(1)
# CHECK-NEXT: 5.00 - fsub %st(2), %st
# CHECK-NEXT: 5.00 - fsubs (%ecx)
# CHECK-NEXT: 5.00 - fsubl (%eax)
# CHECK-NEXT: 5.00 - fsubp %st, %st(1)
# CHECK-NEXT: 5.00 - fsubp %st, %st(2)
# CHECK-NEXT: 5.00 - fisubs (%ecx)
# CHECK-NEXT: 5.00 - fisubl (%eax)
# CHECK-NEXT: 5.00 - fsubr %st, %st(1)
# CHECK-NEXT: 5.00 - fsubr %st(2), %st
# CHECK-NEXT: 5.00 - fsubrs (%ecx)
# CHECK-NEXT: 5.00 - fsubrl (%eax)
# CHECK-NEXT: 5.00 - fsubrp %st, %st(1)
# CHECK-NEXT: 5.00 - fsubrp %st, %st(2)
# CHECK-NEXT: 5.00 - fisubrs (%ecx)
# CHECK-NEXT: 5.00 - fisubrl (%eax)
# CHECK-NEXT: - 1.00 fsub %st, %st(1)
# CHECK-NEXT: - 1.00 fsub %st(2), %st
# CHECK-NEXT: 1.00 1.00 fsubs (%ecx)
# CHECK-NEXT: 1.00 1.00 fsubl (%eax)
# CHECK-NEXT: - 1.00 fsubp %st, %st(1)
# CHECK-NEXT: - 1.00 fsubp %st, %st(2)
# CHECK-NEXT: 1.00 1.00 fisubs (%ecx)
# CHECK-NEXT: 1.00 1.00 fisubl (%eax)
# CHECK-NEXT: - 1.00 fsubr %st, %st(1)
# CHECK-NEXT: - 1.00 fsubr %st(2), %st
# CHECK-NEXT: 1.00 1.00 fsubrs (%ecx)
# CHECK-NEXT: 1.00 1.00 fsubrl (%eax)
# CHECK-NEXT: - 1.00 fsubrp %st, %st(1)
# CHECK-NEXT: - 1.00 fsubrp %st, %st(2)
# CHECK-NEXT: 1.00 1.00 fisubrs (%ecx)
# CHECK-NEXT: 1.00 1.00 fisubrl (%eax)
# CHECK-NEXT: 4.50 4.50 ftst
# CHECK-NEXT: - 1.00 fucom %st(1)
# CHECK-NEXT: - 1.00 fucom %st(3)