mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
[UpdateTestChecks] Add update_mca_test_checks.py script
This script can be used to regenerate tests in the test/tools/llvm-mca directory (PR36904). Regenerated a number of tests using the pattern: test/tools/llvm-mca/*/*/*.s Differential Revision: https://reviews.llvm.org/D45369 llvm-svn: 330246
This commit is contained in:
parent
348b96dc3b
commit
9dc124e019
@ -1,6 +1,7 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=cortex-a57 -iterations=600 -timeline -timeline-max-iterations=4 < %s | FileCheck %s
|
||||
|
||||
b t
|
||||
b t
|
||||
|
||||
# CHECK: Iterations: 600
|
||||
# CHECK-NEXT: Instructions: 600
|
||||
@ -8,7 +9,6 @@
|
||||
# CHECK-NEXT: Dispatch Width: 3
|
||||
# CHECK-NEXT: IPC: 1.00
|
||||
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
# CHECK-NEXT: [1]: #uOps
|
||||
# CHECK-NEXT: [2]: Latency
|
||||
@ -18,8 +18,7 @@
|
||||
# CHECK-NEXT: [6]: HasSideEffects
|
||||
|
||||
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
# CHECK-NEXT: 1 1 1.00 b t
|
||||
|
||||
# CHECK-NEXT: 1 1 1.00 b t
|
||||
|
||||
# CHECK: Resources:
|
||||
# CHECK-NEXT: [0] - A57UnitB
|
||||
@ -31,24 +30,22 @@
|
||||
# CHECK-NEXT: [5] - A57UnitW
|
||||
# CHECK-NEXT: [6] - A57UnitX
|
||||
|
||||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1.0] [1.1] [2] [3] [4] [5] [6]
|
||||
# CHECK-NEXT: 1.00 - - - - - - -
|
||||
# CHECK-NEXT: [0] [1.0] [1.1] [2] [3] [4] [5] [6]
|
||||
# CHECK-NEXT: 1.00 - - - - - - -
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1.0] [1.1] [2] [3] [4] [5] [6] Instructions:
|
||||
# CHECK-NEXT: 1.00 - - - - - - - b t
|
||||
|
||||
# CHECK-NEXT: 1.00 - - - - - - - b t
|
||||
|
||||
# CHECK: Timeline view:
|
||||
|
||||
# CHECK: Index 0123456
|
||||
|
||||
# CHECK: [0,0] DeER .. b t
|
||||
# CHECK-NEXT: [1,0] D=eER.. b t
|
||||
# CHECK-NEXT: [2,0] D==eER. b t
|
||||
# CHECK-NEXT: [3,0] .D==eER b t
|
||||
|
||||
# CHECK: [0,0] DeER .. b t
|
||||
# CHECK-NEXT: [1,0] D=eER.. b t
|
||||
# CHECK-NEXT: [2,0] D==eER. b t
|
||||
# CHECK-NEXT: [3,0] .D==eER b t
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
@ -57,4 +54,5 @@
|
||||
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
||||
|
||||
# CHECK: [0] [1] [2] [3]
|
||||
# CHECK-NEXT: 0. 4 2.3 2.3 0.0 b t
|
||||
# CHECK-NEXT: 0. 4 2.3 2.3 0.0 b t
|
||||
|
||||
|
@ -1,19 +1,20 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -iterations=300 -timeline -timeline-max-iterations=3 -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=M3
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m1 -iterations=300 -timeline -timeline-max-iterations=3 -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=M1
|
||||
|
||||
b t
|
||||
b t
|
||||
|
||||
# ALL: Iterations: 300
|
||||
# ALL-NEXT: Instructions: 300
|
||||
|
||||
# M3-NEXT: Total Cycles: 51
|
||||
# M3-NEXT: Dispatch Width: 6
|
||||
# M3-NEXT: IPC: 5.88
|
||||
|
||||
# M1-NEXT: Total Cycles: 76
|
||||
# M1-NEXT: Dispatch Width: 4
|
||||
# M1-NEXT: IPC: 3.95
|
||||
# M3: Iterations: 300
|
||||
# M3-NEXT: Instructions: 300
|
||||
# M3-NEXT: Total Cycles: 51
|
||||
# M3-NEXT: Dispatch Width: 6
|
||||
# M3-NEXT: IPC: 5.88
|
||||
|
||||
# M1: Iterations: 300
|
||||
# M1-NEXT: Instructions: 300
|
||||
# M1-NEXT: Total Cycles: 76
|
||||
# M1-NEXT: Dispatch Width: 4
|
||||
# M1-NEXT: IPC: 3.95
|
||||
|
||||
# ALL: Instruction Info:
|
||||
# ALL-NEXT: [1]: #uOps
|
||||
@ -24,8 +25,15 @@
|
||||
# ALL-NEXT: [6]: HasSideEffects
|
||||
|
||||
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
# ALL-NEXT: 1 0 - b t
|
||||
# ALL-NEXT: 1 0 - b t
|
||||
|
||||
# ALL: Timeline view:
|
||||
|
||||
# ALL: Index 01
|
||||
|
||||
# ALL: [0,0] DR b t
|
||||
# ALL-NEXT: [1,0] DR b t
|
||||
# ALL-NEXT: [2,0] DR b t
|
||||
|
||||
# ALL: Average Wait times (based on the timeline view):
|
||||
# ALL-NEXT: [0]: Executions
|
||||
@ -34,4 +42,5 @@
|
||||
# ALL-NEXT: [3]: Average time elapsed from WB until retire stage
|
||||
|
||||
# ALL: [0] [1] [2] [3]
|
||||
# ALL-NEXT: 0. 3 0.0 0.0 0.0 b t
|
||||
# ALL-NEXT: 0. 3 0.0 0.0 0.0 b t
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m3 -iterations=1 -scheduler-stats -resource-pressure=false -instruction-info=false < %s | FileCheck %s -check-prefix=ALL
|
||||
# RUN: llvm-mca -march=aarch64 -mcpu=exynos-m1 -iterations=1 -scheduler-stats -resource-pressure=false -instruction-info=false < %s | FileCheck %s -check-prefix=ALL
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=300 -timeline -timeline-max-iterations=3 < %s | FileCheck %s
|
||||
|
||||
vmulps %xmm0, %xmm1, %xmm2
|
||||
@ -10,7 +11,6 @@ vhaddps %xmm3, %xmm3, %xmm4
|
||||
# CHECK-NEXT: Dispatch Width: 2
|
||||
# CHECK-NEXT: IPC: 1.48
|
||||
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
# CHECK-NEXT: [1]: #uOps
|
||||
# CHECK-NEXT: [2]: Latency
|
||||
@ -24,7 +24,6 @@ vhaddps %xmm3, %xmm3, %xmm4
|
||||
# CHECK-NEXT: 1 3 1.00 vhaddps %xmm2, %xmm2, %xmm3
|
||||
# CHECK-NEXT: 1 3 1.00 vhaddps %xmm3, %xmm3, %xmm4
|
||||
|
||||
|
||||
# CHECK: Resources:
|
||||
# CHECK-NEXT: [0] - JALU0
|
||||
# CHECK-NEXT: [1] - JALU1
|
||||
@ -42,8 +41,8 @@ vhaddps %xmm3, %xmm3, %xmm4
|
||||
# CHECK-NEXT: [13] - JVIMUL
|
||||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
|
||||
# CHECK-NEXT: - - - 2.00 1.00 2.00 1.00 - - - - - - -
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
|
||||
# CHECK-NEXT: - - - 2.00 1.00 2.00 1.00 - - - - - - -
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
|
||||
@ -51,7 +50,6 @@ vhaddps %xmm3, %xmm3, %xmm4
|
||||
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vhaddps %xmm2, %xmm2, %xmm3
|
||||
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vhaddps %xmm3, %xmm3, %xmm4
|
||||
|
||||
|
||||
# CHECK: Timeline view:
|
||||
# CHECK-NEXT: 012345
|
||||
# CHECK-NEXT: Index 0123456789
|
||||
@ -66,7 +64,6 @@ vhaddps %xmm3, %xmm3, %xmm4
|
||||
# CHECK-NEXT: [2,1] . D====eeeER . vhaddps %xmm2, %xmm2, %xmm3
|
||||
# CHECK-NEXT: [2,2] . D======eeeER vhaddps %xmm3, %xmm3, %xmm4
|
||||
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
||||
@ -74,6 +71,7 @@ vhaddps %xmm3, %xmm3, %xmm4
|
||||
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
||||
|
||||
# CHECK: [0] [1] [2] [3]
|
||||
# CHECK-NEXT: 0. 3 1.0 1.0 3.3 vmulps %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: 1. 3 3.3 0.7 1.0 vhaddps %xmm2, %xmm2, %xmm3
|
||||
# CHECK-NEXT: 2. 3 5.7 0.0 0.0 vhaddps %xmm3, %xmm3, %xmm4
|
||||
# CHECK-NEXT: 0. 3 1.0 1.0 3.3 vmulps %xmm0, %xmm1, %xmm2
|
||||
# CHECK-NEXT: 1. 3 3.3 0.7 1.0 vhaddps %xmm2, %xmm2, %xmm3
|
||||
# CHECK-NEXT: 2. 3 5.7 0.0 0.0 vhaddps %xmm3, %xmm3, %xmm4
|
||||
|
||||
|
@ -1,8 +1,14 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s
|
||||
|
||||
vshufps $0, %xmm0, %xmm1, %xmm1
|
||||
vhaddps (%rdi), %xmm1, %xmm2
|
||||
|
||||
# CHECK: Iterations: 1
|
||||
# CHECK-NEXT: Instructions: 2
|
||||
# CHECK-NEXT: Total Cycles: 11
|
||||
# CHECK-NEXT: Dispatch Width: 2
|
||||
# CHECK-NEXT: IPC: 0.18
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
# CHECK-NEXT: [1]: #uOps
|
||||
@ -18,6 +24,18 @@ vhaddps (%rdi), %xmm1, %xmm2
|
||||
|
||||
# CHECK: Timeline view:
|
||||
# CHECK-NEXT: 0
|
||||
# CHECK-NEXT: Index 0123456789
|
||||
# CHECK-NEXT: Index 0123456789
|
||||
|
||||
# CHECK: [0,0] DeER . . vshufps $0, %xmm0, %xmm1, %xmm1
|
||||
# CHECK-NEXT: [0,1] DeeeeeeeeER vhaddps (%rdi), %xmm1, %xmm2
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
||||
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
||||
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
||||
|
||||
# CHECK: [0] [1] [2] [3]
|
||||
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vshufps $0, %xmm0, %xmm1, %xmm1
|
||||
# CHECK-NEXT: 1. 1 1.0 0.0 0.0 vhaddps (%rdi), %xmm1, %xmm2
|
||||
|
||||
|
@ -1,8 +1,15 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s
|
||||
|
||||
vshufps $0, %xmm0, %xmm1, %xmm1
|
||||
vhaddps (%rdi), %ymm1, %ymm2
|
||||
|
||||
# CHECK: Iterations: 1
|
||||
# CHECK-NEXT: Instructions: 2
|
||||
# CHECK-NEXT: Total Cycles: 12
|
||||
# CHECK-NEXT: Dispatch Width: 2
|
||||
# CHECK-NEXT: IPC: 0.17
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
# CHECK-NEXT: [1]: #uOps
|
||||
# CHECK-NEXT: [2]: Latency
|
||||
@ -15,9 +22,20 @@ vhaddps (%rdi), %ymm1, %ymm2
|
||||
# CHECK-NEXT: 1 1 0.50 vshufps $0, %xmm0, %xmm1, %xmm1
|
||||
# CHECK-NEXT: 2 8 2.00 * vhaddps (%rdi), %ymm1, %ymm2
|
||||
|
||||
|
||||
# CHECK: Timeline view:
|
||||
# CHECK-NEXT: 01
|
||||
# CHECK-NEXT: Index 0123456789
|
||||
# CHECK-NEXT: Index 0123456789
|
||||
|
||||
# CHECK: [0,0] DeER . .. vshufps $0, %xmm0, %xmm1, %xmm1
|
||||
# CHECK-NEXT: [0,1] .DeeeeeeeeER vhaddps (%rdi), %ymm1, %ymm2
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
||||
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
||||
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
||||
|
||||
# CHECK: [0] [1] [2] [3]
|
||||
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vshufps $0, %xmm0, %xmm1, %xmm1
|
||||
# CHECK-NEXT: 1. 1 1.0 1.0 0.0 vhaddps (%rdi), %ymm1, %ymm2
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -resource-pressure=false -instruction-info=true < %s | FileCheck %s --check-prefix=ENABLED
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -resource-pressure=false -instruction-info=false < %s | FileCheck %s -check-prefix=DISABLED
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -resource-pressure=false -instruction-info < %s | FileCheck %s -check-prefix=ENABLED
|
||||
@ -9,6 +10,12 @@ vhaddps %xmm3, %xmm3, %xmm4
|
||||
|
||||
# DISABLED-NOT: Instruction Info:
|
||||
|
||||
# ENABLED: Iterations: 100
|
||||
# ENABLED-NEXT: Instructions: 300
|
||||
# ENABLED-NEXT: Total Cycles: 209
|
||||
# ENABLED-NEXT: Dispatch Width: 2
|
||||
# ENABLED-NEXT: IPC: 1.44
|
||||
|
||||
# ENABLED: Instruction Info:
|
||||
# ENABLED-NEXT: [1]: #uOps
|
||||
# ENABLED-NEXT: [2]: Latency
|
||||
@ -21,3 +28,4 @@ vhaddps %xmm3, %xmm3, %xmm4
|
||||
# ENABLED-NEXT: 1 2 1.00 vmulps %xmm0, %xmm1, %xmm2
|
||||
# ENABLED-NEXT: 1 3 1.00 vhaddps %xmm2, %xmm2, %xmm3
|
||||
# ENABLED-NEXT: 1 3 1.00 vhaddps %xmm3, %xmm3, %xmm4
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=100 -timeline -timeline-max-iterations=1 -noalias=false < %s | FileCheck %s
|
||||
|
||||
vmovaps (%rsi), %xmm0
|
||||
@ -9,14 +10,13 @@ vmovaps %xmm0, 32(%rdi)
|
||||
vmovaps 48(%rsi), %xmm0
|
||||
vmovaps %xmm0, 48(%rdi)
|
||||
|
||||
# CHECK: Iterations: 100
|
||||
# CHECK: Iterations: 100
|
||||
# CHECK-NEXT: Instructions: 800
|
||||
# CHECK-NEXT: Total Cycles: 2403
|
||||
# CHECK-NEXT: Dispatch Width: 2
|
||||
# CHECK-NEXT: IPC: 0.33
|
||||
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
# CHECK: Instruction Info:
|
||||
# CHECK-NEXT: [1]: #uOps
|
||||
# CHECK-NEXT: [2]: Latency
|
||||
# CHECK-NEXT: [3]: RThroughput
|
||||
@ -24,18 +24,17 @@ vmovaps %xmm0, 48(%rdi)
|
||||
# CHECK-NEXT: [5]: MayStore
|
||||
# CHECK-NEXT: [6]: HasSideEffects
|
||||
|
||||
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
# CHECK-NEXT: 1 5 1.00 * vmovaps (%rsi), %xmm0
|
||||
# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, (%rdi)
|
||||
# CHECK-NEXT: 1 5 1.00 * vmovaps 16(%rsi), %xmm0
|
||||
# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 16(%rdi)
|
||||
# CHECK-NEXT: 1 5 1.00 * vmovaps 32(%rsi), %xmm0
|
||||
# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 32(%rdi)
|
||||
# CHECK-NEXT: 1 5 1.00 * vmovaps 48(%rsi), %xmm0
|
||||
# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 48(%rdi)
|
||||
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
# CHECK-NEXT: 1 5 1.00 * vmovaps (%rsi), %xmm0
|
||||
# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, (%rdi)
|
||||
# CHECK-NEXT: 1 5 1.00 * vmovaps 16(%rsi), %xmm0
|
||||
# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 16(%rdi)
|
||||
# CHECK-NEXT: 1 5 1.00 * vmovaps 32(%rsi), %xmm0
|
||||
# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 32(%rdi)
|
||||
# CHECK-NEXT: 1 5 1.00 * vmovaps 48(%rsi), %xmm0
|
||||
# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 48(%rdi)
|
||||
|
||||
|
||||
# CHECK: Resources:
|
||||
# CHECK: Resources:
|
||||
# CHECK-NEXT: [0] - JALU0
|
||||
# CHECK-NEXT: [1] - JALU1
|
||||
# CHECK-NEXT: [2] - JDiv
|
||||
@ -51,12 +50,11 @@ vmovaps %xmm0, 48(%rdi)
|
||||
# CHECK-NEXT: [12] - JVALU1
|
||||
# CHECK-NEXT: [13] - JVIMUL
|
||||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
|
||||
# CHECK-NEXT: - - - 2.00 2.00 3.99 4.01 4.00 - 4.00 4.00 - - -
|
||||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
|
||||
# CHECK-NEXT: - - - 2.00 2.00 3.99 4.01 4.00 - 4.00 4.00 - - -
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
|
||||
# CHECK-NEXT: - - - - 1.00 0.99 0.01 1.00 - - - - - - vmovaps (%rsi), %xmm0
|
||||
# CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vmovaps %xmm0, (%rdi)
|
||||
@ -67,9 +65,8 @@ vmovaps %xmm0, 48(%rdi)
|
||||
# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vmovaps 48(%rsi), %xmm0
|
||||
# CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vmovaps %xmm0, 48(%rdi)
|
||||
|
||||
|
||||
# CHECK: Timeline view:
|
||||
# CHECK-NEXT: 0123456789
|
||||
# CHECK-NEXT: 0123456789
|
||||
# CHECK-NEXT: Index 0123456789 0123456
|
||||
|
||||
# CHECK: [0,0] DeeeeeER . . . .. vmovaps (%rsi), %xmm0
|
||||
@ -81,18 +78,19 @@ vmovaps %xmm0, 48(%rdi)
|
||||
# CHECK-NEXT: [0,6] . D===============eeeeeER. vmovaps 48(%rsi), %xmm0
|
||||
# CHECK-NEXT: [0,7] . D====================eER vmovaps %xmm0, 48(%rdi)
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
||||
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
||||
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
||||
|
||||
# CHECK: [0] [1] [2] [3]
|
||||
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vmovaps (%rsi), %xmm0
|
||||
# CHECK-NEXT: 1. 1 6.0 0.0 0.0 vmovaps %xmm0, (%rdi)
|
||||
# CHECK-NEXT: 2. 1 6.0 0.0 0.0 vmovaps 16(%rsi), %xmm0
|
||||
# CHECK-NEXT: 3. 1 11.0 0.0 0.0 vmovaps %xmm0, 16(%rdi)
|
||||
# CHECK-NEXT: 4. 1 11.0 0.0 0.0 vmovaps 32(%rsi), %xmm0
|
||||
# CHECK-NEXT: 5. 1 16.0 0.0 0.0 vmovaps %xmm0, 32(%rdi)
|
||||
# CHECK-NEXT: 6. 1 16.0 0.0 0.0 vmovaps 48(%rsi), %xmm0
|
||||
# CHECK-NEXT: 7. 1 21.0 0.0 0.0 vmovaps %xmm0, 48(%rdi)
|
||||
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vmovaps (%rsi), %xmm0
|
||||
# CHECK-NEXT: 1. 1 6.0 0.0 0.0 vmovaps %xmm0, (%rdi)
|
||||
# CHECK-NEXT: 2. 1 6.0 0.0 0.0 vmovaps 16(%rsi), %xmm0
|
||||
# CHECK-NEXT: 3. 1 11.0 0.0 0.0 vmovaps %xmm0, 16(%rdi)
|
||||
# CHECK-NEXT: 4. 1 11.0 0.0 0.0 vmovaps 32(%rsi), %xmm0
|
||||
# CHECK-NEXT: 5. 1 16.0 0.0 0.0 vmovaps %xmm0, 32(%rdi)
|
||||
# CHECK-NEXT: 6. 1 16.0 0.0 0.0 vmovaps 48(%rsi), %xmm0
|
||||
# CHECK-NEXT: 7. 1 21.0 0.0 0.0 vmovaps %xmm0, 48(%rdi)
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=100 -timeline -timeline-max-iterations=1 < %s | FileCheck %s
|
||||
|
||||
vmovaps (%rsi), %xmm0
|
||||
@ -9,15 +10,13 @@ vmovaps %xmm0, 32(%rdi)
|
||||
vmovaps 48(%rsi), %xmm0
|
||||
vmovaps %xmm0, 48(%rdi)
|
||||
|
||||
|
||||
# CHECK: Iterations: 100
|
||||
# CHECK: Iterations: 100
|
||||
# CHECK-NEXT: Instructions: 800
|
||||
# CHECK-NEXT: Total Cycles: 408
|
||||
# CHECK-NEXT: Dispatch Width: 2
|
||||
# CHECK-NEXT: IPC: 1.96
|
||||
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
# CHECK: Instruction Info:
|
||||
# CHECK-NEXT: [1]: #uOps
|
||||
# CHECK-NEXT: [2]: Latency
|
||||
# CHECK-NEXT: [3]: RThroughput
|
||||
@ -25,18 +24,17 @@ vmovaps %xmm0, 48(%rdi)
|
||||
# CHECK-NEXT: [5]: MayStore
|
||||
# CHECK-NEXT: [6]: HasSideEffects
|
||||
|
||||
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
# CHECK-NEXT: 1 5 1.00 * vmovaps (%rsi), %xmm0
|
||||
# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, (%rdi)
|
||||
# CHECK-NEXT: 1 5 1.00 * vmovaps 16(%rsi), %xmm0
|
||||
# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 16(%rdi)
|
||||
# CHECK-NEXT: 1 5 1.00 * vmovaps 32(%rsi), %xmm0
|
||||
# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 32(%rdi)
|
||||
# CHECK-NEXT: 1 5 1.00 * vmovaps 48(%rsi), %xmm0
|
||||
# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 48(%rdi)
|
||||
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
# CHECK-NEXT: 1 5 1.00 * vmovaps (%rsi), %xmm0
|
||||
# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, (%rdi)
|
||||
# CHECK-NEXT: 1 5 1.00 * vmovaps 16(%rsi), %xmm0
|
||||
# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 16(%rdi)
|
||||
# CHECK-NEXT: 1 5 1.00 * vmovaps 32(%rsi), %xmm0
|
||||
# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 32(%rdi)
|
||||
# CHECK-NEXT: 1 5 1.00 * vmovaps 48(%rsi), %xmm0
|
||||
# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 48(%rdi)
|
||||
|
||||
|
||||
# CHECK: Resources:
|
||||
# CHECK: Resources:
|
||||
# CHECK-NEXT: [0] - JALU0
|
||||
# CHECK-NEXT: [1] - JALU1
|
||||
# CHECK-NEXT: [2] - JDiv
|
||||
@ -52,12 +50,11 @@ vmovaps %xmm0, 48(%rdi)
|
||||
# CHECK-NEXT: [12] - JVALU1
|
||||
# CHECK-NEXT: [13] - JVIMUL
|
||||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
|
||||
# CHECK-NEXT: - - - 2.00 2.00 3.97 4.03 4.00 - 4.00 4.00 - - -
|
||||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
|
||||
# CHECK-NEXT: - - - 2.00 2.00 3.97 4.03 4.00 - 4.00 4.00 - - -
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
|
||||
# CHECK-NEXT: - - - - 1.00 0.98 0.02 1.00 - - - - - - vmovaps (%rsi), %xmm0
|
||||
# CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vmovaps %xmm0, (%rdi)
|
||||
@ -68,10 +65,9 @@ vmovaps %xmm0, 48(%rdi)
|
||||
# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vmovaps 48(%rsi), %xmm0
|
||||
# CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vmovaps %xmm0, 48(%rdi)
|
||||
|
||||
|
||||
# CHECK: Timeline view:
|
||||
# CHECK-NEXT: 01
|
||||
# CHECK-NEXT: Index 0123456789
|
||||
# CHECK: Timeline view:
|
||||
# CHECK-NEXT: 01
|
||||
# CHECK-NEXT: Index 0123456789
|
||||
|
||||
# CHECK: [0,0] DeeeeeER .. vmovaps (%rsi), %xmm0
|
||||
# CHECK-NEXT: [0,1] D=====eER .. vmovaps %xmm0, (%rdi)
|
||||
@ -82,19 +78,19 @@ vmovaps %xmm0, 48(%rdi)
|
||||
# CHECK-NEXT: [0,6] . DeeeeeER. vmovaps 48(%rsi), %xmm0
|
||||
# CHECK-NEXT: [0,7] . D=====eER vmovaps %xmm0, 48(%rdi)
|
||||
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
||||
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
||||
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
||||
|
||||
# CHECK: [0] [1] [2] [3]
|
||||
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vmovaps (%rsi), %xmm0
|
||||
# CHECK-NEXT: 1. 1 6.0 0.0 0.0 vmovaps %xmm0, (%rdi)
|
||||
# CHECK-NEXT: 2. 1 1.0 1.0 0.0 vmovaps 16(%rsi), %xmm0
|
||||
# CHECK-NEXT: 3. 1 6.0 0.0 0.0 vmovaps %xmm0, 16(%rdi)
|
||||
# CHECK-NEXT: 4. 1 1.0 1.0 0.0 vmovaps 32(%rsi), %xmm0
|
||||
# CHECK-NEXT: 5. 1 6.0 0.0 0.0 vmovaps %xmm0, 32(%rdi)
|
||||
# CHECK-NEXT: 6. 1 1.0 1.0 0.0 vmovaps 48(%rsi), %xmm0
|
||||
# CHECK-NEXT: 7. 1 6.0 0.0 0.0 vmovaps %xmm0, 48(%rdi)
|
||||
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vmovaps (%rsi), %xmm0
|
||||
# CHECK-NEXT: 1. 1 6.0 0.0 0.0 vmovaps %xmm0, (%rdi)
|
||||
# CHECK-NEXT: 2. 1 1.0 1.0 0.0 vmovaps 16(%rsi), %xmm0
|
||||
# CHECK-NEXT: 3. 1 6.0 0.0 0.0 vmovaps %xmm0, 16(%rdi)
|
||||
# CHECK-NEXT: 4. 1 1.0 1.0 0.0 vmovaps 32(%rsi), %xmm0
|
||||
# CHECK-NEXT: 5. 1 6.0 0.0 0.0 vmovaps %xmm0, 32(%rdi)
|
||||
# CHECK-NEXT: 6. 1 1.0 1.0 0.0 vmovaps 48(%rsi), %xmm0
|
||||
# CHECK-NEXT: 7. 1 6.0 0.0 0.0 vmovaps %xmm0, 48(%rdi)
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -timeline -timeline-max-iterations=2 < %s | FileCheck %s
|
||||
|
||||
# VALU0/VALU1
|
||||
@ -57,7 +58,7 @@ vsqrtps %ymm0, %ymm2
|
||||
# CHECK-NEXT: [13] - JVIMUL
|
||||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
|
||||
# CHECK-NEXT: - - - 3.00 63.00 6.01 5.99 - - - 1.00 1.00 1.00 3.00
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
|
@ -1,3 +1,4 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -resource-pressure=false -retire-stats -iterations=1 < %s | FileCheck %s
|
||||
|
||||
vsqrtps %xmm0, %xmm2
|
||||
|
@ -1,19 +1,18 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s
|
||||
|
||||
# The vmul can start executing 3cy in advance. That is because the first use
|
||||
# operand (i.e. %xmm1) is a ReadAfterLd. That means, the memory operand is
|
||||
# evaluated before %xmm1.
|
||||
|
||||
|
||||
vaddps %xmm0, %xmm0, %xmm1
|
||||
vmulps (%rdi), %xmm1, %xmm2
|
||||
|
||||
|
||||
# CHECK: Iterations: 1
|
||||
# CHECK-NEXT: Instructions: 2
|
||||
# CHECK-NEXT: Total Cycles: 10
|
||||
# CHECK-NEXT: Dispatch Width: 2
|
||||
|
||||
# CHECK-NEXT: IPC: 0.20
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
# CHECK-NEXT: [1]: #uOps
|
||||
@ -27,14 +26,13 @@ vmulps (%rdi), %xmm1, %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm0, %xmm1
|
||||
# CHECK-NEXT: 1 7 1.00 * vmulps (%rdi), %xmm1, %xmm2
|
||||
|
||||
|
||||
# CHECK: Timeline view:
|
||||
|
||||
# CHECK: Index 0123456789
|
||||
|
||||
# CHECK: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1
|
||||
# CHECK-NEXT: [0,1] DeeeeeeeER vmulps (%rdi), %xmm1, %xmm2
|
||||
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
||||
@ -42,5 +40,6 @@ vmulps (%rdi), %xmm1, %xmm2
|
||||
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
||||
|
||||
# CHECK: [0] [1] [2] [3]
|
||||
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm1
|
||||
# CHECK-NEXT: 1. 1 1.0 0.0 0.0 vmulps (%rdi), %xmm1, %xmm2
|
||||
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm1
|
||||
# CHECK-NEXT: 1. 1 1.0 0.0 0.0 vmulps (%rdi), %xmm1, %xmm2
|
||||
|
||||
|
@ -1,13 +1,19 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -resource-pressure=0 -timeline < %s | FileCheck %s
|
||||
|
||||
imull %esi
|
||||
imull (%rdi)
|
||||
|
||||
|
||||
# The second integer multiply can start at cycle 2 because the implicit reads
|
||||
# can start after the load operand is evaluated.
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
# CHECK: Iterations: 1
|
||||
# CHECK-NEXT: Instructions: 2
|
||||
# CHECK-NEXT: Total Cycles: 10
|
||||
# CHECK-NEXT: Dispatch Width: 2
|
||||
# CHECK-NEXT: IPC: 0.20
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
# CHECK-NEXT: [1]: #uOps
|
||||
# CHECK-NEXT: [2]: Latency
|
||||
# CHECK-NEXT: [3]: RThroughput
|
||||
@ -19,8 +25,20 @@
|
||||
# CHECK-NEXT: 2 3 1.00 imull %esi
|
||||
# CHECK-NEXT: 2 6 1.00 * imull (%rdi)
|
||||
|
||||
|
||||
# CHECK: Timeline view:
|
||||
|
||||
# CHECK: Index 0123456789
|
||||
|
||||
# CHECK: [0,0] DeeeER . imull %esi
|
||||
# CHECK-NEXT: [0,1] .DeeeeeeER imull (%rdi)
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
||||
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
||||
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
||||
|
||||
# CHECK: [0] [1] [2] [3]
|
||||
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 imull %esi
|
||||
# CHECK-NEXT: 1. 1 1.0 1.0 0.0 imull (%rdi)
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -resource-pressure=0 -timeline -dispatch=3 < %s | FileCheck %s
|
||||
|
||||
add %rdi, %rsi
|
||||
|
@ -1,11 +1,14 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=5 -instruction-info=false -dispatch-stats -register-file-stats -timeline < %s | FileCheck %s
|
||||
|
||||
vaddps %xmm0, %xmm0, %xmm0
|
||||
vmulps %xmm0, %xmm0, %xmm0
|
||||
|
||||
# CHECK: Iterations: 5
|
||||
# CHECK: Iterations: 5
|
||||
# CHECK-NEXT: Instructions: 10
|
||||
|
||||
# CHECK-NEXT: Total Cycles: 28
|
||||
# CHECK-NEXT: Dispatch Width: 2
|
||||
# CHECK-NEXT: IPC: 0.36
|
||||
|
||||
# CHECK: Dynamic Dispatch Stall Cycles:
|
||||
# CHECK-NEXT: RAT - Register unavailable: 0
|
||||
@ -15,10 +18,14 @@ vmulps %xmm0, %xmm0, %xmm0
|
||||
# CHECK-NEXT: SQ - Store queue full: 0
|
||||
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
|
||||
|
||||
# CHECK: Dispatch Logic - number of cycles where we saw N instructions dispatched:
|
||||
# CHECK-NEXT: [# dispatched], [# cycles]
|
||||
# CHECK-NEXT: 0, 23 (82.1%)
|
||||
# CHECK-NEXT: 2, 5 (17.9%)
|
||||
|
||||
# CHECK: Register File statistics:
|
||||
# CHECK-NEXT: Total number of mappings created: 10
|
||||
# CHECK-NEXT: Max number of mappings used: 10
|
||||
# CHECK-NEXT: Total number of mappings created: 10
|
||||
# CHECK-NEXT: Max number of mappings used: 10
|
||||
|
||||
# CHECK: * Register File #1 -- FpuPRF:
|
||||
# CHECK-NEXT: Number of physical registers: 72
|
||||
@ -30,9 +37,33 @@ vmulps %xmm0, %xmm0, %xmm0
|
||||
# CHECK-NEXT: Total number of mappings created: 0
|
||||
# CHECK-NEXT: Max number of mappings used: 0
|
||||
|
||||
# CHECK: Resources:
|
||||
# CHECK-NEXT: [0] - JALU0
|
||||
# CHECK-NEXT: [1] - JALU1
|
||||
# CHECK-NEXT: [2] - JDiv
|
||||
# CHECK-NEXT: [3] - JFPA
|
||||
# CHECK-NEXT: [4] - JFPM
|
||||
# CHECK-NEXT: [5] - JFPU0
|
||||
# CHECK-NEXT: [6] - JFPU1
|
||||
# CHECK-NEXT: [7] - JLAGU
|
||||
# CHECK-NEXT: [8] - JMul
|
||||
# CHECK-NEXT: [9] - JSAGU
|
||||
# CHECK-NEXT: [10] - JSTC
|
||||
# CHECK-NEXT: [11] - JVALU0
|
||||
# CHECK-NEXT: [12] - JVALU1
|
||||
# CHECK-NEXT: [13] - JVIMUL
|
||||
|
||||
# CHECK: Timeline view:
|
||||
# CHECK-NEXT: 0123456789
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
|
||||
# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - -
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
|
||||
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vaddps %xmm0, %xmm0, %xmm0
|
||||
# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - vmulps %xmm0, %xmm0, %xmm0
|
||||
|
||||
# CHECK: Timeline view:
|
||||
# CHECK-NEXT: 0123456789
|
||||
# CHECK-NEXT: Index 0123456789 01234567
|
||||
|
||||
# CHECK: [0,0] DeeeER . . . . . vaddps %xmm0, %xmm0, %xmm0
|
||||
@ -45,3 +76,14 @@ vmulps %xmm0, %xmm0, %xmm0
|
||||
# CHECK-NEXT: [3,1] . D===============eeER . . vmulps %xmm0, %xmm0, %xmm0
|
||||
# CHECK-NEXT: [4,0] . D================eeeER . vaddps %xmm0, %xmm0, %xmm0
|
||||
# CHECK-NEXT: [4,1] . D===================eeER vmulps %xmm0, %xmm0, %xmm0
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
||||
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
||||
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
||||
|
||||
# CHECK: [0] [1] [2] [3]
|
||||
# CHECK-NEXT: 0. 5 9.0 0.2 0.0 vaddps %xmm0, %xmm0, %xmm0
|
||||
# CHECK-NEXT: 1. 5 12.0 0.0 0.0 vmulps %xmm0, %xmm0, %xmm0
|
||||
|
||||
|
@ -1,13 +1,16 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -register-file-size=5 -iterations=5 -instruction-info=false -dispatch-stats -register-file-stats -timeline < %s | FileCheck %s
|
||||
|
||||
vaddps %xmm0, %xmm0, %xmm0
|
||||
vmulps %xmm0, %xmm0, %xmm0
|
||||
|
||||
# CHECK: Iterations: 5
|
||||
# CHECK: Iterations: 5
|
||||
# CHECK-NEXT: Instructions: 10
|
||||
# CHECK-NEXT: Total Cycles: 28
|
||||
# CHECK-NEXT: Dispatch Width: 2
|
||||
# CHECK-NEXT: IPC: 0.36
|
||||
|
||||
|
||||
# CHECK: Dynamic Dispatch Stall Cycles:
|
||||
# CHECK: Dynamic Dispatch Stall Cycles:
|
||||
# CHECK-NEXT: RAT - Register unavailable: 13
|
||||
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
||||
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
|
||||
@ -15,10 +18,15 @@ vmulps %xmm0, %xmm0, %xmm0
|
||||
# CHECK-NEXT: SQ - Store queue full: 0
|
||||
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
|
||||
|
||||
# CHECK: Dispatch Logic - number of cycles where we saw N instructions dispatched:
|
||||
# CHECK-NEXT: [# dispatched], [# cycles]
|
||||
# CHECK-NEXT: 0, 20 (71.4%)
|
||||
# CHECK-NEXT: 2, 2 (7.1%)
|
||||
# CHECK-NEXT: 1, 6 (21.4%)
|
||||
|
||||
# CHECK: Register File statistics:
|
||||
# CHECK-NEXT: Total number of mappings created: 10
|
||||
# CHECK-NEXT: Max number of mappings used: 5
|
||||
# CHECK-NEXT: Total number of mappings created: 10
|
||||
# CHECK-NEXT: Max number of mappings used: 5
|
||||
|
||||
# CHECK: * Register File #1 -- FpuPRF:
|
||||
# CHECK-NEXT: Number of physical registers: 72
|
||||
@ -30,10 +38,35 @@ vmulps %xmm0, %xmm0, %xmm0
|
||||
# CHECK-NEXT: Total number of mappings created: 0
|
||||
# CHECK-NEXT: Max number of mappings used: 0
|
||||
|
||||
# CHECK: Resources:
|
||||
# CHECK-NEXT: [0] - JALU0
|
||||
# CHECK-NEXT: [1] - JALU1
|
||||
# CHECK-NEXT: [2] - JDiv
|
||||
# CHECK-NEXT: [3] - JFPA
|
||||
# CHECK-NEXT: [4] - JFPM
|
||||
# CHECK-NEXT: [5] - JFPU0
|
||||
# CHECK-NEXT: [6] - JFPU1
|
||||
# CHECK-NEXT: [7] - JLAGU
|
||||
# CHECK-NEXT: [8] - JMul
|
||||
# CHECK-NEXT: [9] - JSAGU
|
||||
# CHECK-NEXT: [10] - JSTC
|
||||
# CHECK-NEXT: [11] - JVALU0
|
||||
# CHECK-NEXT: [12] - JVALU1
|
||||
# CHECK-NEXT: [13] - JVIMUL
|
||||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
|
||||
# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - -
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
|
||||
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vaddps %xmm0, %xmm0, %xmm0
|
||||
# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - vmulps %xmm0, %xmm0, %xmm0
|
||||
|
||||
# CHECK: Timeline view:
|
||||
# CHECK-NEXT: 0123456789
|
||||
# CHECK-NEXT: 0123456789
|
||||
# CHECK-NEXT: Index 0123456789 01234567
|
||||
|
||||
# CHECK: [0,0] DeeeER . . . . . vaddps %xmm0, %xmm0, %xmm0
|
||||
# CHECK-NEXT: [0,1] D===eeER . . . . . vmulps %xmm0, %xmm0, %xmm0
|
||||
# CHECK-NEXT: [1,0] .D====eeeER . . . . vaddps %xmm0, %xmm0, %xmm0
|
||||
@ -44,3 +77,14 @@ vmulps %xmm0, %xmm0, %xmm0
|
||||
# CHECK-NEXT: [3,1] . . D========eeER . . vmulps %xmm0, %xmm0, %xmm0
|
||||
# CHECK-NEXT: [4,0] . . . D========eeeER . vaddps %xmm0, %xmm0, %xmm0
|
||||
# CHECK-NEXT: [4,1] . . . D========eeER vmulps %xmm0, %xmm0, %xmm0
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
||||
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
||||
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
||||
|
||||
# CHECK: [0] [1] [2] [3]
|
||||
# CHECK-NEXT: 0. 5 6.6 0.2 0.0 vaddps %xmm0, %xmm0, %xmm0
|
||||
# CHECK-NEXT: 1. 5 7.8 0.0 0.0 vmulps %xmm0, %xmm0, %xmm0
|
||||
|
||||
|
@ -1,9 +1,13 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -register-file-size=5 -iterations=2 -dispatch-stats -register-file-stats -timeline < %s | FileCheck %s
|
||||
|
||||
idiv %eax
|
||||
|
||||
# CHECK: Iterations: 2
|
||||
# CHECK-NEXT: Instructions: 2
|
||||
# CHECK-NEXT: Total Cycles: 55
|
||||
# CHECK-NEXT: Dispatch Width: 2
|
||||
# CHECK-NEXT: IPC: 0.04
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
# CHECK-NEXT: [1]: #uOps
|
||||
@ -16,7 +20,6 @@ idiv %eax
|
||||
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
# CHECK-NEXT: 2 25 25.00 * idivl %eax
|
||||
|
||||
|
||||
# CHECK: Dynamic Dispatch Stall Cycles:
|
||||
# CHECK-NEXT: RAT - Register unavailable: 26
|
||||
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
||||
@ -25,10 +28,14 @@ idiv %eax
|
||||
# CHECK-NEXT: SQ - Store queue full: 0
|
||||
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
|
||||
|
||||
# CHECK: Dispatch Logic - number of cycles where we saw N instructions dispatched:
|
||||
# CHECK-NEXT: [# dispatched], [# cycles]
|
||||
# CHECK-NEXT: 0, 53 (96.4%)
|
||||
# CHECK-NEXT: 1, 2 (3.6%)
|
||||
|
||||
# CHECK: Register File statistics:
|
||||
# CHECK-NEXT: Total number of mappings created: 6
|
||||
# CHECK-NEXT: Max number of mappings used: 3
|
||||
# CHECK-NEXT: Total number of mappings created: 6
|
||||
# CHECK-NEXT: Max number of mappings used: 3
|
||||
|
||||
# CHECK: * Register File #1 -- FpuPRF:
|
||||
# CHECK-NEXT: Number of physical registers: 72
|
||||
@ -40,10 +47,43 @@ idiv %eax
|
||||
# CHECK-NEXT: Total number of mappings created: 6
|
||||
# CHECK-NEXT: Max number of mappings used: 3
|
||||
|
||||
# CHECK: Resources:
|
||||
# CHECK-NEXT: [0] - JALU0
|
||||
# CHECK-NEXT: [1] - JALU1
|
||||
# CHECK-NEXT: [2] - JDiv
|
||||
# CHECK-NEXT: [3] - JFPA
|
||||
# CHECK-NEXT: [4] - JFPM
|
||||
# CHECK-NEXT: [5] - JFPU0
|
||||
# CHECK-NEXT: [6] - JFPU1
|
||||
# CHECK-NEXT: [7] - JLAGU
|
||||
# CHECK-NEXT: [8] - JMul
|
||||
# CHECK-NEXT: [9] - JSAGU
|
||||
# CHECK-NEXT: [10] - JSTC
|
||||
# CHECK-NEXT: [11] - JVALU0
|
||||
# CHECK-NEXT: [12] - JVALU1
|
||||
# CHECK-NEXT: [13] - JVIMUL
|
||||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
|
||||
# CHECK-NEXT: - 1.00 25.00 - - - - - - - - - - -
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
|
||||
# CHECK-NEXT: - 1.00 25.00 - - - - - - - - - - - idivl %eax
|
||||
|
||||
# CHECK: Timeline view:
|
||||
# CHECK-NEXT: 0123456789 0123456789 01234
|
||||
# CHECK-NEXT: Index 0123456789 0123456789 0123456789
|
||||
# CHECK-NEXT: 0123456789 0123456789 01234
|
||||
# CHECK-NEXT: Index 0123456789 0123456789 0123456789
|
||||
|
||||
# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeER . . . . . . idivl %eax
|
||||
# CHECK-NEXT: [1,0] . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeER idivl %eax
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
||||
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
||||
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
||||
|
||||
# CHECK: [0] [1] [2] [3]
|
||||
# CHECK-NEXT: 0. 2 1.0 1.0 0.0 idivl %eax
|
||||
|
||||
|
@ -1,9 +1,13 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=22 -dispatch-stats -register-file-stats -resource-pressure=false -timeline -timeline-max-iterations=3 < %s | FileCheck %s
|
||||
|
||||
idiv %eax
|
||||
|
||||
# CHECK: Iterations: 22
|
||||
# CHECK-NEXT: Instructions: 22
|
||||
# CHECK-NEXT: Total Cycles: 553
|
||||
# CHECK-NEXT: Dispatch Width: 2
|
||||
# CHECK-NEXT: IPC: 0.04
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
# CHECK-NEXT: [1]: #uOps
|
||||
@ -16,7 +20,6 @@ idiv %eax
|
||||
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
# CHECK-NEXT: 2 25 25.00 * idivl %eax
|
||||
|
||||
|
||||
# CHECK: Dynamic Dispatch Stall Cycles:
|
||||
# CHECK-NEXT: RAT - Register unavailable: 6
|
||||
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
|
||||
@ -25,10 +28,14 @@ idiv %eax
|
||||
# CHECK-NEXT: SQ - Store queue full: 0
|
||||
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
|
||||
|
||||
# CHECK: Dispatch Logic - number of cycles where we saw N instructions dispatched:
|
||||
# CHECK-NEXT: [# dispatched], [# cycles]
|
||||
# CHECK-NEXT: 0, 531 (96.0%)
|
||||
# CHECK-NEXT: 1, 22 (4.0%)
|
||||
|
||||
# CHECK: Register File statistics:
|
||||
# CHECK-NEXT: Total number of mappings created: 66
|
||||
# CHECK-NEXT: Max number of mappings used: 63
|
||||
# CHECK-NEXT: Total number of mappings created: 66
|
||||
# CHECK-NEXT: Max number of mappings used: 63
|
||||
|
||||
# CHECK: * Register File #1 -- FpuPRF:
|
||||
# CHECK-NEXT: Number of physical registers: 72
|
||||
@ -40,10 +47,20 @@ idiv %eax
|
||||
# CHECK-NEXT: Total number of mappings created: 66
|
||||
# CHECK-NEXT: Max number of mappings used: 63
|
||||
|
||||
|
||||
# CHECK: Timeline view:
|
||||
# CHECK-NEXT: 0123456789 0123456789 0123456789 01234567
|
||||
# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789
|
||||
# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789
|
||||
|
||||
# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeER . . . . . . . . . . . idivl %eax
|
||||
# CHECK-NEXT: [1,0] .D========================eeeeeeeeeeeeeeeeeeeeeeeeeER . . . . . . idivl %eax
|
||||
# CHECK-NEXT: [2,0] . D================================================eeeeeeeeeeeeeeeeeeeeeeeeeER idivl %eax
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
||||
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
||||
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
||||
|
||||
# CHECK: [0] [1] [2] [3]
|
||||
# CHECK-NEXT: 0. 3 25.0 0.3 0.0 idivl %eax
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -resource-pressure=false -instruction-info=false -dispatch-stats -register-file-stats -timeline < %s | FileCheck %s
|
||||
|
||||
vdivps %ymm0, %ymm0, %ymm1
|
||||
@ -34,14 +35,12 @@
|
||||
vaddps %ymm3, %ymm0, %ymm5
|
||||
vaddps %ymm3, %ymm0, %ymm6
|
||||
|
||||
|
||||
# CHECK: Iterations: 1
|
||||
# CHECK-NEXT: Instructions: 33
|
||||
# CHECK-NEXT: Total Cycles: 70
|
||||
# CHECK-NEXT: Dispatch Width: 2
|
||||
# CHECK-NEXT: IPC: 0.47
|
||||
|
||||
|
||||
# CHECK: Dynamic Dispatch Stall Cycles:
|
||||
# CHECK-NEXT: RAT - Register unavailable: 0
|
||||
# CHECK-NEXT: RCU - Retire tokens unavailable: 8
|
||||
@ -50,10 +49,14 @@
|
||||
# CHECK-NEXT: SQ - Store queue full: 0
|
||||
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
|
||||
|
||||
# CHECK: Dispatch Logic - number of cycles where we saw N instructions dispatched:
|
||||
# CHECK-NEXT: [# dispatched], [# cycles]
|
||||
# CHECK-NEXT: 0, 37 (52.9%)
|
||||
# CHECK-NEXT: 1, 33 (47.1%)
|
||||
|
||||
# CHECK: Register File statistics:
|
||||
# CHECK-NEXT: Total number of mappings created: 66
|
||||
# CHECK-NEXT: Max number of mappings used: 64
|
||||
# CHECK-NEXT: Total number of mappings created: 66
|
||||
# CHECK-NEXT: Max number of mappings used: 64
|
||||
|
||||
# CHECK: * Register File #1 -- FpuPRF:
|
||||
# CHECK-NEXT: Number of physical registers: 72
|
||||
@ -65,9 +68,8 @@
|
||||
# CHECK-NEXT: Total number of mappings created: 0
|
||||
# CHECK-NEXT: Max number of mappings used: 0
|
||||
|
||||
|
||||
# CHECK: Timeline view:
|
||||
# CHECK-NEXT: 0123456789 0123456789 0123456789
|
||||
# CHECK-NEXT: 0123456789 0123456789 0123456789
|
||||
# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789
|
||||
|
||||
# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER . . . . . . vdivps %ymm0, %ymm0, %ymm1
|
||||
@ -103,3 +105,45 @@
|
||||
# CHECK-NEXT: [0,30] . . . . . . D==============================eeeER . vaddps %ymm3, %ymm0, %ymm4
|
||||
# CHECK-NEXT: [0,31] . . . . . . .D===============================eeeER . vaddps %ymm3, %ymm0, %ymm5
|
||||
# CHECK-NEXT: [0,32] . . . . . . . . D========================eeeER vaddps %ymm3, %ymm0, %ymm6
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
||||
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
||||
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
||||
|
||||
# CHECK: [0] [1] [2] [3]
|
||||
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vdivps %ymm0, %ymm0, %ymm1
|
||||
# CHECK-NEXT: 1. 1 1.0 1.0 34.0 vaddps %ymm0, %ymm0, %ymm2
|
||||
# CHECK-NEXT: 2. 1 2.0 2.0 33.0 vaddps %ymm0, %ymm0, %ymm3
|
||||
# CHECK-NEXT: 3. 1 3.0 3.0 31.0 vaddps %ymm0, %ymm0, %ymm4
|
||||
# CHECK-NEXT: 4. 1 4.0 4.0 30.0 vaddps %ymm0, %ymm0, %ymm5
|
||||
# CHECK-NEXT: 5. 1 5.0 5.0 28.0 vaddps %ymm0, %ymm0, %ymm6
|
||||
# CHECK-NEXT: 6. 1 6.0 6.0 27.0 vaddps %ymm0, %ymm0, %ymm7
|
||||
# CHECK-NEXT: 7. 1 7.0 7.0 25.0 vaddps %ymm0, %ymm0, %ymm8
|
||||
# CHECK-NEXT: 8. 1 8.0 8.0 24.0 vaddps %ymm0, %ymm0, %ymm9
|
||||
# CHECK-NEXT: 9. 1 9.0 9.0 22.0 vaddps %ymm0, %ymm0, %ymm10
|
||||
# CHECK-NEXT: 10. 1 10.0 10.0 21.0 vaddps %ymm0, %ymm0, %ymm11
|
||||
# CHECK-NEXT: 11. 1 11.0 11.0 19.0 vaddps %ymm0, %ymm0, %ymm12
|
||||
# CHECK-NEXT: 12. 1 12.0 12.0 18.0 vaddps %ymm0, %ymm0, %ymm13
|
||||
# CHECK-NEXT: 13. 1 13.0 13.0 16.0 vaddps %ymm0, %ymm0, %ymm14
|
||||
# CHECK-NEXT: 14. 1 14.0 14.0 15.0 vaddps %ymm0, %ymm0, %ymm15
|
||||
# CHECK-NEXT: 15. 1 15.0 15.0 13.0 vaddps %ymm2, %ymm0, %ymm0
|
||||
# CHECK-NEXT: 16. 1 17.0 0.0 11.0 vaddps %ymm2, %ymm0, %ymm3
|
||||
# CHECK-NEXT: 17. 1 18.0 2.0 9.0 vaddps %ymm2, %ymm0, %ymm4
|
||||
# CHECK-NEXT: 18. 1 19.0 4.0 8.0 vaddps %ymm2, %ymm0, %ymm5
|
||||
# CHECK-NEXT: 19. 1 20.0 6.0 6.0 vaddps %ymm2, %ymm0, %ymm6
|
||||
# CHECK-NEXT: 20. 1 21.0 8.0 5.0 vaddps %ymm2, %ymm0, %ymm7
|
||||
# CHECK-NEXT: 21. 1 22.0 10.0 3.0 vaddps %ymm2, %ymm0, %ymm8
|
||||
# CHECK-NEXT: 22. 1 23.0 12.0 2.0 vaddps %ymm2, %ymm0, %ymm9
|
||||
# CHECK-NEXT: 23. 1 24.0 14.0 0.0 vaddps %ymm2, %ymm0, %ymm10
|
||||
# CHECK-NEXT: 24. 1 25.0 16.0 0.0 vaddps %ymm2, %ymm0, %ymm11
|
||||
# CHECK-NEXT: 25. 1 26.0 18.0 0.0 vaddps %ymm2, %ymm0, %ymm12
|
||||
# CHECK-NEXT: 26. 1 27.0 20.0 0.0 vaddps %ymm2, %ymm0, %ymm13
|
||||
# CHECK-NEXT: 27. 1 28.0 22.0 0.0 vaddps %ymm2, %ymm0, %ymm14
|
||||
# CHECK-NEXT: 28. 1 29.0 24.0 0.0 vaddps %ymm2, %ymm0, %ymm15
|
||||
# CHECK-NEXT: 29. 1 30.0 23.0 0.0 vaddps %ymm3, %ymm0, %ymm2
|
||||
# CHECK-NEXT: 30. 1 31.0 25.0 0.0 vaddps %ymm3, %ymm0, %ymm4
|
||||
# CHECK-NEXT: 31. 1 32.0 27.0 0.0 vaddps %ymm3, %ymm0, %ymm5
|
||||
# CHECK-NEXT: 32. 1 25.0 25.0 0.0 vaddps %ymm3, %ymm0, %ymm6
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -instruction-tables < %s | FileCheck %s
|
||||
|
||||
aesdec %xmm0, %xmm2
|
||||
@ -18,6 +19,28 @@ aesimc (%rax), %xmm2
|
||||
aeskeygenassist $22, %xmm0, %xmm2
|
||||
aeskeygenassist $22, (%rax), %xmm2
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
# CHECK-NEXT: [1]: #uOps
|
||||
# CHECK-NEXT: [2]: Latency
|
||||
# CHECK-NEXT: [3]: RThroughput
|
||||
# CHECK-NEXT: [4]: MayLoad
|
||||
# CHECK-NEXT: [5]: MayStore
|
||||
# CHECK-NEXT: [6]: HasSideEffects
|
||||
|
||||
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
# CHECK-NEXT: 2 3 1.00 aesdec %xmm0, %xmm2
|
||||
# CHECK-NEXT: 2 8 1.00 * aesdec (%rax), %xmm2
|
||||
# CHECK-NEXT: 2 3 1.00 aesdeclast %xmm0, %xmm2
|
||||
# CHECK-NEXT: 2 8 1.00 * aesdeclast (%rax), %xmm2
|
||||
# CHECK-NEXT: 2 3 1.00 aesenc %xmm0, %xmm2
|
||||
# CHECK-NEXT: 2 8 1.00 * aesenc (%rax), %xmm2
|
||||
# CHECK-NEXT: 2 3 1.00 aesenclast %xmm0, %xmm2
|
||||
# CHECK-NEXT: 2 8 1.00 * aesenclast (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 2 1.00 aesimc %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 7 1.00 * aesimc (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 2 1.00 aeskeygenassist $22, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 7 1.00 * aeskeygenassist $22, (%rax), %xmm2
|
||||
|
||||
# CHECK: Resources:
|
||||
# CHECK-NEXT: [0] - JALU0
|
||||
# CHECK-NEXT: [1] - JALU1
|
||||
@ -34,6 +57,10 @@ aeskeygenassist $22, (%rax), %xmm2
|
||||
# CHECK-NEXT: [12] - JVALU1
|
||||
# CHECK-NEXT: [13] - JVIMUL
|
||||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
|
||||
# CHECK-NEXT: - - - - - 12.00 - 6.00 - - - - - 12.00
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
|
||||
# CHECK-NEXT: - - - - - 1.00 - - - - - - - 1.00 aesdec %xmm0, %xmm2
|
||||
@ -48,3 +75,4 @@ aeskeygenassist $22, (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - - - 1.00 - 1.00 - - - - - 1.00 aesimc (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - - - 1.00 - - - - - - - 1.00 aeskeygenassist $22, %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - - - 1.00 - 1.00 - - - - - 1.00 aeskeygenassist $22, (%rax), %xmm2
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -instruction-tables < %s | FileCheck %s
|
||||
|
||||
vaddpd %xmm0, %xmm1, %xmm2
|
||||
@ -1008,7 +1009,6 @@ vxorps (%rax), %ymm1, %ymm2
|
||||
vzeroall
|
||||
vzeroupper
|
||||
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
# CHECK-NEXT: [1]: #uOps
|
||||
# CHECK-NEXT: [2]: Latency
|
||||
@ -1702,7 +1702,6 @@ vzeroupper
|
||||
# CHECK-NEXT: 73 90 - * * * vzeroall
|
||||
# CHECK-NEXT: 37 46 - * * * vzeroupper
|
||||
|
||||
|
||||
# CHECK: Resources:
|
||||
# CHECK-NEXT: [0] - JALU0
|
||||
# CHECK-NEXT: [1] - JALU1
|
||||
@ -1719,6 +1718,10 @@ vzeroupper
|
||||
# CHECK-NEXT: [12] - JVALU1
|
||||
# CHECK-NEXT: [13] - JVIMUL
|
||||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
|
||||
# CHECK-NEXT: 48.00 2.00 - 355.50 907.50 402.00 398.00 381.00 - 43.00 114.00 116.50 116.50 40.00
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
|
||||
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vaddpd %xmm0, %xmm1, %xmm2
|
||||
@ -2404,3 +2407,4 @@ vzeroupper
|
||||
# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 2.00 - - - - - - vxorps (%rax), %ymm1, %ymm2
|
||||
# CHECK-NEXT: - - - - - - - - - - - - - - vzeroall
|
||||
# CHECK-NEXT: - - - - - - - - - - - - - - vzeroupper
|
||||
|
||||
|
@ -1,8 +1,21 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -instruction-tables < %s | FileCheck %s
|
||||
|
||||
pclmulqdq $11, %xmm0, %xmm2
|
||||
pclmulqdq $11, (%rax), %xmm2
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
# CHECK-NEXT: [1]: #uOps
|
||||
# CHECK-NEXT: [2]: Latency
|
||||
# CHECK-NEXT: [3]: RThroughput
|
||||
# CHECK-NEXT: [4]: MayLoad
|
||||
# CHECK-NEXT: [5]: MayStore
|
||||
# CHECK-NEXT: [6]: HasSideEffects
|
||||
|
||||
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
# CHECK-NEXT: 1 2 1.00 pclmulqdq $11, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 7 1.00 * pclmulqdq $11, (%rax), %xmm2
|
||||
|
||||
# CHECK: Resources:
|
||||
# CHECK-NEXT: [0] - JALU0
|
||||
# CHECK-NEXT: [1] - JALU1
|
||||
@ -19,7 +32,12 @@ pclmulqdq $11, (%rax), %xmm2
|
||||
# CHECK-NEXT: [12] - JVALU1
|
||||
# CHECK-NEXT: [13] - JVIMUL
|
||||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
|
||||
# CHECK-NEXT: - - - - - 2.00 - 1.00 - - - - - 2.00
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
|
||||
# CHECK-NEXT: - - - - - 1.00 - - - - - - - 1.00 pclmulqdq $11, %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - - - 1.00 - 1.00 - - - - - 1.00 pclmulqdq $11, (%rax), %xmm2
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 < %s | FileCheck %s
|
||||
|
||||
vcvtph2ps %xmm0, %xmm2
|
||||
@ -53,7 +54,7 @@ vcvtps2ph $0, %ymm0, (%rax)
|
||||
# CHECK-NEXT: [13] - JVIMUL
|
||||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
|
||||
# CHECK-NEXT: - - - 2.00 2.00 - 12.00 2.00 - 2.00 12.00 - - -
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
|
@ -1,3 +1,4 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -instruction-tables < %s | FileCheck %s
|
||||
|
||||
addps %xmm0, %xmm2
|
||||
@ -134,7 +135,6 @@ unpcklps (%rax), %xmm2
|
||||
xorps %xmm0, %xmm2
|
||||
xorps (%rax), %xmm2
|
||||
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
# CHECK-NEXT: [1]: #uOps
|
||||
# CHECK-NEXT: [2]: Latency
|
||||
@ -236,7 +236,6 @@ xorps (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 xorps %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * xorps (%rax), %xmm2
|
||||
|
||||
|
||||
# CHECK: Resources:
|
||||
# CHECK-NEXT: [0] - JALU0
|
||||
# CHECK-NEXT: [1] - JALU1
|
||||
@ -253,8 +252,11 @@ xorps (%rax), %xmm2
|
||||
# CHECK-NEXT: [12] - JVALU1
|
||||
# CHECK-NEXT: [13] - JVIMUL
|
||||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
|
||||
# CHECK-NEXT: 13.00 - - 44.50 183.50 37.50 47.50 42.00 - 7.00 15.00 1.00 1.00 -
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
|
||||
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - addps %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - addps (%rax), %xmm2
|
||||
@ -347,3 +349,4 @@ xorps (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - unpcklps (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - xorps %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - xorps (%rax), %xmm2
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -instruction-tables < %s | FileCheck %s
|
||||
|
||||
addpd %xmm0, %xmm2
|
||||
@ -375,6 +376,270 @@ unpcklpd (%rax), %xmm2
|
||||
xorpd %xmm0, %xmm2
|
||||
xorpd (%rax), %xmm2
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
# CHECK-NEXT: [1]: #uOps
|
||||
# CHECK-NEXT: [2]: Latency
|
||||
# CHECK-NEXT: [3]: RThroughput
|
||||
# CHECK-NEXT: [4]: MayLoad
|
||||
# CHECK-NEXT: [5]: MayStore
|
||||
# CHECK-NEXT: [6]: HasSideEffects
|
||||
|
||||
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
# CHECK-NEXT: 1 3 1.00 addpd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 1.00 * addpd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 addsd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 1.00 * addsd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 addsubpd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 1.00 * addsubpd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 andnpd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * andnpd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 andpd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * andpd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 2 1.00 cmppd $0, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 7 1.00 * cmppd $0, (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 2 1.00 cmpsd $0, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 7 1.00 * cmpsd $0, (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 comisd %xmm0, %xmm1
|
||||
# CHECK-NEXT: 1 8 1.00 * comisd (%rax), %xmm1
|
||||
# CHECK-NEXT: 1 3 1.00 cvtdq2pd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 1.00 * cvtdq2pd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 cvtdq2ps %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 1.00 * cvtdq2ps (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 cvtpd2dq %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 1.00 * cvtpd2dq (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 cvtpd2ps %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 1.00 * cvtpd2ps (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 cvtps2dq %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 1.00 * cvtps2dq (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 cvtps2pd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 1.00 * cvtps2pd (%rax), %xmm2
|
||||
# CHECK-NEXT: 2 7 1.00 cvtsd2si %xmm0, %ecx
|
||||
# CHECK-NEXT: 2 7 1.00 cvtsd2si %xmm0, %rcx
|
||||
# CHECK-NEXT: 2 12 1.00 * cvtsd2si (%rax), %ecx
|
||||
# CHECK-NEXT: 2 12 1.00 * cvtsd2si (%rax), %rcx
|
||||
# CHECK-NEXT: 2 7 2.00 cvtsd2ss %xmm0, %xmm2
|
||||
# CHECK-NEXT: 2 12 2.00 * cvtsd2ss (%rax), %xmm2
|
||||
# CHECK-NEXT: 2 9 1.00 cvtsi2sdl %ecx, %xmm2
|
||||
# CHECK-NEXT: 2 9 1.00 cvtsi2sdq %rcx, %xmm2
|
||||
# CHECK-NEXT: 2 14 1.00 * cvtsi2sdl (%rax), %xmm2
|
||||
# CHECK-NEXT: 2 14 1.00 * cvtsi2sdl (%rax), %xmm2
|
||||
# CHECK-NEXT: 2 7 2.00 cvtss2sd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 2 12 2.00 * cvtss2sd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 cvttpd2dq %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 1.00 * cvttpd2dq (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 cvttps2dq %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 1.00 * cvttps2dq (%rax), %xmm2
|
||||
# CHECK-NEXT: 2 7 1.00 cvttsd2si %xmm0, %ecx
|
||||
# CHECK-NEXT: 2 7 1.00 cvttsd2si %xmm0, %rcx
|
||||
# CHECK-NEXT: 2 12 1.00 * cvttsd2si (%rax), %ecx
|
||||
# CHECK-NEXT: 2 12 1.00 * cvttsd2si (%rax), %rcx
|
||||
# CHECK-NEXT: 1 19 19.00 divpd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 24 19.00 * divpd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 19 19.00 divsd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 24 19.00 * divsd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 1.00 * * * maskmovdqu %xmm0, %xmm1
|
||||
# CHECK-NEXT: 1 2 1.00 maxpd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 7 1.00 * maxpd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 2 1.00 maxsd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 7 1.00 * maxsd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 2 1.00 minpd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 7 1.00 * minpd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 2 1.00 minsd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 7 1.00 * minsd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 movapd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 1 1.00 * movapd %xmm0, (%rax)
|
||||
# CHECK-NEXT: 1 5 1.00 * movapd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 movd %eax, %xmm2
|
||||
# CHECK-NEXT: 1 5 1.00 * movd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 movd %xmm0, %ecx
|
||||
# CHECK-NEXT: 1 1 1.00 * movd %xmm0, (%rax)
|
||||
# CHECK-NEXT: 1 1 0.50 movdqa %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 1 1.00 * movdqa %xmm0, (%rax)
|
||||
# CHECK-NEXT: 1 5 1.00 * movdqa (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 movdqu %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 1 1.00 * movdqu %xmm0, (%rax)
|
||||
# CHECK-NEXT: 1 5 1.00 * movdqu (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 1.00 * movhpd %xmm0, (%rax)
|
||||
# CHECK-NEXT: 1 6 1.00 * movhpd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 1.00 * movlpd %xmm0, (%rax)
|
||||
# CHECK-NEXT: 1 6 1.00 * movlpd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 movmskpd %xmm0, %ecx
|
||||
# CHECK-NEXT: 1 2 1.00 * movntdq %xmm0, (%rax)
|
||||
# CHECK-NEXT: 1 3 1.00 * movntpd %xmm0, (%rax)
|
||||
# CHECK-NEXT: 1 1 0.50 movq %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 movq %rax, %xmm2
|
||||
# CHECK-NEXT: 1 5 1.00 * movq (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 movq %xmm0, %rcx
|
||||
# CHECK-NEXT: 1 1 1.00 * movq %xmm0, (%rax)
|
||||
# CHECK-NEXT: 1 1 0.50 movsd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 1 1.00 * movsd %xmm0, (%rax)
|
||||
# CHECK-NEXT: 1 5 1.00 * movsd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 movupd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 1 1.00 * movupd %xmm0, (%rax)
|
||||
# CHECK-NEXT: 1 5 1.00 * movupd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 4 2.00 mulpd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 9 2.00 * mulpd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 4 2.00 mulsd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 9 2.00 * mulsd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 orpd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * orpd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pabsb %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pabsb (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pabsd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pabsd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pabsw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pabsw (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 packssdw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * packssdw (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 packsswb %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * packsswb (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 packuswb %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * packuswb (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 paddb %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * paddb (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 paddd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * paddd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 paddq %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * paddq (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 paddsb %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * paddsb (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 paddsw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * paddsw (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 paddusb %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * paddusb (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 paddusw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * paddusw (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 paddw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * paddw (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pand %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pand (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pandn %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pandn (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pavgb %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pavgb (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pavgw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pavgw (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pcmpeqb %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pcmpeqb (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pcmpeqd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pcmpeqd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pcmpeqw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pcmpeqw (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pcmpgtb %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pcmpgtb (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pcmpgtd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pcmpgtd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pcmpgtw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pcmpgtw (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 pextrw $1, %xmm0, %ecx
|
||||
# CHECK-NEXT: 1 2 1.00 pmaddwd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 7 1.00 * pmaddwd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pmaxsw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pmaxsw (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pmaxub %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pmaxub (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pminsw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pminsw (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pminub %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pminub (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 pmovmskb %xmm0, %ecx
|
||||
# CHECK-NEXT: 1 2 1.00 pmulhuw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 7 1.00 * pmulhuw (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 2 1.00 pmulhw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 7 1.00 * pmulhw (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 2 1.00 pmullw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 7 1.00 * pmullw (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 2 1.00 pmuludq %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 7 1.00 * pmuludq (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 por %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * por (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 2 0.50 psadbw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 7 1.00 * psadbw (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pshufd $1, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pshufd $1, (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pshufhw $1, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pshufhw $1, (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pshuflw $1, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pshuflw $1, (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pslld $1, %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pslld %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pslld (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pslldq $1, %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 psllq $1, %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 psllq %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * psllq (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 psllw $1, %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 psllw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * psllw (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 psrad $1, %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 psrad %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * psrad (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 psraw $1, %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 psraw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * psraw (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 psrld $1, %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 psrld %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * psrld (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 psrldq $1, %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 psrlq $1, %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 psrlq %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * psrlq (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 psrlw $1, %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 psrlw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * psrlw (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 psubb %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * psubb (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 psubd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * psubd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 psubq %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * psubq (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 psubsb %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * psubsb (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 psubsw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * psubsw (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 psubusb %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * psubusb (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 psubusw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * psubusw (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 psubw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * psubw (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 punpckhbw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * punpckhbw (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 punpckhdq %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * punpckhdq (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 punpckhqdq %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * punpckhqdq (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 punpckhwd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * punpckhwd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 punpcklbw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * punpcklbw (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 punpckldq %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * punpckldq (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 punpcklqdq %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * punpcklqdq (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 punpcklwd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * punpcklwd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pxor %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pxor (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 shufpd $1, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * shufpd $1, (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 27 27.00 sqrtpd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 32 27.00 * sqrtpd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 27 27.00 sqrtsd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 32 27.00 * sqrtsd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 subpd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 1.00 * subpd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 subsd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 1.00 * subsd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 ucomisd %xmm0, %xmm1
|
||||
# CHECK-NEXT: 1 8 1.00 * ucomisd (%rax), %xmm1
|
||||
# CHECK-NEXT: 1 1 0.50 unpckhpd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * unpckhpd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 unpcklpd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * unpcklpd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 xorpd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * xorpd (%rax), %xmm2
|
||||
|
||||
# CHECK: Resources:
|
||||
# CHECK-NEXT: [0] - JALU0
|
||||
# CHECK-NEXT: [1] - JALU1
|
||||
@ -391,8 +656,11 @@ xorpd (%rax), %xmm2
|
||||
# CHECK-NEXT: [12] - JVALU1
|
||||
# CHECK-NEXT: [13] - JVIMUL
|
||||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
|
||||
# CHECK-NEXT: 17.00 2.00 - 47.50 202.50 115.00 127.00 114.00 - 12.00 43.00 65.50 65.50 10.00
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
|
||||
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - addpd %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - addpd (%rax), %xmm2
|
||||
@ -648,3 +916,4 @@ xorpd (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - unpcklpd (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - xorpd %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - xorpd (%rax), %xmm2
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -instruction-tables < %s | FileCheck %s
|
||||
|
||||
addsubpd %xmm0, %xmm2
|
||||
@ -29,6 +30,35 @@ movshdup (%rax), %xmm2
|
||||
movsldup %xmm0, %xmm2
|
||||
movsldup (%rax), %xmm2
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
# CHECK-NEXT: [1]: #uOps
|
||||
# CHECK-NEXT: [2]: Latency
|
||||
# CHECK-NEXT: [3]: RThroughput
|
||||
# CHECK-NEXT: [4]: MayLoad
|
||||
# CHECK-NEXT: [5]: MayStore
|
||||
# CHECK-NEXT: [6]: HasSideEffects
|
||||
|
||||
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
# CHECK-NEXT: 1 3 1.00 addsubpd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 1.00 * addsubpd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 addsubps %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 1.00 * addsubps (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 haddpd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 1.00 * haddpd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 haddps %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 1.00 * haddps (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 hsubpd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 1.00 * hsubpd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 hsubps %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 1.00 * hsubps (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 5 1.00 * lddqu (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 movddup %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * movddup (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 movshdup %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * movshdup (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 movsldup %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * movsldup (%rax), %xmm2
|
||||
|
||||
# CHECK: Resources:
|
||||
# CHECK-NEXT: [0] - JALU0
|
||||
# CHECK-NEXT: [1] - JALU1
|
||||
@ -45,6 +75,10 @@ movsldup (%rax), %xmm2
|
||||
# CHECK-NEXT: [12] - JVALU1
|
||||
# CHECK-NEXT: [13] - JVIMUL
|
||||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
|
||||
# CHECK-NEXT: - - - 15.00 3.00 15.50 3.50 10.00 - - - 0.50 0.50 -
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
|
||||
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - addsubpd %xmm0, %xmm2
|
||||
@ -66,3 +100,4 @@ movsldup (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - movshdup (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - movsldup %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - movsldup (%rax), %xmm2
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -instruction-tables < %s | FileCheck %s
|
||||
|
||||
blendpd $11, %xmm0, %xmm2
|
||||
@ -145,6 +146,112 @@ roundsd $1, (%rax), %xmm2
|
||||
roundss $1, %xmm0, %xmm2
|
||||
roundss $1, (%rax), %xmm2
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
# CHECK-NEXT: [1]: #uOps
|
||||
# CHECK-NEXT: [2]: Latency
|
||||
# CHECK-NEXT: [3]: RThroughput
|
||||
# CHECK-NEXT: [4]: MayLoad
|
||||
# CHECK-NEXT: [5]: MayStore
|
||||
# CHECK-NEXT: [6]: HasSideEffects
|
||||
|
||||
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
# CHECK-NEXT: 1 1 0.50 blendpd $11, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * blendpd $11, (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 blendps $11, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * blendps $11, (%rax), %xmm2
|
||||
# CHECK-NEXT: 3 2 2.00 blendvpd %xmm0, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 3 7 2.00 * blendvpd %xmm0, (%rax), %xmm2
|
||||
# CHECK-NEXT: 3 2 2.00 blendvps %xmm0, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 3 7 2.00 * blendvps %xmm0, (%rax), %xmm2
|
||||
# CHECK-NEXT: 3 9 3.00 dppd $22, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 3 14 3.00 * dppd $22, (%rax), %xmm2
|
||||
# CHECK-NEXT: 5 11 3.00 dpps $22, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 5 16 3.00 * dpps $22, (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 extractps $1, %xmm0, %ecx
|
||||
# CHECK-NEXT: 1 3 1.00 * extractps $1, %xmm0, (%rax)
|
||||
# CHECK-NEXT: 1 1 0.50 insertps $1, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * insertps $1, (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 5 1.00 * movntdqa (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 2.00 mpsadbw $1, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 2.00 * mpsadbw $1, (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 packusdw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * packusdw (%rax), %xmm2
|
||||
# CHECK-NEXT: 3 2 2.00 pblendvb %xmm0, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 3 7 2.00 * pblendvb %xmm0, (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pblendw $11, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pblendw $11, (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pcmpeqq %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pcmpeqq (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 pextrb $1, %xmm0, %ecx
|
||||
# CHECK-NEXT: 1 3 1.00 * pextrb $1, %xmm0, (%rax)
|
||||
# CHECK-NEXT: 1 3 1.00 pextrd $1, %xmm0, %ecx
|
||||
# CHECK-NEXT: 1 3 1.00 * pextrd $1, %xmm0, (%rax)
|
||||
# CHECK-NEXT: 1 3 1.00 pextrq $1, %xmm0, %rcx
|
||||
# CHECK-NEXT: 1 3 1.00 * pextrq $1, %xmm0, (%rax)
|
||||
# CHECK-NEXT: 1 3 1.00 * pextrw $1, %xmm0, (%rax)
|
||||
# CHECK-NEXT: 1 2 1.00 phminposuw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 7 1.00 * phminposuw (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pinsrb $1, %eax, %xmm1
|
||||
# CHECK-NEXT: 1 6 1.00 * pinsrb $1, (%rax), %xmm1
|
||||
# CHECK-NEXT: 1 1 0.50 pinsrd $1, %eax, %xmm1
|
||||
# CHECK-NEXT: 1 6 1.00 * pinsrd $1, (%rax), %xmm1
|
||||
# CHECK-NEXT: 1 1 0.50 pinsrq $1, %rax, %xmm1
|
||||
# CHECK-NEXT: 1 6 1.00 * pinsrq $1, (%rax), %xmm1
|
||||
# CHECK-NEXT: 1 1 0.50 pmaxsb %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pmaxsb (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pmaxsd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pmaxsd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pmaxud %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pmaxud (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pmaxuw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pmaxuw (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pminsb %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pminsb (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pminsd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pminsd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pminud %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pminud (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pminuw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pminuw (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pmovsxbd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pmovsxbd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pmovsxbq %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pmovsxbq (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pmovsxbw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pmovsxbw (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pmovsxdq %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pmovsxdq (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pmovsxwd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pmovsxwd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pmovsxwq %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pmovsxwq (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pmovzxbd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pmovzxbd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pmovzxbq %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pmovzxbq (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pmovzxbw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pmovzxbw (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pmovzxdq %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pmovzxdq (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pmovzxwd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pmovzxwd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pmovzxwq %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pmovzxwq (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 2 1.00 pmuldq %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 7 1.00 * pmuldq (%rax), %xmm2
|
||||
# CHECK-NEXT: 3 4 2.00 pmulld %xmm0, %xmm2
|
||||
# CHECK-NEXT: 3 9 2.00 * pmulld (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 ptest %xmm0, %xmm1
|
||||
# CHECK-NEXT: 1 8 1.00 * ptest (%rax), %xmm1
|
||||
# CHECK-NEXT: 1 3 1.00 roundpd $1, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 1.00 * roundpd $1, (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 roundps $1, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 1.00 * roundps $1, (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 roundsd $1, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 1.00 * roundsd $1, (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 roundss $1, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 1.00 * roundss $1, (%rax), %xmm2
|
||||
|
||||
# CHECK: Resources:
|
||||
# CHECK-NEXT: [0] - JALU0
|
||||
# CHECK-NEXT: [1] - JALU1
|
||||
@ -161,6 +268,10 @@ roundss $1, (%rax), %xmm2
|
||||
# CHECK-NEXT: [12] - JVALU1
|
||||
# CHECK-NEXT: [13] - JVIMUL
|
||||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
|
||||
# CHECK-NEXT: 6.00 - - 37.00 23.00 57.50 42.50 44.00 - 5.00 5.00 31.50 31.50 12.00
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
|
||||
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - blendpd $11, %xmm0, %xmm2
|
||||
@ -259,3 +370,4 @@ roundss $1, (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - roundsd $1, (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - roundss $1, %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - roundss $1, (%rax), %xmm2
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -instruction-tables < %s | FileCheck %s
|
||||
|
||||
crc32b %al, %ecx
|
||||
@ -30,6 +31,36 @@ pcmpistrm $1, (%rax), %xmm2
|
||||
pcmpgtq %xmm0, %xmm2
|
||||
pcmpgtq (%rax), %xmm2
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
# CHECK-NEXT: [1]: #uOps
|
||||
# CHECK-NEXT: [2]: Latency
|
||||
# CHECK-NEXT: [3]: RThroughput
|
||||
# CHECK-NEXT: [4]: MayLoad
|
||||
# CHECK-NEXT: [5]: MayStore
|
||||
# CHECK-NEXT: [6]: HasSideEffects
|
||||
|
||||
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
# CHECK-NEXT: 3 3 2.00 crc32b %al, %ecx
|
||||
# CHECK-NEXT: 3 6 2.00 * crc32b (%rax), %ecx
|
||||
# CHECK-NEXT: 3 3 2.00 crc32l %eax, %ecx
|
||||
# CHECK-NEXT: 3 6 2.00 * crc32l (%rax), %ecx
|
||||
# CHECK-NEXT: 3 3 2.00 crc32w %ax, %ecx
|
||||
# CHECK-NEXT: 3 6 2.00 * crc32w (%rax), %ecx
|
||||
# CHECK-NEXT: 3 3 2.00 crc32b %al, %rcx
|
||||
# CHECK-NEXT: 3 6 2.00 * crc32b (%rax), %rcx
|
||||
# CHECK-NEXT: 3 3 2.00 crc32q %rax, %rcx
|
||||
# CHECK-NEXT: 3 6 2.00 * crc32q (%rax), %rcx
|
||||
# CHECK-NEXT: 9 14 5.00 pcmpestri $1, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 9 19 5.00 * pcmpestri $1, (%rax), %xmm2
|
||||
# CHECK-NEXT: 9 14 5.00 pcmpestrm $1, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 9 19 5.00 * pcmpestrm $1, (%rax), %xmm2
|
||||
# CHECK-NEXT: 3 7 2.00 pcmpistri $1, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 3 12 2.00 * pcmpistri $1, (%rax), %xmm2
|
||||
# CHECK-NEXT: 3 8 2.00 pcmpistrm $1, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 3 13 2.00 * pcmpistrm $1, (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 pcmpgtq %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * pcmpgtq (%rax), %xmm2
|
||||
|
||||
# CHECK: Resources:
|
||||
# CHECK-NEXT: [0] - JALU0
|
||||
# CHECK-NEXT: [1] - JALU1
|
||||
@ -46,6 +77,10 @@ pcmpgtq (%rax), %xmm2
|
||||
# CHECK-NEXT: [12] - JVALU1
|
||||
# CHECK-NEXT: [13] - JVIMUL
|
||||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
|
||||
# CHECK-NEXT: 28.00 20.00 - 8.00 - 1.00 9.00 18.00 - 8.00 - 13.00 37.00 -
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
|
||||
# CHECK-NEXT: 2.00 2.00 - - - - - - - - - - - - crc32b %al, %ecx
|
||||
@ -68,3 +103,4 @@ pcmpgtq (%rax), %xmm2
|
||||
# CHECK-NEXT: 1.00 - - 1.00 - - 1.00 1.00 - - - - 2.00 - pcmpistrm $1, (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - pcmpgtq %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - pcmpgtq (%rax), %xmm2
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -instruction-tables < %s | FileCheck %s
|
||||
|
||||
extrq %xmm0, %xmm2
|
||||
@ -9,6 +10,22 @@ insertq $22, $22, %xmm0, %xmm2
|
||||
movntsd %xmm0, (%rax)
|
||||
movntss %xmm0, (%rax)
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
# CHECK-NEXT: [1]: #uOps
|
||||
# CHECK-NEXT: [2]: Latency
|
||||
# CHECK-NEXT: [3]: RThroughput
|
||||
# CHECK-NEXT: [4]: MayLoad
|
||||
# CHECK-NEXT: [5]: MayStore
|
||||
# CHECK-NEXT: [6]: HasSideEffects
|
||||
|
||||
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
# CHECK-NEXT: 1 1 0.50 extrq %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 extrq $22, $2, %xmm2
|
||||
# CHECK-NEXT: 1 2 2.00 insertq %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 2 2.00 insertq $22, $22, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 * movntsd %xmm0, (%rax)
|
||||
# CHECK-NEXT: 1 3 1.00 * movntss %xmm0, (%rax)
|
||||
|
||||
# CHECK: Resources:
|
||||
# CHECK-NEXT: [0] - JALU0
|
||||
# CHECK-NEXT: [1] - JALU1
|
||||
@ -25,6 +42,10 @@ movntss %xmm0, (%rax)
|
||||
# CHECK-NEXT: [12] - JVALU1
|
||||
# CHECK-NEXT: [13] - JVIMUL
|
||||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
|
||||
# CHECK-NEXT: - - - - - 2.00 4.00 - - 2.00 2.00 5.00 5.00 -
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
|
||||
# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - extrq %xmm0, %xmm2
|
||||
@ -33,3 +54,4 @@ movntss %xmm0, (%rax)
|
||||
# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 2.00 2.00 - insertq $22, $22, %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - movntsd %xmm0, (%rax)
|
||||
# CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - movntss %xmm0, (%rax)
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -instruction-tables < %s | FileCheck %s
|
||||
|
||||
palignr $1, %xmm0, %xmm2
|
||||
@ -39,6 +40,42 @@ psignd (%rax), %xmm2
|
||||
psignw %xmm0, %xmm2
|
||||
psignw (%rax), %xmm2
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
# CHECK-NEXT: [1]: #uOps
|
||||
# CHECK-NEXT: [2]: Latency
|
||||
# CHECK-NEXT: [3]: RThroughput
|
||||
# CHECK-NEXT: [4]: MayLoad
|
||||
# CHECK-NEXT: [5]: MayStore
|
||||
# CHECK-NEXT: [6]: HasSideEffects
|
||||
|
||||
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
# CHECK-NEXT: 1 1 0.50 palignr $1, %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * palignr $1, (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 phaddd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * phaddd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 phaddsw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * phaddsw (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 phaddw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * phaddw (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 phsubd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * phsubd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 phsubsw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * phsubsw (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 phsubw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * phsubw (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 2 1.00 pmaddubsw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 7 1.00 * pmaddubsw (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 2 1.00 pmulhrsw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 7 1.00 * pmulhrsw (%rax), %xmm2
|
||||
# CHECK-NEXT: 3 2 2.00 pshufb %xmm0, %xmm2
|
||||
# CHECK-NEXT: 3 7 2.00 * pshufb (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 psignb %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * psignb (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 psignd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * psignd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 1 0.50 psignw %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 6 1.00 * psignw (%rax), %xmm2
|
||||
|
||||
# CHECK: Resources:
|
||||
# CHECK-NEXT: [0] - JALU0
|
||||
# CHECK-NEXT: [1] - JALU1
|
||||
@ -55,6 +92,10 @@ psignw (%rax), %xmm2
|
||||
# CHECK-NEXT: [12] - JVALU1
|
||||
# CHECK-NEXT: [13] - JVIMUL
|
||||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
|
||||
# CHECK-NEXT: - - - - - 15.00 11.00 13.00 - - - 14.00 14.00 4.00
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
|
||||
# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - palignr $1, %xmm0, %xmm2
|
||||
@ -83,3 +124,4 @@ psignw (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - psignd (%rax), %xmm2
|
||||
# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - psignw %xmm0, %xmm2
|
||||
# CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - psignw (%rax), %xmm2
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -scheduler-stats < %s | FileCheck %s
|
||||
|
||||
vmulps (%rsi), %xmm0, %xmm0
|
||||
@ -48,7 +49,7 @@ add %rsi, %rsi
|
||||
# CHECK-NEXT: [13] - JVIMUL
|
||||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
|
||||
# CHECK-NEXT: - 1.00 - - 1.00 - 1.00 1.00 - - - - - -
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
|
@ -1,15 +1,15 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=100 < %s | FileCheck %s
|
||||
|
||||
add %edi, %eax
|
||||
|
||||
# CHECK: Iterations: 100
|
||||
# CHECK: Iterations: 100
|
||||
# CHECK-NEXT: Instructions: 100
|
||||
# CHECK-NEXT: Total Cycles: 103
|
||||
# CHECK-NEXT: Dispatch Width: 2
|
||||
# CHECK-NEXT: IPC: 0.97
|
||||
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
# CHECK: Instruction Info:
|
||||
# CHECK-NEXT: [1]: #uOps
|
||||
# CHECK-NEXT: [2]: Latency
|
||||
# CHECK-NEXT: [3]: RThroughput
|
||||
@ -17,11 +17,10 @@ add %edi, %eax
|
||||
# CHECK-NEXT: [5]: MayStore
|
||||
# CHECK-NEXT: [6]: HasSideEffects
|
||||
|
||||
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
# CHECK-NEXT: 1 1 0.50 addl %edi, %eax
|
||||
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
# CHECK-NEXT: 1 1 0.50 addl %edi, %eax
|
||||
|
||||
|
||||
# CHECK-LABEL: Resources:
|
||||
# CHECK: Resources:
|
||||
# CHECK-NEXT: [0] - JALU0
|
||||
# CHECK-NEXT: [1] - JALU1
|
||||
# CHECK-NEXT: [2] - JDiv
|
||||
@ -37,11 +36,11 @@ add %edi, %eax
|
||||
# CHECK-NEXT: [12] - JVALU1
|
||||
# CHECK-NEXT: [13] - JVIMUL
|
||||
|
||||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
|
||||
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - -
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
|
||||
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - addl %edi, %eax
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
|
||||
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - addl %edi, %eax
|
||||
|
||||
|
@ -1,8 +1,15 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s
|
||||
|
||||
vaddps %xmm0, %xmm0, %xmm1
|
||||
vandps (%rdi), %xmm1, %xmm2
|
||||
|
||||
# CHECK: Iterations: 1
|
||||
# CHECK-NEXT: Instructions: 2
|
||||
# CHECK-NEXT: Total Cycles: 9
|
||||
# CHECK-NEXT: Dispatch Width: 2
|
||||
# CHECK-NEXT: IPC: 0.22
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
# CHECK-NEXT: [1]: #uOps
|
||||
# CHECK-NEXT: [2]: Latency
|
||||
@ -15,8 +22,20 @@ vandps (%rdi), %xmm1, %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm0, %xmm1
|
||||
# CHECK-NEXT: 1 6 1.00 * vandps (%rdi), %xmm1, %xmm2
|
||||
|
||||
|
||||
# CHECK: Timeline view:
|
||||
|
||||
# CHECK: Index 012345678
|
||||
|
||||
# CHECK: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1
|
||||
# CHECK-NEXT: [0,1] DeeeeeeER vandps (%rdi), %xmm1, %xmm2
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
||||
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
||||
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
||||
|
||||
# CHECK: [0] [1] [2] [3]
|
||||
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm1
|
||||
# CHECK-NEXT: 1. 1 1.0 0.0 0.0 vandps (%rdi), %xmm1, %xmm2
|
||||
|
||||
|
@ -1,8 +1,15 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s
|
||||
|
||||
vaddps %ymm0, %ymm0, %ymm1
|
||||
vandps (%rdi), %ymm1, %ymm2
|
||||
|
||||
# CHECK: Iterations: 1
|
||||
# CHECK-NEXT: Instructions: 2
|
||||
# CHECK-NEXT: Total Cycles: 10
|
||||
# CHECK-NEXT: Dispatch Width: 2
|
||||
# CHECK-NEXT: IPC: 0.20
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
# CHECK-NEXT: [1]: #uOps
|
||||
# CHECK-NEXT: [2]: Latency
|
||||
@ -15,8 +22,20 @@ vandps (%rdi), %ymm1, %ymm2
|
||||
# CHECK-NEXT: 2 3 2.00 vaddps %ymm0, %ymm0, %ymm1
|
||||
# CHECK-NEXT: 2 6 2.00 * vandps (%rdi), %ymm1, %ymm2
|
||||
|
||||
|
||||
# CHECK: Timeline view:
|
||||
|
||||
# CHECK: Index 0123456789
|
||||
|
||||
# CHECK: [0,0] DeeeER . vaddps %ymm0, %ymm0, %ymm1
|
||||
# CHECK-NEXT: [0,1] .DeeeeeeER vandps (%rdi), %ymm1, %ymm2
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
||||
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
||||
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
||||
|
||||
# CHECK: [0] [1] [2] [3]
|
||||
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vaddps %ymm0, %ymm0, %ymm1
|
||||
# CHECK-NEXT: 1. 1 1.0 1.0 0.0 vandps (%rdi), %ymm1, %ymm2
|
||||
|
||||
|
368
utils/update_mca_test_checks.py
Normal file
368
utils/update_mca_test_checks.py
Normal file
@ -0,0 +1,368 @@
|
||||
#!/usr/bin/env python2.7
|
||||
|
||||
"""A test case update script.
|
||||
|
||||
This script is a utility to update LLVM 'llvm-mca' based test cases with new
|
||||
FileCheck patterns.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
from collections import defaultdict
|
||||
import difflib
|
||||
import glob
|
||||
import os
|
||||
import sys
|
||||
import warnings
|
||||
|
||||
from UpdateTestChecks import common
|
||||
|
||||
|
||||
# Character that introduces a comment line in the test files handled here.
COMMENT_CHAR = '#'

# Fixed leading text of the "autogenerated" banner line. _write_output() drops
# any input line that starts with this text.
ADVERT_PREFIX = '{} NOTE: Assertions have been autogenerated by '.format(
    COMMENT_CHAR)

# Complete banner line: the prefix followed by this script's file name under
# utils/.
ADVERT = '{}utils/{}'.format(ADVERT_PREFIX, os.path.basename(__file__))
|
||||
|
||||
|
||||
class Error(Exception):
    """Generic error, meant to be raised without printing a traceback."""
|
||||
|
||||
|
||||
def _warn(msg):
|
||||
""" Log a user warning to stderr.
|
||||
"""
|
||||
warnings.warn(msg, Warning, stacklevel=2)
|
||||
|
||||
|
||||
def _configure_warnings(args):
|
||||
warnings.resetwarnings()
|
||||
if args.w:
|
||||
warnings.simplefilter('ignore')
|
||||
if args.Werror:
|
||||
warnings.simplefilter('error')
|
||||
|
||||
|
||||
def _showwarning(message, category, filename, lineno, file=None, line=None):
|
||||
""" Version of warnings.showwarning that won't attempt to print out the
|
||||
line at the location of the warning if the line text is not explicitly
|
||||
specified.
|
||||
"""
|
||||
if file is None:
|
||||
file = sys.stderr
|
||||
if line is None:
|
||||
line = ''
|
||||
file.write(warnings.formatwarning(message, category, filename, lineno, line))
|
||||
|
||||
|
||||
def _parse_args():
    """Parse the command line, configure warnings and return the options.

    Returns:
      The argparse namespace with the attributes ``verbose``, ``w``,
      ``Werror``, ``llvm_mca_binary`` and ``tests``.

    A warning is issued when the base name of the chosen binary is not
    'llvm-mca'.
    """
    parser = argparse.ArgumentParser(description=__doc__)

    # (flags, keyword arguments) for each option, in --help order.
    option_specs = (
        (('-v', '--verbose'),
         dict(action='store_true', help='show verbose output')),
        (('-w',),
         dict(action='store_true', help='suppress warnings')),
        (('-Werror',),
         dict(action='store_true', help='promote warnings to errors')),
        (('--llvm-mca-binary',),
         dict(metavar='<path>',
              default='llvm-mca',
              help='the binary to use to generate the test case '
                   '(default: llvm-mca)')),
        (('tests',),
         dict(metavar='<test-path>', nargs='+')),
    )
    for flags, options in option_specs:
        parser.add_argument(*flags, **options)

    args = parser.parse_args()

    # The filters must be in place before the first warning can be issued.
    _configure_warnings(args)

    binary_name = os.path.basename(args.llvm_mca_binary)
    if binary_name != 'llvm-mca':
        _warn('unexpected binary name: {}'.format(args.llvm_mca_binary))

    return args
|
||||
|
||||
|
||||
def _find_run_lines(input_lines, args):
    """Collect the RUN lines of a test, joining backslash continuations.

    Args:
      input_lines: the lines of the test file.
      args: parsed options; only ``args.verbose`` is read.

    Returns:
      A list of RUN command strings. Each entry is the text captured by group
      1 of common.RUN_LINE_RE. A RUN line that ends with a backslash is merged
      with the RUN line after it, separated by a single space.
    """
    matches = (common.RUN_LINE_RE.match(l) for l in input_lines)
    raw_lines = [m.group(1) for m in matches if m]

    run_lines = []
    for raw_line in raw_lines:
        # Test for ONE trailing backslash. The raw literal r'\\' is two
        # characters long and would never match an ordinary continuation.
        if run_lines and run_lines[-1].endswith('\\'):
            run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + raw_line
        else:
            run_lines.append(raw_line)

    if args.verbose:
        sys.stderr.write('Found {} RUN line{}:\n'.format(
            len(run_lines), '' if len(run_lines) == 1 else 's'))
        for line in run_lines:
            sys.stderr.write('  RUN: {}\n'.format(line))

    return run_lines
|
||||
|
||||
|
||||
def _get_run_infos(run_lines, args):
    """Turn RUN lines into (check prefixes, tool arguments) pairs.

    A RUN line is used only when it has the form '<tool> ... | FileCheck ...',
    where <tool> is the base name of ``args.llvm_mca_binary``. Any other line
    is skipped with a warning.

    Args:
      run_lines: RUN command strings.
      args: parsed options; only ``args.llvm_mca_binary`` is read.

    Returns:
      A list of ``(check_prefixes, tool_cmd_args)`` tuples. ``check_prefixes``
      is the list of prefixes found by common.CHECK_PREFIX_RE in the FileCheck
      command, or ['CHECK'] when there are none. ``tool_cmd_args`` is the tool
      command line with the tool name and the '%s' input placeholder removed.
    """
    run_infos = []
    for run_line in run_lines:
        # Everything before the first '|' is the tool command, the rest is
        # the FileCheck command.
        tool_part, pipe, check_part = run_line.partition('|')
        if not pipe:
            _warn('could not split tool and filecheck commands: {}'.format(
                run_line))
            continue
        tool_cmd = tool_part.strip()
        filecheck_cmd = check_part.strip()

        tool_basename = os.path.basename(args.llvm_mca_binary)

        if not tool_cmd.startswith(tool_basename + ' '):
            _warn('skipping non-{} RUN line: {}'.format(tool_basename,
                                                        run_line))
            continue

        if not filecheck_cmd.startswith('FileCheck '):
            _warn('skipping non-FileCheck RUN line: {}'.format(run_line))
            continue

        # Drop the tool name, then the input redirection / placeholder.
        tool_cmd_args = tool_cmd[len(tool_basename):].strip()
        for placeholder in ('< %s', '%s'):
            tool_cmd_args = tool_cmd_args.replace(placeholder, '')
        tool_cmd_args = tool_cmd_args.strip()

        check_prefixes = []
        for match in common.CHECK_PREFIX_RE.finditer(filecheck_cmd):
            check_prefixes.extend(match.group(1).split(','))

        run_infos.append((check_prefixes or ['CHECK'], tool_cmd_args))

    return run_infos
|
||||
|
||||
|
||||
def _get_block_infos(run_infos, test_path, args):  # noqa
    """Run the tool for every RUN line and map output blocks to prefixes.

    The tool output is cut into 'blocks': runs of lines separated from each
    other by an empty line. For example, this text holds three blocks:

      This
      is
      one
      block

      This is
      another block

      This is yet another block

    Args:
      run_infos: list of ``(check_prefixes, tool_cmd_args)`` tuples.
      test_path: path of the test file handed to the tool.
      args: parsed options; only ``args.llvm_mca_binary`` is read.

    Returns:
      A nested dict ``block number -> block text -> sorted list of prefixes``.
      The prefix list for each block text is reduced as far as possible: a
      prefix shared with a different text of the same block number is
      dropped, and when several prefix sets produce the same text only the
      prefixes common to all of them are kept. If a set still holds more than
      one prefix, the first in sorted order is used and a warning is issued
      for the rest.
    """

    def _block_key(tool_args, prefixes):
        """Build a hashable key from the tool arguments and the prefixes."""
        return ' '.join([tool_args] + prefixes)

    blocks_by_run = {}
    longest = 0

    # Invoke the tool once per RUN line and cut its output into blocks.
    for prefixes, tool_args in run_infos:
        output = common.invoke_tool(args.llvm_mca_binary,
                                    tool_args,
                                    test_path)

        # Whitespace-only lines become empty lines so they separate blocks.
        normalized = '\n'.join(line if line.strip() else ''
                               for line in output.splitlines())

        # Trailing whitespace is removed. Leading whitespace other than
        # newlines is kept so that columns still line up visually.
        blocks = [chunk.lstrip('\n').rstrip()
                  for chunk in normalized.split('\n\n')]
        blocks_by_run[_block_key(tool_args, prefixes)] = blocks
        longest = max(longest, len(blocks))

    # Pad with empty blocks so that every run has the same number of blocks.
    for blocks in blocks_by_run.values():
        blocks.extend([''] * (longest - len(blocks)))

    # block number -> block text -> list of prefix sets
    block_infos = defaultdict(lambda: defaultdict(list))
    for prefixes, tool_args in run_infos:
        blocks = blocks_by_run[_block_key(tool_args, prefixes)]
        for block_num, block_text in enumerate(blocks):
            block_infos[block_num][block_text].append(set(prefixes))

    # Reduce the prefixes of every block to the smallest usable set.
    for block_num in range(len(block_infos)):
        per_text = block_infos[block_num]
        texts = list(per_text)
        set_lists = [per_text[text] for text in texts]

        # With several texts for one block number, remove every prefix that
        # also appears under another text.
        # E.g. [ [{ALL,FOO}] , [{ALL,BAR}] ] -> [ [{FOO}] , [{BAR}] ]
        pruned = []
        for idx, set_list in enumerate(set_lists):
            elsewhere = set()
            for other_idx, other_list in enumerate(set_lists):
                if other_idx != idx:
                    for prefix_set in other_list:
                        elsewhere |= prefix_set
            pruned.append([prefix_set - elsewhere for prefix_set in set_list])

        for idx, text in enumerate(texts):
            # When one text is produced by several prefix sets, prefer the
            # prefixes common to all of them.
            # E.g. [ {ALL,FOO} , {ALL,BAR} ] -> [{ALL}]
            shared = set.intersection(*pruned[idx])
            if shared:
                pruned[idx] = [shared]

            # Keep one prefix per remaining set and warn about any others
            # that would generate the same text.
            chosen = set()
            for prefix_set in pruned[idx]:
                ordered = sorted(prefix_set)
                if not ordered:
                    continue
                chosen.add(ordered[0])
                if len(ordered) > 1:
                    _warn('Multiple prefixes generating same output: {} '
                          '(discarding {})'.format(','.join(ordered),
                                                   ','.join(ordered[1:])))

            per_text[text] = sorted(chosen)

    return block_infos
|
||||
|
||||
|
||||
def _write_output(test_path, input_lines, prefix_list, block_infos,  # noqa
                  args):
    """Rebuild the test file contents and write them back if they changed.

    Existing check lines for the prefixes in use are dropped, the remaining
    input lines are kept (with leading whitespace normalised and runs of
    blank lines collapsed), and the check lines described by block_infos are
    appended at the end. A summary of the change is written to stderr.

    Args:
      test_path: Path of the test file; overwritten when the output differs
        from input_lines.
      input_lines: List of the current lines of the test file, without
        trailing newlines (a newline is appended to each line on write).
      prefix_list: Iterable of (prefixes, _) pairs; only the prefixes are
        used, to decide which existing check lines are replaced.
      block_infos: Sequence indexed by block number. Each item maps a block
        text to the prefixes under which that text is emitted.
      args: Parsed command line options; only args.verbose is read here.
    """
    prefix_set = set(prefix for prefixes, _ in prefix_list
                     for prefix in prefixes)
    not_prefix_set = set()

    output_lines = []
    for input_line in input_lines:
        # Drop any existing advertisement; a fresh one is inserted below.
        if input_line.startswith(ADVERT_PREFIX):
            continue

        if input_line.startswith(COMMENT_CHAR):
            m = common.CHECK_RE.match(input_line)
            prefix = m.group(1) if m is not None else None

            if '{}-NOT:'.format(prefix) in input_line:
                not_prefix_set.add(prefix)

            # Comment lines that are not checks for one of our prefixes, or
            # that belong to a prefix with hand-written "-NOT:" checks, are
            # preserved verbatim.
            if prefix not in prefix_set or prefix in not_prefix_set:
                output_lines.append(input_line)
                continue

        if not common.should_add_line_to_output(input_line, prefix_set):
            continue

        # This input line of the function body will go as-is into the output.
        # Except make leading whitespace uniform: 2 spaces.
        input_line = common.SCRUB_LEADING_WHITESPACE_RE.sub(r'  ', input_line)

        # Skip empty lines if the previous output line is also empty. Nothing
        # written yet counts as "previous line empty", so leading blank lines
        # are dropped instead of indexing into an empty list.
        if input_line or (output_lines and output_lines[-1]):
            output_lines.append(input_line)

    # Add a blank line before the new checks if required.
    if output_lines and output_lines[-1]:
        output_lines.append('')

    output_check_lines = []
    for block_info in block_infos:
        for block_text in sorted(block_info):
            if not block_text:
                continue

            prefixes = block_info[block_text]
            if not prefixes:
                continue

            lines = block_text.split('\n')
            for prefix in prefixes:
                if prefix in not_prefix_set:
                    _warn('not writing for prefix {0} due to presence of "{0}-NOT:" '
                          'in input file.'.format(prefix))
                    continue

                # The first line of a block uses the plain prefix; every
                # following line uses the -NEXT form.
                output_check_lines.append(
                    '{} {}: {}'.format(COMMENT_CHAR, prefix, lines[0]).rstrip())
                output_check_lines.extend(
                    '{} {}-NEXT: {}'.format(COMMENT_CHAR, prefix, line).rstrip()
                    for line in lines[1:])
                output_check_lines.append('')

    if output_check_lines:
        output_lines.insert(0, ADVERT)
        output_lines.extend(output_check_lines)

    if input_lines == output_lines:
        sys.stderr.write(' [unchanged]\n')
        return

    diff = list(difflib.Differ().compare(input_lines, output_lines))
    added = sum(1 for diff_line in diff if diff_line[0] == '+')
    removed = sum(1 for diff_line in diff if diff_line[0] == '-')
    sys.stderr.write(
        ' [{} lines total ({} added, {} removed)]\n'.format(
            len(output_lines), added, removed))

    if args.verbose:
        sys.stderr.write(
            'Writing {} lines to {}...\n\n'.format(len(output_lines), test_path))

    # Written in binary mode so that every line ends in a bare '\n'.
    with open(test_path, 'wb') as f:
        for line in output_lines:
            f.write('{}\n'.format(line.rstrip()).encode())
|
||||
|
||||
|
||||
def main():
    """Regenerate the checks of every test file matched on the command line.

    Each entry of args.tests is expanded with glob.glob; every resulting
    path is then read, analysed and rewritten in turn.

    Returns:
      0 once all matched test files have been processed.

    Raises:
      Error: If a matched path is not a regular file.
    """
    args = _parse_args()

    # Expand every pattern before touching any file.
    test_paths = []
    for pattern in args.tests:
        test_paths.extend(glob.glob(pattern))

    for test_path in test_paths:
        sys.stderr.write('Test: {}\n'.format(test_path))

        # By default a warning is only written once per source location.
        # Re-configuring the warning filter for each test makes every warning
        # appear once per source location per test instead.
        _configure_warnings(args)

        if args.verbose:
            sys.stderr.write(
                'Scanning for RUN lines in test file: {}\n'.format(test_path))

        if not os.path.isfile(test_path):
            raise Error('could not find test file: {}'.format(test_path))

        with open(test_path) as test_file:
            input_lines = [raw_line.rstrip() for raw_line in test_file]

        run_infos = _get_run_infos(_find_run_lines(input_lines, args), args)
        block_infos = _get_block_infos(run_infos, test_path, args)
        _write_output(test_path, input_lines, run_infos, block_infos, args)

    return 0
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Install _showwarning as the hook the warnings module uses to emit
    # warnings, then run the tool. An Error raised by main() is reported as a
    # one-line message and turned into exit status 1.
    warnings.showwarning = _showwarning
    try:
        exit_status = main()
    except Error as err:
        sys.stdout.write('error: {}\n'.format(err))
        exit_status = 1
    sys.exit(exit_status)
|
Loading…
Reference in New Issue
Block a user